mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[netzkino] Add new extractor (Fixes #4669)
This commit is contained in:
		| @@ -110,6 +110,20 @@ def expect_info_dict(self, got_dict, expected_dict): | |||||||
|         else: |         else: | ||||||
|             if isinstance(expected, compat_str) and expected.startswith('md5:'): |             if isinstance(expected, compat_str) and expected.startswith('md5:'): | ||||||
|                 got = 'md5:' + md5(got_dict.get(info_field)) |                 got = 'md5:' + md5(got_dict.get(info_field)) | ||||||
|  |             elif isinstance(expected, compat_str) and expected.startswith('mincount:'): | ||||||
|  |                 got = got_dict.get(info_field) | ||||||
|  |                 self.assertTrue( | ||||||
|  |                     isinstance(got, list), | ||||||
|  |                     'Expected field %s to be a list, but it is of type %s' % ( | ||||||
|  |                         info_field, type(got).__name__)) | ||||||
|  |                 expected_num = int(expected.partition(':')[2]) | ||||||
|  |                 assertGreaterEqual( | ||||||
|  |                     self, len(got), expected_num, | ||||||
|  |                     'Expected %d items in field %s, but only got %d' % ( | ||||||
|  |                         expected_num, info_field, len(got) | ||||||
|  |                     ) | ||||||
|  |                 ) | ||||||
|  |                 continue | ||||||
|             else: |             else: | ||||||
|                 got = got_dict.get(info_field) |                 got = got_dict.get(info_field) | ||||||
|             self.assertEqual(expected, got, |             self.assertEqual(expected, got, | ||||||
|   | |||||||
| @@ -274,6 +274,7 @@ from .nbc import ( | |||||||
| ) | ) | ||||||
| from .ndr import NDRIE | from .ndr import NDRIE | ||||||
| from .ndtv import NDTVIE | from .ndtv import NDTVIE | ||||||
|  | from .netzkino import NetzkinoIE | ||||||
| from .nerdcubed import NerdCubedFeedIE | from .nerdcubed import NerdCubedFeedIE | ||||||
| from .newgrounds import NewgroundsIE | from .newgrounds import NewgroundsIE | ||||||
| from .newstube import NewstubeIE | from .newstube import NewstubeIE | ||||||
|   | |||||||
| @@ -147,6 +147,17 @@ class InfoExtractor(object): | |||||||
|     like_count:     Number of positive ratings of the video |     like_count:     Number of positive ratings of the video | ||||||
|     dislike_count:  Number of negative ratings of the video |     dislike_count:  Number of negative ratings of the video | ||||||
|     comment_count:  Number of comments on the video |     comment_count:  Number of comments on the video | ||||||
|  |     comments:       A list of comments, each with one or more of the following | ||||||
|  |                     properties (all but one of text or html optional): | ||||||
|  |                         * "author" - human-readable name of the comment author | ||||||
|  |                         * "author_id" - user ID of the comment author | ||||||
|  |                         * "id" - Comment ID | ||||||
|  |                         * "html" - Comment as HTML | ||||||
|  |                         * "text" - Plain text of the comment | ||||||
|  |                         * "timestamp" - UNIX timestamp of comment | ||||||
|  |                         * "parent" - ID of the comment this one is replying to. | ||||||
|  |                                      Set to "root" to indicate that this is a | ||||||
|  |                                      comment to the original video. | ||||||
|     age_limit:      Age restriction for the video, as an integer (years) |     age_limit:      Age restriction for the video, as an integer (years) | ||||||
|     webpage_url:    The url to the video webpage, if given to youtube-dl it |     webpage_url:    The url to the video webpage, if given to youtube-dl it | ||||||
|                     should allow to get the same result again. (It will be set |                     should allow to get the same result again. (It will be set | ||||||
|   | |||||||
							
								
								
									
										86
									
								
								youtube_dl/extractor/netzkino.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								youtube_dl/extractor/netzkino.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,86 @@ | |||||||
|  | # coding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     clean_html, | ||||||
|  |     int_or_none, | ||||||
|  |     js_to_json, | ||||||
|  |     parse_iso8601, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
class NetzkinoIE(InfoExtractor):
    """Information extractor for netzkino.de ``#!/<category>/<slug>`` URLs."""

    _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/(?P<category>[^/]+)/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://www.netzkino.de/#!/scifikino/rakete-zum-mond',
        'md5': '92a3f8b76f8d7220acce5377ea5d4873',
        'info_dict': {
            'id': 'rakete-zum-mond',
            'ext': 'mp4',
            'title': 'Rakete zum Mond (Endstation Mond, Destination Moon)',
            'comments': 'mincount:3',
            'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28',
            'upload_date': '20120813',
            # Raw string: "\." is not a valid escape in a plain string literal.
            'thumbnail': r're:https?://.*\.jpg$',
            'timestamp': 1344858571,
            'age_limit': 12,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the film addressed by *url*.

        The film's metadata is looked up by slug in the category listing
        returned by the API; the stream URLs are assembled from the URL
        templates found in the downloaded player code.

        Returns a dict with the film's id, formats, comments, title,
        age limit, timestamp, description, thumbnail and the category as
        playlist information.
        """
        mobj = re.match(self._VALID_URL, url)
        category_id = mobj.group('category')
        video_id = mobj.group('id')

        api_url = 'http://api.netzkino.de.simplecache.net/capi-2.0a/categories/%s.json?d=www' % category_id
        api_info = self._download_json(api_url, video_id)
        # NOTE(review): next() raises StopIteration when no post in the
        # category listing carries this slug.
        info = next(
            p for p in api_info['posts'] if p['slug'] == video_id)
        custom_fields = info['custom_fields']

        production_js = self._download_webpage(
            'http://www.netzkino.de/beta/dist/production.min.js', video_id,
            note='Downloading player code')
        avo_js = self._search_regex(
            r'window\.avoCore\s*=.*?urlTemplate:\s*(\{.*?"\})',
            production_js, 'URL templates')
        templates = self._parse_json(
            avo_js, video_id, transform_source=js_to_json)

        # Per-template-key suffix appended after the film file name.
        suffix = {
            'hds': '.mp4/manifest.f4m',
            'hls': '.mp4/master.m3u8',
            'pmd': '.mp4',
        }
        film_fn = custom_fields['Streaming'][0]
        # Each template contains a "{}" placeholder for the film file name.
        formats = [{
            'format_id': key,
            'ext': 'mp4',
            'url': tpl.replace('{}', film_fn) + suffix[key],
        } for key, tpl in templates.items()]
        self._sort_formats(formats)

        comments = [{
            'timestamp': parse_iso8601(c.get('date'), delimiter=' '),
            'id': c['id'],
            'author': c['name'],
            'html': c['content'],
            # A parent of 0 (or no parent at all) marks a top-level comment.
            'parent': 'root' if c.get('parent', 0) == 0 else c['parent'],
        } for c in info.get('comments', [])]

        # The age rating is optional metadata: a missing or empty 'FSK'
        # entry must yield None instead of failing the whole extraction.
        fsk = custom_fields.get('FSK')
        age_limit = int_or_none(fsk[0]) if fsk else None

        return {
            'id': video_id,
            'formats': formats,
            'comments': comments,
            'title': info['title'],
            'age_limit': age_limit,
            'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
            'description': clean_html(info.get('content')),
            'thumbnail': info.get('thumbnail'),
            'playlist_title': api_info.get('title'),
            'playlist_id': category_id,
        }
| @@ -205,6 +205,10 @@ def get_element_by_attribute(attribute, value, html): | |||||||
|  |  | ||||||
| def clean_html(html): | def clean_html(html): | ||||||
|     """Clean an HTML snippet into a readable string""" |     """Clean an HTML snippet into a readable string""" | ||||||
|  |  | ||||||
|  |     if html is None:  # Convenience for sanitizing descriptions etc. | ||||||
|  |         return html | ||||||
|  |  | ||||||
|     # Newline vs <br /> |     # Newline vs <br /> | ||||||
|     html = html.replace('\n', ' ') |     html = html.replace('\n', ' ') | ||||||
|     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html) |     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister