mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[netzkino] Add new extractor (Fixes #4669)
This commit is contained in:
		| @@ -110,6 +110,20 @@ def expect_info_dict(self, got_dict, expected_dict): | ||||
|         else: | ||||
|             if isinstance(expected, compat_str) and expected.startswith('md5:'): | ||||
|                 got = 'md5:' + md5(got_dict.get(info_field)) | ||||
|             elif isinstance(expected, compat_str) and expected.startswith('mincount:'): | ||||
|                 got = got_dict.get(info_field) | ||||
|                 self.assertTrue( | ||||
|                     isinstance(got, list), | ||||
|                     'Expected field %s to be a list, but it is of type %s' % ( | ||||
|                         info_field, type(got).__name__)) | ||||
|                 expected_num = int(expected.partition(':')[2]) | ||||
|                 assertGreaterEqual( | ||||
|                     self, len(got), expected_num, | ||||
|                     'Expected %d items in field %s, but only got %d' % ( | ||||
|                         expected_num, info_field, len(got) | ||||
|                     ) | ||||
|                 ) | ||||
|                 continue | ||||
|             else: | ||||
|                 got = got_dict.get(info_field) | ||||
|             self.assertEqual(expected, got, | ||||
|   | ||||
| @@ -274,6 +274,7 @@ from .nbc import ( | ||||
| ) | ||||
| from .ndr import NDRIE | ||||
| from .ndtv import NDTVIE | ||||
| from .netzkino import NetzkinoIE | ||||
| from .nerdcubed import NerdCubedFeedIE | ||||
| from .newgrounds import NewgroundsIE | ||||
| from .newstube import NewstubeIE | ||||
|   | ||||
| @@ -147,6 +147,17 @@ class InfoExtractor(object): | ||||
|     like_count:     Number of positive ratings of the video | ||||
|     dislike_count:  Number of negative ratings of the video | ||||
|     comment_count:  Number of comments on the video | ||||
|     comments:       A list of comments, each with one or more of the following | ||||
|                     properties (all but one of text or html optional): | ||||
|                         * "author" - human-readable name of the comment author | ||||
|                         * "author_id" - user ID of the comment author | ||||
|                         * "id" - Comment ID | ||||
|                         * "html" - Comment as HTML | ||||
|                         * "text" - Plain text of the comment | ||||
|                         * "timestamp" - UNIX timestamp of comment | ||||
|                         * "parent" - ID of the comment this one is replying to. | ||||
|                                      Set to "root" to indicate that this is a | ||||
|                                      comment to the original video. | ||||
|     age_limit:      Age restriction for the video, as an integer (years) | ||||
|     webpage_url:    The url to the video webpage, if given to youtube-dl it | ||||
|                     should allow to get the same result again. (It will be set | ||||
|   | ||||
							
								
								
									
										86
									
								
								youtube_dl/extractor/netzkino.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								youtube_dl/extractor/netzkino.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,86 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
class NetzkinoIE(InfoExtractor):
    """Information extractor for netzkino.de ``#!/<category>/<id>`` URLs."""

    _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/(?P<category>[^/]+)/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://www.netzkino.de/#!/scifikino/rakete-zum-mond',
        'md5': '92a3f8b76f8d7220acce5377ea5d4873',
        'info_dict': {
            'id': 'rakete-zum-mond',
            'ext': 'mp4',
            'title': 'Rakete zum Mond (Endstation Mond, Destination Moon)',
            'comments': 'mincount:3',
            'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28',
            'upload_date': '20120813',
            # Raw string: "\." is a regex escape, not a string escape.
            'thumbnail': r're:https?://.*\.jpg$',
            'timestamp': 1344858571,
            'age_limit': 12,
        },
    }

    # Suffix appended to the expanded URL template of each known format key.
    _FORMAT_SUFFIXES = {
        'hds': '.mp4/manifest.f4m',
        'hls': '.mp4/master.m3u8',
        'pmd': '.mp4',
    }

    def _real_extract(self, url):
        """Extract the info dict for a single netzkino.de film URL.

        The category listing is fetched from the JSON API and the post whose
        ``slug`` equals the video id is selected.  The URL templates are read
        from the site's player JavaScript and combined with the film's
        ``Streaming`` custom field to build the formats.

        Returns a dict with ``id``, ``formats``, ``comments``, ``title``,
        ``age_limit``, ``timestamp``, ``description``, ``thumbnail``,
        ``playlist_title`` and ``playlist_id``.

        Raises StopIteration if the category contains no post with the
        requested slug, and KeyError/IndexError if the post lacks the
        mandatory ``Streaming`` custom field.
        """
        mobj = re.match(self._VALID_URL, url)
        category_id = mobj.group('category')
        video_id = mobj.group('id')

        api_url = 'http://api.netzkino.de.simplecache.net/capi-2.0a/categories/%s.json?d=www' % category_id
        api_info = self._download_json(api_url, video_id)
        # NOTE: next() without a default raises StopIteration when no post
        # in the category matches the slug.
        info = next(
            p for p in api_info['posts'] if p['slug'] == video_id)
        custom_fields = info['custom_fields']

        production_js = self._download_webpage(
            'http://www.netzkino.de/beta/dist/production.min.js', video_id,
            note='Downloading player code')
        avo_js = self._search_regex(
            r'window\.avoCore\s*=.*?urlTemplate:\s*(\{.*?"\})',
            production_js, 'URL templates')
        templates = self._parse_json(
            avo_js, video_id, transform_source=js_to_json)

        film_fn = custom_fields['Streaming'][0]
        # Only build formats for template keys with a known suffix, so that a
        # template kind added by the site does not abort the whole extraction
        # with a KeyError.
        formats = [{
            'format_id': key,
            'ext': 'mp4',
            'url': tpl.replace('{}', film_fn) + self._FORMAT_SUFFIXES[key],
        } for key, tpl in templates.items() if key in self._FORMAT_SUFFIXES]
        self._sort_formats(formats)

        comments = [{
            'timestamp': parse_iso8601(c.get('date'), delimiter=' '),
            'id': c['id'],
            'author': c['name'],
            'html': c['content'],
            # A missing or zero parent marks a top-level comment.
            'parent': 'root' if c.get('parent', 0) == 0 else c['parent'],
        } for c in info.get('comments', [])]

        # 'FSK' is optional: fall back to a single None entry so that a
        # missing or empty field yields age_limit None instead of raising.
        # (int_or_none is expected to pass None through -- confirm in utils.)
        fsk_values = custom_fields.get('FSK') or [None]

        return {
            'id': video_id,
            'formats': formats,
            'comments': comments,
            'title': info['title'],
            'age_limit': int_or_none(fsk_values[0]),
            'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
            'description': clean_html(info.get('content')),
            'thumbnail': info.get('thumbnail'),
            'playlist_title': api_info.get('title'),
            'playlist_id': category_id,
        }
| @@ -205,6 +205,10 @@ def get_element_by_attribute(attribute, value, html): | ||||
|  | ||||
| def clean_html(html): | ||||
|     """Clean an HTML snippet into a readable string""" | ||||
|  | ||||
|     if html is None:  # Convenience for sanitizing descriptions etc. | ||||
|         return html | ||||
|  | ||||
|     # Newline vs <br /> | ||||
|     html = html.replace('\n', ' ') | ||||
|     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister