mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[mtv] Add mtv.it and extract series metadata (#156)
* New extractors: MTVItalia, MTVItaliaProgramma
* Extract fields: series, season_number, episode_number

Authored-by: nixxo
This commit is contained in:
		| @@ -732,6 +732,8 @@ from .mtv import ( | |||||||
|     MTVServicesEmbeddedIE, |     MTVServicesEmbeddedIE, | ||||||
|     MTVDEIE, |     MTVDEIE, | ||||||
|     MTVJapanIE, |     MTVJapanIE, | ||||||
|  |     MTVItaliaIE, | ||||||
|  |     MTVItaliaProgrammaIE, | ||||||
| ) | ) | ||||||
| from .muenchentv import MuenchenTVIE | from .muenchentv import MuenchenTVIE | ||||||
| from .mwave import MwaveIE, MwaveMeetGreetIE | from .mwave import MwaveIE, MwaveMeetGreetIE | ||||||
|   | |||||||
| @@ -14,6 +14,7 @@ from ..utils import ( | |||||||
|     fix_xml_ampersands, |     fix_xml_ampersands, | ||||||
|     float_or_none, |     float_or_none, | ||||||
|     HEADRequest, |     HEADRequest, | ||||||
|  |     int_or_none, | ||||||
|     RegexNotFoundError, |     RegexNotFoundError, | ||||||
|     sanitized_Request, |     sanitized_Request, | ||||||
|     strip_or_none, |     strip_or_none, | ||||||
| @@ -176,6 +177,22 @@ class MTVServicesInfoExtractor(InfoExtractor): | |||||||
|             raise ExtractorError('Could not find video title') |             raise ExtractorError('Could not find video title') | ||||||
|         title = title.strip() |         title = title.strip() | ||||||
|  |  | ||||||
|  |         series = find_xpath_attr( | ||||||
|  |             itemdoc, './/{http://search.yahoo.com/mrss/}category', | ||||||
|  |             'scheme', 'urn:mtvn:franchise') | ||||||
|  |         season = find_xpath_attr( | ||||||
|  |             itemdoc, './/{http://search.yahoo.com/mrss/}category', | ||||||
|  |             'scheme', 'urn:mtvn:seasonN') | ||||||
|  |         episode = find_xpath_attr( | ||||||
|  |             itemdoc, './/{http://search.yahoo.com/mrss/}category', | ||||||
|  |             'scheme', 'urn:mtvn:episodeN') | ||||||
|  |         series = series.text if series is not None else None | ||||||
|  |         season = season.text if season is not None else None | ||||||
|  |         episode = episode.text if episode is not None else None | ||||||
|  |         if season and episode: | ||||||
|  |             # episode number includes season, so remove it | ||||||
|  |             episode = re.sub(r'^%s' % season, '', episode) | ||||||
|  |  | ||||||
|         # This a short id that's used in the webpage urls |         # This a short id that's used in the webpage urls | ||||||
|         mtvn_id = None |         mtvn_id = None | ||||||
|         mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category', |         mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category', | ||||||
| @@ -201,6 +218,9 @@ class MTVServicesInfoExtractor(InfoExtractor): | |||||||
|             'description': description, |             'description': description, | ||||||
|             'duration': float_or_none(content_el.attrib.get('duration')), |             'duration': float_or_none(content_el.attrib.get('duration')), | ||||||
|             'timestamp': timestamp, |             'timestamp': timestamp, | ||||||
|  |             'series': series, | ||||||
|  |             'season_number': int_or_none(season), | ||||||
|  |             'episode_number': int_or_none(episode), | ||||||
|         } |         } | ||||||
|  |  | ||||||
|     def _get_feed_query(self, uri): |     def _get_feed_query(self, uri): | ||||||
| @@ -483,3 +503,152 @@ class MTVDEIE(MTVServicesInfoExtractor): | |||||||
|             'arcEp': 'mtv.de', |             'arcEp': 'mtv.de', | ||||||
|             'mgid': uri, |             'mgid': uri, | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |  | ||||||
class MTVItaliaIE(MTVServicesInfoExtractor):
    """Extractor for single mtv.it pages under /episodi, /video and /musica.

    Only the URL pattern, geo restriction, feed URL and feed query are
    declared here; everything else comes from MTVServicesInfoExtractor.
    """

    IE_NAME = 'mtv.it'
    _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:episodi|video|musica)/(?P<id>[0-9a-z]+)'
    _GEO_COUNTRIES = ['IT']
    _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
    _TESTS = [{
        'url': 'http://www.mtv.it/episodi/24bqab/mario-una-serie-di-maccio-capatonda-cavoli-amario-episodio-completo-S1-E1',
        'info_dict': {
            'id': '0f0fc78e-45fc-4cce-8f24-971c25477530',
            'ext': 'mp4',
            'title': 'Cavoli amario (episodio completo)',
            'description': 'md5:4962bccea8fed5b7c03b295ae1340660',
            'series': 'Mario - Una Serie Di Maccio Capatonda',
            'season_number': 1,
            'episode_number': 1,
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _get_feed_query(self, uri):
        """Return the query parameters sent to the feed for the given mgid *uri*."""
        return dict(arcEp='mtv.it', mgid=uri)
|  |  | ||||||
|  |  | ||||||
class MTVItaliaProgrammaIE(MTVItaliaIE):
    """Playlist extractor for mtv.it pages under /programmi and /playlist.

    The page URL is resolved through the "triforce" manifest feed
    (``_FEED_URL``).  The manifest either redirects to another page or lists
    "zones", each of which may carry a ``feed`` URL.  Zones are told apart by
    their ``moduleName``; the feeds provide the show details, the list of
    seasons/items and the items of the current season.
    """

    IE_NAME = 'mtv.it:programma'
    _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:programmi|playlist)/(?P<id>[0-9a-z]+)'
    _TESTS = [{
        # program page: general
        'url': 'http://www.mtv.it/programmi/s2rppv/mario-una-serie-di-maccio-capatonda',
        'info_dict': {
            'id': 'a6f155bc-8220-4640-aa43-9b95f64ffa3d',
            'title': 'Mario - Una Serie Di Maccio Capatonda',
            'description': 'md5:72fbffe1f77ccf4e90757dd4e3216153',
        },
        'playlist_count': 2,
        'params': {
            'skip_download': True,
        },
    }, {
        # program page: specific season
        'url': 'http://www.mtv.it/programmi/d9ncjf/mario-una-serie-di-maccio-capatonda-S2',
        'info_dict': {
            'id': '4deeb5d8-f272-490c-bde2-ff8d261c6dd1',
            'title': 'Mario - Una Serie Di Maccio Capatonda - Stagione 2',
        },
        'playlist_count': 34,
        'params': {
            'skip_download': True,
        },
    }, {
        # playlist page + redirect
        'url': 'http://www.mtv.it/playlist/sexy-videos/ilctal',
        'info_dict': {
            'id': 'dee8f9ee-756d-493b-bf37-16d1d2783359',
            'title': 'Sexy Videos',
        },
        'playlist_mincount': 145,
        'params': {
            'skip_download': True,
        },
    }]
    _GEO_COUNTRIES = ['IT']
    _FEED_URL = 'http://www.mtv.it/feeds/triforce/manifest/v8'

    def _get_entries(self, title, url):
        """Yield a url_result for every item of a paginated feed.

        Starting at *url*, each page is downloaded as JSON and every entry of
        ``result.data.items`` (or, failing that, ``result.data.seasons``)
        that has a ``canonicalURL`` is yielded.  Pagination follows
        ``result.nextPageURL`` until it is absent.  *title* is only used as
        the identifier shown in the download notes.
        """
        while url:
            # The page number is the trailing path component, if any;
            # the first page has none and is reported as page 1.
            pg = self._search_regex(r'/(\d+)$', url, 'entries', '1')
            page = self._download_json(url, title, 'page %s' % pg)
            url = try_get(
                page, lambda x: x['result']['nextPageURL'], compat_str)
            entries = try_get(
                page, (
                    lambda x: x['result']['data']['items'],
                    lambda x: x['result']['data']['seasons']),
                list)
            for entry in entries or []:
                if entry.get('canonicalURL'):
                    yield self.url_result(entry['canonicalURL'])

    def _real_extract(self, url):
        """Resolve *url* through the manifest feed and return a playlist.

        Returns a url_result when the manifest redirects to another page,
        otherwise a playlist_result built lazily from the playlist feed.

        Raises ExtractorError('No info found') when the manifest exposes no
        feed from which playlist entries could be read.
        """
        display_id = self._match_id(url)
        manifest_url = update_url_query(self._FEED_URL, {'url': url})
        # A response without a usable 'manifest' object is treated as an
        # empty manifest so that it ends in the ExtractorError below instead
        # of an AttributeError on None.
        info = try_get(
            self._download_json(manifest_url, display_id),
            lambda x: x['manifest'], dict) or {}

        redirect = try_get(
            info, lambda x: x['newLocation']['url'], compat_str)
        if redirect:
            return self.url_result(redirect)

        title = info.get('title')
        video_id = try_get(
            info, lambda x: x['reporting']['itemId'], compat_str)
        parent_id = try_get(
            info, lambda x: x['reporting']['parentId'], compat_str)

        # The show-details feed gets its own variable.  Reusing the manifest
        # URL variable would leave it set when no zone matches, which makes
        # the missing-feed check dead and re-downloads the manifest.
        show_url = playlist_url = current_url = None
        for zone in (info.get('zones') or {}).values():
            module = zone.get('moduleName')
            feed = zone.get('feed')
            if module in ('INTL_M304', 'INTL_M209'):
                # show/playlist details; the last zone carrying a feed wins
                show_url = feed or show_url
            if module in ('INTL_M308', 'INTL_M317'):
                # list of seasons / playlist items; the first feed wins
                playlist_url = playlist_url or feed
            if module in ('INTL_M300',):
                # items of the currently selected season; the first feed wins
                current_url = current_url or feed

        if not (playlist_url or current_url):
            # Without any of these feeds there is nothing to enumerate.
            raise ExtractorError('No info found')

        if video_id == parent_id and show_url:
            # The reported item id is not specific to this page; take the
            # id from the second-to-last path component of the details feed.
            video_id = self._search_regex(
                r'([^\/]+)/[^\/]+$', show_url, 'video_id')
        # Never hand a missing id to the download notes or the result.
        video_id = video_id or display_id

        show = None
        if show_url:
            show = try_get(
                self._download_json(show_url, video_id, 'Show infos'),
                lambda x: x['result']['data'], dict)
        title = title or try_get(
            show, (
                lambda x: x['title'],
                lambda x: x['headline']),
            compat_str)
        description = try_get(show, lambda x: x['content'], compat_str)

        if current_url and playlist_url:
            season = try_get(
                self._download_json(playlist_url, video_id, 'Seasons info'),
                lambda x: x['result']['data'], dict)
            current = try_get(
                season, lambda x: x['currentSeason'], compat_str)
            seasons = try_get(
                season, lambda x: x['seasons'], list) or []

            # When the page is for one of the listed seasons, enumerate that
            # season's items instead of the list of seasons.
            if current in [s.get('eTitle') for s in seasons]:
                playlist_url = current_url
        elif current_url:
            # No seasons feed to consult: the current-season feed is the
            # only source of entries.
            playlist_url = current_url

        if title:
            # Strip site/section suffixes such as "| MTV Italia".
            title = re.sub(
                r'[-|]\s*(?:mtv\s*italia|programma|playlist)',
                '', title, flags=re.IGNORECASE).strip()

        return self.playlist_result(
            self._get_entries(title, playlist_url),
            video_id, title, description)
|   | |||||||
		Reference in New Issue
	
	Block a user
	 nixxo
					nixxo