mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	The id for the videos is now the full id, not the one in the webpage url. Also extract more information: description, view_count and upload_date.
This commit is contained in:
		| @@ -8,15 +8,16 @@ from .common import InfoExtractor | |||||||
| class TF1IE(InfoExtractor): | class TF1IE(InfoExtractor): | ||||||
|     """ |     """ | ||||||
|     TF1 uses the wat.tv player, currently it can only download videos with the |     TF1 uses the wat.tv player, currently it can only download videos with the | ||||||
|     html5 player enabled, it cannot download HD videos or the news. |     html5 player enabled, it cannot download HD videos. | ||||||
|     """ |     """ | ||||||
|     _VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html' |     _VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html' | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', |         u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', | ||||||
|         u'file': u'6bysb.mp4', |         u'file': u'10635995.mp4', | ||||||
|         u'md5': u'66789d3e91278d332f75e1feb7aea327', |         u'md5': u'66789d3e91278d332f75e1feb7aea327', | ||||||
|         u'info_dict': { |         u'info_dict': { | ||||||
|             u"title": u"Citroën Grand C4 Picasso 2013 : présentation officielle" |             u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle', | ||||||
|  |             u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.', | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,3 +1,5 @@ | |||||||
|  | # coding: utf-8 | ||||||
|  |  | ||||||
| import json | import json | ||||||
| import re | import re | ||||||
|  |  | ||||||
| @@ -5,6 +7,7 @@ from .common import InfoExtractor | |||||||
|  |  | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     compat_urllib_parse, |     compat_urllib_parse, | ||||||
|  |     unified_strdate, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -13,36 +16,69 @@ class WatIE(InfoExtractor): | |||||||
|     IE_NAME = 'wat.tv' |     IE_NAME = 'wat.tv' | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html', |         u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html', | ||||||
|         u'file': u'6bv55.mp4', |         u'file': u'10631273.mp4', | ||||||
|         u'md5': u'0a4fe7870f31eaeabb5e25fd8da8414a', |         u'md5': u'0a4fe7870f31eaeabb5e25fd8da8414a', | ||||||
|         u'info_dict': { |         u'info_dict': { | ||||||
|             u"title": u"World War Z - Philadelphia VOST" |             u'title': u'World War Z - Philadelphia VOST', | ||||||
|  |             u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr', | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|      |      | ||||||
|  |     def download_video_info(self, real_id): | ||||||
|  |         # 'contentv4' is used in the website, but it also returns the related | ||||||
|  |         # videos, we don't need them | ||||||
|  |         info = self._download_webpage('http://www.wat.tv/interface/contentv3/' + real_id, real_id, 'Downloading video info') | ||||||
|  |         info = json.loads(info) | ||||||
|  |         return info['media'] | ||||||
|  |  | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|  |         def real_id_for_chapter(chapter): | ||||||
|  |             return chapter['tc_start'].split('-')[0] | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         short_id = mobj.group('shortID') |         short_id = mobj.group('shortID') | ||||||
|  |         webpage = self._download_webpage(url, short_id) | ||||||
|  |         real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id') | ||||||
|  |  | ||||||
|  |         video_info = self.download_video_info(real_id) | ||||||
|  |         chapters = video_info['chapters'] | ||||||
|  |         first_chapter = chapters[0] | ||||||
|  |  | ||||||
|  |         if real_id_for_chapter(first_chapter) != real_id: | ||||||
|  |             self.to_screen('Multipart video detected') | ||||||
|  |             chapter_urls = [] | ||||||
|  |             for chapter in chapters: | ||||||
|  |                 chapter_id = real_id_for_chapter(chapter) | ||||||
|  |                 # Yes, when this chapter is processed by WatIE, | ||||||
|  |                 # it will download the info again | ||||||
|  |                 chapter_info = self.download_video_info(chapter_id) | ||||||
|  |                 chapter_urls.append(chapter_info['url']) | ||||||
|  |             entries = [self.url_result(chapter_url) for chapter_url in chapter_urls] | ||||||
|  |             return self.playlist_result(entries, real_id, video_info['title']) | ||||||
|  |  | ||||||
|  |         # Otherwise we can continue and extract just one part, we have to use | ||||||
|  |         # the short id for getting the video url | ||||||
|         player_data = compat_urllib_parse.urlencode({'shortVideoId': short_id, |         player_data = compat_urllib_parse.urlencode({'shortVideoId': short_id, | ||||||
|                                                      'html5': '1'}) |                                                      'html5': '1'}) | ||||||
|         player_info = self._download_webpage('http://www.wat.tv/player?' + player_data, |         player_info = self._download_webpage('http://www.wat.tv/player?' + player_data, | ||||||
|                                              short_id, u'Downloading player info') |                                              real_id, u'Downloading player info') | ||||||
|         player = json.loads(player_info)['player'] |         player = json.loads(player_info)['player'] | ||||||
|         html5_player = self._html_search_regex(r'iframe src="(.*?)"', player, |         html5_player = self._html_search_regex(r'iframe src="(.*?)"', player, | ||||||
|                                                'html5 player') |                                                'html5 player') | ||||||
|         player_webpage = self._download_webpage(html5_player, short_id, |         player_webpage = self._download_webpage(html5_player, real_id, | ||||||
|                                                 u'Downloading player webpage') |                                                 u'Downloading player webpage') | ||||||
|  |  | ||||||
|         video_url = self._search_regex(r'urlhtml5 : "(.*?)"', player_webpage, |         video_url = self._search_regex(r'urlhtml5 : "(.*?)"', player_webpage, | ||||||
|                                        'video url') |                                        'video url') | ||||||
|         title = self._search_regex(r'contentTitle : "(.*?)"', player_webpage, |         info = {'id': real_id, | ||||||
|                                    'title') |  | ||||||
|         thumbnail = self._search_regex(r'previewMedia : "(.*?)"', player_webpage, |  | ||||||
|                                        'thumbnail') |  | ||||||
|         return {'id': short_id, |  | ||||||
|                 'url': video_url, |                 'url': video_url, | ||||||
|                 'ext': 'mp4', |                 'ext': 'mp4', | ||||||
|                 'title': title, |                 'title': first_chapter['title'], | ||||||
|                 'thumbnail': thumbnail, |                 'thumbnail': first_chapter['preview'], | ||||||
|  |                 'description': first_chapter['description'], | ||||||
|  |                 'view_count': video_info['views'], | ||||||
|                 } |                 } | ||||||
|  |         if 'date_diffusion' in first_chapter: | ||||||
|  |             info['upload_date'] = unified_strdate(first_chapter['date_diffusion']) | ||||||
|  |  | ||||||
|  |         return info | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Jaime Marquínez Ferrándiz
					Jaime Marquínez Ferrándiz