mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	Add support for tou.tv (Fixes #1792)
This commit is contained in:
		| @@ -133,6 +133,7 @@ from .techtalks import TechTalksIE | |||||||
| from .ted import TEDIE | from .ted import TEDIE | ||||||
| from .tf1 import TF1IE | from .tf1 import TF1IE | ||||||
| from .thisav import ThisAVIE | from .thisav import ThisAVIE | ||||||
|  | from .toutv import TouTvIE | ||||||
| from .traileraddict import TrailerAddictIE | from .traileraddict import TrailerAddictIE | ||||||
| from .trilulilu import TriluliluIE | from .trilulilu import TriluliluIE | ||||||
| from .tube8 import Tube8IE | from .tube8 import Tube8IE | ||||||
|   | |||||||
| @@ -350,6 +350,17 @@ class InfoExtractor(object): | |||||||
|         if secure: regexes = self._og_regexes('video:secure_url') + regexes |         if secure: regexes = self._og_regexes('video:secure_url') + regexes | ||||||
|         return self._html_search_regex(regexes, html, name, **kargs) |         return self._html_search_regex(regexes, html, name, **kargs) | ||||||
|  |  | ||||||
|  |     def _html_search_meta(self, name, html, display_name=None): | ||||||
|  |         if display_name is None: | ||||||
|  |             display_name = name | ||||||
|  |         return self._html_search_regex( | ||||||
|  |             r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\']) | ||||||
|  |                     [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), | ||||||
|  |             html, display_name, fatal=False) | ||||||
|  |  | ||||||
|  |     def _dc_search_uploader(self, html): | ||||||
|  |         return self._html_search_meta('dc.creator', html, 'uploader') | ||||||
|  |  | ||||||
|     def _rta_search(self, html): |     def _rta_search(self, html): | ||||||
|         # See http://www.rtalabel.org/index.php?content=howtofaq#single |         # See http://www.rtalabel.org/index.php?content=howtofaq#single | ||||||
|         if re.search(r'(?ix)<meta\s+name="rating"\s+' |         if re.search(r'(?ix)<meta\s+name="rating"\s+' | ||||||
| @@ -358,6 +369,23 @@ class InfoExtractor(object): | |||||||
|             return 18 |             return 18 | ||||||
|         return 0 |         return 0 | ||||||
|  |  | ||||||
|  |     def _media_rating_search(self, html): | ||||||
|  |         # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/ | ||||||
|  |         rating = self._html_search_meta('rating', html) | ||||||
|  |  | ||||||
|  |         if not rating: | ||||||
|  |             return None | ||||||
|  |  | ||||||
|  |         RATING_TABLE = { | ||||||
|  |             'safe for kids': 0, | ||||||
|  |             'general': 8, | ||||||
|  |             '14 years': 14, | ||||||
|  |             'mature': 17, | ||||||
|  |             'restricted': 19, | ||||||
|  |         } | ||||||
|  |         return RATING_TABLE.get(rating.lower(), None) | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class SearchInfoExtractor(InfoExtractor): | class SearchInfoExtractor(InfoExtractor): | ||||||
|     """ |     """ | ||||||
|   | |||||||
							
								
								
									
										75
									
								
								youtube_dl/extractor/toutv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								youtube_dl/extractor/toutv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,75 @@ | |||||||
|  | # coding: utf-8 | ||||||
|  | import re | ||||||
|  | import xml.etree.ElementTree | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     ExtractorError, | ||||||
|  |     unified_strdate, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
class TouTvIE(InfoExtractor):
    """Information extractor for episode pages on www.tou.tv.

    Accepted URLs look like ``http(s)://www.tou.tv/<show>/S<nn>E<nn>``.
    The episode page supplies the metadata and a media ID; the media ID
    is then used to fetch a stream list from release.theplatform.com.
    """
    IE_NAME = u'tou.tv'
    _VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'

    _TEST = {
        u'url': u'http://www.tou.tv/30-vies/S04E41',
        u'file': u'30-vies_S04E41.mp4',
        u'info_dict': {
            u'title': u'30 vies Saison 4 / Épisode 41',
            u'description': u'md5:da363002db82ccbe4dafeb9cab039b09',
            u'age_limit': 8,
            u'uploader': u'Groupe des Nouveaux Médias',
            u'duration': 1296,
            u'upload_date': u'20131118',
            u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
        },
        u'params': {
            u'skip_download': True,  # Requires rtmpdump
        },
        # NOTE(review): the key is 'xskip', not 'skip' -- presumably spelled
        # this way on purpose so the test is not skipped; confirm.
        u'xskip': 'Only available in Canada'
    }

    def _real_extract(self, url):
        """Extract the info dict for the episode page at *url*.

        Raises ExtractorError when the stream list contains no usable
        video URL, or when the service answers with its "Unavailable"
        placeholder clip (access blocked from outside of Canada).
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        media_id = self._search_regex(
            r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')

        # TODO test from de
        streams_url = u'http://release.theplatform.com/content.select?pid=' + media_id
        streams_webpage = self._download_webpage(
            streams_url, video_id, note=u'Downloading stream list')

        streams_doc = xml.etree.ElementTree.fromstring(
            streams_webpage.encode('utf-8'))
        # Take the first URL that is not an ad.  Empty <url/> elements have
        # text None and are skipped; the explicit default keeps a bare
        # StopIteration from escaping when nothing qualifies.
        video_url = next(
            (n.text
             for n in streams_doc.findall('.//choice/url')
             if n.text and u'//ad.doubleclick' not in n.text),
            None)
        if video_url is None:
            raise ExtractorError(
                u'Unable to find a video URL in the stream list')
        if video_url.endswith('/Unavailable.flv'):
            raise ExtractorError(
                u'Access to this video is blocked from outside of Canada',
                expected=True)

        duration_str = self._html_search_meta(
            'video:duration', webpage, u'duration')
        # The duration is optional metadata: a malformed value must not
        # abort the whole extraction.
        try:
            duration = int(duration_str) if duration_str else None
        except ValueError:
            duration = None
        upload_date_str = self._html_search_meta(
            'video:release_date', webpage, u'upload date')
        upload_date = unified_strdate(upload_date_str) if upload_date_str else None

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'url': video_url,
            'description': self._og_search_description(webpage),
            'uploader': self._dc_search_uploader(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'age_limit': self._media_rating_search(webpage),
            'duration': duration,
            'upload_date': upload_date,
            'ext': 'mp4',
        }
| @@ -734,6 +734,8 @@ def unified_strdate(date_str): | |||||||
|         '%Y/%m/%d %H:%M:%S', |         '%Y/%m/%d %H:%M:%S', | ||||||
|         '%d.%m.%Y %H:%M', |         '%d.%m.%Y %H:%M', | ||||||
|         '%Y-%m-%dT%H:%M:%SZ', |         '%Y-%m-%dT%H:%M:%SZ', | ||||||
|  |         '%Y-%m-%dT%H:%M:%S.%fZ', | ||||||
|  |         '%Y-%m-%dT%H:%M:%S.%f0Z', | ||||||
|         '%Y-%m-%dT%H:%M:%S', |         '%Y-%m-%dT%H:%M:%S', | ||||||
|     ] |     ] | ||||||
|     for expression in format_expressions: |     for expression in format_expressions: | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister