mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	
							
								
								
									
										9
									
								
								.github/workflows/core.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										9
									
								
								.github/workflows/core.yml
									
									
									
									
										vendored
									
									
								
							| @@ -41,11 +41,18 @@ jobs: | |||||||
|     - name: Install Jython |     - name: Install Jython | ||||||
|       if: ${{ matrix.python-impl == 'jython' }} |       if: ${{ matrix.python-impl == 'jython' }} | ||||||
|       run: | |       run: | | ||||||
|         wget http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar |         wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar | ||||||
|         java -jar jython-installer.jar -s -d "$HOME/jython" |         java -jar jython-installer.jar -s -d "$HOME/jython" | ||||||
|         echo "$HOME/jython/bin" >> $GITHUB_PATH |         echo "$HOME/jython/bin" >> $GITHUB_PATH | ||||||
|     - name: Install nose |     - name: Install nose | ||||||
|  |       if: ${{ matrix.python-impl != 'jython' }} | ||||||
|       run: pip install nose |       run: pip install nose | ||||||
|  |     - name: Install nose (Jython) | ||||||
|  |       if: ${{ matrix.python-impl == 'jython' }} | ||||||
|  |       # Working around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb) | ||||||
|  |       run: | | ||||||
|  |         wget https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl | ||||||
|  |         pip install nose-1.3.7-py2-none-any.whl | ||||||
|     - name: Run tests |     - name: Run tests | ||||||
|       continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }} |       continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }} | ||||||
|       env: |       env: | ||||||
|   | |||||||
							
								
								
									
										9
									
								
								.github/workflows/download.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										9
									
								
								.github/workflows/download.yml
									
									
									
									
										vendored
									
									
								
							| @@ -41,11 +41,18 @@ jobs: | |||||||
|     - name: Install Jython |     - name: Install Jython | ||||||
|       if: ${{ matrix.python-impl == 'jython' }} |       if: ${{ matrix.python-impl == 'jython' }} | ||||||
|       run: | |       run: | | ||||||
|         wget http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar |         wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar | ||||||
|         java -jar jython-installer.jar -s -d "$HOME/jython" |         java -jar jython-installer.jar -s -d "$HOME/jython" | ||||||
|         echo "$HOME/jython/bin" >> $GITHUB_PATH |         echo "$HOME/jython/bin" >> $GITHUB_PATH | ||||||
|     - name: Install nose |     - name: Install nose | ||||||
|  |       if: ${{ matrix.python-impl != 'jython' }} | ||||||
|       run: pip install nose |       run: pip install nose | ||||||
|  |     - name: Install nose (Jython) | ||||||
|  |       if: ${{ matrix.python-impl == 'jython' }} | ||||||
|  |       # Working around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb) | ||||||
|  |       run: | | ||||||
|  |         wget https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl | ||||||
|  |         pip install nose-1.3.7-py2-none-any.whl | ||||||
|     - name: Run tests |     - name: Run tests | ||||||
|       continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }} |       continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }} | ||||||
|       env: |       env: | ||||||
|   | |||||||
| @@ -1056,11 +1056,20 @@ class YoutubeDL(object): | |||||||
|  |  | ||||||
|     def extract_info(self, url, download=True, ie_key=None, extra_info={}, |     def extract_info(self, url, download=True, ie_key=None, extra_info={}, | ||||||
|                      process=True, force_generic_extractor=False): |                      process=True, force_generic_extractor=False): | ||||||
|         ''' |         """ | ||||||
|         Returns a list with a dictionary for each video we find. |         Return a list with a dictionary for each video extracted. | ||||||
|         If 'download', also downloads the videos. |  | ||||||
|         extra_info is a dict containing the extra values to add to each result |         Arguments: | ||||||
|         ''' |         url -- URL to extract | ||||||
|  |  | ||||||
|  |         Keyword arguments: | ||||||
|  |         download -- whether to download videos during extraction | ||||||
|  |         ie_key -- extractor key hint | ||||||
|  |         extra_info -- dictionary containing the extra values to add to each result | ||||||
|  |         process -- whether to resolve all unresolved references (URLs, playlist items), | ||||||
|  |             must be True for download to work. | ||||||
|  |         force_generic_extractor -- force using the generic extractor | ||||||
|  |         """ | ||||||
|  |  | ||||||
|         if not ie_key and force_generic_extractor: |         if not ie_key and force_generic_extractor: | ||||||
|             ie_key = 'Generic' |             ie_key = 'Generic' | ||||||
|   | |||||||
| @@ -133,6 +133,8 @@ class CDAIE(InfoExtractor): | |||||||
|             'age_limit': 18 if need_confirm_age else 0, |             'age_limit': 18 if need_confirm_age else 0, | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         info = self._search_json_ld(webpage, video_id, default={}) | ||||||
|  |  | ||||||
|         # Source: https://www.cda.pl/js/player.js?t=1606154898 |         # Source: https://www.cda.pl/js/player.js?t=1606154898 | ||||||
|         def decrypt_file(a): |         def decrypt_file(a): | ||||||
|             for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'): |             for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'): | ||||||
| @@ -197,7 +199,7 @@ class CDAIE(InfoExtractor): | |||||||
|                 handler = self._download_webpage |                 handler = self._download_webpage | ||||||
|  |  | ||||||
|             webpage = handler( |             webpage = handler( | ||||||
|                 self._BASE_URL + href, video_id, |                 urljoin(self._BASE_URL, href), video_id, | ||||||
|                 'Downloading %s version information' % resolution, fatal=False) |                 'Downloading %s version information' % resolution, fatal=False) | ||||||
|             if not webpage: |             if not webpage: | ||||||
|                 # Manually report warning because empty page is returned when |                 # Manually report warning because empty page is returned when | ||||||
| @@ -209,6 +211,4 @@ class CDAIE(InfoExtractor): | |||||||
|  |  | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|         info = self._search_json_ld(webpage, video_id, default={}) |  | ||||||
|  |  | ||||||
|         return merge_dicts(info_dict, info) |         return merge_dicts(info_dict, info) | ||||||
|   | |||||||
| @@ -32,6 +32,18 @@ class DigitallySpeakingIE(InfoExtractor): | |||||||
|         # From http://www.gdcvault.com/play/1013700/Advanced-Material |         # From http://www.gdcvault.com/play/1013700/Advanced-Material | ||||||
|         'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml', |         'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         # From https://gdcvault.com/play/1016624, empty speakerVideo | ||||||
|  |         'url': 'https://sevt.dispeak.com/ubm/gdc/online12/xml/201210-822101_1349794556671DDDD.xml', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '201210-822101_1349794556671DDDD', | ||||||
|  |             'ext': 'flv', | ||||||
|  |             'title': 'Pre-launch - Preparing to Take the Plunge', | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         # From http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru, empty slideVideo | ||||||
|  |         'url': 'http://events.digitallyspeaking.com/gdc/project25/xml/p25-miyamoto1999_1282467389849HSVB.xml', | ||||||
|  |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _parse_mp4(self, metadata): |     def _parse_mp4(self, metadata): | ||||||
| @@ -85,24 +97,18 @@ class DigitallySpeakingIE(InfoExtractor): | |||||||
|                 'quality': 1, |                 'quality': 1, | ||||||
|                 'format_id': audio.get('code'), |                 'format_id': audio.get('code'), | ||||||
|             }) |             }) | ||||||
|         slide_video_path = xpath_text(metadata, './slideVideo', fatal=True) |         for video_key, format_id, preference in ( | ||||||
|  |                 ('slide', 'slides', -2), ('speaker', 'speaker', -1)): | ||||||
|  |             video_path = xpath_text(metadata, './%sVideo' % video_key) | ||||||
|  |             if not video_path: | ||||||
|  |                 continue | ||||||
|             formats.append({ |             formats.append({ | ||||||
|                 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, |                 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, | ||||||
|             'play_path': remove_end(slide_video_path, '.flv'), |                 'play_path': remove_end(video_path, '.flv'), | ||||||
|                 'ext': 'flv', |                 'ext': 'flv', | ||||||
|             'format_note': 'slide deck video', |                 'format_note': '%s video' % video_key, | ||||||
|             'quality': -2, |                 'quality': preference, | ||||||
|             'format_id': 'slides', |                 'format_id': format_id, | ||||||
|             'acodec': 'none', |  | ||||||
|         }) |  | ||||||
|         speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True) |  | ||||||
|         formats.append({ |  | ||||||
|             'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, |  | ||||||
|             'play_path': remove_end(speaker_video_path, '.flv'), |  | ||||||
|             'ext': 'flv', |  | ||||||
|             'format_note': 'speaker video', |  | ||||||
|             'quality': -1, |  | ||||||
|             'format_id': 'speaker', |  | ||||||
|             }) |             }) | ||||||
|         return formats |         return formats | ||||||
|  |  | ||||||
|   | |||||||
| @@ -151,7 +151,6 @@ from .bleacherreport import ( | |||||||
|     BleacherReportIE, |     BleacherReportIE, | ||||||
|     BleacherReportCMSIE, |     BleacherReportCMSIE, | ||||||
| ) | ) | ||||||
| from .blinkx import BlinkxIE |  | ||||||
| from .bloomberg import BloombergIE | from .bloomberg import BloombergIE | ||||||
| from .bokecc import BokeCCIE | from .bokecc import BokeCCIE | ||||||
| from .bongacams import BongaCamsIE | from .bongacams import BongaCamsIE | ||||||
|   | |||||||
| @@ -402,6 +402,10 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): | |||||||
|     }, { |     }, { | ||||||
|         'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin', |         'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         # "<figure id=" pattern (#28792) | ||||||
|  |         'url': 'https://www.francetvinfo.fr/culture/patrimoine/incendie-de-notre-dame-de-paris/notre-dame-de-paris-de-l-incendie-de-la-cathedrale-a-sa-reconstruction_4372291.html', | ||||||
|  |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
| @@ -419,8 +423,7 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): | |||||||
|             (r'player\.load[^;]+src:\s*["\']([^"\']+)', |             (r'player\.load[^;]+src:\s*["\']([^"\']+)', | ||||||
|              r'id-video=([^@]+@[^"]+)', |              r'id-video=([^@]+@[^"]+)', | ||||||
|              r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"', |              r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"', | ||||||
|              r'data-id=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', |              r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'), | ||||||
|              r'<figure[^>]+id=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'), |  | ||||||
|             webpage, 'video id') |             webpage, 'video id') | ||||||
|  |  | ||||||
|         return self._make_url_result(video_id) |         return self._make_url_result(video_id) | ||||||
|   | |||||||
| @@ -16,7 +16,7 @@ from ..utils import ( | |||||||
|  |  | ||||||
|  |  | ||||||
| class FunimationIE(InfoExtractor): | class FunimationIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)' |     _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:[^/]+/)?shows/[^/]+/(?P<id>[^/?#&]+)' | ||||||
|  |  | ||||||
|     _NETRC_MACHINE = 'funimation' |     _NETRC_MACHINE = 'funimation' | ||||||
|     _TOKEN = None |     _TOKEN = None | ||||||
| @@ -51,6 +51,10 @@ class FunimationIE(InfoExtractor): | |||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/', |         'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         # with lang code | ||||||
|  |         'url': 'https://www.funimation.com/en/shows/hacksign/role-play/', | ||||||
|  |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _login(self): |     def _login(self): | ||||||
|   | |||||||
| @@ -5,7 +5,10 @@ import re | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from .kaltura import KalturaIE | from .kaltura import KalturaIE | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|  |     HEADRequest, | ||||||
|  |     remove_start, | ||||||
|     sanitized_Request, |     sanitized_Request, | ||||||
|  |     smuggle_url, | ||||||
|     urlencode_postdata, |     urlencode_postdata, | ||||||
| ) | ) | ||||||
|  |  | ||||||
| @@ -100,6 +103,26 @@ class GDCVaultIE(InfoExtractor): | |||||||
|                 'format': 'mp4-408', |                 'format': 'mp4-408', | ||||||
|             }, |             }, | ||||||
|         }, |         }, | ||||||
|  |         { | ||||||
|  |             # Kaltura embed, whitespace between quote and embedded URL in iframe's src | ||||||
|  |             'url': 'https://www.gdcvault.com/play/1025699', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '0_zagynv0a', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Tech Toolbox', | ||||||
|  |                 'upload_date': '20190408', | ||||||
|  |                 'uploader_id': 'joe@blazestreaming.com', | ||||||
|  |                 'timestamp': 1554764629, | ||||||
|  |             }, | ||||||
|  |             'params': { | ||||||
|  |                 'skip_download': True, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             # HTML5 video | ||||||
|  |             'url': 'http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru', | ||||||
|  |             'only_matching': True, | ||||||
|  |         }, | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
|     def _login(self, webpage_url, display_id): |     def _login(self, webpage_url, display_id): | ||||||
| @@ -120,38 +143,78 @@ class GDCVaultIE(InfoExtractor): | |||||||
|         request = sanitized_Request(login_url, urlencode_postdata(login_form)) |         request = sanitized_Request(login_url, urlencode_postdata(login_form)) | ||||||
|         request.add_header('Content-Type', 'application/x-www-form-urlencoded') |         request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||||
|         self._download_webpage(request, display_id, 'Logging in') |         self._download_webpage(request, display_id, 'Logging in') | ||||||
|         webpage = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page') |         start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page') | ||||||
|         self._download_webpage(logout_url, display_id, 'Logging out') |         self._download_webpage(logout_url, display_id, 'Logging out') | ||||||
|  |  | ||||||
|         return webpage |         return start_page | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id, name = re.match(self._VALID_URL, url).groups() |         video_id, name = re.match(self._VALID_URL, url).groups() | ||||||
|         display_id = name or video_id |         display_id = name or video_id | ||||||
|  |  | ||||||
|         webpage = self._download_webpage(url, display_id) |         webpage_url = 'http://www.gdcvault.com/play/' + video_id | ||||||
|  |         start_page = self._download_webpage(webpage_url, display_id) | ||||||
|  |  | ||||||
|  |         direct_url = self._search_regex( | ||||||
|  |             r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);', | ||||||
|  |             start_page, 'url', default=None) | ||||||
|  |         if direct_url: | ||||||
|             title = self._html_search_regex( |             title = self._html_search_regex( | ||||||
|                 r'<td><strong>Session Name:?</strong></td>\s*<td>(.*?)</td>', |                 r'<td><strong>Session Name:?</strong></td>\s*<td>(.*?)</td>', | ||||||
|             webpage, 'title') |                 start_page, 'title') | ||||||
|  |             video_url = 'http://www.gdcvault.com' + direct_url | ||||||
|  |             # resolve the url so that we can detect the correct extension | ||||||
|  |             video_url = self._request_webpage( | ||||||
|  |                 HEADRequest(video_url), video_id).geturl() | ||||||
|  |  | ||||||
|         PLAYER_REGEX = r'<iframe src=\"(?P<manifest_url>.*?)\".*?</iframe>' |             return { | ||||||
|         manifest_url = self._html_search_regex( |                 'id': video_id, | ||||||
|             PLAYER_REGEX, webpage, 'manifest_url') |                 'display_id': display_id, | ||||||
|  |                 'url': video_url, | ||||||
|  |                 'title': title, | ||||||
|  |             } | ||||||
|  |  | ||||||
|         partner_id = self._search_regex( |         embed_url = KalturaIE._extract_url(start_page) | ||||||
|             r'/p(?:artner_id)?/(\d+)', manifest_url, 'partner id', |         if embed_url: | ||||||
|             default='1670711') |             embed_url = smuggle_url(embed_url, {'source_url': url}) | ||||||
|  |             ie_key = 'Kaltura' | ||||||
|  |         else: | ||||||
|  |             PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>' | ||||||
|  |  | ||||||
|         kaltura_id = self._search_regex( |             xml_root = self._html_search_regex( | ||||||
|             r'entry_id=(?P<id>(?:[^&])+)', manifest_url, |                 PLAYER_REGEX, start_page, 'xml root', default=None) | ||||||
|             'kaltura id', group='id') |             if xml_root is None: | ||||||
|  |                 # Probably need to authenticate | ||||||
|  |                 login_res = self._login(webpage_url, display_id) | ||||||
|  |                 if login_res is None: | ||||||
|  |                     self.report_warning('Could not login.') | ||||||
|  |                 else: | ||||||
|  |                     start_page = login_res | ||||||
|  |                     # Grab the url from the authenticated page | ||||||
|  |                     xml_root = self._html_search_regex( | ||||||
|  |                         PLAYER_REGEX, start_page, 'xml root') | ||||||
|  |  | ||||||
|  |             xml_name = self._html_search_regex( | ||||||
|  |                 r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>', | ||||||
|  |                 start_page, 'xml filename', default=None) | ||||||
|  |             if not xml_name: | ||||||
|  |                 info = self._parse_html5_media_entries(url, start_page, video_id)[0] | ||||||
|  |                 info.update({ | ||||||
|  |                     'title': remove_start(self._search_regex( | ||||||
|  |                         r'>Session Name:\s*<.*?>\s*<td>(.+?)</td>', start_page, | ||||||
|  |                         'title', default=None) or self._og_search_title( | ||||||
|  |                         start_page, default=None), 'GDC Vault - '), | ||||||
|  |                     'id': video_id, | ||||||
|  |                     'display_id': display_id, | ||||||
|  |                 }) | ||||||
|  |                 return info | ||||||
|  |             embed_url = '%s/xml/%s' % (xml_root, xml_name) | ||||||
|  |             ie_key = 'DigitallySpeaking' | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             '_type': 'url_transparent', |             '_type': 'url_transparent', | ||||||
|             'url': 'kaltura:%s:%s' % (partner_id, kaltura_id), |  | ||||||
|             'ie_key': KalturaIE.ie_key(), |  | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'display_id': display_id, |             'display_id': display_id, | ||||||
|             'title': title, |             'url': embed_url, | ||||||
|  |             'ie_key': ie_key, | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -120,7 +120,7 @@ class KalturaIE(InfoExtractor): | |||||||
|     def _extract_urls(webpage): |     def _extract_urls(webpage): | ||||||
|         # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site |         # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site | ||||||
|         finditer = ( |         finditer = ( | ||||||
|             re.finditer( |             list(re.finditer( | ||||||
|                 r"""(?xs) |                 r"""(?xs) | ||||||
|                     kWidget\.(?:thumb)?[Ee]mbed\( |                     kWidget\.(?:thumb)?[Ee]mbed\( | ||||||
|                     \{.*? |                     \{.*? | ||||||
| @@ -128,8 +128,8 @@ class KalturaIE(InfoExtractor): | |||||||
|                         (?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*? |                         (?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*? | ||||||
|                         (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s* |                         (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s* | ||||||
|                         (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\}) |                         (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\}) | ||||||
|                 """, webpage) |                 """, webpage)) | ||||||
|             or re.finditer( |             or list(re.finditer( | ||||||
|                 r'''(?xs) |                 r'''(?xs) | ||||||
|                     (?P<q1>["']) |                     (?P<q1>["']) | ||||||
|                         (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)* |                         (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)* | ||||||
| @@ -142,16 +142,16 @@ class KalturaIE(InfoExtractor): | |||||||
|                         \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s* |                         \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s* | ||||||
|                     ) |                     ) | ||||||
|                     (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3) |                     (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3) | ||||||
|                 ''', webpage) |                 ''', webpage)) | ||||||
|             or re.finditer( |             or list(re.finditer( | ||||||
|                 r'''(?xs) |                 r'''(?xs) | ||||||
|                     <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["']) |                     <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s* | ||||||
|                       (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+) |                       (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+) | ||||||
|                       (?:(?!(?P=q1)).)* |                       (?:(?!(?P=q1)).)* | ||||||
|                       [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+) |                       [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+) | ||||||
|                       (?:(?!(?P=q1)).)* |                       (?:(?!(?P=q1)).)* | ||||||
|                     (?P=q1) |                     (?P=q1) | ||||||
|                 ''', webpage) |                 ''', webpage)) | ||||||
|         ) |         ) | ||||||
|         urls = [] |         urls = [] | ||||||
|         for mobj in finditer: |         for mobj in finditer: | ||||||
|   | |||||||
| @@ -15,33 +15,39 @@ from ..utils import ( | |||||||
|  |  | ||||||
|  |  | ||||||
| class MedalTVIE(InfoExtractor): | class MedalTVIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)' |     _VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[^/?#&]+)' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr', |         'url': 'https://medal.tv/clips/2mA60jWAGQCBH', | ||||||
|         'md5': '7b07b064331b1cf9e8e5c52a06ae68fa', |         'md5': '7b07b064331b1cf9e8e5c52a06ae68fa', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '34934644', |             'id': '2mA60jWAGQCBH', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Quad Cold', |             'title': 'Quad Cold', | ||||||
|             'description': 'Medal,https://medal.tv/desktop/', |             'description': 'Medal,https://medal.tv/desktop/', | ||||||
|             'uploader': 'MowgliSB', |             'uploader': 'MowgliSB', | ||||||
|             'timestamp': 1603165266, |             'timestamp': 1603165266, | ||||||
|             'upload_date': '20201020', |             'upload_date': '20201020', | ||||||
|             'uploader_id': 10619174, |             'uploader_id': '10619174', | ||||||
|         } |         } | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://medal.tv/clips/36787208', |         'url': 'https://medal.tv/clips/2um24TWdty0NA', | ||||||
|         'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148', |         'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '36787208', |             'id': '2um24TWdty0NA', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'u tk me i tk u bigger', |             'title': 'u tk me i tk u bigger', | ||||||
|             'description': 'Medal,https://medal.tv/desktop/', |             'description': 'Medal,https://medal.tv/desktop/', | ||||||
|             'uploader': 'Mimicc', |             'uploader': 'Mimicc', | ||||||
|             'timestamp': 1605580939, |             'timestamp': 1605580939, | ||||||
|             'upload_date': '20201117', |             'upload_date': '20201117', | ||||||
|             'uploader_id': 5156321, |             'uploader_id': '5156321', | ||||||
|         } |         } | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://medal.tv/clips/37rMeFpryCC-9', | ||||||
|  |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://medal.tv/clips/2WRj40tpY_EU9', | ||||||
|  |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|   | |||||||
| @@ -146,7 +146,7 @@ class SVTPlayIE(SVTPlayBaseIE): | |||||||
|                         ) |                         ) | ||||||
|                         (?P<svt_id>[^/?#&]+)| |                         (?P<svt_id>[^/?#&]+)| | ||||||
|                         https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+) |                         https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+) | ||||||
|                         (?:.*?modalId=(?P<modal_id>[\da-zA-Z-]+))? |                         (?:.*?(?:modalId|id)=(?P<modal_id>[\da-zA-Z-]+))? | ||||||
|                     ) |                     ) | ||||||
|                     ''' |                     ''' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
| @@ -177,6 +177,9 @@ class SVTPlayIE(SVTPlayBaseIE): | |||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.svtplay.se/video/30479064/husdrommar/husdrommar-sasong-8-designdrommar-i-stenungsund?modalId=8zVbDPA', |         'url': 'https://www.svtplay.se/video/30479064/husdrommar/husdrommar-sasong-8-designdrommar-i-stenungsund?modalId=8zVbDPA', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://www.svtplay.se/video/30684086/rapport/rapport-24-apr-18-00-7?id=e72gVpa', | ||||||
|  |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|         # geo restricted to Sweden |         # geo restricted to Sweden | ||||||
|         'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten', |         'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten', | ||||||
| @@ -259,7 +262,7 @@ class SVTPlayIE(SVTPlayBaseIE): | |||||||
|         if not svt_id: |         if not svt_id: | ||||||
|             svt_id = self._search_regex( |             svt_id = self._search_regex( | ||||||
|                 (r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)', |                 (r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)', | ||||||
|                  r'<[^>]+\bdata-rt=["\']top-area-play-button["\'][^>]+\bhref=["\'][^"\']*video/%s/[^"\']*\bmodalId=([\da-zA-Z-]+)' % re.escape(video_id), |                  r'<[^>]+\bdata-rt=["\']top-area-play-button["\'][^>]+\bhref=["\'][^"\']*video/%s/[^"\']*\b(?:modalId|id)=([\da-zA-Z-]+)' % re.escape(video_id), | ||||||
|                  r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)', |                  r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)', | ||||||
|                  r'["\']videoSvtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)', |                  r'["\']videoSvtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)', | ||||||
|                  r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"', |                  r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"', | ||||||
|   | |||||||
| @@ -74,6 +74,12 @@ class TV2DKIE(InfoExtractor): | |||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
|  |  | ||||||
|         entries = [] |         entries = [] | ||||||
|  |  | ||||||
|  |         def add_entry(partner_id, kaltura_id): | ||||||
|  |             entries.append(self.url_result( | ||||||
|  |                 'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura', | ||||||
|  |                 video_id=kaltura_id)) | ||||||
|  |  | ||||||
|         for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage): |         for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage): | ||||||
|             video = extract_attributes(video_el) |             video = extract_attributes(video_el) | ||||||
|             kaltura_id = video.get('data-entryid') |             kaltura_id = video.get('data-entryid') | ||||||
| @@ -82,9 +88,14 @@ class TV2DKIE(InfoExtractor): | |||||||
|             partner_id = video.get('data-partnerid') |             partner_id = video.get('data-partnerid') | ||||||
|             if not partner_id: |             if not partner_id: | ||||||
|                 continue |                 continue | ||||||
|             entries.append(self.url_result( |             add_entry(partner_id, kaltura_id) | ||||||
|                 'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura', |         if not entries: | ||||||
|                 video_id=kaltura_id)) |             kaltura_id = self._search_regex( | ||||||
|  |                 r'entry_id\s*:\s*["\']([0-9a-z_]+)', webpage, 'kaltura id') | ||||||
|  |             partner_id = self._search_regex( | ||||||
|  |                 (r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage, | ||||||
|  |                 'partner id') | ||||||
|  |             add_entry(partner_id, kaltura_id) | ||||||
|         return self.playlist_result(entries) |         return self.playlist_result(entries) | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -9,7 +9,6 @@ from ..utils import ( | |||||||
|     int_or_none, |     int_or_none, | ||||||
|     remove_start, |     remove_start, | ||||||
|     smuggle_url, |     smuggle_url, | ||||||
|     strip_or_none, |  | ||||||
|     try_get, |     try_get, | ||||||
| ) | ) | ||||||
|  |  | ||||||
| @@ -45,32 +44,18 @@ class TVerIE(InfoExtractor): | |||||||
|             query={'token': self._TOKEN})['main'] |             query={'token': self._TOKEN})['main'] | ||||||
|         p_id = main['publisher_id'] |         p_id = main['publisher_id'] | ||||||
|         service = remove_start(main['service'], 'ts_') |         service = remove_start(main['service'], 'ts_') | ||||||
|         info = { |  | ||||||
|             '_type': 'url_transparent', |  | ||||||
|             'description': try_get(main, lambda x: x['note'][0]['text'], compat_str), |  | ||||||
|             'episode_number': int_or_none(try_get(main, lambda x: x['ext']['episode_number'])), |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if service == 'cx': |  | ||||||
|             title = main['title'] |  | ||||||
|             subtitle = strip_or_none(main.get('subtitle')) |  | ||||||
|             if subtitle: |  | ||||||
|                 title += ' - ' + subtitle |  | ||||||
|             info.update({ |  | ||||||
|                 'title': title, |  | ||||||
|                 'url': 'https://i.fod.fujitv.co.jp/plus7/web/%s/%s.html' % (p_id[:4], p_id), |  | ||||||
|                 'ie_key': 'FujiTVFODPlus7', |  | ||||||
|             }) |  | ||||||
|         else: |  | ||||||
|         r_id = main['reference_id'] |         r_id = main['reference_id'] | ||||||
|         if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'): |         if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'): | ||||||
|             r_id = 'ref:' + r_id |             r_id = 'ref:' + r_id | ||||||
|         bc_url = smuggle_url( |         bc_url = smuggle_url( | ||||||
|             self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), |             self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), | ||||||
|             {'geo_countries': ['JP']}) |             {'geo_countries': ['JP']}) | ||||||
|             info.update({ |  | ||||||
|  |         return { | ||||||
|  |             '_type': 'url_transparent', | ||||||
|  |             'description': try_get(main, lambda x: x['note'][0]['text'], compat_str), | ||||||
|  |             'episode_number': int_or_none(try_get(main, lambda x: x['ext']['episode_number'])), | ||||||
|             'url': bc_url, |             'url': bc_url, | ||||||
|             'ie_key': 'BrightcoveNew', |             'ie_key': 'BrightcoveNew', | ||||||
|             }) |         } | ||||||
|  |  | ||||||
|         return info |  | ||||||
|   | |||||||
| @@ -19,6 +19,7 @@ from ..utils import ( | |||||||
|     strip_or_none, |     strip_or_none, | ||||||
|     unified_timestamp, |     unified_timestamp, | ||||||
|     update_url_query, |     update_url_query, | ||||||
|  |     url_or_none, | ||||||
|     xpath_text, |     xpath_text, | ||||||
| ) | ) | ||||||
|  |  | ||||||
| @@ -52,6 +53,9 @@ class TwitterBaseIE(InfoExtractor): | |||||||
|             return [f], {} |             return [f], {} | ||||||
|  |  | ||||||
|     def _extract_formats_from_vmap_url(self, vmap_url, video_id): |     def _extract_formats_from_vmap_url(self, vmap_url, video_id): | ||||||
|  |         vmap_url = url_or_none(vmap_url) | ||||||
|  |         if not vmap_url: | ||||||
|  |             return [] | ||||||
|         vmap_data = self._download_xml(vmap_url, video_id) |         vmap_data = self._download_xml(vmap_url, video_id) | ||||||
|         formats = [] |         formats = [] | ||||||
|         subtitles = {} |         subtitles = {} | ||||||
|   | |||||||
| @@ -58,6 +58,7 @@ class XFileShareIE(InfoExtractor): | |||||||
|         (r'vidlocker\.xyz', 'VidLocker'), |         (r'vidlocker\.xyz', 'VidLocker'), | ||||||
|         (r'vidshare\.tv', 'VidShare'), |         (r'vidshare\.tv', 'VidShare'), | ||||||
|         (r'vup\.to', 'VUp'), |         (r'vup\.to', 'VUp'), | ||||||
|  |         (r'wolfstream\.tv', 'WolfStream'), | ||||||
|         (r'xvideosharing\.com', 'XVideoSharing'), |         (r'xvideosharing\.com', 'XVideoSharing'), | ||||||
|     ) |     ) | ||||||
|  |  | ||||||
| @@ -82,6 +83,9 @@ class XFileShareIE(InfoExtractor): | |||||||
|     }, { |     }, { | ||||||
|         'url': 'https://aparat.cam/n4d6dh0wvlpr', |         'url': 'https://aparat.cam/n4d6dh0wvlpr', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://wolfstream.tv/nthme29v9u2x', | ||||||
|  |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     @staticmethod |     @staticmethod | ||||||
|   | |||||||
| @@ -11,6 +11,7 @@ from ..utils import ( | |||||||
|     parse_duration, |     parse_duration, | ||||||
|     sanitized_Request, |     sanitized_Request, | ||||||
|     str_to_int, |     str_to_int, | ||||||
|  |     url_or_none, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -71,10 +72,10 @@ class XTubeIE(InfoExtractor): | |||||||
|                 'Cookie': 'age_verified=1; cookiesAccepted=1', |                 'Cookie': 'age_verified=1; cookiesAccepted=1', | ||||||
|             }) |             }) | ||||||
|  |  | ||||||
|         title, thumbnail, duration = [None] * 3 |         title, thumbnail, duration, sources, media_definition = [None] * 5 | ||||||
|  |  | ||||||
|         config = self._parse_json(self._search_regex( |         config = self._parse_json(self._search_regex( | ||||||
|             r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf)', webpage, 'config', |             r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf|playerWrapper)', webpage, 'config', | ||||||
|             default='{}'), video_id, transform_source=js_to_json, fatal=False) |             default='{}'), video_id, transform_source=js_to_json, fatal=False) | ||||||
|         if config: |         if config: | ||||||
|             config = config.get('mainRoll') |             config = config.get('mainRoll') | ||||||
| @@ -83,20 +84,52 @@ class XTubeIE(InfoExtractor): | |||||||
|                 thumbnail = config.get('poster') |                 thumbnail = config.get('poster') | ||||||
|                 duration = int_or_none(config.get('duration')) |                 duration = int_or_none(config.get('duration')) | ||||||
|                 sources = config.get('sources') or config.get('format') |                 sources = config.get('sources') or config.get('format') | ||||||
|  |                 media_definition = config.get('mediaDefinition') | ||||||
|  |  | ||||||
|         if not isinstance(sources, dict): |         if not isinstance(sources, dict) and not media_definition: | ||||||
|             sources = self._parse_json(self._search_regex( |             sources = self._parse_json(self._search_regex( | ||||||
|                 r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),', |                 r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),', | ||||||
|                 webpage, 'sources', group='sources'), video_id, |                 webpage, 'sources', group='sources'), video_id, | ||||||
|                 transform_source=js_to_json) |                 transform_source=js_to_json) | ||||||
|  |  | ||||||
|         formats = [] |         formats = [] | ||||||
|  |         format_urls = set() | ||||||
|  |  | ||||||
|  |         if isinstance(sources, dict): | ||||||
|             for format_id, format_url in sources.items(): |             for format_id, format_url in sources.items(): | ||||||
|  |                 format_url = url_or_none(format_url) | ||||||
|  |                 if not format_url: | ||||||
|  |                     continue | ||||||
|  |                 if format_url in format_urls: | ||||||
|  |                     continue | ||||||
|  |                 format_urls.add(format_url) | ||||||
|                 formats.append({ |                 formats.append({ | ||||||
|                     'url': format_url, |                     'url': format_url, | ||||||
|                     'format_id': format_id, |                     'format_id': format_id, | ||||||
|                     'height': int_or_none(format_id), |                     'height': int_or_none(format_id), | ||||||
|                 }) |                 }) | ||||||
|  |  | ||||||
|  |         if isinstance(media_definition, list): | ||||||
|  |             for media in media_definition: | ||||||
|  |                 video_url = url_or_none(media.get('videoUrl')) | ||||||
|  |                 if not video_url: | ||||||
|  |                     continue | ||||||
|  |                 if video_url in format_urls: | ||||||
|  |                     continue | ||||||
|  |                 format_urls.add(video_url) | ||||||
|  |                 format_id = media.get('format') | ||||||
|  |                 if format_id == 'hls': | ||||||
|  |                     formats.extend(self._extract_m3u8_formats( | ||||||
|  |                         video_url, video_id, 'mp4', entry_protocol='m3u8_native', | ||||||
|  |                         m3u8_id='hls', fatal=False)) | ||||||
|  |                 elif format_id == 'mp4': | ||||||
|  |                     height = int_or_none(media.get('quality')) | ||||||
|  |                     formats.append({ | ||||||
|  |                         'url': video_url, | ||||||
|  |                         'format_id': '%s-%d' % (format_id, height) if height else format_id, | ||||||
|  |                         'height': height, | ||||||
|  |                     }) | ||||||
|  |  | ||||||
|         self._remove_duplicate_formats(formats) |         self._remove_duplicate_formats(formats) | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 pukkandan
					pukkandan