mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[ie/ARDBetaMediathek] Fix series extraction (#8687)
Closes #7666. Authored by: lstrojny
This commit is contained in:
		| @@ -292,7 +292,7 @@ class ARDIE(InfoExtractor): | |||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         # available till 7.12.2023 |         # available till 7.12.2023 | ||||||
|         'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html', |         'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html', | ||||||
|         'md5': 'a438f671e87a7eba04000336a119ccc4', |         'md5': '94812e6438488fb923c361a44469614b', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': 'maischberger-video-424', |             'id': 'maischberger-video-424', | ||||||
|             'display_id': 'maischberger-video-424', |             'display_id': 'maischberger-video-424', | ||||||
| @@ -403,26 +403,25 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): | |||||||
|     _VALID_URL = r'''(?x)https:// |     _VALID_URL = r'''(?x)https:// | ||||||
|         (?:(?:beta|www)\.)?ardmediathek\.de/ |         (?:(?:beta|www)\.)?ardmediathek\.de/ | ||||||
|         (?:(?P<client>[^/]+)/)? |         (?:(?P<client>[^/]+)/)? | ||||||
|         (?:player|live|video|(?P<playlist>sendung|sammlung))/ |         (?:player|live|video|(?P<playlist>sendung|serie|sammlung))/ | ||||||
|         (?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)? |         (?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)? | ||||||
|         (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+) |         (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+) | ||||||
|         (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))''' |         (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))''' | ||||||
| 
 | 
 | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI', |         'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0', | ||||||
|         'md5': '3fd5fead7a370a819341129c8d713136', |         'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen', |             'display_id': 'filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen', | ||||||
|             'id': '12172961', |             'id': '12939099', | ||||||
|             'title': 'Wolfsland - Die traurigen Schwestern', |             'title': 'Liebe auf vier Pfoten', | ||||||
|             'description': r're:^Als der Polizeiobermeister Raaben', |             'description': r're:^Claudia Schmitt, Anwältin in Salzburg', | ||||||
|             'duration': 5241, |             'duration': 5222, | ||||||
|             'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957', |             'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b', | ||||||
|             'timestamp': 1670710500, |             'timestamp': 1701343800, | ||||||
|             'upload_date': '20221210', |             'upload_date': '20231130', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'age_limit': 12, |             'episode': 'Liebe auf vier Pfoten', | ||||||
|             'episode': 'Wolfsland - Die traurigen Schwestern', |  | ||||||
|             'series': 'Filme im MDR' |             'series': 'Filme im MDR' | ||||||
|         }, |         }, | ||||||
|     }, { |     }, { | ||||||
| @@ -454,7 +453,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): | |||||||
|             'duration': 915, |             'duration': 915, | ||||||
|             'episode': 'tagesschau, 20:00 Uhr', |             'episode': 'tagesschau, 20:00 Uhr', | ||||||
|             'series': 'tagesschau', |             'series': 'tagesschau', | ||||||
|             'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49', |             'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678', | ||||||
|         }, |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', |         'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', | ||||||
| @@ -475,6 +474,10 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): | |||||||
|         # playlist of type 'sendung' |         # playlist of type 'sendung' | ||||||
|         'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/', |         'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         # playlist of type 'serie' | ||||||
|  |         'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1', | ||||||
|  |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|         # playlist of type 'sammlung' |         # playlist of type 'sammlung' | ||||||
|         'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/', |         'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/', | ||||||
| @@ -487,10 +490,11 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): | |||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
| 
 | 
 | ||||||
|     def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber): |     def _ARD_load_playlist_snippet(self, playlist_id, display_id, client, mode, page_number): | ||||||
|         """ Query the ARD server for playlist information |         """ Query the ARD server for playlist information | ||||||
|         and returns the data in "raw" format """ |         and returns the data in "raw" format """ | ||||||
|         if mode == 'sendung': |         assert mode in ('sendung', 'serie', 'sammlung') | ||||||
|  |         if mode in ('sendung', 'serie'): | ||||||
|             graphQL = json.dumps({ |             graphQL = json.dumps({ | ||||||
|                 'query': '''{ |                 'query': '''{ | ||||||
|                     showPage( |                     showPage( | ||||||
| @@ -507,7 +511,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): | |||||||
|                             links { target { id href title } } |                             links { target { id href title } } | ||||||
|                             type |                             type | ||||||
|                         } |                         } | ||||||
|                     }}''' % (client, playlist_id, pageNumber), |                     }}''' % (client, playlist_id, page_number), | ||||||
|             }).encode() |             }).encode() | ||||||
|         else:  # mode == 'sammlung' |         else:  # mode == 'sammlung' | ||||||
|             graphQL = json.dumps({ |             graphQL = json.dumps({ | ||||||
| @@ -528,7 +532,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): | |||||||
|                                 type |                                 type | ||||||
|                             } |                             } | ||||||
|                         } |                         } | ||||||
|                     }}''' % (client, playlist_id, pageNumber), |                     }}''' % (client, playlist_id, page_number), | ||||||
|             }).encode() |             }).encode() | ||||||
|         # Ressources for ARD graphQL debugging: |         # Ressources for ARD graphQL debugging: | ||||||
|         # https://api-test.ardmediathek.de/public-gateway |         # https://api-test.ardmediathek.de/public-gateway | ||||||
| @@ -538,7 +542,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): | |||||||
|             data=graphQL, |             data=graphQL, | ||||||
|             headers={'Content-Type': 'application/json'})['data'] |             headers={'Content-Type': 'application/json'})['data'] | ||||||
|         # align the structure of the returned data: |         # align the structure of the returned data: | ||||||
|         if mode == 'sendung': |         if mode in ('sendung', 'serie'): | ||||||
|             show_page = show_page['showPage'] |             show_page = show_page['showPage'] | ||||||
|         else:  # mode == 'sammlung' |         else:  # mode == 'sammlung' | ||||||
|             show_page = show_page['morePage']['widget'] |             show_page = show_page['morePage']['widget'] | ||||||
| @@ -546,12 +550,12 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): | |||||||
| 
 | 
 | ||||||
|     def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode): |     def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode): | ||||||
|         """ Collects all playlist entries and returns them as info dict. |         """ Collects all playlist entries and returns them as info dict. | ||||||
|         Supports playlists of mode 'sendung' and 'sammlung', and also nested |         Supports playlists of mode 'sendung', 'serie', and 'sammlung', | ||||||
|         playlists. """ |         as well as nested playlists. """ | ||||||
|         entries = [] |         entries = [] | ||||||
|         pageNumber = 0 |         pageNumber = 0 | ||||||
|         while True:  # iterate by pageNumber |         while True:  # iterate by pageNumber | ||||||
|             show_page = self._ARD_load_playlist_snipped( |             show_page = self._ARD_load_playlist_snippet( | ||||||
|                 playlist_id, display_id, client, mode, pageNumber) |                 playlist_id, display_id, client, mode, pageNumber) | ||||||
|             for teaser in show_page['teasers']:  # process playlist items |             for teaser in show_page['teasers']:  # process playlist items | ||||||
|                 if '/compilation/' in teaser['links']['target']['href']: |                 if '/compilation/' in teaser['links']['target']['href']: | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Lars Strojny
					Lars Strojny