mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 22:55:18 +00:00 
			
		
		
		
	[sverigesradio] Add extractor
This commit is contained in:
		 Mattias Wadman
					Mattias Wadman
				
			
				
					committed by
					
						 Remita Amine
						Remita Amine
					
				
			
			
				
	
			
			
			 Remita Amine
						Remita Amine
					
				
			
						parent
						
							4e4db743e7
						
					
				
				
					commit
					7ff8ad80f1
				
			| @@ -1098,6 +1098,10 @@ from .streetvoice import StreetVoiceIE | |||||||
| from .stretchinternet import StretchInternetIE | from .stretchinternet import StretchInternetIE | ||||||
| from .stv import STVPlayerIE | from .stv import STVPlayerIE | ||||||
| from .sunporno import SunPornoIE | from .sunporno import SunPornoIE | ||||||
|  | from .sverigesradio import ( | ||||||
|  |     SverigesRadioEpisodeIE, | ||||||
|  |     SverigesRadioPublicationIE, | ||||||
|  | ) | ||||||
| from .svt import ( | from .svt import ( | ||||||
|     SVTIE, |     SVTIE, | ||||||
|     SVTPageIE, |     SVTPageIE, | ||||||
|   | |||||||
							
								
								
									
										105
									
								
								youtube_dl/extractor/sverigesradio.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										105
									
								
								youtube_dl/extractor/sverigesradio.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,105 @@ | |||||||
|  | # coding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import int_or_none | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class SverigesRadioBaseIE(InfoExtractor): | ||||||
|  |     _BASE_URL = 'https://sverigesradio.se/sida/playerajax' | ||||||
|  |     _QUALITIES = ['high', 'medium', 'low'] | ||||||
|  |     _CODING_FORMATS = { | ||||||
|  |         5: {'acodec': 'mp3', 'abr': 128}, | ||||||
|  |         11: {'acodec': 'aac', 'abr': 192}, | ||||||
|  |         12: {'acodec': 'aac', 'abr': 32}, | ||||||
|  |         13: {'acodec': 'aac', 'abr': 96}, | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     def _extract_formats(self, query, audio_id, audio_type): | ||||||
|  |         audiourls = {} | ||||||
|  |         for quality in self._QUALITIES: | ||||||
|  |             audiourl = self._download_json( | ||||||
|  |                 self._BASE_URL + '/getaudiourl', audio_id, | ||||||
|  |                 fatal=True, | ||||||
|  |                 query=dict(query, type=audio_type, quality=quality, format='iis')) | ||||||
|  |             if audiourl is None: | ||||||
|  |                 continue | ||||||
|  |  | ||||||
|  |             # for some reason url can be empty, skip if so | ||||||
|  |             # also skip if url has already been seen (quality parameter is ignored?) | ||||||
|  |             url = audiourl.get('audioUrl') | ||||||
|  |             if url is None or url == "" or url in audiourls: | ||||||
|  |                 continue | ||||||
|  |  | ||||||
|  |             audioformat = {'vcodec': 'none', 'url': url} | ||||||
|  |             # add codec and bitrate if known coding format | ||||||
|  |             codingformat = audiourl.get('codingFormat') | ||||||
|  |             if codingformat: | ||||||
|  |                 audioformat.update(self._CODING_FORMATS.get(codingformat, {})) | ||||||
|  |  | ||||||
|  |             audiourls[url] = audioformat | ||||||
|  |  | ||||||
|  |         return audiourls.values() | ||||||
|  |  | ||||||
|  |     def _extract_audio(self, audio_type, url): | ||||||
|  |         audio_id = self._match_id(url) | ||||||
|  |         query = {'id': audio_id, 'type': audio_type} | ||||||
|  |  | ||||||
|  |         metadata = self._download_json(self._BASE_URL + '/audiometadata', audio_id, query=query) | ||||||
|  |         item = metadata['items'][0] | ||||||
|  |  | ||||||
|  |         formats = self._extract_formats(query, audio_id, audio_type) | ||||||
|  |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': audio_id, | ||||||
|  |             'title': item['subtitle'], | ||||||
|  |             'formats': formats, | ||||||
|  |             'series': item.get('title'), | ||||||
|  |             'duration': int_or_none(item.get('duration')), | ||||||
|  |             'thumbnail': item.get('displayimageurl'), | ||||||
|  |             'description': item.get('description'), | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class SverigesRadioPublicationIE(SverigesRadioBaseIE): | ||||||
|  |     """Extractor for sverigesradio.se article pages. | ||||||
|  |  | ||||||
|  |     Matches ``artikel.aspx`` and ``gruppsida.aspx`` URLs and takes the id | ||||||
|  |     from the ``artikel`` query parameter. | ||||||
|  |     """ | ||||||
|  |  | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/sida/(?:artikel|gruppsida)\.aspx\?.*artikel=(?P<id>[0-9]+)' | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://sverigesradio.se/sida/artikel.aspx?programid=83&artikel=7038546', | ||||||
|  |         'md5': '6a4917e1923fccb080e5a206a5afa542', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '7038546', | ||||||
|  |             'ext': 'm4a', | ||||||
|  |             'duration': 132, | ||||||
|  |             'series': 'Nyheter (Ekot)', | ||||||
|  |             'title': 'Esa Teittinen: Sanningen har inte kommit fram', | ||||||
|  |             'description': 'md5:daf7ce66a8f0a53d5465a5984d3839df', | ||||||
|  |             'thumbnail': 're:^https://static-cdn.sr.se/sida/images/', | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         # gruppsida.aspx variant; only checked against _VALID_URL | ||||||
|  |         'url': 'https://sverigesradio.se/sida/gruppsida.aspx?programid=3304&grupp=6247&artikel=7146887', | ||||||
|  |         'only_matching': True, | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         """Delegate to the shared extraction with audio type 'publication'.""" | ||||||
|  |         return self._extract_audio('publication', url) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class SverigesRadioEpisodeIE(SverigesRadioBaseIE): | ||||||
|  |     """Extractor for sverigesradio.se episode pages. | ||||||
|  |  | ||||||
|  |     Matches ``/avsnitt/<id>`` URLs, with or without a leading ``/sida``. | ||||||
|  |     """ | ||||||
|  |  | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?avsnitt/(?P<id>[0-9]+)' | ||||||
|  |     _TEST = { | ||||||
|  |         'url': 'https://sverigesradio.se/avsnitt/1140922?programid=1300', | ||||||
|  |         'md5': '20dc4d8db24228f846be390b0c59a07c', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '1140922', | ||||||
|  |             'ext': 'mp3', | ||||||
|  |             'duration': 3307, | ||||||
|  |             'series': 'Konflikt', | ||||||
|  |             'title': 'Metoo och valen', | ||||||
|  |             'description': 'md5:fcb5c1f667f00badcc702b196f10a27e', | ||||||
|  |             'thumbnail': 're:^https://static-cdn.sr.se/sida/images/' | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         """Delegate to the shared extraction with audio type 'episode'.""" | ||||||
|  |         return self._extract_audio('episode', url) | ||||||
		Reference in New Issue
	
	Block a user