mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[StoryFire] Add new extractor
This commit is contained in:
		| @@ -1057,6 +1057,11 @@ from .spike import ( | ||||
|     BellatorIE, | ||||
|     ParamountNetworkIE, | ||||
| ) | ||||
| from .storyfire import ( | ||||
|     StoryFireIE, | ||||
|     StoryFireUserIE, | ||||
|     StoryFireSeriesIE, | ||||
| ) | ||||
| from .stitcher import StitcherIE | ||||
| from .sport5 import Sport5IE | ||||
| from .sportbox import SportBoxIE | ||||
|   | ||||
							
								
								
									
										255
									
								
								youtube_dl/extractor/storyfire.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										255
									
								
								youtube_dl/extractor/storyfire.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,255 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import itertools | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class StoryFireIE(InfoExtractor):
    """Extractor for a single StoryFire video.

    Accepts ``storyfire.com/video-details/<id>`` pages as well as
    ``storyfire.app.link/<short id>`` links.  The video metadata is read from
    the JSON embedded in the page's ``__NEXT_DATA__`` script tag, and the
    formats come from the m3u8 playlist referenced by ``vimeoVideoURL``.
    """

    # The dots of the short-link host are escaped so that they only match a
    # literal '.', not an arbitrary character.
    _VALID_URL = r'(?:(?:https?://(?:www\.)?storyfire\.com/video-details)|(?:https://storyfire\.app\.link))/(?P<id>[^/\s]+)'
    _TESTS = [{
        'url': 'https://storyfire.com/video-details/5df1d132b6378700117f9181',
        'md5': '560953bfca81a69003cfa5e53ac8a920',
        'info_dict': {
            'id': '5df1d132b6378700117f9181',
            'ext': 'mp4',
            'title': 'Buzzfeed Teaches You About Memes',
            'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
            'timestamp': 1576129028,
            'description': 'Mocking Buzzfeed\'s meme lesson. Reuploaded from YouTube because of their new policies',
            'uploader': 'whang!',
            'upload_date': '20191212',
        },
        'params': {'format': 'bestvideo'}  # There are no merged formats in the playlist.
    }, {
        'url': 'https://storyfire.app.link/5GxAvWOQr8',  # Alternate URL format, with unrelated short ID
        'md5': '7a2dc6d60c4889edfed459c620fe690d',
        'info_dict': {
            'id': '5f1e11ecd78a57b6c702001d',
            'ext': 'm4a',
            'title': 'Weird Nintendo Prototype Leaks',
            'description': 'A stream taking a look at some weird Nintendo Prototypes with Luigi in Mario 64 and weird Yoshis',
            'timestamp': 1595808576,
            'upload_date': '20200727',
            'uploader': 'whang!',
            'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
        },
        'params': {'format': 'bestaudio'}  # Verifying audio extraction
    }]

    # Extra information for the audio-only renditions, keyed by the format_id
    # assigned to them by the m3u8 extraction.
    _aformats = {
        'audio-medium-audio': {'acodec': 'aac', 'abr': 125, 'preference': -10},
        'audio-high-audio': {'acodec': 'aac', 'abr': 254, 'preference': -1},
    }

    def _real_extract(self, url):
        """Return the info dict for the video at *url*.

        Raises KeyError when the embedded JSON lacks the ``currentVideo``
        entry or its ``vimeoVideoURL``; both are mandatory because they hold
        the only link to the m3u8 playlist.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Extracting the json blob is mandatory to proceed with extraction.
        json_text = self._html_search_regex(
            r'<script id="__NEXT_DATA__" type="application/json">(.+?)</script>',
            webpage, 'json_data')
        page_data = self._parse_json(json_text, video_id)

        # Mandatory fields: fail loudly if the page layout changed.
        video = page_data['props']['initialState']['video']['currentVideo']
        video_url = video['vimeoVideoURL']

        # Everything below is optional and extracted in an error tolerant
        # fashion.  The ID taken from the URL is wrong for the short URL
        # format, so prefer the one reported by the site.
        video_id = video.get('_id') or video_id

        uploader_id = video.get('hostID')
        uploader_url = None
        if uploader_id:
            uploader_url = "https://storyfire.com/user/%s/video" % uploader_id

        # Collect root playlist to determine formats
        formats = self._extract_m3u8_formats(
            video_url, video_id, 'mp4', 'm3u8_native')

        # Fill in missing information about the audio-only formats.
        for fmt in formats:
            audio_info = self._aformats.get(fmt.get('format_id'))
            if audio_info:
                fmt.update(audio_info)  # acodec, abr, preference
                fmt['ext'] = 'm4a'

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video.get('title'),
            'description': video.get('description'),
            # 'ext' and 'url' are kept alongside 'formats' for backward
            # compatibility with the previous result layout.
            'ext': "mp4",
            'url': video_url,
            'formats': formats,

            'thumbnail': video.get('storyImage'),
            'view_count': video.get('views'),
            'like_count': video.get('likesCount'),
            'comment_count': video.get('commentsCount'),
            'duration': video.get('videoDuration'),
            'timestamp': video.get('publishDate'),  # Apparently epoch time, day only

            'uploader': video.get('username'),
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
        }
|  | ||||
|  | ||||
class StoryFireUserIE(InfoExtractor):
    """Extractor for the list of videos published by a StoryFire user.

    The videos are fetched page by page from the ``publicVideos`` endpoint
    and handed over to ``StoryFireIE`` as URL entries.
    """

    _VALID_URL = r'https?://(?:www\.)?storyfire\.com/user/(?P<id>[^/\s]+)/video'
    _TESTS = [{
        'url': 'https://storyfire.com/user/ntZAJFECERSgqHSxzonV5K2E89s1/video',
        'info_dict': {
            'id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
            'title': 'whang!',
        },
        'playlist_mincount': 18
    }, {
        'url': 'https://storyfire.com/user/UQ986nFxmAWIgnkZQ0ftVhq4nOk2/video',
        'info_dict': {
            'id': 'UQ986nFxmAWIgnkZQ0ftVhq4nOk2',
            'title': 'McJuggerNuggets',
        },
        'playlist_mincount': 143
    }]

    def _enum_videos(self, baseurl, user_id, firstjson):
        """Generate playlist entries for every video of the user.

        baseurl   -- endpoint URL ending in ``?skip=``; the number of videos
                     already seen is appended to request the next page
        user_id   -- ID used when reporting the page downloads
        firstjson -- already downloaded first page; its ``videosCount``
                     tells how many videos are expected in total

        Stops once ``videosCount`` videos were yielded or a downloaded page
        contains no videos.
        """
        total_videos = int(firstjson['videosCount'])
        have_videos = 0
        page_data = firstjson

        for page in itertools.count(1):
            for video in page_data.get('videos') or []:
                video_id = video['_id']
                have_videos += 1
                yield {
                    '_type': 'url',
                    'id': video_id,
                    'url': "https://storyfire.com/video-details/%s" % video_id,
                    'ie_key': 'StoryFire',

                    'title': video.get('title'),
                    'description': video.get('description'),
                    'view_count': video.get('views'),
                    'comment_count': video.get('commentsCount'),
                    'duration': video.get('videoDuration'),
                    'timestamp': video.get('publishDate'),
                }

            if have_videos >= total_videos:
                break  # We have fetched all the videos, stop

            # There are more pages we could fetch
            pageurl = baseurl + ("%i" % have_videos)
            page_data = self._download_json(
                pageurl, user_id, note='Downloading page %s' % page)

            if not page_data.get('videos'):
                break  # no videos in the new page

    def _real_extract(self, url):
        """Return a playlist of all public videos of the user in *url*."""
        user_id = self._match_id(url)

        baseurl = "https://storyfire.com/app/publicVideos/%s?skip=" % user_id

        # Download first page to ensure it can be downloaded, and get user
        # information if available.
        firstjson = self._download_json(baseurl + "0", user_id)

        # The user name is only available from the video entries; take it
        # from the first one so users with a single video work as well.
        title = None
        videos = firstjson.get('videos')
        if videos:
            title = videos[0].get('username')

        return {
            '_type': 'playlist',
            'entries': self._enum_videos(baseurl, user_id, firstjson),
            'id': user_id,
            'title': title,
        }
|  | ||||
|  | ||||
class StoryFireSeriesIE(InfoExtractor):
    """Extractor for a StoryFire series.

    The stories of a series are read from the ``seriesStories`` endpoint;
    only the stories flagged with ``hasVideo`` become playlist entries, which
    are handed over to ``StoryFireIE``.
    """

    _VALID_URL = r'https?://(?:www\.)?storyfire\.com/write/series/stories/(?P<id>[^/\s]+)'
    _TESTS = [{
        'url': 'https://storyfire.com/write/series/stories/-Lq6MsuIHLODO6d2dDkr/',
        'info_dict': {
            'id': '-Lq6MsuIHLODO6d2dDkr',
        },
        'playlist_mincount': 13
    }, {
        'url': 'https://storyfire.com/write/series/stories/the_mortal_one/',
        'info_dict': {
            'id': 'the_mortal_one',
        },
        'playlist_count': 0  # This playlist has entries, but no videos.
    }, {
        'url': 'https://storyfire.com/write/series/stories/story_time',
        'info_dict': {
            'id': 'story_time',
        },
        'playlist_mincount': 10
    }]

    def _enum_videos(self, jsonlist):
        """Generate a playlist entry for every story in *jsonlist* with a video.

        The story objects are substantially different from the ones returned
        for the videos page of a user.
        """
        for video in jsonlist:
            if not video.get('hasVideo'):  # Boolean element
                continue

            # Only stories that are actually yielded need an ID.
            video_id = video['_id']
            yield {
                '_type': 'url',
                'id': video_id,
                'url': "https://storyfire.com/video-details/%s" % video_id,
                'ie_key': 'StoryFire',

                'title': video.get('title'),
                'description': video.get('description'),
                'view_count': video.get('views'),
                # Same key as used by StoryFireIE ('like_count', not
                # 'likes_count').
                'like_count': video.get('likesCount'),
                'comment_count': video.get('commentsCount'),
                'duration': video.get('videoDuration'),
                'timestamp': video.get('publishDate'),
            }

    def _real_extract(self, url):
        """Return a playlist of the videos of the series in *url*."""
        list_id = self._match_id(url)

        listurl = "https://storyfire.com/app/seriesStories/%s/list" % list_id
        stories = self._download_json(listurl, list_id)

        return {
            '_type': 'playlist',
            'entries': self._enum_videos(stories),
            'id': list_id
        }
		Reference in New Issue
	
	Block a user
	 Stephen Stair
					Stephen Stair