mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	Merge pull request #6 from sgstair/storyfire
[StoryFire] Add new extractor
This commit is contained in:
		| @@ -1058,6 +1058,11 @@ from .spike import ( | |||||||
|     BellatorIE, |     BellatorIE, | ||||||
|     ParamountNetworkIE, |     ParamountNetworkIE, | ||||||
| ) | ) | ||||||
|  | from .storyfire import ( | ||||||
|  |     StoryFireIE, | ||||||
|  |     StoryFireUserIE, | ||||||
|  |     StoryFireSeriesIE, | ||||||
|  | ) | ||||||
| from .stitcher import StitcherIE | from .stitcher import StitcherIE | ||||||
| from .sport5 import Sport5IE | from .sport5 import Sport5IE | ||||||
| from .sportbox import SportBoxIE | from .sportbox import SportBoxIE | ||||||
|   | |||||||
							
								
								
									
										255
									
								
								youtube_dl/extractor/storyfire.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										255
									
								
								youtube_dl/extractor/storyfire.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,255 @@ | |||||||
|  | # coding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import itertools | ||||||
|  | from .common import InfoExtractor | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class StoryFireIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'(?:(?:https?://(?:www\.)?storyfire\.com/video-details)|(?:https://storyfire.app.link))/(?P<id>[^/\s]+)' | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://storyfire.com/video-details/5df1d132b6378700117f9181', | ||||||
|  |         'md5': '560953bfca81a69003cfa5e53ac8a920', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '5df1d132b6378700117f9181', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'Buzzfeed Teaches You About Memes', | ||||||
|  |             'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1', | ||||||
|  |             'timestamp': 1576129028, | ||||||
|  |             'description': 'Mocking Buzzfeed\'s meme lesson. Reuploaded from YouTube because of their new policies', | ||||||
|  |             'uploader': 'whang!', | ||||||
|  |             'upload_date': '20191212', | ||||||
|  |         }, | ||||||
|  |         'params': {'format': 'bestvideo'}  # There are no merged formats in the playlist. | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://storyfire.app.link/5GxAvWOQr8',  # Alternate URL format, with unrelated short ID | ||||||
|  |         'md5': '7a2dc6d60c4889edfed459c620fe690d', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '5f1e11ecd78a57b6c702001d', | ||||||
|  |             'ext': 'm4a', | ||||||
|  |             'title': 'Weird Nintendo Prototype Leaks', | ||||||
|  |             'description': 'A stream taking a look at some weird Nintendo Prototypes with Luigi in Mario 64 and weird Yoshis', | ||||||
|  |             'timestamp': 1595808576, | ||||||
|  |             'upload_date': '20200727', | ||||||
|  |             'uploader': 'whang!', | ||||||
|  |             'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1', | ||||||
|  |         }, | ||||||
|  |         'params': {'format': 'bestaudio'}  # Verifying audio extraction | ||||||
|  |  | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     _aformats = { | ||||||
|  |         'audio-medium-audio': {'acodec': 'aac', 'abr': 125, 'preference': -10}, | ||||||
|  |         'audio-high-audio': {'acodec': 'aac', 'abr': 254, 'preference': -1}, | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         video_id = self._match_id(url) | ||||||
|  |         webpage = self._download_webpage(url, video_id) | ||||||
|  |  | ||||||
|  |         # Extracting the json blob is mandatory to proceed with extraction. | ||||||
|  |         jsontext = self._html_search_regex( | ||||||
|  |             r'<script id="__NEXT_DATA__" type="application/json">(.+?)</script>', | ||||||
|  |             webpage, 'json_data') | ||||||
|  |  | ||||||
|  |         json = self._parse_json(jsontext, video_id) | ||||||
|  |  | ||||||
|  |         # The currentVideo field in the json is mandatory | ||||||
|  |         # because it contains the only link to the m3u playlist | ||||||
|  |         video = json['props']['initialState']['video']['currentVideo'] | ||||||
|  |         videourl = video['vimeoVideoURL']  # Video URL is mandatory | ||||||
|  |  | ||||||
|  |         # Extract other fields from the json in an error tolerant fashion | ||||||
|  |         # ID may be incorrect (on short URL format), correct it. | ||||||
|  |         parsed_id = video.get('_id') | ||||||
|  |         if parsed_id: | ||||||
|  |             video_id = parsed_id | ||||||
|  |  | ||||||
|  |         title = video.get('title') | ||||||
|  |         description = video.get('description') | ||||||
|  |  | ||||||
|  |         thumbnail = video.get('storyImage') | ||||||
|  |         views = video.get('views') | ||||||
|  |         likes = video.get('likesCount') | ||||||
|  |         comments = video.get('commentsCount') | ||||||
|  |         duration = video.get('videoDuration') | ||||||
|  |         publishdate = video.get('publishDate')  # Apparently epoch time, day only | ||||||
|  |  | ||||||
|  |         uploader = video.get('username') | ||||||
|  |         uploader_id = video.get('hostID') | ||||||
|  |         # Construct an uploader URL | ||||||
|  |         uploader_url = None | ||||||
|  |         if uploader_id: | ||||||
|  |             uploader_url = "https://storyfire.com/user/%s/video" % uploader_id | ||||||
|  |  | ||||||
|  |         # Collect root playlist to determine formats | ||||||
|  |         formats = self._extract_m3u8_formats( | ||||||
|  |             videourl, video_id, 'mp4', 'm3u8_native') | ||||||
|  |  | ||||||
|  |         # Modify formats to fill in missing information about audio codecs | ||||||
|  |         for format in formats: | ||||||
|  |             aformat = self._aformats.get(format['format_id']) | ||||||
|  |             if aformat: | ||||||
|  |                 format['acodec'] = aformat['acodec'] | ||||||
|  |                 format['abr'] = aformat['abr'] | ||||||
|  |                 format['preference'] = aformat['preference'] | ||||||
|  |                 format['ext'] = 'm4a' | ||||||
|  |  | ||||||
|  |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': video_id, | ||||||
|  |             'title': title, | ||||||
|  |             'description': description, | ||||||
|  |             'ext': "mp4", | ||||||
|  |             'url': videourl, | ||||||
|  |             'formats': formats, | ||||||
|  |  | ||||||
|  |             'thumbnail': thumbnail, | ||||||
|  |             'view_count': views, | ||||||
|  |             'like_count': likes, | ||||||
|  |             'comment_count': comments, | ||||||
|  |             'duration': duration, | ||||||
|  |             'timestamp': publishdate, | ||||||
|  |  | ||||||
|  |             'uploader': uploader, | ||||||
|  |             'uploader_id': uploader_id, | ||||||
|  |             'uploader_url': uploader_url, | ||||||
|  |  | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class StoryFireUserIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?storyfire\.com/user/(?P<id>[^/\s]+)/video' | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://storyfire.com/user/ntZAJFECERSgqHSxzonV5K2E89s1/video', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'ntZAJFECERSgqHSxzonV5K2E89s1', | ||||||
|  |             'title': 'whang!', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 18 | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://storyfire.com/user/UQ986nFxmAWIgnkZQ0ftVhq4nOk2/video', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'UQ986nFxmAWIgnkZQ0ftVhq4nOk2', | ||||||
|  |             'title': 'McJuggerNuggets', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 143 | ||||||
|  |  | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     # Generator for fetching playlist items | ||||||
|  |     def _enum_videos(self, baseurl, user_id, firstjson): | ||||||
|  |         totalVideos = int(firstjson['videosCount']) | ||||||
|  |         haveVideos = 0 | ||||||
|  |         json = firstjson | ||||||
|  |  | ||||||
|  |         for page in itertools.count(1): | ||||||
|  |             for video in json['videos']: | ||||||
|  |                 id = video['_id'] | ||||||
|  |                 url = "https://storyfire.com/video-details/%s" % id | ||||||
|  |                 haveVideos += 1 | ||||||
|  |                 yield { | ||||||
|  |                     '_type': 'url', | ||||||
|  |                     'id': id, | ||||||
|  |                     'url': url, | ||||||
|  |                     'ie_key': 'StoryFire', | ||||||
|  |  | ||||||
|  |                     'title': video.get('title'), | ||||||
|  |                     'description': video.get('description'), | ||||||
|  |                     'view_count': video.get('views'), | ||||||
|  |                     'comment_count': video.get('commentsCount'), | ||||||
|  |                     'duration': video.get('videoDuration'), | ||||||
|  |                     'timestamp': video.get('publishDate'), | ||||||
|  |                 } | ||||||
|  |             # Are there more pages we could fetch? | ||||||
|  |             if haveVideos < totalVideos: | ||||||
|  |                 pageurl = baseurl + ("%i" % haveVideos) | ||||||
|  |                 json = self._download_json(pageurl, user_id, | ||||||
|  |                                            note='Downloading page %s' % page) | ||||||
|  |  | ||||||
|  |                 # Are there any videos in the new json? | ||||||
|  |                 videos = json.get('videos') | ||||||
|  |                 if not videos or len(videos) == 0: | ||||||
|  |                     break  # no videos | ||||||
|  |  | ||||||
|  |             else: | ||||||
|  |                 break  # We have fetched all the videos, stop | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         user_id = self._match_id(url) | ||||||
|  |  | ||||||
|  |         baseurl = "https://storyfire.com/app/publicVideos/%s?skip=" % user_id | ||||||
|  |  | ||||||
|  |         # Download first page to ensure it can be downloaded, and get user information if available. | ||||||
|  |         firstpage = baseurl + "0" | ||||||
|  |         firstjson = self._download_json(firstpage, user_id) | ||||||
|  |  | ||||||
|  |         title = None | ||||||
|  |         videos = firstjson.get('videos') | ||||||
|  |         if videos and len(videos): | ||||||
|  |             title = videos[1].get('username') | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             '_type': 'playlist', | ||||||
|  |             'entries': self._enum_videos(baseurl, user_id, firstjson), | ||||||
|  |             'id': user_id, | ||||||
|  |             'title': title, | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class StoryFireSeriesIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?storyfire\.com/write/series/stories/(?P<id>[^/\s]+)' | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://storyfire.com/write/series/stories/-Lq6MsuIHLODO6d2dDkr/', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '-Lq6MsuIHLODO6d2dDkr', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 13 | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://storyfire.com/write/series/stories/the_mortal_one/', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'the_mortal_one', | ||||||
|  |         }, | ||||||
|  |         'playlist_count': 0  # This playlist has entries, but no videos. | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://storyfire.com/write/series/stories/story_time', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'story_time', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 10 | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     # Generator for returning playlist items | ||||||
|  |     # This object is substantially different than the one in the user videos page above | ||||||
|  |     def _enum_videos(self, jsonlist): | ||||||
|  |         for video in jsonlist: | ||||||
|  |             id = video['_id'] | ||||||
|  |             if video.get('hasVideo'):  # Boolean element | ||||||
|  |                 url = "https://storyfire.com/video-details/%s" % id | ||||||
|  |                 yield { | ||||||
|  |                     '_type': 'url', | ||||||
|  |                     'id': id, | ||||||
|  |                     'url': url, | ||||||
|  |                     'ie_key': 'StoryFire', | ||||||
|  |  | ||||||
|  |                     'title': video.get('title'), | ||||||
|  |                     'description': video.get('description'), | ||||||
|  |                     'view_count': video.get('views'), | ||||||
|  |                     'likes_count': video.get('likesCount'), | ||||||
|  |                     'comment_count': video.get('commentsCount'), | ||||||
|  |                     'duration': video.get('videoDuration'), | ||||||
|  |                     'timestamp': video.get('publishDate'), | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         list_id = self._match_id(url) | ||||||
|  |  | ||||||
|  |         listurl = "https://storyfire.com/app/seriesStories/%s/list" % list_id | ||||||
|  |         json = self._download_json(listurl, list_id) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             '_type': 'playlist', | ||||||
|  |             'entries': self._enum_videos(json), | ||||||
|  |             'id': list_id | ||||||
|  |         } | ||||||
		Reference in New Issue
	
	Block a user
	 Tom-Oliver Heidel
					Tom-Oliver Heidel