mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	 e4e50f60b1
			
		
	
	e4e50f60b1
	
	
	
		
			
			Since Python 3.6, invalid escape sequences are deprecated. It's likely that there are invalid escape sequences somewhere on the webpage, so instead of unescaping the whole webpage, just unescape the URL. See https://bugs.python.org/issue27364. That change was designed for string literals, but it affects the 'unicode_escape' encoding as well. The code path is: str.decode('unicode_escape') -> codecs.unicode_escape_decode() -> PyUnicode_DecodeUnicodeEscape()
		
			
				
	
	
		
			94 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			94 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from __future__ import unicode_literals
 | |
| 
 | |
| import re
 | |
| 
 | |
| from .common import InfoExtractor
 | |
| from ..utils import (
 | |
|     ExtractorError,
 | |
|     int_or_none,
 | |
|     lowercase_escape,
 | |
| )
 | |
| 
 | |
| 
 | |
class GoogleDriveIE(InfoExtractor):
    """Extractor for videos hosted on Google Drive / Google Docs.

    Accepts ``docs.google.com`` / ``drive.google.com`` file and ``uc?id=``
    URLs as well as ``video.google.com/get_player?docid=`` URLs; the file id
    must be at least 28 characters long.
    """

    _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
    _TESTS = [{
        'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
        'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
        'info_dict': {
            'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
            'ext': 'mp4',
            'title': 'Big Buck Bunny.mp4',
            'duration': 45,
        }
    }, {
        # video id is longer than 28 characters
        'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
        'only_matching': True,
    }]
    # Container extension for each known numeric format id.
    _FORMATS_EXT = {
        '5': 'flv',
        '6': 'flv',
        '13': '3gp',
        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
        '34': 'flv',
        '35': 'flv',
        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
        '46': 'webm',
        '59': 'mp4',
    }

    @staticmethod
    def _extract_url(webpage):
        """Find a Google Drive player embedded in *webpage*.

        Looks for an ``<iframe>`` whose ``src`` points at a Drive/Docs file
        or a ``video.google.com`` player and returns the corresponding
        ``https://drive.google.com/file/d/<id>`` URL, or ``None`` when no
        such iframe is present.
        """
        mobj = re.search(
            r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28,})',
            webpage)
        if mobj:
            return 'https://drive.google.com/file/d/%s' % mobj.group('id')

    def _real_extract(self, url):
        """Download the file page for *url* and build the info dict.

        Raises ExtractorError when the page carries a ``"reason"`` entry
        (the service's own explanation of why playback is unavailable).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'http://docs.google.com/file/d/%s' % video_id, video_id)

        reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
        if reason:
            # The message comes from the service itself, so this is not an
            # extractor bug; flag it as an expected failure.
            raise ExtractorError(reason, expected=True)

        title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
        duration = int_or_none(self._search_regex(
            r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None))
        fmt_stream_map = self._search_regex(
            r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
        fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')

        # Index resolutions by format id instead of relying on fmt_list and
        # fmt_stream_map being in the same order and of the same length.
        # Each fmt_list entry is parsed as '<format id>/<width>x<height>/...'.
        resolutions = {}
        for fmt in fmt_list:
            fmt_parts = fmt.split('/')
            if len(fmt_parts) < 2:
                continue
            dimensions = fmt_parts[1].split('x')
            if len(dimensions) != 2:
                continue
            resolutions[fmt_parts[0]] = (fmt_parts[1], dimensions[0], dimensions[1])

        formats = []
        for fmt_stream in fmt_stream_map:
            # Each entry is '<format id>|<url>'; split on the first '|' only
            # and skip entries that carry no URL.
            fmt_id, separator, fmt_url = fmt_stream.partition('|')
            if not separator or not fmt_url:
                continue
            fmt_info = {
                # Only the URL is unescaped, not the whole webpage, because
                # the page may contain invalid escape sequences elsewhere.
                'url': lowercase_escape(fmt_url),
                'format_id': fmt_id,
                # Unknown format ids get no explicit extension rather than
                # aborting the whole extraction with a KeyError.
                'ext': self._FORMATS_EXT.get(fmt_id),
            }
            known_resolution = resolutions.get(fmt_id)
            if known_resolution:
                resolution, width, height = known_resolution
                fmt_info.update({
                    'resolution': resolution,
                    'width': int_or_none(width),
                    'height': int_or_none(height),
                })
            formats.append(fmt_info)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'duration': duration,
            'formats': formats,
        }
 |