mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[cnn] Add multiple formats, duration, and upload_date
This commit is contained in:
		| @@ -18,6 +18,7 @@ from youtube_dl.utils import ( | |||||||
|     find_xpath_attr, |     find_xpath_attr, | ||||||
|     get_meta_content, |     get_meta_content, | ||||||
|     orderedSet, |     orderedSet, | ||||||
|  |     parse_duration, | ||||||
|     sanitize_filename, |     sanitize_filename, | ||||||
|     shell_quote, |     shell_quote, | ||||||
|     smuggle_url, |     smuggle_url, | ||||||
| @@ -192,5 +193,12 @@ class TestUtil(unittest.TestCase): | |||||||
|             url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'), |             url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'), | ||||||
|             u'trailer.mp4') |             u'trailer.mp4') | ||||||
|  |  | ||||||
|  |     def test_parse_duration(self): | ||||||
|  |         self.assertEqual(parse_duration(None), None) | ||||||
|  |         self.assertEqual(parse_duration('1'), 1) | ||||||
|  |         self.assertEqual(parse_duration('1337:12'), 80232) | ||||||
|  |         self.assertEqual(parse_duration('9:12:43'), 33163) | ||||||
|  |         self.assertEqual(parse_duration('x:y'), None) | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -1,7 +1,10 @@ | |||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import determine_ext | from ..utils import ( | ||||||
|  |     int_or_none, | ||||||
|  |     parse_duration, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class CNNIE(InfoExtractor): | class CNNIE(InfoExtractor): | ||||||
| @@ -15,6 +18,8 @@ class CNNIE(InfoExtractor): | |||||||
|         u'info_dict': { |         u'info_dict': { | ||||||
|             u'title': u'Nadal wins 8th French Open title', |             u'title': u'Nadal wins 8th French Open title', | ||||||
|             u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', |             u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', | ||||||
|  |             u'duration': 135, | ||||||
|  |             u'upload_date': u'20130609', | ||||||
|         }, |         }, | ||||||
|     }, |     }, | ||||||
|     { |     { | ||||||
| @@ -35,22 +40,58 @@ class CNNIE(InfoExtractor): | |||||||
|         info = self._download_xml(info_url, page_title) |         info = self._download_xml(info_url, page_title) | ||||||
|  |  | ||||||
|         formats = [] |         formats = [] | ||||||
|  |         rex = re.compile(r'''(?x) | ||||||
|  |             (?P<width>[0-9]+)x(?P<height>[0-9]+) | ||||||
|  |             (?:_(?P<bitrate>[0-9]+)k)? | ||||||
|  |         ''') | ||||||
|         for f in info.findall('files/file'): |         for f in info.findall('files/file'): | ||||||
|             mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?',f.attrib['bitrate']) |             video_url = 'http://ht.cdn.turner.com/cnn/big%s' % (f.text.strip()) | ||||||
|             if mf is not None: |             fdct = { | ||||||
|                 formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text)) |                 'format_id': f.attrib['bitrate'], | ||||||
|         formats = sorted(formats) |                 'url': video_url, | ||||||
|         (_,_,_, video_path) = formats[-1] |             } | ||||||
|         video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path |  | ||||||
|  |             mf = rex.match(f.attrib['bitrate']) | ||||||
|  |             if mf: | ||||||
|  |                 fdct['width'] = int(mf.group('width')) | ||||||
|  |                 fdct['height'] = int(mf.group('height')) | ||||||
|  |                 fdct['tbr'] = int_or_none(mf.group('bitrate')) | ||||||
|  |             else: | ||||||
|  |                 mf = rex.search(f.text) | ||||||
|  |                 if mf: | ||||||
|  |                     fdct['width'] = int(mf.group('width')) | ||||||
|  |                     fdct['height'] = int(mf.group('height')) | ||||||
|  |                     fdct['tbr'] = int_or_none(mf.group('bitrate')) | ||||||
|  |                 else: | ||||||
|  |                     mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate']) | ||||||
|  |                     if mi: | ||||||
|  |                         if mi.group(1) == 'audio': | ||||||
|  |                             fdct['vcodec'] = 'none' | ||||||
|  |                             fdct['ext'] = 'm4a' | ||||||
|  |                         else: | ||||||
|  |                             fdct['tbr'] = int(mi.group(1)) | ||||||
|  |  | ||||||
|  |             formats.append(fdct) | ||||||
|  |  | ||||||
|  |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|         thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')]) |         thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')]) | ||||||
|         thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails] |         thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails] | ||||||
|  |  | ||||||
|         return {'id': info.attrib['id'], |         metas_el = info.find('metas') | ||||||
|  |         upload_date = ( | ||||||
|  |             metas_el.attrib.get('version') if metas_el is not None else None) | ||||||
|  |  | ||||||
|  |         duration_el = info.find('length') | ||||||
|  |         duration = parse_duration(duration_el.text) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': info.attrib['id'], | ||||||
|             'title': info.find('headline').text, |             'title': info.find('headline').text, | ||||||
|                 'url': video_url, |             'formats': formats, | ||||||
|                 'ext': determine_ext(video_url), |  | ||||||
|             'thumbnail': thumbnails[-1][1], |             'thumbnail': thumbnails[-1][1], | ||||||
|             'thumbnails': thumbs_dict, |             'thumbnails': thumbs_dict, | ||||||
|             'description': info.find('description').text, |             'description': info.find('description').text, | ||||||
|  |             'duration': duration, | ||||||
|  |             'upload_date': upload_date, | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -1102,3 +1102,19 @@ class HEADRequest(compat_urllib_request.Request): | |||||||
|  |  | ||||||
| def int_or_none(v): | def int_or_none(v): | ||||||
|     return v if v is None else int(v) |     return v if v is None else int(v) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def parse_duration(s): | ||||||
|  |     if s is None: | ||||||
|  |         return None | ||||||
|  |  | ||||||
|  |     m = re.match( | ||||||
|  |         r'(?:(?:(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)$', s) | ||||||
|  |     if not m: | ||||||
|  |         return None | ||||||
|  |     res = int(m.group('secs')) | ||||||
|  |     if m.group('mins'): | ||||||
|  |         res += int(m.group('mins')) * 60 | ||||||
|  |         if m.group('hours'): | ||||||
|  |             res += int(m.group('hours')) * 60 * 60 | ||||||
|  |     return res | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister