mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[camdemy] Simplify and make more robust (#4938)
Do not raise errors if extraction of the view count or upload date fails. Remove re.MULTILINE, which had no effect because the patterns contain no ^ or $ anchors. Follow PEP 8 naming conventions.
This commit is contained in:
		| @@ -1,11 +1,18 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import (compat_urllib_parse, compat_urlparse) | ||||
| from ..utils import parse_iso8601 | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     parse_iso8601, | ||||
|     str_to_int, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CamdemyIE(InfoExtractor): | ||||
| @@ -23,6 +30,7 @@ class CamdemyIE(InfoExtractor): | ||||
|             'creator': 'ss11spring', | ||||
|             'upload_date': '20130114', | ||||
|             'timestamp': 1358154556, | ||||
|             'view_count': int, | ||||
|         } | ||||
|     }, { | ||||
|         # With non-empty description | ||||
| @@ -55,46 +63,43 @@ class CamdemyIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         page = self._download_webpage(url, video_id) | ||||
|  | ||||
|         srcFrom = self._html_search_regex( | ||||
|         src_from = self._html_search_regex( | ||||
|             r"<div class='srcFrom'>Source: <a title='([^']+)'", page, | ||||
|             'external source', default=None) | ||||
|  | ||||
|         if srcFrom: | ||||
|             return self.url_result(srcFrom) | ||||
|         if src_from: | ||||
|             return self.url_result(src_from) | ||||
|  | ||||
|         oembed_obj = self._download_json( | ||||
|             'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id) | ||||
|  | ||||
|         thumb_url = oembed_obj['thumbnail_url'] | ||||
|         video_folder = compat_urlparse.urljoin(thumb_url, 'video/') | ||||
|         fileListXML = self._download_xml( | ||||
|         file_list_doc = self._download_xml( | ||||
|             compat_urlparse.urljoin(video_folder, 'fileList.xml'), | ||||
|             video_id, 'Filelist XML') | ||||
|         fileName = fileListXML.find('./video/item/fileName').text | ||||
|         file_name = file_list_doc.find('./video/item/fileName').text | ||||
|         video_url = compat_urlparse.urljoin(video_folder, file_name) | ||||
|  | ||||
|         creation_time = self._html_search_regex( | ||||
|             r"<div class='title'>Posted :</div>[\r\n ]*<div class='value'>([^<>]+)<", | ||||
|             page, 'creation time', flags=re.MULTILINE) + '+08:00' | ||||
|         creation_timestamp = parse_iso8601(creation_time, delimiter=' ') | ||||
|  | ||||
|         view_count_str = self._html_search_regex( | ||||
|             r"<div class='title'>Views :</div>[\r\n ]*<div class='value'>([^<>]+)<", | ||||
|             page, 'view count', flags=re.MULTILINE) | ||||
|         views = int(view_count_str.replace(',', '')) | ||||
|         timestamp = parse_iso8601(self._html_search_regex( | ||||
|             r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<", | ||||
|             page, 'creation time', fatal=False), | ||||
|             delimiter=' ', timezone=datetime.timedelta(hours=8)) | ||||
|         view_count = str_to_int(self._html_search_regex( | ||||
|             r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<", | ||||
|             page, 'view count', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': compat_urlparse.urljoin(video_folder, fileName), | ||||
|             'url': video_url, | ||||
|             'title': oembed_obj['title'], | ||||
|             'thumbnail': thumb_url, | ||||
|             'description': self._html_search_meta('description', page), | ||||
|             'creator': oembed_obj['author_name'], | ||||
|             'duration': oembed_obj['duration'], | ||||
|             'timestamp': creation_timestamp, | ||||
|             'view_count': views, | ||||
|             'timestamp': timestamp, | ||||
|             'view_count': view_count, | ||||
|         } | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -666,26 +666,27 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): | ||||
|             req, **kwargs) | ||||
|  | ||||
|  | ||||
| def parse_iso8601(date_str, delimiter='T'): | ||||
| def parse_iso8601(date_str, delimiter='T', timezone=None): | ||||
|     """ Return a UNIX timestamp from the given date """ | ||||
|  | ||||
|     if date_str is None: | ||||
|         return None | ||||
|  | ||||
|     m = re.search( | ||||
|         r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)', | ||||
|         date_str) | ||||
|     if not m: | ||||
|         timezone = datetime.timedelta() | ||||
|     else: | ||||
|         date_str = date_str[:-len(m.group(0))] | ||||
|         if not m.group('sign'): | ||||
|     if timezone is None: | ||||
|         m = re.search( | ||||
|             r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)', | ||||
|             date_str) | ||||
|         if not m: | ||||
|             timezone = datetime.timedelta() | ||||
|         else: | ||||
|             sign = 1 if m.group('sign') == '+' else -1 | ||||
|             timezone = datetime.timedelta( | ||||
|                 hours=sign * int(m.group('hours')), | ||||
|                 minutes=sign * int(m.group('minutes'))) | ||||
|             date_str = date_str[:-len(m.group(0))] | ||||
|             if not m.group('sign'): | ||||
|                 timezone = datetime.timedelta() | ||||
|             else: | ||||
|                 sign = 1 if m.group('sign') == '+' else -1 | ||||
|                 timezone = datetime.timedelta( | ||||
|                     hours=sign * int(m.group('hours')), | ||||
|                     minutes=sign * int(m.group('minutes'))) | ||||
|     date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) | ||||
|     dt = datetime.datetime.strptime(date_str, date_format) - timezone | ||||
|     return calendar.timegm(dt.timetuple()) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister