mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-11-04 00:25:15 +00:00 
			
		
		
		
	[canal13cl] Add test and improve extraction (#2498)
This commit is contained in:
		@@ -1,32 +1,48 @@
 | 
				
			|||||||
 | 
					# coding: utf-8
 | 
				
			||||||
from __future__ import unicode_literals
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .common import InfoExtractor
 | 
					from .common import InfoExtractor
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Canal13clIE(InfoExtractor):
 | 
					class Canal13clIE(InfoExtractor):
 | 
				
			||||||
    _VALID_URL = r'^http://(?:www\.)?13\.cl/'
 | 
					    _VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
 | 
				
			||||||
    IE_NAME = 'Canal13cl'
 | 
					    _TEST = {
 | 
				
			||||||
 | 
					        'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
 | 
				
			||||||
 | 
					        'md5': '4cb1fa38adcad8fea88487a078831755',
 | 
				
			||||||
 | 
					        'info_dict': {
 | 
				
			||||||
 | 
					            'id': '1403022125',
 | 
				
			||||||
 | 
					            'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
 | 
				
			||||||
 | 
					            'ext': 'mp4',
 | 
				
			||||||
 | 
					            'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda',
 | 
				
			||||||
 | 
					            'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.',
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _real_extract(self, url):
 | 
					    def _real_extract(self, url):
 | 
				
			||||||
        webpage = self._download_webpage(url, url)
 | 
					        mobj = re.match(self._VALID_URL, url)
 | 
				
			||||||
        video_id = self._html_search_regex(
 | 
					        display_id = mobj.group('id')
 | 
				
			||||||
            r'http://streaming.13.cl/(.*)\.mp4',
 | 
					
 | 
				
			||||||
            webpage, u'video_id')
 | 
					        webpage = self._download_webpage(url, display_id)
 | 
				
			||||||
        title = self._html_search_regex(
 | 
					
 | 
				
			||||||
            r'(articuloTitulo = \"(.*?)\"|(.*?)\|)',
 | 
					        title = self._html_search_meta(
 | 
				
			||||||
            webpage, u'title')
 | 
					            'twitter:title', webpage, 'title', fatal=True)
 | 
				
			||||||
 | 
					        description = self._html_search_meta(
 | 
				
			||||||
 | 
					            'twitter:description', webpage, 'description')
 | 
				
			||||||
        url = self._html_search_regex(
 | 
					        url = self._html_search_regex(
 | 
				
			||||||
            r'articuloVideo = \"(.*?)\"',
 | 
					            r'articuloVideo = \"(.*?)\"', webpage, 'url')
 | 
				
			||||||
            webpage, u'url')
 | 
					        real_id = self._search_regex(
 | 
				
			||||||
        thumbnail = self._html_search_regex (
 | 
					            r'[^0-9]([0-9]{7,})[^0-9]', url, 'id', default=display_id)
 | 
				
			||||||
            r'articuloImagen = \"(.*?)\"',
 | 
					        thumbnail = self._html_search_regex(
 | 
				
			||||||
            webpage, u'thumbnail')
 | 
					            r'articuloImagen = \"(.*?)\"', webpage, 'thumbnail')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return {
 | 
					        return {
 | 
				
			||||||
            'video_id': video_id,
 | 
					            'id': real_id,
 | 
				
			||||||
 | 
					            'display_id': display_id,
 | 
				
			||||||
            'url': url,
 | 
					            'url': url,
 | 
				
			||||||
            'title': title,
 | 
					            'title': title,
 | 
				
			||||||
 | 
					            'description': description,
 | 
				
			||||||
            'ext': 'mp4',
 | 
					            'ext': 'mp4',
 | 
				
			||||||
            'thumbnail': thumbnail
 | 
					            'thumbnail': thumbnail,
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -436,14 +436,14 @@ class InfoExtractor(object):
 | 
				
			|||||||
        if secure: regexes = self._og_regexes('video:secure_url') + regexes
 | 
					        if secure: regexes = self._og_regexes('video:secure_url') + regexes
 | 
				
			||||||
        return self._html_search_regex(regexes, html, name, **kargs)
 | 
					        return self._html_search_regex(regexes, html, name, **kargs)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _html_search_meta(self, name, html, display_name=None):
 | 
					    def _html_search_meta(self, name, html, display_name=None, fatal=False):
 | 
				
			||||||
        if display_name is None:
 | 
					        if display_name is None:
 | 
				
			||||||
            display_name = name
 | 
					            display_name = name
 | 
				
			||||||
        return self._html_search_regex(
 | 
					        return self._html_search_regex(
 | 
				
			||||||
            r'''(?ix)<meta
 | 
					            r'''(?ix)<meta
 | 
				
			||||||
                    (?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
 | 
					                    (?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
 | 
				
			||||||
                    [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
 | 
					                    [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
 | 
				
			||||||
            html, display_name, fatal=False)
 | 
					            html, display_name, fatal=fatal)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _dc_search_uploader(self, html):
 | 
					    def _dc_search_uploader(self, html):
 | 
				
			||||||
        return self._html_search_meta('dc.creator', html, 'uploader')
 | 
					        return self._html_search_meta('dc.creator', html, 'uploader')
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user