Merge branch 'master' into youtube-mix-fix

2026-03-09 07:40:09 +00:00 · 2020-10-31 02:40:11 -07:00
parent 5b0a6a8010 7fb5f2f29d
commit 712799bd30
28 changed files with 579 additions and 154 deletions
--- a/youtube_dlc/extractor/adobepass.py
+++ b/youtube_dlc/extractor/adobepass.py
@@ -1438,6 +1438,13 @@ class AdobePassIE(InfoExtractor):
                            provider_redirect_page, 'oauth redirect')
                        self._download_webpage(
                            oauth_redirect_url, video_id, 'Confirming auto login')
+                    elif 'automatically signed in with' in provider_redirect_page:
+                        # Seems like Comcast is rolling out a new way of automatically signing in customers
+                        oauth_redirect_url = self._html_search_regex(
+                            r'continue:\s*"(https://oauth.xfinity.com/oauth/authorize\?.+)"', provider_redirect_page,
+                            'oauth redirect (signed)')
+                        # Just need to process the request. No useful data comes back
+                        self._download_webpage(oauth_redirect_url, video_id, 'Confirming auto login')
                    else:
                        if '<form name="signin"' in provider_redirect_page:
                            provider_login_page_res = provider_redirect_page_res
--- a/youtube_dlc/extractor/brightcove.py
+++ b/youtube_dlc/extractor/brightcove.py
@@ -471,12 +471,17 @@ class BrightcoveNewIE(AdobePassIE):
        title = json_data['name'].strip()

        formats = []
+        sources_num = len(json_data.get('sources'))
+        key_systems_present = 0
        for source in json_data.get('sources', []):
            container = source.get('container')
            ext = mimetype2ext(source.get('type'))
            src = source.get('src')
-            # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
-            if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
+            # https://apis.support.brightcove.com/playback/references/playback-api-video-fields-reference.html
+            if source.get('key_systems'):
+                key_systems_present += 1
+                continue
+            elif ext == 'ism' or container == 'WVM':
                continue
            elif ext == 'm3u8' or container == 'M2TS':
                if not src:
@@ -533,6 +538,10 @@ class BrightcoveNewIE(AdobePassIE):
                        'format_id': build_format_id('rtmp'),
                    })
                formats.append(f)
+
+        if sources_num == key_systems_present:
+            raise ExtractorError('This video is DRM protected', expected=True)
+
        if not formats:
            # for sonyliv.com DRM protected videos
            s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
--- a/youtube_dlc/extractor/extractors.py
+++ b/youtube_dlc/extractor/extractors.py
@@ -751,6 +751,7 @@ from .ninecninemedia import NineCNineMediaIE
 from .ninegag import NineGagIE
 from .ninenow import NineNowIE
 from .nintendo import NintendoIE
+from .nitter import NitterIE
 from .njpwworld import NJPWWorldIE
 from .nobelprize import NobelPrizeIE
 from .noco import NocoIE
@@ -1037,6 +1038,10 @@ from .sky import (
    SkyNewsIE,
    SkySportsIE,
 )
+from .skyitalia import (
+    SkyArteItaliaIE,
+    SkyItaliaIE,
+)
 from .slideshare import SlideshareIE
 from .slideslive import SlidesLiveIE
 from .slutload import SlutloadIE
--- a/youtube_dlc/extractor/mtv.py
+++ b/youtube_dlc/extractor/mtv.py
@@ -289,7 +289,7 @@ class MTVServicesInfoExtractor(InfoExtractor):

        return mgid

-    def _extract_mgid(self, webpage, url, data_zone=None):
+    def _extract_mgid(self, webpage, url, title=None, data_zone=None):
        try:
            # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
            # or http://media.mtvnservices.com/{mgid}
@@ -300,7 +300,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
        except RegexNotFoundError:
            mgid = None

-        title = self._match_id(url)
+        if not title:
+            title = url_basename(url)

        try:
            window_data = self._parse_json(self._search_regex(
@@ -336,7 +337,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
    def _real_extract(self, url):
        title = url_basename(url)
        webpage = self._download_webpage(url, title)
-        mgid = self._extract_mgid(webpage, url)
+        mgid = self._extract_mgid(webpage, url, title=title)
        videos_info = self._get_videos_info(mgid, url=url)
        return videos_info

--- a/youtube_dlc/extractor/netzkino.py
+++ b/youtube_dlc/extractor/netzkino.py
@@ -13,17 +13,16 @@ from ..utils import (


 class NetzkinoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/(?P<category>[^/]+)/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/[^/]+/(?P<id>[^/]+)'

-    _TEST = {
-        'url': 'http://www.netzkino.de/#!/scifikino/rakete-zum-mond',
+    _TESTS = [{
+        'url': 'https://www.netzkino.de/#!/scifikino/rakete-zum-mond',
        'md5': '92a3f8b76f8d7220acce5377ea5d4873',
        'info_dict': {
            'id': 'rakete-zum-mond',
            'ext': 'mp4',
-            'title': 'Rakete zum Mond (Endstation Mond, Destination Moon)',
-            'comments': 'mincount:3',
-            'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28',
+            'title': 'Rakete zum Mond \u2013 Jules Verne',
+            'description': 'md5:f0a8024479618ddbfa450ff48ffa6c60',
            'upload_date': '20120813',
            'thumbnail': r're:https?://.*\.jpg$',
            'timestamp': 1344858571,
@@ -32,17 +31,30 @@ class NetzkinoIE(InfoExtractor):
        'params': {
            'skip_download': 'Download only works from Germany',
        }
-    }
+    }, {
+        'url': 'https://www.netzkino.de/#!/filme/dr-jekyll-mrs-hyde-2',
+        'md5': 'c7728b2dadd04ff6727814847a51ef03',
+        'info_dict': {
+            'id': 'dr-jekyll-mrs-hyde-2',
+            'ext': 'mp4',
+            'title': 'Dr. Jekyll & Mrs. Hyde 2',
+            'description': 'md5:c2e9626ebd02de0a794b95407045d186',
+            'upload_date': '20190130',
+            'thumbnail': r're:https?://.*\.jpg$',
+            'timestamp': 1548849437,
+            'age_limit': 18,
+        },
+        'params': {
+            'skip_download': 'Download only works from Germany',
+        }
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        category_id = mobj.group('category')
        video_id = mobj.group('id')

-        api_url = 'http://api.netzkino.de.simplecache.net/capi-2.0a/categories/%s.json?d=www' % category_id
-        api_info = self._download_json(api_url, video_id)
-        info = next(
-            p for p in api_info['posts'] if p['slug'] == video_id)
+        api_url = 'https://api.netzkino.de.simplecache.net/capi-2.0a/movies/%s.json?d=www' % video_id
+        info = self._download_json(api_url, video_id)
        custom_fields = info['custom_fields']

        production_js = self._download_webpage(
@@ -67,23 +79,12 @@ class NetzkinoIE(InfoExtractor):
        } for key, tpl in templates.items()]
        self._sort_formats(formats)

-        comments = [{
-            'timestamp': parse_iso8601(c.get('date'), delimiter=' '),
-            'id': c['id'],
-            'author': c['name'],
-            'html': c['content'],
-            'parent': 'root' if c.get('parent', 0) == 0 else c['parent'],
-        } for c in info.get('comments', [])]
-
        return {
            'id': video_id,
            'formats': formats,
-            'comments': comments,
            'title': info['title'],
            'age_limit': int_or_none(custom_fields.get('FSK')[0]),
            'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
            'description': clean_html(info.get('content')),
            'thumbnail': info.get('thumbnail'),
-            'playlist_title': api_info.get('title'),
-            'playlist_id': category_id,
        }
--- a/youtube_dlc/extractor/newgrounds.py
+++ b/youtube_dlc/extractor/newgrounds.py
@@ -4,6 +4,7 @@ import re

 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
    extract_attributes,
    int_or_none,
    parse_duration,
@@ -20,22 +21,22 @@ class NewgroundsIE(InfoExtractor):
        'info_dict': {
            'id': '549479',
            'ext': 'mp3',
-            'title': 'B7 - BusMode',
+            'title': 'Burn7 - B7 - BusMode',
            'uploader': 'Burn7',
            'timestamp': 1378878540,
            'upload_date': '20130911',
            'duration': 143,
        },
    }, {
-        'url': 'https://www.newgrounds.com/portal/view/673111',
-        'md5': '3394735822aab2478c31b1004fe5e5bc',
+        'url': 'https://www.newgrounds.com/portal/view/1',
+        'md5': 'fbfb40e2dc765a7e830cb251d370d981',
        'info_dict': {
-            'id': '673111',
+            'id': '1',
            'ext': 'mp4',
-            'title': 'Dancin',
-            'uploader': 'Squirrelman82',
-            'timestamp': 1460256780,
-            'upload_date': '20160410',
+            'title': 'Brian-Beaton - Scrotum 1',
+            'uploader': 'Brian-Beaton',
+            'timestamp': 955064100,
+            'upload_date': '20000406',
        },
    }, {
        # source format unavailable, additional mp4 formats
@@ -43,7 +44,7 @@ class NewgroundsIE(InfoExtractor):
        'info_dict': {
            'id': '689400',
            'ext': 'mp4',
-            'title': 'ZTV News Episode 8',
+            'title': 'Bennettthesage - ZTV News Episode 8',
            'uploader': 'BennettTheSage',
            'timestamp': 1487965140,
            'upload_date': '20170224',
@@ -55,42 +56,73 @@ class NewgroundsIE(InfoExtractor):

    def _real_extract(self, url):
        media_id = self._match_id(url)
-
+        formats = []
+        uploader = None
        webpage = self._download_webpage(url, media_id)

        title = self._html_search_regex(
            r'<title>([^>]+)</title>', webpage, 'title')

-        media_url = self._parse_json(self._search_regex(
-            r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id)
+        media_url_string = self._search_regex(
+            r'"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None, fatal=False)

-        formats = [{
-            'url': media_url,
-            'format_id': 'source',
-            'quality': 1,
-        }]
+        if media_url_string:
+            media_url = self._parse_json(media_url_string, media_id)
+            formats = [{
+                'url': media_url,
+                'format_id': 'source',
+                'quality': 1,
+            }]

-        max_resolution = int_or_none(self._search_regex(
-            r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
-            default=None))
-        if max_resolution:
-            url_base = media_url.rpartition('.')[0]
-            for resolution in (360, 720, 1080):
-                if resolution > max_resolution:
-                    break
-                formats.append({
-                    'url': '%s.%dp.mp4' % (url_base, resolution),
-                    'format_id': '%dp' % resolution,
-                    'height': resolution,
-                })
+            max_resolution = int_or_none(self._search_regex(
+                r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
+                default=None))
+            if max_resolution:
+                url_base = media_url.rpartition('.')[0]
+                for resolution in (360, 720, 1080):
+                    if resolution > max_resolution:
+                        break
+                    formats.append({
+                        'url': '%s.%dp.mp4' % (url_base, resolution),
+                        'format_id': '%dp' % resolution,
+                        'height': resolution,
+                    })
+        else:
+            video_id = int_or_none(self._search_regex(
+                r'data-movie-id=\\"([0-9]+)\\"', webpage, ''))
+            if not video_id:
+                raise ExtractorError('Could not extract media data')
+
+            url_video_data = 'https://www.newgrounds.com/portal/video/%s' % video_id
+            headers = {
+                'Accept': 'application/json',
+                'Referer': url,
+                'X-Requested-With': 'XMLHttpRequest'
+            }
+            json_video = self._download_json(url_video_data, video_id, headers=headers, fatal=False)
+            if not json_video:
+                raise ExtractorError('Could not fetch media data')
+
+            uploader = json_video.get('author')
+            title = json_video.get('title')
+            media_formats = json_video.get('sources', [])
+            for media_format in media_formats:
+                media_sources = media_formats[media_format]
+                for source in media_sources:
+                    formats.append({
+                        'format_id': media_format,
+                        'quality': int_or_none(media_format[:-1]),
+                        'url': source.get('src')
+                    })

        self._check_formats(formats, media_id)
        self._sort_formats(formats)

-        uploader = self._html_search_regex(
-            (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*Author\s*</em>',
-             r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
-            fatal=False)
+        if not uploader:
+            uploader = self._html_search_regex(
+                (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*(?:Author|Artist)\s*</em>',
+                 r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
+                fatal=False)

        timestamp = unified_timestamp(self._html_search_regex(
            (r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
@@ -109,6 +141,9 @@ class NewgroundsIE(InfoExtractor):
        if '<dd>Song' in webpage:
            formats[0]['vcodec'] = 'none'

+        if uploader:
+            title = "%s - %s" % (uploader, title)
+
        return {
            'id': media_id,
            'title': title,
--- a/youtube_dlc/extractor/nitter.py
+++ b/youtube_dlc/extractor/nitter.py
@@ -0,0 +1,167 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+    parse_count,
+    unified_strdate,
+    unified_timestamp,
+    remove_end,
+    determine_ext,
+)
+import re
+
+
+class NitterIE(InfoExtractor):
+    # Taken from https://github.com/zedeus/nitter/wiki/Instances
+    INSTANCES = ('nitter.net',
+                 'nitter.snopyta.org',
+                 'nitter.42l.fr',
+                 'nitter.nixnet.services',
+                 'nitter.13ad.de',
+                 'nitter.pussthecat.org',
+                 'nitter.mastodont.cat',
+                 'nitter.dark.fail',
+                 'nitter.tedomum.net',
+                 'nitter.cattube.org',
+                 'nitter.fdn.fr',
+                 'nitter.1d4.us',
+                 'nitter.kavin.rocks',
+                 'tweet.lambda.dance',
+                 'nitter.cc',
+                 'nitter.weaponizedhumiliation.com',
+                 '3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion',
+                 'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion',
+                 'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion')
+
+    _INSTANCES_RE = '(?:' + '|'.join([re.escape(instance) for instance in INSTANCES]) + ')'
+    _VALID_URL = r'https?://%(instance)s/(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)?' % {'instance': _INSTANCES_RE}
+    current_instance = INSTANCES[0]  # the test and official instance
+    _TESTS = [
+        {
+            # GIF (wrapped in mp4)
+            'url': 'https://' + current_instance + '/firefox/status/1314279897502629888#m',
+            'info_dict': {
+                'id': '1314279897502629888',
+                'ext': 'mp4',
+                'title': 'Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension.   Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg  #UnfckTheInternet',
+                'description': 'You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension.   Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg  #UnfckTheInternet',
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'uploader': 'Firefox 🔥',
+                'uploader_id': 'firefox',
+                'uploader_url': 'https://' + current_instance + '/firefox',
+                'upload_date': '20201008',
+                'timestamp': 1602183720,
+            },
+        }, {  # normal video
+            'url': 'https://' + current_instance + '/Le___Doc/status/1299715685392756737#m',
+            'info_dict': {
+                'id': '1299715685392756737',
+                'ext': 'mp4',
+                'title': 'Le Doc - "Je ne prédis jamais rien" D Raoult, Août 2020...',
+                'description': '"Je ne prédis jamais rien" D Raoult, Août 2020...',
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'uploader': 'Le Doc',
+                'uploader_id': 'Le___Doc',
+                'uploader_url': 'https://' + current_instance + '/Le___Doc',
+                'upload_date': '20200829',
+                'timestamp': 1598711341,
+                'view_count': int,
+                'like_count': int,
+                'repost_count': int,
+                'comment_count': int,
+            },
+        }, {  # video embed in a "Streaming Political Ads" box
+            'url': 'https://' + current_instance + '/mozilla/status/1321147074491092994#m',
+            'info_dict': {
+                'id': '1321147074491092994',
+                'ext': 'mp4',
+                'title': "Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows?  This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few.   Learn more ➡️ https://mzl.la/StreamingAds",
+                'description': "Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows?  This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few.   Learn more ➡️ https://mzl.la/StreamingAds",
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'uploader': 'Mozilla',
+                'uploader_id': 'mozilla',
+                'uploader_url': 'https://' + current_instance + '/mozilla',
+                'upload_date': '20201027',
+                'timestamp': 1603820982
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        """Extract the video attached to a tweet shown on a Nitter status page.
+
+        The uploader name, counters, thumbnails and date are looked up
+        non-fatally and stay None/empty when the page does not provide them.
+        """
+        video_id = self._match_id(url)
+        parsed_url = compat_urlparse.urlparse(url)
+        base_url = parsed_url.scheme + '://' + parsed_url.netloc
+
+        self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on')
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = base_url + self._html_search_regex(r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url')
+        ext = determine_ext(video_url)
+
+        if ext == 'unknown_video':
+            formats = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
+        else:
+            formats = [{
+                'url': video_url,
+                'ext': ext
+            }]
+
+        # og:description may be absent; guard it so the tweet-content fallback is reachable
+        title = (
+            (self._og_search_description(webpage) or '').replace('\n', ' ')
+            or self._html_search_regex(r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title'))
+        description = title
+
+        # _VALID_URL only matches with a non-empty uploader_id, so it is always set here
+        uploader_id = re.match(self._VALID_URL, url).group('uploader_id')
+        uploader_url = base_url + '/' + uploader_id
+
+        uploader = self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)
+        if uploader:
+            title = uploader + ' - ' + title
+
+        view_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-play[^>]*></span>\s([^<]+)</div>', webpage, 'view count', fatal=False))
+        like_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-heart[^>]*></span>\s([^<]+)</div>', webpage, 'like count', fatal=False))
+        repost_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-retweet[^>]*></span>\s([^<]+)</div>', webpage, 'repost count', fatal=False))
+        comment_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-comment[^>]*></span>\s([^<]+)</div>', webpage, 'comment count', fatal=False))
+
+        thumbnail_path = (
+            self._html_search_meta('og:image', webpage, 'thumbnail url')
+            or self._html_search_regex(r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False))
+        thumbnail, thumbnails = None, []
+        if thumbnail_path:
+            # when taken from the <video> poster, the URL is expected to end in '%3Asmall'
+            thumbnail = remove_end(base_url + thumbnail_path, '%3Asmall')
+            thumbnails = [{
+                'id': size,
+                'url': thumbnail + '%3A' + size,
+            } for size in ('thumb', 'small', 'large', 'medium', 'orig')]
+
+        date = self._html_search_regex(r'<span[^>]+class="tweet-date"[^>]*><a[^>]+title="([^"]+)"', webpage, 'upload date', fatal=False)
+        upload_date = unified_strdate(date)
+        timestamp = unified_timestamp(date)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'uploader': uploader,
+            'timestamp': timestamp,
+            'uploader_id': uploader_id,
+            'uploader_url': uploader_url,
+            'view_count': view_count,
+            'like_count': like_count,
+            'repost_count': repost_count,
+            'comment_count': comment_count,
+            'formats': formats,
+            'thumbnails': thumbnails,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+        }
--- a/youtube_dlc/extractor/skyitalia.py
+++ b/youtube_dlc/extractor/skyitalia.py
@@ -0,0 +1,119 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class SkyItaliaBaseIE(InfoExtractor):
+    """Shared logic for the sky.it extractors; subclasses define _VALID_URL and _TOKEN."""
+    _GET_VIDEO_DATA = 'https://apid.sky.it/vdp/v1/getVideoData?token={token}&caller=sky&rendition=web&id={id}'
+    # Quality label -> [width, height]; looked up in the API reply as 'web_<label>_url'
+    _RES = {
+        'low': [426, 240],
+        'med': [640, 360],
+        'high': [854, 480],
+        'hd': [1280, 720]
+    }
+
+    def _extract_video_id(self, url):
+        """Download the page at url and return the video id embedded in it."""
+        webpage = self._download_webpage(url, 'skyitalia')
+        video_id = self._html_search_regex(
+            [r'data-videoid=\"(\d+)\"',
+             r'http://player\.sky\.it/social\?id=(\d+)\&'],
+            webpage, 'video_id')
+        if not video_id:
+            raise ExtractorError('Video ID not found.')
+        return video_id
+
+    def _get_formats(self, video_id, token):
+        """Query getVideoData with token and return the info dict for video_id."""
+        video_data = self._download_json(
+            self._GET_VIDEO_DATA.format(token=token, id=video_id), video_id)
+        formats = []
+        for quality, (width, height) in self._RES.items():
+            format_url = video_data.get('web_%s_url' % quality)
+            if not format_url:
+                # Rendition absent or empty: a format without a URL is unusable
+                continue
+            formats.append({
+                'url': format_url,
+                'format_id': quality,
+                'width': width,
+                'height': height,
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_data.get('title'),
+            'thumbnail': video_data.get('thumb'),
+            'formats': formats,
+        }
+
+    def _real_extract(self, url):
+        """Take the video id from the URL, or from the page when the URL has none."""
+        video_id = self._match_id(url)
+        # NOTE(review): an unmatched optional 'id' group is expected to arrive as the string 'None' - confirm
+        if video_id == 'None':
+            video_id = self._extract_video_id(url)
+        return self._get_formats(video_id, self._TOKEN)
+
+
+class SkyItaliaIE(SkyItaliaBaseIE):  # pages on the sport, tg24 and video subdomains of sky.it
+    IE_NAME = 'sky.it'
+    _VALID_URL = r'''(?x)https?://
+                    (?P<ie>sport|tg24|video)
+                    \.sky\.it/(?:.+?)
+                    (?P<id>[0-9]{6})?
+                    (?:$|\?)'''
+
+    _TESTS = [{  # the sport/tg24 article URLs below carry no trailing 6-digit id
+        'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162',
+        'md5': '9c03b590b06e5952d8051f0e02b0feca',
+        'info_dict': {
+            'id': '616162',
+            'ext': 'mp4',
+            'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere',
+            'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg',
+        }
+    }, {
+        'url': 'https://sport.sky.it/motogp/2020/09/18/motogp-gp-emilia-romagna-misano-2020-prove-libere-diretta',
+        'md5': '9c03b590b06e5952d8051f0e02b0feca',
+        'info_dict': {
+            'id': '616162',
+            'ext': 'mp4',
+            'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere',
+            'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg',
+        }
+    }, {
+        'url': 'https://tg24.sky.it/salute-e-benessere/2020/09/18/coronavirus-vaccino-ue-sanofi',
+        'md5': 'caa25e62dadb529bc5e0b078da99f854',
+        'info_dict': {
+            'id': '615904',
+            'ext': 'mp4',
+            'title': 'Covid-19, al Buzzi di Milano tamponi drive-in per studenti',
+            'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/17/1600351405841_error-coronavirus-al-buzzi-di-milano-tamponi_thumbnail_1.jpg',
+        }
+    }, {
+        'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162?itm_source=parsely-api',
+        'only_matching': True,
+    }]
+    _TOKEN = 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk'  # handed to _get_formats() by the base class's _real_extract
+
+
+class SkyArteItaliaIE(SkyItaliaBaseIE):  # video pages on arte.sky.it
+    IE_NAME = 'arte.sky.it'
+    _VALID_URL = r'https?://arte\.sky\.it/video/.+?(?P<id>[0-9]{6})?$'  # the trailing 6-digit id is optional
+    _TEST = {
+        'url': 'https://arte.sky.it/video/federico-fellini-maestri-cinema/',
+        'md5': '2f22513a89f45142f2746f878d690647',
+        'info_dict': {
+            'id': '612888',
+            'ext': 'mp4',
+            'title': 'I maestri del cinema Federico Felini',
+            'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/03/1599146747305_i-maestri-del-cinema-federico-felini_thumbnail_1.jpg',
+        }
+    }
+    _TOKEN = 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd'  # handed to _get_formats() by the base class's _real_extract
--- a/youtube_dlc/extractor/xtube.py
+++ b/youtube_dlc/extractor/xtube.py
@@ -5,6 +5,7 @@ import re

 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
    int_or_none,
    js_to_json,
    orderedSet,
@@ -33,27 +34,11 @@ class XTubeIE(InfoExtractor):
            'title': 'strange erotica',
            'description': 'contains:an ET kind of thing',
            'uploader': 'greenshowers',
-            'duration': 450,
+            'duration': 449,
            'view_count': int,
            'comment_count': int,
            'age_limit': 18,
        }
-    }, {
-        # FLV videos with duplicated formats
-        'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752',
-        'md5': 'a406963eb349dd43692ec54631efd88b',
-        'info_dict': {
-            'id': '9299752',
-            'display_id': 'A-Super-Run-Part-1-YT',
-            'ext': 'flv',
-            'title': 'A Super Run - Part 1 (YT)',
-            'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616',
-            'uploader': 'tshirtguy59',
-            'duration': 579,
-            'view_count': int,
-            'comment_count': int,
-            'age_limit': 18,
-        },
    }, {
        # new URL schema
        'url': 'http://www.xtube.com/video-watch/strange-erotica-625837',
@@ -89,16 +74,24 @@ class XTubeIE(InfoExtractor):

        title, thumbnail, duration = [None] * 3

-        config = self._parse_json(self._search_regex(
-            r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config',
-            default='{}'), video_id, transform_source=js_to_json, fatal=False)
-        if config:
-            config = config.get('mainRoll')
-            if isinstance(config, dict):
-                title = config.get('title')
-                thumbnail = config.get('poster')
-                duration = int_or_none(config.get('duration'))
-                sources = config.get('sources') or config.get('format')
+        json_config_string = self._search_regex(
+            r'playerConf=({.+?}),loaderConf',
+            webpage, 'config', default=None)
+        if not json_config_string:
+            raise ExtractorError("Could not extract video player data")
+
+        json_config_string = json_config_string.replace("!0", "true").replace("!1", "false")
+
+        config = self._parse_json(json_config_string, video_id, transform_source=js_to_json, fatal=False)
+        if not config:
+            raise ExtractorError("Could not extract video player data")
+
+        config = config.get('mainRoll')
+        if isinstance(config, dict):
+            title = config.get('title')
+            thumbnail = config.get('poster')
+            duration = int_or_none(config.get('duration'))
+            sources = config.get('sources') or config.get('format')

        if not isinstance(sources, dict):
            sources = self._parse_json(self._search_regex(
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@@ -1375,14 +1375,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    'ext': ext,
                })
            sub_lang_list[lang] = sub_formats
-        """ if has_live_chat_replay:
+        if has_live_chat_replay:
            sub_lang_list['live_chat'] = [
                {
                    'video_id': video_id,
                    'ext': 'json',
                    'protocol': 'youtube_live_chat_replay',
                },
-            ] """
+            ]
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
            return {}
@@ -1406,6 +1406,44 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)

+    def _get_music_metadata_from_yt_initial(self, yt_initial):
+        """Return the music tracks described by the watch page's metadata rows.
+
+        Each track is a dict with any of the keys 'album', 'artist' and 'track',
+        filled from the 'Album', 'Artist' and 'Song' rows; a key that repeats
+        within one row container starts a new track.  The list may be empty.
+        """
+        music_metadata = []
+        key_map = {
+            'Album': 'album',
+            'Artist': 'artist',
+            'Song': 'track'
+        }
+        contents = try_get(
+            yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
+        for content in contents or []:
+            music_track = {}
+            rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list)
+            for row in rows or []:
+                renderer = try_get(row, lambda x: x['metadataRowRenderer'], dict)
+                key = try_get(renderer, lambda x: x['title']['simpleText'])
+                value = try_get(renderer, lambda x: x['contents'][0]['simpleText']) or \
+                    try_get(renderer, lambda x: x['contents'][0]['runs'][0]['text'])
+                # NOTE(review): `str` would reject unicode text on Python 2 - confirm whether compat_str is needed
+                if not isinstance(key, str) or not isinstance(value, str):
+                    continue
+                field = key_map.get(key)
+                if field is None:
+                    continue
+                if field in music_track:
+                    # we've started on a new track
+                    music_metadata.append(music_track)
+                    music_track = {}
+                music_track[field] = value
+            if music_track:
+                music_metadata.append(music_track)
+        return music_metadata
+
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
           argument to speed up the process."""
@@ -2328,6 +2366,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                if release_year:
                    release_year = int(release_year)

+        yt_initial = self._get_yt_initial_data(video_id, video_webpage)
+        if yt_initial:
+            music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
+            if len(music_metadata):
+                album = music_metadata[0].get('album')
+                artist = music_metadata[0].get('artist')
+                track = music_metadata[0].get('track')
+
        m_episode = re.search(
            r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
            video_webpage)