[ie/1tv] Extract chapters (#15848)

Authored by: hunter-gatherer8
[ie/pornhub] Fix extractor (#15858)
2026-02-07 22:47:24 +00:00 · 2026-02-06 20:45:47 +00:00 · 2026-02-06 20:41:56 +00:00 · 2026-02-04 21:06:39 +00:00 · 2026-02-04 20:49:07 +00:00 · 2026-02-04 15:33:00 +00:00
8 changed files with 371 additions and 10 deletions
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -924,6 +924,7 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(month_by_name(None), None)
        self.assertEqual(month_by_name('December', 'en'), 12)
        self.assertEqual(month_by_name('décembre', 'fr'), 12)
+        self.assertEqual(month_by_name('desember', 'is'), 12)
        self.assertEqual(month_by_name('December'), 12)
        self.assertEqual(month_by_name('décembre'), None)
        self.assertEqual(month_by_name('Unknown', 'unknown'), None)
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1029,6 +1029,10 @@ from .livestream import (
 )
 from .livestreamfails import LivestreamfailsIE
 from .lnk import LnkIE
+from .locipo import (
+    LocipoIE,
+    LocipoPlaylistIE,
+)
 from .loco import LocoIE
 from .loom import (
    LoomFolderIE,
@@ -2343,6 +2347,7 @@ from .vimm import (
 )
 from .viously import ViouslyIE
 from .viqeo import ViqeoIE
+from .visir import VisirIE
 from .viu import (
    ViuIE,
    ViuOTTIE,
--- a/yt_dlp/extractor/firsttv.py
+++ b/yt_dlp/extractor/firsttv.py
@@ -3,10 +3,12 @@ import urllib.parse
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
+    float_or_none,
    int_or_none,
    join_nonempty,
    mimetype2ext,
    parse_qs,
+    unescapeHTML,
    unified_strdate,
    url_or_none,
 )
@@ -107,6 +109,11 @@ class FirstTVIE(InfoExtractor):
                    'timestamp': ('dvr_begin_at', {int_or_none}),
                    'upload_date': ('date_air', {unified_strdate}),
                    'duration': ('duration', {int_or_none}),
+                    'chapters': ('episodes', lambda _, v: float_or_none(v['from']) is not None, {
+                        'start_time': ('from', {float_or_none}),
+                        'title': ('name', {str}, {unescapeHTML}),
+                        'end_time': ('to', {float_or_none}),
+                    }),
                }),
                'id': video_id,
                'formats': formats,
--- a/yt_dlp/extractor/locipo.py
+++ b/yt_dlp/extractor/locipo.py
@@ -0,0 +1,209 @@
+import functools
+import math
+
+from .streaks import StreaksBaseIE
+from ..networking import HEADRequest
+from ..utils import (
+    InAdvancePagedList,
+    clean_html,
+    js_to_json,
+    parse_iso8601,
+    parse_qs,
+    str_or_none,
+)
+from ..utils.traversal import require, traverse_obj
+
+
+class LocipoBaseIE(StreaksBaseIE):
+    _API_BASE = 'https://web-api.locipo.jp'
+    _BASE_URL = 'https://locipo.jp'
+    _UUID_RE = r'[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12}'
+
+    def _call_api(self, path, item_id, note, fatal=True):
+        return self._download_json(
+            f'{self._API_BASE}/{path}', item_id,
+            f'Downloading {note} API JSON',
+            f'Unable to download {note} API JSON',
+            fatal=fatal)
+
+
+class LocipoIE(LocipoBaseIE):
+    _VALID_URL = [
+        fr'https?://locipo\.jp/creative/(?P<id>{LocipoBaseIE._UUID_RE})',
+        fr'https?://locipo\.jp/embed/?\?(?:[^#]+&)?id=(?P<id>{LocipoBaseIE._UUID_RE})',
+    ]
+    _TESTS = [{
+        'url': 'https://locipo.jp/creative/fb5ffeaa-398d-45ce-bb49-0e221b5f94f1',
+        'info_dict': {
+            'id': 'fb5ffeaa-398d-45ce-bb49-0e221b5f94f1',
+            'ext': 'mp4',
+            'title': 'リアルカレカノ#4 ～伊達さゆりと勉強しよっ？～',
+            'description': 'md5:70a40c202f3fb7946b61e55fa015094c',
+            'display_id': '5a2947fe596441f5bab88a61b0432d0d',
+            'live_status': 'not_live',
+            'modified_date': r're:\d{8}',
+            'modified_timestamp': int,
+            'release_timestamp': 1711789200,
+            'release_date': '20240330',
+            'series': 'リアルカレカノ',
+            'series_id': '1142',
+            'tags': 'count:4',
+            'thumbnail': r're:https?://.+\.(?:jpg|png)',
+            'timestamp': 1756984919,
+            'upload_date': '20250904',
+            'uploader': '東海テレビ',
+            'uploader_id': 'locipo-prod',
+        },
+    }, {
+        'url': 'https://locipo.jp/embed/?id=71a334a0-2b25-406f-9d96-88f341f571c2',
+        'info_dict': {
+            'id': '71a334a0-2b25-406f-9d96-88f341f571c2',
+            'ext': 'mp4',
+            'title': '#1 オーディション／ゲスト伊藤美来、豊田萌絵',
+            'description': 'md5:5bbcf532474700439cf56ceb6a15630e',
+            'display_id': '0ab32634b884499a84adb25de844c551',
+            'live_status': 'not_live',
+            'modified_date': r're:\d{8}',
+            'modified_timestamp': int,
+            'release_timestamp': 1751623200,
+            'release_date': '20250704',
+            'series': '声優ラジオのウラカブリ～Locipo出張所～',
+            'series_id': '1454',
+            'tags': 'count:6',
+            'thumbnail': r're:https?://.+\.(?:jpg|png)',
+            'timestamp': 1757002966,
+            'upload_date': '20250904',
+            'uploader': 'テレビ愛知',
+            'uploader_id': 'locipo-prod',
+        },
+    }, {
+        'url': 'https://locipo.jp/creative/bff9950d-229b-4fe9-911a-7fa71a232f35?list=69a5b15c-901f-4828-a336-30c0de7612d3',
+        'info_dict': {
+            'id': '69a5b15c-901f-4828-a336-30c0de7612d3',
+            'title': '見て・乗って・語りたい。 東海の鉄道沼',
+        },
+        'playlist_mincount': 3,
+    }, {
+        'url': 'https://locipo.jp/creative/a0751a7f-c7dd-4a10-a7f1-e12720bdf16c?list=006cff3f-ba74-42f0-b4fd-241486ebda2b',
+        'info_dict': {
+            'id': 'a0751a7f-c7dd-4a10-a7f1-e12720bdf16c',
+            'ext': 'mp4',
+            'title': '#839 人間真空パック',
+            'description': 'md5:9fe190333b6975c5001c8c9cbe20d276',
+            'display_id': 'c2b4c9f4a6d648bd8e3c320e384b9d56',
+            'live_status': 'not_live',
+            'modified_date': r're:\d{8}',
+            'modified_timestamp': int,
+            'release_timestamp': 1746239400,
+            'release_date': '20250503',
+            'series': 'でんじろう先生のはぴエネ！',
+            'series_id': '202',
+            'tags': 'count:3',
+            'thumbnail': r're:https?://.+\.(?:jpg|png)',
+            'timestamp': 1756975909,
+            'upload_date': '20250904',
+            'uploader': '中京テレビ',
+            'uploader_id': 'locipo-prod',
+        },
+        'params': {'noplaylist': True},
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        playlist_id = traverse_obj(parse_qs(url), ('list', -1, {str}))
+        if self._yes_playlist(playlist_id, video_id):
+            return self.url_result(
+                f'{self._BASE_URL}/playlist/{playlist_id}', LocipoPlaylistIE)
+
+        creatives = self._call_api(f'creatives/{video_id}', video_id, 'Creatives')
+        media_id = traverse_obj(creatives, ('media_id', {str}, {require('Streaks media ID')}))
+
+        webpage = self._download_webpage(url, video_id)
+        config = self._search_json(
+            r'window\.__NUXT__\.config\s*=', webpage, 'config', video_id, transform_source=js_to_json)
+        api_key = traverse_obj(config, ('public', 'streaksVodPlaybackApiKey', {str}, {require('api key')}))
+
+        return {
+            **self._extract_from_streaks_api('locipo-prod', media_id, headers={
+                'Origin': 'https://locipo.jp',
+                'X-Streaks-Api-Key': api_key,
+            }),
+            **traverse_obj(creatives, {
+                'title': ('name', {clean_html}),
+                'description': ('description', {clean_html}, filter),
+                'release_timestamp': ('publication_started_at', {parse_iso8601}),
+                'tags': ('keyword', {clean_html}, {lambda x: x.split(',')}, ..., {str.strip}, filter),
+                'uploader': ('company', 'name', {clean_html}, filter),
+            }),
+            **traverse_obj(creatives, ('series', {
+                'series': ('name', {clean_html}, filter),
+                'series_id': ('id', {str_or_none}),
+            })),
+            'id': video_id,
+        }
+
+
+class LocipoPlaylistIE(LocipoBaseIE):
+    _VALID_URL = [
+        fr'https?://locipo\.jp/(?P<type>playlist)/(?P<id>{LocipoBaseIE._UUID_RE})',
+        r'https?://locipo\.jp/(?P<type>series)/(?P<id>\d+)',
+    ]
+    _TESTS = [{
+        'url': 'https://locipo.jp/playlist/35d3dd2b-531d-4824-8575-b1c527d29538',
+        'info_dict': {
+            'id': '35d3dd2b-531d-4824-8575-b1c527d29538',
+            'title': 'レシピ集',
+        },
+        'playlist_mincount': 135,
+    }, {
+        # Redirects to https://locipo.jp/series/1363
+        'url': 'https://locipo.jp/playlist/fef7c4fb-741f-4d6a-a3a6-754f354302a2',
+        'info_dict': {
+            'id': '1363',
+            'title': 'CBCアナウンサー公式【みてちょてれび】',
+            'description': 'md5:50a1b23e63112d5c06c882835c8c1fb1',
+        },
+        'playlist_mincount': 38,
+    }, {
+        'url': 'https://locipo.jp/series/503',
+        'info_dict': {
+            'id': '503',
+            'title': 'FishingLover東海',
+            'description': '東海地区の釣り場でフィッシングの魅力を余すところなくご紹介！！',
+        },
+        'playlist_mincount': 223,
+    }]
+    _PAGE_SIZE = 100
+
+    def _fetch_page(self, path, playlist_id, page):
+        creatives = self._download_json(
+            f'{self._API_BASE}/{path}/{playlist_id}/creatives',
+            playlist_id, f'Downloading page {page + 1}', query={
+                'premium': False,
+                'live': False,
+                'limit': self._PAGE_SIZE,
+                'offset': page * self._PAGE_SIZE,
+            })
+
+        for video_id in traverse_obj(creatives, ('items', ..., 'id', {str})):
+            yield self.url_result(f'{self._BASE_URL}/creative/{video_id}', LocipoIE)
+
+    def _real_extract(self, url):
+        playlist_type, playlist_id = self._match_valid_url(url).group('type', 'id')
+        if urlh := self._request_webpage(HEADRequest(url), playlist_id, fatal=False):
+            playlist_type, playlist_id = self._match_valid_url(urlh.url).group('type', 'id')
+
+        path = 'playlists' if playlist_type == 'playlist' else 'series'
+        creatives = self._call_api(
+            f'{path}/{playlist_id}/creatives', playlist_id, path.capitalize())
+
+        entries = InAdvancePagedList(
+            functools.partial(self._fetch_page, path, playlist_id),
+            math.ceil(int(creatives['total']) / self._PAGE_SIZE), self._PAGE_SIZE)
+
+        return self.playlist_result(
+            entries, playlist_id,
+            **traverse_obj(creatives, ('items', ..., playlist_type, {
+                'title': ('name', {clean_html}, filter),
+                'description': ('description', {clean_html}, filter),
+            }, any)))
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@@ -506,6 +506,7 @@ class PornHubIE(PornHubBaseIE):
                'cast': ({find_elements(attr='data-label', value='pornstar')}, ..., {clean_html}),
            }),
            'subtitles': subtitles,
+            'http_headers': {'Referer': f'https://www.{host}/'},
        }, info)


--- a/yt_dlp/extractor/visir.py
+++ b/yt_dlp/extractor/visir.py
@@ -0,0 +1,116 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    UnsupportedError,
+    clean_html,
+    int_or_none,
+    js_to_json,
+    month_by_name,
+    url_or_none,
+    urljoin,
+)
+from ..utils.traversal import find_element, traverse_obj
+
+
+class VisirIE(InfoExtractor):
+    IE_DESC = 'Vísir'
+
+    _VALID_URL = r'https?://(?:www\.)?visir\.is/(?P<type>k|player)/(?P<id>[\da-f-]+)(?:/(?P<slug>[\w.-]+))?'
+    _EMBED_REGEX = [rf'<iframe[^>]+src=["\'](?P<url>{_VALID_URL})']
+    _TESTS = [{
+        'url': 'https://www.visir.is/k/eabb8f7f-ad87-46fb-9469-a0f1dc0fc4bc-1769022963988',
+        'info_dict': {
+            'id': 'eabb8f7f-ad87-46fb-9469-a0f1dc0fc4bc-1769022963988',
+            'ext': 'mp4',
+            'title': 'Sveppi og Siggi Þór mestu skaphundarnir',
+            'categories': ['island-i-dag'],
+            'description': 'md5:e06bd6a0cd8bdde328ad8cf00d3d4df6',
+            'duration': 792,
+            'thumbnail': r're:https?://www\.visir\.is/.+',
+            'upload_date': '20260121',
+            'view_count': int,
+        },
+    }, {
+        'url': 'https://www.visir.is/k/b0a88e02-eceb-4270-855c-8328b76b9d81-1763979306704/tonlistarborgin-reykjavik',
+        'info_dict': {
+            'id': 'b0a88e02-eceb-4270-855c-8328b76b9d81-1763979306704',
+            'ext': 'mp4',
+            'title': 'Tónlistarborgin Reykjavík',
+            'categories': ['tonlist'],
+            'description': 'md5:47237589dc95dbde55dfbb163396f88a',
+            'display_id': 'tonlistarborgin-reykjavik',
+            'duration': 81,
+            'thumbnail': r're:https?://www\.visir\.is/.+',
+            'upload_date': '20251124',
+            'view_count': int,
+        },
+    }, {
+        'url': 'https://www.visir.is/player/0cd5709e-6870-46d0-aaaf-0ae637de94f1-1770060083580',
+        'info_dict': {
+            'id': '0cd5709e-6870-46d0-aaaf-0ae637de94f1-1770060083580',
+            'ext': 'mp4',
+            'title': 'Sportpakkinn 2. febrúar 2026',
+            'categories': ['sportpakkinn'],
+            'display_id': 'sportpakkinn-2.-februar-2026',
+            'duration': 293,
+            'thumbnail': r're:https?://www\.visir\.is/.+',
+            'upload_date': '20260202',
+            'view_count': int,
+        },
+    }]
+    _WEBPAGE_TESTS = [{
+        'url': 'https://www.visir.is/g/20262837896d/segir-von-brigdin-med-prinsessuna-rista-djupt',
+        'info_dict': {
+            'id': '9ad5e58a-f26f-49f7-8b1d-68f0629485b7-1770059257365',
+            'ext': 'mp4',
+            'title': 'Norðmenn tala ekki um annað en prinsessuna',
+            'categories': ['frettir'],
+            'description': 'md5:53e2623ae79e1355778c14f5b557a0cd',
+            'display_id': 'nordmenn-tala-ekki-um-annad-en-prinsessuna',
+            'duration': 138,
+            'thumbnail': r're:https?://www\.visir\.is/.+',
+            'upload_date': '20260202',
+            'view_count': int,
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_type, video_id, display_id = self._match_valid_url(url).group('type', 'id', 'slug')
+        webpage = self._download_webpage(url, video_id)
+        if video_type == 'player':
+            real_url = self._og_search_url(webpage)
+            if not self.suitable(real_url) or self._match_valid_url(real_url).group('type') == 'player':
+                raise UnsupportedError(real_url)
+            return self.url_result(real_url, self.ie_key())
+
+        upload_date = None
+        date_elements = traverse_obj(webpage, (
+            {find_element(cls='article-item__date')}, {clean_html}, filter, {str.split}))
+        if date_elements and len(date_elements) == 3:
+            day, month, year = date_elements
+            day = int_or_none(day.rstrip('.'))
+            month = month_by_name(month, 'is')
+            if day and month and re.fullmatch(r'[0-9]{4}', year):
+                upload_date = f'{year}{month:02d}{day:02d}'
+
+        player = self._search_json(
+            r'App\.Player\.Init\(', webpage, video_id, 'player', transform_source=js_to_json)
+        m3u8_url = traverse_obj(player, ('File', {urljoin('https://vod.visir.is/')}))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4'),
+            'upload_date': upload_date,
+            **traverse_obj(webpage, ({find_element(cls='article-item press-ads')}, {
+                'description': ({find_element(cls='-large')}, {clean_html}, filter),
+                'view_count': ({find_element(cls='article-item__viewcount')}, {clean_html}, {int_or_none}),
+            })),
+            **traverse_obj(player, {
+                'title': ('Title', {clean_html}),
+                'categories': ('Categoryname', {clean_html}, filter, all, filter),
+                'duration': ('MediaDuration', {int_or_none}),
+                'thumbnail': ('Image', {url_or_none}),
+            }),
+        }
--- a/yt_dlp/extractor/xhamster.py
+++ b/yt_dlp/extractor/xhamster.py
@@ -213,16 +213,9 @@ class XHamsterIE(InfoExtractor):
        'only_matching': True,
    }]

-    def _decipher_format_url(self, format_url, format_id):
-        parsed_url = urllib.parse.urlparse(format_url)
-
-        hex_string, path_remainder = self._search_regex(
-            r'^/(?P<hex>[0-9a-fA-F]{12,})(?P<rem>[/,].+)$', parsed_url.path, 'url components',
-            default=(None, None), group=('hex', 'rem'))
-        if not hex_string:
-            self.report_warning(f'Skipping format "{format_id}": unsupported URL format')
-            return None
+    _VALID_HEX_RE = r'[0-9a-fA-F]{12,}'

+    def _decipher_hex_string(self, hex_string, format_id):
        byte_data = bytes.fromhex(hex_string)
        seed = int.from_bytes(byte_data[1:5], byteorder='little', signed=True)

@@ -232,7 +225,33 @@ class XHamsterIE(InfoExtractor):
            self.report_warning(f'Skipping format "{format_id}": {e.msg}')
            return None

-        deciphered = bytearray(byte ^ next(byte_gen) for byte in byte_data[5:]).decode('latin-1')
+        return bytearray(byte ^ next(byte_gen) for byte in byte_data[5:]).decode('latin-1')
+
+    def _decipher_format_url(self, format_url, format_id):
+        # format_url is bare hex ciphertext or a URL whose path starts with a hex ciphertext segment; returns None if the format is skipped
+        if re.fullmatch(self._VALID_HEX_RE, format_url):
+            return self._decipher_hex_string(format_url, format_id)
+        elif not url_or_none(format_url):
+            if re.fullmatch(r'[0-9a-fA-F]+', format_url):
+                # Hex strings too short to match _VALID_HEX_RE are expected dummy values, so we log at debug level instead of warning
+                self.write_debug(f'Skipping dummy ciphertext for "{format_id}": {format_url}')
+            else:
+                # Neither hex nor a URL: something has likely changed on the site's end, so we need to warn
+                self.report_warning(f'Skipping format "{format_id}": invalid ciphertext')
+            return None
+
+        parsed_url = urllib.parse.urlparse(format_url)
+
+        hex_string, path_remainder = self._search_regex(
+            rf'^/(?P<hex>{self._VALID_HEX_RE})(?P<rem>[/,].+)$', parsed_url.path, 'url components',
+            default=(None, None), group=('hex', 'rem'))
+        if not hex_string:
+            self.report_warning(f'Skipping format "{format_id}": unsupported URL format')
+            return None
+
+        deciphered = self._decipher_hex_string(hex_string, format_id)
+        if not deciphered:
+            return None

        return parsed_url._replace(path=f'/{deciphered}{path_remainder}').geturl()

--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -75,6 +75,9 @@ MONTH_NAMES = {
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
+    'is': [
+        'janúar', 'febrúar', 'mars', 'apríl', 'maí', 'júní',
+        'júlí', 'ágúst', 'september', 'október', 'nóvember', 'desember'],
    # these follow the genitive grammatical case (dopełniacz)
    # some websites might be using nominative, which will require another month list
    # https://en.wikibooks.org/wiki/Polish/Noun_cases
Author	SHA1	Message	Date
hunter-gatherer8	23c059a455	[ie/1tv] Extract chapters (#15848) Authored by: hunter-gatherer8	2026-02-06 20:45:47 +00:00
beacdeac	6f38df31b4	[ie/pornhub] Fix extractor (#15858) Closes #15827 Authored by: beacdeac	2026-02-06 20:41:56 +00:00
doe1080	442c90da3e	[ie/locipo] Add extractors (#15486) Closes #13656 Authored by: doe1080, gravesducking Co-authored-by: gravesducking <219445875+gravesducking@users.noreply.github.com>	2026-02-04 21:06:39 +00:00
0x∅	133cb959be	[ie/xhamster] Fix extractor (#15831) Closes #15802 Authored by: 0xvd	2026-02-04 20:49:07 +00:00
doe1080	c7c45f5289	[ie/visir] Add extractor (#15811) Closes #11901 Authored by: doe1080	2026-02-04 15:33:00 +00:00