1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-08-13 16:08:29 +00:00

Add UMPEncoder

This commit is contained in:
coletdjnz 2025-08-10 16:43:09 +12:00
parent c74cf4dd51
commit acb19e95ee
No known key found for this signature in database
GPG Key ID: 91984263BB39894A
2 changed files with 91 additions and 16 deletions

View File

@ -1,7 +1,15 @@
import io
import pytest
from yt_dlp.extractor.youtube._streaming.ump import varint_size, read_varint, UMPDecoder, UMPPartId
from yt_dlp.extractor.youtube._streaming.ump import (
varint_size,
read_varint,
UMPDecoder,
UMPPartId,
write_varint,
UMPEncoder,
UMPPart,
)
@pytest.mark.parametrize('data, expected', [
@ -21,28 +29,32 @@ def test_varint_size(data, expected):
@pytest.mark.parametrize('data, expected', [
    # Encoded length: 1 byte
    (b'\x01', 1),
    (b'\x4F', 79),
    # Encoded length: 2 bytes
    (b'\x80\x01', 64),
    (b'\x8A\x7F', 8138),
    (b'\xBF\x7F', 8191),
    # Encoded length: 3 bytes
    (b'\xC0\x80\x01', 12288),
    (b'\xDF\x7F\xFF', 2093055),
    # Encoded length: 4 bytes
    (b'\xE0\x80\x80\x01', 1574912),
    (b'\xEF\x7F\xFF\xFF', 268433407),
    # Encoded length: 5 bytes
    (b'\xF0\x80\x80\x80\x01', 25198720),
    (b'\xFF\x7F\xFF\xFF\xFF', 4294967167),
    # One sample per multi-byte length (2 to 5 bytes), ending at 2**32 - 1
    (b'\xad\x05', 365),
    (b'\xd5\x22\x05', 42069),
    (b'\xe0\x68\x89\x09', 10000000),
    (b'\xf0\xff\xc9\x9a\x3b', 999999999),
    (b'\xf0\xff\xff\xff\xff', 4294967295),
])
def test_readvarint(data, expected):
    """Decoding ``data`` from an in-memory stream yields ``expected``."""
    stream = io.BytesIO(data)
    decoded = read_varint(stream)
    assert decoded == expected
@pytest.mark.parametrize('value, expected_bytes', [
    # One representative value per encoded length
    (1, b'\x01'),
    (365, b'\xad\x05'),
    (42069, b'\xd5\x22\x05'),
    (10000000, b'\xe0\x68\x89\x09'),
    (999999999, b'\xf0\xff\xc9\x9a\x3b'),
    (4294967295, b'\xf0\xff\xff\xff\xff'),
    # Boundaries: the last value of each encoded length and the first of
    # the next one, where a wrong shift or mask would show up
    (0, b'\x00'),
    (127, b'\x7f'),
    (128, b'\x80\x02'),
    (16383, b'\xbf\xff'),
    (16384, b'\xc0\x00\x02'),
    (2097151, b'\xdf\xff\xff'),
    (2097152, b'\xe0\x00\x00\x02'),
    (268435455, b'\xef\xff\xff\xff'),
    (268435456, b'\xf0\x00\x00\x00\x10'),
])
def test_writevarint(value, expected_bytes):
    """Encoding ``value`` writes exactly ``expected_bytes`` to the stream."""
    fp = io.BytesIO()
    write_varint(fp, value)
    assert fp.getvalue() == expected_bytes
class TestUMPDecoder:
EXAMPLE_PART_DATA = [
{
@ -100,3 +112,21 @@ def test_unexpected_eof(self):
part.data.read()
assert mock_file.closed
class TestUMPEncoder:
    def test_write_part(self):
        """A part is emitted as part-type varint, size varint, then payload."""
        payload = b'\x01' * 127
        output = io.BytesIO()

        UMPEncoder(output).write_part(UMPPart(
            part_id=UMPPartId.MEDIA_HEADER,
            size=len(payload),
            data=io.BytesIO(payload),
        ))

        # 0x14 is the MEDIA_HEADER part type; 0x7F is the varint for size 127
        header = b'\x14' + b'\x7F'
        assert output.getvalue() == header + payload

View File

@ -83,6 +83,19 @@ def iter_parts(self):
yield UMPPart(UMPPartId(part_type), part_size, io.BytesIO(part_data))
class UMPEncoder:
    """Writes UMPPart objects to a binary stream.

    Each part is serialised as its part type (varint), its size (varint)
    and then its payload bytes.
    """

    def __init__(self, fp: io.BufferedIOBase):
        # Destination stream that encoded parts are written to
        self.fp = fp

    def write_part(self, part: UMPPart) -> None:
        """Serialise a single part to the underlying stream.

        The payload is read from ``part.data`` in full and checked against
        ``part.size`` before anything is written, so an inconsistent part
        never leaves a partial header in the output.

        @param part: The part to encode.
        @raises ValueError: If ``part.part_id`` is not a UMPPartId, or if the
                            number of bytes read from ``part.data`` differs
                            from ``part.size``.
        """
        if not isinstance(part.part_id, UMPPartId):
            raise ValueError('part_id must be an instance of UMPPartId')

        payload = part.data.read()
        if len(payload) != part.size:
            # The size prefix tells a reader how many payload bytes follow;
            # writing a mismatching value would corrupt the rest of the stream.
            raise ValueError(
                f'part size mismatch: declared {part.size} bytes, got {len(payload)} bytes of data')

        write_varint(self.fp, part.part_id.value)
        write_varint(self.fp, part.size)
        self.fp.write(payload)
def read_varint(fp: io.BufferedIOBase) -> int:
# https://web.archive.org/web/20250430054327/https://github.com/gsuberland/UMP_Format/blob/main/UMP_Format.md
# https://web.archive.org/web/20250429151021/https://github.com/davidzeng0/innertube/blob/main/googlevideo/ump.md
@ -114,3 +127,35 @@ def read_varint(fp: io.BufferedIOBase) -> int:
def varint_size(byte: int) -> int:
    """Return the encoded length (1-5) of a varint given its first byte.

    The length is selected by which threshold the first byte falls below:
    128, 192, 224 and 240 give 1, 2, 3 and 4 bytes; anything else is 5.
    """
    for length, upper_bound in enumerate((128, 192, 224, 240), start=1):
        if byte < upper_bound:
            return length
    return 5
def write_varint(fp: io.BufferedIOBase, value: int) -> None:
    """Encode ``value`` as a UMP varint and write it to ``fp``.

    Values below 128 take one byte. Larger values take 2 to 5 bytes: the
    high bits of the first byte mark the length, its low bits (if any)
    carry the least significant bits of the value, and the remaining
    bytes hold the rest in little-endian order.

    @param fp: Writable binary stream.
    @param value: Non-negative integer to encode.
    @raises ValueError: If ``value`` is negative.
    """
    # ref: https://github.com/LuanRT/googlevideo/blob/main/src/core/UmpWriter.ts
    if value < 0:
        raise ValueError('Value must be a non-negative integer')

    if value < 0x80:
        fp.write(bytes((value,)))
        return

    # (exclusive upper bound, value bits kept in the first byte,
    #  length marker for the first byte, number of trailing bytes)
    layouts = (
        (1 << 14, 6, 0x80, 1),
        (1 << 21, 5, 0xC0, 2),
        (1 << 28, 4, 0xE0, 3),
    )
    for limit, lead_bits, marker, tail_len in layouts:
        if value >= limit:
            continue
        lead = marker | (value & ((1 << lead_bits) - 1))
        tail = (value >> lead_bits).to_bytes(tail_len, 'little')
        fp.write(bytes((lead,)) + tail)
        return

    # 5-byte form: the first byte is only a marker, the value follows as
    # four little-endian bytes (to_bytes raises if it does not fit)
    encoded = bytearray(b'\xF0')
    encoded += value.to_bytes(4, 'little')
    fp.write(encoded)