mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	 51887484e4
			
		
	
	51887484e4
	
	
	
		
			
			* Adds InfoExtractor._search_nuxt_json for webpage extraction
			* Adds InfoExtractor._resolve_nuxt_array for direct use with payload JSON
			* Adds yt_dlp.utils.jslib module for Python solutions to common JavaScript libraries
			* Adds devalue.parse and devalue.parse_iter to jslib utils
			Ref:
			* 9e503be0f2
			* f3fd2aa93d/src/parse.js
			Authored by: bashonly, Grub4K
			Co-authored-by: Simon Sawicki <contact@grub4k.dev>
		
			
				
	
	
		
			236 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			236 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python3
 | |
| 
 | |
| # Allow direct execution
 | |
| import os
 | |
| import sys
 | |
| 
 | |
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | |
| 
 | |
| 
 | |
| import datetime as dt
 | |
| import json
 | |
| import math
 | |
| import re
 | |
| import unittest
 | |
| 
 | |
| from yt_dlp.utils.jslib import devalue
 | |
| 
 | |
| 
 | |
# Cases checked with ``assertEqual``: ``devalue.parse(unparsed)`` must compare
# equal to ``parsed``. Each row below is ``(name, unparsed, parsed)`` and is
# expanded into a dict with those three keys.
TEST_CASES_EQUALS = [
    {'name': case_name, 'unparsed': payload, 'parsed': expected}
    for case_name, payload, expected in (
        ('int', [-42], -42),
        ('str', ['woo!!!'], 'woo!!!'),
        ('Number', [['Object', 42]], 42),
        ('String', [['Object', 'yar']], 'yar'),
        ('Infinity', -4, math.inf),
        ('negative Infinity', -5, -math.inf),
        ('negative zero', -6, -0.0),
        # XXX: flags are ignored
        ('RegExp', [['RegExp', 'regexp', 'gim']], re.compile('regexp')),
        ('Date', [['Date', '2001-09-09T01:46:40.000Z']],
         dt.datetime.fromtimestamp(1e9, tz=dt.timezone.utc)),
        ('Array', [[1, 2, 3], 'a', 'b', 'c'], ['a', 'b', 'c']),
        ('Array (empty)', [[]], []),
        ('Array (sparse)', [[-2, 1, -2], 'b'], [None, 'b', None]),
        ('Object', [{'foo': 1, 'x-y': 2}, 'bar', 'z'], {'foo': 'bar', 'x-y': 'z'}),
        ('Set', [['Set', 1, 2, 3], 1, 2, 3], [1, 2, 3]),
        ('Map', [['Map', 1, 2], 'a', 'b'], [['a', 'b']]),
        ('BigInt', [['BigInt', '1']], 1),
        ('Uint8Array', [['Uint8Array', 'AQID']], [1, 2, 3]),
        ('ArrayBuffer', [['ArrayBuffer', 'AQID']], [1, 2, 3]),
        ('str (repetition)', [[1, 1], 'a string'], ['a string', 'a string']),
        ('None (repetition)', [[1, 1], None], [None, None]),
        ('dict (repetition)', [[1, 1], {}], [{}, {}]),
        ('Object without prototype', [['null']], {}),
        ('cross-realm POJO', [{}], {}),
    )
]
 | |
| 
 | |
# Cases checked with ``assertIs``: ``devalue.parse(unparsed)`` must be the very
# same object as ``parsed``. Rows are ``(name, unparsed, parsed)``.
TEST_CASES_IS = [
    {'name': case_name, 'unparsed': payload, 'parsed': expected}
    for case_name, payload, expected in (
        ('bool', [True], True),
        ('Boolean', [['Object', False]], False),
        ('undefined', -1, None),
        ('null', [None], None),
        ('NaN', -3, math.nan),
    )
]
 | |
| 
 | |
# Inputs that ``devalue.parse`` must reject. Rows are
# ``(name, unparsed, error, pattern)``: the exception type expected and a regex
# that its message must match.
TEST_CASES_INVALID = [
    {'name': case_name, 'unparsed': payload, 'error': exc_type, 'pattern': message_re}
    for case_name, payload, exc_type, message_re in (
        ('empty string', '', ValueError, r'expected int or list as input'),
        ('hole', -2, ValueError, r'invalid integer input'),
        ('string', 'hello', ValueError, r'expected int or list as input'),
        ('number', 42, ValueError, r'invalid integer input'),
        ('boolean', True, ValueError, r'expected int or list as input'),
        ('null', None, ValueError, r'expected int or list as input'),
        ('object', {}, ValueError, r'expected int or list as input'),
        ('empty array', [], ValueError, r'expected a non-empty list as input'),
        ('Python negative indexing', [[1, 2, 3, 4, 5, 6, 7, -7], 1, 2, 3, 4, 5, 6, 7],
         IndexError, r'invalid index: -7'),
    )
]
 | |
| 
 | |
| 
 | |
class TestDevalue(unittest.TestCase):
    """Tests for ``devalue.parse`` from ``yt_dlp.utils.jslib``.

    Every table-driven case and every independent scenario runs inside
    ``self.subTest`` so that one failing case does not hide the others.
    """

    def test_devalue_parse_equals(self):
        """Each ``TEST_CASES_EQUALS`` input parses to a value equal to the expected one."""
        for tc in TEST_CASES_EQUALS:
            with self.subTest(msg=tc['name']):
                self.assertEqual(devalue.parse(tc['unparsed']), tc['parsed'], tc['name'])

    def test_devalue_parse_is(self):
        """Each ``TEST_CASES_IS`` input parses to the identical expected object."""
        for tc in TEST_CASES_IS:
            with self.subTest(msg=tc['name']):
                self.assertIs(devalue.parse(tc['unparsed']), tc['parsed'], tc['name'])

    def test_devalue_parse_invalid(self):
        """Each ``TEST_CASES_INVALID`` input raises the listed error with a matching message."""
        for tc in TEST_CASES_INVALID:
            with self.subTest(msg=tc['name']):
                with self.assertRaisesRegex(tc['error'], tc['pattern'], msg=tc['name']):
                    devalue.parse(tc['unparsed'])

    def test_devalue_parse_cyclical(self):
        """Payloads whose entries point back at index 0 yield self-referencing results."""
        name = 'Map (cyclical)'
        with self.subTest(msg=name):
            result = devalue.parse([['Map', 1, 0], 'self'])
            self.assertEqual(result[0][0], 'self', name)
            self.assertIs(result, result[0][1], name)

        name = 'Set (cyclical)'
        with self.subTest(msg=name):
            result = devalue.parse([['Set', 0, 1], 42])
            self.assertEqual(result[1], 42, name)
            self.assertIs(result, result[0], name)

        name = 'Array (cyclical)'
        with self.subTest(msg=name):
            result = devalue.parse([[0]])
            self.assertIs(result, result[0], name)

        name = 'Object (cyclical)'
        with self.subTest(msg=name):
            result = devalue.parse([{'self': 0}])
            self.assertIs(result, result['self'], name)

        name = 'Object with null prototype (cyclical)'
        with self.subTest(msg=name):
            result = devalue.parse([['null', 'self', 0]])
            self.assertIs(result, result['self'], name)

        # Two objects that reference each other through the containing array
        name = 'Objects (cyclical)'
        with self.subTest(msg=name):
            result = devalue.parse([[1, 2], {'second': 2}, {'first': 1}])
            self.assertIs(result[0], result[1]['first'], name)
            self.assertIs(result[1], result[0]['second'], name)

    def test_devalue_parse_revivers(self):
        """Custom ``revivers`` callables are applied to entries tagged with their name."""
        with self.subTest(msg='revivers (indirect)'):
            self.assertEqual(
                devalue.parse([['indirect', 1], {'a': 2}, 'b'], revivers={'indirect': lambda x: x}),
                {'a': 'b'}, 'revivers (indirect)')

        with self.subTest(msg='revivers (parse)'):
            self.assertEqual(
                devalue.parse([['parse', 1], '{"a":0}'], revivers={'parse': json.loads}),
                {'a': 0}, 'revivers (parse)')
 | |
| 
 | |
| 
 | |
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
 |