mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-26 12:10:59 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			190 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			190 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/env python3
 | |
| 
 | |
| """
 | |
| Simple parser for spec compliant toml files
 | |
| 
 | |
| A simple toml parser for files that comply with the spec.
 | |
| Should only be used to parse `pyproject.toml` for `install_deps.py`.
 | |
| 
 | |
| IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED!
 | |
| """
 | |
| 
 | |
| from __future__ import annotations
 | |
| 
 | |
| import datetime as dt
 | |
| import json
 | |
| import re
 | |
| 
 | |
# Optional run of horizontal whitespace (spaces and tabs only, never newlines).
WS = r'(?:[\ \t]*)'
WS_RE = re.compile(WS)
# An equals sign plus any horizontal whitespace that follows it.
EQUALS_RE = re.compile(rf'={WS}')

# A single-line double-quoted string (backslash escapes allowed)
# or a single-line single-quoted string (no escapes).
STRING_RE = re.compile(r'"(?:\\.|[^\\"\n])*"|\'[^\'\n]*\'')
# One key segment: a quoted string or a bare key.
SINGLE_KEY_RE = re.compile(rf'{STRING_RE.pattern}|[A-Za-z0-9_-]+')
_SINGLE_KEY = SINGLE_KEY_RE.pattern
# One or more key segments joined by dots, with optional whitespace around each.
KEY_RE = re.compile(rf'{WS}(?:{_SINGLE_KEY}){WS}(?:\.{WS}(?:{_SINGLE_KEY}){WS})*')

# A `[path]` or `[[path]]` header at the start of a line.
_SUBTABLE = rf'(?P<subtable>^\[(?P<is_list>\[)?(?P<path>{KEY_RE.pattern})\]\]?)'
# Start of a top-level expression: a table header or the `key =` part of a pair.
EXPRESSION_RE = re.compile(rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)', re.MULTILINE)

# Whitespace inside a list: may span lines and may contain `#` comments.
LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*')
# Everything up to the next delimiter, tab, newline or comment marker.
LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+')
 | |
| 
 | |
| 
 | |
def parse_key(value: str):
    """Yield each segment of a (possibly dotted) key with its quoting removed.

    Double-quoted segments are decoded with ``json.loads``, single-quoted
    segments have their surrounding quotes stripped, and bare segments are
    yielded unchanged.
    """
    for found in SINGLE_KEY_RE.finditer(value):
        segment = found[0]
        if segment.startswith('"'):
            yield json.loads(segment)
        elif segment.startswith('\''):
            yield segment[1:-1]
        else:
            yield segment
 | |
| 
 | |
| 
 | |
def get_target(root: dict, paths: list[str], is_list=False):
    """Return the dict addressed by ``paths`` below ``root``, creating it as needed.

    Missing keys are filled in with empty dicts. When ``is_list`` is set, the
    final key holds a list instead, and every call appends a fresh dict to
    that list and returns it. When an intermediate key holds a list, the walk
    continues into its last element.
    """
    current = root
    depth = len(paths)

    for position, key in enumerate(paths, 1):
        wants_list = is_list and position == depth
        node = current.get(key)
        if node is None:
            node = current[key] = [] if wants_list else {}

        if isinstance(node, dict):
            current = node
            continue

        if not wants_list:
            # Descend into the most recently appended element.
            current = node[-1]
            continue

        current = {}
        node.append(current)

    assert isinstance(current, dict)
    return current
 | |
| 
 | |
| 
 | |
def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern):
    """Drive the parsing of a bracketed, comma-separated sequence.

    ``index`` points at the opening bracket. The generator yields
    ``(True, start)`` for each element, where ``start`` is the position at
    which the element begins; the caller must ``send`` back the position just
    past that element. Once the ``end`` character is reached it yields
    ``(False, position_after_end)``.

    ``ws_re`` matches the whitespace permitted between elements.
    """
    def skip_ws(position):
        found = ws_re.match(data, position)
        return found.end() if found else position

    # Step over the opening bracket and any whitespace following it.
    index = skip_ws(index + 1)

    while data[index] != end:
        index = yield True, index

        index = skip_ws(index)
        if data[index] == ',':
            index += 1
        index = skip_ws(index)

    assert data[index] == end
    yield False, index + 1
 | |
| 
 | |
| 
 | |
def _parse_scalar(text: str):
    """Convert a bare (unquoted) value to a Python object.

    Tries, in order: ``int``, ``float``, ISO time, ISO date, ISO datetime and
    finally the booleans ``true``/``false``. Returns ``None`` when nothing
    matches.
    """
    converters = (
        int,
        float,
        dt.time.fromisoformat,
        dt.date.fromisoformat,
        dt.datetime.fromisoformat,
    )
    for convert in converters:
        try:
            return convert(text)
        except ValueError:
            # Not this type; try the next converter.
            continue

    # Unrecognised text deliberately maps to None rather than raising.
    return {'true': True, 'false': False}.get(text)


def parse_value(data: str, index: int):
    """Parse a single value starting at ``data[index]``.

    Handles lists (``[...]``), inline tables (``{...}``), single-line quoted
    strings and bare scalars. Lists and inline tables are parsed recursively.

    Returns a tuple ``(end_index, value)`` where ``end_index`` is the position
    just past the parsed value.

    Raises ``AssertionError`` if no value text is found at ``index``.
    """
    if data[index] == '[':
        items = []

        cursor = parse_enclosed(data, index, ']', LIST_WS_RE)
        more, index = next(cursor)
        while more:
            index, item = parse_value(data, index)
            items.append(item)
            more, index = cursor.send(index)

        return index, items

    if data[index] == '{':
        table = {}

        cursor = parse_enclosed(data, index, '}', WS_RE)
        more, index = next(cursor)
        while more:
            # parse_kv_pair stores the pair in `table` and returns the position after it.
            more, index = cursor.send(parse_kv_pair(data, index, table))

        return index, table

    if match := STRING_RE.match(data, index):
        literal = match[0]
        # Double-quoted strings are decoded with json.loads; single-quoted ones are taken verbatim.
        value = json.loads(literal) if literal[0] == '"' else literal[1:-1]
        return match.end(), value

    match = LEFTOVER_VALUE_RE.match(data, index)
    assert match
    return match.end(), _parse_scalar(match[0].strip())
 | |
| 
 | |
| 
 | |
def parse_kv_pair(data: str, index: int, target: dict):
    """Parse one ``key = value`` pair at ``index`` and store it in ``target``.

    Dotted keys are resolved through ``get_target``, so every segment but the
    last names a nested dict below ``target``.

    Returns the position just past the value, or ``None`` if no key is found
    at ``index``.
    """
    key_match = KEY_RE.match(data, index)
    if not key_match:
        return None

    *parents, leaf = parse_key(key_match[0])

    equals = EQUALS_RE.match(data, key_match.end())
    assert equals

    after, value = parse_value(data, equals.end())
    get_target(target, parents)[leaf] = value
    return after
 | |
| 
 | |
| 
 | |
def parse_toml(data: str):
    """Parse TOML text into nested dicts and lists and return the root dict.

    The text is scanned for expressions that begin a line. A ``[table]`` or
    ``[[array-of-tables]]`` header switches the dict that later pairs are
    written to; anything else is parsed as a ``key = value`` pair.
    """
    document = {}
    current = document

    position = 0
    while found := EXPRESSION_RE.search(data, position):
        if not found.group('subtable'):
            position = parse_kv_pair(data, found.start(), current)
            assert position is not None
            continue

        position = found.end()
        segments = list(parse_key(found.group('path')))
        current = get_target(document, segments, bool(found.group('is_list')))

    return document
 | |
| 
 | |
| 
 | |
def main():
    """Read the TOML file named on the command line and print it as JSON."""
    import argparse
    from pathlib import Path

    parser = argparse.ArgumentParser()
    parser.add_argument('infile', type=Path, help='The TOML file to read as input')
    options = parser.parse_args()

    text = options.infile.read_text(encoding='utf-8')

    def default(obj):
        # Dates and times are written as ISO 8601 strings; anything else yields None.
        if not isinstance(obj, (dt.date, dt.time, dt.datetime)):
            return None
        return obj.isoformat()

    parsed = parse_toml(text)
    print(json.dumps(parsed, default=default))


if __name__ == '__main__':
    main()
 | 
