mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[devscripts] tomlparse: Add makeshift toml parser
				
					
				
			Authored by: Grub4K
This commit is contained in:
		
							
								
								
									
										189
									
								
								devscripts/tomlparse.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										189
									
								
								devscripts/tomlparse.py
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,189 @@ | ||||
| #!/usr/bin/env python3 | ||||
| 
 | ||||
| """ | ||||
| Simple parser for spec compliant toml files | ||||
| 
 | ||||
| A simple toml parser for files that comply with the spec. | ||||
| Should only be used to parse `pyproject.toml` for `install_deps.py`. | ||||
| 
 | ||||
| IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED! | ||||
| """ | ||||
| 
 | ||||
| from __future__ import annotations | ||||
| 
 | ||||
| import datetime | ||||
| import json | ||||
| import re | ||||
| 
 | ||||
| WS = r'(?:[\ \t]*)' | ||||
| STRING_RE = re.compile(r'"(?:\\.|[^\\"\n])*"|\'[^\'\n]*\'') | ||||
| SINGLE_KEY_RE = re.compile(rf'{STRING_RE.pattern}|[A-Za-z0-9_-]+') | ||||
| KEY_RE = re.compile(rf'{WS}(?:{SINGLE_KEY_RE.pattern}){WS}(?:\.{WS}(?:{SINGLE_KEY_RE.pattern}){WS})*') | ||||
| EQUALS_RE = re.compile(rf'={WS}') | ||||
| WS_RE = re.compile(WS) | ||||
| 
 | ||||
| _SUBTABLE = rf'(?P<subtable>^\[(?P<is_list>\[)?(?P<path>{KEY_RE.pattern})\]\]?)' | ||||
| EXPRESSION_RE = re.compile(rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)', re.MULTILINE) | ||||
| 
 | ||||
| LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*') | ||||
| LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+') | ||||
| 
 | ||||
| 
 | ||||
| def parse_key(value: str): | ||||
|     for match in SINGLE_KEY_RE.finditer(value): | ||||
|         if match[0][0] == '"': | ||||
|             yield json.loads(match[0]) | ||||
|         elif match[0][0] == '\'': | ||||
|             yield match[0][1:-1] | ||||
|         else: | ||||
|             yield match[0] | ||||
| 
 | ||||
| 
 | ||||
| def get_target(root: dict, paths: list[str], is_list=False): | ||||
|     target = root | ||||
| 
 | ||||
|     for index, key in enumerate(paths, 1): | ||||
|         use_list = is_list and index == len(paths) | ||||
|         result = target.get(key) | ||||
|         if result is None: | ||||
|             result = [] if use_list else {} | ||||
|             target[key] = result | ||||
| 
 | ||||
|         if isinstance(result, dict): | ||||
|             target = result | ||||
|         elif use_list: | ||||
|             target = {} | ||||
|             result.append(target) | ||||
|         else: | ||||
|             target = result[-1] | ||||
| 
 | ||||
|     assert isinstance(target, dict) | ||||
|     return target | ||||
| 
 | ||||
| 
 | ||||
| def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern): | ||||
|     index += 1 | ||||
| 
 | ||||
|     if match := ws_re.match(data, index): | ||||
|         index = match.end() | ||||
| 
 | ||||
|     while data[index] != end: | ||||
|         index = yield True, index | ||||
| 
 | ||||
|         if match := ws_re.match(data, index): | ||||
|             index = match.end() | ||||
| 
 | ||||
|         if data[index] == ',': | ||||
|             index += 1 | ||||
| 
 | ||||
|         if match := ws_re.match(data, index): | ||||
|             index = match.end() | ||||
| 
 | ||||
|     assert data[index] == end | ||||
|     yield False, index + 1 | ||||
| 
 | ||||
| 
 | ||||
| def parse_value(data: str, index: int): | ||||
|     if data[index] == '[': | ||||
|         result = [] | ||||
| 
 | ||||
|         indices = parse_enclosed(data, index, ']', LIST_WS_RE) | ||||
|         valid, index = next(indices) | ||||
|         while valid: | ||||
|             index, value = parse_value(data, index) | ||||
|             result.append(value) | ||||
|             valid, index = indices.send(index) | ||||
| 
 | ||||
|         return index, result | ||||
| 
 | ||||
|     if data[index] == '{': | ||||
|         result = {} | ||||
| 
 | ||||
|         indices = parse_enclosed(data, index, '}', WS_RE) | ||||
|         valid, index = next(indices) | ||||
|         while valid: | ||||
|             valid, index = indices.send(parse_kv_pair(data, index, result)) | ||||
| 
 | ||||
|         return index, result | ||||
| 
 | ||||
|     if match := STRING_RE.match(data, index): | ||||
|         return match.end(), json.loads(match[0]) if match[0][0] == '"' else match[0][1:-1] | ||||
| 
 | ||||
|     match = LEFTOVER_VALUE_RE.match(data, index) | ||||
|     assert match | ||||
|     value = match[0].strip() | ||||
|     for func in [ | ||||
|         int, | ||||
|         float, | ||||
|         datetime.time.fromisoformat, | ||||
|         datetime.date.fromisoformat, | ||||
|         datetime.datetime.fromisoformat, | ||||
|         {'true': True, 'false': False}.get, | ||||
|     ]: | ||||
|         try: | ||||
|             value = func(value) | ||||
|             break | ||||
|         except Exception: | ||||
|             pass | ||||
| 
 | ||||
|     return match.end(), value | ||||
| 
 | ||||
| 
 | ||||
| def parse_kv_pair(data: str, index: int, target: dict): | ||||
|     match = KEY_RE.match(data, index) | ||||
|     if not match: | ||||
|         return None | ||||
| 
 | ||||
|     *keys, key = parse_key(match[0]) | ||||
| 
 | ||||
|     match = EQUALS_RE.match(data, match.end()) | ||||
|     assert match | ||||
|     index = match.end() | ||||
| 
 | ||||
|     index, value = parse_value(data, index) | ||||
|     get_target(target, keys)[key] = value | ||||
|     return index | ||||
| 
 | ||||
| 
 | ||||
| def parse_toml(data: str): | ||||
|     root = {} | ||||
|     target = root | ||||
| 
 | ||||
|     index = 0 | ||||
|     while True: | ||||
|         match = EXPRESSION_RE.search(data, index) | ||||
|         if not match: | ||||
|             break | ||||
| 
 | ||||
|         if match.group('subtable'): | ||||
|             index = match.end() | ||||
|             path, is_list = match.group('path', 'is_list') | ||||
|             target = get_target(root, list(parse_key(path)), bool(is_list)) | ||||
|             continue | ||||
| 
 | ||||
|         index = parse_kv_pair(data, match.start(), target) | ||||
|         assert index is not None | ||||
| 
 | ||||
|     return root | ||||
| 
 | ||||
| 
 | ||||
| def main(): | ||||
|     import argparse | ||||
|     from pathlib import Path | ||||
| 
 | ||||
|     parser = argparse.ArgumentParser() | ||||
|     parser.add_argument('infile', type=Path, help='The TOML file to read as input') | ||||
|     args = parser.parse_args() | ||||
| 
 | ||||
|     with args.infile.open('r', encoding='utf-8') as file: | ||||
|         data = file.read() | ||||
| 
 | ||||
|     def default(obj): | ||||
|         if isinstance(obj, (datetime.date, datetime.time, datetime.datetime)): | ||||
|             return obj.isoformat() | ||||
| 
 | ||||
|     print(json.dumps(parse_toml(data), default=default)) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     main() | ||||
		Reference in New Issue
	
	Block a user
	 Simon Sawicki
					Simon Sawicki