mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			509 lines
		
	
	
		
			18 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			509 lines
		
	
	
		
			18 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from __future__ import annotations
 | |
| 
 | |
| # Allow direct execution
 | |
| import os
 | |
| import sys
 | |
| 
 | |
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | |
| 
 | |
| import enum
 | |
| import itertools
 | |
| import json
 | |
| import logging
 | |
| import re
 | |
| from collections import defaultdict
 | |
| from dataclasses import dataclass
 | |
| from functools import lru_cache
 | |
| from pathlib import Path
 | |
| 
 | |
| from devscripts.utils import read_file, run_process, write_file
 | |
| 
 | |
# Root URL used when building links to GitHub users, commits and issues
BASE_URL = 'https://github.com'
# Directory that contains this script; default file locations are derived from it
LOCATION_PATH = Path(__file__).parent
# Number of leading hash characters kept when a commit hash is abbreviated
HASH_LENGTH = 7

# Module-level logger; handlers and level are configured in create_changelog()
logger = logging.getLogger(__name__)
 | |
| 
 | |
| 
 | |
class CommitGroup(enum.Enum):
    """Changelog sections a commit can be assigned to.

    The value of each member is the heading text used for that section.
    """

    PRIORITY = 'Important'
    CORE = 'Core'
    EXTRACTOR = 'Extractor'
    DOWNLOADER = 'Downloader'
    POSTPROCESSOR = 'Postprocessor'
    NETWORKING = 'Networking'
    MISC = 'Misc.'

    @classmethod
    @lru_cache
    def subgroup_lookup(cls):
        """Return the (cached) mapping of bare subgroup name -> owning group."""
        names_by_group = {
            cls.MISC: {'build', 'ci', 'cleanup', 'devscripts', 'docs', 'test'},
            cls.NETWORKING: {'rh'},
        }
        lookup = {}
        for owner, subgroup_names in names_by_group.items():
            for subgroup_name in subgroup_names:
                lookup[subgroup_name] = owner
        return lookup

    @classmethod
    @lru_cache
    def group_lookup(cls):
        """Return the (cached) mapping of group name or short alias -> group."""
        lookup = {
            'fd': cls.DOWNLOADER,
            'ie': cls.EXTRACTOR,
            'pp': cls.POSTPROCESSOR,
            'upstream': cls.CORE,
        }
        # Every member is also reachable through its own lower-cased name
        for member in cls:
            lookup[member.name.lower()] = member
        return lookup

    @classmethod
    def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
        """Resolve a ``group[/subgroup]`` string to ``(group, details)``.

        The part before the first ``/`` is looked up as a group name/alias.
        If it is unknown and a subgroup part exists, ``(None, value)`` is
        returned with the input untouched; otherwise the first part is tried
        as a bare subgroup name and returned (lower-cased) as the details.
        """
        head, _, tail = value.partition('/')
        group_name = head.strip().lower()
        subgroup = tail.strip().lower()

        known_group = cls.group_lookup().get(group_name)
        if known_group:
            return known_group, subgroup or None

        if subgroup:
            return None, value

        return cls.subgroup_lookup().get(group_name), group_name or None
 | |
| 
 | |
| 
 | |
@dataclass
class Commit:
    """A commit described by its hash (optional), subject line and authors."""

    hash: str | None
    short: str
    authors: list[str]

    def __str__(self):
        # Quoted subject, then the abbreviated hash and the author list when present
        parts = [repr(self.short)]
        if self.hash:
            parts.append(f'({self.hash[:HASH_LENGTH]})')
        if self.authors:
            parts.append('by ' + ', '.join(self.authors))
        return ' '.join(parts)
 | |
| 
 | |
| 
 | |
@dataclass
class CommitInfo:
    """Parsed changelog entry for one commit."""

    # Detail label taken from the commit prefix, if any
    details: str | None
    # Further nesting labels below `details`
    sub_details: tuple[str, ...]
    # Message text shown in the changelog
    message: str
    # Referenced issue numbers (without the leading '#')
    issues: list[str]
    # The commit this entry was built from
    commit: Commit
    # Commits recorded as fixes for `commit`
    fixes: list[Commit]

    def key(self):
        """Return the sort key: lower-cased details, sub_details, message."""
        details = self.details.lower() if self.details else ''
        return details, self.sub_details, self.message
 | |
| 
 | |
| 
 | |
def unique(items):
    """Return the sorted truthy items, de-duplicated ignoring case and outer whitespace.

    When several items collapse to the same normalized form, the last one wins.
    """
    by_normalized = {}
    for entry in items:
        if not entry:
            continue
        by_normalized[entry.strip().lower()] = entry
    return sorted(by_normalized.values())
 | |
| 
 | |
| 
 | |
class Changelog:
    """Render grouped commit information as a markdown changelog.

    ``str(changelog)`` yields the finished text; the tab characters used for
    nesting list items are expanded to four spaces.
    """

    # Cleanup entries whose message matches are merged into a "Miscellaneous" entry
    MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)
    # The collapsible section is opened right before the first group NOT listed here
    ALWAYS_SHOWN = (CommitGroup.PRIORITY,)

    def __init__(self, groups, repo, collapsible=False):
        """
        @param groups       Mapping of CommitGroup -> list of CommitInfo
        @param repo         Repository path appended to BASE_URL (e.g. 'owner/name')
        @param collapsible  Whether to wrap the output in a <details> element
        """
        self._groups = groups
        self._repo = repo
        self._collapsible = collapsible

    def __str__(self):
        return '\n'.join(self._format_groups(self._groups)).replace('\t', '    ')

    def _format_groups(self, groups):
        """Yield one text chunk per non-empty group, in CommitGroup order."""
        first = True
        for item in CommitGroup:
            if self._collapsible and item not in self.ALWAYS_SHOWN and first:
                first = False
                yield '\n<details><summary><h3>Changelog</h3></summary>\n'

            # `.get` keeps plain dicts working and does not grow a defaultdict
            if group := groups.get(item):
                yield self.format_module(item.value, group)

        if self._collapsible:
            yield '\n</details>'

    def format_module(self, name, group):
        """Return the section for one group: optional heading plus its entries."""
        result = f'\n#### {name} changes\n' if name else '\n'
        return result + '\n'.join(self._format_group(group))

    @staticmethod
    def _group_by_sub_details(items):
        """Group items by lower-cased sub_details, keeping first-seen order.

        Unlike itertools.groupby this also merges items that are not adjacent,
        which happens when sub_details differ only in letter case (the sort
        key is case-sensitive, the grouping key is not).
        """
        grouped = {}
        for item in items:
            grouped.setdefault(tuple(map(str.lower, item.sub_details)), []).append(item)
        return grouped.items()

    def _format_group(self, group):
        """Yield the markdown list lines for all entries of one group."""
        sorted_group = sorted(group, key=CommitInfo.key)
        detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower())
        for _, items in detail_groups:
            items = list(items)
            details = items[0].details

            if details == 'cleanup':
                items = self._prepare_cleanup_misc_items(items)

            prefix = '-'
            if details:
                if len(items) == 1:
                    # A lone entry is written on the same line as its details label
                    prefix = f'- **{details}**:'
                else:
                    yield f'- **{details}**'
                    prefix = '\t-'

            for sub_details, entries in self._group_by_sub_details(items):
                if not sub_details:
                    for entry in entries:
                        yield f'{prefix} {self.format_single_change(entry)}'
                    continue

                sub_prefix = f'{prefix} {", ".join(entries[0].sub_details)}'
                if len(entries) == 1:
                    yield f'{sub_prefix}: {self.format_single_change(entries[0])}'
                    continue

                yield sub_prefix
                for entry in entries:
                    yield f'\t{prefix} {self.format_single_change(entry)}'

    def _prepare_cleanup_misc_items(self, items):
        """Merge cleanup entries matching MISC_RE into one entry per author set.

        Entries that do not match keep their order; the merged "Miscellaneous"
        entries (a comma separated list of commit links) are appended after them.
        """
        cleanup_misc_items = defaultdict(list)
        sorted_items = []
        for item in items:
            if self.MISC_RE.search(item.message):
                cleanup_misc_items[tuple(item.commit.authors)].append(item)
            else:
                sorted_items.append(item)

        for commit_infos in cleanup_misc_items.values():
            sorted_items.append(CommitInfo(
                'cleanup', ('Miscellaneous',), ', '.join(
                    self._format_message_link(None, info.commit.hash)
                    for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
                [], Commit(None, '', commit_infos[0].commit.authors), []))

        return sorted_items

    def format_single_change(self, info: CommitInfo):
        """Return the markdown text for one entry.

        Only the first line of the message is decorated (commit link, issues,
        authors, fixes); any following lines are re-attached unchanged.
        """
        message, sep, rest = info.message.partition('\n')
        if '[' not in message:
            # If the message doesn't already contain markdown links, try to add a link to the commit
            message = self._format_message_link(message, info.commit.hash)

        if info.issues:
            message = f'{message} ({self._format_issues(info.issues)})'

        if info.commit.authors:
            message = f'{message} by {self._format_authors(info.commit.authors)}'

        if info.fixes:
            fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)

            authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold)
            # Fix authors are only named when they differ from the commit's own authors
            if authors != info.commit.authors:
                fix_message = f'{fix_message} by {self._format_authors(authors)}'

            message = f'{message} (With fixes in {fix_message})'

        return message if not sep else f'{message}{sep}{rest}'

    def _format_message_link(self, message, commit_hash):
        """Return `message` linked to the commit, or unlinked if there is no hash.

        With an empty message the abbreviated hash is used as the link text.
        """
        assert message or commit_hash, 'Improperly defined commit message or override'
        message = message if message else commit_hash[:HASH_LENGTH]
        return f'[{message}]({self.repo_url}/commit/{commit_hash})' if commit_hash else message

    def _format_issues(self, issues):
        """Return comma separated markdown links to the given issue numbers."""
        return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)

    @staticmethod
    def _format_authors(authors):
        """Return comma separated markdown links to the authors' profile pages."""
        return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors)

    @property
    def repo_url(self):
        """Base URL of the repository that commit and issue links point to."""
        return f'{BASE_URL}/{self._repo}'
 | |
| 
 | |
| 
 | |
class CommitRange:
    """Commits of a ``git log`` range, with reverts and fix commits resolved.

    Iterating yields the remaining commits from the log followed by the
    commits added through overrides.
    """

    COMMAND = 'git'
    COMMIT_SEPARATOR = '-----'

    AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
    # NOTE: whitespace is ignored under re.VERBOSE, so every literal space must be escaped
    MESSAGE_RE = re.compile(r'''
        (?:\[(?P<prefix>[^\]]+)\]\ )?
        (?:(?P<sub_details>`?[\w.-]+`?):\ )?
        (?P<message>.+?)
        (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
        ''', re.VERBOSE | re.DOTALL)
    EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
    REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
    FIXES_RE = re.compile(r'(?i:(?:bug\s*)?fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Improve)\s+([\da-f]{40})')
    UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')

    def __init__(self, start, end, default_author=None):
        """
        @param start           Commitish the range starts after, or a falsy value
                               to pass only `end` to git log
        @param end             Commitish the range ends at
        @param default_author  Author used for commits without an author indicator
        """
        self._start, self._end = start, end
        self._commits, self._fixes = self._get_commits_and_fixes(default_author)
        self._commits_added = []

    def __iter__(self):
        return iter(itertools.chain(self._commits.values(), self._commits_added))

    def __len__(self):
        return len(self._commits) + len(self._commits_added)

    def __contains__(self, commit):
        """Check by hash whether a Commit (or a hash string) came from the log."""
        if isinstance(commit, Commit):
            if not commit.hash:
                return False
            commit = commit.hash

        return commit in self._commits

    def _get_commits_and_fixes(self, default_author):
        """Run git log and return ``(commits, fixes)``.

        `commits` maps hash -> Commit in log order, without release/version
        commits, without commits that were reverted inside the range and
        without fix commits whose target is part of the range.
        `fixes` maps the target hash -> list of fix Commits.
        """
        result = run_process(
            self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
            f'{self._start}..{self._end}' if self._start else self._end).stdout

        commits, reverts = {}, {}
        fixes = defaultdict(list)
        lines = iter(result.splitlines(False))
        for i, commit_hash in enumerate(lines):
            short = next(lines)
            skip = short.startswith('Release ') or short == '[version] update'

            fix_commitish = None
            if match := self.FIXES_RE.search(short):
                fix_commitish = match.group(1)

            authors = [default_author] if default_author else []
            # Consume the body lines up to (and including) the separator line
            for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
                if match := self.AUTHOR_INDICATOR_RE.match(line):
                    authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)
                if not fix_commitish and (match := self.FIXES_RE.fullmatch(line)):
                    fix_commitish = match.group(1)

            commit = Commit(commit_hash, short, authors)
            if skip and (self._start or not i):
                logger.debug(f'Skipped commit: {commit}')
                continue
            elif skip:
                # Without an explicit start, the range ends at the first release commit found
                logger.debug(f'Reached Release commit, breaking: {commit}')
                break

            if match := self.REVERT_RE.fullmatch(commit.short):
                reverts[match.group(1)] = commit
                continue

            if fix_commitish:
                fixes[fix_commitish].append(commit)

            commits[commit.hash] = commit

        reverted_hashes = set()
        for commitish, revert_commit in reverts.items():
            if reverted := commits.pop(commitish, None):
                reverted_hashes.add(commitish)
                logger.debug(f'{commitish} fully reverted {reverted}')
            else:
                # The reverted commit is outside the range, so the revert itself is listed
                commits[revert_commit.hash] = revert_commit

        for commitish, fix_commits in fixes.items():
            # A fix that was itself reverted has already been removed above;
            # it must neither be deleted a second time nor be listed as a fix
            fix_commits[:] = [commit for commit in fix_commits if commit.hash not in reverted_hashes]
            if not fix_commits:
                continue

            if commitish in commits:
                hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits)
                logger.info(f'Found fix(es) for {commitish[:HASH_LENGTH]}: {hashes}')
                for fix_commit in fix_commits:
                    commits.pop(fix_commit.hash, None)
            else:
                logger.debug(f'Commit with fixes not in changes: {commitish[:HASH_LENGTH]}')

        return commits, fixes

    def apply_overrides(self, overrides):
        """Apply 'add', 'remove' and 'change' overrides, then reverse the commit order.

        An override carrying a `when` hash is ignored unless that hash is part
        of the range or equals the range start.
        """
        for override in overrides:
            when = override.get('when')
            if when and when not in self and when != self._start:
                logger.debug(f'Ignored {when!r} override')
                continue

            override_hash = override.get('hash') or when
            if override['action'] == 'add':
                commit = Commit(override.get('hash'), override['short'], override.get('authors') or [])
                logger.info(f'ADD    {commit}')
                self._commits_added.append(commit)

            elif override['action'] == 'remove':
                if override_hash in self._commits:
                    logger.info(f'REMOVE {self._commits[override_hash]}')
                    del self._commits[override_hash]

            elif override['action'] == 'change':
                if override_hash not in self._commits:
                    continue
                commit = Commit(override_hash, override['short'], override.get('authors') or [])
                logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')
                self._commits[commit.hash] = commit

        self._commits = dict(reversed(self._commits.items()))

    def groups(self):
        """Parse every commit subject and return a dict CommitGroup -> [CommitInfo].

        Commits whose subject cannot be parsed are logged as errors and skipped.
        Upstream merge commits get their subject rewritten in place.
        """
        group_dict = defaultdict(list)
        for commit in self:
            upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
            if upstream_re:
                commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}'

            match = self.MESSAGE_RE.fullmatch(commit.short)
            if not match:
                logger.error(f'Error parsing short commit message: {commit.short!r}')
                continue

            prefix, sub_details_alt, message, issues = match.groups()
            issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else []

            if prefix:
                # A prefix may list several comma separated parts; the first resolvable group wins
                groups, details, sub_details = zip(*map(self.details_from_prefix, prefix.split(',')))
                group = next(iter(filter(None, groups)), None)
                details = ', '.join(unique(details))
                sub_details = list(itertools.chain.from_iterable(sub_details))
            else:
                group = CommitGroup.CORE
                details = None
                sub_details = []

            if sub_details_alt:
                sub_details.append(sub_details_alt)
            sub_details = tuple(unique(sub_details))

            if not group:
                if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
                    group = CommitGroup.EXTRACTOR
                    logger.error(f'Assuming [ie] group for {commit.short!r}')
                else:
                    group = CommitGroup.CORE

            commit_info = CommitInfo(
                details, sub_details, message.strip(),
                issues, commit, self._fixes[commit.hash])

            logger.debug(f'Resolved {commit.short!r} to {commit_info!r}')
            group_dict[group].append(commit_info)

        return group_dict

    @staticmethod
    def details_from_prefix(prefix):
        """Split one prefix part into ``(group, details, sub_details)``.

        The text before the first ':' is resolved through CommitGroup.get;
        the remaining ':' separated parts are returned as sub_details.
        """
        if not prefix:
            return CommitGroup.CORE, None, ()

        prefix, *sub_details = prefix.split(':')

        group, details = CommitGroup.get(prefix)
        if group is CommitGroup.PRIORITY and details:
            details = details.partition('/')[2].strip()

        if details and '/' in details:
            logger.error(f'Prefix is overnested, using first part: {prefix}')
            details = details.partition('/')[0].strip()

        if details == 'common':
            details = None
        elif group is CommitGroup.NETWORKING and details == 'rh':
            details = 'Request Handler'

        return group, details, sub_details
 | |
| 
 | |
| 
 | |
def get_new_contributors(contributors_path, commits):
    """Return commit authors not yet listed in the contributors file.

    Names are compared case-insensitively. Each line of the file may hold
    several '/' separated names, optionally followed by a ' (' suffix that is
    ignored. A missing file is treated as an empty list of contributors.

    @param contributors_path  Path object of the contributors file
    @param commits            Iterable of objects with an `authors` list
    @returns                  New author names, sorted case-insensitively
    """
    known = set()
    if contributors_path.exists():
        for raw_line in read_file(contributors_path).splitlines():
            names = raw_line.strip().partition(' (')[0]
            known.update(name.casefold() for name in names.split('/'))

    newcomers = set()
    every_author = (author for commit in commits for author in commit.authors)
    for author in every_author:
        folded = author.casefold()
        if folded in known:
            continue
        # Remember the name so later spellings of the same author are skipped
        known.add(folded)
        newcomers.add(author)

    return sorted(newcomers, key=str.casefold)
 | |
| 
 | |
| 
 | |
def create_changelog(args):
    """Build the Changelog for the parsed command line arguments.

    Configures logging on stderr, loads the commit range, applies the override
    file unless disabled, reports new contributors (appending them to the
    contributors file when requested) and returns the Changelog object.

    @param args  Namespace as produced by create_parser().parse_args()
    """
    # Each verbosity step lowers the threshold by one logging level
    log_level = logging.WARNING - 10 * args.verbosity
    logging.basicConfig(
        stream=sys.stderr, style='{', level=log_level,
        format='{asctime} | {levelname:<8} | {message}', datefmt='%Y-%m-%d %H-%M-%S')

    commits = CommitRange(None, args.commitish, args.default_author)

    use_overrides = not args.no_override
    if use_overrides and args.override_path.exists():
        commits.apply_overrides(json.loads(read_file(args.override_path)))
    elif use_overrides:
        logger.warning(f'File {args.override_path.as_posix()} does not exist')

    logger.info(f'Loaded {len(commits)} commits')

    new_contributors = get_new_contributors(args.contributors_path, commits)
    if new_contributors:
        if args.contributors:
            appended_text = '\n'.join(new_contributors) + '\n'
            write_file(args.contributors_path, appended_text, mode='a')
        logger.info(f'New contributors: {", ".join(new_contributors)}')

    return Changelog(commits.groups(), args.repo, args.collapsible)
 | |
| 
 | |
| 
 | |
def create_parser():
    """Return the argparse parser for the command line interface."""
    import argparse

    parser = argparse.ArgumentParser(
        description='Create a changelog markdown from a git commit range')

    # (names, keyword options) for every argument, in the order they are registered
    argument_specs = (
        (('commitish',), {
            'default': 'HEAD', 'nargs': '?',
            'help': 'The commitish to create the range from (default: %(default)s)'}),
        (('-v', '--verbosity'), {
            'action': 'count', 'default': 0,
            'help': 'increase verbosity (can be used twice)'}),
        (('-c', '--contributors'), {
            'action': 'store_true',
            'help': 'update CONTRIBUTORS file (default: %(default)s)'}),
        (('--contributors-path',), {
            'type': Path, 'default': LOCATION_PATH.parent / 'CONTRIBUTORS',
            'help': 'path to the CONTRIBUTORS file'}),
        (('--no-override',), {
            'action': 'store_true',
            'help': 'skip override json in commit generation (default: %(default)s)'}),
        (('--override-path',), {
            'type': Path, 'default': LOCATION_PATH / 'changelog_override.json',
            'help': 'path to the changelog_override.json file'}),
        (('--default-author',), {
            'default': 'pukkandan',
            'help': 'the author to use without a author indicator (default: %(default)s)'}),
        (('--repo',), {
            'default': 'yt-dlp/yt-dlp',
            'help': 'the github repository to use for the operations (default: %(default)s)'}),
        (('--collapsible',), {
            'action': 'store_true',
            'help': 'make changelog collapsible (default: %(default)s)'}),
    )
    for names, options in argument_specs:
        parser.add_argument(*names, **options)

    return parser
 | |
| 
 | |
| 
 | |
if __name__ == '__main__':
    # Parse the command line, build the changelog and write it to stdout
    cli_args = create_parser().parse_args()
    changelog = create_changelog(cli_args)
    print(changelog)
 | 
