From d274b10e79cac6d464cd05824fdcd0066cb9dd7f Mon Sep 17 00:00:00 2001
From: Jose Pablo Domingo Aramburo Sanchez
Date: Wed, 27 Aug 2025 16:08:39 -0700
Subject: [PATCH] [add] Init

---
 README.md                         | 125 +++++++++++++++
 github_issues_md/__init__.py      |   3 +
 github_issues_md/github_client.py | 205 +++++++++++++++++++++++++
 github_issues_md/main.py          | 247 ++++++++++++++++++++++++++++++
 pyproject.toml                    |  25 +++
 5 files changed, 605 insertions(+)
 create mode 100644 README.md
 create mode 100644 github_issues_md/__init__.py
 create mode 100644 github_issues_md/github_client.py
 create mode 100644 github_issues_md/main.py
 create mode 100644 pyproject.toml

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5274829
--- /dev/null
+++ b/README.md
@@ -0,0 +1,125 @@
+# GitHub Issues to Markdown
+
+A command-line tool that downloads all issues from a GitHub repository and compiles them into a single markdown file with token counting.
+
+## Prerequisites
+
+- Python 3.8 or higher
+- GitHub CLI (`gh`) installed and authenticated
+- Poetry (recommended) or pip
+
+## Installation
+
+### Using Poetry (Recommended)
+```bash
+# Clone or download the project
+cd github-issues-md
+
+# Install dependencies
+poetry install
+
+# The script will be available as 'github-issues-md'
+```
+
+### Using Pip
+
+```bash
+# From the repo
+pip3 install git+https://git.laziness.rocks/PootisPenserHere/github-issues-md.git
+```
+
+```bash
+# Clone or download the project
+cd github-issues-md
+
+# Install the package
+pip3 install .
+
+# Or install in development mode
+pip3 install -e .
+```
+
+## Setup GitHub CLI
+
+Make sure GitHub CLI is installed and authenticated:
+
+```bash
+# Install GitHub CLI (if not already installed)
+# On macOS: brew install gh
+# On Ubuntu: sudo apt install gh
+# On Windows: winget install GitHub.cli
+
+# Authenticate with GitHub
+gh auth login
+```
+
+## Usage
+
+```bash
+# Basic usage with repository format
+github-issues-md owner/repo
+
+# Using full GitHub URL
+github-issues-md https://github.com/owner/repo
+
+# Filter by date range
+github-issues-md owner/repo --after 2023-01-01 --before 2023-12-31
+
+# Limit number of issues
+github-issues-md owner/repo --limit 10
+
+# Filter by state (open, closed, all)
+github-issues-md owner/repo --state open
+
+# Combine filters
+github-issues-md owner/repo --after 2023-06-01 --limit 5 --state closed
+
+# Save to file
+github-issues-md owner/repo > issues.md
+```
+
+## Options
+
+- `repo`: GitHub repository (required) - can be `owner/repo` format or full GitHub URL
+- `--before DATE`: Only issues created before this date (YYYY-MM-DD format, interpreted as UTC)
+- `--after DATE`: Only issues created after this date (YYYY-MM-DD format, interpreted as UTC)
+- `--limit N`: Maximum number of issues to fetch (applied before the date filters)
+- `--state STATE`: Issue state to filter by (open, closed, all) - default: all
+
+## Output
+
+The tool outputs a markdown file to stdout containing:
+
+- Repository information and generation metadata
+- Each issue with its title, author, state, dates, and description
+- All comments for each issue with author and timestamp
+- Token count at the end using tiktoken
+
+Each issue and its discussions are clearly separated with markdown headers and horizontal rules.
+
+## Examples
+
+```bash
+# Get all issues from a popular repository
+github-issues-md microsoft/vscode > vscode-issues.md
+
+# Get only open issues created after a given date
+github-issues-md facebook/react --after 2023-11-01 --state open
+
+# Get the latest 20 issues
+github-issues-md owner/repo --limit 20
+```
+
+## Development
+
+```bash
+# Install in development mode
+poetry install
+
+# Run tests
+poetry run pytest
+
+# Format code
+poetry run black .
+poetry run isort .
+```
diff --git a/github_issues_md/__init__.py b/github_issues_md/__init__.py
new file mode 100644
index 0000000..6ab1c13
--- /dev/null
+++ b/github_issues_md/__init__.py
@@ -0,0 +1,3 @@
+"""GitHub Issues to Markdown converter."""
+
+__version__ = "0.1.0"
diff --git a/github_issues_md/github_client.py b/github_issues_md/github_client.py
new file mode 100644
index 0000000..cd1f318
--- /dev/null
+++ b/github_issues_md/github_client.py
@@ -0,0 +1,205 @@
+"""GitHub API client for fetching issues and comments."""
+
+import json
+import subprocess
+from datetime import datetime, timezone
+from typing import Dict, List, Optional, Any
+from dataclasses import dataclass
+
+# `gh issue list` returns at most 30 issues unless --limit is given, so an
+# explicit, effectively unbounded limit is passed to really fetch everything.
+_FETCH_ALL_LIMIT = 1_000_000
+
+# Placeholder login used when GitHub reports no author for an item.
+_UNKNOWN_AUTHOR = "ghost"
+
+
+@dataclass
+class Comment:
+    """Represents a GitHub issue comment."""
+
+    author: str
+    body: str
+    created_at: datetime
+    url: str
+
+
+@dataclass
+class Issue:
+    """Represents a GitHub issue with its comments."""
+
+    number: int
+    title: str
+    body: str
+    author: str
+    state: str
+    created_at: datetime
+    updated_at: datetime
+    url: str
+    comments: List[Comment]
+
+
+def _parse_timestamp(value: str) -> datetime:
+    """Parse an ISO 8601 timestamp from ``gh`` into an aware datetime.
+
+    ``datetime.fromisoformat`` rejects a trailing "Z" before Python 3.11,
+    so it is rewritten as an explicit UTC offset first.
+    """
+    return datetime.fromisoformat(value.replace("Z", "+00:00"))
+
+
+def _as_utc(value: Optional[datetime]) -> Optional[datetime]:
+    """Return *value* unchanged if aware; treat a naive datetime as UTC."""
+    if value is not None and value.tzinfo is None:
+        return value.replace(tzinfo=timezone.utc)
+    return value
+
+
+def _author_login(item: Dict[str, Any]) -> str:
+    """Return the author's login, tolerating a missing or null author."""
+    author = item.get("author") or {}
+    return author.get("login") or _UNKNOWN_AUTHOR
+
+
+class GitHubClient:
+    """Client for interacting with GitHub API using GitHub CLI."""
+
+    def __init__(self):
+        """Initialize the GitHub client."""
+        self._verify_gh_cli()
+
+    def _verify_gh_cli(self) -> None:
+        """Verify that GitHub CLI is installed and authenticated.
+
+        Raises:
+            RuntimeError: If ``gh`` is not installed or not authenticated.
+        """
+        try:
+            subprocess.run(
+                ["gh", "auth", "status"],
+                capture_output=True,
+                text=True,
+                check=True
+            )
+        except subprocess.CalledProcessError as e:
+            raise RuntimeError(
+                "GitHub CLI is not authenticated. Please run 'gh auth login' first."
+            ) from e
+        except FileNotFoundError as e:
+            raise RuntimeError(
+                "GitHub CLI is not installed. Please install it first."
+            ) from e
+
+    def _run_gh_command(self, command: List[str]) -> str:
+        """Run a GitHub CLI command and return its standard output.
+
+        Raises:
+            RuntimeError: If the command exits with a non-zero status.
+        """
+        try:
+            result = subprocess.run(
+                command,
+                capture_output=True,
+                text=True,
+                check=True
+            )
+        except subprocess.CalledProcessError as e:
+            raise RuntimeError(f"GitHub CLI command failed: {e.stderr}") from e
+        return result.stdout
+
+    def get_issues(
+        self,
+        repo: str,
+        before: Optional[datetime] = None,
+        after: Optional[datetime] = None,
+        limit: Optional[int] = None,
+        state: str = "all"
+    ) -> List[Issue]:
+        """
+        Fetch issues from a GitHub repository.
+
+        Args:
+            repo: Repository in format "owner/repo"
+            before: Only issues created before this date (naive = UTC)
+            after: Only issues created after this date (naive = UTC)
+            limit: Maximum number of issues to request; the date filters
+                are applied afterwards. None (or 0) requests all issues.
+            state: Issue state (open, closed, all)
+
+        Returns:
+            List of Issue objects
+
+        Raises:
+            RuntimeError: If a ``gh`` command fails.
+        """
+        command = [
+            "gh", "issue", "list",
+            "--repo", repo,
+            "--state", state,
+            "--json", "number,title,body,author,state,createdAt,updatedAt,url",
+            # Always pass --limit: gh's default would silently truncate.
+            "--limit", str(limit or _FETCH_ALL_LIMIT),
+        ]
+
+        # Issue timestamps are timezone-aware; comparing them against naive
+        # bounds would raise TypeError, so normalize the bounds first.
+        before = _as_utc(before)
+        after = _as_utc(after)
+
+        issues_data = json.loads(self._run_gh_command(command))
+
+        issues = []
+        for issue_data in issues_data:
+            created_at = _parse_timestamp(issue_data["createdAt"])
+
+            # Filter by date before the per-issue comment fetch below
+            if before is not None and created_at >= before:
+                continue
+            if after is not None and created_at <= after:
+                continue
+
+            number = issue_data["number"]
+            issues.append(
+                Issue(
+                    number=number,
+                    title=issue_data["title"],
+                    body=issue_data["body"] or "",
+                    author=_author_login(issue_data),
+                    state=issue_data["state"],
+                    created_at=created_at,
+                    updated_at=_parse_timestamp(issue_data["updatedAt"]),
+                    url=issue_data["url"],
+                    comments=self._get_issue_comments(repo, number),
+                )
+            )
+
+        return issues
+
+    def _get_issue_comments(self, repo: str, issue_number: int) -> List[Comment]:
+        """
+        Fetch comments for a specific issue.
+
+        Args:
+            repo: Repository in format "owner/repo"
+            issue_number: Issue number
+
+        Returns:
+            List of Comment objects
+        """
+        command = [
+            "gh", "issue", "view", str(issue_number),
+            "--repo", repo,
+            "--json", "comments"
+        ]
+
+        issue_data = json.loads(self._run_gh_command(command))
+
+        return [
+            Comment(
+                author=_author_login(comment_data),
+                body=comment_data["body"] or "",
+                created_at=_parse_timestamp(comment_data["createdAt"]),
+                url=comment_data["url"],
+            )
+            for comment_data in issue_data.get("comments") or []
+        ]
diff --git a/github_issues_md/main.py b/github_issues_md/main.py
new file mode 100644
index 0000000..2af7bd1
--- /dev/null
+++ b/github_issues_md/main.py
@@ -0,0 +1,247 @@
+"""Main CLI application for GitHub Issues to Markdown converter."""
+
+import re
+import sys
+from datetime import datetime, timezone
+from typing import List, Optional
+from urllib.parse import urlparse
+
+import tiktoken
+import typer
+from typing_extensions import Annotated
+
+from .github_client import GitHubClient, Issue
+
+
+# Values accepted by `gh issue list --state`.
+_VALID_STATES = ("open", "closed", "all")
+
+# All timestamps are rendered in UTC with this layout.
+_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S UTC"
+
+app = typer.Typer(
+    name="github-issues-md",
+    help="Download GitHub issues and compile them into a markdown file"
+)
+
+
+def parse_github_url(url: str) -> str:
+    """
+    Parse GitHub URL and extract repository name.
+
+    Args:
+        url: GitHub URL or repository name
+
+    Returns:
+        Repository name in format "owner/repo"
+
+    Raises:
+        typer.BadParameter: If the value is neither "owner/repo" nor a
+            github.com repository URL.
+    """
+    # If it's already in owner/repo format
+    if re.match(r"^[^/]+/[^/]+$", url):
+        return url
+
+    # Parse full GitHub URL
+    parsed = urlparse(url)
+    if parsed.netloc.lower() in ("github.com", "www.github.com"):
+        path_parts = parsed.path.strip("/").split("/")
+        if len(path_parts) >= 2:
+            owner, name = path_parts[0], path_parts[1]
+            # Clone URLs end in ".git", which is not part of the repo name
+            if name.endswith(".git"):
+                name = name[:-len(".git")]
+            if owner and name:
+                return f"{owner}/{name}"
+
+    raise typer.BadParameter(
+        "Invalid GitHub URL. Use format 'owner/repo' or 'https://github.com/owner/repo'"
+    )
+
+
+def format_issue_to_markdown(issue: Issue) -> str:
+    """
+    Format a single issue and its comments to markdown.
+
+    Args:
+        issue: Issue object to format
+
+    Returns:
+        Markdown formatted string
+    """
+    # Two trailing spaces before "\n" produce a markdown hard line break.
+    parts = [
+        f"## Issue #{issue.number}: {issue.title}\n\n",
+        f"**Author:** {issue.author}  \n",
+        f"**State:** {issue.state}  \n",
+        f"**Created:** {issue.created_at.strftime(_TIMESTAMP_FORMAT)}  \n",
+        f"**Updated:** {issue.updated_at.strftime(_TIMESTAMP_FORMAT)}  \n",
+        f"**URL:** {issue.url}\n\n",
+    ]
+
+    # Issue body
+    if issue.body.strip():
+        parts.append("### Description\n\n")
+        parts.append(f"{issue.body}\n\n")
+
+    # Comments
+    if issue.comments:
+        parts.append("### Comments\n\n")
+        for i, comment in enumerate(issue.comments, 1):
+            comment_date = comment.created_at.strftime(_TIMESTAMP_FORMAT)
+            parts.append(f"#### Comment {i} by {comment.author}\n\n")
+            parts.append(f"**Posted:** {comment_date}  \n")
+            parts.append(f"**URL:** {comment.url}\n\n")
+            parts.append(f"{comment.body}\n\n")
+
+    parts.append("---\n\n")
+    return "".join(parts)
+
+
+def count_tokens(text: str) -> int:
+    """
+    Count tokens in the text using tiktoken.
+
+    Args:
+        text: Text to count tokens for
+
+    Returns:
+        Number of tokens, or a whitespace-separated word count if tiktoken
+        cannot load or apply its encoding
+    """
+    try:
+        encoding = tiktoken.get_encoding("cl100k_base")  # GPT-4 encoding
+        return len(encoding.encode(text))
+    except Exception:
+        # Deliberate best effort: fall back to an approximate count
+        return len(text.split())
+
+
+def _parse_date_option(value: Optional[str], name: str) -> Optional[datetime]:
+    """Parse a YYYY-MM-DD option value into a UTC-aware datetime.
+
+    Returns None when the option was not given; raises typer.BadParameter
+    when the value does not match the expected format.
+    """
+    if not value:
+        return None
+    try:
+        parsed = datetime.strptime(value, "%Y-%m-%d")
+    except ValueError:
+        raise typer.BadParameter(
+            f"Invalid {name} date format. Use YYYY-MM-DD"
+        ) from None
+    # Issue timestamps are UTC-aware, so the bounds must be aware as well.
+    return parsed.replace(tzinfo=timezone.utc)
+
+
+@app.command()
+def main(
+    repo: Annotated[str, typer.Argument(help="GitHub repository (owner/repo or full URL)")],
+    before: Annotated[
+        Optional[str],
+        typer.Option(
+            "--before",
+            help="Only issues created before this date (YYYY-MM-DD format)"
+        )
+    ] = None,
+    after: Annotated[
+        Optional[str],
+        typer.Option(
+            "--after",
+            help="Only issues created after this date (YYYY-MM-DD format)"
+        )
+    ] = None,
+    limit: Annotated[
+        Optional[int],
+        typer.Option(
+            "--limit",
+            help="Maximum number of issues to fetch"
+        )
+    ] = None,
+    state: Annotated[
+        str,
+        typer.Option(
+            "--state",
+            help="Issue state to filter by"
+        )
+    ] = "all"
+) -> None:
+    """
+    Download GitHub issues and compile them into a markdown file.
+
+    The output is written to stdout and can be redirected to a file.
+    """
+    try:
+        # Validate arguments before any GitHub CLI call is made
+        repo_name = parse_github_url(repo)
+        before_date = _parse_date_option(before, "before")
+        after_date = _parse_date_option(after, "after")
+
+        if state not in _VALID_STATES:
+            raise typer.BadParameter(
+                f"Invalid state '{state}'. Use one of: {', '.join(_VALID_STATES)}"
+            )
+        if limit is not None and limit < 1:
+            raise typer.BadParameter("Limit must be a positive integer")
+
+        # Initialize GitHub client
+        client = GitHubClient()
+
+        # Fetch issues
+        typer.echo(f"Fetching issues from {repo_name}...", err=True)
+        issues = client.get_issues(
+            repo=repo_name,
+            before=before_date,
+            after=after_date,
+            limit=limit,
+            state=state
+        )
+
+        if not issues:
+            typer.echo("No issues found matching the criteria.", err=True)
+            return
+
+        # Generate markdown
+        typer.echo(f"Found {len(issues)} issues. Generating markdown...", err=True)
+
+        # Header; the timestamp is taken in UTC to match its "UTC" label
+        generated = datetime.now(timezone.utc).strftime(_TIMESTAMP_FORMAT)
+        parts = [
+            f"# Issues from {repo_name}\n\n",
+            f"**Generated:** {generated}  \n",
+            f"**Total Issues:** {len(issues)}\n\n",
+        ]
+
+        if before_date:
+            parts.append(f"**Before:** {before_date.strftime('%Y-%m-%d')}  \n")
+        if after_date:
+            parts.append(f"**After:** {after_date.strftime('%Y-%m-%d')}  \n")
+        if limit:
+            parts.append(f"**Limit:** {limit}  \n")
+
+        parts.append(f"**State:** {state}\n\n")
+        parts.append("---\n\n")
+
+        # Add each issue
+        parts.extend(format_issue_to_markdown(issue) for issue in issues)
+        markdown_content = "".join(parts)
+
+        # Count tokens
+        token_count = count_tokens(markdown_content)
+        markdown_content += f"\n---\n\n**Token Count:** {token_count:,} tokens\n"
+
+        # Output to stdout
+        print(markdown_content)
+
+        # Log completion to stderr
+        typer.echo(f"✅ Generated markdown with {token_count:,} tokens", err=True)
+
+    except Exception as e:
+        typer.echo(f"❌ Error: {e}", err=True)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    app()
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..7b0c066
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,25 @@
+[tool.poetry]
+name = "github-issues-md"
+version = "0.1.0"
+description = "Download GitHub issues and compile them into a markdown file"
+authors = ["Your Name "]
+readme = "README.md"
+packages = [{include = "github_issues_md"}]
+
+[tool.poetry.dependencies]
+python = "^3.8"
+typer = {extras = ["all"], version = "^0.12.3"}
+tiktoken = "^0.7.0"
+requests = "^2.31.0"
+
+[tool.poetry.scripts]
+github-issues-md = "github_issues_md.main:app"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.poetry.group.dev.dependencies]
+pytest = "^7.0.0"
+black = "^23.0.0"
+isort = "^5.12.0"