[add] Init

2025-08-27 16:08:39 -07:00 · 2025-08-27 16:08:39 -07:00 · d274b10e79
commit d274b10e79
5 changed files with 546 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,125 @@
 # GitHub Issues to Markdown
 A command-line tool that downloads all issues from a GitHub repository and compiles them into a single markdown file with token counting.
 ## Prerequisites
 - Python 3.8 or higher
 - GitHub CLI (`gh`) installed and authenticated
 - Poetry (recommended) or pip
 ## Installation
 ### Using Poetry (Recommended)
 ```bash
 # Clone or download the project
 cd github-issues-md
 # Install dependencies
 poetry install
 # The script will be available as 'github-issues-md'
 ```
 ### Using Pip
 ```bash
 # From the repo
 pip3 install git+https://git.laziness.rocks/PootisPenserHere/github-issues-md.git
 ```
 ```bash
 # Clone or download the project
 cd github-issues-md
 # Install the package
 pip3 install .
 # Or install in development mode
 pip3 install -e .
 ```
 ## Setup GitHub CLI
 Make sure GitHub CLI is installed and authenticated:
 ```bash
 # Install GitHub CLI (if not already installed)
 # On macOS: brew install gh
 # On Ubuntu: sudo apt install gh
 # On Windows: winget install GitHub.cli
 # Authenticate with GitHub
 gh auth login
 ```
 ## Usage
 ```bash
 # Basic usage with repository format
 github-issues-md owner/repo
 # Using full GitHub URL
 github-issues-md https://github.com/owner/repo
 # Filter by date range
 github-issues-md owner/repo --after 2023-01-01 --before 2023-12-31
 # Limit number of issues
 github-issues-md owner/repo --limit 10
 # Filter by state (open, closed, all)
 github-issues-md owner/repo --state open
 # Combine filters
 github-issues-md owner/repo --after 2023-06-01 --limit 5 --state closed
 # Save to file
 github-issues-md owner/repo > issues.md
 ```
 ## Options
 - `repo`: GitHub repository (required) - can be `owner/repo` format or full GitHub URL
 - `--before DATE`: Only issues created before this date (YYYY-MM-DD format)
 - `--after DATE`: Only issues created after this date (YYYY-MM-DD format)  
 - `--limit N`: Maximum number of issues to fetch
 - `--state STATE`: Issue state to filter by (open, closed, all) - default: all
 ## Output
 The tool writes Markdown to stdout (redirect it to a file to save it). The output contains:
 - Repository information and generation metadata
 - Each issue with its title, author, state, dates, and description
 - All comments for each issue with author and timestamp
 - Token count at the end using tiktoken
 Each issue and its discussions are clearly separated with markdown headers and horizontal rules.
 ## Examples
 ```bash
 # Get all issues from a popular repository
 github-issues-md microsoft/vscode > vscode-issues.md
 # Get only open issues created after a given date (here 2023-11-01)
 github-issues-md facebook/react --after 2023-11-01 --state open
 # Get the latest 20 issues
 github-issues-md owner/repo --limit 20
 ```
 ## Development
 ```bash
 # Install in development mode
 poetry install
 # Run tests
 poetry run pytest
 # Format code
 poetry run black .
 poetry run isort .
 ```
--- a/github_issues_md/__init__.py
+++ b/github_issues_md/__init__.py
@ -0,0 +1,3 @@
 """GitHub Issues to Markdown converter."""
 # Package version string; pyproject.toml declares the same "0.1.0" value,
 # so the two need to be bumped together.
 __version__ = "0.1.0"
--- a/github_issues_md/github_client.py
+++ b/github_issues_md/github_client.py
@ -0,0 +1,174 @@
 """GitHub API client for fetching issues and comments."""
 import json
 import subprocess
 from datetime import datetime
 from typing import Dict, List, Optional, Any
 from dataclasses import dataclass
@dataclass
 class Comment:
    """Represents a GitHub issue comment.

    Instances are created by ``GitHubClient._get_issue_comments`` from the
    ``comments`` array of ``gh issue view --json comments``.
    """
    # Login of the comment author (``author.login`` in the gh JSON).
    author: str
    # Comment text exactly as returned by gh.
    body: str
    # Creation time parsed from the ISO-8601 ``createdAt`` field.
    created_at: datetime
    # Link to the comment (the ``url`` field of the gh JSON).
    url: str
@dataclass
 class Issue:
    """Represents a GitHub issue with its comments.

    Instances are created by ``GitHubClient.get_issues`` from the JSON printed
    by ``gh issue list``; ``comments`` is filled by a separate per-issue call.
    """
    # Issue number within the repository.
    number: int
    # Issue title.
    title: str
    # Issue description; the client stores "" when gh reports no body.
    body: str
    # Login of the issue author (``author.login`` in the gh JSON).
    author: str
    # State string as reported by gh (passed through unchanged).
    state: str
    # Creation time parsed from the ISO-8601 ``createdAt`` field.
    created_at: datetime
    # Last-update time parsed from the ISO-8601 ``updatedAt`` field.
    updated_at: datetime
    # Link to the issue (the ``url`` field of the gh JSON).
    url: str
    # Comments on the issue, in the order gh returned them.
    comments: List[Comment]
 class GitHubClient:
    """Client for interacting with GitHub API using GitHub CLI.

    Every request is made by running the ``gh`` executable as a subprocess and
    parsing the JSON it prints; this class performs no HTTP calls itself.
    """

    # ``gh issue list`` stops after 30 results unless ``--limit`` is passed, so
    # an explicit high limit is sent whenever the caller does not give one.
    _FETCH_ALL_LIMIT = 100000

    def __init__(self):
        """Initialize the GitHub client.

        Raises:
            RuntimeError: If ``gh`` is not installed or not authenticated.
        """
        self._verify_gh_cli()

    def _verify_gh_cli(self) -> None:
        """Verify that GitHub CLI is installed and authenticated.

        Raises:
            RuntimeError: If ``gh auth status`` exits non-zero, or if the
                ``gh`` executable cannot be found.
        """
        try:
            subprocess.run(
                ["gh", "auth", "status"],
                capture_output=True,
                text=True,
                check=True
            )
        except subprocess.CalledProcessError as exc:
            raise RuntimeError(
                "GitHub CLI is not authenticated. Please run 'gh auth login' first."
            ) from exc
        except FileNotFoundError as exc:
            raise RuntimeError(
                "GitHub CLI is not installed. Please install it first."
            ) from exc

    def _run_gh_command(self, command: List[str]) -> str:
        """Run a GitHub CLI command and return the output.

        Args:
            command: Full argument vector, including the leading ``"gh"``.

        Returns:
            The command's standard output as text.

        Raises:
            RuntimeError: If the command exits with a non-zero status; the
                message carries the command's stderr.
        """
        try:
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                check=True
            )
        except subprocess.CalledProcessError as e:
            raise RuntimeError(f"GitHub CLI command failed: {e.stderr}") from e
        return result.stdout

    @staticmethod
    def _parse_timestamp(value: str) -> datetime:
        """Parse an ISO-8601 timestamp from gh into an aware datetime."""
        # The trailing "Z" is rewritten because datetime.fromisoformat() only
        # accepts it natively on newer Python versions.
        return datetime.fromisoformat(value.replace("Z", "+00:00"))

    @staticmethod
    def _as_utc(value: Optional[datetime]) -> Optional[datetime]:
        """Return ``value`` with UTC attached when it carries no timezone."""
        # Local import: the module header only imports ``datetime`` itself.
        from datetime import timezone

        if value is not None and value.tzinfo is None:
            return value.replace(tzinfo=timezone.utc)
        return value

    @staticmethod
    def _author_login(data: Dict[str, Any]) -> str:
        """Return ``data["author"]["login"]``, tolerating a missing author."""
        author = data.get("author") or {}
        # "ghost" is the name GitHub shows for deleted accounts.
        return author.get("login") or "ghost"

    def get_issues(
        self,
        repo: str,
        before: Optional[datetime] = None,
        after: Optional[datetime] = None,
        limit: Optional[int] = None,
        state: str = "all"
    ) -> List[Issue]:
        """
        Fetch issues from a GitHub repository.

        ``limit`` caps how many issues ``gh`` returns; the date filters are
        applied afterwards, so fewer than ``limit`` issues may come back.
        One extra ``gh issue view`` call is made per kept issue to load its
        comments.

        Args:
            repo: Repository in format "owner/repo"
            before: Only issues created before this date (naive values are
                interpreted as UTC)
            after: Only issues created after this date (naive values are
                interpreted as UTC)
            limit: Maximum number of issues to fetch; ``None`` or 0 fetches
                up to ``_FETCH_ALL_LIMIT`` issues
            state: Issue state (open, closed, all)
        Returns:
            List of Issue objects
        Raises:
            RuntimeError: If a ``gh`` command fails.
        """
        # GitHub timestamps are timezone-aware; comparing them with naive
        # datetimes raises TypeError, so normalise the filters first.
        before = self._as_utc(before)
        after = self._as_utc(after)

        command = [
            "gh", "issue", "list",
            "--repo", repo,
            "--state", state,
            "--json", "number,title,body,author,state,createdAt,updatedAt,url",
            "--limit", str(limit if limit else self._FETCH_ALL_LIMIT),
        ]
        issues_data = json.loads(self._run_gh_command(command))

        issues = []
        for issue_data in issues_data:
            created_at = self._parse_timestamp(issue_data["createdAt"])
            # Apply date filters (both bounds are exclusive)
            if before is not None and created_at >= before:
                continue
            if after is not None and created_at <= after:
                continue
            issues.append(
                Issue(
                    number=issue_data["number"],
                    title=issue_data["title"],
                    body=issue_data["body"] or "",
                    author=self._author_login(issue_data),
                    state=issue_data["state"],
                    created_at=created_at,
                    updated_at=self._parse_timestamp(issue_data["updatedAt"]),
                    url=issue_data["url"],
                    # Fetch comments for this issue
                    comments=self._get_issue_comments(repo, issue_data["number"]),
                )
            )
        return issues

    def _get_issue_comments(self, repo: str, issue_number: int) -> List[Comment]:
        """
        Fetch comments for a specific issue.

        Args:
            repo: Repository in format "owner/repo"
            issue_number: Issue number
        Returns:
            List of Comment objects
        Raises:
            RuntimeError: If the ``gh issue view`` command fails.
        """
        command = [
            "gh", "issue", "view", str(issue_number),
            "--repo", repo,
            "--json", "comments"
        ]
        issue_data = json.loads(self._run_gh_command(command))
        return [
            Comment(
                author=self._author_login(comment_data),
                body=comment_data["body"] or "",
                created_at=self._parse_timestamp(comment_data["createdAt"]),
                url=comment_data["url"],
            )
            for comment_data in issue_data.get("comments", [])
        ]
--- a/github_issues_md/main.py
+++ b/github_issues_md/main.py
@ -0,0 +1,219 @@
 """Main CLI application for GitHub Issues to Markdown converter."""
 import re
 import sys
 from datetime import datetime
 from typing import List, Optional
 from urllib.parse import urlparse
 import tiktoken
 import typer
 from typing_extensions import Annotated
 from .github_client import GitHubClient, Issue
 # Typer application object. The `main` command below is registered on it with
 # @app.command(), and pyproject.toml's console script points at it
 # ("github_issues_md.main:app").
 app = typer.Typer(
    name="github-issues-md",
    help="Download GitHub issues and compile them into a markdown file"
 )
 def parse_github_url(url: str) -> str:
    """
    Parse GitHub URL and extract repository name.

    Accepts either the "owner/repo" shorthand (returned unchanged) or a full
    github.com URL. For URLs, anything after the repository segment
    (e.g. "/issues/12") is ignored and a trailing ".git" is removed.

    Args:
        url: GitHub URL or repository name
    Returns:
        Repository name in format "owner/repo"
    Raises:
        typer.BadParameter: If the value is neither form.
    """
    # If it's already in owner/repo format
    if re.match(r"^[^/]+/[^/]+$", url):
        return url
    # Parse full GitHub URL; ``hostname`` is lower-cased and excludes any port.
    parsed = urlparse(url)
    if parsed.hostname in ("github.com", "www.github.com"):
        path_parts = parsed.path.strip("/").split("/")
        if len(path_parts) >= 2:
            owner, repo_name = path_parts[0], path_parts[1]
            # Clone URLs end in ".git", which is not part of the repo name.
            if repo_name.endswith(".git"):
                repo_name = repo_name[:-len(".git")]
            # Reject empty segments such as "https://github.com/owner//x".
            if owner and repo_name:
                return f"{owner}/{repo_name}"
    raise typer.BadParameter(
        "Invalid GitHub URL. Use format 'owner/repo' or 'https://github.com/owner/repo'"
    )
 def format_issue_to_markdown(issue: Issue) -> str:
    """
    Render one issue, followed by its comments, as a markdown section.

    The section is a level-2 heading, a metadata list, an optional
    "Description" part (skipped when the body is blank), an optional
    "Comments" part, and a closing horizontal rule. Timestamps are printed
    with a literal "UTC" suffix; no timezone conversion is done here.

    Args:
        issue: Issue object to format
    Returns:
        Markdown formatted string
    """
    stamp_format = "%Y-%m-%d %H:%M:%S UTC"
    created_date = issue.created_at.strftime(stamp_format)
    updated_date = issue.updated_at.strftime(stamp_format)

    # Header and metadata; two trailing spaces force a markdown line break.
    pieces = [
        f"## Issue #{issue.number}: {issue.title}\n\n",
        f"**Author:** {issue.author}  \n",
        f"**State:** {issue.state}  \n",
        f"**Created:** {created_date}  \n",
        f"**Updated:** {updated_date}  \n",
        f"**URL:** {issue.url}\n\n",
    ]

    # Description, only when the body has non-whitespace content.
    if issue.body.strip():
        pieces.append("### Description\n\n")
        pieces.append(f"{issue.body}\n\n")

    # Comments, numbered from 1 in the order they are stored.
    if issue.comments:
        pieces.append("### Comments\n\n")
        for position, comment in enumerate(issue.comments, 1):
            comment_date = comment.created_at.strftime(stamp_format)
            pieces.extend(
                [
                    f"#### Comment {position} by {comment.author}\n\n",
                    f"**Posted:** {comment_date}  \n",
                    f"**URL:** {comment.url}\n\n",
                    f"{comment.body}\n\n",
                ]
            )

    pieces.append("---\n\n")
    return "".join(pieces)
 def count_tokens(text: str) -> int:
    """
    Return how many tokens ``text`` encodes to with tiktoken.

    If anything goes wrong while loading the encoding or encoding the text,
    the count of whitespace-separated words is returned instead.

    Args:
        text: Text to count tokens for
    Returns:
        Number of tokens
    """
    try:
        # cl100k_base is the GPT-4 encoding
        token_ids = tiktoken.get_encoding("cl100k_base").encode(text)
    except Exception:
        # Fallback to approximate count
        return len(text.split())
    return len(token_ids)
@app.command()
def main(
    repo: Annotated[str, typer.Argument(help="GitHub repository (owner/repo or full URL)")],
    before: Annotated[
        Optional[str],
        typer.Option(
            "--before",
            help="Only issues created before this date (YYYY-MM-DD format)"
        )
    ] = None,
    after: Annotated[
        Optional[str],
        typer.Option(
            "--after",
            help="Only issues created after this date (YYYY-MM-DD format)"
        )
    ] = None,
    limit: Annotated[
        Optional[int],
        typer.Option(
            "--limit",
            help="Maximum number of issues to fetch"
        )
    ] = None,
    state: Annotated[
        str,
        typer.Option(
            "--state",
            help="Issue state to filter by"
        )
    ] = "all"
) -> None:
    """
    Download GitHub issues and compile them into a markdown file.
    The output is written to stdout and can be redirected to a file.
    """
    # The docstring above doubles as the command's --help text, so maintainer
    # notes are kept in comments instead.
    #
    # Flow: resolve the repository name, parse the optional date bounds, fetch
    # the issues through GitHubClient, render them to markdown, append a token
    # count, and print the result. Progress and errors go to stderr so stdout
    # holds only the markdown. Any failure exits with status 1.

    # Local import: the module header only imports ``datetime`` itself.
    from datetime import timezone

    def parse_day(raw: Optional[str], label: str) -> Optional[datetime]:
        """Parse a YYYY-MM-DD option value into an aware UTC datetime."""
        if not raw:
            return None
        try:
            parsed = datetime.strptime(raw, "%Y-%m-%d")
        except ValueError:
            raise typer.BadParameter(
                f"Invalid {label} date format. Use YYYY-MM-DD"
            ) from None
        # Issue timestamps from GitHub are timezone-aware; a naive bound would
        # raise TypeError when the two are compared.
        return parsed.replace(tzinfo=timezone.utc)

    try:
        # Parse repository URL
        repo_name = parse_github_url(repo)

        # Parse dates
        before_date = parse_day(before, "before")
        after_date = parse_day(after, "after")

        # Initialize GitHub client
        client = GitHubClient()

        # Fetch issues
        typer.echo(f"Fetching issues from {repo_name}...", err=True)
        issues = client.get_issues(
            repo=repo_name,
            before=before_date,
            after=after_date,
            limit=limit,
            state=state
        )
        if not issues:
            typer.echo("No issues found matching the criteria.", err=True)
            return

        # Generate markdown
        typer.echo(f"Found {len(issues)} issues. Generating markdown...", err=True)

        # Header; the timestamp is labelled UTC, so take the time in UTC.
        generated_at = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
        parts = [
            f"# Issues from {repo_name}\n\n",
            f"**Generated:** {generated_at}  \n",
            f"**Total Issues:** {len(issues)}\n\n",
        ]
        if before_date:
            parts.append(f"**Before:** {before_date.strftime('%Y-%m-%d')}  \n")
        if after_date:
            parts.append(f"**After:** {after_date.strftime('%Y-%m-%d')}  \n")
        if limit:
            parts.append(f"**Limit:** {limit}  \n")
        parts.append(f"**State:** {state}\n\n")
        parts.append("---\n\n")

        # Add each issue
        parts.extend(format_issue_to_markdown(issue) for issue in issues)
        markdown_content = "".join(parts)

        # Count tokens (the footer itself is not included in the count)
        token_count = count_tokens(markdown_content)
        markdown_content += f"\n---\n\n**Token Count:** {token_count:,} tokens\n"

        # Output to stdout
        print(markdown_content)

        # Log completion to stderr
        typer.echo(f"✅ Generated markdown with {token_count:,} tokens", err=True)
    except Exception as e:
        typer.echo(f"❌ Error: {e}", err=True)
        sys.exit(1)
 # Run the Typer app when this module is executed as __main__. Because of the
 # relative import of .github_client above, that means
 # `python -m github_issues_md.main` rather than running the file by path.
 if __name__ == "__main__":
    app()
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,25 @@
 [tool.poetry]
 name = "github-issues-md"
 # Same value as __version__ in the package's init module.
 version = "0.1.0"
 description = "Download GitHub issues and compile them into a markdown file"
 # TODO: placeholder author; replace before publishing.
 authors = ["Your Name <your.email@example.com>"]
 readme = "README.md"
 packages = [{include = "github_issues_md"}]
 [tool.poetry.dependencies]
 python = "^3.8"
 typer = {extras = ["all"], version = "^0.12.3"}
 tiktoken = "^0.7.0"
 # NOTE(review): neither main.py nor github_client.py imports requests (GitHub
 # access goes through the gh CLI) - confirm whether this dependency is needed.
 requests = "^2.31.0"
 [tool.poetry.scripts]
 # Console entry point: the Typer `app` object defined in github_issues_md/main.py.
 github-issues-md = "github_issues_md.main:app"
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
 [tool.poetry.group.dev.dependencies]
 pytest = "^7.0.0"
 black = "^23.0.0"
 isort = "^5.12.0"
		`@ -0,0 +1,3 @@`
							`"""GitHub Issues to Markdown converter."""`

							`__version__ = "0.1.0"`