# github-issues-md/github_issues_md/main.py

"""Main CLI application for GitHub Issues to Markdown converter."""

import re
import sys
from datetime import datetime
from typing import List, Optional
from urllib.parse import urlparse

import tiktoken
import typer
from typing_extensions import Annotated

from .github_client import GitHubClient, Issue


# Typer application object; commands are registered on it via @app.command().
app = typer.Typer(
    help="Download GitHub issues and compile them into a markdown file",
    name="github-issues-md",
)


def parse_github_url(url: str) -> str:
    """
    Parse a GitHub URL or shorthand and extract the repository name.

    Accepted inputs (surrounding whitespace is ignored):

    * ``owner/repo`` shorthand
    * ``https://github.com/owner/repo`` -- any trailing path, query or
      fragment (e.g. ``/issues/12``) is ignored
    * the same URL on ``www.github.com``, with a mixed-case host, or
      without a scheme (``github.com/owner/repo``)

    A trailing ``.git`` on the repository segment (as found in clone URLs)
    is removed.

    Args:
        url: GitHub URL or repository name

    Returns:
        Repository name in format "owner/repo"

    Raises:
        typer.BadParameter: If no owner/repo pair can be extracted.
    """
    candidate = url.strip()
    owner = repo = ""

    # Shorthand: exactly two non-empty segments separated by a single slash.
    if re.fullmatch(r"[^/]+/[^/]+", candidate):
        owner, repo = candidate.split("/")
    else:
        # Without a scheme urlparse() leaves the host inside .path, so prefix
        # "//" to make the leading component parse as the network location.
        target = candidate if "://" in candidate else f"//{candidate}"
        try:
            parsed = urlparse(target)
            # .hostname is lower-cased and excludes any port or userinfo.
            host = parsed.hostname or ""
        except ValueError:
            # Malformed URL (e.g. unbalanced IPv6 brackets): report it as a
            # bad parameter below instead of leaking a ValueError.
            host = ""

        if host in ("github.com", "www.github.com"):
            # Drop empty segments so "owner//repo" cannot yield "owner/".
            segments = [part for part in parsed.path.split("/") if part]
            if len(segments) >= 2:
                owner, repo = segments[0], segments[1]

    # Clone URLs end in ".git"; that suffix is not part of the repo name.
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]

    if owner and repo:
        return f"{owner}/{repo}"

    raise typer.BadParameter(
        "Invalid GitHub URL. Use format 'owner/repo' or 'https://github.com/owner/repo'"
    )


def format_issue_to_markdown(issue: Issue) -> str:
    """
    Format a single issue and its comments to markdown.

    The output consists of a level-2 heading with the issue number and
    title, a metadata block (author, state, created/updated timestamps,
    URL), an optional "Description" section, an optional "Comments"
    section with one level-4 heading per comment, and a closing
    horizontal rule.

    A missing (``None``) or whitespace-only issue body omits the
    "Description" section; a missing comment body is rendered as empty
    text rather than the literal string "None".

    Args:
        issue: Issue object to format

    Returns:
        Markdown formatted string
    """
    # The "UTC" suffix is a literal label; no timezone conversion happens
    # here. NOTE(review): assumes the client supplies UTC datetimes -- confirm.
    timestamp_format = "%Y-%m-%d %H:%M:%S UTC"

    # Collect fragments and join once instead of repeated string concatenation.
    # Two trailing spaces before "\n" force a markdown line break.
    parts = [
        f"## Issue #{issue.number}: {issue.title}\n\n",
        f"**Author:** {issue.author}  \n",
        f"**State:** {issue.state}  \n",
        f"**Created:** {issue.created_at.strftime(timestamp_format)}  \n",
        f"**Updated:** {issue.updated_at.strftime(timestamp_format)}  \n",
        f"**URL:** {issue.url}\n\n",
    ]

    # Issue body: guard against None so an empty issue does not crash.
    body = issue.body or ""
    if body.strip():
        parts.append("### Description\n\n")
        parts.append(f"{body}\n\n")

    # Comments, numbered from 1 in the order provided.
    if issue.comments:
        parts.append("### Comments\n\n")
        for index, comment in enumerate(issue.comments, 1):
            posted = comment.created_at.strftime(timestamp_format)
            parts.append(f"#### Comment {index} by {comment.author}\n\n")
            parts.append(f"**Posted:** {posted}  \n")
            parts.append(f"**URL:** {comment.url}\n\n")
            parts.append(f"{comment.body or ''}\n\n")

    parts.append("---\n\n")
    return "".join(parts)


def count_tokens(text: str) -> int:
    """
    Count tokens in the text using tiktoken.

    Uses the ``cl100k_base`` encoding. If tiktoken cannot be used for any
    reason (for example the encoding cannot be loaded), the count falls
    back to the number of whitespace-separated words, which is only an
    approximation.

    Args:
        text: Text to count tokens for

    Returns:
        Number of tokens (or the approximate word count on fallback)
    """
    try:
        encoding = tiktoken.get_encoding("cl100k_base")  # GPT-4 encoding
        # By default encode() raises ValueError when the text contains a
        # special-token string such as "<|endoftext|>". Issue text is
        # arbitrary user content, so encode such strings as ordinary text
        # instead of silently dropping to the word-count fallback.
        return len(encoding.encode(text, disallowed_special=()))
    except Exception:
        # Deliberate best-effort fallback: approximate count by words.
        return len(text.split())


def _parse_date_option(value: Optional[str], option_name: str) -> Optional[datetime]:
    """Convert a YYYY-MM-DD option value to a datetime; None/empty means unset."""
    if not value:
        return None
    try:
        return datetime.strptime(value, "%Y-%m-%d")
    except ValueError as err:
        raise typer.BadParameter(
            f"Invalid {option_name} date format. Use YYYY-MM-DD"
        ) from err


@app.command()
def main(
    repo: Annotated[str, typer.Argument(help="GitHub repository (owner/repo or full URL)")],
    before: Annotated[
        Optional[str],
        typer.Option(
            "--before",
            help="Only issues created before this date (YYYY-MM-DD format)"
        )
    ] = None,
    after: Annotated[
        Optional[str],
        typer.Option(
            "--after",
            help="Only issues created after this date (YYYY-MM-DD format)"
        )
    ] = None,
    limit: Annotated[
        Optional[int],
        typer.Option(
            "--limit",
            help="Maximum number of issues to fetch"
        )
    ] = None,
    state: Annotated[
        str,
        typer.Option(
            "--state",
            help="Issue state to filter by"
        )
    ] = "all"
) -> None:
    """
    Download GitHub issues and compile them into a markdown file.

    The output is written to stdout and can be redirected to a file.
    Progress and error messages go to stderr so they never mix with the
    generated markdown. Any failure is reported on stderr and the process
    exits with status 1.
    """
    # Local import: the file-level import only brings in `datetime`.
    from datetime import timezone

    try:
        # Parse repository URL
        repo_name = parse_github_url(repo)

        # Parse dates (None when the option was not given)
        before_date = _parse_date_option(before, "before")
        after_date = _parse_date_option(after, "after")

        # Initialize GitHub client
        client = GitHubClient()

        # Fetch issues
        typer.echo(f"Fetching issues from {repo_name}...", err=True)
        issues = client.get_issues(
            repo=repo_name,
            before=before_date,
            after=after_date,
            limit=limit,
            state=state
        )

        if not issues:
            typer.echo("No issues found matching the criteria.", err=True)
            return

        # Generate markdown
        typer.echo(f"Found {len(issues)} issues. Generating markdown...", err=True)

        # The timestamp is labelled "UTC", so take the current time in UTC
        # rather than the machine's local time.
        generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")

        # Header; fragments are joined once at the end.
        sections = [
            f"# Issues from {repo_name}\n\n",
            f"**Generated:** {generated_at}  \n",
            f"**Total Issues:** {len(issues)}\n\n",
        ]

        if before_date:
            sections.append(f"**Before:** {before_date.strftime('%Y-%m-%d')}  \n")
        if after_date:
            sections.append(f"**After:** {after_date.strftime('%Y-%m-%d')}  \n")
        # Compare against None so an explicit --limit 0 is still reported.
        if limit is not None:
            sections.append(f"**Limit:** {limit}  \n")

        sections.append(f"**State:** {state}\n\n")
        sections.append("---\n\n")

        # Add each issue
        sections.extend(format_issue_to_markdown(issue) for issue in issues)
        markdown_content = "".join(sections)

        # Count tokens (the footer added below is not included in the count)
        token_count = count_tokens(markdown_content)
        markdown_content += f"\n---\n\n**Token Count:** {token_count:,} tokens\n"

        # Output to stdout
        print(markdown_content)

        # Log completion to stderr
        typer.echo(f"✅ Generated markdown with {token_count:,} tokens", err=True)

    except Exception as e:
        # Top-level boundary: report any failure and exit non-zero.
        typer.echo(f"❌ Error: {e}", err=True)
        sys.exit(1)


# Run the Typer application when this file is executed as a script.
if __name__ == "__main__":
    app()