[add] Init

2025-08-27 16:08:39 -07:00
commit d274b10e79
5 changed files with 546 additions and 0 deletions
--- a/github_issues_md/init.py
+++ b/github_issues_md/init.py
@@ -0,0 +1,3 @@
+"""GitHub Issues to Markdown converter."""
+
+__version__ = "0.1.0"  # version string for this package
--- a/github_issues_md/github_client.py
+++ b/github_issues_md/github_client.py
@@ -0,0 +1,174 @@
+"""GitHub API client for fetching issues and comments."""
+
+import json
+import subprocess
+from datetime import datetime
+from typing import Dict, List, Optional, Any
+from dataclasses import dataclass
+
+
+@dataclass
+class Comment:
+    """A single comment on a GitHub issue, built from the `gh` CLI's JSON output."""
+    
+    author: str  # login of the comment author
+    body: str  # comment text as returned by the CLI
+    created_at: datetime  # parsed from the ISO-8601 "createdAt" field
+    url: str  # "url" field of the comment
+
+
+@dataclass
+class Issue:
+    """A GitHub issue together with the comments fetched for it."""
+    
+    number: int  # "number" field of the issue
+    title: str  # "title" field of the issue
+    body: str  # description text; the client stores "" when the CLI returns no body
+    author: str  # login of the issue author
+    state: str  # state string exactly as reported by the CLI
+    created_at: datetime  # parsed from the ISO-8601 "createdAt" field
+    updated_at: datetime  # parsed from the ISO-8601 "updatedAt" field
+    url: str  # "url" field of the issue
+    comments: List[Comment]  # loaded with a separate `gh issue view` call per issue
+
+
+class GitHubClient:
+    """Client that fetches issues and comments by shelling out to the GitHub CLI."""
+
+    def __init__(self):
+        """Create the client, failing fast if `gh` is missing or unauthenticated."""
+        self._verify_gh_cli()
+
+    def _verify_gh_cli(self) -> None:
+        """Verify that GitHub CLI is installed and authenticated, else raise RuntimeError."""
+        try:
+            # Only the exit status matters, so the result object is not kept.
+            subprocess.run(
+                ["gh", "auth", "status"], capture_output=True, text=True, check=True
+            )
+        except subprocess.CalledProcessError as e:
+            raise RuntimeError(
+                "GitHub CLI is not authenticated. Please run 'gh auth login' first."
+            ) from e
+        except FileNotFoundError as e:
+            raise RuntimeError(
+                "GitHub CLI is not installed. Please install it first."
+            ) from e
+
+    def _run_gh_command(self, command: List[str]) -> str:
+        """
+        Run a GitHub CLI command and return the output.
+
+        Args:
+            command: Full argument list, including the leading "gh"
+
+        Returns:
+            Captured standard output as text
+
+        Raises:
+            RuntimeError: If the command exits with a non-zero status
+        """
+        try:
+            result = subprocess.run(command, capture_output=True, text=True, check=True)
+        except subprocess.CalledProcessError as e:
+            raise RuntimeError(f"GitHub CLI command failed: {e.stderr}") from e
+        return result.stdout
+
+    @staticmethod
+    def _parse_timestamp(value: str) -> datetime:
+        """Parse an ISO-8601 timestamp, accepting a trailing "Z" as UTC."""
+        return datetime.fromisoformat(value.replace("Z", "+00:00"))
+
+    @staticmethod
+    def _as_comparable(bound: Optional[datetime], ref: datetime) -> Optional[datetime]:
+        """Return `bound` with `ref`'s timezone attached when `bound` is naive."""
+        if bound is not None and bound.tzinfo is None:
+            return bound.replace(tzinfo=ref.tzinfo)
+        return bound
+
+    def get_issues(
+        self,
+        repo: str,
+        before: Optional[datetime] = None,
+        after: Optional[datetime] = None,
+        limit: Optional[int] = None,
+        state: str = "all"
+    ) -> List[Issue]:
+        """
+        Fetch issues from a GitHub repository.
+
+        Args:
+            repo: Repository in format "owner/repo"
+            before: Only issues created before this date (exclusive)
+            after: Only issues created after this date (exclusive)
+            limit: Maximum number of issues to request; applied before date filters
+            state: Issue state (open, closed, all)
+
+        Naive `before`/`after` values take the timezone of the issue timestamps.
+
+        Returns:
+            List of Issue objects, each with its comments attached
+        """
+        command = [
+            "gh", "issue", "list",
+            "--repo", repo,
+            "--state", state,
+            "--json", "number,title,body,author,state,createdAt,updatedAt,url"
+        ]
+        if limit:
+            command.extend(["--limit", str(limit)])
+
+        issues = []
+        for issue_data in json.loads(self._run_gh_command(command)):
+            created_at = self._parse_timestamp(issue_data["createdAt"])
+            # Bounds parsed from "YYYY-MM-DD" are naive; comparing them directly
+            # with timezone-aware timestamps would raise TypeError.
+            upper = self._as_comparable(before, created_at)
+            lower = self._as_comparable(after, created_at)
+            if upper is not None and created_at >= upper:
+                continue
+            if lower is not None and created_at <= lower:
+                continue
+
+            issues.append(Issue(
+                number=issue_data["number"],
+                title=issue_data["title"],
+                body=issue_data["body"] or "",
+                author=issue_data["author"]["login"],
+                state=issue_data["state"],
+                created_at=created_at,
+                updated_at=self._parse_timestamp(issue_data["updatedAt"]),
+                url=issue_data["url"],
+                # One additional `gh issue view` call per issue that passes the filters.
+                comments=self._get_issue_comments(repo, issue_data["number"])
+            ))
+
+        return issues
+
+    def _get_issue_comments(self, repo: str, issue_number: int) -> List[Comment]:
+        """
+        Fetch comments for a specific issue.
+
+        Args:
+            repo: Repository in format "owner/repo"
+            issue_number: Issue number
+
+        Returns:
+            List of Comment objects in the order returned by `gh`
+        """
+        command = [
+            "gh", "issue", "view", str(issue_number),
+            "--repo", repo,
+            "--json", "comments"
+        ]
+        issue_data = json.loads(self._run_gh_command(command))
+
+        return [
+            Comment(
+                author=comment_data["author"]["login"],
+                body=comment_data["body"],
+                created_at=self._parse_timestamp(comment_data["createdAt"]),
+                url=comment_data["url"]
+            )
+            for comment_data in issue_data.get("comments", [])
+        ]
--- a/github_issues_md/main.py
+++ b/github_issues_md/main.py
@@ -0,0 +1,219 @@
+"""Main CLI application for GitHub Issues to Markdown converter."""
+
+import re
+import sys
+from datetime import datetime
+from typing import List, Optional
+from urllib.parse import urlparse
+
+import tiktoken
+import typer
+from typing_extensions import Annotated
+
+from .github_client import GitHubClient, Issue
+
+
+app = typer.Typer(  # CLI application object; `main` is registered on it via @app.command()
+    name="github-issues-md",
+    help="Download GitHub issues and compile them into a markdown file"
+)
+
+
+def parse_github_url(url: str) -> str:
+    """
+    Parse GitHub URL and extract repository name.
+
+    Args:
+        url: "owner/repo", or a github.com URL (https/http/ssh; scheme optional)
+
+    Returns:
+        Repository name in format "owner/repo", without a trailing ".git"
+
+    Raises:
+        typer.BadParameter: If no owner/repo pair can be extracted
+    """
+    # Query strings, fragments and trailing slashes never belong to the repo name.
+    text = re.sub(r"[?#].*$", "", url.strip()).rstrip("/")
+    host = r"^(?:(?:https?://)?(?:www\.)?github\.com/|git@github\.com:)"
+    path, had_host = re.subn(host, "", text, flags=re.IGNORECASE)
+    parts = path.split("/")
+    # Bare "owner/repo" must be exactly two segments; URLs may carry extra ones.
+    if all(parts[:2]) and (len(parts) == 2 or (had_host and len(parts) > 2)):
+        return f"{parts[0]}/{re.sub(r'[.]git$', '', parts[1])}"
+    raise typer.BadParameter(
+        "Invalid GitHub URL. Use format 'owner/repo' or 'https://github.com/owner/repo'"
+    )
+
+
+def format_issue_to_markdown(issue: Issue) -> str:
+    """
+    Render one issue, followed by its comments, as a markdown section.
+
+    Args:
+        issue: Issue object to render
+
+    Returns:
+        Markdown text ending with a horizontal-rule separator
+    """
+    # One strftime pattern is shared by every timestamp in the section
+    stamp = "%Y-%m-%d %H:%M:%S UTC"
+
+    # Header block: title line followed by the metadata fields
+    pieces = [
+        f"## Issue #{issue.number}: {issue.title}\n\n",
+        f"**Author:** {issue.author}  \n",
+        f"**State:** {issue.state}  \n",
+        f"**Created:** {issue.created_at.strftime(stamp)}  \n",
+        f"**Updated:** {issue.updated_at.strftime(stamp)}  \n",
+        f"**URL:** {issue.url}\n\n",
+    ]
+
+    # Description section is omitted for blank bodies
+    if issue.body.strip():
+        pieces.append("### Description\n\n")
+        pieces.append(f"{issue.body}\n\n")
+
+    # Comments section, numbered from 1
+    if issue.comments:
+        pieces.append("### Comments\n\n")
+        for index, note in enumerate(issue.comments, start=1):
+            pieces.append(f"#### Comment {index} by {note.author}\n\n")
+            pieces.append(f"**Posted:** {note.created_at.strftime(stamp)}  \n")
+            pieces.append(f"**URL:** {note.url}\n\n")
+            pieces.append(f"{note.body}\n\n")
+
+    pieces.append("---\n\n")
+    return "".join(pieces)
+
+
+def count_tokens(text: str) -> int:
+    """
+    Return the number of tokens in the text as measured by tiktoken.
+
+    Args:
+        text: Text to measure
+
+    Returns:
+        Token count, or a whitespace-separated word count if tiktoken fails
+    """
+    try:
+        # "cl100k_base" is the GPT-4 encoding
+        tokens = tiktoken.get_encoding("cl100k_base").encode(text)
+        return len(tokens)
+    except Exception:
+        return len(text.split())
+
+
+@app.command()
+def main(
+    repo: Annotated[str, typer.Argument(help="GitHub repository (owner/repo or full URL)")],
+    before: Annotated[
+        Optional[str],
+        typer.Option(
+            "--before",
+            help="Only issues created before this date (YYYY-MM-DD format)"
+        )
+    ] = None,
+    after: Annotated[
+        Optional[str],
+        typer.Option(
+            "--after",
+            help="Only issues created after this date (YYYY-MM-DD format)"
+        )
+    ] = None,
+    limit: Annotated[
+        Optional[int],
+        typer.Option(
+            "--limit",
+            help="Maximum number of issues to fetch"
+        )
+    ] = None,
+    state: Annotated[
+        str,
+        typer.Option(
+            "--state",
+            help="Issue state to filter by"
+        )
+    ] = "all"
+) -> None:
+    """
+    Download GitHub issues and compile them into a markdown file.
+
+    The output is written to stdout and can be redirected to a file.
+    Progress and error messages go to stderr; any failure exits with status 1.
+    """
+    # Local import: the module-level import only brings in `datetime`.
+    from datetime import timezone
+
+    try:
+        # Parse repository URL
+        repo_name = parse_github_url(repo)
+
+        # Parse dates as midnight UTC: issue timestamps are timezone-aware, and
+        # comparing them with naive datetimes raises TypeError.
+        before_date = None
+        after_date = None
+        if before:
+            try:
+                before_date = datetime.strptime(before, "%Y-%m-%d")
+            except ValueError:
+                raise typer.BadParameter("Invalid before date format. Use YYYY-MM-DD")
+            before_date = before_date.replace(tzinfo=timezone.utc)
+        if after:
+            try:
+                after_date = datetime.strptime(after, "%Y-%m-%d")
+            except ValueError:
+                raise typer.BadParameter("Invalid after date format. Use YYYY-MM-DD")
+            after_date = after_date.replace(tzinfo=timezone.utc)
+
+        # Initialize GitHub client (its constructor checks the `gh` CLI), then fetch issues
+        client = GitHubClient()
+        typer.echo(f"Fetching issues from {repo_name}...", err=True)
+        issues = client.get_issues(
+            repo=repo_name, before=before_date, after=after_date, limit=limit, state=state
+        )
+
+        if not issues:
+            typer.echo("No issues found matching the criteria.", err=True)
+            return
+
+        # Generate markdown
+        typer.echo(f"Found {len(issues)} issues. Generating markdown...", err=True)
+
+        # Header; the timestamp is taken in UTC so that the "UTC" label is accurate
+        generated = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
+        sections = [
+            f"# Issues from {repo_name}\n\n",
+            f"**Generated:** {generated}  \n",
+            f"**Total Issues:** {len(issues)}\n\n",
+        ]
+        if before_date:
+            sections.append(f"**Before:** {before_date.strftime('%Y-%m-%d')}  \n")
+        if after_date:
+            sections.append(f"**After:** {after_date.strftime('%Y-%m-%d')}  \n")
+        if limit:
+            sections.append(f"**Limit:** {limit}  \n")
+        sections.append(f"**State:** {state}\n\n")
+        sections.append("---\n\n")
+
+        # Add each issue
+        sections.extend(format_issue_to_markdown(issue) for issue in issues)
+        markdown_content = "".join(sections)
+
+        # Count tokens (the count excludes the footer line that reports it)
+        token_count = count_tokens(markdown_content)
+        markdown_content += f"\n---\n\n**Token Count:** {token_count:,} tokens\n"
+
+        # Output to stdout
+        print(markdown_content)
+
+        # Log completion to stderr
+        typer.echo(f"✅ Generated markdown with {token_count:,} tokens", err=True)
+
+    except Exception as e:
+        typer.echo(f"❌ Error: {e}", err=True)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    app()  # invoke the Typer application when this module is executed as a script