# File: github-issues-md/github_issues_md/main.py
# Listing metadata (from the source listing): 220 lines, 6.3 KiB, Python

"""Main CLI application for GitHub Issues to Markdown converter."""
import re
import sys
from datetime import datetime, timezone
from typing import List, Optional
from urllib.parse import urlparse

import tiktoken
import typer
from typing_extensions import Annotated

from .github_client import GitHubClient, Issue
# Typer application object: the CLI command is registered on it with
# ``@app.command()`` and it is invoked when the module runs as a script.
app = typer.Typer(
    help="Download GitHub issues and compile them into a markdown file",
    name="github-issues-md",
)
def parse_github_url(url: str) -> str:
    """
    Parse GitHub URL and extract repository name.

    Accepted forms (surrounding whitespace is ignored):

    * ``owner/repo``
    * ``https://github.com/owner/repo`` (extra path segments such as
      ``/issues/12`` are ignored; ``www.`` and any host casing are accepted)
    * ``github.com/owner/repo`` (no scheme)
    * ``git@github.com:owner/repo.git`` (scp-style SSH remote)

    A trailing ``.git`` on the repository segment is removed in every form.

    Args:
        url: GitHub URL or repository name

    Returns:
        Repository name in format "owner/repo"

    Raises:
        typer.BadParameter: If no owner/repo pair can be extracted.
    """
    candidate = url.strip()

    # An scp-style SSH remote contains exactly one slash, so it would pass
    # the shorthand check below and be returned verbatim as a bogus
    # repository name. Unwrap it first.
    ssh_prefix = "git@github.com:"
    if candidate.lower().startswith(ssh_prefix):
        candidate = candidate[len(ssh_prefix):]

    owner_and_repo = None
    if re.fullmatch(r"[^/]+/[^/]+", candidate):
        # Already in owner/repo format.
        owner_and_repo = candidate.split("/")
    else:
        # urlparse() only fills in the host when a scheme is present, so
        # give scheme-less GitHub links one before parsing.
        if "://" not in candidate and candidate.lower().startswith(
            ("github.com/", "www.github.com/")
        ):
            candidate = f"https://{candidate}"

        parsed = urlparse(candidate)
        # ``hostname`` is lower-cased and excludes any port or userinfo.
        if (parsed.hostname or "") in ("github.com", "www.github.com"):
            # Drop empty segments so doubled or trailing slashes cannot
            # produce an empty owner or repository name.
            segments = [segment for segment in parsed.path.split("/") if segment]
            if len(segments) >= 2:
                owner_and_repo = segments[:2]

    if owner_and_repo is not None:
        owner, name = owner_and_repo
        # Clone URLs end in ".git", which is not part of the repository name.
        if name.endswith(".git"):
            name = name[: -len(".git")]
        if name:
            return f"{owner}/{name}"

    raise typer.BadParameter(
        "Invalid GitHub URL. Use format 'owner/repo' or 'https://github.com/owner/repo'"
    )
def format_issue_to_markdown(issue: Issue) -> str:
    """
    Format a single issue and its comments to markdown.

    Args:
        issue: Issue object to format. The attributes read here are
            ``number``, ``title``, ``author``, ``state``, ``created_at``,
            ``updated_at``, ``url``, ``body`` and ``comments``; every
            comment supplies ``author``, ``created_at``, ``url`` and
            ``body``. A missing (``None``) or whitespace-only issue body
            omits the Description section; a ``None`` comment body is
            rendered as empty text.

    Returns:
        Markdown formatted string. It always ends with a horizontal rule
        so that several issues can be concatenated directly.
    """
    # NOTE(review): the literal "UTC" suffix is appended regardless of the
    # datetime's tzinfo -- this assumes the client supplies UTC timestamps;
    # confirm against GitHubClient.
    timestamp_format = "%Y-%m-%d %H:%M:%S UTC"
    # Markdown only treats a newline as a hard line break when it is
    # preceded by two or more spaces; with a single space the metadata
    # fields are rendered as one run-on paragraph.
    hard_break = "  \n"

    created_date = issue.created_at.strftime(timestamp_format)
    updated_date = issue.updated_at.strftime(timestamp_format)

    # Collect fragments and join once instead of repeated string +=.
    parts = [
        # Issue header
        f"## Issue #{issue.number}: {issue.title}\n\n",
        f"**Author:** {issue.author}{hard_break}",
        f"**State:** {issue.state}{hard_break}",
        f"**Created:** {created_date}{hard_break}",
        f"**Updated:** {updated_date}{hard_break}",
        f"**URL:** {issue.url}\n\n",
    ]

    # Issue body (treated as empty when it is None)
    body = issue.body or ""
    if body.strip():
        parts.append("### Description\n\n")
        parts.append(f"{body}\n\n")

    # Comments, numbered from 1 in the order they are stored on the issue
    if issue.comments:
        parts.append("### Comments\n\n")
        for index, comment in enumerate(issue.comments, 1):
            comment_date = comment.created_at.strftime(timestamp_format)
            parts.append(f"#### Comment {index} by {comment.author}\n\n")
            parts.append(f"**Posted:** {comment_date}{hard_break}")
            parts.append(f"**URL:** {comment.url}\n\n")
            parts.append(f"{comment.body or ''}\n\n")

    # Separator between consecutive issues
    parts.append("---\n\n")
    return "".join(parts)
def count_tokens(text: str) -> int:
    """
    Count tokens in the text using tiktoken.

    Args:
        text: Text to count tokens for

    Returns:
        Number of ``cl100k_base`` tokens. If the tokenizer cannot be used
        for any reason, the number of whitespace-separated words is
        returned instead as a rough approximation.
    """
    try:
        encoding = tiktoken.get_encoding("cl100k_base")  # GPT-4 encoding
        # Issue text is arbitrary user content and may literally contain
        # special-token markers such as "<|endoftext|>". tiktoken rejects
        # those by default, which would silently push every such document
        # onto the much less accurate fallback below; an empty
        # ``disallowed_special`` makes them count as ordinary text.
        return len(encoding.encode(text, disallowed_special=()))
    except Exception:
        # Deliberate best-effort: fall back to an approximate count rather
        # than failing the whole export over a token statistic.
        return len(text.split())
def _parse_date_option(value: Optional[str], option_name: str) -> Optional[datetime]:
    """Parse a YYYY-MM-DD option value; return None when the option was not given."""
    if not value:
        return None
    try:
        return datetime.strptime(value, "%Y-%m-%d")
    except ValueError as exc:
        raise typer.BadParameter(
            f"Invalid {option_name} date format. Use YYYY-MM-DD"
        ) from exc


@app.command()
def main(
    repo: Annotated[str, typer.Argument(help="GitHub repository (owner/repo or full URL)")],
    before: Annotated[
        Optional[str],
        typer.Option(
            "--before",
            help="Only issues created before this date (YYYY-MM-DD format)"
        )
    ] = None,
    after: Annotated[
        Optional[str],
        typer.Option(
            "--after",
            help="Only issues created after this date (YYYY-MM-DD format)"
        )
    ] = None,
    limit: Annotated[
        Optional[int],
        typer.Option(
            "--limit",
            help="Maximum number of issues to fetch"
        )
    ] = None,
    state: Annotated[
        str,
        typer.Option(
            "--state",
            help="Issue state to filter by"
        )
    ] = "all"
) -> None:
    """
    Download GitHub issues and compile them into a markdown file.
    The output is written to stdout and can be redirected to a file.
    """
    # NOTE: the docstring above doubles as the command's --help text, so it
    # is kept short; implementation notes live in comments instead.
    #
    # Flow: resolve the repository name, parse the date filters, fetch the
    # issues, render header + issues + token-count footer, print the document
    # to stdout. Progress and errors go to stderr so that redirecting stdout
    # captures only the markdown. Any failure (including invalid arguments)
    # is reported on stderr and ends the process with exit status 1.
    try:
        # Parse repository URL
        repo_name = parse_github_url(repo)

        # Parse dates
        before_date = _parse_date_option(before, "before")
        after_date = _parse_date_option(after, "after")

        # Initialize GitHub client
        client = GitHubClient()

        # Fetch issues
        typer.echo(f"Fetching issues from {repo_name}...", err=True)
        issues = client.get_issues(
            repo=repo_name,
            before=before_date,
            after=after_date,
            limit=limit,
            state=state
        )
        if not issues:
            typer.echo("No issues found matching the criteria.", err=True)
            return

        # Generate markdown
        typer.echo(f"Found {len(issues)} issues. Generating markdown...", err=True)

        # The timestamp is labelled "UTC", so it must be taken in UTC rather
        # than in the machine's local time zone.
        generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")

        # Header. Lines ending in two spaces + newline are Markdown hard line
        # breaks, keeping each field on its own rendered line.
        parts = [
            f"# Issues from {repo_name}\n\n",
            f"**Generated:** {generated_at}  \n",
            f"**Total Issues:** {len(issues)}\n\n",
        ]
        if before_date:
            parts.append(f"**Before:** {before_date.strftime('%Y-%m-%d')}  \n")
        if after_date:
            parts.append(f"**After:** {after_date.strftime('%Y-%m-%d')}  \n")
        # Compare against None so an explicit ``--limit 0`` is still reported.
        if limit is not None:
            parts.append(f"**Limit:** {limit}  \n")
        parts.append(f"**State:** {state}\n\n")
        parts.append("---\n\n")

        # Add each issue
        parts.extend(format_issue_to_markdown(issue) for issue in issues)
        markdown_content = "".join(parts)

        # Count tokens (the footer itself is not included in the count)
        token_count = count_tokens(markdown_content)
        markdown_content += f"\n---\n\n**Token Count:** {token_count:,} tokens\n"

        # Output to stdout
        print(markdown_content)

        # Log completion to stderr
        typer.echo(f"✅ Generated markdown with {token_count:,} tokens", err=True)
    except Exception as e:
        # Top-level CLI boundary: report the failure and exit non-zero.
        typer.echo(f"❌ Error: {e}", err=True)
        sys.exit(1)
# Run the Typer application only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    app()