[add] Init

This commit is contained in:
Jose Pablo Domingo Aramburo Sanchez 2025-08-27 16:08:39 -07:00
commit d274b10e79
5 changed files with 546 additions and 0 deletions

125
README.md Normal file
View File

@ -0,0 +1,125 @@
# GitHub Issues to Markdown
A command-line tool that downloads all issues from a GitHub repository and compiles them into a single markdown file with token counting.
## Prerequisites
- Python 3.8 or higher
- GitHub CLI (`gh`) installed and authenticated
- Poetry (recommended) or pip
## Installation
### Using Poetry (Recommended)
```bash
# Clone or download the project
cd github-issues-md
# Install dependencies
poetry install
# The script will be available as 'github-issues-md'
```
### Using Pip
```bash
# From the repo
pip3 install git+https://git.laziness.rocks/PootisPenserHere/github-issues-md.git
```
```bash
# Clone or download the project
cd github-issues-md
# Install the package
pip3 install .
# Or install in development mode
pip3 install -e .
```
## Setup GitHub CLI
Make sure GitHub CLI is installed and authenticated:
```bash
# Install GitHub CLI (if not already installed)
# On macOS: brew install gh
# On Ubuntu: sudo apt install gh
# On Windows: winget install GitHub.cli
# Authenticate with GitHub
gh auth login
```
## Usage
```bash
# Basic usage with repository format
github-issues-md owner/repo
# Using full GitHub URL
github-issues-md https://github.com/owner/repo
# Filter by date range
github-issues-md owner/repo --after 2023-01-01 --before 2023-12-31
# Limit number of issues
github-issues-md owner/repo --limit 10
# Filter by state (open, closed, all)
github-issues-md owner/repo --state open
# Combine filters
github-issues-md owner/repo --after 2023-06-01 --limit 5 --state closed
# Save to file
github-issues-md owner/repo > issues.md
```
## Options
- `repo`: GitHub repository (required) - can be `owner/repo` format or full GitHub URL
- `--before DATE`: Only issues created before this date (YYYY-MM-DD format)
- `--after DATE`: Only issues created after this date (YYYY-MM-DD format)
- `--limit N`: Maximum number of issues to fetch
- `--state STATE`: Issue state to filter by (open, closed, all) - default: all
## Output
The tool writes Markdown to stdout (redirect it to a file to save it). The output contains:
- Repository information and generation metadata
- Each issue with its title, author, state, dates, and description
- All comments for each issue with author and timestamp
- A token count at the end, computed with tiktoken (`cl100k_base` encoding)
Each issue and its discussions are clearly separated with markdown headers and horizontal rules.
## Examples
```bash
# Get all issues from a popular repository
github-issues-md microsoft/vscode > vscode-issues.md
# Get only open issues created after a given date
github-issues-md facebook/react --after 2023-11-01 --state open
# Get the latest 20 issues
github-issues-md owner/repo --limit 20
```
## Development
```bash
# Install in development mode
poetry install
# Run tests
poetry run pytest
# Format code
poetry run black .
poetry run isort .
```

View File

@ -0,0 +1,3 @@
"""GitHub Issues to Markdown converter."""
# Package version; mirrors the `version` field declared in pyproject.toml.
__version__ = "0.1.0"

View File

@ -0,0 +1,174 @@
"""GitHub API client for fetching issues and comments."""
import json
import subprocess
from datetime import datetime
from typing import Dict, List, Optional, Any
from dataclasses import dataclass
@dataclass
class Comment:
    """One comment attached to a GitHub issue.

    Fields are positional in the order declared below: author, body,
    created_at, url.
    """

    # Login of the account that wrote the comment.
    author: str
    # Text of the comment.
    body: str
    # When the comment was created.
    created_at: datetime
    # URL reported for the comment.
    url: str
@dataclass
class Issue:
    """A GitHub issue together with the comments posted on it.

    Fields are positional in the order declared below.
    """

    # Issue number within its repository.
    number: int
    # Issue title.
    title: str
    # Issue description text.
    body: str
    # Login of the account that opened the issue.
    author: str
    # Issue state as reported by GitHub.
    state: str
    # When the issue was created.
    created_at: datetime
    # When the issue was last updated.
    updated_at: datetime
    # URL reported for the issue.
    url: str
    # Comments posted on the issue.
    comments: List[Comment]
class GitHubClient:
    """Client that reads issues and comments by shelling out to the GitHub CLI (``gh``).

    Creating an instance runs ``gh auth status`` and raises ``RuntimeError``
    when the CLI is missing or not authenticated.
    """

    # ``gh issue list`` returns only its default number of issues (30) when no
    # ``--limit`` is passed, so "no limit" is expressed as a very large explicit one.
    _FETCH_ALL_LIMIT = 1_000_000

    # Placeholder login used when an item is reported without an author.
    _UNKNOWN_AUTHOR = "ghost"

    def __init__(self):
        """Initialize the GitHub client and verify that ``gh`` is usable."""
        self._verify_gh_cli()

    def _verify_gh_cli(self) -> None:
        """Verify that GitHub CLI is installed and authenticated.

        Raises:
            RuntimeError: If ``gh`` cannot be found or ``gh auth status`` fails.
        """
        try:
            subprocess.run(
                ["gh", "auth", "status"],
                capture_output=True,
                text=True,
                check=True,
            )
        except subprocess.CalledProcessError as err:
            raise RuntimeError(
                "GitHub CLI is not authenticated. Please run 'gh auth login' first."
            ) from err
        except FileNotFoundError as err:
            raise RuntimeError(
                "GitHub CLI is not installed. Please install it first."
            ) from err

    def _run_gh_command(self, command: List[str]) -> str:
        """Run a GitHub CLI command and return its standard output.

        Args:
            command: Full argument vector, starting with ``"gh"``.

        Returns:
            The text the command wrote to stdout.

        Raises:
            RuntimeError: If the command exits with a non-zero status; the
                message carries the command's stderr.
        """
        try:
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                check=True,
            )
        except subprocess.CalledProcessError as e:
            raise RuntimeError(f"GitHub CLI command failed: {e.stderr}") from e
        return result.stdout

    @staticmethod
    def _parse_timestamp(value: str) -> datetime:
        """Parse a timestamp such as ``2024-01-01T00:00:00Z`` into an aware datetime."""
        # datetime.fromisoformat() only understands a trailing "Z" from
        # Python 3.11 on, so the offset is spelled out explicitly.
        return datetime.fromisoformat(value.replace("Z", "+00:00"))

    @staticmethod
    def _as_utc(moment: Optional[datetime]) -> Optional[datetime]:
        """Return *moment* as a timezone-aware value, treating naive input as UTC.

        Issue timestamps are parsed as timezone-aware values, and Python refuses
        to compare aware and naive datetimes, so filter bounds are normalized
        before use. ``None`` and already-aware values are returned unchanged.
        """
        from datetime import timezone  # local import: module header only imports ``datetime``

        if moment is not None and moment.tzinfo is None:
            return moment.replace(tzinfo=timezone.utc)
        return moment

    @classmethod
    def _author_login(cls, author: Optional[Dict[str, Any]]) -> str:
        """Return the login from an ``author`` object, tolerating a missing author."""
        return (author or {}).get("login") or cls._UNKNOWN_AUTHOR

    def get_issues(
        self,
        repo: str,
        before: Optional[datetime] = None,
        after: Optional[datetime] = None,
        limit: Optional[int] = None,
        state: str = "all"
    ) -> "List[Issue]":
        """
        Fetch issues from a GitHub repository.

        Args:
            repo: Repository in format "owner/repo"
            before: Only issues created strictly before this moment
                (naive values are interpreted as UTC)
            after: Only issues created strictly after this moment
                (naive values are interpreted as UTC)
            limit: Maximum number of issues to request from ``gh``; ``None``
                (or 0) requests every issue. The limit is applied by ``gh``
                before the date filters, so fewer issues may be returned.
            state: Issue state (open, closed, all)

        Returns:
            List of Issue objects, each with its comments loaded

        Raises:
            RuntimeError: If a ``gh`` command fails.
        """
        fetch_limit = limit if limit else self._FETCH_ALL_LIMIT
        command = [
            "gh", "issue", "list",
            "--repo", repo,
            "--state", state,
            "--limit", str(fetch_limit),
            "--json", "number,title,body,author,state,createdAt,updatedAt,url",
        ]
        issues_data = json.loads(self._run_gh_command(command))

        before = self._as_utc(before)
        after = self._as_utc(after)

        issues = []
        for issue_data in issues_data:
            created_at = self._parse_timestamp(issue_data["createdAt"])

            # Apply the date filters first so that no comments are fetched
            # for issues that are going to be discarded anyway.
            if before is not None and created_at >= before:
                continue
            if after is not None and created_at <= after:
                continue

            issues.append(
                Issue(
                    number=issue_data["number"],
                    title=issue_data["title"],
                    body=issue_data["body"] or "",
                    author=self._author_login(issue_data.get("author")),
                    state=issue_data["state"],
                    created_at=created_at,
                    updated_at=self._parse_timestamp(issue_data["updatedAt"]),
                    url=issue_data["url"],
                    # One extra ``gh issue view`` call per retained issue.
                    comments=self._get_issue_comments(repo, issue_data["number"]),
                )
            )
        return issues

    def _get_issue_comments(self, repo: str, issue_number: int) -> "List[Comment]":
        """
        Fetch comments for a specific issue.

        Args:
            repo: Repository in format "owner/repo"
            issue_number: Issue number

        Returns:
            List of Comment objects, in the order ``gh`` returns them

        Raises:
            RuntimeError: If the ``gh`` command fails.
        """
        command = [
            "gh", "issue", "view", str(issue_number),
            "--repo", repo,
            "--json", "comments",
        ]
        issue_data = json.loads(self._run_gh_command(command))

        return [
            Comment(
                author=self._author_login(comment_data.get("author")),
                body=comment_data["body"] or "",
                created_at=self._parse_timestamp(comment_data["createdAt"]),
                url=comment_data["url"],
            )
            for comment_data in issue_data.get("comments") or []
        ]

219
github_issues_md/main.py Normal file
View File

@ -0,0 +1,219 @@
"""Main CLI application for GitHub Issues to Markdown converter."""
import re
import sys
from datetime import datetime
from typing import List, Optional
from urllib.parse import urlparse
import tiktoken
import typer
from typing_extensions import Annotated
from .github_client import GitHubClient, Issue
# Typer application object; commands are registered on it with @app.command().
app = typer.Typer(
    help="Download GitHub issues and compile them into a markdown file",
    name="github-issues-md",
)
def parse_github_url(url: str) -> str:
    """
    Parse GitHub URL and extract repository name.

    Accepted inputs:
        - ``owner/repo``
        - ``https://github.com/owner/repo`` (also ``www.github.com``, with a
          trailing slash, a ``.git`` suffix, or deeper paths such as
          ``/issues/12``)
        - ``github.com/owner/repo`` without a scheme

    Args:
        url: GitHub URL or repository name

    Returns:
        Repository name in format "owner/repo"

    Raises:
        typer.BadParameter: If no owner/repo pair can be extracted.
    """
    candidate = url.strip()

    # If it's already in owner/repo format
    if re.match(r"^[^/]+/[^/]+$", candidate):
        return candidate

    # urlparse() only fills in the host when a scheme separator is present,
    # so give scheme-less input such as "github.com/owner/repo" a default one.
    if "://" not in candidate:
        candidate = f"https://{candidate}"

    parsed = urlparse(candidate)
    # .hostname is lower-cased and excludes any port or user-info part.
    if (parsed.hostname or "") in ("github.com", "www.github.com"):
        # Drop empty segments produced by leading, trailing or doubled slashes.
        segments = [segment for segment in parsed.path.split("/") if segment]
        if len(segments) >= 2:
            owner, name = segments[0], segments[1]
            # Clone URLs end in ".git", which is not part of the repository name.
            if name.endswith(".git"):
                name = name[: -len(".git")]
            if name:
                return f"{owner}/{name}"

    raise typer.BadParameter(
        "Invalid GitHub URL. Use format 'owner/repo' or 'https://github.com/owner/repo'"
    )
def format_issue_to_markdown(issue: "Issue") -> str:
    """
    Format a single issue and its comments to markdown.

    The result consists of a level-2 heading, a metadata block (author, state,
    created/updated timestamps, URL), an optional "Description" section, an
    optional "Comments" section and a closing horizontal rule.

    Args:
        issue: Issue object to format

    Returns:
        Markdown formatted string
    """
    from datetime import timezone  # local import: module header only imports ``datetime``

    def stamp(moment: datetime) -> str:
        """Render *moment* with a UTC label, converting aware values to UTC first."""
        if moment.tzinfo is not None:
            moment = moment.astimezone(timezone.utc)
        return moment.strftime("%Y-%m-%d %H:%M:%S UTC")

    # A Markdown hard line break needs two trailing spaces; with fewer the
    # metadata lines are rendered as one run-on paragraph.
    hard_break = " " * 2 + "\n"

    parts = [
        f"## Issue #{issue.number}: {issue.title}\n\n",
        f"**Author:** {issue.author}{hard_break}",
        f"**State:** {issue.state}{hard_break}",
        f"**Created:** {stamp(issue.created_at)}{hard_break}",
        f"**Updated:** {stamp(issue.updated_at)}{hard_break}",
        f"**URL:** {issue.url}\n\n",
    ]

    # Issue body (skipped when missing or whitespace-only)
    if (issue.body or "").strip():
        parts.append("### Description\n\n")
        parts.append(f"{issue.body}\n\n")

    # Comments, numbered from 1 in the order they are stored on the issue
    if issue.comments:
        parts.append("### Comments\n\n")
        for index, comment in enumerate(issue.comments, 1):
            parts.append(f"#### Comment {index} by {comment.author}\n\n")
            parts.append(f"**Posted:** {stamp(comment.created_at)}{hard_break}")
            parts.append(f"**URL:** {comment.url}\n\n")
            parts.append(f"{comment.body or ''}\n\n")

    parts.append("---\n\n")
    return "".join(parts)
def count_tokens(text: str) -> int:
    """
    Return the number of tokens in ``text``.

    The count comes from tiktoken's ``cl100k_base`` encoding. If tiktoken
    fails for any reason, the number of whitespace-separated words is
    returned instead as a rough approximation.

    Args:
        text: Text to measure

    Returns:
        Token count, or the word count when the encoder is unavailable
    """
    try:
        encoder = tiktoken.get_encoding("cl100k_base")  # GPT-4 encoding
        token_ids = encoder.encode(text)
    except Exception:
        # Deliberate best-effort fallback: approximate with a word count.
        word_list = text.split()
        return len(word_list)
    return len(token_ids)
def _parse_date_option(value: Optional[str], option_name: str) -> Optional[datetime]:
    """Parse a YYYY-MM-DD option value into a UTC-aware datetime (None if not given)."""
    from datetime import timezone  # local import: module header only imports ``datetime``

    if not value:
        return None
    try:
        parsed = datetime.strptime(value, "%Y-%m-%d")
    except ValueError:
        raise typer.BadParameter(
            f"Invalid {option_name} date format. Use YYYY-MM-DD"
        ) from None
    # Issue timestamps are timezone-aware, and aware and naive datetimes cannot
    # be compared, so the bound is pinned to midnight UTC.
    return parsed.replace(tzinfo=timezone.utc)


@app.command()
def main(
    repo: Annotated[str, typer.Argument(help="GitHub repository (owner/repo or full URL)")],
    before: Annotated[
        Optional[str],
        typer.Option(
            "--before",
            help="Only issues created before this date (YYYY-MM-DD format)"
        )
    ] = None,
    after: Annotated[
        Optional[str],
        typer.Option(
            "--after",
            help="Only issues created after this date (YYYY-MM-DD format)"
        )
    ] = None,
    limit: Annotated[
        Optional[int],
        typer.Option(
            "--limit",
            help="Maximum number of issues to fetch"
        )
    ] = None,
    state: Annotated[
        str,
        typer.Option(
            "--state",
            help="Issue state to filter by"
        )
    ] = "all"
) -> None:
    """
    Download GitHub issues and compile them into a markdown file.

    The output is written to stdout and can be redirected to a file.
    """
    # Progress and error messages go to stderr so that stdout carries only the
    # generated Markdown. Any failure is reported as "Error: ..." and the
    # process exits with status 1.
    from datetime import timezone  # local import: module header only imports ``datetime``

    try:
        # Parse repository URL
        repo_name = parse_github_url(repo)

        # Reject unknown states up front instead of after the first gh call.
        if state not in ("open", "closed", "all"):
            raise typer.BadParameter("Invalid state. Use open, closed or all")

        # Parse dates
        before_date = _parse_date_option(before, "before")
        after_date = _parse_date_option(after, "after")

        # Initialize GitHub client
        client = GitHubClient()

        # Fetch issues
        typer.echo(f"Fetching issues from {repo_name}...", err=True)
        issues = client.get_issues(
            repo=repo_name,
            before=before_date,
            after=after_date,
            limit=limit,
            state=state
        )
        if not issues:
            typer.echo("No issues found matching the criteria.", err=True)
            return

        # Generate markdown
        typer.echo(f"Found {len(issues)} issues. Generating markdown...", err=True)

        # A Markdown hard line break needs two trailing spaces.
        hard_break = " " * 2 + "\n"
        # The label says UTC, so take the current time in UTC rather than local time.
        generated = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")

        # Header
        parts = [
            f"# Issues from {repo_name}\n\n",
            f"**Generated:** {generated}{hard_break}",
            f"**Total Issues:** {len(issues)}\n\n",
        ]
        if before_date:
            before_label = before_date.strftime("%Y-%m-%d")
            parts.append(f"**Before:** {before_label}{hard_break}")
        if after_date:
            after_label = after_date.strftime("%Y-%m-%d")
            parts.append(f"**After:** {after_label}{hard_break}")
        if limit:
            parts.append(f"**Limit:** {limit}{hard_break}")
        parts.append(f"**State:** {state}\n\n")
        parts.append("---\n\n")

        # Add each issue
        parts.extend(format_issue_to_markdown(issue) for issue in issues)
        markdown_content = "".join(parts)

        # Count tokens (the count excludes the footer line that reports it)
        token_count = count_tokens(markdown_content)
        markdown_content += f"\n---\n\n**Token Count:** {token_count:,} tokens\n"

        # Output to stdout
        print(markdown_content)

        # Log completion to stderr
        typer.echo(f"✅ Generated markdown with {token_count:,} tokens", err=True)
    except Exception as e:
        typer.echo(f"❌ Error: {e}", err=True)
        sys.exit(1)


if __name__ == "__main__":
    app()

25
pyproject.toml Normal file
View File

@ -0,0 +1,25 @@
[tool.poetry]
name = "github-issues-md"
version = "0.1.0"
description = "Download GitHub issues and compile them into a markdown file"
authors = ["Your Name <your.email@example.com>"]
readme = "README.md"
packages = [{include = "github_issues_md"}]
[tool.poetry.dependencies]
python = "^3.8"
typer = {extras = ["all"], version = "^0.12.3"}
tiktoken = "^0.7.0"
requests = "^2.31.0"
[tool.poetry.scripts]
github-issues-md = "github_issues_md.main:app"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.poetry.group.dev.dependencies]
pytest = "^7.0.0"
black = "^23.0.0"
isort = "^5.12.0"