[add] Init
This commit is contained in:
commit
d274b10e79
125
README.md
Normal file
125
README.md
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
# GitHub Issues to Markdown
|
||||||
|
|
||||||
|
A command-line tool that downloads all issues from a GitHub repository and compiles them into a single markdown file with token counting.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
- Python 3.8 or higher
|
||||||
|
- GitHub CLI (`gh`) installed and authenticated
|
||||||
|
- Poetry (recommended) or pip
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
### Using Poetry (Recommended)
|
||||||
|
```bash
|
||||||
|
# Clone or download the project
|
||||||
|
cd github-issues-md
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
poetry install
|
||||||
|
|
||||||
|
# The script is then available as 'github-issues-md' inside Poetry's virtualenv (e.g. `poetry run github-issues-md`)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Using Pip
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# From the repo
|
||||||
|
pip3 install git+https://git.laziness.rocks/PootisPenserHere/github-issues-md.git
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Clone or download the project
|
||||||
|
cd github-issues-md
|
||||||
|
|
||||||
|
# Install the package
|
||||||
|
pip3 install .
|
||||||
|
|
||||||
|
# Or install in development mode
|
||||||
|
pip3 install -e .
|
||||||
|
```
|
||||||
|
|
||||||
|
## Setup GitHub CLI
|
||||||
|
|
||||||
|
Make sure GitHub CLI is installed and authenticated:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install GitHub CLI (if not already installed)
|
||||||
|
# On macOS: brew install gh
|
||||||
|
# On Ubuntu: sudo apt install gh
|
||||||
|
# On Windows: winget install GitHub.cli
|
||||||
|
|
||||||
|
# Authenticate with GitHub
|
||||||
|
gh auth login
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Basic usage with repository format
|
||||||
|
github-issues-md owner/repo
|
||||||
|
|
||||||
|
# Using full GitHub URL
|
||||||
|
github-issues-md https://github.com/owner/repo
|
||||||
|
|
||||||
|
# Filter by date range
|
||||||
|
github-issues-md owner/repo --after 2023-01-01 --before 2023-12-31
|
||||||
|
|
||||||
|
# Limit number of issues
|
||||||
|
github-issues-md owner/repo --limit 10
|
||||||
|
|
||||||
|
# Filter by state (open, closed, all)
|
||||||
|
github-issues-md owner/repo --state open
|
||||||
|
|
||||||
|
# Combine filters
|
||||||
|
github-issues-md owner/repo --after 2023-06-01 --limit 5 --state closed
|
||||||
|
|
||||||
|
# Save to file
|
||||||
|
github-issues-md owner/repo > issues.md
|
||||||
|
```
|
||||||
|
|
||||||
|
## Options
|
||||||
|
|
||||||
|
- `repo`: GitHub repository (required) - can be `owner/repo` format or full GitHub URL
|
||||||
|
- `--before DATE`: Only issues created before this date (YYYY-MM-DD format)
|
||||||
|
- `--after DATE`: Only issues created after this date (YYYY-MM-DD format)
|
||||||
|
- `--limit N`: Maximum number of issues to fetch
|
||||||
|
- `--state STATE`: Issue state to filter by (open, closed, all) - default: all
|
||||||
|
|
||||||
|
## Output
|
||||||
|
|
||||||
|
The tool writes a single markdown document to stdout containing:
|
||||||
|
|
||||||
|
- Repository information and generation metadata
|
||||||
|
- Each issue with its title, author, state, dates, and description
|
||||||
|
- All comments for each issue with author and timestamp
|
||||||
|
- Token count at the end using tiktoken
|
||||||
|
|
||||||
|
Each issue and its discussions are clearly separated with markdown headers and horizontal rules.
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Get all issues from a popular repository
|
||||||
|
github-issues-md microsoft/vscode > vscode-issues.md
|
||||||
|
|
||||||
|
# Get only open issues created after a given date
|
||||||
|
github-issues-md facebook/react --after 2023-11-01 --state open
|
||||||
|
|
||||||
|
# Get the latest 20 issues
|
||||||
|
github-issues-md owner/repo --limit 20
|
||||||
|
```
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install in development mode
|
||||||
|
poetry install
|
||||||
|
|
||||||
|
# Run tests
|
||||||
|
poetry run pytest
|
||||||
|
|
||||||
|
# Format code
|
||||||
|
poetry run black .
|
||||||
|
poetry run isort .
|
||||||
|
```
|
3
github_issues_md/__init__.py
Normal file
3
github_issues_md/__init__.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
"""GitHub Issues to Markdown converter."""
|
||||||
|
|
||||||
|
__version__ = "0.1.0"
|
174
github_issues_md/github_client.py
Normal file
174
github_issues_md/github_client.py
Normal file
@ -0,0 +1,174 @@
|
|||||||
|
"""GitHub API client for fetching issues and comments."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import subprocess
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Dict, List, Optional, Any
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Comment:
    """A single comment attached to a GitHub issue."""

    # Login of the account that wrote the comment.
    author: str
    # Comment text.
    body: str
    # Creation time of the comment.
    created_at: datetime
    # URL of the comment.
    url: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Issue:
    """A GitHub issue bundled with the comments posted on it."""

    # Issue number within the repository.
    number: int
    # Issue title.
    title: str
    # Issue description text.
    body: str
    # Login of the account that opened the issue.
    author: str
    # Issue state string as reported by the GitHub CLI.
    state: str
    # Creation time of the issue.
    created_at: datetime
    # Time of the most recent update to the issue.
    updated_at: datetime
    # URL of the issue.
    url: str
    # Comments belonging to this issue.
    comments: List[Comment]
|
||||||
|
|
||||||
|
|
||||||
|
class GitHubClient:
    """Client for fetching issues and comments through the GitHub CLI (``gh``)."""

    def __init__(self):
        """Initialize the client, failing fast when ``gh`` is unusable."""
        self._verify_gh_cli()

    def _verify_gh_cli(self) -> None:
        """Verify that GitHub CLI is installed and authenticated.

        Raises:
            RuntimeError: If the ``gh`` executable cannot be found, or if
                ``gh auth status`` exits with a non-zero status.
        """
        try:
            subprocess.run(
                ["gh", "auth", "status"],
                capture_output=True,
                text=True,
                check=True,
            )
        except subprocess.CalledProcessError as exc:
            raise RuntimeError(
                "GitHub CLI is not authenticated. Please run 'gh auth login' first."
            ) from exc
        except FileNotFoundError as exc:
            raise RuntimeError(
                "GitHub CLI is not installed. Please install it first."
            ) from exc

    def _run_gh_command(self, command: List[str]) -> str:
        """Run a GitHub CLI command and return its standard output.

        Args:
            command: Full argument vector, including the leading ``"gh"``.

        Returns:
            The text the command wrote to stdout.

        Raises:
            RuntimeError: If the command exits with a non-zero status; the
                message carries the command's stderr.
        """
        try:
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                check=True,
            )
        except subprocess.CalledProcessError as e:
            raise RuntimeError(f"GitHub CLI command failed: {e.stderr}") from e
        return result.stdout

    @staticmethod
    def _as_utc(moment: Optional[datetime]) -> Optional[datetime]:
        """Return *moment* unchanged if aware (or None); treat naive values as UTC."""
        from datetime import timezone

        if moment is not None and moment.tzinfo is None:
            return moment.replace(tzinfo=timezone.utc)
        return moment

    @staticmethod
    def _parse_timestamp(value: str) -> datetime:
        """Parse an ISO-8601 timestamp string into a timezone-aware datetime."""
        # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11
        # on, so it is rewritten as an explicit UTC offset first.
        parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
        return GitHubClient._as_utc(parsed)

    @staticmethod
    def _author_login(payload: Dict[str, Any]) -> str:
        """Extract the author login, tolerating a missing or null author object."""
        author = payload.get("author") or {}
        # "ghost" is the placeholder GitHub shows for deleted accounts.
        return author.get("login") or "ghost"

    def get_issues(
        self,
        repo: str,
        before: Optional[datetime] = None,
        after: Optional[datetime] = None,
        limit: Optional[int] = None,
        state: str = "all"
    ) -> List[Issue]:
        """
        Fetch issues from a GitHub repository.

        Args:
            repo: Repository in format "owner/repo"
            before: Only issues created strictly before this moment. Naive
                datetimes are interpreted as UTC.
            after: Only issues created strictly after this moment. Naive
                datetimes are interpreted as UTC.
            limit: Maximum number of issues to request from ``gh``. The date
                filters are applied after the limit, so fewer issues may be
                returned.
            state: Issue state (open, closed, all)

        Returns:
            List of Issue objects, each with its comments loaded.

        Raises:
            RuntimeError: If a ``gh`` invocation fails.
        """
        command = [
            "gh", "issue", "list",
            "--repo", repo,
            "--state", state,
            "--json", "number,title,body,author,state,createdAt,updatedAt,url",
        ]

        # NOTE(review): when --limit is omitted, `gh issue list` applies its own
        # default (30 per the gh manual), so "no limit" is not "all issues".
        # Confirm whether a large explicit limit should be passed instead.
        if limit:
            command.extend(["--limit", str(limit)])

        issues_data = json.loads(self._run_gh_command(command))

        # Issue timestamps are timezone-aware; comparing them with naive bounds
        # would raise TypeError, so normalise the bounds once up front.
        before = self._as_utc(before)
        after = self._as_utc(after)

        issues = []
        for issue_data in issues_data:
            created_at = self._parse_timestamp(issue_data["createdAt"])

            # Apply date filters before paying for the per-issue comment fetch.
            if before and created_at >= before:
                continue
            if after and created_at <= after:
                continue

            issues.append(
                Issue(
                    number=issue_data["number"],
                    title=issue_data["title"],
                    body=issue_data["body"] or "",
                    author=self._author_login(issue_data),
                    state=issue_data["state"],
                    created_at=created_at,
                    updated_at=self._parse_timestamp(issue_data["updatedAt"]),
                    url=issue_data["url"],
                    comments=self._get_issue_comments(repo, issue_data["number"]),
                )
            )

        return issues

    def _get_issue_comments(self, repo: str, issue_number: int) -> List[Comment]:
        """
        Fetch comments for a specific issue.

        Args:
            repo: Repository in format "owner/repo"
            issue_number: Issue number

        Returns:
            List of Comment objects, in the order ``gh`` returned them.

        Raises:
            RuntimeError: If the ``gh`` invocation fails.
        """
        command = [
            "gh", "issue", "view", str(issue_number),
            "--repo", repo,
            "--json", "comments",
        ]

        issue_data = json.loads(self._run_gh_command(command))

        return [
            Comment(
                author=self._author_login(comment_data),
                # Mirror the issue body handling: never hand None to callers.
                body=comment_data["body"] or "",
                created_at=self._parse_timestamp(comment_data["createdAt"]),
                url=comment_data["url"],
            )
            for comment_data in issue_data.get("comments", [])
        ]
|
219
github_issues_md/main.py
Normal file
219
github_issues_md/main.py
Normal file
@ -0,0 +1,219 @@
|
|||||||
|
"""Main CLI application for GitHub Issues to Markdown converter."""
|
||||||
|
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import List, Optional
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import tiktoken
|
||||||
|
import typer
|
||||||
|
from typing_extensions import Annotated
|
||||||
|
|
||||||
|
from .github_client import GitHubClient, Issue
|
||||||
|
|
||||||
|
|
||||||
|
# Typer application object on which the CLI command in this module is registered.
app = typer.Typer(
    help="Download GitHub issues and compile them into a markdown file",
    name="github-issues-md",
)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_github_url(url: str) -> str:
    """
    Parse a GitHub URL (or shorthand) and extract the repository name.

    Accepted forms:
        - "owner/repo"
        - "https://github.com/owner/repo" (extra path segments such as
          "/issues/1" and a trailing slash are ignored)
        - the same with a "www." host prefix, a ".git" suffix, or no scheme
          ("github.com/owner/repo")

    Args:
        url: GitHub URL or repository name

    Returns:
        Repository name in format "owner/repo"

    Raises:
        typer.BadParameter: If the value is not a recognisable GitHub repository.
    """
    candidate = url.strip()

    # If it's already in owner/repo format
    if re.match(r"^[^/]+/[^/]+$", candidate):
        return candidate

    # urlparse() only populates the host when a scheme is present, so
    # scheme-less input such as "github.com/owner/repo" gets one added.
    if "://" not in candidate:
        candidate = f"https://{candidate}"

    parsed = urlparse(candidate)
    # .hostname is lower-cased and excludes any port or userinfo.
    if parsed.hostname in ("github.com", "www.github.com"):
        path_parts = [part for part in parsed.path.split("/") if part]
        if len(path_parts) >= 2:
            owner, name = path_parts[0], path_parts[1]
            # Clone URLs end in ".git", which is not part of the repository name.
            if name.endswith(".git"):
                name = name[: -len(".git")]
            if name:
                return f"{owner}/{name}"

    raise typer.BadParameter(
        "Invalid GitHub URL. Use format 'owner/repo' or 'https://github.com/owner/repo'"
    )
|
||||||
|
|
||||||
|
|
||||||
|
def format_issue_to_markdown(issue: Issue) -> str:
    """
    Format a single issue and its comments to markdown.

    The result has a level-2 header for the issue, a metadata block, an
    optional "Description" section (skipped when the body is blank), an
    optional "Comments" section with one level-4 header per comment, and a
    closing horizontal rule.

    Args:
        issue: Issue object to format

    Returns:
        Markdown formatted string
    """
    # The literal "UTC" suffix is not derived from the datetime's tzinfo.
    # NOTE(review): assumes the timestamps are already in UTC — confirm.
    timestamp_format = "%Y-%m-%d %H:%M:%S UTC"
    # A Markdown hard line break needs two trailing spaces; with only one the
    # metadata lines collapse into a single paragraph when rendered.
    hard_break = "  \n"

    parts = [
        f"## Issue #{issue.number}: {issue.title}\n\n",
        f"**Author:** {issue.author}{hard_break}",
        f"**State:** {issue.state}{hard_break}",
        f"**Created:** {issue.created_at.strftime(timestamp_format)}{hard_break}",
        f"**Updated:** {issue.updated_at.strftime(timestamp_format)}{hard_break}",
        f"**URL:** {issue.url}\n\n",
    ]

    # Issue body
    if issue.body.strip():
        parts.append("### Description\n\n")
        parts.append(f"{issue.body}\n\n")

    # Comments, numbered from 1 in the order they are stored on the issue.
    if issue.comments:
        parts.append("### Comments\n\n")
        for index, comment in enumerate(issue.comments, 1):
            posted = comment.created_at.strftime(timestamp_format)
            parts.append(f"#### Comment {index} by {comment.author}\n\n")
            parts.append(f"**Posted:** {posted}{hard_break}")
            parts.append(f"**URL:** {comment.url}\n\n")
            parts.append(f"{comment.body}\n\n")

    parts.append("---\n\n")
    return "".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def count_tokens(text: str) -> int:
    """
    Count the tokens in *text* using tiktoken.

    If tokenizing raises for any reason, the function falls back to the number
    of whitespace-separated words, so the result is then only approximate.

    Args:
        text: Text to count tokens for

    Returns:
        Number of tokens (or the word count when the fallback is used)
    """
    try:
        # GPT-4 encoding
        encoder = tiktoken.get_encoding("cl100k_base")
        token_total = len(encoder.encode(text))
    except Exception:
        # Best-effort fallback to an approximate count.
        token_total = len(text.split())
    return token_total
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_cli_date(raw: Optional[str], option_name: str) -> Optional[datetime]:
    """Parse a YYYY-MM-DD option value into a UTC-aware datetime (None if unset)."""
    from datetime import timezone

    if not raw:
        return None
    try:
        parsed = datetime.strptime(raw, "%Y-%m-%d")
    except ValueError as exc:
        raise typer.BadParameter(
            f"Invalid {option_name} date format. Use YYYY-MM-DD"
        ) from exc
    # Issue timestamps are timezone-aware; a naive bound cannot be compared
    # with them, so the date is pinned to midnight UTC.
    return parsed.replace(tzinfo=timezone.utc)


@app.command()
def main(
    repo: Annotated[str, typer.Argument(help="GitHub repository (owner/repo or full URL)")],
    before: Annotated[
        Optional[str],
        typer.Option(
            "--before",
            help="Only issues created before this date (YYYY-MM-DD format)"
        )
    ] = None,
    after: Annotated[
        Optional[str],
        typer.Option(
            "--after",
            help="Only issues created after this date (YYYY-MM-DD format)"
        )
    ] = None,
    limit: Annotated[
        Optional[int],
        typer.Option(
            "--limit",
            help="Maximum number of issues to fetch"
        )
    ] = None,
    state: Annotated[
        str,
        typer.Option(
            "--state",
            help="Issue state to filter by"
        )
    ] = "all"
) -> None:
    """
    Download GitHub issues and compile them into a markdown file.

    The output is written to stdout and can be redirected to a file.
    """
    from datetime import timezone

    # Two trailing spaces form a Markdown hard line break; a single space does not.
    hard_break = "  \n"

    try:
        # Parse repository URL
        repo_name = parse_github_url(repo)

        # Parse dates
        before_date = _parse_cli_date(before, "before")
        after_date = _parse_cli_date(after, "after")

        # Initialize GitHub client
        client = GitHubClient()

        # Fetch issues; progress messages go to stderr so stdout stays pure markdown.
        typer.echo(f"Fetching issues from {repo_name}...", err=True)
        issues = client.get_issues(
            repo=repo_name,
            before=before_date,
            after=after_date,
            limit=limit,
            state=state,
        )

        if not issues:
            typer.echo("No issues found matching the criteria.", err=True)
            return

        # Generate markdown
        typer.echo(f"Found {len(issues)} issues. Generating markdown...", err=True)

        # Header. The timestamp is labelled UTC, so it is taken in UTC rather
        # than in the machine's local time.
        generated = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
        sections = [
            f"# Issues from {repo_name}\n\n",
            f"**Generated:** {generated}{hard_break}",
            f"**Total Issues:** {len(issues)}\n\n",
        ]

        if before_date:
            sections.append(f"**Before:** {before_date.strftime('%Y-%m-%d')}{hard_break}")
        if after_date:
            sections.append(f"**After:** {after_date.strftime('%Y-%m-%d')}{hard_break}")
        if limit:
            sections.append(f"**Limit:** {limit}{hard_break}")

        sections.append(f"**State:** {state}\n\n")
        sections.append("---\n\n")

        # Add each issue
        sections.extend(format_issue_to_markdown(issue) for issue in issues)
        markdown_content = "".join(sections)

        # Count tokens (the footer itself is not included in the count)
        token_count = count_tokens(markdown_content)
        markdown_content += f"\n---\n\n**Token Count:** {token_count:,} tokens\n"

        # Output to stdout
        print(markdown_content)

        # Log completion to stderr
        typer.echo(f"✅ Generated markdown with {token_count:,} tokens", err=True)

    except Exception as e:
        # Top-level boundary: report any failure on stderr and exit non-zero.
        typer.echo(f"❌ Error: {e}", err=True)
        sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the Typer application when this module is executed directly.
if __name__ == "__main__":
    app()
|
25
pyproject.toml
Normal file
25
pyproject.toml
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
[tool.poetry]
|
||||||
|
name = "github-issues-md"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Download GitHub issues and compile them into a markdown file"
|
||||||
|
authors = ["Your Name <your.email@example.com>"]
|
||||||
|
readme = "README.md"
|
||||||
|
packages = [{include = "github_issues_md"}]
|
||||||
|
|
||||||
|
[tool.poetry.dependencies]
|
||||||
|
python = "^3.8"
|
||||||
|
typer = {extras = ["all"], version = "^0.12.3"}
|
||||||
|
tiktoken = "^0.7.0"
|
||||||
|
requests = "^2.31.0"
|
||||||
|
|
||||||
|
[tool.poetry.scripts]
|
||||||
|
github-issues-md = "github_issues_md.main:app"
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["poetry-core"]
|
||||||
|
build-backend = "poetry.core.masonry.api"
|
||||||
|
|
||||||
|
[tool.poetry.group.dev.dependencies]
|
||||||
|
pytest = "^7.0.0"
|
||||||
|
black = "^23.0.0"
|
||||||
|
isort = "^5.12.0"
|
Loading…
Reference in New Issue
Block a user