(no commit message)
This commit is contained in:
18
LICENSE
18
LICENSE
@@ -1,18 +0,0 @@
|
|||||||
MIT License
|
|
||||||
|
|
||||||
Copyright (c) 2025 johwang
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
|
|
||||||
associated documentation files (the "Software"), to deal in the Software without restriction, including
|
|
||||||
without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the
|
|
||||||
following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all copies or substantial
|
|
||||||
portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
|
||||||
LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
|
|
||||||
EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
|
||||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
|
||||||
USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
193
agent.json
Normal file
193
agent.json
Normal file
@@ -0,0 +1,193 @@
|
|||||||
|
{
|
||||||
|
"analyze_repo.predict": {
|
||||||
|
"traces": [],
|
||||||
|
"train": [],
|
||||||
|
"demos": [],
|
||||||
|
"signature": {
|
||||||
|
"instructions": "Analyze a repository structure and identify key components.",
|
||||||
|
"fields": [
|
||||||
|
{
|
||||||
|
"prefix": "Repo Url:",
|
||||||
|
"description": "GitHub repository URL"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "File Tree:",
|
||||||
|
"description": "Repository file structure"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Readme Content:",
|
||||||
|
"description": "README.md content"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Reasoning: Let's think step by step in order to",
|
||||||
|
"description": "${reasoning}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Project Purpose:",
|
||||||
|
"description": "Main purpose and goals of the project"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Key Concepts:",
|
||||||
|
"description": "List of important concepts and terminology"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Architecture Overview:",
|
||||||
|
"description": "High-level architecture description"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"lm": {
|
||||||
|
"model": "gpt-4o",
|
||||||
|
"model_type": "chat",
|
||||||
|
"cache": true,
|
||||||
|
"num_retries": 3,
|
||||||
|
"finetuning_model": null,
|
||||||
|
"launch_kwargs": {},
|
||||||
|
"train_kwargs": {},
|
||||||
|
"temperature": 0.7,
|
||||||
|
"max_tokens": 8192
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"analyze_structure.predict": {
|
||||||
|
"traces": [],
|
||||||
|
"train": [],
|
||||||
|
"demos": [],
|
||||||
|
"signature": {
|
||||||
|
"instructions": "Analyze code structure to identify important directories and files.",
|
||||||
|
"fields": [
|
||||||
|
{
|
||||||
|
"prefix": "File Tree:",
|
||||||
|
"description": "Repository file structure"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Package Files:",
|
||||||
|
"description": "Key package and configuration files"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Reasoning: Let's think step by step in order to",
|
||||||
|
"description": "${reasoning}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Important Directories:",
|
||||||
|
"description": "Key directories and their purposes"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Entry Points:",
|
||||||
|
"description": "Main entry points and important files"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Development Info:",
|
||||||
|
"description": "Development setup and workflow information"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"lm": {
|
||||||
|
"model": "gpt-4o",
|
||||||
|
"model_type": "chat",
|
||||||
|
"cache": true,
|
||||||
|
"num_retries": 3,
|
||||||
|
"finetuning_model": null,
|
||||||
|
"launch_kwargs": {},
|
||||||
|
"train_kwargs": {},
|
||||||
|
"temperature": 0.7,
|
||||||
|
"max_tokens": 8192
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"generate_examples.predict": {
|
||||||
|
"traces": [],
|
||||||
|
"train": [],
|
||||||
|
"demos": [],
|
||||||
|
"signature": {
|
||||||
|
"instructions": "Given the fields `repo_info`, produce the fields `usage_examples`.",
|
||||||
|
"fields": [
|
||||||
|
{
|
||||||
|
"prefix": "Repo Info:",
|
||||||
|
"description": "${repo_info}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Reasoning: Let's think step by step in order to",
|
||||||
|
"description": "${reasoning}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Usage Examples:",
|
||||||
|
"description": "${usage_examples}"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"lm": {
|
||||||
|
"model": "gpt-4o",
|
||||||
|
"model_type": "chat",
|
||||||
|
"cache": true,
|
||||||
|
"num_retries": 3,
|
||||||
|
"finetuning_model": null,
|
||||||
|
"launch_kwargs": {},
|
||||||
|
"train_kwargs": {},
|
||||||
|
"temperature": 0.7,
|
||||||
|
"max_tokens": 8192
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"generate_llms_txt.predict": {
|
||||||
|
"traces": [],
|
||||||
|
"train": [],
|
||||||
|
"demos": [],
|
||||||
|
"signature": {
|
||||||
|
"instructions": "Generate a comprehensive llms.txt file from analyzed repository information.",
|
||||||
|
"fields": [
|
||||||
|
{
|
||||||
|
"prefix": "Project Purpose:",
|
||||||
|
"description": "${project_purpose}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Key Concepts:",
|
||||||
|
"description": "${key_concepts}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Architecture Overview:",
|
||||||
|
"description": "${architecture_overview}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Important Directories:",
|
||||||
|
"description": "${important_directories}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Entry Points:",
|
||||||
|
"description": "${entry_points}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Development Info:",
|
||||||
|
"description": "${development_info}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Usage Examples:",
|
||||||
|
"description": "Common usage patterns and examples"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Reasoning: Let's think step by step in order to",
|
||||||
|
"description": "${reasoning}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prefix": "Llms Txt Content:",
|
||||||
|
"description": "Complete llms.txt file content following the standard format"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"lm": {
|
||||||
|
"model": "gpt-4o",
|
||||||
|
"model_type": "chat",
|
||||||
|
"cache": true,
|
||||||
|
"num_retries": 3,
|
||||||
|
"finetuning_model": null,
|
||||||
|
"launch_kwargs": {},
|
||||||
|
"train_kwargs": {},
|
||||||
|
"temperature": 0.7,
|
||||||
|
"max_tokens": 8192
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"dependency_versions": {
|
||||||
|
"python": "3.13",
|
||||||
|
"dspy": "3.0.4",
|
||||||
|
"cloudpickle": "3.1"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
4
auto_classes.json
Normal file
4
auto_classes.json
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
{
|
||||||
|
"AutoConfig": "src.analyzer.RepositoryAnalyzerConfig",
|
||||||
|
"AutoAgent": "src.analyzer.RepositoryAnalyzer"
|
||||||
|
}
|
||||||
15
compile.py
Normal file
15
compile.py
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
import sys

from src.analyzer import RepositoryAnalyzer, RepositoryAnalyzerConfig

# Module-level agent instance: kept at top level so `compile` can also be
# imported and reused, matching the original layout.
llms_txt_generator = RepositoryAnalyzer(
    config=RepositoryAnalyzerConfig()
)


def main():
    """Push the precompiled llms.txt generator agent to the hub.

    Exits with a nonzero status on failure so shells/CI can detect the
    error (the original printed the error but still exited 0).
    """
    try:
        llms_txt_generator.push_to_hub("johwang/llms-txt-generator", with_code=True)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)  # propagate failure instead of silently succeeding


if __name__ == "__main__":
    main()
|
||||||
7
config.json
Normal file
7
config.json
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"repo_analyzer_model": "gpt-4o",
|
||||||
|
"code_analyzer_model": "gpt-4o",
|
||||||
|
"llms_txt_generator_model": "gpt-4o",
|
||||||
|
"max_tokens": 8192,
|
||||||
|
"temperature": 0.7
|
||||||
|
}
|
||||||
17
pyproject.toml
Normal file
17
pyproject.toml
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
[project]
|
||||||
|
name = "llms-txt-generator"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Generate llms.txt documentation for GitHub repositories using DSPy"
|
||||||
|
readme = "README.md"
|
||||||
|
requires-python = ">=3.10"
|
||||||
|
dependencies = ["dspy>=3.0.4", "requests>=2.31.0", "python-dotenv>=1.0.0"]
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
llmstxt-gen = "main:main"
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
dev = [
|
||||||
|
"pytest>=7.4.0",
|
||||||
|
"black>=23.0.0",
|
||||||
|
"ruff>=0.1.0",
|
||||||
|
]
|
||||||
14
src/__init__.py
Normal file
14
src/__init__.py
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
"""Public API for the llms.txt generator package.

Re-exports the DSPy signatures, the analyzer agent and its config, the
GitHub helper, and the high-level `generate_llms_txt` convenience function.
"""

from .signatures import AnalyzeRepository, AnalyzeCodeStructure, GenerateLLMsTxt
from .analyzer import RepositoryAnalyzer, RepositoryAnalyzerConfig
from .github_utils import gather_repository_info
from .generator import generate_llms_txt

# Explicit public API surface of the package.
__all__ = [
    "AnalyzeRepository",
    "AnalyzeCodeStructure",
    "GenerateLLMsTxt",
    "RepositoryAnalyzer",
    "RepositoryAnalyzerConfig",
    "gather_repository_info",
    "generate_llms_txt",
]
|
||||||
117
src/analyzer.py
Normal file
117
src/analyzer.py
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
import os
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import dspy
|
||||||
|
from modaic import PrecompiledAgent, PrecompiledConfig
|
||||||
|
|
||||||
|
from .github_utils import gather_repository_info
|
||||||
|
from .signatures import AnalyzeRepository, AnalyzeCodeStructure, GenerateLLMsTxt
|
||||||
|
|
||||||
|
|
||||||
|
class RepositoryAnalyzerConfig(PrecompiledConfig):
    """Configuration for RepositoryAnalyzer."""

    # Model identifier for each pipeline stage; all default to gpt-4o but
    # can be set independently (see src/generator.py, which sets all three).
    repo_analyzer_model: str = "gpt-4o"
    code_analyzer_model: str = "gpt-4o"
    llms_txt_generator_model: str = "gpt-4o"
    # Generation limits shared by every LM the analyzer builds.
    max_tokens: int = 8192
    temperature: float = 0.7
|
||||||
|
|
||||||
|
|
||||||
|
class RepositoryAnalyzer(PrecompiledAgent):
    """Analyzes a repository and generates llms.txt documentation.

    Runs four DSPy ChainOfThought stages in sequence: repository analysis,
    code-structure analysis, usage-example generation, and final llms.txt
    generation. Each stage gets its own LM so models can differ per stage.
    """

    config: RepositoryAnalyzerConfig

    def __init__(self, config: RepositoryAnalyzerConfig, **kwargs):
        super().__init__(config, **kwargs)

        # One LM per stage; construction is factored into _build_lm since
        # all three share max_tokens/temperature from the config.
        self.repo_analyzer_lm = self._build_lm(config.repo_analyzer_model, config)
        self.code_analyzer_lm = self._build_lm(config.code_analyzer_model, config)
        self.llms_txt_generator_lm = self._build_lm(
            config.llms_txt_generator_model, config
        )

        self.analyze_repo = dspy.ChainOfThought(AnalyzeRepository)
        self.analyze_repo.set_lm(self.repo_analyzer_lm)
        self.analyze_structure = dspy.ChainOfThought(AnalyzeCodeStructure)
        self.analyze_structure.set_lm(self.code_analyzer_lm)
        # Inline signature: free-form usage examples from a repo summary.
        self.generate_examples = dspy.ChainOfThought("repo_info -> usage_examples")
        self.generate_examples.set_lm(self.code_analyzer_lm)
        self.generate_llms_txt = dspy.ChainOfThought(GenerateLLMsTxt)
        self.generate_llms_txt.set_lm(self.llms_txt_generator_lm)

    @staticmethod
    def _build_lm(model: str, config: RepositoryAnalyzerConfig) -> dspy.LM:
        """Create a dspy.LM using the shared token/temperature settings."""
        return dspy.LM(
            model,
            max_tokens=config.max_tokens,
            temperature=config.temperature,
        )

    def forward(
        self,
        repo_url: str,
        output_file: str = "llms.txt",
        github_token: Optional[str] = None,
    ):
        """
        Analyze repository and generate llms.txt content.

        This method handles the complete pipeline:
        1. Fetches repository information from GitHub
        2. Analyzes repository structure and purpose
        3. Generates llms.txt documentation
        4. Writes the result to a file

        Args:
            repo_url: GitHub repository URL (e.g., https://github.com/stanfordnlp/dspy)
            output_file: Path to output file (default: llms.txt)
            github_token: Optional GitHub access token; falls back to the
                GITHUB_ACCESS_TOKEN environment variable when omitted.
                (Added so callers such as src/generator.py, which already
                pass github_token, no longer fail with TypeError.)

        Returns:
            dspy.Prediction with llms_txt_content, analysis, and structure
        """
        print(f"Fetching repository information from {repo_url}...")
        # An explicitly supplied token wins; otherwise use the environment.
        token = github_token or os.environ.get("GITHUB_ACCESS_TOKEN")
        file_tree, readme_content, package_files = gather_repository_info(
            repo_url, token=token
        )

        print("Analyzing repository structure and purpose...")
        repo_analysis = self.analyze_repo(
            repo_url=repo_url, file_tree=file_tree, readme_content=readme_content
        )

        print("Analyzing code structure...")
        structure_analysis = self.analyze_structure(
            file_tree=file_tree, package_files=package_files
        )

        print("Generating usage examples...")
        usage_examples = self.generate_examples(
            repo_info=f"Purpose: {repo_analysis.project_purpose}\nConcepts: {repo_analysis.key_concepts}"
        )

        print("Generating llms.txt content...")
        llms_txt = self.generate_llms_txt(
            project_purpose=repo_analysis.project_purpose,
            key_concepts=repo_analysis.key_concepts,
            architecture_overview=repo_analysis.architecture_overview,
            important_directories=structure_analysis.important_directories,
            entry_points=structure_analysis.entry_points,
            development_info=structure_analysis.development_info,
            usage_examples=usage_examples.usage_examples,
        )

        with open(output_file, "w") as f:
            f.write(llms_txt.llms_txt_content)

        print(f"\nGenerated llms.txt saved to: {output_file}")
        print("\nPreview (first 500 characters):")
        print(llms_txt.llms_txt_content[:500] + "...")

        return dspy.Prediction(
            llms_txt_content=llms_txt.llms_txt_content,
            analysis=repo_analysis,
            structure=structure_analysis,
        )
|
||||||
|
|
||||||
|
|
||||||
56
src/generator.py
Normal file
56
src/generator.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
from .analyzer import RepositoryAnalyzer, RepositoryAnalyzerConfig
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
def generate_llms_txt(
    repo_url: str,
    output_file: str = "llms.txt",
    model: str = "gpt-4o",
    github_token: Optional[str] = None,
) -> str:
    """
    Generate llms.txt file for a given repository.

    This is a convenience wrapper around RepositoryAnalyzer that handles
    configuration and invokes the analyzer's forward method.

    Args:
        repo_url: GitHub repository URL (e.g., https://github.com/stanfordnlp/dspy)
        output_file: Path to output file (default: llms.txt)
        model: Language model to use for analysis (default: gpt-4o)
        github_token: GitHub access token (optional, can also use GITHUB_ACCESS_TOKEN env var)

    Returns:
        The generated llms.txt content

    Example:
        >>> result = generate_llms_txt("https://github.com/stanfordnlp/dspy")
        >>> print(result[:100])
    """
    import os  # local import: only needed to forward the token via the env

    # Use the same model for every pipeline stage.
    config = RepositoryAnalyzerConfig(
        repo_analyzer_model=model,
        code_analyzer_model=model,
        llms_txt_generator_model=model,
    )

    analyzer = RepositoryAnalyzer(config=config)

    # Bug fix: RepositoryAnalyzer.forward reads GITHUB_ACCESS_TOKEN from the
    # environment and did not accept a github_token keyword, so the original
    # call `analyzer(..., github_token=github_token)` raised TypeError.
    # Publish an explicitly supplied token through the environment instead.
    if github_token:
        os.environ["GITHUB_ACCESS_TOKEN"] = github_token

    # Run the complete pipeline.
    result = analyzer(
        repo_url=repo_url,
        output_file=output_file,
    )

    return result.llms_txt_content


if __name__ == "__main__":
    generate_llms_txt("https://github.com/stanfordnlp/dspy")
|
||||||
126
src/github_utils.py
Normal file
126
src/github_utils.py
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
import base64
|
||||||
|
import os
|
||||||
|
from typing import Optional
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
def get_github_file_tree(repo_url: str, token: Optional[str] = None) -> str:
    """
    Get repository file structure from GitHub API.

    Args:
        repo_url: GitHub repository URL (e.g., https://github.com/owner/repo)
        token: Optional GitHub access token for authentication

    Returns:
        Newline-separated, sorted list of file paths in the repository

    Raises:
        Exception: If the API request fails or neither default branch exists
    """
    # extract owner/repo from URL
    parts = repo_url.rstrip("/").split("/")
    owner, repo = parts[-2], parts[-1]

    # Build auth headers once (the original rebuilt them per branch);
    # an explicitly supplied token wins over the environment variable.
    headers = {}
    auth_token = token or os.environ.get("GITHUB_ACCESS_TOKEN")
    if auth_token:
        headers["Authorization"] = f"Bearer {auth_token}"

    # try both main and master branches
    for branch in ["main", "master"]:
        api_url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{branch}?recursive=1"

        # timeout prevents hanging forever on a stalled connection
        response = requests.get(api_url, headers=headers, timeout=30)

        if response.status_code == 200:
            tree_data = response.json()
            # "blob" entries are files; "tree" entries are directories.
            file_paths = [
                item["path"] for item in tree_data["tree"] if item["type"] == "blob"
            ]
            return "\n".join(sorted(file_paths))
        elif response.status_code != 404:
            # 404 just means this branch doesn't exist; anything else is fatal.
            raise Exception(
                f"Failed to fetch repository tree: {response.status_code} - {response.text}"
            )

    raise Exception("Could not fetch repository tree. Tried branches: main, master")
|
||||||
|
|
||||||
|
|
||||||
|
def get_github_file_content(
    repo_url: str, file_path: str, token: Optional[str] = None
) -> str:
    """
    Get specific file content from GitHub.

    Args:
        repo_url: GitHub repository URL
        file_path: Path to the file within the repository
        token: Optional GitHub access token for authentication

    Returns:
        Content of the file as a string, or the sentinel string
        "Could not fetch {file_path}" when the request fails.
        (Callers such as gather_repository_info rely on this sentinel.)
    """
    parts = repo_url.rstrip("/").split("/")
    owner, repo = parts[-2], parts[-1]

    api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}"

    # Explicit token wins over the environment variable.
    headers = {}
    auth_token = token or os.environ.get("GITHUB_ACCESS_TOKEN")
    if auth_token:
        headers["Authorization"] = f"Bearer {auth_token}"

    # timeout prevents hanging forever on a stalled connection
    response = requests.get(api_url, headers=headers, timeout=30)

    if response.status_code == 200:
        # The contents API returns the file body base64-encoded.
        content = base64.b64decode(response.json()["content"]).decode("utf-8")
        return content
    else:
        # Best-effort sentinel: callers check for this instead of an exception.
        return f"Could not fetch {file_path}"
|
||||||
|
|
||||||
|
|
||||||
|
def gather_repository_info(
    repo_url: str, token: Optional[str] = None
) -> tuple[str, str, str]:
    """
    Gather all necessary repository information.

    Args:
        repo_url: GitHub repository URL
        token: Optional GitHub access token for authentication

    Returns:
        Tuple of (file_tree, readme_content, package_files_content)
    """
    file_tree = get_github_file_tree(repo_url, token)
    readme_content = get_github_file_content(repo_url, "README.md", token)

    # get key package/manifest files across common ecosystems
    package_files = []
    for file_path in [
        "pyproject.toml",
        "setup.py",
        "requirements.txt",
        "package.json",
        "Cargo.toml",
        "go.mod",
    ]:
        try:
            content = get_github_file_content(repo_url, file_path, token)
            # get_github_file_content returns exactly this sentinel on
            # failure; compare for equality so a real file that merely
            # contains the phrase "Could not fetch" is not dropped
            # (the original used a fragile substring test).
            if content != f"Could not fetch {file_path}":
                package_files.append(f"=== {file_path} ===\n{content}")
        except Exception:
            continue  # best-effort: skip files that error out

    package_files_content = (
        "\n\n".join(package_files) if package_files else "No package files found"
    )

    return file_tree, readme_content, package_files_content
|
||||||
52
src/signatures.py
Normal file
52
src/signatures.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
import dspy
|
||||||
|
|
||||||
|
|
||||||
|
class AnalyzeRepository(dspy.Signature):
    """Analyze a repository structure and identify key components."""

    # Inputs gathered from the GitHub API (see github_utils.gather_repository_info).
    repo_url: str = dspy.InputField(desc="GitHub repository URL")
    file_tree: str = dspy.InputField(desc="Repository file structure")
    readme_content: str = dspy.InputField(desc="README.md content")

    # High-level findings produced by the LM; consumed by GenerateLLMsTxt.
    project_purpose: str = dspy.OutputField(
        desc="Main purpose and goals of the project"
    )
    key_concepts: list[str] = dspy.OutputField(
        desc="List of important concepts and terminology"
    )
    architecture_overview: str = dspy.OutputField(
        desc="High-level architecture description"
    )
|
||||||
|
|
||||||
|
|
||||||
|
class AnalyzeCodeStructure(dspy.Signature):
    """Analyze code structure to identify important directories and files."""

    # Inputs: the file listing plus key manifest files (pyproject, etc.).
    file_tree: str = dspy.InputField(desc="Repository file structure")
    package_files: str = dspy.InputField(desc="Key package and configuration files")

    # Structural findings; consumed by GenerateLLMsTxt.
    important_directories: list[str] = dspy.OutputField(
        desc="Key directories and their purposes"
    )
    entry_points: list[str] = dspy.OutputField(
        desc="Main entry points and important files"
    )
    development_info: str = dspy.OutputField(
        desc="Development setup and workflow information"
    )
|
||||||
|
|
||||||
|
|
||||||
|
class GenerateLLMsTxt(dspy.Signature):
    """Generate a comprehensive llms.txt file from analyzed repository information."""

    # Inputs aggregated from AnalyzeRepository and AnalyzeCodeStructure,
    # plus free-form usage examples from a separate generation step.
    project_purpose: str = dspy.InputField()
    key_concepts: list[str] = dspy.InputField()
    architecture_overview: str = dspy.InputField()
    important_directories: list[str] = dspy.InputField()
    entry_points: list[str] = dspy.InputField()
    development_info: str = dspy.InputField()
    usage_examples: str = dspy.InputField(desc="Common usage patterns and examples")

    # Final artifact written to disk by RepositoryAnalyzer.forward.
    llms_txt_content: str = dspy.OutputField(
        desc="Complete llms.txt file content following the standard format"
    )
|
||||||
Reference in New Issue
Block a user