118 lines
4.3 KiB
Python
118 lines
4.3 KiB
Python
import os
|
|
from typing import Optional
|
|
|
|
import dspy
|
|
from modaic import PrecompiledAgent, PrecompiledConfig
|
|
|
|
from .github_utils import gather_repository_info
|
|
from .signatures import AnalyzeRepository, AnalyzeCodeStructure, GenerateLLMsTxt
|
|
|
|
|
|
class RepositoryAnalyzerConfig(PrecompiledConfig):
    """Configuration for RepositoryAnalyzer."""

    # Model identifiers, one per language model that RepositoryAnalyzer
    # constructs: repository analysis, code-structure analysis (also used for
    # usage-example generation), and final llms.txt generation.
    repo_analyzer_model: str = "gpt-5.1-2025-11-13"
    code_analyzer_model: str = "gpt-5.1-2025-11-13"
    llms_txt_generator_model: str = "gpt-4o"

    # Generation settings; RepositoryAnalyzer passes the same pair of values
    # to every dspy.LM it creates.
    max_tokens: int = 16000
    temperature: float = 1.0
|
|
|
|
|
|
class RepositoryAnalyzer(PrecompiledAgent):
    """Analyzes repository and generates llms.txt documentation.

    The agent wires four ``dspy.ChainOfThought`` modules to three language
    models built from the supplied config:

    * ``analyze_repo`` uses ``config.repo_analyzer_model``
    * ``analyze_structure`` and ``generate_examples`` use
      ``config.code_analyzer_model``
    * ``generate_llms_txt`` uses ``config.llms_txt_generator_model``
    """

    config: RepositoryAnalyzerConfig

    def __init__(self, config: RepositoryAnalyzerConfig, **kwargs):
        """Build the language models and the prediction modules.

        Args:
            config: Model names plus the ``max_tokens`` / ``temperature``
                values shared by every language model created here.
            **kwargs: Forwarded unchanged to ``PrecompiledAgent.__init__``.
        """
        super().__init__(config, **kwargs)

        def build_lm(model_name: str) -> dspy.LM:
            # All three LMs differ only in the model name; the generation
            # settings always come from the same config fields.
            return dspy.LM(
                model_name,
                max_tokens=config.max_tokens,
                temperature=config.temperature,
            )

        self.repo_analyzer_lm = build_lm(config.repo_analyzer_model)
        self.code_analyzer_lm = build_lm(config.code_analyzer_model)
        self.llms_txt_generator_lm = build_lm(config.llms_txt_generator_model)

        self.analyze_repo = dspy.ChainOfThought(AnalyzeRepository)
        self.analyze_repo.set_lm(self.repo_analyzer_lm)

        self.analyze_structure = dspy.ChainOfThought(AnalyzeCodeStructure)
        self.analyze_structure.set_lm(self.code_analyzer_lm)

        # Usage examples are produced with an inline string signature and
        # share the code-analysis LM.
        self.generate_examples = dspy.ChainOfThought("repo_info -> usage_examples")
        self.generate_examples.set_lm(self.code_analyzer_lm)

        self.generate_llms_txt = dspy.ChainOfThought(GenerateLLMsTxt)
        self.generate_llms_txt.set_lm(self.llms_txt_generator_lm)

    def forward(
        self,
        repo_url: str,
        output_file: str = "llms.txt",
    ) -> dspy.Prediction:
        """Analyze repository and generate llms.txt content.

        This method handles the complete pipeline:

        1. Fetches repository information from GitHub
        2. Analyzes repository structure and purpose
        3. Generates usage examples from the analysis
        4. Generates llms.txt documentation
        5. Writes the result to a file

        Progress messages and a short preview of the generated content are
        printed to stdout. The GitHub token is read from the
        ``GITHUB_ACCESS_TOKEN`` environment variable; ``None`` is passed to
        ``gather_repository_info`` when it is unset.

        Args:
            repo_url: GitHub repository URL
                (e.g., https://github.com/stanfordnlp/dspy)
            output_file: Path to output file (default: llms.txt). The file is
                overwritten and written as UTF-8.

        Returns:
            dspy.Prediction with ``llms_txt_content`` (the generated text),
            ``analysis`` (the repository analysis prediction) and
            ``structure`` (the code-structure analysis prediction).

        Raises:
            OSError: If ``output_file`` cannot be opened for writing.
        """
        preview_length = 500

        print(f"Fetching repository information from {repo_url}...")
        file_tree, readme_content, package_files = gather_repository_info(
            repo_url, token=os.environ.get("GITHUB_ACCESS_TOKEN")
        )

        print("Analyzing repository structure and purpose...")
        repo_analysis = self.analyze_repo(
            repo_url=repo_url, file_tree=file_tree, readme_content=readme_content
        )

        print("Analyzing code structure...")
        structure_analysis = self.analyze_structure(
            file_tree=file_tree, package_files=package_files
        )

        print("Generating usage examples...")
        usage_examples = self.generate_examples(
            repo_info=(
                f"Purpose: {repo_analysis.project_purpose}\n"
                f"Concepts: {repo_analysis.key_concepts}"
            )
        )

        print("Generating llms.txt content...")
        llms_txt = self.generate_llms_txt(
            project_purpose=repo_analysis.project_purpose,
            key_concepts=repo_analysis.key_concepts,
            architecture_overview=repo_analysis.architecture_overview,
            important_directories=structure_analysis.important_directories,
            entry_points=structure_analysis.entry_points,
            development_info=structure_analysis.development_info,
            usage_examples=usage_examples.usage_examples,
        )
        content = llms_txt.llms_txt_content

        # Explicit UTF-8: model output routinely contains non-ASCII characters,
        # and the platform default encoding (e.g. cp1252 on Windows) may be
        # unable to encode them.
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(content)

        print(f"\nGenerated llms.txt saved to: {output_file}")
        print("\nPreview (first 500 characters):")
        # Only show an ellipsis when the preview actually cuts the content.
        preview = content[:preview_length]
        if len(content) > preview_length:
            preview += "..."
        print(preview)

        return dspy.Prediction(
            llms_txt_content=content,
            analysis=repo_analysis,
            structure=structure_analysis,
        )
|
|
|
|
|