import os
from typing import Optional

import dspy
from modaic import PrecompiledAgent, PrecompiledConfig

from .github_utils import gather_repository_info
from .signatures import AnalyzeRepository, AnalyzeCodeStructure, GenerateLLMsTxt

# Number of leading characters of the generated document echoed to stdout.
_PREVIEW_CHARS = 500


class RepositoryAnalyzerConfig(PrecompiledConfig):
    """Configuration for RepositoryAnalyzer."""

    # Model used by the repository-level analysis step.
    repo_analyzer_model: str = "gpt-5.1-2025-11-13"
    # Model used by the code-structure analysis and usage-example steps.
    code_analyzer_model: str = "gpt-5.1-2025-11-13"
    # Model used by the final llms.txt generation step.
    llms_txt_generator_model: str = "gpt-4o"
    # Sampling settings shared by all three language models.
    # NOTE(review): these values look chosen to satisfy constraints some
    # reasoning models place on temperature/max_tokens -- confirm before
    # lowering them.
    max_tokens: int = 16000
    temperature: float = 1.0


def _build_lm(config: RepositoryAnalyzerConfig, model: str) -> dspy.LM:
    """Create a ``dspy.LM`` for ``model`` using the shared sampling settings."""
    return dspy.LM(
        model,
        max_tokens=config.max_tokens,
        temperature=config.temperature,
    )


class RepositoryAnalyzer(PrecompiledAgent):
    """Analyzes repository and generates llms.txt documentation."""

    config: RepositoryAnalyzerConfig

    def __init__(self, config: RepositoryAnalyzerConfig, **kwargs):
        """
        Build the language models and the DSPy modules for each pipeline step.

        Args:
            config: Model names and sampling settings for the pipeline.
            **kwargs: Forwarded unchanged to ``PrecompiledAgent.__init__``.
        """
        super().__init__(config, **kwargs)

        # One LM per role so each step can be pointed at a different model.
        self.repo_analyzer_lm = _build_lm(config, config.repo_analyzer_model)
        self.code_analyzer_lm = _build_lm(config, config.code_analyzer_model)
        self.llms_txt_generator_lm = _build_lm(
            config, config.llms_txt_generator_model
        )

        self.analyze_repo = dspy.ChainOfThought(AnalyzeRepository)
        self.analyze_repo.set_lm(self.repo_analyzer_lm)

        self.analyze_structure = dspy.ChainOfThought(AnalyzeCodeStructure)
        self.analyze_structure.set_lm(self.code_analyzer_lm)

        # Usage examples use an inline string signature and share the
        # code-analysis model.
        self.generate_examples = dspy.ChainOfThought("repo_info -> usage_examples")
        self.generate_examples.set_lm(self.code_analyzer_lm)

        self.generate_llms_txt = dspy.ChainOfThought(GenerateLLMsTxt)
        self.generate_llms_txt.set_lm(self.llms_txt_generator_lm)

    def forward(
        self,
        repo_url: str,
        output_file: str = "llms.txt",
    ) -> dspy.Prediction:
        """
        Analyze repository and generate llms.txt content.

        This method handles the complete pipeline:
        1. Fetches repository information from GitHub
        2. Analyzes repository structure and purpose
        3. Generates usage examples and the llms.txt documentation
        4. Writes the result to a file (UTF-8) and prints a short preview

        The GitHub token is read from the ``GITHUB_ACCESS_TOKEN`` environment
        variable; ``None`` is passed to the fetcher when it is unset.

        Args:
            repo_url: GitHub repository URL (e.g., https://github.com/stanfordnlp/dspy)
            output_file: Path to output file (default: llms.txt)

        Returns:
            dspy.Prediction with llms_txt_content, analysis, and structure
        """
        print(f"Fetching repository information from {repo_url}...")
        file_tree, readme_content, package_files = gather_repository_info(
            repo_url, token=os.environ.get("GITHUB_ACCESS_TOKEN")
        )

        print("Analyzing repository structure and purpose...")
        repo_analysis = self.analyze_repo(
            repo_url=repo_url, file_tree=file_tree, readme_content=readme_content
        )

        print("Analyzing code structure...")
        structure_analysis = self.analyze_structure(
            file_tree=file_tree, package_files=package_files
        )

        print("Generating usage examples...")
        usage_examples = self.generate_examples(
            repo_info=f"Purpose: {repo_analysis.project_purpose}\nConcepts: {repo_analysis.key_concepts}"
        )

        print("Generating llms.txt content...")
        llms_txt = self.generate_llms_txt(
            project_purpose=repo_analysis.project_purpose,
            key_concepts=repo_analysis.key_concepts,
            architecture_overview=repo_analysis.architecture_overview,
            important_directories=structure_analysis.important_directories,
            entry_points=structure_analysis.entry_points,
            development_info=structure_analysis.development_info,
            usage_examples=usage_examples.usage_examples,
        )
        content = llms_txt.llms_txt_content

        # Generated text routinely contains non-ASCII characters; pin the
        # encoding so the write does not depend on the platform's locale.
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(content)

        print(f"\nGenerated llms.txt saved to: {output_file}")
        print(f"\nPreview (first {_PREVIEW_CHARS} characters):")
        preview = content[:_PREVIEW_CHARS]
        # Only signal truncation when something was actually cut off.
        if len(content) > _PREVIEW_CHARS:
            preview += "..."
        print(preview)

        return dspy.Prediction(
            llms_txt_content=content,
            analysis=repo_analysis,
            structure=structure_analysis,
        )