(no commit message)
This commit is contained in:
117
src/analyzer.py
Normal file
117
src/analyzer.py
Normal file
@@ -0,0 +1,117 @@
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
import dspy
|
||||
from modaic import PrecompiledAgent, PrecompiledConfig
|
||||
|
||||
from .github_utils import gather_repository_info
|
||||
from .signatures import AnalyzeRepository, AnalyzeCodeStructure, GenerateLLMsTxt
|
||||
|
||||
|
||||
class RepositoryAnalyzerConfig(PrecompiledConfig):
    """Settings consumed by RepositoryAnalyzer when it builds its language models.

    Each pipeline stage has its own model name so the stages can be pointed at
    different models; the token limit and temperature are shared by all of them.
    """

    # Model name handed to dspy.LM for the repository purpose/concept analysis.
    repo_analyzer_model: str = "gpt-4o"

    # Model name handed to dspy.LM for code-structure analysis and usage examples.
    code_analyzer_model: str = "gpt-4o"

    # Model name handed to dspy.LM for producing the final llms.txt text.
    llms_txt_generator_model: str = "gpt-4o"

    # Passed as max_tokens to every dspy.LM the analyzer creates.
    max_tokens: int = 8192

    # Passed as temperature to every dspy.LM the analyzer creates.
    temperature: float = 0.7
|
||||
|
||||
|
||||
class RepositoryAnalyzer(PrecompiledAgent):
    """Analyzes repository and generates llms.txt documentation.

    The agent chains four ``dspy.ChainOfThought`` predictors: repository
    analysis, code-structure analysis, usage-example generation and the final
    llms.txt generation. Each predictor is bound to the language model selected
    for it in ``RepositoryAnalyzerConfig``.
    """

    config: RepositoryAnalyzerConfig

    def __init__(self, config: RepositoryAnalyzerConfig, **kwargs):
        """Build the language models and predictors described by ``config``.

        Args:
            config: Model names plus the shared ``max_tokens`` and
                ``temperature`` settings.
            **kwargs: Forwarded unchanged to ``PrecompiledAgent.__init__``.
        """
        super().__init__(config, **kwargs)

        # All three LMs share the same sampling settings; only the model differs.
        lm_settings = {
            "max_tokens": config.max_tokens,
            "temperature": config.temperature,
        }
        self.repo_analyzer_lm = dspy.LM(config.repo_analyzer_model, **lm_settings)
        self.code_analyzer_lm = dspy.LM(config.code_analyzer_model, **lm_settings)
        self.llms_txt_generator_lm = dspy.LM(
            config.llms_txt_generator_model, **lm_settings
        )

        self.analyze_repo = dspy.ChainOfThought(AnalyzeRepository)
        self.analyze_repo.set_lm(self.repo_analyzer_lm)

        self.analyze_structure = dspy.ChainOfThought(AnalyzeCodeStructure)
        self.analyze_structure.set_lm(self.code_analyzer_lm)

        # Usage examples use an inline string signature and reuse the code
        # analyzer's LM rather than a dedicated one.
        self.generate_examples = dspy.ChainOfThought("repo_info -> usage_examples")
        self.generate_examples.set_lm(self.code_analyzer_lm)

        self.generate_llms_txt = dspy.ChainOfThought(GenerateLLMsTxt)
        self.generate_llms_txt.set_lm(self.llms_txt_generator_lm)

    def forward(
        self,
        repo_url: str,
        output_file: str = "llms.txt",
    ) -> dspy.Prediction:
        """
        Analyze repository and generate llms.txt content.

        This method handles the complete pipeline:
        1. Fetches repository information from GitHub
        2. Analyzes repository structure and purpose
        3. Generates llms.txt documentation
        4. Writes the result to a file

        The GitHub token is read from the ``GITHUB_ACCESS_TOKEN`` environment
        variable; ``None`` is passed to ``gather_repository_info`` when it is
        not set. Progress messages and a preview of the result are printed to
        standard output.

        Args:
            repo_url: GitHub repository URL (e.g., https://github.com/stanfordnlp/dspy)
            output_file: Path to output file (default: llms.txt). The file is
                written as UTF-8 and overwritten if it already exists.

        Returns:
            dspy.Prediction with llms_txt_content, analysis, and structure
        """
        print(f"Fetching repository information from {repo_url}...")
        file_tree, readme_content, package_files = gather_repository_info(
            repo_url, token=os.environ.get("GITHUB_ACCESS_TOKEN")
        )

        print("Analyzing repository structure and purpose...")
        repo_analysis = self.analyze_repo(
            repo_url=repo_url, file_tree=file_tree, readme_content=readme_content
        )

        print("Analyzing code structure...")
        structure_analysis = self.analyze_structure(
            file_tree=file_tree, package_files=package_files
        )

        print("Generating usage examples...")
        usage_examples = self.generate_examples(
            repo_info=f"Purpose: {repo_analysis.project_purpose}\nConcepts: {repo_analysis.key_concepts}"
        )

        print("Generating llms.txt content...")
        llms_txt = self.generate_llms_txt(
            project_purpose=repo_analysis.project_purpose,
            key_concepts=repo_analysis.key_concepts,
            architecture_overview=repo_analysis.architecture_overview,
            important_directories=structure_analysis.important_directories,
            entry_points=structure_analysis.entry_points,
            development_info=structure_analysis.development_info,
            usage_examples=usage_examples.usage_examples,
        )
        content = llms_txt.llms_txt_content

        # Explicit UTF-8: generated text may contain non-ASCII characters, and
        # the platform default encoding is not guaranteed to be able to encode
        # them (e.g. legacy Windows code pages).
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(content)

        print(f"\nGenerated llms.txt saved to: {output_file}")
        print("\nPreview (first 500 characters):")
        print(content[:500] + "...")

        return dspy.Prediction(
            llms_txt_content=content,
            analysis=repo_analysis,
            structure=structure_analysis,
        )
|
||||
|
||||
|
||||
Reference in New Issue
Block a user