diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index dba48ff..0000000
--- a/LICENSE
+++ /dev/null
@@ -1,18 +0,0 @@
-MIT License
-
-Copyright (c) 2025 johwang
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
-associated documentation files (the "Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the
-following conditions:
-
-The above copyright notice and this permission notice shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
-LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
-EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
-USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.md b/README.md
index 61f14cd..bdd289c 100644
Binary files a/README.md and b/README.md differ
diff --git a/agent.json b/agent.json
new file mode 100644
index 0000000..686dc98
--- /dev/null
+++ b/agent.json
@@ -0,0 +1,193 @@
+{
+  "analyze_repo.predict": {
+    "traces": [],
+    "train": [],
+    "demos": [],
+    "signature": {
+      "instructions": "Analyze a repository structure and identify key components.",
+      "fields": [
+        {
+          "prefix": "Repo Url:",
+          "description": "GitHub repository URL"
+        },
+        {
+          "prefix": "File Tree:",
+          "description": "Repository file structure"
+        },
+        {
+          "prefix": "Readme Content:",
+          "description": "README.md content"
+        },
+        {
+          "prefix": "Reasoning: Let's think step by step in order to",
+          "description": "${reasoning}"
+        },
+        {
+          "prefix": "Project Purpose:",
+          "description": "Main purpose and goals of the project"
+        },
+        {
+          "prefix": "Key Concepts:",
+          "description": "List of important concepts and terminology"
+        },
+        {
+          "prefix": "Architecture Overview:",
+          "description": "High-level architecture description"
+        }
+      ]
+    },
+    "lm": {
+      "model": "gpt-4o",
+      "model_type": "chat",
+      "cache": true,
+      "num_retries": 3,
+      "finetuning_model": null,
+      "launch_kwargs": {},
+      "train_kwargs": {},
+      "temperature": 0.7,
+      "max_tokens": 8192
+    }
+  },
+  "analyze_structure.predict": {
+    "traces": [],
+    "train": [],
+    "demos": [],
+    "signature": {
+      "instructions": "Analyze code structure to identify important directories and files.",
+      "fields": [
+        {
+          "prefix": "File Tree:",
+          "description": "Repository file structure"
+        },
+        {
+          "prefix": "Package Files:",
+          "description": "Key package and configuration files"
+        },
+        {
+          "prefix": "Reasoning: Let's think step by step in order to",
+          "description": "${reasoning}"
+        },
+        {
+          "prefix": "Important Directories:",
+          "description": "Key directories and their purposes"
+        },
+        {
+          "prefix": "Entry Points:",
+          "description": "Main entry points and important files"
+        },
+        {
+          "prefix": "Development Info:",
+          "description": "Development setup and workflow information"
+        }
+      ]
+    },
+    "lm": {
+      "model": "gpt-4o",
+      "model_type": "chat",
+      "cache": true,
+      "num_retries": 3,
+      "finetuning_model": null,
+      "launch_kwargs": {},
+      "train_kwargs": {},
+      "temperature": 0.7,
+      "max_tokens": 8192
+    }
+  },
+  "generate_examples.predict": {
+    "traces": [],
+    "train": [],
+    "demos": [],
+    "signature": {
+      "instructions": "Given the fields `repo_info`, produce the fields `usage_examples`.",
+      "fields": [
+        {
+          "prefix": "Repo Info:",
+          "description": "${repo_info}"
+        },
+        {
+          "prefix": "Reasoning: Let's think step by step in order to",
+          "description": "${reasoning}"
+        },
+        {
+          "prefix": "Usage Examples:",
+          "description": "${usage_examples}"
+        }
+      ]
+    },
+    "lm": {
+      "model": "gpt-4o",
+      "model_type": "chat",
+      "cache": true,
+      "num_retries": 3,
+      "finetuning_model": null,
+      "launch_kwargs": {},
+      "train_kwargs": {},
+      "temperature": 0.7,
+      "max_tokens": 8192
+    }
+  },
+  "generate_llms_txt.predict": {
+    "traces": [],
+    "train": [],
+    "demos": [],
+    "signature": {
+      "instructions": "Generate a comprehensive llms.txt file from analyzed repository information.",
+      "fields": [
+        {
+          "prefix": "Project Purpose:",
+          "description": "${project_purpose}"
+        },
+        {
+          "prefix": "Key Concepts:",
+          "description": "${key_concepts}"
+        },
+        {
+          "prefix": "Architecture Overview:",
+          "description": "${architecture_overview}"
+        },
+        {
+          "prefix": "Important Directories:",
+          "description": "${important_directories}"
+        },
+        {
+          "prefix": "Entry Points:",
+          "description": "${entry_points}"
+        },
+        {
+          "prefix": "Development Info:",
+          "description": "${development_info}"
+        },
+        {
+          "prefix": "Usage Examples:",
+          "description": "Common usage patterns and examples"
+        },
+        {
+          "prefix": "Reasoning: Let's think step by step in order to",
+          "description": "${reasoning}"
+        },
+        {
+          "prefix": "Llms Txt Content:",
+          "description": "Complete llms.txt file content following the standard format"
+        }
+      ]
+    },
+    "lm": {
+      "model": "gpt-4o",
+      "model_type": "chat",
+      "cache": true,
+      "num_retries": 3,
+      "finetuning_model": null,
+      "launch_kwargs": {},
+      "train_kwargs": {},
+      "temperature": 0.7,
+      "max_tokens": 8192
+    }
+  },
+  "metadata": {
+    "dependency_versions": {
+      "python": "3.13",
+      "dspy": "3.0.4",
+      "cloudpickle": "3.1"
+    }
+  }
+}
\ No newline at end of file
diff --git a/auto_classes.json b/auto_classes.json
new file mode 100644
index 0000000..dde64b5
--- /dev/null
+++ b/auto_classes.json
@@ -0,0 +1,4 @@
+{
+  "AutoConfig": "src.analyzer.RepositoryAnalyzerConfig",
+  "AutoAgent": "src.analyzer.RepositoryAnalyzer"
+}
\ No newline at end of file
diff --git a/compile.py b/compile.py
new file mode 100644
index 0000000..181e03a
--- /dev/null
+++ b/compile.py
@@ -0,0 +1,15 @@
+import sys
+from src.analyzer import RepositoryAnalyzer, RepositoryAnalyzerConfig
+
+llms_txt_generator = RepositoryAnalyzer(
+    config=RepositoryAnalyzerConfig()
+)
+
+def main():
+    try:
+        llms_txt_generator.push_to_hub("johwang/llms-txt-generator", with_code=True)
+    except Exception as e:
+        print(f"Error: {e}", file=sys.stderr)
+
+if __name__ == "__main__":
+    main()
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..cd004d0
--- /dev/null
+++ b/config.json
@@ -0,0 +1,7 @@
+{
+  "repo_analyzer_model": "gpt-4o",
+  "code_analyzer_model": "gpt-4o",
+  "llms_txt_generator_model": "gpt-4o",
+  "max_tokens": 8192,
+  "temperature": 0.7
+}
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..12f864e
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,17 @@
+[project]
+name = "llms-txt-generator"
+version = "0.1.0"
+description = "Generate llms.txt documentation for GitHub repositories using DSPy"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = ["dspy>=3.0.4", "requests>=2.31.0", "python-dotenv>=1.0.0"]
+
+[project.scripts]
+llmstxt-gen = "main:main"
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.4.0",
+    "black>=23.0.0",
+    "ruff>=0.1.0",
+]
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..aeaad7f
--- /dev/null
+++ b/src/__init__.py
@@ -0,0 +1,14 @@
+from .signatures import AnalyzeRepository, AnalyzeCodeStructure, GenerateLLMsTxt
+from .analyzer import RepositoryAnalyzer, RepositoryAnalyzerConfig
+from .github_utils import gather_repository_info
+from .generator import generate_llms_txt
+
+__all__ = [
+    "AnalyzeRepository",
+    "AnalyzeCodeStructure",
+    "GenerateLLMsTxt",
+    "RepositoryAnalyzer",
+    "RepositoryAnalyzerConfig",
+    "gather_repository_info",
+    "generate_llms_txt",
+]
diff --git a/src/analyzer.py b/src/analyzer.py
new file mode 100644
index 0000000..20c682c
--- /dev/null
+++ b/src/analyzer.py
@@ -0,0 +1,119 @@
+import os
+from typing import Optional
+
+import dspy
+from modaic import PrecompiledAgent, PrecompiledConfig
+
+from .github_utils import gather_repository_info
+from .signatures import AnalyzeRepository, AnalyzeCodeStructure, GenerateLLMsTxt
+
+
+class RepositoryAnalyzerConfig(PrecompiledConfig):
+    """Configuration for RepositoryAnalyzer."""
+
+    repo_analyzer_model: str = "gpt-4o"
+    code_analyzer_model: str = "gpt-4o"
+    llms_txt_generator_model: str = "gpt-4o"
+    max_tokens: int = 8192
+    temperature: float = 0.7
+
+
+class RepositoryAnalyzer(PrecompiledAgent):
+    """Analyzes repository and generates llms.txt documentation."""
+
+    config: RepositoryAnalyzerConfig
+
+    def __init__(self, config: RepositoryAnalyzerConfig, **kwargs):
+        super().__init__(config, **kwargs)
+        self.repo_analyzer_lm = dspy.LM(
+            config.repo_analyzer_model,
+            max_tokens=config.max_tokens,
+            temperature=config.temperature,
+        )
+        self.code_analyzer_lm = dspy.LM(
+            config.code_analyzer_model,
+            max_tokens=config.max_tokens,
+            temperature=config.temperature,
+        )
+        self.llms_txt_generator_lm = dspy.LM(
+            config.llms_txt_generator_model,
+            max_tokens=config.max_tokens,
+            temperature=config.temperature,
+        )
+        self.analyze_repo = dspy.ChainOfThought(AnalyzeRepository)
+        self.analyze_repo.set_lm(self.repo_analyzer_lm)
+        self.analyze_structure = dspy.ChainOfThought(AnalyzeCodeStructure)
+        self.analyze_structure.set_lm(self.code_analyzer_lm)
+        self.generate_examples = dspy.ChainOfThought("repo_info -> usage_examples")
+        self.generate_examples.set_lm(self.code_analyzer_lm)
+        self.generate_llms_txt = dspy.ChainOfThought(GenerateLLMsTxt)
+        self.generate_llms_txt.set_lm(self.llms_txt_generator_lm)
+
+    def forward(
+        self,
+        repo_url: str,
+        output_file: str = "llms.txt",
+        github_token: Optional[str] = None,
+    ):
+        """
+        Analyze repository and generate llms.txt content.
+
+        This method handles the complete pipeline:
+        1. Fetches repository information from GitHub
+        2. Analyzes repository structure and purpose
+        3. Generates llms.txt documentation
+        4. Writes the result to a file
+
+        Args:
+            repo_url: GitHub repository URL (e.g., https://github.com/stanfordnlp/dspy)
+            output_file: Path to output file (default: llms.txt)
+            github_token: Optional GitHub access token (defaults to the GITHUB_ACCESS_TOKEN env var)
+
+        Returns:
+            dspy.Prediction with llms_txt_content, analysis, and structure
+        """
+        print(f"Fetching repository information from {repo_url}...")
+        file_tree, readme_content, package_files = gather_repository_info(
+            repo_url, token=github_token or os.environ.get("GITHUB_ACCESS_TOKEN")
+        )
+
+        print("Analyzing repository structure and purpose...")
+        repo_analysis = self.analyze_repo(
+            repo_url=repo_url, file_tree=file_tree, readme_content=readme_content
+        )
+
+        print("Analyzing code structure...")
+        structure_analysis = self.analyze_structure(
+            file_tree=file_tree, package_files=package_files
+        )
+
+        print("Generating usage examples...")
+        usage_examples = self.generate_examples(
+            repo_info=f"Purpose: {repo_analysis.project_purpose}\nConcepts: {repo_analysis.key_concepts}"
+        )
+
+        print("Generating llms.txt content...")
+        llms_txt = self.generate_llms_txt(
+            project_purpose=repo_analysis.project_purpose,
+            key_concepts=repo_analysis.key_concepts,
+            architecture_overview=repo_analysis.architecture_overview,
+            important_directories=structure_analysis.important_directories,
+            entry_points=structure_analysis.entry_points,
+            development_info=structure_analysis.development_info,
+            usage_examples=usage_examples.usage_examples,
+        )
+
+        with open(output_file, "w") as f:
+            f.write(llms_txt.llms_txt_content)
+
+        print(f"\nGenerated llms.txt saved to: {output_file}")
+        print(f"\nPreview (first 500 characters):")
+        print(llms_txt.llms_txt_content[:500] + "...")
+
+        return dspy.Prediction(
+            llms_txt_content=llms_txt.llms_txt_content,
+            analysis=repo_analysis,
+            structure=structure_analysis,
+        )
+
+
diff --git a/src/generator.py b/src/generator.py
new file mode 100644
index 0000000..003482a
--- /dev/null
+++ b/src/generator.py
@@ -0,0 +1,56 @@
+from typing import Optional
+
+from dotenv import load_dotenv
+
+from .analyzer import RepositoryAnalyzer, RepositoryAnalyzerConfig
+
+load_dotenv()
+
+
+def generate_llms_txt(
+    repo_url: str,
+    output_file: str = "llms.txt",
+    model: str = "gpt-4o",
+    github_token: Optional[str] = None,
+) -> str:
+    """
+    Generate llms.txt file for a given repository.
+
+    This is a convenience wrapper around RepositoryAnalyzer that handles
+    configuration and invokes the analyzer's forward method.
+
+    Args:
+        repo_url: GitHub repository URL (e.g., https://github.com/stanfordnlp/dspy)
+        output_file: Path to output file (default: llms.txt)
+        model: Language model to use for analysis (default: gpt-4o)
+        github_token: GitHub access token (optional, can also use GITHUB_ACCESS_TOKEN env var)
+
+    Returns:
+        The generated llms.txt content
+
+    Example:
+        >>> result = generate_llms_txt("https://github.com/stanfordnlp/dspy")
+        >>> print(result[:100])
+    """
+    # Create analyzer configuration
+    config = RepositoryAnalyzerConfig(
+        repo_analyzer_model=model,
+        code_analyzer_model=model,
+        llms_txt_generator_model=model,
+    )
+
+    # Create analyzer instance
+    analyzer = RepositoryAnalyzer(config=config)
+
+    # Run the complete pipeline
+    result = analyzer(
+        repo_url=repo_url,
+        output_file=output_file,
+        github_token=github_token,
+    )
+
+    return result.llms_txt_content
+
+
+if __name__ == "__main__":
+    generate_llms_txt("https://github.com/stanfordnlp/dspy")
diff --git a/src/github_utils.py b/src/github_utils.py
new file mode 100644
index 0000000..4dd0708
--- /dev/null
+++ b/src/github_utils.py
@@ -0,0 +1,126 @@
+import base64
+import os
+from typing import Optional
+from dotenv import load_dotenv
+
+import requests
+
+load_dotenv()
+
+
+def get_github_file_tree(repo_url: str, token: Optional[str] = None) -> str:
+    """
+    Get repository file structure from GitHub API.
+
+    Args:
+        repo_url: GitHub repository URL (e.g., https://github.com/owner/repo)
+        token: Optional GitHub access token for authentication
+
+    Returns:
+        Newline-separated list of file paths in the repository
+
+    Raises:
+        Exception: If the API request fails
+    """
+    # extract owner/repo from URL
+    parts = repo_url.rstrip("/").split("/")
+    owner, repo = parts[-2], parts[-1]
+
+    # try both main and master branches
+    for branch in ["main", "master"]:
+        api_url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{branch}?recursive=1"
+        headers = {}
+
+        if token:
+            headers["Authorization"] = f"Bearer {token}"
+        elif os.environ.get("GITHUB_ACCESS_TOKEN"):
+            headers["Authorization"] = f"Bearer {os.environ.get('GITHUB_ACCESS_TOKEN')}"
+
+        response = requests.get(api_url, headers=headers)
+
+        if response.status_code == 200:
+            tree_data = response.json()
+            file_paths = [
+                item["path"] for item in tree_data["tree"] if item["type"] == "blob"
+            ]
+            return "\n".join(sorted(file_paths))
+        elif response.status_code != 404:
+            raise Exception(
+                f"Failed to fetch repository tree: {response.status_code} - {response.text}"
+            )
+
+    raise Exception(f"Could not fetch repository tree. Tried branches: main, master")
+
+
+def get_github_file_content(
+    repo_url: str, file_path: str, token: Optional[str] = None
+) -> str:
+    """
+    Get specific file content from GitHub.
+
+    Args:
+        repo_url: GitHub repository URL
+        file_path: Path to the file within the repository
+        token: Optional GitHub access token for authentication
+
+    Returns:
+        Content of the file as a string
+    """
+    parts = repo_url.rstrip("/").split("/")
+    owner, repo = parts[-2], parts[-1]
+
+    api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}"
+    headers = {}
+
+    if token:
+        headers["Authorization"] = f"Bearer {token}"
+    elif os.environ.get("GITHUB_ACCESS_TOKEN"):
+        headers["Authorization"] = f"Bearer {os.environ.get('GITHUB_ACCESS_TOKEN')}"
+
+    response = requests.get(api_url, headers=headers)
+
+    if response.status_code == 200:
+        content = base64.b64decode(response.json()["content"]).decode("utf-8")
+        return content
+    else:
+        return f"Could not fetch {file_path}"
+
+
+def gather_repository_info(
+    repo_url: str, token: Optional[str] = None
+) -> tuple[str, str, str]:
+    """
+    Gather all necessary repository information.
+
+    Args:
+        repo_url: GitHub repository URL
+        token: Optional GitHub access token for authentication
+
+    Returns:
+        Tuple of (file_tree, readme_content, package_files_content)
+    """
+    file_tree = get_github_file_tree(repo_url, token)
+    readme_content = get_github_file_content(repo_url, "README.md", token)
+
+    # get key package files
+    package_files = []
+    for file_path in [
+        "pyproject.toml",
+        "setup.py",
+        "requirements.txt",
+        "package.json",
+        "Cargo.toml",
+        "go.mod",
+    ]:
+        try:
+            content = get_github_file_content(repo_url, file_path, token)
+            if "Could not fetch" not in content:
+                package_files.append(f"=== {file_path} ===\n{content}")
+        except Exception:
+            continue
+
+    package_files_content = (
+        "\n\n".join(package_files) if package_files else "No package files found"
+    )
+
+    return file_tree, readme_content, package_files_content
diff --git a/src/signatures.py b/src/signatures.py
new file mode 100644
index 0000000..7d4a97c
--- /dev/null
+++ b/src/signatures.py
@@ -0,0 +1,52 @@
+import dspy
+
+
+class AnalyzeRepository(dspy.Signature):
+    """Analyze a repository structure and identify key components."""
+
+    repo_url: str = dspy.InputField(desc="GitHub repository URL")
+    file_tree: str = dspy.InputField(desc="Repository file structure")
+    readme_content: str = dspy.InputField(desc="README.md content")
+
+    project_purpose: str = dspy.OutputField(
+        desc="Main purpose and goals of the project"
+    )
+    key_concepts: list[str] = dspy.OutputField(
+        desc="List of important concepts and terminology"
+    )
+    architecture_overview: str = dspy.OutputField(
+        desc="High-level architecture description"
+    )
+
+
+class AnalyzeCodeStructure(dspy.Signature):
+    """Analyze code structure to identify important directories and files."""
+
+    file_tree: str = dspy.InputField(desc="Repository file structure")
+    package_files: str = dspy.InputField(desc="Key package and configuration files")
+
+    important_directories: list[str] = dspy.OutputField(
+        desc="Key directories and their purposes"
+    )
+    entry_points: list[str] = dspy.OutputField(
+        desc="Main entry points and important files"
+    )
+    development_info: str = dspy.OutputField(
+        desc="Development setup and workflow information"
+    )
+
+
+class GenerateLLMsTxt(dspy.Signature):
+    """Generate a comprehensive llms.txt file from analyzed repository information."""
+
+    project_purpose: str = dspy.InputField()
+    key_concepts: list[str] = dspy.InputField()
+    architecture_overview: str = dspy.InputField()
+    important_directories: list[str] = dspy.InputField()
+    entry_points: list[str] = dspy.InputField()
+    development_info: str = dspy.InputField()
+    usage_examples: str = dspy.InputField(desc="Common usage patterns and examples")
+
+    llms_txt_content: str = dspy.OutputField(
+        desc="Complete llms.txt file content following the standard format"
+    )
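Usage sketch for the pipeline introduced above: the call below mirrors the __main__ block in src/generator.py and assumes an OPENAI_API_KEY is available (in the environment or a .env file picked up by load_dotenv) for the gpt-4o calls made through dspy; a GITHUB_ACCESS_TOKEN is optional, for private repositories or higher API rate limits.

    # minimal sketch; OPENAI_API_KEY (and optionally GITHUB_ACCESS_TOKEN) are
    # assumed to be set in the environment or in a local .env file
    from src.generator import generate_llms_txt

    # analyze a public repository and write llms.txt to the working directory
    content = generate_llms_txt(
        "https://github.com/stanfordnlp/dspy",
        output_file="llms.txt",
    )
    print(content[:200])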