diff --git a/README.md b/README.md
index 7293362..e69de29 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +0,0 @@
-# ruby-rails-doc-generator
-
diff --git a/auto_classes.json b/auto_classes.json
new file mode 100644
index 0000000..27f024b
--- /dev/null
+++ b/auto_classes.json
@@ -0,0 +1,4 @@
+{
+  "AutoConfig": "main.DocWriterConfig",
+  "AutoProgram": "main.DocWriterProgram"
+}
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..fb8026c
--- /dev/null
+++ b/config.json
@@ -0,0 +1,6 @@
+{
+  "model": null,
+  "max_iterations": 10,
+  "lm": "openai/gpt-5-mini",
+  "sub_lm": "openai/gpt-5-mini"
+}
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..90a2182
--- /dev/null
+++ b/main.py
@@ -0,0 +1,133 @@
+import dspy
+import os
+from typing import Any
+from modaic import PrecompiledConfig, PrecompiledProgram
+
+# Directory names skipped by load_source_tree(): VCS metadata, virtualenvs and
+# dependency/bytecode caches are not application source.
+DEFAULT_IGNORED_DIRS = (".git", ".venv", "node_modules", "__pycache__")
+
+lm = dspy.LM("openai/gpt-5-mini", max_tokens=16000)
+lm_mini = dspy.LM("openai/gpt-5-mini", max_tokens=16000)
+dspy.configure(lm=lm)
+
+
+# NOTE: DSPy uses the signature docstring as the prompt instructions (it is the
+# text stored in program.json), so editing it changes program behavior.
+class DocWriter(dspy.Signature):
+    """
+    Write documentation for the provided Rails application source code.
+    """
+
+    source_tree: dict[str, Any] = dspy.InputField()
+    documentation: str = dspy.OutputField(
+        description="Generated markdown documentation."
+    )
+
+
+class DocWriterConfig(PrecompiledConfig):
+    # Upper bound on REPL iterations, passed to dspy.RLM.
+    max_iterations: int = 10
+    # Main model name. NOTE: not read in this module; the LM given to
+    # dspy.configure() is the module-level `lm`.
+    lm: str = "openai/gpt-5-mini"
+    # Model name for the LM handed to dspy.RLM as `sub_lm`.
+    sub_lm: str = "openai/gpt-5-mini"
+
+
+class DocWriterProgram(PrecompiledProgram):
+    """Generate Markdown documentation for a source tree with dspy.RLM."""
+
+    config: DocWriterConfig
+
+    def __init__(self, config: DocWriterConfig, **kwargs):
+        super().__init__(config=config, **kwargs)
+        self.doc_writer = dspy.RLM(
+            DocWriter,
+            max_iterations=config.max_iterations,
+            # RLM calls sub_lm as an LM object, so build one from the
+            # model-name string stored in the config.
+            sub_lm=dspy.LM(config.sub_lm, max_tokens=16000),
+            verbose=True,
+        )
+
+    def forward(
+        self, source_root: str, source_tree: dict[str, Any] | None = None
+    ) -> dspy.Prediction:
+        """Run the documentation writer.
+
+        Args:
+            source_root: Directory loaded when ``source_tree`` is None.
+            source_tree: Pre-loaded nested dict of the sources; when given it
+                is used as-is instead of being overwritten from disk.
+
+        Returns:
+            The prediction from the RLM module; the generated text is in its
+            ``documentation`` field.
+        """
+        if source_tree is None:
+            source_tree = load_source_tree(source_root)
+        return self.doc_writer(source_tree=source_tree)
+
+
+def load_source_tree(
+    root_dir: str, ignored_dirs: tuple[str, ...] = DEFAULT_IGNORED_DIRS
+) -> dict[str, Any]:
+    """Recursively load the folder into a nested dict.
+
+    Sub-directories become nested dicts and files become their text content,
+    decoded as UTF-8 with undecodable bytes dropped.
+
+    Args:
+        root_dir: Directory to load.
+        ignored_dirs: Directory names to skip at every level.
+
+    Returns:
+        Mapping of entry name to file text or nested mapping.
+    """
+    tree: dict[str, Any] = {}
+    # os.listdir() order is arbitrary; sort so the tree is reproducible.
+    for entry in sorted(os.listdir(root_dir)):
+        path = os.path.join(root_dir, entry)
+        if os.path.isdir(path):
+            if entry in ignored_dirs:
+                continue
+            tree[entry] = load_source_tree(path, ignored_dirs)
+        else:
+            with open(path, "r", encoding="utf-8", errors="ignore") as f:
+                tree[entry] = f.read()
+    return tree
+
+
+doc_writer = DocWriterProgram(DocWriterConfig())
+SOURCE_ROOT = "."
+
+
+def main(generate: bool = False) -> None:
+    """Push the program to the hub, optionally generating documentation first.
+
+    Args:
+        generate: When True, run the program on ``SOURCE_ROOT`` and write the
+            result to ``generated_documentation.md`` before pushing. The
+            default only pushes, as before.
+    """
+    if generate:
+        print("Starting documentation generation...")
+        result = doc_writer(source_root=SOURCE_ROOT)
+
+        with open("generated_documentation.md", "w", encoding="utf-8") as f:
+            print("Writing documentation to file...")
+            f.write(result.documentation)
+        print("Documentation written to generated_documentation.md")
+
+        print("Pushing documentation to hub...")
+
+    doc_writer.push_to_hub("farouk1/ruby-rails-doc-generator")
+    if generate:
+        print("Documentation generated and pushed to hub!")
+    else:
+        print("Program pushed to hub!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/program.json b/program.json
new file mode 100644
index 0000000..41d1d29
--- /dev/null
+++ b/program.json
@@ -0,0 +1,63 @@
+{
+  "doc_writer.generate_action": {
+    "traces": [],
+    "train": [],
+    "demos": [],
+    "signature": {
+      "instructions": "Write documentation for the provided Rails application source code.\n\nYou are tasked with producing the following outputs given the inputs `source_tree`:\n- {documentation}\n\nYou have access to a Python REPL environment. Write Python code and it will be executed. You will see the output, then write more code based on what you learned. This is an iterative process.\n\nAvailable:\n- Variables: `source_tree` (your input data)\n- `llm_query(prompt)` - query a sub-LLM (~500K char capacity) for semantic analysis\n- `llm_query_batched(prompts)` - query multiple prompts concurrently (much faster for multiple queries)\n- `print()` - ALWAYS print to see results\n- `SUBMIT(documentation)` - submit final output when done\n- Standard libraries: re, json, collections, math, etc.\n\nIMPORTANT: This is ITERATIVE. Each code block you write will execute, you'll see the output, then you decide what to do next. Do NOT try to solve everything in one step.\n\n1. EXPLORE FIRST - Look at your data before processing it. Print samples, check types/lengths, understand the structure.\n2. ITERATE - Write small code snippets, observe outputs, then decide next steps. State persists between iterations.\n3. VERIFY BEFORE SUBMITTING - If results seem wrong (zeros, empty, unexpected), reconsider your approach.\n4. USE llm_query FOR SEMANTICS - String matching finds WHERE things are; llm_query understands WHAT things mean.\n5. MINIMIZE RETYPING (INPUTS & OUTPUTS) - When values are long, precise, or error-prone (IDs, numbers, code, quotes), re-access them via variables and parse/compute in code instead of retyping. Use small, targeted prints to sanity-check, but avoid manual copying when variables can carry the exact value.\n6. SUBMIT ONLY AFTER SEEING OUTPUTS - SUBMIT ends the current run immediately. If you need to inspect printed output, run it in one step, review the result, then call SUBMIT in a later step.\n\nYou have max 50 sub-LLM calls. When done, call SUBMIT() with your output.",
+      "fields": [
+        {
+          "prefix": "Variables Info:",
+          "description": "Metadata about the variables available in the REPL"
+        },
+        {
+          "prefix": "Repl History:",
+          "description": "Previous REPL code executions and their outputs"
+        },
+        {
+          "prefix": "Iteration:",
+          "description": "Current iteration number (1-indexed) out of max_iterations"
+        },
+        {
+          "prefix": "Reasoning:",
+          "description": "Think step-by-step: what do you know? What remains? Plan your next action."
+        },
+        {
+          "prefix": "Code:",
+          "description": "Python code to execute."
+        }
+      ]
+    },
+    "lm": null
+  },
+  "doc_writer.extract": {
+    "traces": [],
+    "train": [],
+    "demos": [],
+    "signature": {
+      "instructions": "The trajectory was generated with the following objective: \nWrite documentation for the provided Rails application source code.\n\n\nBased on the REPL trajectory, extract the final outputs now.\n\n Review your trajectory to see what information you gathered and what values you computed, then provide the final outputs.",
+      "fields": [
+        {
+          "prefix": "Variables Info:",
+          "description": "Metadata about the variables available in the REPL"
+        },
+        {
+          "prefix": "Repl History:",
+          "description": "Your REPL interactions so far"
+        },
+        {
+          "prefix": "Documentation:",
+          "description": "Generated markdown documentation."
+        }
+      ]
+    },
+    "lm": null
+  },
+  "metadata": {
+    "dependency_versions": {
+      "python": "3.13",
+      "dspy": "3.1.2",
+      "cloudpickle": "3.1"
+    }
+  }
+}
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..6b0ac4c
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,7 @@
+[project]
+name = "ruby-rails-doc-generator"
+version = "0.1.0"
+description = "DSPy RLM program that generates Markdown documentation for Ruby on Rails applications."
+readme = "README.md"
+requires-python = ">=3.13"
+dependencies = ["dspy>=3.1.2", "modaic>=0.10.2"]