diff --git a/nanocode.py b/nanocode.py
index f5b06ea..cd8508e 100644
--- a/nanocode.py
+++ b/nanocode.py
@@ -98,6 +98,38 @@ def edit_file(path: str, old: str, new: str, replace_all: bool = False) -> str:
     return "ok"
 
 
+def list_files(path: str = ".") -> str:
+    """List all files in a directory, respecting .gitignore.
+
+    Args:
+        path: Directory to list files from
+
+    Returns:
+        Newline-separated list of files, "no files found" when ripgrep
+        lists nothing, or a message starting with "error:" on failure
+    """
+    print(f"{MAGENTA}⏺ List: {path}{RESET}")
+
+    # "--" ends option parsing so a path starting with "-" is not read as a flag
+    cmd = ["rg", "--files", "--", path]
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
+    except FileNotFoundError:
+        return "error: ripgrep (rg) not installed - install with 'brew install ripgrep'"
+    except subprocess.TimeoutExpired:
+        return "error: search timed out after 30s"
+
+    files = result.stdout.strip()
+    if files:
+        return files
+    # Exit status 0 or 1 with empty output means nothing was listed; any other
+    # status is a real failure (e.g. a missing directory) and must be reported
+    if result.returncode not in (0, 1):
+        detail = result.stderr.strip() or f"rg exited with status {result.returncode}"
+        return f"error: {detail}"
+    return "no files found"
+
+
 def glob_files(pattern: str, path: str = ".") -> str:
     """Find files matching a glob pattern, sorted by modification time.
 
@@ -235,6 +267,7 @@ class RLMCodingProgram(PrecompiledProgram):
             "read_file": read_file,
             "write_file": write_file,
             "edit_file": edit_file,
+            "list_files": list_files,
             "glob_files": glob_files,
             "grep_files": grep_files,
             "run_bash": run_bash,
@@ -390,7 +423,7 @@ if __name__ == "__main__":
     for branch in branches:
         agent.push_to_hub(
             MODAIC_REPO_PATH,
-            commit_message="Migrate to ripgrep for glob_files",
+            commit_message="Add list_files tool",
             branch=branch,
         )
 
diff --git a/program.json b/program.json
index 5b4c653..7b69a8e 100644
--- a/program.json
+++ b/program.json
@@ -4,7 +4,7 @@
   "train": [],
   "demos": [],
   "signature": {
-    "instructions": "You are a concise coding assistant with access to sub agents.\n\nYou are tasked with producing the following outputs given the inputs `task`:\n- {answer}\n\nYou have access to a Python REPL environment. Write Python code and it will be executed. You will see the output, then write more code based on what you learned. 
This is an iterative process.\n\nAvailable:\n- Variables: `task` (your input data)\n- `llm_query(prompt)` - query a sub-LLM (~500K char capacity) for semantic analysis\n- `llm_query_batched(prompts)` - query multiple prompts concurrently (much faster for multiple queries)\n- `print()` - ALWAYS print to see results\n- `SUBMIT(answer)` - submit final output when done\n- Standard libraries: re, json, collections, math, etc.\n\nIMPORTANT: This is ITERATIVE. Each code block you write will execute, you'll see the output, then you decide what to do next. Do NOT try to solve everything in one step.\n\n1. EXPLORE FIRST - Look at your data before processing it. Print samples, check types/lengths, understand the structure.\n2. ITERATE - Write small code snippets, observe outputs, then decide next steps. State persists between iterations.\n3. VERIFY BEFORE SUBMITTING - If results seem wrong (zeros, empty, unexpected), reconsider your approach.\n4. USE llm_query FOR SEMANTICS - String matching finds WHERE things are; llm_query understands WHAT things mean.\n5. MINIMIZE RETYPING (INPUTS & OUTPUTS) - When values are long, precise, or error-prone (IDs, numbers, code, quotes), re-access them via variables and parse/compute in code instead of retyping. Use small, targeted prints to sanity-check, but avoid manual copying when variables can carry the exact value.\n6. SUBMIT ONLY AFTER SEEING OUTPUTS - SUBMIT ends the current run immediately. If you need to inspect printed output, run it in one step, review the result, then call SUBMIT in a later step.\n\nYou have max 50 sub-LLM calls. 
When done, call SUBMIT() with your output.\nAdditional tools available (use these instead of standard library equivalents):\n- `read_file(path: str, offset: int, limit: int) -> str` - Read file contents with line numbers.\n- `write_file(path: str, content: str) -> str` - Write content to a file.\n- `edit_file(path: str, old: str, new: str, replace_all: bool) -> str` - Replace text in a file.\n- `glob_files(pattern: str, path: str) -> str` - Find files matching a glob pattern, sorted by modification time.\n- `grep_files(pattern: str, path: str, glob: str, max_results: int) -> str` - Search files for a regex pattern using ripgrep.\n- `run_bash(cmd: str) -> str` - Run a shell command and return output.", + "instructions": "You are a concise coding assistant with access to sub agents.\n\nYou are tasked with producing the following outputs given the inputs `task`:\n- {answer}\n\nYou have access to a Python REPL environment. Write Python code and it will be executed. You will see the output, then write more code based on what you learned. This is an iterative process.\n\nAvailable:\n- Variables: `task` (your input data)\n- `llm_query(prompt)` - query a sub-LLM (~500K char capacity) for semantic analysis\n- `llm_query_batched(prompts)` - query multiple prompts concurrently (much faster for multiple queries)\n- `print()` - ALWAYS print to see results\n- `SUBMIT(answer)` - submit final output when done\n- Standard libraries: re, json, collections, math, etc.\n\nIMPORTANT: This is ITERATIVE. Each code block you write will execute, you'll see the output, then you decide what to do next. Do NOT try to solve everything in one step.\n\n1. EXPLORE FIRST - Look at your data before processing it. Print samples, check types/lengths, understand the structure.\n2. ITERATE - Write small code snippets, observe outputs, then decide next steps. State persists between iterations.\n3. VERIFY BEFORE SUBMITTING - If results seem wrong (zeros, empty, unexpected), reconsider your approach.\n4. 
USE llm_query FOR SEMANTICS - String matching finds WHERE things are; llm_query understands WHAT things mean.\n5. MINIMIZE RETYPING (INPUTS & OUTPUTS) - When values are long, precise, or error-prone (IDs, numbers, code, quotes), re-access them via variables and parse/compute in code instead of retyping. Use small, targeted prints to sanity-check, but avoid manual copying when variables can carry the exact value.\n6. SUBMIT ONLY AFTER SEEING OUTPUTS - SUBMIT ends the current run immediately. If you need to inspect printed output, run it in one step, review the result, then call SUBMIT in a later step.\n\nYou have max 50 sub-LLM calls. When done, call SUBMIT() with your output.\nAdditional tools available (use these instead of standard library equivalents):\n- `read_file(path: str, offset: int, limit: int) -> str` - Read file contents with line numbers.\n- `write_file(path: str, content: str) -> str` - Write content to a file.\n- `edit_file(path: str, old: str, new: str, replace_all: bool) -> str` - Replace text in a file.\n- `list_files(path: str) -> str` - List all files in a directory, respecting .gitignore.\n- `glob_files(pattern: str, path: str) -> str` - Find files matching a glob pattern, sorted by modification time.\n- `grep_files(pattern: str, path: str, glob: str, max_results: int) -> str` - Search files for a regex pattern using ripgrep.\n- `run_bash(cmd: str) -> str` - Run a shell command and return output.", "fields": [ { "prefix": "Variables Info:",