Remove list_files tool

This commit is contained in:
2026-01-31 13:18:46 -08:00
parent c973499445
commit e9396b7c25
3 changed files with 78 additions and 52 deletions

View File

@@ -2,7 +2,7 @@
Minimal Claude Code alternative using DSPy RLM! Single Python file, ~305 lines. Minimal Claude Code alternative using DSPy RLM! Single Python file, ~305 lines.
Shoutout to Rahul for inspiring the boilerplate. Here's his initial [nanocode implementation](https://x.com/rahulgs/status/2010179011033608227). Built using Claude Code, then used to build itself.
![screenshot](https://d1pz4mbco29rws.cloudfront.net/public/nanocode.png) ![screenshot](https://d1pz4mbco29rws.cloudfront.net/public/nanocode.png)
@@ -171,25 +171,29 @@ print(result.answer)
### Overview ### Overview
```python ```
class RLMCodingProgram(PrecompiledProgram): nanocode.py
config: RLMCodingConfig ├── File Operations
│ ├── read_file() - Read with line numbers
def forward(self, task: str) -> dspy.Prediction: ├── write_file() - Write content
# Returns prediction with .answer └── edit_file() - Find & replace
return self.agent(task=task) ├── Search Operations
│ ├── glob_files() - Pattern matching
def get_tools(self) -> dict: └── grep_files() - Regex search
# Returns dict of available tools ├── Shell Operations
│ └── run_bash() - Execute commands
def set_tool(self, name: str, tool: callable): ├── DSPy Components
# Add or replace a tool ├── CodingAssistant (Signature)
│ ├── RLMCodingProgram (PrecompiledProgram)
def remove_tool(self, name: str): │ ├── forward() - Run agent on task
# Remove a tool by name ├── get_tools() - Get available tools
│ │ ├── set_tool() - Add/replace a tool
def reload_lms(self): │ │ ├── remove_tool() - Remove a tool
# Recreate LM objects from current config ├── reload_lms() - Recreate LMs from config
│ │ └── load_state() - Load state with LM fix
│ └── RLMReasoningCallback
└── Modaic Integration
└── RLMCodingConfig (PrecompiledConfig)
``` ```
### Key Classes ### Key Classes

View File

@@ -1,8 +1,6 @@
import os import os
import glob as globlib
from modaic import PrecompiledProgram, PrecompiledConfig from modaic import PrecompiledProgram, PrecompiledConfig
import dspy import dspy
import re
import subprocess import subprocess
from dspy.utils.callback import BaseCallback from dspy.utils.callback import BaseCallback
@@ -36,13 +34,14 @@ def read_file(path: str, offset: int = 0, limit: int = None) -> str:
Returns: Returns:
File contents with line numbers File contents with line numbers
""" """
print(f"{MAGENTA}⏺ Reading file: {path}{RESET}")
lines = open(path).readlines() lines = open(path).readlines()
if limit is None: if limit is None:
limit = len(lines) limit = len(lines)
selected = lines[offset : offset + limit] selected = lines[offset : offset + limit]
return "".join(f"{offset + idx + 1:4}| {line}" for idx, line in enumerate(selected)) content = "".join(f"{offset + idx + 1:4}| {line}" for idx, line in enumerate(selected))
tokens = len(content) // 4 # ~4 chars per token estimate
print(f"{MAGENTA}⏺ Reading file: {path} (~{tokens:,} tokens){RESET}")
return content
def write_file(path: str, content: str) -> str: def write_file(path: str, content: str) -> str:
@@ -53,13 +52,23 @@ def write_file(path: str, content: str) -> str:
content: Content to write to the file content: Content to write to the file
Returns: Returns:
'ok' on success Status message with file stats
""" """
print(f"{MAGENTA}⏺ Creating file: {path}{RESET}") is_new = not os.path.exists(path)
action = "Creating" if is_new else "Overwriting"
# Auto-create parent directories
parent = os.path.dirname(path)
if parent:
os.makedirs(parent, exist_ok=True)
with open(path, "w") as f: with open(path, "w") as f:
f.write(content) f.write(content)
return "ok"
lines = content.count("\n") + (1 if content and not content.endswith("\n") else 0)
tokens = len(content) // 4
print(f"{MAGENTA}{action} file: {path} ({lines} lines, ~{tokens:,} tokens){RESET}")
return f"ok: wrote {lines} lines ({tokens:,} tokens) to {path}"
def edit_file(path: str, old: str, new: str, replace_all: bool = False) -> str: def edit_file(path: str, old: str, new: str, replace_all: bool = False) -> str:
@@ -89,7 +98,9 @@ def edit_file(path: str, old: str, new: str, replace_all: bool = False) -> str:
def glob_files(pattern: str, path: str = ".") -> str: def glob_files(pattern: str, path: str = ".") -> str:
"""Find files matching a glob pattern, sorted by modification time. """Find files by filename matching a glob pattern, sorted by modification time.
Respects .gitignore files automatically via ripgrep.
Args: Args:
pattern: Glob pattern to match (e.g., '**/*.py') pattern: Glob pattern to match (e.g., '**/*.py')
@@ -100,38 +111,49 @@ def glob_files(pattern: str, path: str = ".") -> str:
""" """
print(f"{MAGENTA}⏺ Glob: {pattern}{RESET}") print(f"{MAGENTA}⏺ Glob: {pattern}{RESET}")
full_pattern = (path + "/" + pattern).replace("//", "/") cmd = ["rg", "--files", "-g", pattern, path]
files = globlib.glob(full_pattern, recursive=True) try:
files = sorted( result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
files, files = result.stdout.strip().split("\n") if result.stdout.strip() else []
key=lambda f: os.path.getmtime(f) if os.path.isfile(f) else 0, files = sorted(
reverse=True, files,
) key=lambda f: os.path.getmtime(f) if os.path.isfile(f) else 0,
return "\n".join(files) or "no files found" reverse=True,
)
return "\n".join(files) or "no files found"
except FileNotFoundError:
return "error: ripgrep (rg) not installed - install with 'brew install ripgrep'"
except subprocess.TimeoutExpired:
return "error: search timed out after 30s"
def grep_files(pattern: str, path: str = ".") -> str: def grep_files(pattern: str, path: str = ".", glob: str = None, max_results: int = 50) -> str:
"""Search files for a regex pattern. """Search files for a regex pattern using ripgrep.
Args: Args:
pattern: Regular expression pattern to search for pattern: Regular expression pattern to search for
path: Base directory to search in path: Base directory to search in
glob: Optional glob pattern to filter files (e.g., '*.py')
max_results: Maximum number of results to return
Returns: Returns:
Matching lines in format 'filepath:line_num:content' Matching lines in format 'filepath:line_num:content'
""" """
print(f"{MAGENTA}⏺ Grep: {pattern}{RESET}") print(f"{MAGENTA}⏺ Grep: {pattern}{RESET}")
regex = re.compile(pattern) cmd = ["rg", "-n", "--no-heading", "--color=never", f"-m{max_results}"]
hits = [] if glob:
for filepath in globlib.glob(path + "/**", recursive=True): cmd.extend(["-g", glob])
try: cmd.extend([pattern, path])
for line_num, line in enumerate(open(filepath), 1):
if regex.search(line): try:
hits.append(f"{filepath}:{line_num}:{line.rstrip()}") result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
except Exception: output = result.stdout.strip()
pass return output if output else "no matches found"
return "\n".join(hits[:50]) or "no matches found" except FileNotFoundError:
return "error: ripgrep (rg) not installed - install with 'brew install ripgrep'"
except subprocess.TimeoutExpired:
return "error: search timed out after 30s"
# --- Shell operations --- # --- Shell operations ---
@@ -363,11 +385,11 @@ class RLMCodingProgram(PrecompiledProgram):
if __name__ == "__main__": if __name__ == "__main__":
agent = RLMCodingProgram(RLMCodingConfig()) agent = RLMCodingProgram(RLMCodingConfig())
#agent(task="explicitly call llm_query(who is the ceo of apple?) to get the answer to 'who is the ceo of apple?'") #agent(task="explicitly call llm_query(who is the ceo of apple?) to get the answer to 'who is the ceo of apple?'")
branches = ["main", "dev", "prod"] branches = ["dev", "main", "prod"]
for branch in branches: for branch in branches:
agent.push_to_hub( agent.push_to_hub(
MODAIC_REPO_PATH, MODAIC_REPO_PATH,
commit_message="Fix config override bug by recreating LMs after load_state", commit_message="Remove list_files tool",
branch=branch, branch=branch,
) )

View File

@@ -4,7 +4,7 @@
"train": [], "train": [],
"demos": [], "demos": [],
"signature": { "signature": {
"instructions": "You are a concise coding assistant with access to sub agents.\n\nYou are tasked with producing the following outputs given the inputs `task`:\n- {answer}\n\nYou have access to a Python REPL environment. Write Python code and it will be executed. You will see the output, then write more code based on what you learned. This is an iterative process.\n\nAvailable:\n- Variables: `task` (your input data)\n- `llm_query(prompt)` - query a sub-LLM (~500K char capacity) for semantic analysis\n- `llm_query_batched(prompts)` - query multiple prompts concurrently (much faster for multiple queries)\n- `print()` - ALWAYS print to see results\n- `SUBMIT(answer)` - submit final output when done\n- Standard libraries: re, json, collections, math, etc.\n\nIMPORTANT: This is ITERATIVE. Each code block you write will execute, you'll see the output, then you decide what to do next. Do NOT try to solve everything in one step.\n\n1. EXPLORE FIRST - Look at your data before processing it. Print samples, check types/lengths, understand the structure.\n2. ITERATE - Write small code snippets, observe outputs, then decide next steps. State persists between iterations.\n3. VERIFY BEFORE SUBMITTING - If results seem wrong (zeros, empty, unexpected), reconsider your approach.\n4. USE llm_query FOR SEMANTICS - String matching finds WHERE things are; llm_query understands WHAT things mean.\n5. MINIMIZE RETYPING (INPUTS & OUTPUTS) - When values are long, precise, or error-prone (IDs, numbers, code, quotes), re-access them via variables and parse/compute in code instead of retyping. Use small, targeted prints to sanity-check, but avoid manual copying when variables can carry the exact value.\n6. SUBMIT ONLY AFTER SEEING OUTPUTS - SUBMIT ends the current run immediately. If you need to inspect printed output, run it in one step, review the result, then call SUBMIT in a later step.\n\nYou have max 50 sub-LLM calls. 
When done, call SUBMIT() with your output.\nAdditional tools available (use these instead of standard library equivalents):\n- `read_file(path: str, offset: int, limit: int) -> str` - Read file contents with line numbers.\n- `write_file(path: str, content: str) -> str` - Write content to a file.\n- `edit_file(path: str, old: str, new: str, replace_all: bool) -> str` - Replace text in a file.\n- `glob_files(pattern: str, path: str) -> str` - Find files matching a glob pattern, sorted by modification time.\n- `grep_files(pattern: str, path: str) -> str` - Search files for a regex pattern.\n- `run_bash(cmd: str) -> str` - Run a shell command and return output.", "instructions": "You are a concise coding assistant with access to sub agents.\n\nYou are tasked with producing the following outputs given the inputs `task`:\n- {answer}\n\nYou have access to a Python REPL environment. Write Python code and it will be executed. You will see the output, then write more code based on what you learned. This is an iterative process.\n\nAvailable:\n- Variables: `task` (your input data)\n- `llm_query(prompt)` - query a sub-LLM (~500K char capacity) for semantic analysis\n- `llm_query_batched(prompts)` - query multiple prompts concurrently (much faster for multiple queries)\n- `print()` - ALWAYS print to see results\n- `SUBMIT(answer)` - submit final output when done\n- Standard libraries: re, json, collections, math, etc.\n\nIMPORTANT: This is ITERATIVE. Each code block you write will execute, you'll see the output, then you decide what to do next. Do NOT try to solve everything in one step.\n\n1. EXPLORE FIRST - Look at your data before processing it. Print samples, check types/lengths, understand the structure.\n2. ITERATE - Write small code snippets, observe outputs, then decide next steps. State persists between iterations.\n3. VERIFY BEFORE SUBMITTING - If results seem wrong (zeros, empty, unexpected), reconsider your approach.\n4. 
USE llm_query FOR SEMANTICS - String matching finds WHERE things are; llm_query understands WHAT things mean.\n5. MINIMIZE RETYPING (INPUTS & OUTPUTS) - When values are long, precise, or error-prone (IDs, numbers, code, quotes), re-access them via variables and parse/compute in code instead of retyping. Use small, targeted prints to sanity-check, but avoid manual copying when variables can carry the exact value.\n6. SUBMIT ONLY AFTER SEEING OUTPUTS - SUBMIT ends the current run immediately. If you need to inspect printed output, run it in one step, review the result, then call SUBMIT in a later step.\n\nYou have max 50 sub-LLM calls. When done, call SUBMIT() with your output.\nAdditional tools available (use these instead of standard library equivalents):\n- `read_file(path: str, offset: int, limit: int) -> str` - Read file contents with line numbers.\n- `write_file(path: str, content: str) -> str` - Write content to a file.\n- `edit_file(path: str, old: str, new: str, replace_all: bool) -> str` - Replace text in a file.\n- `glob_files(pattern: str, path: str) -> str` - Find files by filename matching a glob pattern, sorted by modification time.\n- `grep_files(pattern: str, path: str, glob: str, max_results: int) -> str` - Search files for a regex pattern using ripgrep.\n- `run_bash(cmd: str) -> str` - Run a shell command and return output.",
"fields": [ "fields": [
{ {
"prefix": "Variables Info:", "prefix": "Variables Info:",