Remove list_files tool

This commit is contained in:
2026-02-01 01:12:04 -08:00
parent 14325cc16e
commit 2e4a79e925
4 changed files with 47 additions and 27 deletions

View File

@@ -4,12 +4,8 @@ import dspy
import subprocess
from dspy.utils.callback import BaseCallback
# --- Modaic ---
MODAIC_REPO_PATH = "farouk1/nanocode"
# --- ANSI colors ---
RESET = "\033[0m"
BOLD = "\033[1m"
DIM = "\033[2m"
@@ -38,7 +34,9 @@ def read_file(path: str, offset: int = 0, limit: int = None) -> str:
if limit is None:
limit = len(lines)
selected = lines[offset : offset + limit]
content = "".join(f"{offset + idx + 1:4}| {line}" for idx, line in enumerate(selected))
content = "".join(
f"{offset + idx + 1:4}| {line}" for idx, line in enumerate(selected)
)
tokens = len(content) // 4 # ~4 chars per token estimate
print(f"{MAGENTA}⏺ Reading file({path}) (~{tokens:,} tokens){RESET}")
return content
@@ -67,7 +65,9 @@ def write_file(path: str, content: str) -> str:
lines = content.count("\n") + (1 if content and not content.endswith("\n") else 0)
tokens = len(content) // 4
print(f"{MAGENTA}{action} file({path}) ({lines} lines, ~{tokens:,} tokens){RESET}")
print(
f"{MAGENTA}{action} file({path}) ({lines} lines, ~{tokens:,} tokens){RESET}"
)
return f"ok: wrote {lines} lines ({tokens:,} tokens) to {path}"
@@ -98,7 +98,7 @@ def edit_file(path: str, old: str, new: str, replace_all: bool = False) -> str:
def glob_files(pattern: str, path: str = ".") -> str:
"""[EXTERNAL FILESYSTEM] Find files on disk matching a glob pattern.
"""[EXTERNAL FILESYSTEM] Do not use for simple file listing; use run_bash() instead. Find files on disk matching a glob pattern.
Respects .gitignore files automatically via ripgrep. Sorted by modification time.
@@ -111,7 +111,7 @@ def glob_files(pattern: str, path: str = ".") -> str:
"""
print(f"{MAGENTA}⏺ Glob({pattern}): {path}{RESET}")
cmd = ["rg", "--files", "-g", pattern, path]
cmd = ["rg", "--files", "--no-require-git", "-g", pattern, path]
try:
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
files = result.stdout.strip().split("\n") if result.stdout.strip() else []
@@ -127,7 +127,9 @@ def glob_files(pattern: str, path: str = ".") -> str:
return "error: search timed out after 30s"
def grep_files(pattern: str, path: str = ".", glob: str = None, max_results: int = 50) -> str:
def grep_files(
pattern: str, path: str = ".", glob: str = None, max_results: int = 50
) -> str:
"""[EXTERNAL FILESYSTEM] Search files on disk for a regex pattern using ripgrep.
Args:
@@ -206,19 +208,19 @@ class RLMReasoningCallback(BaseCallback):
class CodingAssistant(dspy.Signature):
"""You are a concise coding assistant.
CRITICAL - Two execution environments exist:
CRITICAL - Two execution environments exist:
1. INTERNAL REPL (sandbox): Standard Python code you write executes in an isolated sandbox. Variables persist between iterations. Use for data processing, string manipulation, logic, loops, etc.
1. INTERNAL REPL (sandbox): Standard Python code you write executes in an isolated sandbox. Variables persist between iterations. Use for data processing, string manipulation, logic, loops, etc.
2. EXTERNAL TOOLS (real system): Functions like read_file(), write_file(), run_bash(), glob_files(), grep_files() execute OUTSIDE the sandbox on the real filesystem and host machine. These have real, persistent side effects.
2. EXTERNAL TOOLS (real system): Functions like read_file(), write_file(), run_bash(), glob_files(), grep_files() execute OUTSIDE the sandbox on the real filesystem and host machine. These have real, persistent side effects.
When you need to:
- Process data, do math, manipulate strings, iterate → write Python code directly in the REPL
- Read/write actual files on disk → call read_file(), write_file(), edit_file()
- Run shell commands on the host → call run_bash()
- Search the codebase → call glob_files(), grep_files()
When you need to:
- Process data, do math, manipulate strings, iterate → write Python code directly in the REPL
- Read/write actual files on disk → call read_file(), write_file(), edit_file()
- Run shell commands on the host → call run_bash()
- Search the codebase → call glob_files(), grep_files()
Do NOT confuse REPL variables with external files. Reading a file into a variable does not mean the variable updates if the file changes - you must call read_file() again."""
Do NOT confuse REPL variables with external files. Reading a file into a variable does not mean the variable updates if the file changes - you must call read_file() again."""
task: str = dspy.InputField(desc="The user's coding task or question")
answer: str = dspy.OutputField(
@@ -235,6 +237,7 @@ class RLMCodingConfig(PrecompiledConfig):
max_output_chars: int = 100000
verbose: bool = True
track_usage: bool = True
track_trace: bool = False
class RLMCodingProgram(PrecompiledProgram):
@@ -256,6 +259,21 @@ class RLMCodingProgram(PrecompiledProgram):
def __init__(self, config: RLMCodingConfig, **kwargs):
super().__init__(config, **kwargs)
if config.track_trace:
import weave
project = kwargs.get("project", os.getenv("WANDB_PROJECT"))
if project is None:
raise ValueError("project is required when track_trace is True")
wandb_key = kwargs.get("wandb_key", os.getenv("WANDB_API_KEY"))
if wandb_key is None:
raise ValueError("wandb_key is required when track_trace is True")
os.environ["WANDB_PROJECT"] = project
os.environ["WANDB_API_KEY"] = wandb_key
weave.init(project_name=project)
self.config = config
self.tools = {
"read_file": read_file,
@@ -284,7 +302,7 @@ class RLMCodingProgram(PrecompiledProgram):
tools=self.tools,
max_output_chars=self.config.max_output_chars,
max_iterations=self.config.max_iters,
verbose=False, # We add our own verbose logging
verbose=False, # We add our own verbose logging
)
self.agent.set_lm(self.lm)
@@ -362,7 +380,7 @@ class RLMCodingProgram(PrecompiledProgram):
def reload_repl(
self,
): # we need to create a new instance for tool mutations to be passed back into the REPL
): # We need to create a new instance for tool mutations to be passed back into the REPL
"""Reload the REPL with the current tools."""
new_instance = dspy.RLM(
@@ -406,17 +424,18 @@ class RLMCodingProgram(PrecompiledProgram):
fix this in a later patch for future devs.
"""
super().load_state(state)
self.reload_lms() # recreate LMs from config (not from saved state)
self.reload_lms() # Recreate LMs from config (not from saved state)
if __name__ == "__main__":
agent = RLMCodingProgram(RLMCodingConfig())
#agent(task="explicitly call llm_query(who is the ceo of apple?) to get the answer to 'who is the ceo of apple?'")
branches = ["dev", "main", "prod"]
#agent(task="what's 1 + 1?")
branches = ["dev"]
for branch in branches:
agent.push_to_hub(
MODAIC_REPO_PATH,
commit_message="Remove list_files tool",
branch=branch,
)