Remove list_files tool
This commit is contained in:
67
nanocode.py
67
nanocode.py
@@ -4,12 +4,8 @@ import dspy
|
||||
import subprocess
|
||||
from dspy.utils.callback import BaseCallback
|
||||
|
||||
# --- Modaic ---
|
||||
|
||||
MODAIC_REPO_PATH = "farouk1/nanocode"
|
||||
|
||||
# --- ANSI colors ---
|
||||
|
||||
RESET = "\033[0m"
|
||||
BOLD = "\033[1m"
|
||||
DIM = "\033[2m"
|
||||
@@ -38,7 +34,9 @@ def read_file(path: str, offset: int = 0, limit: int = None) -> str:
|
||||
if limit is None:
|
||||
limit = len(lines)
|
||||
selected = lines[offset : offset + limit]
|
||||
content = "".join(f"{offset + idx + 1:4}| {line}" for idx, line in enumerate(selected))
|
||||
content = "".join(
|
||||
f"{offset + idx + 1:4}| {line}" for idx, line in enumerate(selected)
|
||||
)
|
||||
tokens = len(content) // 4 # ~4 chars per token estimate
|
||||
print(f"{MAGENTA}⏺ Reading file({path}) (~{tokens:,} tokens){RESET}")
|
||||
return content
|
||||
@@ -67,7 +65,9 @@ def write_file(path: str, content: str) -> str:
|
||||
|
||||
lines = content.count("\n") + (1 if content and not content.endswith("\n") else 0)
|
||||
tokens = len(content) // 4
|
||||
print(f"{MAGENTA}⏺ {action} file({path}) ({lines} lines, ~{tokens:,} tokens){RESET}")
|
||||
print(
|
||||
f"{MAGENTA}⏺ {action} file({path}) ({lines} lines, ~{tokens:,} tokens){RESET}"
|
||||
)
|
||||
return f"ok: wrote {lines} lines ({tokens:,} tokens) to {path}"
|
||||
|
||||
|
||||
@@ -98,7 +98,7 @@ def edit_file(path: str, old: str, new: str, replace_all: bool = False) -> str:
|
||||
|
||||
|
||||
def glob_files(pattern: str, path: str = ".") -> str:
|
||||
"""[EXTERNAL FILESYSTEM] Find files on disk matching a glob pattern.
|
||||
"""[EXTERNAL FILESYSTEM] Do not use for simple file listing, run bash instead. Find files on disk matching a glob pattern.
|
||||
|
||||
Respects .gitignore files automatically via ripgrep. Sorted by modification time.
|
||||
|
||||
@@ -111,7 +111,7 @@ def glob_files(pattern: str, path: str = ".") -> str:
|
||||
"""
|
||||
print(f"{MAGENTA}⏺ Glob({pattern}): {path}{RESET}")
|
||||
|
||||
cmd = ["rg", "--files", "-g", pattern, path]
|
||||
cmd = ["rg", "--files", "--no-require-git", "-g", pattern, path]
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
|
||||
files = result.stdout.strip().split("\n") if result.stdout.strip() else []
|
||||
@@ -127,7 +127,9 @@ def glob_files(pattern: str, path: str = ".") -> str:
|
||||
return "error: search timed out after 30s"
|
||||
|
||||
|
||||
def grep_files(pattern: str, path: str = ".", glob: str = None, max_results: int = 50) -> str:
|
||||
def grep_files(
|
||||
pattern: str, path: str = ".", glob: str = None, max_results: int = 50
|
||||
) -> str:
|
||||
"""[EXTERNAL FILESYSTEM] Search files on disk for a regex pattern using ripgrep.
|
||||
|
||||
Args:
|
||||
@@ -206,19 +208,19 @@ class RLMReasoningCallback(BaseCallback):
|
||||
class CodingAssistant(dspy.Signature):
|
||||
"""You are a concise coding assistant.
|
||||
|
||||
CRITICAL - Two execution environments exist:
|
||||
CRITICAL - Two execution environments exist:
|
||||
|
||||
1. INTERNAL REPL (sandbox): Standard Python code you write executes in an isolated sandbox. Variables persist between iterations. Use for data processing, string manipulation, logic, loops, etc.
|
||||
1. INTERNAL REPL (sandbox): Standard Python code you write executes in an isolated sandbox. Variables persist between iterations. Use for data processing, string manipulation, logic, loops, etc.
|
||||
|
||||
2. EXTERNAL TOOLS (real system): Functions like read_file(), write_file(), run_bash(), glob_files(), grep_files() execute OUTSIDE the sandbox on the real filesystem and host machine. These have real, persistent side effects.
|
||||
2. EXTERNAL TOOLS (real system): Functions like read_file(), write_file(), run_bash(), glob_files(), grep_files() execute OUTSIDE the sandbox on the real filesystem and host machine. These have real, persistent side effects.
|
||||
|
||||
When you need to:
|
||||
- Process data, do math, manipulate strings, iterate → write Python code directly in the REPL
|
||||
- Read/write actual files on disk → call read_file(), write_file(), edit_file()
|
||||
- Run shell commands on the host → call run_bash()
|
||||
- Search the codebase → call glob_files(), grep_files()
|
||||
When you need to:
|
||||
- Process data, do math, manipulate strings, iterate → write Python code directly in the REPL
|
||||
- Read/write actual files on disk → call read_file(), write_file(), edit_file()
|
||||
- Run shell commands on the host → call run_bash()
|
||||
- Search the codebase → call glob_files(), grep_files()
|
||||
|
||||
Do NOT confuse REPL variables with external files. Reading a file into a variable does not mean the variable updates if the file changes - you must call read_file() again."""
|
||||
Do NOT confuse REPL variables with external files. Reading a file into a variable does not mean the variable updates if the file changes - you must call read_file() again."""
|
||||
|
||||
task: str = dspy.InputField(desc="The user's coding task or question")
|
||||
answer: str = dspy.OutputField(
|
||||
@@ -235,6 +237,7 @@ class RLMCodingConfig(PrecompiledConfig):
|
||||
max_output_chars: int = 100000
|
||||
verbose: bool = True
|
||||
track_usage: bool = True
|
||||
track_trace: bool = False
|
||||
|
||||
|
||||
class RLMCodingProgram(PrecompiledProgram):
|
||||
@@ -256,6 +259,21 @@ class RLMCodingProgram(PrecompiledProgram):
|
||||
def __init__(self, config: RLMCodingConfig, **kwargs):
|
||||
super().__init__(config, **kwargs)
|
||||
|
||||
if config.track_trace:
|
||||
import weave
|
||||
|
||||
project = kwargs.get("project", os.getenv("WANDB_PROJECT"))
|
||||
if project is None:
|
||||
raise ValueError("project is required when track_trace is True")
|
||||
|
||||
wandb_key = kwargs.get("wandb_key", os.getenv("WANDB_API_KEY"))
|
||||
if wandb_key is None:
|
||||
raise ValueError("wandb_key is required when track_trace is True")
|
||||
|
||||
os.environ["WANDB_PROJECT"] = project
|
||||
os.environ["WANDB_API_KEY"] = wandb_key
|
||||
weave.init(project_name=project)
|
||||
|
||||
self.config = config
|
||||
self.tools = {
|
||||
"read_file": read_file,
|
||||
@@ -284,7 +302,7 @@ class RLMCodingProgram(PrecompiledProgram):
|
||||
tools=self.tools,
|
||||
max_output_chars=self.config.max_output_chars,
|
||||
max_iterations=self.config.max_iters,
|
||||
verbose=False, # We add our own verbose logging
|
||||
verbose=False, # We add our own verbose logging
|
||||
)
|
||||
self.agent.set_lm(self.lm)
|
||||
|
||||
@@ -362,7 +380,7 @@ class RLMCodingProgram(PrecompiledProgram):
|
||||
|
||||
def reload_repl(
|
||||
self,
|
||||
): # we need to create a new instance for tool mutations to be passed back into the REPL
|
||||
): # We need to create a new instance for tool mutations to be passed back into the REPL
|
||||
"""Reload the REPL with the current tools."""
|
||||
|
||||
new_instance = dspy.RLM(
|
||||
@@ -406,17 +424,18 @@ class RLMCodingProgram(PrecompiledProgram):
|
||||
fix this in a later patch for future devs.
|
||||
"""
|
||||
super().load_state(state)
|
||||
self.reload_lms() # recreate LMs from config (not from saved state)
|
||||
self.reload_lms() # Recreate LMs from config (not from saved state)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
agent = RLMCodingProgram(RLMCodingConfig())
|
||||
#agent(task="explicitly call llm_query(who is the ceo of apple?) to get the answer to 'who is the ceo of apple?'")
|
||||
branches = ["dev", "main", "prod"]
|
||||
|
||||
#agent(task="what's 1 + 1?")
|
||||
|
||||
branches = ["dev"]
|
||||
for branch in branches:
|
||||
agent.push_to_hub(
|
||||
MODAIC_REPO_PATH,
|
||||
commit_message="Remove list_files tool",
|
||||
branch=branch,
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user