Remove list_files tool

2026-02-01 01:12:04 -08:00
parent 14325cc16e
commit 2e4a79e925
4 changed files with 47 additions and 27 deletions
--- a/nanocode.py
+++ b/nanocode.py
@@ -4,12 +4,8 @@ import dspy
 import subprocess
 from dspy.utils.callback import BaseCallback

-# --- Modaic ---
-
 MODAIC_REPO_PATH = "farouk1/nanocode"

-# --- ANSI colors ---
-
 RESET = "\033[0m"
 BOLD = "\033[1m"
 DIM = "\033[2m"
@@ -38,7 +34,9 @@ def read_file(path: str, offset: int = 0, limit: int = None) -> str:
    if limit is None:
        limit = len(lines)
    selected = lines[offset : offset + limit]
-    content = "".join(f"{offset + idx + 1:4}| {line}" for idx, line in enumerate(selected))
+    content = "".join(
+        f"{offset + idx + 1:4}| {line}" for idx, line in enumerate(selected)
+    )
    tokens = len(content) // 4  # ~4 chars per token estimate
    print(f"{MAGENTA}⏺ Reading file({path}) (~{tokens:,} tokens){RESET}")
    return content
@@ -67,7 +65,9 @@ def write_file(path: str, content: str) -> str:

    lines = content.count("\n") + (1 if content and not content.endswith("\n") else 0)
    tokens = len(content) // 4
-    print(f"{MAGENTA}⏺ {action} file({path}) ({lines} lines, ~{tokens:,} tokens){RESET}")
+    print(
+        f"{MAGENTA}⏺ {action} file({path}) ({lines} lines, ~{tokens:,} tokens){RESET}"
+    )
    return f"ok: wrote {lines} lines ({tokens:,} tokens) to {path}"


@@ -98,7 +98,7 @@ def edit_file(path: str, old: str, new: str, replace_all: bool = False) -> str:


 def glob_files(pattern: str, path: str = ".") -> str:
-    """[EXTERNAL FILESYSTEM] Find files on disk matching a glob pattern.
+    """[EXTERNAL FILESYSTEM] Do not use for simple file listing, run bash instead. Find files on disk matching a glob pattern.

    Respects .gitignore files automatically via ripgrep. Sorted by modification time.

@@ -111,7 +111,7 @@ def glob_files(pattern: str, path: str = ".") -> str:
    """
    print(f"{MAGENTA}⏺ Glob({pattern}): {path}{RESET}")

-    cmd = ["rg", "--files", "-g", pattern, path]
+    cmd = ["rg", "--files", "--no-require-git", "-g", pattern, path]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        files = result.stdout.strip().split("\n") if result.stdout.strip() else []
@@ -127,7 +127,9 @@ def glob_files(pattern: str, path: str = ".") -> str:
        return "error: search timed out after 30s"


-def grep_files(pattern: str, path: str = ".", glob: str = None, max_results: int = 50) -> str:
+def grep_files(
+    pattern: str, path: str = ".", glob: str = None, max_results: int = 50
+) -> str:
    """[EXTERNAL FILESYSTEM] Search files on disk for a regex pattern using ripgrep.

    Args:
@@ -206,19 +208,19 @@ class RLMReasoningCallback(BaseCallback):
 class CodingAssistant(dspy.Signature):
    """You are a concise coding assistant.

-CRITICAL - Two execution environments exist:
+    CRITICAL - Two execution environments exist:

-1. INTERNAL REPL (sandbox): Standard Python code you write executes in an isolated sandbox. Variables persist between iterations. Use for data processing, string manipulation, logic, loops, etc.
+    1. INTERNAL REPL (sandbox): Standard Python code you write executes in an isolated sandbox. Variables persist between iterations. Use for data processing, string manipulation, logic, loops, etc.

-2. EXTERNAL TOOLS (real system): Functions like read_file(), write_file(), run_bash(), glob_files(), grep_files() execute OUTSIDE the sandbox on the real filesystem and host machine. These have real, persistent side effects.
+    2. EXTERNAL TOOLS (real system): Functions like read_file(), write_file(), run_bash(), glob_files(), grep_files() execute OUTSIDE the sandbox on the real filesystem and host machine. These have real, persistent side effects.

-When you need to:
- Process data, do math, manipulate strings, iterate → write Python code directly in the REPL
- Read/write actual files on disk → call read_file(), write_file(), edit_file()
- Run shell commands on the host → call run_bash()
- Search the codebase → call glob_files(), grep_files()
+    When you need to:
+    - Process data, do math, manipulate strings, iterate → write Python code directly in the REPL
+    - Read/write actual files on disk → call read_file(), write_file(), edit_file()
+    - Run shell commands on the host → call run_bash()
+    - Search the codebase → call glob_files(), grep_files()

-Do NOT confuse REPL variables with external files. Reading a file into a variable does not mean the variable updates if the file changes - you must call read_file() again."""
+    Do NOT confuse REPL variables with external files. Reading a file into a variable does not mean the variable updates if the file changes - you must call read_file() again."""

    task: str = dspy.InputField(desc="The user's coding task or question")
    answer: str = dspy.OutputField(
@@ -235,6 +237,7 @@ class RLMCodingConfig(PrecompiledConfig):
    max_output_chars: int = 100000
    verbose: bool = True
    track_usage: bool = True
+    track_trace: bool = False


 class RLMCodingProgram(PrecompiledProgram):
@@ -256,6 +259,21 @@ class RLMCodingProgram(PrecompiledProgram):
    def __init__(self, config: RLMCodingConfig, **kwargs):
        super().__init__(config, **kwargs)

+        if config.track_trace:
+            import weave
+
+            project = kwargs.get("project", os.getenv("WANDB_PROJECT"))
+            if project is None:
+                raise ValueError("project is required when track_trace is True")
+
+            wandb_key = kwargs.get("wandb_key", os.getenv("WANDB_API_KEY"))
+            if wandb_key is None:
+                raise ValueError("wandb_key is required when track_trace is True")
+
+            os.environ["WANDB_PROJECT"] = project
+            os.environ["WANDB_API_KEY"] = wandb_key
+            weave.init(project_name=project)
+
        self.config = config
        self.tools = {
            "read_file": read_file,
@@ -284,7 +302,7 @@ class RLMCodingProgram(PrecompiledProgram):
            tools=self.tools,
            max_output_chars=self.config.max_output_chars,
            max_iterations=self.config.max_iters,
-            verbose=False, # We add our own verbose logging
+            verbose=False,  # We add our own verbose logging
        )
        self.agent.set_lm(self.lm)

@@ -362,7 +380,7 @@ class RLMCodingProgram(PrecompiledProgram):

    def reload_repl(
        self,
-    ):  # we need to create a new instance for tool mutations to be passed back into the REPL
+    ):  # We need to create a new instance for tool mutations to be passed back into the REPL
        """Reload the REPL with the current tools."""

        new_instance = dspy.RLM(
@@ -406,17 +424,18 @@ class RLMCodingProgram(PrecompiledProgram):
        fix this in a later patch for future devs.
        """
        super().load_state(state)
-        self.reload_lms()  # recreate LMs from config (not from saved state)
+        self.reload_lms()  # Recreate LMs from config (not from saved state)


 if __name__ == "__main__":
    agent = RLMCodingProgram(RLMCodingConfig())
-    #agent(task="explicity call llm_query(who is the ceo of apple?) to get the answer to 'who is the ceo of apple?'")
-    branches = ["dev", "main", "prod"]
+
+    #agent(task="what's 1 + 1?")
+
+    branches = ["dev"]
    for branch in branches:
        agent.push_to_hub(
            MODAIC_REPO_PATH,
            commit_message="Remove list_files tool",
            branch=branch,
        )
-