37 Commits
main ... dev

Author SHA1 Message Date
7016105521 Remove list_files tool 2026-02-01 01:26:07 -08:00
2e4a79e925 Remove list_files tool 2026-02-01 01:12:04 -08:00
14325cc16e Remove list_files tool 2026-01-31 23:42:21 -08:00
d9acd94155 Remove list_files tool 2026-01-31 16:13:45 -08:00
85d5330fc8 Remove list_files tool 2026-01-31 16:09:27 -08:00
9f27228565 Remove list_files tool 2026-01-31 15:14:19 -08:00
c27cdf870b Remove list_files tool 2026-01-31 13:18:44 -08:00
f9b1b0964e Remove list_files tool 2026-01-31 12:41:04 -08:00
fb5aa0070f Add list_files tool 2026-01-31 12:36:10 -08:00
35616d3e3e Migrate to ripgrep for glob_files 2026-01-31 12:30:11 -08:00
64e375e960 Fix config override bug by recreating LMs after load_state 2026-01-31 12:24:18 -08:00
3087006561 Fix config override bug by recreating LMs after load_state 2026-01-31 12:19:30 -08:00
3fa9f925ff Fix config override bug by recreating LMs after load_state 2026-01-24 15:50:24 -08:00
0e27e83b57 Fix config override bug by recreating LMs after load_state 2026-01-24 15:41:33 -08:00
33f34e9615 Fix config override bug by recreating LMs after load_state 2026-01-24 14:57:54 -08:00
3257f4dc01 Fix config override bug by recreating LMs after load_state 2026-01-24 14:45:24 -08:00
e633f96338 Fix config override bug by recreating LMs after load_state 2026-01-24 02:48:58 -08:00
2149e14573 Fix config override bug by recreating LMs after load_state 2026-01-24 02:29:24 -08:00
b8400242e4 Fix config override bug by recreating LMs after load_state 2026-01-24 01:25:11 -08:00
a234b48cda Fix config override bug by recreating LMs after load_state 2026-01-24 01:08:08 -08:00
2e671bd27f Fix config override bug by recreating LMs after load_state 2026-01-24 00:53:08 -08:00
2cd7286320 Add reload_lms method and debug forward() 2026-01-24 00:41:45 -08:00
fa1a5847cf Add reload_lms method and debug forward() 2026-01-24 00:37:42 -08:00
10163277ca Add reload_lms method and debug forward() 2026-01-24 00:34:19 -08:00
1a005b6584 Add reload_lms method and debug forward() 2026-01-24 00:29:10 -08:00
a4ae97ef81 change signature 2026-01-23 20:11:53 -08:00
378a657595 change signature 2026-01-23 20:11:43 -08:00
7f0aba9241 change signature 2026-01-23 19:36:06 -08:00
e69c82dea9 change signature 2026-01-23 19:35:16 -08:00
70597dc453 change signature 2026-01-23 04:19:28 -08:00
af90aeddf9 change signature 2026-01-22 19:47:29 -08:00
175979fb15 change signature 2026-01-22 17:37:10 -08:00
367fad475b change signature 2026-01-22 03:33:16 -08:00
c086336a0f debug 2026-01-22 03:01:32 -08:00
2d252e3221 debug 2026-01-22 02:32:24 -08:00
fdb81ee671 debug 2026-01-22 02:30:55 -08:00
22b7e87aaf debug 2026-01-22 02:26:21 -08:00
4 changed files with 46 additions and 27 deletions

View File

@@ -7,5 +7,6 @@
"max_tokens": 50000,
"max_output_chars": 100000,
"verbose": true,
"track_usage": true
"track_usage": true,
"track_trace": false
}

View File

@@ -1,15 +1,12 @@
import os
from modaic import PrecompiledProgram, PrecompiledConfig
import dspy
import weave
import subprocess
from dspy.utils.callback import BaseCallback
# --- Modaic ---
MODAIC_REPO_PATH = "farouk1/nanocode"
# --- ANSI colors ---
RESET = "\033[0m"
BOLD = "\033[1m"
DIM = "\033[2m"
@@ -38,7 +35,9 @@ def read_file(path: str, offset: int = 0, limit: int = None) -> str:
if limit is None:
limit = len(lines)
selected = lines[offset : offset + limit]
content = "".join(f"{offset + idx + 1:4}| {line}" for idx, line in enumerate(selected))
content = "".join(
f"{offset + idx + 1:4}| {line}" for idx, line in enumerate(selected)
)
tokens = len(content) // 4 # ~4 chars per token estimate
print(f"{MAGENTA}⏺ Reading file({path}) (~{tokens:,} tokens){RESET}")
return content
@@ -67,7 +66,9 @@ def write_file(path: str, content: str) -> str:
lines = content.count("\n") + (1 if content and not content.endswith("\n") else 0)
tokens = len(content) // 4
print(f"{MAGENTA}{action} file({path}) ({lines} lines, ~{tokens:,} tokens){RESET}")
print(
f"{MAGENTA}{action} file({path}) ({lines} lines, ~{tokens:,} tokens){RESET}"
)
return f"ok: wrote {lines} lines ({tokens:,} tokens) to {path}"
@@ -98,7 +99,7 @@ def edit_file(path: str, old: str, new: str, replace_all: bool = False) -> str:
def glob_files(pattern: str, path: str = ".") -> str:
"""[EXTERNAL FILESYSTEM] Find files on disk matching a glob pattern.
"""[EXTERNAL FILESYSTEM] Do not use for simple file listing, run bash instead. Find files on disk matching a glob pattern.
Respects .gitignore files automatically via ripgrep. Sorted by modification time.
@@ -111,7 +112,7 @@ def glob_files(pattern: str, path: str = ".") -> str:
"""
print(f"{MAGENTA}⏺ Glob({pattern}): {path}{RESET}")
cmd = ["rg", "--files", "-g", pattern, path]
cmd = ["rg", "--files", "--no-require-git", "-g", pattern, path]
try:
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
files = result.stdout.strip().split("\n") if result.stdout.strip() else []
@@ -127,7 +128,9 @@ def glob_files(pattern: str, path: str = ".") -> str:
return "error: search timed out after 30s"
def grep_files(pattern: str, path: str = ".", glob: str = None, max_results: int = 50) -> str:
def grep_files(
pattern: str, path: str = ".", glob: str = None, max_results: int = 50
) -> str:
"""[EXTERNAL FILESYSTEM] Search files on disk for a regex pattern using ripgrep.
Args:
@@ -206,19 +209,19 @@ class RLMReasoningCallback(BaseCallback):
class CodingAssistant(dspy.Signature):
"""You are a concise coding assistant.
CRITICAL - Two execution environments exist:
CRITICAL - Two execution environments exist:
1. INTERNAL REPL (sandbox): Standard Python code you write executes in an isolated sandbox. Variables persist between iterations. Use for data processing, string manipulation, logic, loops, etc.
1. INTERNAL REPL (sandbox): Standard Python code you write executes in an isolated sandbox. Variables persist between iterations. Use for data processing, string manipulation, logic, loops, etc.
2. EXTERNAL TOOLS (real system): Functions like read_file(), write_file(), run_bash(), glob_files(), grep_files() execute OUTSIDE the sandbox on the real filesystem and host machine. These have real, persistent side effects.
2. EXTERNAL TOOLS (real system): Functions like read_file(), write_file(), run_bash(), glob_files(), grep_files() execute OUTSIDE the sandbox on the real filesystem and host machine. These have real, persistent side effects.
When you need to:
- Process data, do math, manipulate strings, iterate → write Python code directly in the REPL
- Read/write actual files on disk → call read_file(), write_file(), edit_file()
- Run shell commands on the host → call run_bash()
- Search the codebase → call glob_files(), grep_files()
When you need to:
- Process data, do math, manipulate strings, iterate → write Python code directly in the REPL
- Read/write actual files on disk → call read_file(), write_file(), edit_file()
- Run shell commands on the host → call run_bash()
- Search the codebase → call glob_files(), grep_files()
Do NOT confuse REPL variables with external files. Reading a file into a variable does not mean the variable updates if the file changes - you must call read_file() again."""
Do NOT confuse REPL variables with external files. Reading a file into a variable does not mean the variable updates if the file changes - you must call read_file() again."""
task: str = dspy.InputField(desc="The user's coding task or question")
answer: str = dspy.OutputField(
@@ -235,6 +238,7 @@ class RLMCodingConfig(PrecompiledConfig):
max_output_chars: int = 100000
verbose: bool = True
track_usage: bool = True
track_trace: bool = False
class RLMCodingProgram(PrecompiledProgram):
@@ -256,6 +260,19 @@ class RLMCodingProgram(PrecompiledProgram):
def __init__(self, config: RLMCodingConfig, **kwargs):
super().__init__(config, **kwargs)
if config.track_trace:
project = kwargs.get("project", os.getenv("WANDB_PROJECT"))
if project is None:
raise ValueError("project is required when track_trace is True")
wandb_key = kwargs.get("wandb_key", os.getenv("WANDB_API_KEY"))
if wandb_key is None:
raise ValueError("wandb_key is required when track_trace is True")
os.environ["WANDB_PROJECT"] = project
os.environ["WANDB_API_KEY"] = wandb_key
weave.init(project_name=project)
self.config = config
self.tools = {
"read_file": read_file,
@@ -362,7 +379,7 @@ class RLMCodingProgram(PrecompiledProgram):
def reload_repl(
self,
): # we need to create a new instance for tool mutations to be passed back into the REPL
): # We need to create a new instance for tool mutations to be passed back into the REPL
"""Reload the REPL with the current tools."""
new_instance = dspy.RLM(
@@ -406,17 +423,18 @@ class RLMCodingProgram(PrecompiledProgram):
fix this in a later patch for future devs.
"""
super().load_state(state)
self.reload_lms() # recreate LMs from config (not from saved state)
self.reload_lms() # Recreate LMs from config (not from saved state)
if __name__ == "__main__":
agent = RLMCodingProgram(RLMCodingConfig())
# agent(task="explicitly call llm_query(who is the ceo of apple?) to get the answer to 'who is the ceo of apple?'")
branches = ["dev", "main", "prod"]
# agent(task="what's 1 + 1?")
branches = ["dev"]
for branch in branches:
agent.push_to_hub(
MODAIC_REPO_PATH,
commit_message="Remove list_files tool",
branch=branch,
)

View File

@@ -4,7 +4,7 @@
"train": [],
"demos": [],
"signature": {
"instructions": "You are a concise coding assistant.\n\nCRITICAL - Two execution environments exist:\n\n1. INTERNAL REPL (sandbox): Standard Python code you write executes in an isolated sandbox. Variables persist between iterations. Use for data processing, string manipulation, logic, loops, etc.\n\n2. EXTERNAL TOOLS (real system): Functions like read_file(), write_file(), run_bash(), glob_files(), grep_files() execute OUTSIDE the sandbox on the real filesystem and host machine. These have real, persistent side effects.\n\nWhen you need to:\n- Process data, do math, manipulate strings, iterate \u2192 write Python code directly in the REPL\n- Read/write actual files on disk \u2192 call read_file(), write_file(), edit_file()\n- Run shell commands on the host \u2192 call run_bash()\n- Search the codebase \u2192 call glob_files(), grep_files()\n\nDo NOT confuse REPL variables with external files. Reading a file into a variable does not mean the variable updates if the file changes - you must call read_file() again.\n\nYou are tasked with producing the following outputs given the inputs `task`:\n- {answer}\n\nYou have access to a Python REPL environment. Write Python code and it will be executed. You will see the output, then write more code based on what you learned. This is an iterative process.\n\nAvailable:\n- Variables: `task` (your input data)\n- `llm_query(prompt)` - query a sub-LLM (~500K char capacity) for semantic analysis\n- `llm_query_batched(prompts)` - query multiple prompts concurrently (much faster for multiple queries)\n- `print()` - ALWAYS print to see results\n- `SUBMIT(answer)` - submit final output when done\n- Standard libraries: re, json, collections, math, etc.\n\nIMPORTANT: This is ITERATIVE. Each code block you write will execute, you'll see the output, then you decide what to do next. Do NOT try to solve everything in one step.\n\n1. EXPLORE FIRST - Look at your data before processing it. 
Print samples, check types/lengths, understand the structure.\n2. ITERATE - Write small code snippets, observe outputs, then decide next steps. State persists between iterations.\n3. VERIFY BEFORE SUBMITTING - If results seem wrong (zeros, empty, unexpected), reconsider your approach.\n4. USE llm_query FOR SEMANTICS - String matching finds WHERE things are; llm_query understands WHAT things mean.\n5. MINIMIZE RETYPING (INPUTS & OUTPUTS) - When values are long, precise, or error-prone (IDs, numbers, code, quotes), re-access them via variables and parse/compute in code instead of retyping. Use small, targeted prints to sanity-check, but avoid manual copying when variables can carry the exact value.\n6. SUBMIT ONLY AFTER SEEING OUTPUTS - SUBMIT ends the current run immediately. If you need to inspect printed output, run it in one step, review the result, then call SUBMIT in a later step.\n\nYou have max 50 sub-LLM calls. When done, call SUBMIT() with your output.\nAdditional tools available (use these instead of standard library equivalents):\n- `read_file(path: str, offset: int, limit: int) -> str` - [EXTERNAL FILESYSTEM] Read file contents from disk with line numbers.\n- `write_file(path: str, content: str) -> str` - [EXTERNAL FILESYSTEM] Write content to a file on disk (creates or overwrites).\n- `edit_file(path: str, old: str, new: str, replace_all: bool) -> str` - [EXTERNAL FILESYSTEM] Replace text in a file on disk.\n- `glob_files(pattern: str, path: str) -> str` - [EXTERNAL FILESYSTEM] Find files on disk matching a glob pattern.\n- `grep_files(pattern: str, path: str, glob: str, max_results: int) -> str` - [EXTERNAL FILESYSTEM] Search files on disk for a regex pattern using ripgrep.\n- `run_bash(cmd: str) -> str` - [EXTERNAL SYSTEM] Run a shell command on the host machine.",
"instructions": "You are a concise coding assistant.\n\nCRITICAL - Two execution environments exist:\n\n1. INTERNAL REPL (sandbox): Standard Python code you write executes in an isolated sandbox. Variables persist between iterations. Use for data processing, string manipulation, logic, loops, etc.\n\n2. EXTERNAL TOOLS (real system): Functions like read_file(), write_file(), run_bash(), glob_files(), grep_files() execute OUTSIDE the sandbox on the real filesystem and host machine. These have real, persistent side effects.\n\nWhen you need to:\n- Process data, do math, manipulate strings, iterate \u2192 write Python code directly in the REPL\n- Read/write actual files on disk \u2192 call read_file(), write_file(), edit_file()\n- Run shell commands on the host \u2192 call run_bash()\n- Search the codebase \u2192 call glob_files(), grep_files()\n\nDo NOT confuse REPL variables with external files. Reading a file into a variable does not mean the variable updates if the file changes - you must call read_file() again.\n\nYou are tasked with producing the following outputs given the inputs `task`:\n- {answer}\n\nYou have access to a Python REPL environment. Write Python code and it will be executed. You will see the output, then write more code based on what you learned. This is an iterative process.\n\nAvailable:\n- Variables: `task` (your input data)\n- `llm_query(prompt)` - query a sub-LLM (~500K char capacity) for semantic analysis\n- `llm_query_batched(prompts)` - query multiple prompts concurrently (much faster for multiple queries)\n- `print()` - ALWAYS print to see results\n- `SUBMIT(answer)` - submit final output when done\n- Standard libraries: re, json, collections, math, etc.\n\nIMPORTANT: This is ITERATIVE. Each code block you write will execute, you'll see the output, then you decide what to do next. Do NOT try to solve everything in one step.\n\n1. EXPLORE FIRST - Look at your data before processing it. 
Print samples, check types/lengths, understand the structure.\n2. ITERATE - Write small code snippets, observe outputs, then decide next steps. State persists between iterations.\n3. VERIFY BEFORE SUBMITTING - If results seem wrong (zeros, empty, unexpected), reconsider your approach.\n4. USE llm_query FOR SEMANTICS - String matching finds WHERE things are; llm_query understands WHAT things mean.\n5. MINIMIZE RETYPING (INPUTS & OUTPUTS) - When values are long, precise, or error-prone (IDs, numbers, code, quotes), re-access them via variables and parse/compute in code instead of retyping. Use small, targeted prints to sanity-check, but avoid manual copying when variables can carry the exact value.\n6. SUBMIT ONLY AFTER SEEING OUTPUTS - SUBMIT ends the current run immediately. If you need to inspect printed output, run it in one step, review the result, then call SUBMIT in a later step.\n\nYou have max 50 sub-LLM calls. When done, call SUBMIT() with your output.\nAdditional tools available (use these instead of standard library equivalents):\n- `read_file(path: str, offset: int, limit: int) -> str` - [EXTERNAL FILESYSTEM] Read file contents from disk with line numbers.\n- `write_file(path: str, content: str) -> str` - [EXTERNAL FILESYSTEM] Write content to a file on disk (creates or overwrites).\n- `edit_file(path: str, old: str, new: str, replace_all: bool) -> str` - [EXTERNAL FILESYSTEM] Replace text in a file on disk.\n- `glob_files(pattern: str, path: str) -> str` - [EXTERNAL FILESYSTEM] Do not use for simple file listing, run bash instead. Find files on disk matching a glob pattern.\n- `grep_files(pattern: str, path: str, glob: str, max_results: int) -> str` - [EXTERNAL FILESYSTEM] Search files on disk for a regex pattern using ripgrep.\n- `run_bash(cmd: str) -> str` - [EXTERNAL SYSTEM] Run a shell command on the host machine.",
"fields": [
{
"prefix": "Variables Info:",

View File

@@ -4,4 +4,4 @@ version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"
dependencies = ["dspy>=3.1.2", "fastmcp>=2.14.3", "mcp2py>=0.6.0", "modaic>=0.10.4", "weave>=0.52.25"]
dependencies = ["dspy>=3.1.2", "fastmcp>=2.14.3", "mcp2py>=0.6.0", "modaic>=0.10.4", "wandb>=0.24.1", "weave>=0.52.25"]