Switch to RLM instead of ReAct

This commit is contained in:
2026-01-21 18:29:54 -08:00
parent 415e711fa7
commit 939538291c
5 changed files with 73 additions and 72 deletions

View File

@@ -1,6 +1,6 @@
# nanocode # nanocode
Minimal Claude Code alternative. Single Python file, zero dependencies, ~250 lines. Minimal Claude Code alternative using DSPy RLM! Single Python file, zero dependencies, ~250 lines.
Built using Claude Code, then used to build itself. Built using Claude Code, then used to build itself.
@@ -13,13 +13,6 @@ Built using Claude Code, then used to build itself.
- Conversation history - Conversation history
- Colored terminal output - Colored terminal output
## Usage
```bash
export ANTHROPIC_API_KEY="your-key"
python nanocode.py
```
### OpenRouter ### OpenRouter
Use [OpenRouter](https://openrouter.ai) to access any model: Use [OpenRouter](https://openrouter.ai) to access any model:
@@ -68,4 +61,4 @@ python nanocode.py
## License ## License
MIT MIT

View File

@@ -1,4 +1,4 @@
{ {
"AutoConfig": "nanocode.AgentConfig", "AutoConfig": "nanocode.RLMCodingConfig",
"AutoProgram": "nanocode.AgentProgram" "AutoProgram": "nanocode.RLMCodingProgram"
} }

View File

@@ -1,7 +1,10 @@
{ {
"model": null, "model": null,
"max_iters": 15, "max_iters": 20,
"lm": "openai/gpt-5.2-codex", "lm": "openai/gpt-5.2-codex",
"sub_lm": "openrouter/openai/gpt-4.1",
"api_base": "https://openrouter.ai/api/v1", "api_base": "https://openrouter.ai/api/v1",
"max_tokens": 16000 "max_tokens": 16000,
"max_output_chars": 100000,
"verbose": false
} }

View File

@@ -1,6 +1,3 @@
#!/usr/bin/env python3
"""nanocode-dspy - minimal claude code alternative using DSPy ReAct"""
import os import os
import re import re
import glob as globlib import glob as globlib
@@ -9,6 +6,9 @@ from modaic import PrecompiledProgram, PrecompiledConfig
import dspy import dspy
from dspy.utils.callback import BaseCallback from dspy.utils.callback import BaseCallback
# --- Modaic ---
MODAIC_REPO_PATH = "farouk1/nanocode"
# --- ANSI colors --- # --- ANSI colors ---
@@ -22,7 +22,6 @@ YELLOW = "\033[33m"
RED = "\033[31m" RED = "\033[31m"
MAGENTA = "\033[35m" MAGENTA = "\033[35m"
# --- Display utilities --- # --- Display utilities ---
@@ -172,14 +171,14 @@ def run_bash(cmd: str) -> str:
# --- Model selection --- # --- Model selection ---
AVAILABLE_MODELS = { AVAILABLE_MODELS = {
"1": ("Claude 3.5 Sonnet", "anthropic/claude-3.5-sonnet"), "1": ("GPT-5.2 Codex", "openai/gpt-5.2-codex"),
"2": ("Claude 3.5 Haiku", "anthropic/claude-3.5-haiku"), "2": ("GPT-5.2", "openai/gpt-5.2"),
"3": ("GPT-4o", "openai/gpt-4o"), "3": ("Claude Opus 4.5", "anthropic/claude-opus-4.5"),
"4": ("GPT-4o mini", "openai/gpt-4o-mini"), "4": ("Claude Opus 4", "anthropic/claude-opus-4"),
"5": ("Gemini Pro 1.5", "google/gemini-pro-1.5"), "5": ("Qwen 3 Coder", "qwen/qwen3-coder"),
"6": ("Llama 3.1 405B", "meta-llama/llama-3.1-405b-instruct"), "6": ("Gemini 3 Flash Preview", "google/gemini-3-flash-preview"),
"7": ("DeepSeek V3", "deepseek/deepseek-chat"), "7": ("Kimi K2 0905", "moonshotai/kimi-k2-0905"),
"8": ("Qwen 2.5 72B", "qwen/qwen-2.5-72b-instruct"), "8": ("Minimax M2.1", "minimax/minimax-m2.1"),
} }
@@ -223,9 +222,6 @@ def select_model():
exit(1) exit(1)
# --- DSPy Signature ---
class CodingAssistant(dspy.Signature): class CodingAssistant(dspy.Signature):
"""You are a concise coding assistant. Help the user with their coding task by using the available tools to read, write, edit files, search the codebase, and run commands.""" """You are a concise coding assistant. Help the user with their coding task by using the available tools to read, write, edit files, search the codebase, and run commands."""
@@ -238,9 +234,14 @@ class CodingAssistant(dspy.Signature):
) )
# ReAct agent with tools tools = {
"readfile": read_file,
tools = [read_file, write_file, edit_file, glob_files, grep_files, run_bash] "writefile": write_file,
"editfile": edit_file,
"globfiles": glob_files,
"grepfiles": grep_files,
"runbash": run_bash,
}
class ToolLoggingCallback(BaseCallback): class ToolLoggingCallback(BaseCallback):
@@ -272,31 +273,44 @@ class ToolLoggingCallback(BaseCallback):
print(f" {MAGENTA}{call.name}({args_str}){RESET}", flush=True) print(f" {MAGENTA}{call.name}({args_str}){RESET}", flush=True)
class AgentConfig(PrecompiledConfig): class RLMCodingConfig(PrecompiledConfig):
max_iters: int = 15 max_iters: int = 20
lm: str = "openrouter/anthropic/claude-3.5-sonnet" # Default fallback lm: str = "openrouter/anthropic/claude-3.5-sonnet" # Default fallback
sub_lm: str = "openrouter/openai/gpt-4.1" # Default fallback
api_base: str = "https://openrouter.ai/api/v1" api_base: str = "https://openrouter.ai/api/v1"
max_tokens: int = 16000 max_tokens: int = 16000
max_output_chars: int = 100000
verbose: bool = False
class AgentProgram(PrecompiledProgram): class RLMCodingProgram(PrecompiledProgram):
config: AgentConfig config: RLMCodingConfig
def __init__(self, config: AgentConfig, **kwargs): def __init__(self, config: RLMCodingConfig, **kwargs):
self.config = config self.config = config
super().__init__(config, **kwargs) super().__init__(config, **kwargs)
# Configure logging callback globally # log tool calls for introspection during multi-turn conversations
dspy.settings.configure(callbacks=[ToolLoggingCallback()]) dspy.settings.configure(callbacks=[ToolLoggingCallback()])
agent = dspy.ReAct(
CodingAssistant, tools=tools, max_iters=self.config.max_iters
)
lm = dspy.LM( lm = dspy.LM(
self.config.lm, self.config.lm,
api_base=self.config.api_base, api_base=self.config.api_base,
max_tokens=self.config.max_tokens, max_tokens=self.config.max_tokens,
) )
sub_lm = dspy.LM(
self.config.sub_lm,
api_base=self.config.api_base,
max_tokens=self.config.max_tokens,
)
agent = dspy.RLM(
CodingAssistant,
sub_lm=sub_lm,
tools=tools,
max_output_chars=self.config.max_output_chars,
max_iterations=self.config.max_iters,
verbose=self.config.verbose,
)
agent.set_lm(lm) agent.set_lm(lm)
self.agent = agent self.agent = agent
@@ -304,12 +318,7 @@ class AgentProgram(PrecompiledProgram):
assert task, "Task cannot be empty" assert task, "Task cannot be empty"
return self.agent(task=task) return self.agent(task=task)
# --- Main ---
def main(): def main():
"""Create AgentConfig with selected model."""
model = os.getenv("MODEL") model = os.getenv("MODEL")
if model is None: if model is None:
model = select_model() model = select_model()
@@ -318,10 +327,10 @@ def main():
if not model.startswith("openrouter/"): if not model.startswith("openrouter/"):
model = f"openrouter/{model}" model = f"openrouter/{model}"
config = AgentConfig() config = RLMCodingConfig()
config.lm = model config.lm = model
agent = AgentProgram(config) agent = RLMCodingProgram(config)
print( print(
f"{BOLD}nanocode-dspy{RESET} | {DIM}{agent.config.lm} | {os.getcwd()}{RESET}\n" f"{BOLD}nanocode-dspy{RESET} | {DIM}{agent.config.lm} | {os.getcwd()}{RESET}\n"
) )
@@ -376,6 +385,6 @@ def main():
if __name__ == "__main__": if __name__ == "__main__":
agent = AgentProgram(AgentConfig(lm="openai/gpt-5.2-codex")) agent = RLMCodingProgram(RLMCodingConfig(lm="openai/gpt-5.2-codex"))
agent.push_to_hub("farouk1/nanocode") agent.push_to_hub(MODAIC_REPO_PATH, commit_message="Switch to RLM instead of ReAct", tag="v0.0.1")
#main() #main()

View File

@@ -1,30 +1,30 @@
{ {
"agent.react": { "agent.generate_action": {
"traces": [], "traces": [],
"train": [], "train": [],
"demos": [], "demos": [],
"signature": { "signature": {
"instructions": "You are a concise coding assistant. Help the user with their coding task by using the available tools to read, write, edit files, search the codebase, and run commands.\n\nYou are an Agent. In each episode, you will be given the fields `task` as input. And you can see your past trajectory so far.\nYour goal is to use one or more of the supplied tools to collect any necessary information for producing `answer`, `affected_files`.\n\nTo do this, you will interleave next_thought, next_tool_name, and next_tool_args in each turn, and also when finishing the task.\nAfter each tool call, you receive a resulting observation, which gets appended to your trajectory.\n\nWhen writing next_thought, you may reason about the current situation and plan for future steps.\nWhen selecting the next_tool_name and its next_tool_args, the tool must be one of:\n\n(1) read_file, whose description is <desc>Read file contents with line numbers. Args: path: Path to the file to read offset: Line number to start from (0-indexed) limit: Maximum number of lines to read Returns: File contents with line numbers </desc>. It takes arguments {'path': {'type': 'string'}, 'offset': {'type': 'integer', 'default': 0}, 'limit': {'type': 'integer', 'default': None}}.\n(2) write_file, whose description is <desc>Write content to a file. Args: path: Path to the file to write content: Content to write to the file Returns: 'ok' on success </desc>. It takes arguments {'path': {'type': 'string'}, 'content': {'type': 'string'}}.\n(3) edit_file, whose description is <desc>Replace text in a file. Args: path: Path to the file to edit old: Text to find and replace new: Replacement text replace_all: If True, replace all occurrences; otherwise old must be unique Returns: 'ok' on success, error message on failure </desc>. 
It takes arguments {'path': {'type': 'string'}, 'old': {'type': 'string'}, 'new': {'type': 'string'}, 'replace_all': {'type': 'boolean', 'default': False}}.\n(4) glob_files, whose description is <desc>Find files matching a glob pattern, sorted by modification time. Args: pattern: Glob pattern to match (e.g., '**/*.py') path: Base directory to search in Returns: Newline-separated list of matching files </desc>. It takes arguments {'pattern': {'type': 'string'}, 'path': {'type': 'string', 'default': '.'}}.\n(5) grep_files, whose description is <desc>Search files for a regex pattern. Args: pattern: Regular expression pattern to search for path: Base directory to search in Returns: Matching lines in format 'filepath:line_num:content' </desc>. It takes arguments {'pattern': {'type': 'string'}, 'path': {'type': 'string', 'default': '.'}}.\n(6) run_bash, whose description is <desc>Run a shell command and return output. Args: cmd: Shell command to execute Returns: Command output (stdout and stderr combined) </desc>. It takes arguments {'cmd': {'type': 'string'}}.\n(7) finish, whose description is <desc>Marks the task as complete. That is, signals that all information for producing the outputs, i.e. `answer`, `affected_files`, are now available to be extracted.</desc>. It takes arguments {}.\nWhen providing `next_tool_args`, the value inside the field must be in JSON format", "instructions": "You are a concise coding assistant. Help the user with their coding task by using the available tools to read, write, edit files, search the codebase, and run commands.\n\nYou are tasked with producing the following outputs given the inputs `task`:\n- {answer}\n- {affected_files} # note: the value you produce must adhere to the JSON schema: {\"type\": \"array\", \"items\": {\"type\": \"string\"}}\n\nYou have access to a Python REPL environment. Write Python code and it will be executed. You will see the output, then write more code based on what you learned. 
This is an iterative process.\n\nAvailable:\n- Variables: `task` (your input data)\n- `llm_query(prompt)` - query a sub-LLM (~500K char capacity) for semantic analysis\n- `llm_query_batched(prompts)` - query multiple prompts concurrently (much faster for multiple queries)\n- `print()` - ALWAYS print to see results\n- `SUBMIT(answer, affected_files)` - submit final output when done\n- Standard libraries: re, json, collections, math, etc.\n\nIMPORTANT: This is ITERATIVE. Each code block you write will execute, you'll see the output, then you decide what to do next. Do NOT try to solve everything in one step.\n\n1. EXPLORE FIRST - Look at your data before processing it. Print samples, check types/lengths, understand the structure.\n2. ITERATE - Write small code snippets, observe outputs, then decide next steps. State persists between iterations.\n3. VERIFY BEFORE SUBMITTING - If results seem wrong (zeros, empty, unexpected), reconsider your approach.\n4. USE llm_query FOR SEMANTICS - String matching finds WHERE things are; llm_query understands WHAT things mean.\n5. MINIMIZE RETYPING (INPUTS & OUTPUTS) - When values are long, precise, or error-prone (IDs, numbers, code, quotes), re-access them via variables and parse/compute in code instead of retyping. Use small, targeted prints to sanity-check, but avoid manual copying when variables can carry the exact value.\n6. SUBMIT ONLY AFTER SEEING OUTPUTS - SUBMIT ends the current run immediately. If you need to inspect printed output, run it in one step, review the result, then call SUBMIT in a later step.\n\nYou have max 50 sub-LLM calls. 
When done, call SUBMIT() with your output.\nAdditional tools available (use these instead of standard library equivalents):\n- `readfile(path: str, offset: int, limit: int) -> str` - Read file contents with line numbers.\n- `writefile(path: str, content: str) -> str` - Write content to a file.\n- `editfile(path: str, old: str, new: str, replace_all: bool) -> str` - Replace text in a file.\n- `globfiles(pattern: str, path: str) -> str` - Find files matching a glob pattern, sorted by modification time.\n- `grepfiles(pattern: str, path: str) -> str` - Search files for a regex pattern.\n- `runbash(cmd: str) -> str` - Run a shell command and return output.",
"fields": [ "fields": [
{ {
"prefix": "Task:", "prefix": "Variables Info:",
"description": "The user's coding task or question" "description": "Metadata about the variables available in the REPL"
}, },
{ {
"prefix": "Trajectory:", "prefix": "Repl History:",
"description": "${trajectory}" "description": "Previous REPL code executions and their outputs"
}, },
{ {
"prefix": "Next Thought:", "prefix": "Iteration:",
"description": "${next_thought}" "description": "Current iteration number (1-indexed) out of max_iterations"
}, },
{ {
"prefix": "Next Tool Name:", "prefix": "Reasoning:",
"description": "${next_tool_name}" "description": "Think step-by-step: what do you know? What remains? Plan your next action."
}, },
{ {
"prefix": "Next Tool Args:", "prefix": "Code:",
"description": "${next_tool_args}" "description": "Python code to execute."
} }
] ]
}, },
@@ -41,24 +41,20 @@
"api_base": "https://openrouter.ai/api/v1" "api_base": "https://openrouter.ai/api/v1"
} }
}, },
"agent.extract.predict": { "agent.extract": {
"traces": [], "traces": [],
"train": [], "train": [],
"demos": [], "demos": [],
"signature": { "signature": {
"instructions": "You are a concise coding assistant. Help the user with their coding task by using the available tools to read, write, edit files, search the codebase, and run commands.", "instructions": "The trajectory was generated with the following objective: \nYou are a concise coding assistant. Help the user with their coding task by using the available tools to read, write, edit files, search the codebase, and run commands.\n\n\nBased on the REPL trajectory, extract the final outputs now.\n\n Review your trajectory to see what information you gathered and what values you computed, then provide the final outputs.",
"fields": [ "fields": [
{ {
"prefix": "Task:", "prefix": "Variables Info:",
"description": "The user's coding task or question" "description": "Metadata about the variables available in the REPL"
}, },
{ {
"prefix": "Trajectory:", "prefix": "Repl History:",
"description": "${trajectory}" "description": "Your REPL interactions so far"
},
{
"prefix": "Reasoning: Let's think step by step in order to",
"description": "${reasoning}"
}, },
{ {
"prefix": "Answer:", "prefix": "Answer:",