Switch to RLM instead of ReAct

2026-01-21 18:29:54 -08:00
parent 415e711fa7
commit 939538291c
5 changed files with 73 additions and 72 deletions
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # nanocode

-Minimal Claude Code alternative. Single Python file, zero dependencies, ~250 lines.
+Minimal Claude Code alternative using DSPy RLM! Single Python file, zero dependencies, ~250 lines.

 Built using Claude Code, then used to build itself.

@@ -13,13 +13,6 @@ Built using Claude Code, then used to build itself.
 - Conversation history
 - Colored terminal output

-## Usage
-
-```bash
-export ANTHROPIC_API_KEY="your-key"
-python nanocode.py
-```
-
 ### OpenRouter

 Use [OpenRouter](https://openrouter.ai) to access any model:
--- a/auto_classes.json
+++ b/auto_classes.json
@@ -1,4 +1,4 @@
 {
-  "AutoConfig": "nanocode.AgentConfig",
-  "AutoProgram": "nanocode.AgentProgram"
+  "AutoConfig": "nanocode.RLMCodingConfig",
+  "AutoProgram": "nanocode.RLMCodingProgram"
 }
--- a/config.json
+++ b/config.json
@@ -1,7 +1,10 @@
 {
  "model": null,
-  "max_iters": 15,
+  "max_iters": 20,
  "lm": "openai/gpt-5.2-codex",
+  "sub_lm": "openrouter/openai/gpt-4.1",
  "api_base": "https://openrouter.ai/api/v1",
-  "max_tokens": 16000
+  "max_tokens": 16000,
+  "max_output_chars": 100000,
+  "verbose": false
 }
--- a/nanocode.py
+++ b/nanocode.py
@@ -1,6 +1,3 @@
-#!/usr/bin/env python3
-"""nanocode-dspy - minimal claude code alternative using DSPy ReAct"""
-
 import os
 import re
 import glob as globlib
@@ -9,6 +6,9 @@ from modaic import PrecompiledProgram, PrecompiledConfig
 import dspy
 from dspy.utils.callback import BaseCallback

+# --- Modaic ---
+
+MODAIC_REPO_PATH = "farouk1/nanocode"

 # --- ANSI colors ---

@@ -22,7 +22,6 @@ YELLOW = "\033[33m"
 RED = "\033[31m"
 MAGENTA = "\033[35m"

-
 # --- Display utilities ---


@@ -172,14 +171,14 @@ def run_bash(cmd: str) -> str:
 # --- Model selection ---

 AVAILABLE_MODELS = {
-    "1": ("Claude 3.5 Sonnet", "anthropic/claude-3.5-sonnet"),
-    "2": ("Claude 3.5 Haiku", "anthropic/claude-3.5-haiku"),
-    "3": ("GPT-4o", "openai/gpt-4o"),
-    "4": ("GPT-4o mini", "openai/gpt-4o-mini"),
-    "5": ("Gemini Pro 1.5", "google/gemini-pro-1.5"),
-    "6": ("Llama 3.1 405B", "meta-llama/llama-3.1-405b-instruct"),
-    "7": ("DeepSeek V3", "deepseek/deepseek-chat"),
-    "8": ("Qwen 2.5 72B", "qwen/qwen-2.5-72b-instruct"),
+    "1": ("GPT-5.2 Codex", "openai/gpt-5.2-codex"),
+    "2": ("GPT-5.2", "openai/gpt-5.2"),
+    "3": ("Claude Opus 4.5", "anthropic/claude-opus-4.5"),
+    "4": ("Claude Opus 4", "anthropic/claude-opus-4"),
+    "5": ("Qwen 3 Coder", "qwen/qwen3-coder"),
+    "6": ("Gemini 3 Flash Preview", "google/gemini-3-flash-preview"),
+    "7": ("Kimi K2 0905", "moonshotai/kimi-k2-0905"),
+    "8": ("Minimax M2.1", "minimax/minimax-m2.1"),
 }


@@ -223,9 +222,6 @@ def select_model():
            exit(1)


-# --- DSPy Signature ---
-
-
 class CodingAssistant(dspy.Signature):
    """You are a concise coding assistant. Help the user with their coding task by using the available tools to read, write, edit files, search the codebase, and run commands."""

@@ -238,9 +234,14 @@ class CodingAssistant(dspy.Signature):
    )


-# ReAct agent with tools
-
-tools = [read_file, write_file, edit_file, glob_files, grep_files, run_bash]
+tools = {
+    "readfile": read_file,
+    "writefile": write_file,
+    "editfile": edit_file,
+    "globfiles": glob_files,
+    "grepfiles": grep_files,
+    "runbash": run_bash,
+}


 class ToolLoggingCallback(BaseCallback):
@@ -272,31 +273,44 @@ class ToolLoggingCallback(BaseCallback):
                    print(f"  {MAGENTA}⏺ {call.name}({args_str}){RESET}", flush=True)


-class AgentConfig(PrecompiledConfig):
-    max_iters: int = 15
+class RLMCodingConfig(PrecompiledConfig):
+    max_iters: int = 20
    lm: str = "openrouter/anthropic/claude-3.5-sonnet"  # Default fallback
+    sub_lm: str = "openrouter/openai/gpt-4.1"  # Default fallback
    api_base: str = "https://openrouter.ai/api/v1"
    max_tokens: int = 16000
+    max_output_chars: int = 100000
+    verbose: bool = False


-class AgentProgram(PrecompiledProgram):
-    config: AgentConfig
+class RLMCodingProgram(PrecompiledProgram):
+    config: RLMCodingConfig

-    def __init__(self, config: AgentConfig, **kwargs):
+    def __init__(self, config: RLMCodingConfig, **kwargs):
        self.config = config
        super().__init__(config, **kwargs)

-        # Configure logging callback globally
+        # Tool logging for introspection of multi-turn conversations
        dspy.settings.configure(callbacks=[ToolLoggingCallback()])
-
-        agent = dspy.ReAct(
-            CodingAssistant, tools=tools, max_iters=self.config.max_iters
-        )
        lm = dspy.LM(
            self.config.lm,
            api_base=self.config.api_base,
            max_tokens=self.config.max_tokens,
        )
+        sub_lm = dspy.LM(
+            self.config.sub_lm,
+            api_base=self.config.api_base,
+            max_tokens=self.config.max_tokens,
+        )
+        agent = dspy.RLM(
+            CodingAssistant,
+            sub_lm=sub_lm,
+            tools=tools,
+            max_output_chars=self.config.max_output_chars,
+            max_iterations=self.config.max_iters,
+            verbose=self.config.verbose,
+        )
+
        agent.set_lm(lm)
        self.agent = agent

@@ -304,12 +318,7 @@ class AgentProgram(PrecompiledProgram):
        assert task, "Task cannot be empty"
        return self.agent(task=task)

-
-# --- Main ---
-
-
 def main():
-    """Create AgentConfig with selected model."""
    model = os.getenv("MODEL")
    if model is None:
        model = select_model()
@@ -318,10 +327,10 @@ def main():
    if not model.startswith("openrouter/"):
        model = f"openrouter/{model}"

-    config = AgentConfig()
+    config = RLMCodingConfig()
    config.lm = model

-    agent = AgentProgram(config)
+    agent = RLMCodingProgram(config)
    print(
        f"{BOLD}nanocode-dspy{RESET} | {DIM}{agent.config.lm} | {os.getcwd()}{RESET}\n"
    )
@@ -376,6 +385,6 @@ def main():


 if __name__ == "__main__":
-    agent = AgentProgram(AgentConfig(lm="openai/gpt-5.2-codex"))
-    agent.push_to_hub("farouk1/nanocode")
+    agent = RLMCodingProgram(RLMCodingConfig(lm="openai/gpt-5.2-codex"))
+    agent.push_to_hub(MODAIC_REPO_PATH, commit_message="Switch to RLM instead of ReAct", tag="v0.0.1")
    #main()
--- a/program.json
+++ b/program.json
@@ -1,30 +1,30 @@
 {
-  "agent.react": {
+  "agent.generate_action": {
    "traces": [],
    "train": [],
    "demos": [],
    "signature": {
-      "instructions": "You are a concise coding assistant. Help the user with their coding task by using the available tools to read, write, edit files, search the codebase, and run commands.\n\nYou are an Agent. In each episode, you will be given the fields `task` as input. And you can see your past trajectory so far.\nYour goal is to use one or more of the supplied tools to collect any necessary information for producing `answer`, `affected_files`.\n\nTo do this, you will interleave next_thought, next_tool_name, and next_tool_args in each turn, and also when finishing the task.\nAfter each tool call, you receive a resulting observation, which gets appended to your trajectory.\n\nWhen writing next_thought, you may reason about the current situation and plan for future steps.\nWhen selecting the next_tool_name and its next_tool_args, the tool must be one of:\n\n(1) read_file, whose description is <desc>Read file contents with line numbers.    Args:      path: Path to the file to read      offset: Line number to start from (0-indexed)      limit: Maximum number of lines to read    Returns:      File contents with line numbers  </desc>. It takes arguments {'path': {'type': 'string'}, 'offset': {'type': 'integer', 'default': 0}, 'limit': {'type': 'integer', 'default': None}}.\n(2) write_file, whose description is <desc>Write content to a file.    Args:      path: Path to the file to write      content: Content to write to the file    Returns:      'ok' on success  </desc>. It takes arguments {'path': {'type': 'string'}, 'content': {'type': 'string'}}.\n(3) edit_file, whose description is <desc>Replace text in a file.    Args:      path: Path to the file to edit      old: Text to find and replace      new: Replacement text      replace_all: If True, replace all occurrences; otherwise old must be unique    Returns:      'ok' on success, error message on failure  </desc>. 
It takes arguments {'path': {'type': 'string'}, 'old': {'type': 'string'}, 'new': {'type': 'string'}, 'replace_all': {'type': 'boolean', 'default': False}}.\n(4) glob_files, whose description is <desc>Find files matching a glob pattern, sorted by modification time.    Args:      pattern: Glob pattern to match (e.g., '**/*.py')      path: Base directory to search in    Returns:      Newline-separated list of matching files  </desc>. It takes arguments {'pattern': {'type': 'string'}, 'path': {'type': 'string', 'default': '.'}}.\n(5) grep_files, whose description is <desc>Search files for a regex pattern.    Args:      pattern: Regular expression pattern to search for      path: Base directory to search in    Returns:      Matching lines in format 'filepath:line_num:content'  </desc>. It takes arguments {'pattern': {'type': 'string'}, 'path': {'type': 'string', 'default': '.'}}.\n(6) run_bash, whose description is <desc>Run a shell command and return output.    Args:      cmd: Shell command to execute    Returns:      Command output (stdout and stderr combined)  </desc>. It takes arguments {'cmd': {'type': 'string'}}.\n(7) finish, whose description is <desc>Marks the task as complete. That is, signals that all information for producing the outputs, i.e. `answer`, `affected_files`, are now available to be extracted.</desc>. It takes arguments {}.\nWhen providing `next_tool_args`, the value inside the field must be in JSON format",
+      "instructions": "You are a concise coding assistant. Help the user with their coding task by using the available tools to read, write, edit files, search the codebase, and run commands.\n\nYou are tasked with producing the following outputs given the inputs `task`:\n- {answer}\n- {affected_files}        # note: the value you produce must adhere to the JSON schema: {\"type\": \"array\", \"items\": {\"type\": \"string\"}}\n\nYou have access to a Python REPL environment. Write Python code and it will be executed. You will see the output, then write more code based on what you learned. This is an iterative process.\n\nAvailable:\n- Variables: `task` (your input data)\n- `llm_query(prompt)` - query a sub-LLM (~500K char capacity) for semantic analysis\n- `llm_query_batched(prompts)` - query multiple prompts concurrently (much faster for multiple queries)\n- `print()` - ALWAYS print to see results\n- `SUBMIT(answer, affected_files)` - submit final output when done\n- Standard libraries: re, json, collections, math, etc.\n\nIMPORTANT: This is ITERATIVE. Each code block you write will execute, you'll see the output, then you decide what to do next. Do NOT try to solve everything in one step.\n\n1. EXPLORE FIRST - Look at your data before processing it. Print samples, check types/lengths, understand the structure.\n2. ITERATE - Write small code snippets, observe outputs, then decide next steps. State persists between iterations.\n3. VERIFY BEFORE SUBMITTING - If results seem wrong (zeros, empty, unexpected), reconsider your approach.\n4. USE llm_query FOR SEMANTICS - String matching finds WHERE things are; llm_query understands WHAT things mean.\n5. MINIMIZE RETYPING (INPUTS & OUTPUTS) - When values are long, precise, or error-prone (IDs, numbers, code, quotes), re-access them via variables and parse/compute in code instead of retyping. Use small, targeted prints to sanity-check, but avoid manual copying when variables can carry the exact value.\n6. 
SUBMIT ONLY AFTER SEEING OUTPUTS - SUBMIT ends the current run immediately. If you need to inspect printed output, run it in one step, review the result, then call SUBMIT in a later step.\n\nYou have max 50 sub-LLM calls. When done, call SUBMIT() with your output.\nAdditional tools available (use these instead of standard library equivalents):\n- `readfile(path: str, offset: int, limit: int) -> str` - Read file contents with line numbers.\n- `writefile(path: str, content: str) -> str` - Write content to a file.\n- `editfile(path: str, old: str, new: str, replace_all: bool) -> str` - Replace text in a file.\n- `globfiles(pattern: str, path: str) -> str` - Find files matching a glob pattern, sorted by modification time.\n- `grepfiles(pattern: str, path: str) -> str` - Search files for a regex pattern.\n- `runbash(cmd: str) -> str` - Run a shell command and return output.",
      "fields": [
        {
-          "prefix": "Task:",
-          "description": "The user's coding task or question"
+          "prefix": "Variables Info:",
+          "description": "Metadata about the variables available in the REPL"
        },
        {
-          "prefix": "Trajectory:",
-          "description": "${trajectory}"
+          "prefix": "Repl History:",
+          "description": "Previous REPL code executions and their outputs"
        },
        {
-          "prefix": "Next Thought:",
-          "description": "${next_thought}"
+          "prefix": "Iteration:",
+          "description": "Current iteration number (1-indexed) out of max_iterations"
        },
        {
-          "prefix": "Next Tool Name:",
-          "description": "${next_tool_name}"
+          "prefix": "Reasoning:",
+          "description": "Think step-by-step: what do you know? What remains? Plan your next action."
        },
        {
-          "prefix": "Next Tool Args:",
-          "description": "${next_tool_args}"
+          "prefix": "Code:",
+          "description": "Python code to execute."
        }
      ]
    },
@@ -41,24 +41,20 @@
      "api_base": "https://openrouter.ai/api/v1"
    }
  },
-  "agent.extract.predict": {
+  "agent.extract": {
    "traces": [],
    "train": [],
    "demos": [],
    "signature": {
-      "instructions": "You are a concise coding assistant. Help the user with their coding task by using the available tools to read, write, edit files, search the codebase, and run commands.",
+      "instructions": "The trajectory was generated with the following objective: \nYou are a concise coding assistant. Help the user with their coding task by using the available tools to read, write, edit files, search the codebase, and run commands.\n\n\nBased on the REPL trajectory, extract the final outputs now.\n\n            Review your trajectory to see what information you gathered and what values you computed, then provide the final outputs.",
      "fields": [
        {
-          "prefix": "Task:",
-          "description": "The user's coding task or question"
+          "prefix": "Variables Info:",
+          "description": "Metadata about the variables available in the REPL"
        },
        {
-          "prefix": "Trajectory:",
-          "description": "${trajectory}"
-        },
-        {
-          "prefix": "Reasoning: Let's think step by step in order to",
-          "description": "${reasoning}"
+          "prefix": "Repl History:",
+          "description": "Your REPL interactions so far"
        },
        {
          "prefix": "Answer:",