From c1847586befeb0300110034a40a22ff8420d7c0b Mon Sep 17 00:00:00 2001
From: jmanhype
Date: Sun, 19 Oct 2025 14:16:34 -0500
Subject: [PATCH] (no commit message)

---
 README.md                        | 264 ++++++++++++++++++++++++++++++-
 agent.json                       | 136 ++++++++++++++++
 auto_classes.json                |   5 +
 config.json                      |   5 +
 contracts/assumption_v1.py       |  11 ++
 contracts/innovation_layer_v1.py |   7 +
 contracts/job_v1.py              |   8 +
 contracts/scorecard_v1.py        |  14 ++
 plugins/llm_dspy.py              | 173 ++++++++++++++++++++
 pyproject.toml                   |   6 +
 service/modaic_agent.py          | 142 +++++++++++++++++
 service/retrievers.py            |  73 +++++++++
 tools/push_modaic_agent.py       |  41 +++++
 13 files changed, 884 insertions(+), 1 deletion(-)
 create mode 100644 agent.json
 create mode 100644 auto_classes.json
 create mode 100644 config.json
 create mode 100644 contracts/assumption_v1.py
 create mode 100644 contracts/innovation_layer_v1.py
 create mode 100644 contracts/job_v1.py
 create mode 100644 contracts/scorecard_v1.py
 create mode 100644 plugins/llm_dspy.py
 create mode 100644 pyproject.toml
 create mode 100644 service/modaic_agent.py
 create mode 100644 service/retrievers.py
 create mode 100644 tools/push_modaic_agent.py

diff --git a/README.md b/README.md
index 959598f..f24cb17 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,264 @@
-# jtbd-agent
+# JTBD Idea Validator
+
+A **Jobs to be Done (JTBD)** analysis agent powered by DSPy that validates business ideas through comprehensive framework-based evaluation.
+
+## What it does
+
+This tool performs systematic business idea validation using JTBD methodology:
+
+- **Assumption Deconstruction**: Extract and classify core business assumptions (levels 1-3)
+- **JTBD Analysis**: Generate 5 distinct job statements with Four Forces (push/pull/anxiety/inertia)
+- **Moat Analysis**: Assess competitive advantages using innovation layers
+- **Scoring & Judgment**: Evaluate ideas across 5 criteria with detailed rationales
+- **Validation Planning**: Create actionable plans for assumption testing
+
+## Quick Start
+
+```bash
+# Setup environment
+python -m venv .venv && source .venv/bin/activate  # Windows: .venv\Scripts\activate
+pip install -U pip
+pip install -e .
+
+# Configure LLM (required)
+export OPENAI_API_KEY=...     # for OpenAI models
+export ANTHROPIC_API_KEY=...  # for Claude models
+
+# Run analysis on example
+python run_direct.py examples/rehab_exercise_tracking_rich.json
+
+# Or specify custom output location
+python run_direct.py examples/insurance_photo_ai.json --output custom_reports/
+```
+
+## Output Files
+
+The tool generates organized reports in timestamped directories:
+
+- **Gamma Presentations**: `gamma/presentation.md` (Gamma-ready) + `gamma/presentation.html` (preview)
+- **CSV Exports**: `csv/` - Structured data for spreadsheet analysis
+- **JSON Data**: `json/analysis_data.json` - Raw analysis data
+- **Charts**: `assets/` - Radar charts, waterfall charts, and Four Forces diagrams
+
+## Technical Architecture
+
+This implementation uses **DSPy** (Declarative Self-improving Language Programs) for structured LLM interactions through **Signatures** and **Modules**.
+
+### DSPy Signatures
+
+Signatures define input/output schemas for LLM tasks:
+
+```python
+class DeconstructSig(dspy.Signature):
+    """Extract assumptions and classify levels.
+    Return JSON list of objects: [{text, level(1..3), confidence, evidence:[]}]"""
+    idea: str = dspy.InputField()
+    hunches: List[str] = dspy.InputField()
+    assumptions_json: str = dspy.OutputField()
+
+class JobsSig(dspy.Signature):
+    """Generate 5 distinct JTBD statements with Four Forces each."""
+    context: str = dspy.InputField()
+    constraints: str = dspy.InputField()
+    jobs_json: str = dspy.OutputField()
+```
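+
+A signature can be exercised directly with `dspy.Predict` — a minimal sketch, not part of the shipped pipeline; it assumes an API key is set and `configure_lm()` from `plugins/llm_dspy.py` has been run, and the idea/hunch strings are illustrative:
+
+```python
+import json
+import dspy
+from plugins.llm_dspy import DeconstructSig, configure_lm
+
+configure_lm()  # picks the model from JTBD_DSPY_MODEL
+predict = dspy.Predict(DeconstructSig)
+out = predict(
+    idea="Remote check-ins that keep post-surgery patients on their exercise plan",
+    hunches=["Patients skip home exercises without accountability"],
+)
+assumptions = json.loads(out.assumptions_json)  # [{text, level, confidence, evidence}]
+```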
+
+### DSPy Modules
+
+Modules implement business logic with automatic prompt optimization:
+
+- **`Deconstruct`**: Extracts assumptions with confidence scoring
+- **`Jobs`**: Generates JTBD statements with Four Forces analysis
+- **`Moat`**: Applies Doblin innovation framework + strategic triggers
+- **`JudgeScore`**: Evaluates ideas across 5 standardized criteria:
+  - Underserved Opportunity
+  - Strategic Impact
+  - Market Scale
+  - Solution Differentiability
+  - Business Model Innovation
+
+### Dual-Judge Arbitration
+
+The system uses two independent judges with tie-breaking for scoring reliability:
+
+```python
+USE_DOUBLE_JUDGE = os.getenv("JTBD_DOUBLE_JUDGE", "1") == "1"  # default ON
+
+def judge_with_arbitration(summary: str):
+    if USE_DOUBLE_JUDGE:
+        score1 = JudgeScore()(summary=summary)
+        score2 = JudgeScore()(summary=summary)
+        return merge_scores(score1, score2)  # tie-breaker logic
+    return JudgeScore()(summary=summary)
+```
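+
+`merge_scores` above is a simplified placeholder; the shipped rule in `plugins/llm_dspy.py` averages a criterion's two scores when the judges differ by at most 1.5 points and takes the lower score otherwise. A sketch of that rule in isolation:
+
+```python
+def merge_criterion(score_a: float, score_b: float, threshold: float = 1.5) -> float:
+    """Average close scores; fall back to the pessimistic judge on disagreement."""
+    if abs(score_a - score_b) <= threshold:
+        return round((score_a + score_b) / 2.0, 1)
+    return min(score_a, score_b)
+
+merge_criterion(7.0, 6.0)  # -> 6.5 (close enough, average)
+merge_criterion(8.5, 6.0)  # -> 6.0 (judges disagree, take the lower)
+```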
+
+## Configuration
+
+**Model Selection**: Edit `plugins/llm_dspy.py` → `configure_lm()` or set `JTBD_DSPY_MODEL`:
+
+```bash
+export JTBD_DSPY_MODEL="gpt-4o-mini"                 # OpenAI
+export JTBD_DSPY_MODEL="claude-3-5-sonnet-20240620"  # Anthropic
+```
+
+**Other Options**:
+
+- `JTBD_LLM_TEMPERATURE=0.2` - Response randomness (0.0-1.0)
+- `JTBD_DOUBLE_JUDGE=1` - Enable dual-judge arbitration (default: enabled)
+
+## Input Format
+
+Ideas are defined in JSON files with the following structure:
+
+```json
+{
+  "idea_id": "urn:idea:example:001",
+  "title": "Your business idea title",
+  "hunches": [
+    "Key assumption about the problem",
+    "Belief about customer behavior",
+    "Market hypothesis"
+  ],
+  "problem_statement": "Clear description of the problem",
+  "solution_overview": "How your idea solves the problem",
+  "target_customer": {
+    "primary": "Main customer segment",
+    "secondary": "Secondary users",
+    "demographics": "Age, profession, context"
+  },
+  "value_propositions": ["Key benefit 1", "Key benefit 2"],
+  "competitive_landscape": ["Competitor 1", "Competitor 2"],
+  "revenue_streams": ["Revenue model 1", "Revenue model 2"]
+}
+```
+
+See `examples/` directory for complete examples.
+
+## Alternative Execution Methods
+
+### Direct Python Script
+
+```bash
+python run_direct.py your_idea.json
+```
+
+### FastAPI Service (Optional)
+
+Run as a service with HTTP endpoints:
+
+```bash
+uvicorn service.dspy_sidecar:app --port 8088 --reload
+```
+
+Exposes endpoints: `/deconstruct`, `/jobs`, `/moat`, `/judge`
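+
+A quick smoke test with `requests` — a sketch only: the payload field names are assumed to mirror the module arguments (e.g. `summary` for `/judge`); check `service/dspy_sidecar.py` for the exact request schema and any `API_BEARER_TOKEN` requirement:
+
+```python
+import requests
+
+resp = requests.post(
+    "http://localhost:8088/judge",
+    json={"summary": "AI-assisted photo triage for home insurance claims"},
+    timeout=60,
+)
+print(resp.json())  # expected: a scorecard with 5 criteria and a total
+```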
+
+### Prefect Flow (Advanced)
+
+Use the Prefect workflow engine (see `orchestration/`) for complex orchestration scenarios.
+
+## Advanced Features
+
+### Judge Optimization with DSPy
+
+The system supports **compiled judge models** using DSPy's GEPA optimizer (reflective prompt evolution):
+
+```bash
+# 1. Add training data to data/judge_train.jsonl
+# Format: {"summary": "...", "scorecard": {"criteria":[...], "total": 6.7}}
+
+# 2. Train the judge using GEPA (evolutionary optimizer)
+python tools/optimize_judge.py --train data/judge_train.jsonl --out artifacts/judge_compiled.dspy --budget medium
+
+# 3. Use the compiled judge (automatically loaded at runtime)
+export JTBD_JUDGE_COMPILED=artifacts/judge_compiled.dspy
+python run_direct.py your_idea.json
+```
+
+**GEPA** is an evolutionary prompt optimizer that:
+- Captures full execution traces of DSPy modules
+- Uses reflection to evolve text components (prompts/instructions)
+- Allows textual feedback at predictor or system level
+- Is reported to outperform reinforcement learning approaches
+
+From the actual implementation in `tools/optimize_judge.py`:
+
+```python
+from dspy.teleprompt import GEPA
+
+def non_decreasing_metric(example, pred, trace=None, pred_name=None, pred_trace=None):
+    """Returns 1 if predicted total >= gold total, else 0."""
+    try:
+        p = json.loads(pred.scorecard_json)
+        g = json.loads(example.scorecard_json)
+        return 1.0 if p.get("total",0) >= g.get("total",0) else 0.0
+    except Exception:
+        return 0.0
+
+# Budget options: "light", "medium", "heavy"
+tele = GEPA(metric=non_decreasing_metric, auto=budget)
+compiled = tele.compile(dspy.Predict(JudgeScoreSig), trainset=train)
+```
+
+The compiled judge replaces the default `dspy.Predict` with an optimized program:
+
+```python
+_compiled_judge = None
+if JUDGE_COMPILED_PATH and os.path.exists(JUDGE_COMPILED_PATH):
+    with open(JUDGE_COMPILED_PATH, "rb") as f:
+        _compiled_judge = pickle.load(f)
+
+class JudgeScore(dspy.Module):
+    def __init__(self):
+        super().__init__()
+        self.p = _compiled_judge or dspy.Predict(JudgeScoreSig)  # fallback
+```
+
+### Environment Variables
+
+- `OPENAI_API_KEY` / `ANTHROPIC_API_KEY` - API keys for LLM providers
+- `JTBD_DSPY_MODEL` - Model name (default: "gpt-4o-mini")
+- `JTBD_LLM_TEMPERATURE` - Temperature setting (default: 0.2)
+- `JTBD_LLM_SEED` - Random seed for reproducibility (default: 42)
+- `JTBD_DOUBLE_JUDGE` - Enable dual-judge arbitration (default: 1)
+- `JTBD_JUDGE_COMPILED` - Path to compiled judge model
+- `OTEL_SERVICE_NAME` / `DEPLOY_ENV` - Identify the service in OTLP exports (defaults: `jtbd-dspy-sidecar`, `dev`)
+- `OTLP_ENDPOINT` / `OTLP_HEADERS` - Configure OTLP HTTP exporter endpoint and optional headers
+- `MODAIC_AGENT_ID` / `MODAIC_AGENT_REV` - Load a precompiled Modaic agent instead of the local default
+- `MODAIC_TOKEN` - Authentication token for private Modaic repositories
+- `RETRIEVER_KIND` / `RETRIEVER_NOTES` - Retriever selection (e.g., `notes`) and seed data for contextual hints
+- `API_BEARER_TOKEN` - Optional bearer token required by the FastAPI service
+- `STREAM_CHUNK_SIZE` - Chunk size for SSE streaming responses (default: 60)
+
+## Project Structure
+
+```
+├── contracts/               # Pydantic models (v1 frozen contracts)
+├── core/                    # Main business logic
+│   ├── pipeline.py          # Main analysis pipeline
+│   ├── score.py             # Scoring algorithms
+│   ├── plan.py              # Validation planning
+│   └── export_*.py          # Output formatters
+├── plugins/                 # External integrations
+│   ├── llm_dspy.py          # DSPy LLM interface
+│   └── charts_quickchart.py # Chart generation
+├── service/                 # FastAPI service
+├── orchestration/           # Prefect flows
+├── examples/                # Sample business ideas
+├── tools/                   # Optimization utilities
+└── run_direct.py            # Main CLI entry point
+```
+
+## Dependencies
+
+- **DSPy**: Language model orchestration framework
+- **Pydantic**: Data validation and serialization
+- **FastAPI/Uvicorn**: Optional HTTP service
+- **Modaic**: Precompiled agent runtime with retriever support
+- **OpenTelemetry**: Request tracing + OTLP exporter (service observability)
+- **sse-starlette**: Server-Sent Events streaming for OpenAI-compatible responses
+- **Prefect**: Optional workflow orchestration
+- **Requests**: HTTP client for external services
+
+## Contract Stability
+
+Data contracts in `contracts/*_v1.py` are frozen. For changes, create new `v2` versions rather than modifying existing contracts to ensure backward compatibility.
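+
+For example, a hypothetical `AssumptionV2` would be added alongside the frozen v1 model rather than editing it in place:
+
+```python
+# contracts/assumption_v2.py — illustrative only; this file does not exist yet
+from typing import List, Optional
+from pydantic import BaseModel, ConfigDict, Field
+
+class AssumptionV2(BaseModel):
+    model_config = ConfigDict(extra='forbid', frozen=True, strict=True)
+    assumption_id: str
+    text: str
+    level: int = Field(ge=1, le=3)
+    confidence: float = Field(ge=0.0, le=1.0)
+    evidence: List[str] = []
+    validation_exp_id: Optional[str] = None
+    tags: List[str] = []  # hypothetical new field; AssumptionV1 stays untouched
+```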
diff --git a/agent.json b/agent.json
new file mode 100644
index 0000000..3f5a421
--- /dev/null
+++ b/agent.json
@@ -0,0 +1,136 @@
+{
+  "_deconstruct.p": {
+    "traces": [],
+    "train": [],
+    "demos": [],
+    "signature": {
+      "instructions": "Extract assumptions and classify levels.\nReturn JSON list of objects: [{text, level(1..3), confidence, evidence:[]}]",
+      "fields": [
+        {
+          "prefix": "Idea:",
+          "description": "${idea}"
+        },
+        {
+          "prefix": "Hunches:",
+          "description": "${hunches}"
+        },
+        {
+          "prefix": "Assumptions Json:",
+          "description": "${assumptions_json}"
+        }
+      ]
+    },
+    "lm": null
+  },
+  "_jobs.p": {
+    "traces": [],
+    "train": [],
+    "demos": [],
+    "signature": {
+      "instructions": "Generate 5 distinct JTBD statements with Four Forces (push/pull/anxiety/inertia) each.\nReturn JSON list: [{statement, forces:{push:[], pull:[], anxiety:[], inertia:[]}}]",
+      "fields": [
+        {
+          "prefix": "Context:",
+          "description": "${context}"
+        },
+        {
+          "prefix": "Constraints:",
+          "description": "${constraints}"
+        },
+        {
+          "prefix": "Jobs Json:",
+          "description": "${jobs_json}"
+        }
+      ]
+    },
+    "lm": null
+  },
+  "_moat.p": {
+    "traces": [],
+    "train": [],
+    "demos": [],
+    "signature": {
+      "instructions": "Apply Doblin/10-types + timing/ops/customer/value triggers to strengthen concept.\nReturn JSON list: [{type, trigger, effect}]",
+      "fields": [
+        {
+          "prefix": "Concept:",
+          "description": "${concept}"
+        },
+        {
+          "prefix": "Triggers:",
+          "description": "${triggers}"
+        },
+        {
+          "prefix": "Layers Json:",
+          "description": "${layers_json}"
+        }
+      ]
+    },
+    "lm": null
+  },
+  "react.react": {
+    "traces": [],
+    "train": [],
+    "demos": [],
+    "signature": {
+      "instructions": "Given the fields `question`, produce the fields `answer`.\n\nYou are an Agent. In each episode, you will be given the fields `question` as input. And you can see your past trajectory so far.\nYour goal is to use one or more of the supplied tools to collect any necessary information for producing `answer`.\n\nTo do this, you will interleave next_thought, next_tool_name, and next_tool_args in each turn, and also when finishing the task.\nAfter each tool call, you receive a resulting observation, which gets appended to your trajectory.\n\nWhen writing next_thought, you may reason about the current situation and plan for future steps.\nWhen selecting the next_tool_name and its next_tool_args, the tool must be one of:\n\n(1) retrieve. It takes arguments {'query': {'type': 'string'}}.\n(2) deconstruct. It takes arguments {'idea': {'type': 'string'}, 'hunches': {'anyOf': [{'items': {'type': 'string'}, 'type': 'array'}, {'type': 'null'}], 'default': None}}.\n(3) jobs. It takes arguments {'context': {'anyOf': [{'additionalProperties': True, 'type': 'object'}, {'type': 'null'}], 'default': None}, 'constraints': {'anyOf': [{'items': {'type': 'string'}, 'type': 'array'}, {'type': 'null'}], 'default': None}}.\n(4) moat. It takes arguments {'concept': {'type': 'string'}, 'triggers': {'anyOf': [{'type': 'string'}, {'type': 'null'}], 'default': ''}}.\n(5) judge. It takes arguments {'summary': {'type': 'string'}}.\n(6) finish, whose description is Marks the task as complete. That is, signals that all information for producing the outputs, i.e. `answer`, are now available to be extracted.. It takes arguments {}.\nWhen providing `next_tool_args`, the value inside the field must be in JSON format",
+      "fields": [
+        {
+          "prefix": "Question:",
+          "description": "${question}"
+        },
+        {
+          "prefix": "Trajectory:",
+          "description": "${trajectory}"
+        },
+        {
+          "prefix": "Next Thought:",
+          "description": "${next_thought}"
+        },
+        {
+          "prefix": "Next Tool Name:",
+          "description": "${next_tool_name}"
+        },
+        {
+          "prefix": "Next Tool Args:",
+          "description": "${next_tool_args}"
+        }
+      ]
+    },
+    "lm": null
+  },
+  "react.extract.predict": {
+    "traces": [],
+    "train": [],
+    "demos": [],
+    "signature": {
+      "instructions": "Given the fields `question`, produce the fields `answer`.",
+      "fields": [
+        {
+          "prefix": "Question:",
+          "description": "${question}"
+        },
+        {
+          "prefix": "Trajectory:",
+          "description": "${trajectory}"
+        },
+        {
+          "prefix": "Reasoning: Let's think step by step in order to",
+          "description": "${reasoning}"
+        },
+        {
+          "prefix": "Answer:",
+          "description": "${answer}"
+        }
+      ]
+    },
+    "lm": null
+  },
+  "metadata": {
+    "dependency_versions": {
+      "python": "3.10",
+      "dspy": "3.0.3",
+      "cloudpickle": "3.1"
+    }
+  }
+}
diff --git a/auto_classes.json b/auto_classes.json
new file mode 100644
index 0000000..e32cef6
--- /dev/null
+++ b/auto_classes.json
@@ -0,0 +1,5 @@
+{
+  "AutoConfig": "service.modaic_agent.JTBDConfig",
+  "AutoAgent": "service.modaic_agent.JTBDDSPyAgent",
+  "AutoRetriever": "service.retrievers.NotesRetriever"
+}
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..43749ed
--- /dev/null
+++ b/config.json
@@ -0,0 +1,5 @@
+{
+  "default_mode": "deconstruct",
+  "allow_freeform_route": true,
+  "return_json": true
+}
\ No newline at end of file
diff --git a/contracts/assumption_v1.py b/contracts/assumption_v1.py
new file mode 100644
index 0000000..32e2571
--- /dev/null
+++ b/contracts/assumption_v1.py
@@ -0,0 +1,11 @@
+from pydantic import BaseModel, Field, ConfigDict
+from typing import List, Optional
+
+class AssumptionV1(BaseModel):
+    model_config = ConfigDict(extra='forbid', frozen=True, strict=True)
+    assumption_id: str
+    text: str
+    level: int = Field(ge=1, le=3, description="1=observed,2=educated,3=strategic")
+    confidence: float = Field(ge=0.0, le=1.0)
+    evidence: List[str] = []
+    validation_exp_id: Optional[str] = None
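+
+# Minimal usage sketch (field values are illustrative); the frozen+strict config
+# means instances validate eagerly and reject mutation after construction.
+if __name__ == "__main__":
+    a = AssumptionV1(
+        assumption_id="assump:demo",
+        text="Patients skip prescribed home exercises without accountability",
+        level=2,
+        confidence=0.7,
+        evidence=["intake interviews"],
+    )
+    print(a.model_dump_json())
+    # a.confidence = 0.9  # would raise a ValidationError: the model is frozen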
diff --git a/contracts/innovation_layer_v1.py b/contracts/innovation_layer_v1.py
new file mode 100644
index 0000000..f519e07
--- /dev/null
+++ b/contracts/innovation_layer_v1.py
@@ -0,0 +1,7 @@
+from pydantic import BaseModel, ConfigDict
+class InnovationLayerV1(BaseModel):
+    model_config = ConfigDict(extra='forbid', frozen=True, strict=True)
+    layer_id: str
+    type: str
+    trigger: str
+    effect: str
diff --git a/contracts/job_v1.py b/contracts/job_v1.py
new file mode 100644
index 0000000..aa6ea9d
--- /dev/null
+++ b/contracts/job_v1.py
@@ -0,0 +1,8 @@
+from pydantic import BaseModel, ConfigDict
+from typing import Dict, List
+
+class JobV1(BaseModel):
+    model_config = ConfigDict(extra='forbid', frozen=True, strict=True)
+    job_id: str
+    statement: str
+    forces: Dict[str, List[str]]  # push/pull/anxiety/inertia
diff --git a/contracts/scorecard_v1.py b/contracts/scorecard_v1.py
new file mode 100644
index 0000000..e9bd673
--- /dev/null
+++ b/contracts/scorecard_v1.py
@@ -0,0 +1,14 @@
+from pydantic import BaseModel, Field, ConfigDict
+from typing import List
+
+class Criterion(BaseModel):
+    name: str
+    score: float = Field(ge=0, le=10)
+    rationale: str
+
+class ScorecardV1(BaseModel):
+    model_config = ConfigDict(extra='forbid', frozen=True, strict=True)
+    target_id: str
+    scheme: str = "v1"
+    criteria: List[Criterion]
+    total: float = Field(ge=0, le=10)
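+
+# Minimal usage sketch (scores are illustrative): the total is the mean of the
+# criterion scores, matching how JudgeScore in plugins/llm_dspy.py computes it.
+if __name__ == "__main__":
+    crits = [
+        Criterion(name="Underserved Opportunity", score=7.0, rationale="clear gap"),
+        Criterion(name="Market Scale", score=8.0, rationale="large segment"),
+    ]
+    total = round(sum(c.score for c in crits) / len(crits), 2)  # 7.5
+    card = ScorecardV1(target_id="target:demo", criteria=crits, total=total)
+    print(card.model_dump_json())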
diff --git a/plugins/llm_dspy.py b/plugins/llm_dspy.py
new file mode 100644
index 0000000..019d04d
--- /dev/null
+++ b/plugins/llm_dspy.py
@@ -0,0 +1,173 @@
+import os, json, hashlib, random
+import dspy
+from typing import List, Dict, Tuple
+from contracts.assumption_v1 import AssumptionV1
+from contracts.job_v1 import JobV1
+from contracts.scorecard_v1 import ScorecardV1, Criterion
+from contracts.innovation_layer_v1 import InnovationLayerV1
+
+TEMPERATURE = float(os.getenv("JTBD_LLM_TEMPERATURE", "0.2"))
+SEED = int(os.getenv("JTBD_LLM_SEED", "42"))
+USE_DOUBLE_JUDGE = os.getenv("JTBD_DOUBLE_JUDGE", "1") == "1"  # default ON
+
+def _uid(s: str) -> str:
+    return hashlib.sha1(s.encode()).hexdigest()[:10]
+
+def configure_lm():
+    """Configure DSPy global LLM. Edit model name here to your provider choice."""
+    model = os.getenv("JTBD_DSPY_MODEL", "gpt-4o-mini")
+
+    # Check if it's a Claude model
+    if "claude" in model.lower():
+        try:
+            lm = dspy.Anthropic(model=model, max_tokens=4000, temperature=TEMPERATURE)
+        except Exception:
+            # Fallback to generic LM
+            lm = dspy.LM(model=model, max_tokens=4000, temperature=TEMPERATURE)
+    else:
+        # Try OpenAI first
+        try:
+            lm = dspy.OpenAI(model=model, max_tokens=4000, temperature=TEMPERATURE, seed=SEED)
+        except Exception:
+            # Fallback to a generic LM
+            lm = dspy.LM(model=model, max_tokens=4000, temperature=TEMPERATURE)
+    dspy.configure(lm=lm)
+
+# ---------------- Signatures ----------------
+class DeconstructSig(dspy.Signature):
+    """Extract assumptions and classify levels.
+    Return JSON list of objects: [{text, level(1..3), confidence, evidence:[]}]"""
+    idea: str = dspy.InputField()
+    hunches: List[str] = dspy.InputField()
+    assumptions_json: str = dspy.OutputField()
+
+class JobsSig(dspy.Signature):
+    """Generate 5 distinct JTBD statements with Four Forces (push/pull/anxiety/inertia) each.
+    Return JSON list: [{statement, forces:{push:[], pull:[], anxiety:[], inertia:[]}}]"""
+    context: str = dspy.InputField()
+    constraints: str = dspy.InputField()
+    jobs_json: str = dspy.OutputField()
+
+class MoatSig(dspy.Signature):
+    """Apply Doblin/10-types + timing/ops/customer/value triggers to strengthen concept.
+    Return JSON list: [{type, trigger, effect}]"""
+    concept: str = dspy.InputField()
+    triggers: str = dspy.InputField()
+    layers_json: str = dspy.OutputField()
+
+class JudgeScoreSig(dspy.Signature):
+    """Score business idea on exactly these 5 criteria (0-10 scale) with rationales.
+    Return JSON: {"criteria":[{"name":"Underserved Opportunity","score":7.0,"rationale":"Clear need exists..."}, {"name":"Strategic Impact","score":6.0,"rationale":"..."}, {"name":"Market Scale","score":8.0,"rationale":"..."}, {"name":"Solution Differentiability","score":5.0,"rationale":"..."}, {"name":"Business Model Innovation","score":7.0,"rationale":"..."}], "total":6.6}"""
+    summary: str = dspy.InputField()
+    scorecard_json: str = dspy.OutputField()
+
+# ---------------- Modules ----------------
+class Deconstruct(dspy.Module):
+    def __init__(self): super().__init__(); self.p = dspy.Predict(DeconstructSig)
+    def forward(self, idea: str, hunches: List[str]):
+        out = self.p(idea=idea, hunches=hunches)
+        data = json.loads(out.assumptions_json)
+        # post-process: bound / defaults
+        items = []
+        for obj in data[:8]:
+            text = obj.get("text","").strip()
+            if not text: continue
+            level = int(obj.get("level", 2))
+            level = 1 if level < 1 else 3 if level > 3 else level
+            conf = float(obj.get("confidence", 0.6))
+            conf = max(0.0, min(1.0, conf))
+            items.append(AssumptionV1(
+                assumption_id=f"assump:{_uid(text)}", text=text, level=level, confidence=conf,
+                evidence=[e for e in obj.get("evidence", []) if isinstance(e, str)]
+            ))
+        return items
+
+class Jobs(dspy.Module):
+    def __init__(self): super().__init__(); self.p = dspy.Predict(JobsSig)
+    def forward(self, context: Dict[str,str], constraints: List[str]):
+        out = self.p(context=json.dumps(context), constraints=json.dumps(constraints))
+        arr = json.loads(out.jobs_json)
+        jobs = []
+        seen = set()
+        for obj in arr[:12]:
+            stmt = obj.get("statement","").strip()
+            if not stmt or stmt in seen: continue
+            seen.add(stmt)
+            forces = obj.get("forces",{}) or {}
+            for k in ["push","pull","anxiety","inertia"]:
+                forces.setdefault(k, [])
+            jobs.append(JobV1(job_id=f"job:{_uid(stmt)}", statement=stmt, forces=forces))
+            if len(jobs) >= 5: break
+        return jobs
+
+class Moat(dspy.Module):
+    def __init__(self): super().__init__(); self.p = dspy.Predict(MoatSig)
+    def forward(self, concept: str, triggers: str):
+        out = self.p(concept=concept, triggers=triggers)
+        arr = json.loads(out.layers_json)
+        layers = []
+        for obj in arr[:6]:
+            t = str(obj.get("type","")).strip()
+            tr = str(obj.get("trigger","")).strip()
+            ef = str(obj.get("effect","")).strip()
+            if not t or not tr or not ef: continue
+            layers.append(InnovationLayerV1(layer_id=f"layer:{_uid(t+tr+ef)}", type=t, trigger=tr, effect=ef))
+        return layers
+
+CRITERIA = ["Underserved Opportunity","Strategic Impact","Market Scale","Solution Differentiability","Business Model Innovation"]
+
+
+import pickle
+JUDGE_COMPILED_PATH = os.getenv("JTBD_JUDGE_COMPILED")
+_compiled_judge = None
+if JUDGE_COMPILED_PATH and os.path.exists(JUDGE_COMPILED_PATH):
+    try:
+        with open(JUDGE_COMPILED_PATH, "rb") as f:
+            _compiled_judge = pickle.load(f)
+    except Exception:
+        _compiled_judge = None
+
+class JudgeScore(dspy.Module):
+    def __init__(self): super().__init__(); self.p = _compiled_judge or dspy.Predict(JudgeScoreSig)
+    def forward(self, summary: str):
+        out = self.p(summary=summary)
+        try:
+            data = json.loads(out.scorecard_json)
+        except json.JSONDecodeError as e:
+            print(f"JSON decode error: {e}")
+            print(f"Raw output: {out.scorecard_json}")
+            # Return default scores if JSON parsing fails
+            data = {"criteria": [], "total": 5.0}
+        crits = []
+        for item in data.get("criteria", []):
+            name = item.get("name")
+            if name not in CRITERIA: continue
+            score = float(item.get("score", 5.0))
+            score = max(0.0, min(10.0, score))
+            rationale = item.get("rationale","")
+            crits.append(Criterion(name=name, score=score, rationale=rationale))
+        # Fill any missing criteria to maintain schema shape
+        present = {c.name for c in crits}
+        for name in CRITERIA:
+            if name not in present:
+                crits.append(Criterion(name=name, score=5.0, rationale="defaulted"))
+        total = round(sum(c.score for c in crits)/len(crits), 2)
+        return ScorecardV1(target_id="target:final", criteria=crits, total=total)
+
+# --------------- Double-judge arbitration (optional) ---------------
+def judge_with_arbitration(summary: str) -> ScorecardV1:
+    if not USE_DOUBLE_JUDGE:
+        return JudgeScore()(summary=summary)
+    j1 = JudgeScore()(summary=summary)
+    j2 = JudgeScore()(summary=summary)
+    # Simple tie-breaker: take the criterion-wise average if they differ by <=1.5, else choose the lower.
+    merged = []
+    for name in CRITERIA:
+        c1 = next(c for c in j1.criteria if c.name==name)
+        c2 = next(c for c in j2.criteria if c.name==name)
+        diff = abs(c1.score - c2.score)
+        score = (c1.score + c2.score)/2.0 if diff <= 1.5 else min(c1.score, c2.score)
+        rationale = f"arb: {c1.rationale} | {c2.rationale}"
+        merged.append(Criterion(name=name, score=round(score,1), rationale=rationale))
+    total = round(sum(c.score for c in merged)/len(merged), 2)
+    return ScorecardV1(target_id="target:final", criteria=merged, total=total)
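+
+# Smoke-test sketch (requires a configured API key; the summary is illustrative):
+if __name__ == "__main__":
+    configure_lm()
+    card = judge_with_arbitration(
+        "Subscription service pairing post-surgery patients with remote physical therapists."
+    )
+    print(card.model_dump_json(indent=2))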
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..dae8620
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,6 @@
+[project]
+name = "jtbd-agent"
+version = "0.1.0"
+requires-python = ">=3.10"
+dependencies = ["pydantic>=2.7", "prefect>=3.0.0", "requests>=2.32", "dspy-ai>=2.5.12", "fastapi>=0.111", "uvicorn>=0.30", "modaic>=0.1", "opentelemetry-api>=1.27", "opentelemetry-sdk>=1.27", "opentelemetry-exporter-otlp>=1.27", "opentelemetry-instrumentation-fastapi>=0.48b0", "sse-starlette>=2.0"]
+
diff --git a/service/modaic_agent.py b/service/modaic_agent.py
new file mode 100644
index 0000000..5626443
--- /dev/null
+++ b/service/modaic_agent.py
@@ -0,0 +1,142 @@
+"""Modaic-compatible JTBD DSPy agent with retriever integration."""
+
+from __future__ import annotations
+
+import json
+from typing import Any, Dict, List, Optional
+
+import dspy
+from modaic import PrecompiledAgent, PrecompiledConfig, Retriever
+
+from plugins.llm_dspy import (
+    Deconstruct,
+    Jobs,
+    Moat,
+    configure_lm,
+    judge_with_arbitration,
+)
+from service.retrievers import NullRetriever
+
+
+configure_lm()
+
+
+class JTBDConfig(PrecompiledConfig):
+    default_mode: str = "deconstruct"
+    allow_freeform_route: bool = True
+    return_json: bool = True
+
+
+class JTBDDSPyAgent(PrecompiledAgent):
+    """Agent exposing DSPy modules via Modaic's PrecompiledAgent interface."""
+
+    config: JTBDConfig
+
+    def __init__(self, config: Optional[JTBDConfig] = None, retriever: Optional[Retriever] = None, **kwargs):
+        config = config or JTBDConfig()
+        self.config = config
+        self.retriever = retriever or NullRetriever()
+
+        self._deconstruct = Deconstruct()
+        self._jobs = Jobs()
+        self._moat = Moat()
+
+        super().__init__(config=config, retriever=self.retriever, **kwargs)
+
+        # ReAct agent that can call the retriever alongside core tools.
+        self.react = dspy.ReAct(
+            signature="question->answer",
+            tools=[
+                self.retriever.retrieve,
+                self.deconstruct,
+                self.jobs,
+                self.moat,
+                self.judge,
+            ],
+        )
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+    def __call__(self, query: str, **kwargs) -> str:  # type: ignore[override]
+        return self.forward(query, **kwargs)
+
+    def forward(self, query: str, **kwargs) -> str:  # type: ignore[override]
+        # Allow JSON envelopes to force tool dispatch.
+        try:
+            payload = json.loads(query)
+        except Exception:
+            payload = None
+
+        if isinstance(payload, dict) and "tool" in payload and "args" in payload:
+            return self._dispatch(str(payload["tool"]), payload.get("args") or {})
+
+        if not self.config.allow_freeform_route:
+            return self._dispatch(self.config.default_mode, {"query": query})
+
+        lowered = query.lower()
+        if any(token in lowered for token in ("context", "note", "retriev")):
+            context = self.retriever.retrieve(query)
+            return self._as_json({"context": context})
+        if any(token in lowered for token in ("assumption", "deconstruct")):
+            return self.deconstruct(idea=query, hunches=[])
+        if "jtbd" in lowered or "job" in lowered:
+            return self.jobs(context={"prompt": query}, constraints=[])
+        if any(token in lowered for token in ("moat", "defens")):
+            return self.moat(concept=query, triggers="")
+        if any(token in lowered for token in ("judge", "score", "evaluate")):
+            return self.judge(summary=query)
+
+        return self._dispatch(self.config.default_mode, {"query": query})
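+
+    # Example envelope (the summary text is illustrative):
+    #   agent('{"tool": "judge", "args": {"summary": "AI photo triage for insurers"}}')
+    # bypasses the keyword routing above and dispatches straight to judge().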
+
+    # ------------------------------------------------------------------
+    # Tool wrappers
+    # ------------------------------------------------------------------
+    def deconstruct(self, idea: str, hunches: Optional[List[str]] = None) -> str:
+        items = self._deconstruct(idea=idea, hunches=hunches or [])
+        return self._as_json({"assumptions": [item.model_dump() for item in items]})
+
+    def jobs(self, context: Optional[Dict[str, Any]] = None, constraints: Optional[List[str]] = None) -> str:
+        jobs = self._jobs(context=context or {}, constraints=constraints or [])
+        return self._as_json({"jobs": [job.model_dump() for job in jobs]})
+
+    def moat(self, concept: str, triggers: Optional[str] = "") -> str:
+        layers = self._moat(concept=concept, triggers=triggers or "")
+        return self._as_json({"layers": [layer.model_dump() for layer in layers]})
+
+    def judge(self, summary: str) -> str:
+        scorecard = judge_with_arbitration(summary=summary)
+        return self._as_json({"scorecard": scorecard.model_dump()})
+
+    # ------------------------------------------------------------------
+    # Helpers
+    # ------------------------------------------------------------------
+    def _dispatch(self, tool: str, args: Dict[str, Any]) -> str:
+        slug = tool.lower()
+        if slug in {"retrieve", "retriever", "context"}:
+            context = self.retriever.retrieve(args.get("query", ""))
+            return self._as_json({"context": context})
+        if slug == "deconstruct":
+            return self.deconstruct(
+                idea=args.get("idea", ""),
+                hunches=args.get("hunches") or [],
+            )
+        if slug == "jobs":
+            return self.jobs(
+                context=args.get("context") or {},
+                constraints=args.get("constraints") or [],
+            )
+        if slug == "moat":
+            return self.moat(
+                concept=args.get("concept", ""),
+                triggers=args.get("triggers", ""),
+            )
+        if slug == "judge":
+            return self.judge(summary=args.get("summary", ""))
+        return self._as_json({"error": f"unknown tool '{tool}'"})
+
+    def _as_json(self, payload: Dict[str, Any]) -> str:
+        if self.config.return_json:
+            return json.dumps(payload)
+        return str(payload)
+
diff --git a/service/retrievers.py b/service/retrievers.py
new file mode 100644
index 0000000..93e4fdd
--- /dev/null
+++ b/service/retrievers.py
@@ -0,0 +1,73 @@
+"""Retriever implementations used by the JTBD DSPy agent."""
+
+from __future__ import annotations
+
+from typing import Iterable, List
+
+from modaic import PrecompiledConfig, Retriever
+
+
+class NullRetrieverConfig(PrecompiledConfig):
+    """Configuration placeholder for the null retriever."""
+
+
+class NotesRetrieverConfig(PrecompiledConfig):
+    """Serializable configuration for the in-memory notes retriever."""
+
+    notes: List[str] = []
+    top_k: int = 3
+
+
+class NullRetriever(Retriever):
+    """No-op retriever for environments without contextual data."""
+
+    config: NullRetrieverConfig
+
+    def __init__(self, config: NullRetrieverConfig | None = None, **kwargs):
+        super().__init__(config or NullRetrieverConfig(), **kwargs)
+
+    def retrieve(self, query: str) -> str:  # type: ignore[override]
+        return ""
+
+
+class NotesRetriever(Retriever):
+    """Very small keyword-based retriever backed by an in-memory list of notes."""
+
+    config: NotesRetrieverConfig
+
+    def __init__(
+        self,
+        notes: Iterable[str] | None = None,
+        top_k: int | None = None,
+        config: NotesRetrieverConfig | None = None,
+        **kwargs,
+    ):
+        if config is None:
+            cfg = NotesRetrieverConfig()
+            cfg.notes = list(notes or [])
+            if top_k is not None:
+                cfg.top_k = int(top_k)
+        else:
+            cfg = config
+            if notes is not None:
+                cfg.notes = list(notes)
+            if top_k is not None:
+                cfg.top_k = int(top_k)
+
+        super().__init__(cfg, **kwargs)
+
+    def retrieve(self, query: str) -> str:  # type: ignore[override]
+        terms = {token for token in query.lower().split() if token}
+        if not terms:
+            return ""
+
+        scored: List[tuple[int, str]] = []
+        for note in self.config.notes:
+            tokens = {token for token in note.lower().split() if token}
+            score = len(terms & tokens)
+            if score > 0:
+                scored.append((score, note))
+
+        scored.sort(key=lambda item: item[0], reverse=True)
+        top_matches = [note for _, note in scored[: self.config.top_k]]
+        return "\n".join(top_matches)
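+
+# Minimal usage sketch (the notes are illustrative): scoring is plain keyword
+# overlap, so the four-forces note wins for a four-forces query.
+if __name__ == "__main__":
+    r = NotesRetriever(
+        notes=[
+            "JTBD separates the job from the hired solution",
+            "four forces push pull anxiety inertia",
+        ],
+        top_k=1,
+    )
+    print(r.retrieve("what are the four forces"))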
diff --git a/tools/push_modaic_agent.py b/tools/push_modaic_agent.py
new file mode 100644
index 0000000..097df27
--- /dev/null
+++ b/tools/push_modaic_agent.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+"""Push the JTBD DSPy agent to Modaic Hub using environment variables."""
+
+from __future__ import annotations
+
+import os
+import sys
+
+from service.modaic_agent import JTBDDSPyAgent, JTBDConfig
+from service.retrievers import NotesRetriever, NullRetriever
+
+
+def build_retriever():
+    kind = os.getenv("RETRIEVER_KIND", "notes").lower()
+    if kind == "notes":
+        raw = os.getenv("RETRIEVER_NOTES", "")
+        notes = [line for line in raw.splitlines() if line.strip()]
+        return NotesRetriever(notes=notes or ["JTBD primer"])
+    return NullRetriever()
+
+
+def main() -> int:
+    agent_id = os.getenv("MODAIC_AGENT_ID")
+    token = os.getenv("MODAIC_TOKEN")
+
+    if not agent_id:
+        print("MODAIC_AGENT_ID is not set", file=sys.stderr)
+        return 1
+    if not token:
+        print("MODAIC_TOKEN is not set", file=sys.stderr)
+        return 1
+
+    agent = JTBDDSPyAgent(JTBDConfig(), retriever=build_retriever())
+    agent.push_to_hub(agent_id, with_code=True)
+    print(f"Agent pushed to Modaic Hub: {agent_id}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
+
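+# Usage sketch (the agent identifier format is illustrative):
+#   export MODAIC_AGENT_ID="your-org/jtbd-agent"
+#   export MODAIC_TOKEN="..."
+#   python tools/push_modaic_agent.py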