From 9030bead4e71673039016256b0b68b992dfbedaa Mon Sep 17 00:00:00 2001
From: matthildur <matthildur@montageventures.com>
Date: Thu, 16 Apr 2026 15:31:25 -0700
Subject: [PATCH] (no commit message)

---
 README.md    | 10 +++++++-
 config.json  | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 program.json | 52 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+), 1 deletion(-)
 create mode 100644 config.json
 create mode 100644 program.json

diff --git a/README.md b/README.md
index c500980..e02c0a2 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,10 @@
-# research-relevance-judge
+---
+a: 3.537087
+b: -1.616884
+is_arbiter: true
+probe_model: modaic/gpt-oss-120b-probe
+size: large
+supports_reasoning: true
+---
 
+# research-relevance-judge
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..278e1b3
--- /dev/null
+++ b/config.json
@@ -0,0 +1,70 @@
+{
+  "model": null,
+  "signature": {
+    "$defs": {
+      "Reasoning": {
+        "type": "dspy.Reasoning"
+      }
+    },
+    "description": "You are evaluating search results for a technical investment diligence report.\nGiven a research topic and a search result, judge how relevant it is.\n\nScoring:\n1 = Not relevant. Completely different domain, product, or problem.\n2 = Marginally relevant. Loosely related but addresses a different specific problem.\n3 = Relevant. Same technical problem or domain \u2014 worth including in a research dump.\n4 = Highly relevant. Directly discusses the core problem with practitioner experience,\n    benchmarks, or specific tool comparisons. High signal for diligence.\n\nFocus on technical specificity. Generic advice scores lower than specific\npractitioner experience with real numbers or tool names.",
+    "properties": {
+      "topic": {
+        "__dspy_field_type": "input",
+        "desc": "The research topic being investigated",
+        "prefix": "Topic:",
+        "title": "Topic",
+        "type": "string"
+      },
+      "title": {
+        "__dspy_field_type": "input",
+        "desc": "Title of the search result",
+        "prefix": "Title:",
+        "title": "Title",
+        "type": "string"
+      },
+      "snippet": {
+        "__dspy_field_type": "input",
+        "desc": "Short excerpt or snippet from the result",
+        "prefix": "Snippet:",
+        "title": "Snippet",
+        "type": "string"
+      },
+      "source": {
+        "__dspy_field_type": "input",
+        "desc": "Source platform (Reddit, HN, Stack Overflow, etc.)",
+        "prefix": "Source:",
+        "title": "Source",
+        "type": "string"
+      },
+      "reasoning": {
+        "$ref": "#/$defs/Reasoning",
+        "__dspy_field_type": "output",
+        "desc": "Your reasoning for your answer. Include any uncertainties about your answer or ambiguity in the task.",
+        "prefix": "Reasoning:"
+      },
+      "relevance": {
+        "__dspy_field_type": "output",
+        "desc": "Relevance score: 1=not relevant, 2=marginal, 3=relevant, 4=highly relevant",
+        "enum": [
+          "1",
+          "2",
+          "3",
+          "4"
+        ],
+        "prefix": "Relevance:",
+        "title": "Relevance",
+        "type": "string"
+      }
+    },
+    "required": [
+      "topic",
+      "title",
+      "snippet",
+      "source",
+      "reasoning",
+      "relevance"
+    ],
+    "title": "StringSignature",
+    "type": "object"
+  }
+}
\ No newline at end of file
diff --git a/program.json b/program.json
new file mode 100644
index 0000000..61de39b
--- /dev/null
+++ b/program.json
@@ -0,0 +1,52 @@
+{
+  "traces": [],
+  "train": [],
+  "demos": [],
+  "signature": {
+    "instructions": "You are evaluating search results for a technical investment diligence report.\nGiven a research topic and a search result, judge how relevant it is.\n\nScoring:\n1 = Not relevant. Completely different domain, product, or problem.\n2 = Marginally relevant. Loosely related but addresses a different specific problem.\n3 = Relevant. Same technical problem or domain \u2014 worth including in a research dump.\n4 = Highly relevant. Directly discusses the core problem with practitioner experience,\n    benchmarks, or specific tool comparisons. High signal for diligence.\n\nFocus on technical specificity. Generic advice scores lower than specific\npractitioner experience with real numbers or tool names.",
+    "fields": [
+      {
+        "prefix": "Topic:",
+        "description": "The research topic being investigated"
+      },
+      {
+        "prefix": "Title:",
+        "description": "Title of the search result"
+      },
+      {
+        "prefix": "Snippet:",
+        "description": "Short excerpt or snippet from the result"
+      },
+      {
+        "prefix": "Source:",
+        "description": "Source platform (Reddit, HN, Stack Overflow, etc.)"
+      },
+      {
+        "prefix": "Reasoning:",
+        "description": "Your reasoning for your answer. Include any uncertainties about your answer or ambiguity in the task."
+      },
+      {
+        "prefix": "Relevance:",
+        "description": "Relevance score: 1=not relevant, 2=marginal, 3=relevant, 4=highly relevant"
+      }
+    ]
+  },
+  "lm": {
+    "model": "openrouter/openai/gpt-oss-120b",
+    "model_type": "chat",
+    "cache": true,
+    "num_retries": 3,
+    "finetuning_model": null,
+    "launch_kwargs": {},
+    "train_kwargs": {},
+    "temperature": null,
+    "max_tokens": null
+  },
+  "metadata": {
+    "dependency_versions": {
+      "python": "3.11",
+      "dspy": "3.1.3",
+      "cloudpickle": "3.1"
+    }
+  }
+}
\ No newline at end of file