(no commit message)

2026-04-16 15:31:25 -07:00
parent c542fb0750
commit 9030bead4e
3 changed files with 131 additions and 1 deletions
--- a/README.md
+++ b/README.md
@@ -1,2 +1,10 @@
-# research-relevance-judge
+---
+a: 3.537087
+b: -1.616884
+is_arbiter: true
+probe_model: modaic/gpt-oss-120b-probe
+size: large
+supports_reasoning: true
+---

+# research-relevance-judge
--- a/config.json
+++ b/config.json
@@ -0,0 +1,70 @@
+{
+  "model": null,
+  "signature": {
+    "$defs": {
+      "Reasoning": {
+        "type": "dspy.Reasoning"
+      }
+    },
+    "description": "You are evaluating search results for a technical investment diligence report.\nGiven a research topic and a search result, judge how relevant it is.\n\nScoring:\n1 = Not relevant. Completely different domain, product, or problem.\n2 = Marginally relevant. Loosely related but addresses a different specific problem.\n3 = Relevant. Same technical problem or domain \u2014 worth including in a research dump.\n4 = Highly relevant. Directly discusses the core problem with practitioner experience,\n    benchmarks, or specific tool comparisons. High signal for diligence.\n\nFocus on technical specificity. Generic advice scores lower than specific\npractitioner experience with real numbers or tool names.",
+    "properties": {
+      "topic": {
+        "__dspy_field_type": "input",
+        "desc": "The research topic being investigated",
+        "prefix": "Topic:",
+        "title": "Topic",
+        "type": "string"
+      },
+      "title": {
+        "__dspy_field_type": "input",
+        "desc": "Title of the search result",
+        "prefix": "Title:",
+        "title": "Title",
+        "type": "string"
+      },
+      "snippet": {
+        "__dspy_field_type": "input",
+        "desc": "Short excerpt or snippet from the result",
+        "prefix": "Snippet:",
+        "title": "Snippet",
+        "type": "string"
+      },
+      "source": {
+        "__dspy_field_type": "input",
+        "desc": "Source platform (Reddit, HN, Stack Overflow, etc.)",
+        "prefix": "Source:",
+        "title": "Source",
+        "type": "string"
+      },
+      "reasoning": {
+        "$ref": "#/$defs/Reasoning",
+        "__dspy_field_type": "output",
+        "desc": "Your reasoning for your answer. Include any uncertainties about your answer or ambiguity in the task.",
+        "prefix": "Reasoning:"
+      },
+      "relevance": {
+        "__dspy_field_type": "output",
+        "desc": "Relevance score: 1=not relevant, 2=marginal, 3=relevant, 4=highly relevant",
+        "enum": [
+          "1",
+          "2",
+          "3",
+          "4"
+        ],
+        "prefix": "Relevance:",
+        "title": "Relevance",
+        "type": "string"
+      }
+    },
+    "required": [
+      "topic",
+      "title",
+      "snippet",
+      "source",
+      "reasoning",
+      "relevance"
+    ],
+    "title": "StringSignature",
+    "type": "object"
+  }
+}
--- a/program.json
+++ b/program.json
@@ -0,0 +1,52 @@
+{
+  "traces": [],
+  "train": [],
+  "demos": [],
+  "signature": {
+    "instructions": "You are evaluating search results for a technical investment diligence report.\nGiven a research topic and a search result, judge how relevant it is.\n\nScoring:\n1 = Not relevant. Completely different domain, product, or problem.\n2 = Marginally relevant. Loosely related but addresses a different specific problem.\n3 = Relevant. Same technical problem or domain \u2014 worth including in a research dump.\n4 = Highly relevant. Directly discusses the core problem with practitioner experience,\n    benchmarks, or specific tool comparisons. High signal for diligence.\n\nFocus on technical specificity. Generic advice scores lower than specific\npractitioner experience with real numbers or tool names.",
+    "fields": [
+      {
+        "prefix": "Topic:",
+        "description": "The research topic being investigated"
+      },
+      {
+        "prefix": "Title:",
+        "description": "Title of the search result"
+      },
+      {
+        "prefix": "Snippet:",
+        "description": "Short excerpt or snippet from the result"
+      },
+      {
+        "prefix": "Source:",
+        "description": "Source platform (Reddit, HN, Stack Overflow, etc.)"
+      },
+      {
+        "prefix": "Reasoning:",
+        "description": "Your reasoning for your answer. Include any uncertainties about your answer or ambiguity in the task."
+      },
+      {
+        "prefix": "Relevance:",
+        "description": "Relevance score: 1=not relevant, 2=marginal, 3=relevant, 4=highly relevant"
+      }
+    ]
+  },
+  "lm": {
+    "model": "openrouter/openai/gpt-oss-120b",
+    "model_type": "chat",
+    "cache": true,
+    "num_retries": 3,
+    "finetuning_model": null,
+    "launch_kwargs": {},
+    "train_kwargs": {},
+    "temperature": null,
+    "max_tokens": null
+  },
+  "metadata": {
+    "dependency_versions": {
+      "python": "3.11",
+      "dspy": "3.1.3",
+      "cloudpickle": "3.1"
+    }
+  }
+}