(no commit message)

2026-01-18 12:19:47 -08:00
parent c123bf0f1a
commit b7f546052a
3 changed files with 105 additions and 2 deletions

config.json (new file, 59 lines)

@@ -0,0 +1,59 @@
{
"model": null,
"signature": {
"description": " \"Task: Evaluate and compare the quality of two responses (Response A and Response B) given a specific question (input). Determine which response better addresses the question by focusing on factual correctness, completeness, and adherence to any specific requirements mentioned in the question prompt. Select the response that provides the most accurate, comprehensive, and relevant solution or explanation to the problem presented. Document your decision in the format \"A>B\" if Response A is the better choice, or \"B>A\" if Response B is superior.\n\nDetailed Instructions:\n\n1. **Understand the Question Context:**\n - Ensure you comprehend the full context and requirements specified by the question or problem statement. This includes understanding any specific instructions such as formats, calculations, algorithms, or methodologies that are mentioned.\n - Note any domain-specific terminologies or conditions, such as units of measurement or specific constants that need to be included in calculations or explanations.\n\n2. **Evaluate Each Response:**\n - Check for factual accuracy in the content, calculations, or recommendations provided in each response.\n - Assess the response for completeness\u2014whether it completely addresses all aspects of the question.\n - Verify the adherence of each response to the specified question requirements, such as avoiding certain methods (e.g., no saving as CSV) or changing visualization formats.\n - Consider clarity and structure of the explanation or solution provided.\n\n3. **Factual and Domain-Specific Considerations:**\n - When evaluating technical or scientific information (e.g., coding, mathematical problems, chemistry), use established domain knowledge to verify calculations, logic, or proposed solutions.\n - Take note of domain-specific constants or equations omitted in the response that might be critical.\n\n4. **Decision Making:**\n - Determine which response (A or B) best meets the above criteria.\n - Use a systematic approach to compare responses, focusing on differences that impact the quality of the solution or explanation.\n - Select the response that is not only correct but also most aligns with the question\u2019s specific requirements and limitations.\n\n5. **Output Your Conclusion:**\n - Once you have determined which response is better, output your decision in the required format: \"A>B\" if Response A is better, or \"B>A\" if Response B is better.\"",
"properties": {
"question": {
"__dspy_field_type": "input",
"desc": "The original question or prompt",
"prefix": "Question:",
"title": "Question",
"type": "string"
},
"response_A": {
"__dspy_field_type": "input",
"desc": "First response to evaluate",
"prefix": "Response A:",
"title": "Response A",
"type": "string"
},
"response_B": {
"__dspy_field_type": "input",
"desc": "Second response to evaluate",
"prefix": "Response B:",
"title": "Response B",
"type": "string"
},
"label": {
"__dspy_field_type": "output",
"desc": "Which response is better: 'A>B' or 'B>A'",
"enum": [
"A>B",
"B>A"
],
"prefix": "Label:",
"title": "Label",
"type": "string"
}
},
"required": [
"question",
"response_A",
"response_B",
"label"
],
"title": "JudgeSignature",
"type": "object"
},
"lm": {
"model": "huggingface/together/Qwen/Qwen3-VL-32B-Instruct",
"model_type": "chat",
"cache": true,
"num_retries": 3,
"finetuning_model": null,
"launch_kwargs": {},
"train_kwargs": {},
"temperature": null,
"max_tokens": null
}
}
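For context, this file is the serialized state of a DSPy judge program: a signature with three input fields (question, response_A, response_B), one constrained output field (label), and an LM block pointing at Qwen3-VL-32B-Instruct via Together on Hugging Face. A minimal sketch of how the same program might be declared directly in Python is below; the class name `JudgeSignature` matches the serialized title, but the `dspy.Predict` wrapper and the abbreviated docstring are assumptions, not part of this commit.

```python
import dspy
from typing import Literal

class JudgeSignature(dspy.Signature):
    """Evaluate two responses to a question and decide which is better,
    judging factual correctness, completeness, and adherence to the
    prompt's requirements. (Abbreviated from the full instructions in
    config.json's "description" field.)"""

    question: str = dspy.InputField(desc="The original question or prompt")
    response_A: str = dspy.InputField(desc="First response to evaluate")
    response_B: str = dspy.InputField(desc="Second response to evaluate")
    # The Literal type mirrors the "enum": ["A>B", "B>A"] constraint above.
    label: Literal["A>B", "B>A"] = dspy.OutputField(
        desc="Which response is better: 'A>B' or 'B>A'"
    )

# LM settings mirroring the "lm" block of config.json.
lm = dspy.LM(
    "huggingface/together/Qwen/Qwen3-VL-32B-Instruct",
    model_type="chat",
    cache=True,
    num_retries=3,
)
dspy.configure(lm=lm)

judge = dspy.Predict(JudgeSignature)
result = judge(question="...", response_A="...", response_B="...")
print(result.label)  # "A>B" or "B>A"
```

Note that `"model": null` at the top level and the null `temperature`/`max_tokens` in the LM block mean those settings fall back to DSPy defaults rather than being pinned by this config.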