{
  "traces": [],
  "train": [],
  "demos": [],
  "signature": {
    "instructions": "Determine if the answer/response is correct (true) or incorrect (false).\n\nThis judge handles:\n- Boolean QA (yes/no questions with passage context)\n- Legal Yes/No questions\n\nFirst reason through your thought process in the `reasoning` field.\nBe sure to verbalize any uncertainty in your thought process.\nThen output your conclusion in the `label` field as a boolean.",
    "fields": [
      {
        "prefix": "Question:",
        "description": "The question or prompt to evaluate"
      },
      {
        "prefix": "Context:",
        "description": "Optional context passage (empty string if not applicable)"
      },
      {
        "prefix": "Reasoning:",
        "description": "Your step by step reasoning. Verbally express uncertainty in your thought process."
      },
      {
        "prefix": "Label:",
        "description": "True if correct/yes, False if incorrect/no"
      }
    ]
  },
  "lm": {
    "model": "together_ai/Qwen/Qwen3-VL-32B-Instruct",
    "model_type": "chat",
    "cache": true,
    "num_retries": 3,
    "finetuning_model": null,
    "launch_kwargs": {},
    "train_kwargs": {},
    "temperature": null,
    "max_tokens": null
  },
  "metadata": {
    "dependency_versions": {
      "python": "3.11",
      "dspy": "3.1.2",
      "cloudpickle": "3.1"
    }
  }
}