judge_bench/program.json

{
  "traces": [],
  "train": [],
  "demos": [],
  "signature": {
    "instructions": "Reason step-by-step to compare the two responses for the given context.\nAs you reason, note any uncertainties, ambiguities, and gaps \u2014 both in how the instructions apply to the task at hand and in whether you have the knowledge needed to solve it. Be honest in your reasoning when you are unsure about your answer.\n\nChoose 'A>B' if response A is meaningfully better, 'B>A' if response B is\nmeaningfully better, or 'tie' if neither response is clearly better than the other.",
    "fields": [
      {
        "prefix": "Question:",
        "description": "The question or prompt provided."
      },
      {
        "prefix": "Response A:",
        "description": "The first response option."
      },
      {
        "prefix": "Response B:",
        "description": "The second response option."
      },
      {
        "prefix": "Reasoning:",
        "description": "Step-by-step reasoning, including uncertainties, ambiguities, or gaps in your thought process when relevant."
      },
      {
        "prefix": "Label:",
        "description": "'A>B' if A is better, 'B>A' if B is better, 'tie' if neither is clearly better."
      }
    ]
  },
  "lm": {
    "model": "together_ai/Qwen/Qwen2.5-7B-Instruct-Turbo",
    "model_type": "chat",
    "cache": true,
    "num_retries": 3,
    "finetuning_model": null,
    "launch_kwargs": {},
    "train_kwargs": {},
    "temperature": null,
    "max_tokens": null
  },
  "metadata": {
    "dependency_versions": {
      "python": "3.11",
      "dspy": "3.1.3",
      "cloudpickle": "3.1"
    }
  }
}