(no commit message)

This commit is contained in:
2026-03-08 21:47:14 -07:00
parent 5793e1a979
commit deccdc70ae
3 changed files with 115 additions and 2 deletions

View File

@@ -1,2 +0,0 @@
# projudgebench-process-evaluation-judge

63
config.json Normal file
View File

@@ -0,0 +1,63 @@
{
"model": null,
"signature": {
"description": "You are given a scientific problem, its correct final answer, and a student\u2019s solution to evaluate.\nYour task is to: first, solve the problem yourself, using the correct final answer as a hint. Ensure your reasoning leads to the\ncorrect answer. Once you have a clear understanding of how the problem could be solved, evaluate the correctness of the given step in the student\u2019s solution.\nFocus exclusively on the scientific, logical, or mathematical correctness of the solution. Ignore differences in formatting,\nexpression style, specific wording, or presentation order, as long as the reasoning and results are valid.\nFor the step, perform:\nBinary scoring: true if step is correct, false if incorrect\nThen in your reasoning, provide:\n1. Error classification (only if the step is incorrect):\na. Numerical Calculation Error. Errors in basic arithmetic operations such as addition, subtraction, division, or square roots.\nb. Symbolic Calculation Error. Errors in manipulating algebraic expressions, such as incorrect expansion, factoring, simplification, or solving equations with variables.\nc. Visual Interpretation Error. Errors in interpreting graphical data, such as misidentifying coordinates, shapes, spatial\nrelationships, or data within figures.\nd. Reasoning Error. Errors in the logical thinking process that lead to incorrect conclusions, such as flawed arguments, invalid\ninferences, or gaps in the logical flow of the solution.\ne. Knowledge Error. Errors caused by insufficient understanding or incorrect application of necessary knowledge (e.g.,\nconcepts, formulas, theorems, methods), or using outdated or incorrect information.\nf. Question Understanding Error. Errors due to misunderstanding or misinterpreting the problems\u2019 conditions or requirements, such as misreading questions or misapplying given conditions.\ng. No solution provided. The model refuses to answer, fails to follow instructions to make a solution, or encounters anomalies\nin generation process such as repetitive responses or incomplete outputs.\n2. Provide a brief explanation for the identified error.\n3. Include any uncertainty or ambiguity in your reasoning.",
"properties": {
"question": {
"__dspy_field_type": "input",
"desc": "Scientific problem statement.",
"prefix": "Question:",
"title": "Question",
"type": "string"
},
"correct_answer": {
"__dspy_field_type": "input",
"desc": "Correct final answer for the problem.",
"prefix": "Correct Answer:",
"title": "Correct Answer",
"type": "string"
},
"steps": {
"__dspy_field_type": "input",
"desc": "Full student solution steps.",
"items": {
"type": "string"
},
"prefix": "Steps:",
"title": "Steps",
"type": "array"
},
"step_to_evaluate": {
"__dspy_field_type": "input",
"desc": "Single step to evaluate for correctness.",
"prefix": "Step To Evaluate:",
"title": "Step To Evaluate",
"type": "string"
},
"reasoning": {
"__dspy_field_type": "output",
"desc": "Reasoning for the binary correctness decision.",
"prefix": "Reasoning:",
"title": "Reasoning",
"type": "string"
},
"correct": {
"__dspy_field_type": "output",
"desc": "Whether the evaluated step is correct.",
"prefix": "Correct:",
"title": "Correct",
"type": "boolean"
}
},
"required": [
"question",
"correct_answer",
"steps",
"step_to_evaluate",
"reasoning",
"correct"
],
"title": "StringSignature",
"type": "object"
}
}

52
program.json Normal file

File diff suppressed because one or more lines are too long