From 65f0c999786f94239123ae69cbd07d941ec3c49b Mon Sep 17 00:00:00 2001 From: Tyrin Todd Date: Thu, 23 Apr 2026 23:09:25 -0700 Subject: [PATCH] server-inference push code_judge_bench --- README.md | 5 ++++- config.json | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++ program.json | 48 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 1 deletion(-) create mode 100644 config.json create mode 100644 program.json diff --git a/README.md b/README.md index 93cc6c5..15a92f6 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,5 @@ -# code_judge_bench +--- +{} +--- +# code_judge_bench \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..533c098 --- /dev/null +++ b/config.json @@ -0,0 +1,56 @@ +{ + "model": null, + "signature": { + "description": "Reason step-by-step to determine which code implementation is better for the given problem.\nAs you reason, note any uncertainties, ambiguities, and gaps \u2014 both in how the instructions apply to the task at hand and in whether you have the knowledge needed to solve it. Be honest in your reasoning when you are unsure about your answer. 
Your reasoning must lead directly to\nthe better code option (A or B).", + "properties": { + "problem": { + "__dspy_field_type": "input", + "desc": "The coding problem to be solved.", + "prefix": "Problem:", + "title": "Problem", + "type": "string" + }, + "code_A": { + "__dspy_field_type": "input", + "desc": "The first code implementation.", + "prefix": "Code A:", + "title": "Code A", + "type": "string" + }, + "code_B": { + "__dspy_field_type": "input", + "desc": "The second code implementation.", + "prefix": "Code B:", + "title": "Code B", + "type": "string" + }, + "reasoning": { + "__dspy_field_type": "output", + "desc": "Step-by-step reasoning, including uncertainty or ambiguity in your thought process when relevant.", + "prefix": "Reasoning:", + "title": "Reasoning", + "type": "string" + }, + "label": { + "__dspy_field_type": "output", + "desc": "The label of the better code implementation ('A' or 'B').", + "enum": [ + "A", + "B" + ], + "prefix": "Label:", + "title": "Label", + "type": "string" + } + }, + "required": [ + "problem", + "code_A", + "code_B", + "reasoning", + "label" + ], + "title": "CodePreferenceJudgeSig", + "type": "object" + } +} \ No newline at end of file diff --git a/program.json b/program.json new file mode 100644 index 0000000..b867ef0 --- /dev/null +++ b/program.json @@ -0,0 +1,48 @@ +{ + "traces": [], + "train": [], + "demos": [], + "signature": { + "instructions": "Reason step-by-step to determine which code implementation is better for the given problem.\nAs you reason, note any uncertainties, ambiguities, and gaps \u2014 both in how the instructions apply to the task at hand and in whether you have the knowledge needed to solve it. Be honest in your reasoning when you are unsure about your answer. Your reasoning must lead directly to\nthe better code option (A or B).", + "fields": [ + { + "prefix": "Problem:", + "description": "The coding problem to be solved." 
+ }, + { + "prefix": "Code A:", + "description": "The first code implementation." + }, + { + "prefix": "Code B:", + "description": "The second code implementation." + }, + { + "prefix": "Reasoning:", + "description": "Step-by-step reasoning, including uncertainty or ambiguity in your thought process when relevant." + }, + { + "prefix": "Label:", + "description": "The label of the better code implementation ('A' or 'B')." + } + ] + }, + "lm": { + "model": "together_ai/Qwen/Qwen2.5-7B-Instruct-Turbo", + "model_type": "chat", + "cache": true, + "num_retries": 3, + "finetuning_model": null, + "launch_kwargs": {}, + "train_kwargs": {}, + "temperature": null, + "max_tokens": null + }, + "metadata": { + "dependency_versions": { + "python": "3.11", + "dspy": "3.1.3", + "cloudpickle": "3.1" + } + } +} \ No newline at end of file