From 07562b012cac68d9f4270b1510fd4f8f382fdb3c Mon Sep 17 00:00:00 2001 From: Tyrin Todd Date: Thu, 23 Apr 2026 23:05:07 -0700 Subject: [PATCH] server-inference push aes2_essay_scoring --- README.md | 5 ++++- config.json | 44 ++++++++++++++++++++++++++++++++++++++++++++ program.json | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 config.json create mode 100644 program.json diff --git a/README.md b/README.md index 7459c41..1305124 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,5 @@ -# aes2_essay_scoring +--- +{} +--- +# aes2_essay_scoring \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..235be6b --- /dev/null +++ b/config.json @@ -0,0 +1,44 @@ +{ + "model": null, + "signature": { + "description": "You are an AES2 essay-scoring judge. Score essays only. Return one score in the range 1-6 using the rubric below.\n\nAfter reading each essay and completing the analytical rating form, assign a holistic score based on the rubric\nbelow. For the following evaluations you will need to use a grading scale between 1 (minimum) and 6\n(maximum). As with the analytical rating form, the distance between each grade (e.g., 1-2, 3-4, 4-5) should be\nconsidered equal.\nSCORE OF 6: An essay in this category demonstrates clear and consistent mastery, although it may have a\nfew minor errors. 
A typical essay effectively and insightfully develops a point of view on the issue and\ndemonstrates outstanding critical thinking; the essay uses clearly appropriate examples, reasons, and other\nevidence taken from the source text(s) to support its position; the essay is well organized and clearly focused,\ndemonstrating clear coherence and smooth progression of ideas; the essay exhibits skillful use of language,\nusing a varied, accurate, and apt vocabulary and demonstrates meaningful variety in sentence structure; the\nessay is free of most errors in grammar, usage, and mechanics.\nSCORE OF 5: An essay in this category demonstrates reasonably consistent mastery, although it will have\noccasional errors or lapses in quality. A typical essay effectively develops a point of view on the issue and\ndemonstrates strong critical thinking; the essay generally uses appropriate examples, reasons, and other\nevidence taken from the source text(s) to support its position; the essay is well organized and focused,\ndemonstrating coherence and progression of ideas; the essay exhibits facility in the use of language, using\nappropriate vocabulary and demonstrates variety in sentence structure; the essay is generally free of most errors in\ngrammar, usage, and mechanics.\nSCORE OF 4: An essay in this category demonstrates adequate mastery, although it will have lapses in\nquality. 
A typical essay develops a point of view on the issue and demonstrates competent critical thinking; the\nessay uses adequate examples, reasons, and other evidence taken from the source text(s) to support its\nposition; the essay is generally organized and focused, demonstrating some coherence and progression of ideas;\nthe essay may demonstrate inconsistent facility in the use of language, using generally\nappropriate vocabulary and demonstrates some variety in sentence structure; the essay may have some errors in\ngrammar, usage, and mechanics.\nSCORE OF 3: An essay in this category demonstrates developing mastery, and is marked by ONE OR\nMORE of the following weaknesses: develops a point of view on the issue, demonstrating some critical\nthinking, but may do so inconsistently or use inadequate examples, reasons, or other evidence taken from the\nsource texts to support its position; the essay is limited in its organization or focus, or may demonstrate some\nlapses in coherence or progression of ideas; the essay may demonstrate facility in the use of language,\nbut sometimes uses weak vocabulary or inappropriate word choice and/or lacks variety or demonstrates\nproblems in sentence structure; the essay may contain an accumulation of errors in grammar, usage, and\nmechanics.\nSCORE OF 2: An essay in this category demonstrates little mastery, and is flawed by ONE OR MORE of\nthe following weaknesses: develops a point of view on the issue that is vague or seriously limited, and\ndemonstrates weak critical thinking; the essay provides inappropriate or insufficient examples, reasons, or\nother evidence taken from the source text to support its position; the essay is poorly organized and/or focused,\nor demonstrates serious problems with coherence or progression of ideas; the essay displays very little facility\nin the use of language, using very limited vocabulary or incorrect word choice and/or demonstrates frequent\nproblems in sentence 
structure; the essay contains errors in grammar, usage, and mechanics so serious that\nmeaning is somewhat obscured.\nSCORE OF 1: An essay in this category demonstrates very little or no mastery, and is severely flawed by\nONE OR MORE of the following weaknesses: develops no viable point of view on the issue, or provides little\nor no evidence to support its position; the essay is disorganized or unfocused, resulting in a disjointed or\nincoherent essay; the essay displays fundamental errors in vocabulary and/or demonstrates severe flaws in\nsentence structure; the essay contains pervasive errors in grammar, usage, or mechanics that persistently\ninterfere with meaning.", + "properties": { + "full_text": { + "__dspy_field_type": "input", + "desc": "Essay text to score.", + "prefix": "Full Text:", + "title": "Full Text", + "type": "string" + }, + "reasoning": { + "__dspy_field_type": "output", + "desc": "Step-by-step reasoning, including any uncertainty or ambiguity in your thought process when relevant.", + "prefix": "Reasoning:", + "title": "Reasoning", + "type": "string" + }, + "score": { + "__dspy_field_type": "output", + "desc": "Holistic essay score on a 1-6 scale.", + "enum": [ + "1", + "2", + "3", + "4", + "5", + "6" + ], + "prefix": "Score:", + "title": "Score", + "type": "string" + } + }, + "required": [ + "full_text", + "reasoning", + "score" + ], + "title": "StringSignature", + "type": "object" + } +} \ No newline at end of file diff --git a/program.json b/program.json new file mode 100644 index 0000000..5d54395 --- /dev/null +++ b/program.json @@ -0,0 +1,40 @@ +{ + "traces": [], + "train": [], + "demos": [], + "signature": { + "instructions": "You are an AES2 essay-scoring judge. Score essays only. Return one score in the range 1-6 using the rubric below.\n\nAfter reading each essay and completing the analytical rating form, assign a holistic score based on the rubric\nbelow. 
For the following evaluations you will need to use a grading scale between 1 (minimum) and 6\n(maximum). As with the analytical rating form, the distance between each grade (e.g., 1-2, 3-4, 4-5) should be\nconsidered equal.\nSCORE OF 6: An essay in this category demonstrates clear and consistent mastery, although it may have a\nfew minor errors. A typical essay effectively and insightfully develops a point of view on the issue and\ndemonstrates outstanding critical thinking; the essay uses clearly appropriate examples, reasons, and other\nevidence taken from the source text(s) to support its position; the essay is well organized and clearly focused,\ndemonstrating clear coherence and smooth progression of ideas; the essay exhibits skillful use of language,\nusing a varied, accurate, and apt vocabulary and demonstrates meaningful variety in sentence structure; the\nessay is free of most errors in grammar, usage, and mechanics.\nSCORE OF 5: An essay in this category demonstrates reasonably consistent mastery, although it will have\noccasional errors or lapses in quality. A typical essay effectively develops a point of view on the issue and\ndemonstrates strong critical thinking; the essay generally uses appropriate examples, reasons, and other\nevidence taken from the source text(s) to support its position; the essay is well organized and focused,\ndemonstrating coherence and progression of ideas; the essay exhibits facility in the use of language, using\nappropriate vocabulary and demonstrates variety in sentence structure; the essay is generally free of most errors in\ngrammar, usage, and mechanics.\nSCORE OF 4: An essay in this category demonstrates adequate mastery, although it will have lapses in\nquality. 
A typical essay develops a point of view on the issue and demonstrates competent critical thinking; the\nessay uses adequate examples, reasons, and other evidence taken from the source text(s) to support its\nposition; the essay is generally organized and focused, demonstrating some coherence and progression of ideas;\nthe essay may demonstrate inconsistent facility in the use of language, using generally\nappropriate vocabulary and demonstrates some variety in sentence structure; the essay may have some errors in\ngrammar, usage, and mechanics.\nSCORE OF 3: An essay in this category demonstrates developing mastery, and is marked by ONE OR\nMORE of the following weaknesses: develops a point of view on the issue, demonstrating some critical\nthinking, but may do so inconsistently or use inadequate examples, reasons, or other evidence taken from the\nsource texts to support its position; the essay is limited in its organization or focus, or may demonstrate some\nlapses in coherence or progression of ideas; the essay may demonstrate facility in the use of language,\nbut sometimes uses weak vocabulary or inappropriate word choice and/or lacks variety or demonstrates\nproblems in sentence structure; the essay may contain an accumulation of errors in grammar, usage, and\nmechanics.\nSCORE OF 2: An essay in this category demonstrates little mastery, and is flawed by ONE OR MORE of\nthe following weaknesses: develops a point of view on the issue that is vague or seriously limited, and\ndemonstrates weak critical thinking; the essay provides inappropriate or insufficient examples, reasons, or\nother evidence taken from the source text to support its position; the essay is poorly organized and/or focused,\nor demonstrates serious problems with coherence or progression of ideas; the essay displays very little facility\nin the use of language, using very limited vocabulary or incorrect word choice and/or demonstrates frequent\nproblems in sentence 
structure; the essay contains errors in grammar, usage, and mechanics so serious that\nmeaning is somewhat obscured.\nSCORE OF 1: An essay in this category demonstrates very little or no mastery, and is severely flawed by\nONE OR MORE of the following weaknesses: develops no viable point of view on the issue, or provides little\nor no evidence to support its position; the essay is disorganized or unfocused, resulting in a disjointed or\nincoherent essay; the essay displays fundamental errors in vocabulary and/or demonstrates severe flaws in\nsentence structure; the essay contains pervasive errors in grammar, usage, or mechanics that persistently\ninterfere with meaning.", + "fields": [ + { + "prefix": "Full Text:", + "description": "Essay text to score." + }, + { + "prefix": "Reasoning:", + "description": "Step-by-step reasoning, including any uncertainty or ambiguity in your thought process when relevant." + }, + { + "prefix": "Score:", + "description": "Holistic essay score on a 1-6 scale." + } + ] + }, + "lm": { + "model": "together_ai/Qwen/Qwen2.5-7B-Instruct-Turbo", + "model_type": "chat", + "cache": true, + "num_retries": 3, + "finetuning_model": null, + "launch_kwargs": {}, + "train_kwargs": {}, + "temperature": null, + "max_tokens": null + }, + "metadata": { + "dependency_versions": { + "python": "3.11", + "dspy": "3.1.3", + "cloudpickle": "3.1" + } + } +} \ No newline at end of file