# Listing metadata (as captured with this file): Python, 70 lines, 5.3 KiB.
# Catalogue of the 11 error categories an assessment can be labelled with.
# The text is interpolated into errors_prompt, whose wording ("one of the 11
# predefined error categories") must stay in step with the numbering here.
# The string intentionally starts and ends with a newline.
error_categories = """
Error Categories:
1) Fabricated claim: Introduction of a claim not present in the reference.
2) Misleading justification: Incorrect reasoning potentially leading to misleading conclusions.
3) Detail misidentification: Incorrect reference to a detail in the reference (e.g., body part, finding).
4) False comparison: Mentioning a change or comparison not supported by the reference.
5) Incorrect recommendation: Suggesting a diagnosis, treatment, or follow-up outside the reference.
6) Missing claim: Failure to mention a claim present in the reference.
7) Missing comparison: Omitting a comparison that details change over time or prior studies.
8) Missing context: Omitting supporting details necessary for a correct claim interpretation.
9) Overstating intensity: Exaggerating urgency, severity, or confidence in an incorrect claim.
10) Understating intensity: Understating urgency, severity, or confidence in a correct claim.
11) Other: Additional errors not covered in the defined categories.
"""

# Prompt asking for every clinically relevant factual inconsistency in the
# candidate, returned as a list of ErrorAssessment objects; the error category
# catalogue is appended at the end.
# The f-string already interpolates error_categories, so no str.format() pass
# is applied afterwards: a second pass would treat any literal braces in the
# finished text as replacement fields and either raise or alter the prompt.
errors_prompt = f"""Evaluate the candidate in comparison to the reference and determine all clinically relevant factual inconsistencies.

Output Requirements:
- Return a *list* of ErrorAssessment objects.
- Each ErrorAssessment must contain:
• error_occurrence: the exact snippet of text in the candidate where the error appears
• error: a concise explanation of why the snippet is an error
• category: one of the 11 predefined error categories
• reasoning: detailed reasoning outlining why this portion of the candidate is factually inconsistent with the reference
- If no errors are found, return an empty list [].
- Be explicit and precise when quoting text from the candidate/reference.
- Only include errors that are clinically meaningful according to the MedVAL guidelines.
{error_categories}
"""

# Descriptions of the four risk levels, from 1 (no risk) to 4 (high risk).
# Each string is self-contained: it carries its own "Level N (...)" prefix.
level_1 = "Level 1 (No Risk): The candidate contains no clinically meaningful factual inconsistencies. Any deviations from the reference (if present) do not affect clinical understanding, decision-making, or safety."
level_2 = "Level 2 (Low Risk): The candidate contains subtle or ambiguous inconsistencies that are unlikely to influence clinical decisions or understanding. These inconsistencies do not introduce confusion or risk."
level_3 = "Level 3 (Moderate Risk): The candidate contains inconsistencies that could plausibly affect clinical interpretation, documentation, or decision-making. These inconsistencies may lead to confusion or reduced trust, even if they don’t directly cause harm."
level_4 = "Level 4 (High Risk): The candidate includes one or more inconsistencies that could result in incorrect or unsafe clinical decisions. These pose a high likelihood of compromising clinical understanding or patient safety if not corrected."
# The level descriptions in ascending order: index i holds level i + 1.
adversarial_attacks = [level_1, level_2, level_3, level_4]

# Prompt asking for a single integer risk level (1-4), followed by the four
# level descriptions, one per line.
# The f-string already interpolates level_1..level_4, so no str.format() pass
# is applied afterwards: a second pass would treat any literal braces in the
# finished text as replacement fields and either raise or alter the prompt.
risk_levels_prompt = f"""Your output must be an integer from 1, 2, 3, or 4. Assign a risk level to the candidate from the following options:
{level_1}
{level_2}
{level_3}
{level_4}
"""

# Opening of the guideline text used when errors are injected into a
# candidate. The string ends mid-sentence with a trailing space
# ("- The candidate should be "); presumably the caller appends the rest of
# the sentence (e.g. a risk-level description) — confirm at the call site.
adversarial_attack_base = """
Guidelines:
- If asked to inject errors, introduce real-world clinical errors to simulate ecologically meaningful degradation rather than unrealistic, worst-case outputs.
- The candidate should be """

# Identifiers of the supported tasks. The same seven keys, in the same order,
# appear in instruction_mappings_prompt.
task_keys = (
    "report2simplified",
    "impression2simplified",
    "report2impression",
    "bhc2spanish",
    "query2question",
    "dialogue2note",
    "medication2answer",
)

# JSON-like text mapping each task key to its instruction, with a description
# of what the reference and the candidate are for that task.
# This is a regular (non-raw) string, so every \n escape below becomes a real
# newline inside the quoted values. A strict JSON parser rejects such literal
# newlines; json.loads(..., strict=False) accepts them.
instruction_mappings_prompt = """
{
"report2simplified": "Create a simplified, patient-friendly version of the reference.\n1. Reference Description: The original text containing medical terminology.\n2. Candidate Description: The simplified, patient-friendly, and easy-to-understand version of the text.\n",
"impression2simplified": "Create a simplified, patient-friendly version of the reference.\n1. Reference Description: The original text containing medical terminology.\n2. Candidate Description: The simplified, patient-friendly, and easy-to-understand version of the text.\n",
"report2impression": "Summarize the radiology report findings into an impression with minimal text.\n1. Reference Description: The findings section of the radiology report.\n2. Candidate Description: The impression section of the radiology report with minimal text.\n",
"bhc2spanish": "Translate the brief hospital course into Spanish.\n1. Reference Description: The brief hospital course section of the discharge note.\n2. Candidate Description: The Spanish-translated version of the brief hospital course.\n",
"query2question": "Summarize the patient health query into one question of 15 words or less.\n1. Reference Description: The patient health query.\n2. Candidate Description: The patient health question of 15 words or less.\n",
"dialogue2note": "Summarize the patient/doctor dialogue into an assessment and plan.\n1. Reference Description: The original patient/doctor dialogue.\n2. Candidate Description: The assessment and plan section.\n",
"medication2answer": "Answer the following medication-related patient health question.\n1. Reference Description: The medication-related patient health question.\n2. Candidate Description: The answer to the medication-related question.\n"
}
"""