# MedVAL-pipeline/medval/validator.py

import dspy
from utils.prompts import (
    errors_prompt,
    error_categories,
    risk_levels_prompt,
    task_keys,
    instruction_mappings_prompt,
)
from typing import Literal, List
from pydantic import BaseModel, Field


# Structured record describing a single error located in a candidate text.
# Used as the element type of the `errors` output field on MedVAL_Validator.
#
# NOTE: this model is documented with `#` comments rather than a class
# docstring on purpose: pydantic copies a model's docstring into the generated
# JSON schema, so adding one would alter the schema text seen downstream.
# The `Field(description=...)` strings below are likewise runtime text.
class ErrorAssessment(BaseModel):
    # Span quoted from the candidate that contains the error.
    error_occurrence: str = Field(
        description="The exact snippet of text in the candidate where the error appears."
    )
    # Short statement of what is wrong with that span.
    error: str = Field(
        description="A concise explanation of why the snippet is an error."
    )
    # Error category label. Typed as a plain `str`, so membership in
    # `error_categories` is not enforced by validation here; the list is only
    # interpolated into the description when the class is defined.
    # NOTE(review): the count "11" is hard-coded in the text -- confirm it
    # still matches the contents of `error_categories` in utils.prompts.
    category: str = Field(
        description=f"One of the 11 predefined error categories:\n{error_categories}"
    )
    # Longer justification tying the error back to the reference text.
    reasoning: str = Field(
        description="Detailed reasoning outlining why this portion of the candidate is factually inconsistent with the reference."
    )


# DSPy signature that classifies which task a (reference, candidate) pair
# belongs to.
#
# Inputs:  `reference` and `candidate`, both plain strings.
# Output:  `task`, constrained to one of the values in `task_keys`; the field
#          description is `instruction_mappings_prompt` (both imported from
#          utils.prompts).
#
# The class docstring is kept verbatim because DSPy treats a signature's
# docstring as instruction text; field order is also left untouched.
class DetectTask(dspy.Signature):
    """
    Detect the intended task from the reference text and the generated candidate
    """

    reference: str = dspy.InputField()
    candidate: str = dspy.InputField()
    # `Literal[tuple(task_keys)]` produces exactly the same type as
    # `Literal[*task_keys]` (subscripting with a tuple is how multiple
    # arguments reach `Literal`), but star-unpacking inside a subscript is a
    # SyntaxError before Python 3.11, which made this module unimportable on
    # older interpreters.
    task: Literal[tuple(task_keys)] = dspy.OutputField(
        description=instruction_mappings_prompt
    )


# DSPy signature for assessing a candidate text against an expert-written
# reference.
#
# Inputs:  `instruction`, `reference`, `candidate` (all plain strings).
# Outputs: `errors`, a list of ErrorAssessment records, and `risk_level`, an
#          integer restricted to 1-4.
#
# The class docstring below is left byte-for-byte unchanged: DSPy uses a
# signature's docstring as instruction text, so editing it would change the
# prompt. Field declaration order is preserved for the same reason.
class MedVAL_Validator(dspy.Signature):
    """
    Evaluate a candidate in comparison to the reference composed by an expert.

    Instructions:
    1. Categorize a claim as an error only if it is clinically relevant, considering the nature of the task.
    2. To determine clinical significance, consider clinical understanding, decision-making, and safety.
    3. Some tasks (e.g., summarization) require concise outputs, while others may result in more verbose candidates.
       - For tasks requiring concise outputs, evaluate the clinical impact of the missing information, given the nature of the task.
       - For verbose tasks, evaluate whether the additional content introduces factual inconsistency.
    """

    # Task instruction the candidate was generated under.
    instruction: str = dspy.InputField()
    # Expert-composed text that the candidate is compared against.
    reference: str = dspy.InputField()
    # Generated text being evaluated.
    candidate: str = dspy.InputField()
    # NOTE(review): the commented-out line below is a plain-string variant of
    # `errors`; presumably superseded by the structured list that follows --
    # confirm and delete if no longer needed.
    # errors: str = dspy.OutputField(description=errors_prompt)
    # Structured list of detected errors; described by `errors_prompt`.
    errors: List[ErrorAssessment] = dspy.OutputField(description=errors_prompt)
    # Risk grade limited to the integers 1-4; the meaning of each level is
    # defined in `risk_levels_prompt` (utils.prompts), not in this file.
    risk_level: Literal[1, 2, 3, 4] = dspy.OutputField(description=risk_levels_prompt)