(no commit message)
This commit is contained in:
57
medval/validator.py
Normal file
57
medval/validator.py
Normal file
@@ -0,0 +1,57 @@
|
||||
import dspy
|
||||
from utils.prompts import (
|
||||
errors_prompt,
|
||||
error_categories,
|
||||
risk_levels_prompt,
|
||||
task_keys,
|
||||
instruction_mappings_prompt,
|
||||
)
|
||||
from typing import Literal, List
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# Structured record for one error found in a candidate text; it is the element
# type of the `errors` output declared on MedVAL_Validator in this module.
#
# NOTE(review): documented with comments rather than a class docstring on
# purpose -- pydantic is expected to expose a model docstring as the schema
# description, which would change what consumers of the schema see. Confirm
# before adding one.
class ErrorAssessment(BaseModel):
    # Verbatim span of the candidate that contains the error.
    error_occurrence: str = Field(
        description=(
            "The exact snippet of text in the candidate "
            "where the error appears."
        ),
    )

    # Short statement of what is wrong with that span.
    error: str = Field(
        description="A concise explanation of why the snippet is an error.",
    )

    # Annotated as a plain `str`, so the value is not validated against the
    # category list; `error_categories` is only interpolated into the
    # description text.
    category: str = Field(
        description=(
            "One of the 11 predefined error categories:\n"
            f"{error_categories}"
        ),
    )

    # Longer justification tying the error back to the reference text.
    reasoning: str = Field(
        description=(
            "Detailed reasoning outlining why this portion of the candidate "
            "is factually inconsistent with the reference."
        ),
    )
|
||||
|
||||
|
||||
# DSPy signature that takes a reference text and a generated candidate and
# outputs which task (one of `task_keys`) the candidate was produced for.
#
# The class docstring is kept word-for-word: for a `dspy.Signature` it is the
# instruction text of the signature, not ordinary documentation.
class DetectTask(dspy.Signature):
    """
    Detect the intended task from the reference text and the generated candidate
    """

    # Inputs: the source text and the text generated from it.
    reference: str = dspy.InputField()
    candidate: str = dspy.InputField()

    # Output constrained to the values in `task_keys`.
    # `Literal[tuple(task_keys)]` builds exactly the same type as
    # `Literal[*task_keys]`, but star-unpacking inside a subscript only parses
    # on Python 3.11+; the tuple form keeps this module importable on older
    # interpreters.
    task: Literal[tuple(task_keys)] = dspy.OutputField(
        description=instruction_mappings_prompt
    )
|
||||
|
||||
|
||||
# DSPy signature for the validation step: given an instruction, a reference
# and a candidate, it outputs a list of structured error assessments and a
# risk level from 1 to 4.
#
# The class docstring is reproduced unchanged because, for a `dspy.Signature`,
# it is the instruction text of the signature rather than plain documentation.
# NOTE(review): the indentation of the two "-" sub-bullets under item 3 could
# not be recovered from this view of the file; confirm it against the
# original before merging.
class MedVAL_Validator(dspy.Signature):
    """
    Evaluate a candidate in comparison to the reference composed by an expert.

    Instructions:
    1. Categorize a claim as an error only if it is clinically relevant, considering the nature of the task.
    2. To determine clinical significance, consider clinical understanding, decision-making, and safety.
    3. Some tasks (e.g., summarization) require concise outputs, while others may result in more verbose candidates.
    - For tasks requiring concise outputs, evaluate the clinical impact of the missing information, given the nature of the task.
    - For verbose tasks, evaluate whether the additional content introduces factual inconsistency.
    """

    # Inputs: the task instruction plus the two texts being compared.
    instruction: str = dspy.InputField()
    reference: str = dspy.InputField()
    candidate: str = dspy.InputField()

    # Outputs.
    # `errors` is a list of ErrorAssessment records (one per detected error),
    # described to the model by `errors_prompt`.
    errors: List[ErrorAssessment] = dspy.OutputField(
        description=errors_prompt,
    )
    # `risk_level` is limited to the integers 1-4; what each level means is
    # presumably spelled out in `risk_levels_prompt` (defined in utils.prompts).
    risk_level: Literal[1, 2, 3, 4] = dspy.OutputField(
        description=risk_levels_prompt,
    )
|
||||
Reference in New Issue
Block a user