(no commit message)
This commit is contained in:
211
models/liver.py
Normal file
211
models/liver.py
Normal file
@@ -0,0 +1,211 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
models/liver.py
|
||||
This module defines data extraction schemas for pathology reports using the dspy library, specifically focusing on hepatocellular carcinoma (cholangiocarcinoma not included). It contains pydantic models for surgical margins and regional lymph nodes, and dspy signature definitions for the individual extraction tasks (general findings, tumor extent, vascular invasion, staging, margins and lymph nodes).
|
||||
|
||||
author: Hong-Kai (Walther) Chen, Po-Yen Tzeng and Kai-Po Chang @ Med NLP Lab, China Medical University
|
||||
date: 2025-10-13
|
||||
"""
|
||||
|
||||
__version__ = "1.0.0"
|
||||
__date__ = "2025-10-13"
|
||||
__author__ = ["Hong-Kai (Walther) Chen", "Po-Yen Tzeng", "Kai-Po Chang"]
|
||||
__copyright__ = "Copyright 2025, Med NLP Lab, China Medical University"
|
||||
__license__ = "MIT"
|
||||
__ajcc_version__ = 8
|
||||
__cap_version__ = "4.3.0.0"
|
||||
|
||||
import dspy
|
||||
from typing import Literal
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# One surgical margin of a liver resection specimen.
#
# Documented with `#` comments instead of a class docstring on purpose:
# pydantic copies a model docstring into the generated JSON schema, and that
# schema text should stay exactly as it was.
class LiverMargin(BaseModel):
    # Standardised margin name; anything outside the listed values is meant to
    # be reported as "others" (see the field description below).
    margin_category: (
        Literal["parenchymal", "hepatic_vein", "portal_vein", "bile_duct", "others"]
        | None
    ) = Field(
        None,
        description="acceptable value for surgical margins in liver cancer. If not included in these standard margins, should be classified as others.",
    )
    # Required: whether tumor is present at this margin.
    margin_involved: bool
    # Tumor-to-margin distance in mm (0 when involved, null when not stated).
    distance: int | None = Field(
        None,
        description="If margin is involved, return 0. If margin is uninvolved/free, try your best to find the distance at both microscopic and macroscopic(gross) description, and specify the distance from tumor to margin in mm, rounded to integer. If the margin is uninvolved/free and, after your best effort, the distance is still not specified, return null",
    )
    # Free-text note about the margin (presumably the wording found in the
    # report -- confirm with the authors).
    # Defaults to None so that an object which omits this key still validates,
    # consistent with the other nullable fields of this model; previously the
    # key was required even though its value could be null.
    description: str | None = None
|
||||
|
||||
|
||||
# Lymph node counts for a single nodal station.
#
# `#` comments are used instead of a class docstring so the JSON schema that
# pydantic generates for this model is unchanged.
class LiverLN(BaseModel):
    # Both counts are required integers (presumably: number of nodes with
    # metastasis / number of nodes examined at this station).
    involved: int
    examined: int
    # Optional station label; null when the report does not name the station.
    station_name: str | None = Field(
        default=None,
        description="specify the name of the lymph node station here.",
    )
|
||||
|
||||
|
||||
# dspy signature for the flat (non-nested) items of a hepatocellular carcinoma
# excision report: procedure, site, histology, grade, size, focality,
# perineural invasion, distant metastasis and treatment effect.
#
# The class docstring and every `desc=` string are prompt text consumed by
# dspy, so explanatory notes live in `#` comments only.
class LiverCancerNonnested(dspy.Signature):
    """you need to extract the value of the specified items below from the given hepatocellular carcinoma excision report. DO NOT JUST RETURN NULL. IF SOME ITEM IS NOT PRESENT, RETURN NULL FOR THAT ITEM, BUT TRY YOUR BEST TO FILL IN THE OTHERS."""

    # ---- inputs ----
    report: list = dspy.InputField(
        desc="this is a pathological report for hepatocellular carcinoma excision, separated into paragraphs."
    )
    report_jsonized: dict = dspy.InputField(
        desc="this is a roughly structured json summary of the pathological report, which is generated by another model."
    )

    # ---- outputs (every item is nullable) ----
    procedure: (
        Literal[
            "wedge_resection",
            "partial_hepatectomy",
            "segmentectomy",
            "lobectomy",
            "total_hepatectomy",
            "others",
        ]
        | None
    ) = dspy.OutputField(
        desc="identify which surgery procedure was used. e.g. partial hepatectomy"
    )
    tumor_site: (
        Literal["right_lobe", "left_lobe", "caudate_lobe", "quadrate_lobe", "others"]
        | None
    ) = dspy.OutputField(desc="identify the site of the tumor. e.g. right lobe")
    histology: (
        Literal[
            "hepatocellular_carcinoma",
            "hepatocellular_carcinoma_fibrolamellar",
            "hepatocellular_carcinoma_scirrhous",
            "hepatocellular_carcinoma_clear_cell",
            "others",
        ]
        | None
    ) = dspy.OutputField(
        # The example previously named a breast-cancer histology ("invasive
        # carcinoma of no special type"), which is not one of the allowed
        # values; it now points at a liver histology.
        desc="identify the histological type of the cancer. e.g. hepatocellular carcinoma"
    )
    grade: Literal[1, 2, 3, 4] | None = dspy.OutputField(
        desc="identify the grade of the cancer, well->1, moderate->2, poor->3, undifferentiated->4"
    )
    tumor_size: int | None = dspy.OutputField(
        desc="identify the size of the tumor in mm, rounded, if multiple tumors are present, please provide the size of the largest tumor"
    )
    tumor_focality: Literal["unifocal", "multifocal"] | None = dspy.OutputField(
        desc="identify whether the tumor is unifocal or multifocal"
    )
    perineural_invasion: bool | None = dspy.OutputField(
        desc="check whether or not perineural invasion is present"
    )
    distant_metastasis: bool | None = dspy.OutputField(
        desc="check whether or not distant metastasis is present"
    )
    treatment_effect: str | None = dspy.OutputField(
        desc='check the treatment effect of the cancer. If you see "No known presurgical therapy", return None'
    )
|
||||
|
||||
|
||||
# dspy signature that lists the structures a liver tumor has extended into.
# The docstring and `desc=` strings are prompt text and are kept verbatim.
class LiverCancerExtent(dspy.Signature):
    """
    You need to determine whether the tumor have extended out of liver. If so, extract list of tumor extension to one or more of the specified items below from the given hepatocellular carcinoma excision report. DO NOT JUST RETURN NULL. IF SOME ITEM IS NOT PRESENT, RETURN NULL FOR THAT ITEM, BUT TRY YOUR BEST TO FILL IN THE OTHERS.
    """

    # Inputs: the report as a list of paragraphs and a pre-structured summary.
    report: list = dspy.InputField(
        desc="this is a pathological report for hepatocellular carcinoma excision, separated into paragraphs."
    )
    report_jsonized: dict = dspy.InputField(
        desc="this is a roughly structured json summary of the pathological report, which is generated by another model."
    )

    # Output: zero or more extension sites from a closed vocabulary, or None.
    tumor_extent: list[
        Literal[
            "hepatic_vein",
            "portal_vein",
            "visceral_peritoneum",
            "gallbladder",
            "diaphragm",
            "others",
        ]
    ] | None = dspy.OutputField(
        desc='return all of the possible tumor extension. example: ["hepatic_vein", "gallbladder"]. If not present, just output None, do not overdiagnosis'
    )
|
||||
|
||||
|
||||
# dspy signature that lists the kinds of vascular invasion in the report.
# The docstring and `desc=` strings are prompt text and are kept verbatim.
class LiverCancerVascularInvasion(dspy.Signature):
    """
    You need to determine whether the tumor have vascular invasion. If so, extract list of vascular invasion to one or more of the specified items below from the given hepatocellular carcinoma excision report. DO NOT JUST RETURN NULL. IF SOME ITEM IS NOT PRESENT, RETURN NULL FOR THAT ITEM, BUT TRY YOUR BEST TO FILL IN THE OTHERS.
    """

    # Inputs: the report as a list of paragraphs and a pre-structured summary.
    report: list = dspy.InputField(
        desc="this is a pathological report for hepatocellular carcinoma excision, separated into paragraphs."
    )
    report_jsonized: dict = dspy.InputField(
        desc="this is a roughly structured json summary of the pathological report, which is generated by another model."
    )

    # Output: any combination of the three allowed vessel classes, or None.
    vascular_invasion: list[
        Literal["large_hepatic_vein", "large_portal_vein", "small_vessel"]
    ] | None = dspy.OutputField(
        desc='return all of the possible vascular invasion. example: ["large_hepatic_vein", "small_vessel"]. If not present, just output None, do not overdiagnosis'
    )
|
||||
|
||||
|
||||
# dspy signature for the pathological staging items (TNM descriptor, pT/pN/pM
# categories, overall stage group and the AJCC edition quoted by the report).
# The docstring and `desc=` strings are prompt text and are kept verbatim.
class LiverCancerStaging(dspy.Signature):
    """you need to extract the value of the specified items below from the given liver cancer excision report. DO NOT JUST RETURN NULL. IF SOME ITEM IS NOT PRESENT, RETURN NULL FOR THAT ITEM, BUT TRY YOUR BEST TO FILL IN THE OTHERS."""

    # Inputs: the report as a list of paragraphs and a pre-structured summary.
    report: list = dspy.InputField(
        desc="this is a pathological report for hepatocellular carcinoma excision, separated into paragraphs."
    )
    report_jsonized: dict = dspy.InputField(
        desc="this is a roughly structured json summary of the pathological report, which is generated by another model."
    )

    # Outputs: every value is a lower-case code from a closed set, or None.
    tnm_descriptor: Literal["y", "r", "m"] | None = dspy.OutputField(
        desc='identify the tnm descriptor of the tumor. e.g., "y" (post-therapy), "r", etc.'
    )
    pt_category: Literal["tx", "t1a", "t1b", "t2", "t3", "t4"] | None = dspy.OutputField(
        desc="identify the pt category of the tumor"
    )
    pn_category: Literal["nx", "n0", "n1"] | None = dspy.OutputField(
        desc="identify the pn category of the tumor"
    )
    pm_category: Literal["mx", "m0", "m1"] | None = dspy.OutputField(
        desc="identify the pm category of the tumor. if you see cM0 or cM1, etc., code as mx, since pathological M category is not available"
    )
    overall_stage: Literal["ia", "ib", "ii", "iiia", "iiib", "iva", "ivb"] | None = dspy.OutputField(
        desc="identify the overall stage of the tumor"
    )
    ajcc_version: int | None = dspy.OutputField(
        desc="identify the ajcc version of the pathological staging"
    )
|
||||
|
||||
|
||||
# dspy signature that extracts the surgical margins of the specimen as a list
# of LiverMargin objects.
#
# The class docstring and `desc=` strings are prompt text consumed by dspy, so
# explanatory notes live in `#` comments only.
class LiverCancerMargins(dspy.Signature):
    """you need to extract the value of the specified items below from the given hepatocellular carcinoma excision report. DO NOT JUST RETURN NULL. IF SOME ITEM IS NOT PRESENT, RETURN NULL FOR THAT ITEM, BUT TRY YOUR BEST TO FILL IN THE OTHERS."""

    # Inputs: the report as a list of paragraphs and a pre-structured summary.
    report: list = dspy.InputField(
        desc="this is a pathological report for hepatocellular carcinoma excision, separated into paragraphs."
    )
    report_jsonized: dict = dspy.InputField(
        desc="this is a roughly structured json summary of the pathological report, which is generated by another model."
    )

    # Output: one LiverMargin per margin, or None.
    # The in-prompt example must itself be a valid list[LiverMargin]:
    #   * margin_category values come from LiverMargin's allowed set (the old
    #     example used "proximal_duodenal", "anterior_outmost" and "radial",
    #     none of which LiverMargin accepts);
    #   * an involved margin carries distance 0, as LiverMargin.distance asks;
    #   * the "description" key is spelled out so each example object
    #     validates on its own.
    margins: list[LiverMargin] | None = dspy.OutputField(
        desc="""return all of the possible involved margins and its distance from cancer. example:[{"margin_category": "parenchymal", "margin_involved": true, "distance": 0, "description": null}, {"margin_category": "hepatic_vein", "margin_involved": false, "distance": 10, "description": null}, {"margin_category": "bile_duct", "margin_involved": false, "distance": null, "description": null}]. If not present, just output null for every margin"""
    )
|
||||
|
||||
|
||||
# dspy signature for the regional lymph node findings: per-station counts
# (as LiverLN objects), extranodal extension and the largest nodal deposit.
# The docstring and `desc=` strings are prompt text and are kept verbatim.
class LiverCancerLN(dspy.Signature):
    """you need to extract the value of the specified items below from the given hepatocellular carcinoma excision report. DO NOT JUST RETURN NULL. IF SOME ITEM IS NOT PRESENT, RETURN NULL FOR THAT ITEM, BUT TRY YOUR BEST TO FILL IN THE OTHERS."""

    # Inputs: the report as a list of paragraphs and a pre-structured summary.
    report: list = dspy.InputField(
        desc="this is a pathological report for hepatocellular carcinoma excision, separated into paragraphs."
    )
    report_jsonized: dict = dspy.InputField(
        desc="this is a roughly structured json summary of the pathological report, which is generated by another model."
    )

    # Outputs: all three are nullable.
    regional_lymph_node: list[LiverLN] | None = dspy.OutputField(
        desc="""return all of the involved regional lymph node. example:[{"involved": 2, "examined": 5, "station_name": "station 1"}, {"involved": 0, "examined": 3, "station_name": "station 2"}, ...]. If not present, just output null for every lymph node"""
    )
    extranodal_extension: bool | None = dspy.OutputField(
        desc="check whether or not extranodal extension is present; if no lymph node metastasis, should be None"
    )
    maximal_ln_size: int | None = dspy.OutputField(
        desc="check the maximal size of node metastatic tumor in mm, rounded to integer; if no lymph node metastasis, should be None"
    )
|
||||
Reference in New Issue
Block a user