diff --git a/README.md b/README.md
deleted file mode 100644
index d2c031b..0000000
--- a/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# doc-extract
-
diff --git a/auto_classes.json b/auto_classes.json
new file mode 100644
index 0000000..650c688
--- /dev/null
+++ b/auto_classes.json
@@ -0,0 +1,4 @@
+{
+  "AutoConfig": "modaic.PrecompiledConfig",
+  "AutoProgram": "program.DocExtract"
+}
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..4475ebc
--- /dev/null
+++ b/config.json
@@ -0,0 +1,3 @@
+{
+  "model": null
+}
\ No newline at end of file
diff --git a/program.json b/program.json
new file mode 100644
index 0000000..7650fc9
--- /dev/null
+++ b/program.json
@@ -0,0 +1,32 @@
+{
+  "cot.predict": {
+    "traces": [],
+    "train": [],
+    "demos": [],
+    "signature": {
+      "instructions": "Classify a document snippet into one of the supported label buckets.",
+      "fields": [
+        {
+          "prefix": "Context:",
+          "description": "Document text or snippet to classify"
+        },
+        {
+          "prefix": "Reasoning: Let's think step by step in order to",
+          "description": "${reasoning}"
+        },
+        {
+          "prefix": "Label:",
+          "description": "Return exactly one of: legal, healthcare, personal, tax, or other"
+        }
+      ]
+    },
+    "lm": null
+  },
+  "metadata": {
+    "dependency_versions": {
+      "python": "3.11",
+      "dspy": "3.1.3",
+      "cloudpickle": "3.1"
+    }
+  }
+}
\ No newline at end of file
diff --git a/program.py b/program.py
new file mode 100644
index 0000000..9613d5a
--- /dev/null
+++ b/program.py
@@ -0,0 +1,34 @@
+# Here you can define your dspy.Module as a modaic.PrecompiledProgram
+from typing import Literal
+
+import dspy
+from modaic import PrecompiledProgram
+
+# Closed set of labels the classifier is allowed to return.
+DocumentLabel = Literal["legal", "healthcare", "personal", "tax", "other"]
+
+
+class ExtractSignature(dspy.Signature):
+    """Classify a document snippet into one of the supported label buckets."""
+
+    context: str = dspy.InputField(desc="Document text or snippet to classify")
+    label: DocumentLabel = dspy.OutputField(
+        desc="Return exactly one of: legal, healthcare, personal, tax, or other"
+    )
+
+
+class DocExtract(PrecompiledProgram):
+    """Chain-of-thought program that assigns a label to a document snippet."""
+
+    def __init__(self):
+        super().__init__()
+        self.cot = dspy.ChainOfThought(ExtractSignature)
+
+    def forward(self, context: str) -> dspy.Prediction:
+        """Run the chain-of-thought predictor on ``context`` and return its prediction."""
+        return self.cot(context=context)
+
+
+if __name__ == "__main__":
+    # Publish only when run as a script, so importing this module never pushes.
+    DocExtract().push_to_hub("tyrin/doc-extract")
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..4fad6c0
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,7 @@
+[project]
+name = "doc-extract"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = ["dspy>=3.1.3", "modaic>=0.29.1"]