Add MODAIC token support and fix init to accept kwargs
This commit is contained in:
92
README.md
92
README.md
@@ -0,0 +1,92 @@
|
|||||||
|
Modaic and Weaviate
|
||||||
|
Modaic is a hub to share and manage DSPy programs!
|
||||||
|
|
||||||
|
You can learn more about the Modaic SDK here and check out programs on the Modaic Hub here.
|
||||||
|
|
||||||
|
This notebook illustrates how to load two programs from the Modaic Hub: CrossEncoderRanker and PromptToSignature.
|
||||||
|
|
||||||
|
To see how to publish your own programs to the Modaic Hub, check out ce_ranker.py and pyproject.toml!
|
||||||
|
|
||||||
|
CrossEncoderRanker
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import os
|
||||||
|
from modaic import AutoProgram
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# load_dotenv() is expected to supply MODAIC_TOKEN, WEAVIATE_URL, WEAVIATE_API_KEY, and OPENAI_API_KEY (e.g. from a .env file)
|
||||||
|
|
||||||
|
ce_ranker = AutoProgram.from_precompiled(
|
||||||
|
"connor/CrossEncoderRanker",
|
||||||
|
config_options={ # replace this with your collection name and other custom parameters
|
||||||
|
"collection_name": "IRPapersText_Default",
|
||||||
|
"return_properties": ["content"],
|
||||||
|
"k": 5
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
response = ce_ranker("What does HyDE stand for?");
|
||||||
|
|
||||||
|
for idx, ranked_doc in enumerate(response):
|
||||||
|
print(f"Rank {idx+1}: {ranked_doc[:300]}")
|
||||||
|
SSSS connor/CrossEncoderRanker
|
||||||
|
Rank 1: Figure 1: An illustration of the HyDE model. Document snippets are shown. HyDE serves all types of queries without changing the underlying InstructGPT and Contriever/mContriever models.
|
||||||
|
|
||||||
|
GPT-3 (Brown et al., 2020) models can be aligned to human intents to follow instructions faithfully.
|
||||||
|
|
||||||
|
With these
|
||||||
|
Rank 2: | | Scifact | FiQA | DBPedia |
|
||||||
|
|-----------|---------|-------|---------|
|
||||||
|
| Contriever | 64.9 | 24.5 | 29.2 |
|
||||||
|
| HyDE w/ InstructGPT | **69.1** | 27.3 | 36.8 |
|
||||||
|
| w/ GPT-3 | 65.9 | **27.9** | **40.5** |
|
||||||
|
|
||||||
|
Table 5: nDCG@10 comparing InstructGPT vs. 3-shot GPT-3 on BEIR. Bes
|
||||||
|
Rank 3: 6 Conclusion
|
||||||
|
|
||||||
|
In this paper, we introduce HyDE, a new approach for building effective dense retrievers in a completely unsupervised manner, without the need for any relevance labels. We demonstrate that some aspects of relevance modeling can be delegated to a more powerful, flexible, and general-pur
|
||||||
|
Rank 4: estimate Equation 5 by sampling N documents from g, [d̂1, d̂2, ..., d̂N]:
|
||||||
|
|
||||||
|
v̂qij = 1/N ∑_(d̂k ∼ g(qij, INSTi)) f(d̂k) (6)
|
||||||
|
|
||||||
|
= 1/N ∑_(k=1)^N f(d̂k) (7)
|
||||||
|
|
||||||
|
We also consider the query as a possible hypothesis:
|
||||||
|
|
||||||
|
v̂qij = 1/(N+1) [(∑_(k=1)^N f(d̂k)) + f(qij)] (8)
|
||||||
|
|
||||||
|
Inner p
|
||||||
|
Rank 5: | | sw | wo | ko | ja | bn |
|
||||||
|
|-----------|-----|-----|------|------|------|
|
||||||
|
| **Unsupervised** | | | | | |
|
||||||
|
| BM25 | 38.9| 28.5| 21.2 | 41.8 | |
|
||||||
|
| mContriever| 38.3| 22.3| 19.5 | 35.3 | |
|
||||||
|
| HyDE | 41.7| 30.6| 30.7 | 41.3 | |
|
||||||
|
|
|
||||||
|
PromptToSignature
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import os
|
||||||
|
from modaic import AutoProgram
|
||||||
|
|
||||||
|
load_dotenv() # This is looking for MODAIC_TOKEN and OPENROUTER_API_KEY
|
||||||
|
|
||||||
|
agent = AutoProgram.from_precompiled(
|
||||||
|
"fadeleke/prompt-to-signature",
|
||||||
|
config_options={
|
||||||
|
"lm": "openrouter/anthropic/claude-sonnet-4.5",
|
||||||
|
"max_tokens": 32000,
|
||||||
|
"temperature": 0.7,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
result = agent("Summarize a document and extract key entities")
|
||||||
|
|
||||||
|
print(result)
|
||||||
|
SSSS fadeleke/prompt-to-signature
|
||||||
|
Signature generation took 0.00 seconds in inference.
|
||||||
|
Prediction(
|
||||||
|
signature_name='DocumentSummaryAndEntityExtraction',
|
||||||
|
task_description='Extract a concise summary of a document and identify key entities (people, organizations, locations, dates, etc.) mentioned within it.',
|
||||||
|
signature_fields=[GeneratedField(name='document', type=<FieldType.STRING: 'str'>, role=<FieldRole.INPUT: 'input'>, description='The document text to summarize and extract entities from', pydantic_model_schema=None, literal_values=None, default_value=None), GeneratedField(name='summary', type=<FieldType.STRING: 'str'>, role=<FieldRole.OUTPUT: 'output'>, description="A concise summary of the document's main points and content", pydantic_model_schema=None, literal_values=None, default_value=None), GeneratedField(name='key_entities', type=<FieldType.PYDANTIC_MODEL: 'pydantic'>, role=<FieldRole.OUTPUT: 'output'>, description='Structured extraction of key entities found in the document', pydantic_model_schema=PydanticModelSchema(model_name='KeyEntities', description='Container for extracted entities from the document', fields=[PydanticFieldDef(name='people', type=<FieldType.LIST_STRING: 'list[str]'>, description='Names of people mentioned in the document', required=False, literal_values=None, nested_model=None), PydanticFieldDef(name='organizations', type=<FieldType.LIST_STRING: 'list[str]'>, description='Names of organizations, companies, or institutions mentioned', required=False, literal_values=None, nested_model=None), PydanticFieldDef(name='locations', type=<FieldType.LIST_STRING: 'list[str]'>, description='Geographic locations, cities, countries, or regions mentioned', required=False, literal_values=None, nested_model=None), PydanticFieldDef(name='dates', type=<FieldType.LIST_STRING: 'list[str]'>, description='Important dates or time references mentioned in the document', required=False, literal_values=None, nested_model=None), PydanticFieldDef(name='topics', type=<FieldType.LIST_STRING: 'list[str]'>, description='Main topics or themes covered in the document', required=False, literal_values=None, nested_model=None)]), literal_values=None, default_value=None)],
|
||||||
|
reasoning=None
|
||||||
|
)
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
{
|
{
|
||||||
"AutoConfig": "ce_ranker.CERankerConfig",
|
"AutoConfig": "ce_ranker.CERankerConfig",
|
||||||
"AutoAgent": "ce_ranker.CERankerAgent"
|
"AutoProgram": "ce_ranker.CERankerAgent"
|
||||||
}
|
}
|
||||||
11
ce_ranker.py
11
ce_ranker.py
@@ -3,7 +3,7 @@ import os
|
|||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
import dspy
|
import dspy
|
||||||
from modaic import PrecompiledAgent, PrecompiledConfig
|
from modaic import PrecompiledProgram, PrecompiledConfig
|
||||||
import weaviate
|
import weaviate
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
@@ -22,7 +22,7 @@ class CERankerConfig(PrecompiledConfig):
|
|||||||
lm: str = "openai/gpt-4.1-mini"
|
lm: str = "openai/gpt-4.1-mini"
|
||||||
|
|
||||||
|
|
||||||
class CERankerAgent(PrecompiledAgent):
|
class CERankerAgent(PrecompiledProgram):
|
||||||
config: CERankerConfig
|
config: CERankerConfig
|
||||||
|
|
||||||
def __init__(self, config: CERankerConfig, **kwargs):
|
def __init__(self, config: CERankerConfig, **kwargs):
|
||||||
@@ -73,10 +73,9 @@ if __name__ == "__main__":
|
|||||||
return_properties=["content"],
|
return_properties=["content"],
|
||||||
k=5
|
k=5
|
||||||
)
|
)
|
||||||
agent = CERankerAgent(config)
|
reranker = CERankerAgent(config)
|
||||||
print(agent(query="What is HyDE?"))
|
reranker.push_to_hub(
|
||||||
agent.push_to_hub(
|
|
||||||
"connor/CrossEncoderRanker",
|
"connor/CrossEncoderRanker",
|
||||||
with_code=True,
|
with_code=True,
|
||||||
commit_message="Fix init to accept kwargs"
|
commit_message="Add MODAIC token support and fix init to accept kwargs"
|
||||||
)
|
)
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
{
|
{
|
||||||
|
"model": null,
|
||||||
"collection_name": "IRPapersText_Default",
|
"collection_name": "IRPapersText_Default",
|
||||||
"return_properties": [
|
"return_properties": [
|
||||||
"content"
|
"content"
|
||||||
|
|||||||
@@ -28,7 +28,7 @@
|
|||||||
},
|
},
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"dependency_versions": {
|
"dependency_versions": {
|
||||||
"python": "3.11",
|
"python": "3.13",
|
||||||
"dspy": "3.0.4",
|
"dspy": "3.0.4",
|
||||||
"cloudpickle": "3.1"
|
"cloudpickle": "3.1"
|
||||||
}
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "CrossEncoderRanker"
|
name = "CrossEncoderRanker"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
description = "Add your description here"
|
description = "Cross encoder ranker using modaic"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
dependencies = ["modaic>=0.4.1", "weaviate-client>=4.18.1"]
|
dependencies = ["modaic>=0.8.0", "weaviate-client>=4.18.1"]
|
||||||
|
|||||||
Reference in New Issue
Block a user