(no commit message)
This commit is contained in:
142
README.md
142
README.md
@@ -48,14 +48,18 @@ Once you have a `dataset.jsonl` file, you can optimize the agent with dspy's bui
|
||||
1. Create a file called `compile.py` with the following code. Replace `<your-username>` with your modaic username.
|
||||
|
||||
```python
|
||||
from sqlalchemy.sql import true
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
import dspy
|
||||
import json
|
||||
from dspy import Prediction, Example
|
||||
from modaic import AutoAgent
|
||||
from typing import Optional, Tuple
|
||||
|
||||
searcher = AutoAgent.from_precompiled("swagginty/persana-lead-gen", api_key=os.getenv("PERSANA_KEY"))
|
||||
|
||||
searcher = AutoAgent.from_precompiled(
|
||||
"swagginty/persana-lead-gen", api_key=os.getenv("PERSANA_KEY"), train=True
|
||||
)
|
||||
feedback_creator = searcher.feedback_creator
|
||||
|
||||
|
||||
@@ -66,11 +70,103 @@ class SearchExample(Example):
|
||||
|
||||
|
||||
class SearchPrediction(Prediction):
    """Typed view of the prediction object consumed by the metric functions."""

    # Retrieved profiles; may be None (the annotation is Optional), so
    # callers must not assume a list is always present.
    profiles: Optional[list[dict]]
    # Filter parameters produced by the agent; the cheap metric reads these
    # through `.get(...)` with keys such as "include_job_titles".
    search_parameters: dict
|
||||
|
||||
|
||||
def evaluate_results(
|
||||
def any_in(list: list[str], string: str) -> bool:
    """Return True if any item of ``list`` is a case-insensitive substring of ``string``.

    Args:
        list: Candidate substrings to look for. (The name shadows the builtin
            but is kept so existing keyword callers continue to work.)
        string: Text to search in. ``None`` is tolerated and never matches.

    Returns:
        True when at least one item occurs in ``string`` ignoring case,
        False otherwise (including when ``list`` is empty).
    """
    if string is None:
        # A missing field cannot contain any of the items.
        return False
    # Lower-case the haystack once rather than once per candidate.
    haystack = string.lower()
    return any(item.lower() in haystack for item in list)
|
||||
|
||||
|
||||
def include_profile(
    search_parameters: dict,
    profile: dict,
) -> Tuple[bool, Optional[str]]:
    """Check a profile against the *include* filters of ``search_parameters``.

    Each filter is applied only when its key is present and non-empty in
    ``search_parameters``. Filters are checked in order and the first failing
    one determines the returned reason.

    Args:
        search_parameters: Filter settings; the keys read here are
            ``include_job_titles``, ``include_companies``, ``company_types``
            and ``company_include_keywords``.
        profile: Profile dict whose ``experience_data`` sub-dict holds the
            fields being matched. Missing or null fields are treated as empty
            strings, i.e. they fail any active include filter.

    Returns:
        ``(True, None)`` when every active filter matches, otherwise
        ``(False, reason)`` where ``reason`` names the failing filter and the
        profile value it was compared against.
    """
    # Read every field defensively so a sparse profile produces a readable
    # "not included" reason instead of a KeyError / AttributeError.
    experience = profile.get("experience_data") or {}
    title = experience.get("title") or ""
    company_name = experience.get("company_name") or ""
    company_type = experience.get("company_type") or ""
    headline = experience.get("company_company_headline") or ""
    description = experience.get("company_description") or ""

    if (titles := search_parameters.get("include_job_titles")) and not any_in(
        titles, title
    ):
        return (
            False,
            f"include_job_titles: {titles} not in {title}",
        )
    if (companies := search_parameters.get("include_companies")) and not any_in(
        companies, company_name
    ):
        return (
            False,
            f"include_companies: {companies} not in {company_name}",
        )
    if (company_types := search_parameters.get("company_types")) and not any_in(
        company_types, company_type
    ):
        return (
            False,
            f"company_types: {company_types} not in {company_type}",
        )
    # Keywords may appear in either the headline or the description, so the
    # filter only fails when both fields lack all of them.
    if (
        (company_keywords := search_parameters.get("company_include_keywords"))
        and not any_in(company_keywords, headline)
        and not any_in(company_keywords, description)
    ):
        return (
            False,
            f"company_include_keywords: {company_keywords} not in {headline} or {description}",
        )
    return True, None
|
||||
|
||||
|
||||
def exclude_profile(
    search_parameters: dict,
    profile: dict,
) -> Tuple[bool, Optional[str]]:
    """Check a profile against the *exclude* filters of ``search_parameters``.

    Each filter is applied only when its key is present and non-empty in
    ``search_parameters``. Filters are checked in order and the first one
    that matches determines the returned reason.

    Args:
        search_parameters: Filter settings; the keys read here are
            ``exclude_job_titles``, ``exclude_companies`` and
            ``company_exclude_keywords``.
        profile: Profile dict whose ``experience_data`` sub-dict holds the
            fields being matched. Missing or null fields are treated as empty
            strings, i.e. they never trigger an exclusion.

    Returns:
        ``(True, reason)`` when some exclude filter matches the profile,
        otherwise ``(False, None)``.
    """
    # Read every field defensively so a sparse profile does not raise.
    experience = profile.get("experience_data") or {}
    title = experience.get("title") or ""
    company_name = experience.get("company_name") or ""
    headline = experience.get("company_company_headline") or ""
    description = experience.get("company_description") or ""

    if (titles := search_parameters.get("exclude_job_titles")) and any_in(
        titles, title
    ):
        return (
            True,
            f"exclude_job_titles: {titles} in {title}",
        )
    if (companies := search_parameters.get("exclude_companies")) and any_in(
        companies, company_name
    ):
        return (
            True,
            f"exclude_companies: {companies} in {company_name}",
        )
    # A keyword in EITHER the headline or the description excludes the
    # profile; requiring both would let single-field matches slip through
    # and would contradict the "or" in the reason text.
    if (company_keywords := search_parameters.get("company_exclude_keywords")) and (
        any_in(company_keywords, headline) or any_in(company_keywords, description)
    ):
        return (
            True,
            f"company_exclude_keywords: {company_keywords} in {headline} or {description}",
        )
    return False, None
|
||||
|
||||
|
||||
def get_search_eval(
    search_parameters: dict,
    target_profiles: list[dict],
) -> Tuple[float, list[str]]:
    """Score how many target profiles survive the given search filters.

    A profile counts as retrieved when it passes ``include_profile`` and is
    not rejected by ``exclude_profile``.

    Args:
        search_parameters: Filter settings forwarded to the include/exclude
            checks.
        target_profiles: Profiles that the search is expected to return.

    Returns:
        ``(score, reasons)`` where ``score`` is the fraction of target
        profiles retrieved and ``reasons`` holds one explanatory string for
        each profile that was filtered out. An empty ``target_profiles``
        yields ``(0.0, [])`` instead of dividing by zero.
    """
    if not target_profiles:
        # Nothing to retrieve: avoid ZeroDivisionError and report no signal.
        return 0.0, []

    retrieved = 0
    exclude_reasons = []
    for profile in target_profiles:
        included, reason = include_profile(search_parameters, profile)
        if included:
            # Only profiles that pass the include filters are checked
            # against the exclude filters; its reason replaces the (None)
            # include reason.
            excluded, reason = exclude_profile(search_parameters, profile)
            if not excluded:
                retrieved += 1
        if reason:
            exclude_reasons.append(reason)
    return retrieved / len(target_profiles), exclude_reasons
|
||||
|
||||
|
||||
def evaluate_results_expensive(
|
||||
target: SearchExample,
|
||||
predictied: SearchPrediction,
|
||||
trace=None,
|
||||
@@ -86,7 +182,6 @@ def evaluate_results(
|
||||
for t_result in target.selected_profiles:
|
||||
if t_result["profile_id"] in pred_ids:
|
||||
count += 1
|
||||
|
||||
score = count / len(target.selected_profiles)
|
||||
|
||||
target_ids = {result["profile_id"] for result in target.selected_profiles}
|
||||
@@ -113,6 +208,30 @@ def evaluate_results(
|
||||
)
|
||||
|
||||
|
||||
def evaluate_results_cheap(
    target: SearchExample,
    predictied: SearchPrediction,
    trace=None,
    pred_name=None,
    pred_trace=None,
) -> Prediction:
    """Score predicted search parameters against the target profiles.

    The predicted filters are applied to ``target.selected_profiles`` via
    ``get_search_eval``; no search is executed here.

    Args:
        target: Example whose ``selected_profiles`` should be retrieved.
        predictied: Prediction whose ``search_parameters`` are evaluated.
            (The parameter name is misspelled but kept for compatibility
            with existing keyword callers.)
        trace: Unused; accepted as part of the metric call signature.
        pred_name: Unused; accepted as part of the metric call signature.
        pred_trace: Unused; accepted as part of the metric call signature.

    Returns:
        A ``Prediction`` with ``score`` (fraction of target profiles that
        pass the filters) and ``feedback`` (text listing why profiles were
        filtered out, or stating that none were).
    """
    score, miss_reasons = get_search_eval(
        predictied.search_parameters, target.selected_profiles
    )
    if miss_reasons:
        feedback = (
            "The model failed to retrieve the following profiles in the search: "
            + ", ".join(str(reason) for reason in miss_reasons)
        )
    else:
        # Do not report a failure when nothing was actually filtered out.
        feedback = "No target profiles were filtered out by the search parameters."
    return Prediction(
        score=score,
        feedback=feedback,
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
load_dotenv()
|
||||
|
||||
@@ -125,14 +244,19 @@ if __name__ == "__main__":
|
||||
).with_inputs("company_description", "target_customer")
|
||||
for e in data
|
||||
]
|
||||
|
||||
compiler = dspy.GEPA(
|
||||
metric=evaluate_results,
|
||||
auto="light",
|
||||
metric=evaluate_results_cheap,
|
||||
reflection_lm=dspy.LM("openai/gpt-5", temperature=1.0, max_tokens=32000),
|
||||
auto="light",
|
||||
)
|
||||
compiled_searcher = compiler.compile(
|
||||
searcher,
|
||||
trainset=trainset,
|
||||
)
|
||||
compiled_searcher = compiler.compile(searcher, trainset=trainset)
|
||||
compiled_searcher.save("compiled_searcher.json")
|
||||
compiled_searcher.push_to_hub("<your-username>/persana-lead-gen") # Replace <your-username> with your username
|
||||
compiled_searcher.push_to_hub("<your-username>/persana-lead-gen", with_code=True)
|
||||
|
||||
```
|
||||
|
||||
2. Run the file
|
||||
|
||||
Reference in New Issue
Block a user