(no commit message)

This commit is contained in:
2025-10-06 21:49:50 -07:00
parent 8a2608f427
commit d0dbad868e
11 changed files with 1064 additions and 9 deletions

142
README.md
View File

@@ -48,14 +48,18 @@ Once you have a `dataset.jsonl` file, you can optimize the agent with dspy's bui
1. Create a file called `compile.py` with the following code. Replace `<your-username>` with your modaic username.
```python
from sqlalchemy.sql import true
from dotenv import load_dotenv
import os
import dspy
import json
from dspy import Prediction, Example
from modaic import AutoAgent
from typing import Optional, Tuple
searcher = AutoAgent.from_precompiled("swagginty/persana-lead-gen", api_key=os.getenv("PERSANA_KEY"))
searcher = AutoAgent.from_precompiled(
"swagginty/persana-lead-gen", api_key=os.getenv("PERSANA_KEY"), train=True
)
feedback_creator = searcher.feedback_creator
@@ -66,11 +70,103 @@ class SearchExample(Example):
# Prediction subclass declaring the fields the metrics below read from a search result.
class SearchPrediction(Prediction):
# NOTE(review): `profiles` is annotated twice. This looks like the removed and the
# added line of a diff rendered together; presumably only the Optional form is
# current (i.e. `profiles` may be None) — confirm against the real file.
profiles: list[dict]
profiles: Optional[list[dict]]
# Filter dict; evaluate_results_cheap passes it to get_search_eval as `search_parameters`.
search_parameters: dict
def evaluate_results(
def any_in(list: list[str], string: str) -> bool:
    """Return True if any item of ``list`` is a substring of ``string``.

    The comparison is case-insensitive on both sides.

    Args:
        list: Candidate substrings. (The name shadows the builtin; it is kept
            so existing keyword callers keep working.)
        string: Text to search in. ``None`` is treated as "no match" so that
            profiles with a missing field do not raise.

    Returns:
        True when at least one item occurs in ``string``, otherwise False.
    """
    if string is None:
        return False
    # Lowercase the haystack once instead of once per candidate.
    haystack = string.lower()
    return any(item.lower() in haystack for item in list)
def include_profile(
    search_parameters: dict,
    profile: dict,
) -> Tuple[bool, Optional[str]]:
    """Check a profile against the *include* filters of ``search_parameters``.

    Filters are checked in order: ``include_job_titles``, ``include_companies``,
    ``company_types``, ``company_include_keywords``. A filter that is missing
    or empty is skipped. The first filter that fails decides the result.

    Args:
        search_parameters: Filter dict; only the keys listed above are read.
        profile: Profile dict; fields are read from ``profile["experience_data"]``.

    Returns:
        ``(True, None)`` when every active filter matches, otherwise
        ``(False, reason)`` where ``reason`` names the failing filter and the
        profile value it was compared against.

    Raises:
        KeyError: If an active filter needs a field the profile does not have.
    """
    # The messages interpolate the compared field value. The previous
    # f-strings interpolated the whole profile dict followed by literal
    # "['experience_data'][...]" text.
    if titles := search_parameters.get("include_job_titles"):
        title = profile["experience_data"]["title"]
        if not any_in(titles, title):
            return (
                False,
                f"include_job_titles: {titles} not in "
                f"profile['experience_data']['title'] = {title!r}",
            )

    if companies := search_parameters.get("include_companies"):
        company_name = profile["experience_data"]["company_name"]
        if not any_in(companies, company_name):
            return (
                False,
                f"include_companies: {companies} not in "
                f"profile['experience_data']['company_name'] = {company_name!r}",
            )

    if company_types := search_parameters.get("company_types"):
        company_type = profile["experience_data"]["company_type"]
        if not any_in(company_types, company_type):
            return (
                False,
                f"company_types: {company_types} not in "
                f"profile['experience_data']['company_type'] = {company_type!r}",
            )

    if company_keywords := search_parameters.get("company_include_keywords"):
        # A keyword may appear in either the headline or the description; the
        # description is only read when the headline did not match.
        headline = profile["experience_data"]["company_company_headline"]
        if not any_in(company_keywords, headline):
            description = profile["experience_data"]["company_description"]
            if not any_in(company_keywords, description):
                return (
                    False,
                    f"company_include_keywords: {company_keywords} not in "
                    f"profile['experience_data']['company_company_headline'] = {headline!r} "
                    f"or profile['experience_data']['company_description'] = {description!r}",
                )

    return True, None
def exclude_profile(
    search_parameters: dict,
    profile: dict,
) -> Tuple[bool, Optional[str]]:
    """Check a profile against the *exclude* filters of ``search_parameters``.

    Filters are checked in order: ``exclude_job_titles``, ``exclude_companies``,
    ``company_exclude_keywords``. A filter that is missing or empty is skipped.
    The first filter that matches decides the result.

    Args:
        search_parameters: Filter dict; only the keys listed above are read.
        profile: Profile dict; fields are read from ``profile["experience_data"]``.

    Returns:
        ``(True, reason)`` when some exclude filter matches the profile,
        otherwise ``(False, None)``.

    Raises:
        KeyError: If an active filter needs a field the profile does not have.
    """
    # The messages interpolate the matched field value. The previous f-strings
    # interpolated the whole profile dict followed by literal bracket text.
    if titles := search_parameters.get("exclude_job_titles"):
        title = profile["experience_data"]["title"]
        if any_in(titles, title):
            return (
                True,
                f"exclude_job_titles: {titles} in "
                f"profile['experience_data']['title'] = {title!r}",
            )

    if companies := search_parameters.get("exclude_companies"):
        company_name = profile["experience_data"]["company_name"]
        if any_in(companies, company_name):
            return (
                True,
                f"exclude_companies: {companies} in "
                f"profile['experience_data']['company_name'] = {company_name!r}",
            )

    if company_keywords := search_parameters.get("company_exclude_keywords"):
        # A keyword in EITHER field excludes the profile. The previous `and`
        # only excluded when both fields matched, contradicting the "or" in
        # the message and the include-side handling of the same two fields.
        headline = profile["experience_data"]["company_company_headline"]
        if any_in(company_keywords, headline):
            return (
                True,
                f"company_exclude_keywords: {company_keywords} in "
                f"profile['experience_data']['company_company_headline'] = {headline!r}",
            )
        description = profile["experience_data"]["company_description"]
        if any_in(company_keywords, description):
            return (
                True,
                f"company_exclude_keywords: {company_keywords} in "
                f"profile['experience_data']['company_description'] = {description!r}",
            )

    return False, None
def get_search_eval(
    search_parameters: dict,
    target_profiles: list[dict],
) -> Tuple[float, list[str]]:
    """Score how many target profiles the search parameters would keep.

    A profile counts as kept when ``include_profile`` accepts it and
    ``exclude_profile`` does not reject it.

    Args:
        search_parameters: Filter dict forwarded to the include/exclude checks.
        target_profiles: Profiles the search is expected to return.

    Returns:
        ``(score, reasons)`` where ``score`` is the fraction of target
        profiles kept (``0.0`` when ``target_profiles`` is empty) and
        ``reasons`` holds one message per profile that was filtered out.
    """
    # Guard the division below; with nothing to retrieve there is no evidence
    # the parameters are good, so score conservatively.
    if not target_profiles:
        return 0.0, []

    kept = 0
    miss_reasons = []
    for profile in target_profiles:
        included, reason = include_profile(search_parameters, profile)
        if included:
            # Only profiles that pass the include filters reach the exclude
            # filters; `reason` is replaced by the exclude reason (or None).
            excluded, reason = exclude_profile(search_parameters, profile)
            if not excluded:
                kept += 1
        if reason:
            miss_reasons.append(reason)

    return kept / len(target_profiles), miss_reasons
def evaluate_results_expensive(
target: SearchExample,
predictied: SearchPrediction,
trace=None,
@@ -86,7 +182,6 @@ def evaluate_results(
for t_result in target.selected_profiles:
if t_result["profile_id"] in pred_ids:
count += 1
score = count / len(target.selected_profiles)
target_ids = {result["profile_id"] for result in target.selected_profiles}
@@ -113,6 +208,30 @@ def evaluate_results(
)
def evaluate_results_cheap(
    target: SearchExample,
    predictied: SearchPrediction,
    trace=None,
    pred_name=None,
    pred_trace=None,
) -> Prediction:
    """Score predicted search parameters against the target profiles.

    No search is executed: the predicted ``search_parameters`` are checked
    against ``target.selected_profiles`` via ``get_search_eval``.

    Args:
        target: Example whose ``selected_profiles`` should be retrieved.
        predictied: Prediction whose ``search_parameters`` are evaluated.
            (The misspelled name is kept so existing callers keep working.)
        trace: Unused; accepted for the metric call signature.
        pred_name: Unused; accepted for the metric call signature.
        pred_trace: Unused; accepted for the metric call signature.

    Returns:
        A ``Prediction`` with ``score`` (fraction of target profiles kept by
        the filters) and a textual ``feedback``.
    """
    score, miss_reasons = get_search_eval(
        predictied.search_parameters, target.selected_profiles
    )
    if miss_reasons:
        # The entries are per-profile reason strings, so list them as reasons.
        feedback = (
            "The search parameters failed to retrieve some target profiles "
            "for the following reasons:\n"
            + "\n".join(f"- {reason}" for reason in miss_reasons)
        )
    else:
        # Do not report a failure when nothing was filtered out.
        feedback = "No target profiles were filtered out by the search parameters."
    return Prediction(
        score=score,
        feedback=feedback,
    )
if __name__ == "__main__":
load_dotenv()
@@ -125,14 +244,19 @@ if __name__ == "__main__":
).with_inputs("company_description", "target_customer")
for e in data
]
compiler = dspy.GEPA(
metric=evaluate_results,
auto="light",
metric=evaluate_results_cheap,
reflection_lm=dspy.LM("openai/gpt-5", temperature=1.0, max_tokens=32000),
auto="light",
)
compiled_searcher = compiler.compile(
searcher,
trainset=trainset,
)
compiled_searcher = compiler.compile(searcher, trainset=trainset)
compiled_searcher.save("compiled_searcher.json")
compiled_searcher.push_to_hub("<your-username>/persana-lead-gen") # Replace <your-username> with your username
compiled_searcher.push_to_hub("<your-username>/persana-lead-gen", with_code=True)
```
2. Run the file