# Persana Lead Gen Agent

Uses a human-in-the-loop, iterative search-refinement process to find leads, given a company description and a target customer description.

## Usage

### With uv (preferred)

1. Create a new folder
2. [Install uv](https://docs.astral.sh/uv/getting-started/installation/)
3. Init workspace
   ```bash
   uv init
   ```
4. Install dependencies
   ```bash
   uv add textual rich modaic dspy python-dotenv
   ```
5. Run the file
   ```bash
   uv run main.py
   ```

### With pip

1. Copy `main.py` to a new workspace folder.
2. Create a `.env` file with your API keys, using the `.example.env` file as a template.
3. Install dependencies
   ```bash
   pip install textual rich modaic dspy python-dotenv
   ```
4. Run the file with `python main.py`.

Follow the prompts to create a new `dataset.jsonl` file.

## Run Prompt Optimization

Starting accuracy: 45.3%

Once you have a `dataset.jsonl` file, you can optimize the agent with DSPy's built-in prompt optimization.

1. Create a file called `compile.py` with the following code. Replace `<username>` in the `push_to_hub` repo path (`<username>/persana-lead-gen`) with your Modaic username.
```python from sqlalchemy.sql import true from dotenv import load_dotenv import os import dspy import json from dspy import Prediction, Example from typing import Optional, Tuple searcher = AutoAgent.from_precompiled( "swagginty/persana-lead-gen", api_key=os.getenv("PERSANA_KEY"), train=True ) feedback_creator = searcher.feedback_creator class SearchExample(Example): company_description: str target_customer: str selected_profiles: list[dict] class SearchPrediction(Prediction): profiles: Optional[list[dict]] search_parameters: dict def any_in(list: list[str], string: str): return any(item.lower() in string.lower() for item in list) def include_profile( search_parameters: dict, profile: dict, ) -> Tuple[bool, Optional[str]]: if (titles := search_parameters.get("include_job_titles")) and ( not any_in(titles, profile["experience_data"]["title"]) ): return ( False, f"include_job_titles: {titles} not in {profile}['experience_data']['title']", ) if (companies := search_parameters.get("include_companies")) and ( not any_in(companies, profile["experience_data"]["company_name"]) ): return ( False, f"include_companies: {companies} not in {profile}['experience_data']['company_name']", ) if (company_types := search_parameters.get("company_types")) and ( not any_in(company_types, profile["experience_data"]["company_type"]) ): return ( False, f"company_types: {company_types} not in {profile}['experience_data']['company_type']", ) if ( (company_keywords := search_parameters.get("company_include_keywords")) and not any_in( company_keywords, profile["experience_data"]["company_company_headline"] ) and not any_in( company_keywords, profile["experience_data"]["company_description"] ) ): return ( False, f"company_include_keywords: {company_keywords} not in {profile}['experience_data']['company_company_headline'] or {profile}['experience_data']['company_description']", ) return True, None def exclude_profile( search_parameters: dict, profile: dict, ) -> Tuple[bool, Optional[str]]: if 
(titles := search_parameters.get("exclude_job_titles")) and ( any_in(titles, profile["experience_data"]["title"]) ): return ( True, f"exclude_job_titles: {titles} in {profile}['experience_data']['title']", ) if (companies := search_parameters.get("exclude_companies")) and ( any_in(companies, profile["experience_data"]["company_name"]) ): return ( True, f"exclude_companies: {companies} in {profile}['experience_data']['company_name']", ) if (company_keywords := search_parameters.get("company_exclude_keywords")) and ( any_in(company_keywords, profile["experience_data"]["company_company_headline"]) and any_in(company_keywords, profile["experience_data"]["company_description"]) ): return ( True, f"company_exclude_keywords: {company_keywords} in {profile}['experience_data']['company_company_headline'] or {profile}['experience_data']['company_description']", ) return False, None def get_search_eval( search_parameters: dict, target_profiles: list[dict], ) -> Tuple[float, list[dict]]: count = 0 exclude_reasons = [] for profile in target_profiles: include, exclude_reason = include_profile(search_parameters, profile) if include: exclude, exclude_reason = exclude_profile(search_parameters, profile) if not exclude: count += 1 if exclude_reason: exclude_reasons.append(exclude_reason) score = count / len(target_profiles) return score, exclude_reasons def evaluate_results_expensive( target: SearchExample, predictied: SearchPrediction, trace=None, pred_name=None, pred_trace=None, ) -> Prediction: """ Evaluates the search results target results were retrieved """ # How many of the target profiles were retrieved pred_ids = {result["profile_id"] for result in predictied.profiles} count = 0 for t_result in target.selected_profiles: if t_result["profile_id"] in pred_ids: count += 1 score = count / len(target.selected_profiles) target_ids = {result["profile_id"] for result in target.selected_profiles} # Which retrieved profiles were target profiles selected_preds = [ result for result in 
predictied.profiles if result["profile_id"] in target_ids ] # Which retrieved profiles were not target profiles unselected_preds = [ result for result in predictied.profiles if result["profile_id"] not in target_ids ] # Resuse feedback creator to get feedback for prompt creation feedback = feedback_creator( search_parameters=predictied.search_parameters, selected_profiles=selected_preds, unselected_profiles=unselected_preds, user_feedback=None, ).feedback return Prediction( score=score, feedback=feedback, ) def evaluate_results_cheap( target: SearchExample, predictied: SearchPrediction, trace=None, pred_name=None, pred_trace=None, ) -> Prediction: """ Evaluates the search results target results were retrieved """ # How many of the target profiles were retrieved score, unselected_profiles = get_search_eval( predictied.search_parameters, target.selected_profiles ) feedback = ( "The model failed to retrieve the following profiles in the search: " + ", ".join([str(profile) for profile in unselected_profiles]) ) return Prediction( score=score, feedback=feedback, ) if __name__ == "__main__": load_dotenv() data = [json.loads(line) for line in open("dataset.jsonl", "r")] trainset = [ dspy.Example( company_description=e["company_description"], target_customer=e["target_customer"], selected_profiles=e["selected_profiles"], ).with_inputs("company_description", "target_customer") for e in data ] compiler = dspy.GEPA( metric=evaluate_results_cheap, reflection_lm=dspy.LM("openai/gpt-5", temperature=1.0, max_tokens=32000), auto="light", ) compiled_searcher = compiler.compile( searcher, trainset=trainset, ) compiled_searcher.save("compiled_searcher.json") compiled_searcher.push_to_hub("/persana-lead-gen", with_code=True) ``` 2. Run the file With uv: ```bash uv run compile.py ``` With python: ```bash python compile.py ```