Complete Migration
This commit is contained in:
0
__init__.py
Normal file
0
__init__.py
Normal file
172
agent.py
172
agent.py
@@ -1,172 +0,0 @@
|
|||||||
from modaic import PrecompiledAgent, PrecompiledConfig
|
|
||||||
from modules import TweetGeneratorModule, TweetEvaluatorModule
|
|
||||||
from models import EvaluationResult
|
|
||||||
from hill_climbing import HillClimbingOptimizer
|
|
||||||
from typing import Optional, List, Dict, Any
|
|
||||||
from utils import get_dspy_lm
|
|
||||||
from constants import DEFAULT_CATEGORIES, DEFAULT_ITERATIONS, DEFAULT_PATIENCE
|
|
||||||
|
|
||||||
|
|
||||||
class TweetOptimizerConfig(PrecompiledConfig):
    # Model identifier used by the tweet generator module.
    lm: str = "openrouter/google/gemini-2.5-flash"
    # Model identifier used by the tweet evaluator module.
    eval_lm: str = "openrouter/openai/gpt-5"
    # Evaluation category descriptions handed to the optimizer/evaluator.
    categories: List[str] = DEFAULT_CATEGORIES
    # Upper bound on optimization rounds.
    max_iterations: int = DEFAULT_ITERATIONS
    # Number of non-improving rounds tolerated before stopping early.
    patience: int = DEFAULT_PATIENCE
|
|
||||||
|
|
||||||
|
|
||||||
class TweetOptimizerAgent(PrecompiledAgent):
    """Agent that generates, evaluates and hill-climbs tweets.

    Wires a ``TweetGeneratorModule`` and a ``TweetEvaluatorModule`` into a
    ``HillClimbingOptimizer`` and assigns each module its own language model
    taken from the config.
    """

    config: TweetOptimizerConfig

    def __init__(self, config: TweetOptimizerConfig):
        """Build the generator, evaluator and optimizer from *config*."""
        super().__init__(config)
        self.tweet_generator = TweetGeneratorModule()
        self.tweet_evaluator = TweetEvaluatorModule()

        # set up optimizer
        self.optimizer = HillClimbingOptimizer(
            generator=self.tweet_generator,
            evaluator=self.tweet_evaluator,
            categories=config.categories,
            max_iterations=config.max_iterations,
            patience=config.patience,
        )

        self.lm = config.lm
        self.eval_lm = config.eval_lm

        # give each module its own LM (generation and evaluation may differ)
        self.tweet_generator.set_lm(get_dspy_lm(config.lm))
        self.tweet_evaluator.set_lm(get_dspy_lm(config.eval_lm))

    def forward(
        self,
        input_text: str,
        current_tweet: str = "",
        previous_evaluation: Optional[EvaluationResult] = None,
    ) -> str:
        """Generate a single optimized tweet (single iteration)."""
        tweet = self.tweet_generator(input_text, current_tweet, previous_evaluation)
        return tweet

    def optimize(
        self,
        input_text: str,
        iterations: Optional[int] = None,
        patience: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Run the full optimization loop and summarize it.

        Args:
            input_text: Text to turn into a tweet.
            iterations: Per-call cap on optimization rounds; a falsy value
                falls back to ``config.max_iterations``.
            patience: Per-call early-stopping threshold; a falsy value falls
                back to ``config.patience``.

        Returns:
            Dict with keys ``initial_text``, ``final_tweet``, ``best_score``
            (mean category score of the best tweet), ``iterations_run``,
            ``early_stopped``, ``scores_history`` and ``improvement_count``.
        """
        max_iterations = iterations or self.config.max_iterations
        patience_limit = patience or self.config.patience

        results = {
            'initial_text': input_text,
            'final_tweet': '',
            'best_score': 0.0,
            'iterations_run': 0,
            'early_stopped': False,
            'scores_history': [],
            'improvement_count': 0,
        }

        best_tweet = ""
        best_score = 0.0

        # The optimizer enforces its own max_iterations/patience inside the
        # generator. Without pushing the per-call values down, an override
        # larger than the configured limit would be silently ignored.
        saved_limits = (self.optimizer.max_iterations, self.optimizer.patience)
        self.optimizer.max_iterations = max_iterations
        self.optimizer.patience = patience_limit
        try:
            steps = enumerate(self.optimizer.optimize(input_text), start=1)
            for iteration_num, (current_tweet, scores, is_improvement, patience_counter, _, _) in steps:
                results['iterations_run'] = iteration_num
                results['scores_history'].append(scores)

                if is_improvement:
                    best_tweet = current_tweet
                    best_score = sum(scores.category_scores) / len(scores.category_scores)
                    results['improvement_count'] += 1

                # check for early stopping
                if patience_counter >= patience_limit:
                    results['early_stopped'] = True
                    break

                # stop at max iterations
                if iteration_num >= max_iterations:
                    break
        finally:
            # restore the configured limits so later calls are unaffected
            self.optimizer.max_iterations, self.optimizer.patience = saved_limits

        results.update({
            'final_tweet': best_tweet,
            'best_score': best_score,
        })

        return results

    def evaluate_tweet(
        self,
        tweet_text: str,
        original_text: str = "",
        current_best_tweet: str = "",
    ) -> EvaluationResult:
        """Evaluate a tweet using the configured categories."""
        return self.tweet_evaluator(tweet_text, self.config.categories, original_text, current_best_tweet)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Build the agent from the default configuration.
    config = TweetOptimizerConfig()
    tweet_optimizer = TweetOptimizerAgent(config)

    # Disabled demo (inert string literal): single generation + full run.
    """
    import os

    # set up test environment (replace with real API key for actual usage)
    if not os.getenv("OPENROUTER_API_KEY"):
        raise ValueError("OPENROUTER_API_KEY environment variable is not set")

    # single tweet generation
    print("=== Single Tweet Generation ===")
    try:
        single_tweet = tweet_optimizer(
            input_text="Anthropic added a new OSS model on HuggingFace.",
            current_tweet="",
            previous_evaluation=None,
        )
        print(f"Generated tweet: {single_tweet}")
    except Exception as e:
        print(f"Error in single generation: {e}")

    # full optimization process
    print("\n=== Full Optimization Process ===")
    try:
        results = tweet_optimizer.optimize(
            input_text="Anthropic added a new OSS model on HuggingFace.",
            iterations=10,  # Reduced for testing
            patience=8
        )
        print(f"Initial text: {results['initial_text']}")
        print(f"Final tweet: {results['final_tweet']}")
        print(f"Best score: {results['best_score']:.2f}")
        print(f"Iterations run: {results['iterations_run']}")
        print(f"Improvements found: {results['improvement_count']}")
        print(f"Early stopped: {results['early_stopped']}")
    except Exception as e:
        print(f"Error in optimization: {e}")
    """

    # Publish the agent (with its code) to the hub.
    print("\n=== Push to Hub ===")
    try:
        tweet_optimizer.push_to_hub(
            "farouk1/tweet-optimizer-v2",
            commit_message="Complete Migration",
            with_code=True,
        )
        print("Successfully pushed to hub!")
    except Exception as exc:
        print(f"Error pushing to hub: {exc}")

    # Disabled configuration dump (inert string literal).
    """
    print("\n=== Agent Configuration ===")
    print(f"Model: {config.lm}")
    print(f"Categories: {config.categories}")
    print(f"Max iterations: {config.max_iterations}")
    print(f"Patience: {config.patience}")
    """
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
75
constants.py
75
constants.py
@@ -1,75 +0,0 @@
|
|||||||
from typing import Dict, List
|
|
||||||
|
|
||||||
# --- tweet limits ---
TWEET_MAX_LENGTH = 280
TWEET_TRUNCATION_SUFFIX = "..."
# characters of tweet text that remain once the suffix is appended
TWEET_TRUNCATION_LENGTH = TWEET_MAX_LENGTH - len(TWEET_TRUNCATION_SUFFIX)

# --- score range ---
MIN_SCORE = 1
MAX_SCORE = 9
DEFAULT_SCORE = 5

# --- persisted JSON files ---
CATEGORIES_FILE = "categories.json"
SETTINGS_FILE = "settings.json"
HISTORY_FILE = "input_history.json"

# --- input history ---
MAX_HISTORY_ITEMS = 50  # maximum number of historical inputs to store

# --- models ---
DEFAULT_MODEL = "openrouter/anthropic/claude-sonnet-4.5"

# display name -> OpenRouter model identifier
AVAILABLE_MODELS: Dict[str, str] = {
    "Claude Sonnet 4.5": "openrouter/anthropic/claude-sonnet-4.5",
    "Opus 4.1": "openrouter/anthropic/claude-opus-4.1",
    "Gemini 2.5 Flash": "openrouter/google/gemini-2.5-flash",
    "Gemini 2.5 Flash Lite": "openrouter/google/gemini-2.5-flash-lite",
    "Gemini 2.5 Pro": "openrouter/google/gemini-2.5-pro",
    "GPT-5": "openrouter/openai/gpt-5",
}

# --- OpenRouter API ---
OPENROUTER_API_BASE = "https://openrouter.ai/api/v1"
OPENROUTER_MAX_TOKENS = 4096
OPENROUTER_TEMPERATURE = 0.7

# --- optimization defaults ---
DEFAULT_ITERATIONS = 10
DEFAULT_PATIENCE = 5
DEFAULT_USE_CACHE = True

# --- default evaluation categories ---
DEFAULT_CATEGORIES: List[str] = [
    "Engagement potential - how likely users are to like, retweet, or reply",
    "Clarity and readability - how easy the tweet is to understand",
    "Emotional impact - how well the tweet evokes feelings or reactions",
    "Relevance to target audience - how well it resonates with intended readers",
]

# --- error messages ---
ERROR_PARSING = "Default evaluation due to parsing error"
ERROR_VALIDATION = "Default evaluation due to validation error"
ERROR_GENERATION = "Tweet generation failed"
ERROR_EVALUATION = "Tweet evaluation failed"
ERROR_DSPy_INIT = "DSPy initialization failed"
ERROR_NO_API_KEY = "OPENROUTER_API_KEY environment variable is required"
ERROR_SAVE_CATEGORIES = "Failed to save categories"
ERROR_LOAD_CATEGORIES = "Failed to load categories"
ERROR_SAVE_SETTINGS = "Failed to save settings"
ERROR_LOAD_SETTINGS = "Failed to load settings"
ERROR_SAVE_HISTORY = "Failed to save input history"
ERROR_LOAD_HISTORY = "Failed to load input history"

# --- cache ---
CACHE_ENABLE_MEMORY = True
CACHE_ENABLE_DISK = True

# --- iteration display ---
ITERATION_SLEEP_TIME = 0.1  # seconds

# --- display truncation ---
CATEGORY_DISPLAY_MAX_LENGTH = 30
CATEGORY_DISPLAY_TRUNCATION = "..."
CATEGORY_IMPROVEMENT_MAX_LENGTH = 50
|
|
||||||
85
helpers.py
85
helpers.py
@@ -1,85 +0,0 @@
|
|||||||
from typing import Optional, Dict, Any
|
|
||||||
from models import EvaluationResult
|
|
||||||
from constants import MAX_SCORE
|
|
||||||
|
|
||||||
|
|
||||||
def format_evaluation_for_generator(evaluation: Optional[EvaluationResult]) -> str:
    """
    Render an evaluation result as feedback text for the generator module.

    Args:
        evaluation: The evaluation result to format (may be None)

    Returns:
        One line per category in the form
        "<category> (Score: <score>/<MAX_SCORE>): <reasoning>", joined by
        newlines; an empty string when there is nothing to format
    """
    # Guard: no evaluation, or an evaluation without any category entries.
    if not evaluation or not evaluation.evaluations:
        return ""

    return "\n".join(
        f"{entry.category} (Score: {entry.score}/{MAX_SCORE}): {entry.reasoning}"
        for entry in evaluation.evaluations
    )
|
|
||||||
|
|
||||||
|
|
||||||
def build_settings_dict(
    selected_model: str,
    iterations: int,
    patience: int,
    use_cache: bool
) -> Dict[str, Any]:
    """
    Bundle the individual settings values into one dictionary for saving.

    Args:
        selected_model: The selected model name
        iterations: Number of optimization iterations
        patience: Patience threshold for early stopping
        use_cache: Whether to use DSPy cache

    Returns:
        Dictionary keyed by "selected_model", "iterations", "patience"
        and "use_cache"
    """
    settings: Dict[str, Any] = dict(
        selected_model=selected_model,
        iterations=iterations,
        patience=patience,
        use_cache=use_cache,
    )
    return settings
|
|
||||||
|
|
||||||
|
|
||||||
def truncate_tweet(tweet: str, max_length: int, suffix: str = "...") -> str:
    """
    Truncate a tweet to the maximum length with a suffix.

    The tweet is stripped of surrounding whitespace first. The result never
    exceeds ``max_length`` characters.

    Args:
        tweet: The tweet text to truncate
        max_length: Maximum allowed length
        suffix: Suffix to add when truncating (default: "...")

    Returns:
        The stripped tweet if it already fits; otherwise a prefix followed by
        the suffix. If the suffix alone would not fit, the tweet is hard-cut
        to ``max_length`` without a suffix; a non-positive ``max_length``
        yields an empty string.
    """
    tweet = tweet.strip()
    if len(tweet) <= max_length:
        return tweet

    # Nothing can fit; also avoids negative slice indices below.
    if max_length <= 0:
        return ""

    # A suffix longer than the budget would make the slice index negative and
    # produce a result longer than max_length, so hard-cut instead.
    if len(suffix) > max_length:
        return tweet[:max_length]

    truncation_point = max_length - len(suffix)
    return tweet[:truncation_point] + suffix
|
|
||||||
|
|
||||||
|
|
||||||
def truncate_category_display(category: str, max_length: int = 30) -> str:
    """
    Shorten a category name for display.

    Args:
        category: The category name
        max_length: Number of leading characters kept when shortening
            (default: 30)

    Returns:
        The category unchanged when it fits, otherwise its first
        ``max_length`` characters followed by "..."
    """
    needs_cut = len(category) > max_length
    if needs_cut:
        head = category[:max_length]
        return head + "..."
    return category
|
|
||||||
119
hill_climbing.py
119
hill_climbing.py
@@ -1,119 +0,0 @@
|
|||||||
from typing import List, Iterator, Tuple, Dict
|
|
||||||
from models import EvaluationResult
|
|
||||||
from agent.modules import TweetGeneratorModule, TweetEvaluatorModule
|
|
||||||
from helpers import format_evaluation_for_generator
|
|
||||||
|
|
||||||
class HillClimbingOptimizer:
    """Hill climbing optimizer for tweet improvement."""

    def __init__(
        self,
        generator: TweetGeneratorModule,
        evaluator: TweetEvaluatorModule,
        categories: List[str],
        max_iterations: int = 10,
        patience: int = 5
    ):
        """Store the generator/evaluator pair and the search limits."""
        self.generator = generator
        self.evaluator = evaluator
        self.categories = categories
        self.max_iterations = max_iterations
        self.patience = patience

    def optimize(self, initial_text: str) -> Iterator[Tuple[str, EvaluationResult, bool, int, Dict[str, str], Dict[str, str]]]:
        """
        Run hill climbing over tweet candidates, yielding after every round.

        The first round generates and evaluates a tweet from scratch and is
        always reported as an improvement. Each later round asks the generator
        to improve the best tweet so far, evaluates the candidate, and keeps it
        only if its evaluation compares greater than the best one.

        Yields:
            Tuple of (current_tweet, evaluation_result, is_improvement, patience_counter, generator_inputs, evaluator_inputs)
        """
        # Round 0: seed tweet, generated without any feedback.
        generator_inputs = {
            "input_text": initial_text,
            "current_tweet": "",
            "previous_evaluation": ""
        }
        seed_tweet = self.generator(
            input_text=initial_text,
            current_tweet="",
            previous_evaluation=None
        )

        evaluator_inputs = {
            "original_text": initial_text,
            "current_best_tweet": "",
            "tweet_text": seed_tweet
        }
        seed_eval = self.evaluator(
            tweet_text=seed_tweet,
            categories=self.categories,
            original_text=initial_text,
            current_best_tweet=""
        )

        top_tweet, top_eval = seed_tweet, seed_eval
        stale_rounds = 0

        yield (seed_tweet, seed_eval, True, stale_rounds, generator_inputs, evaluator_inputs)

        # The seed round counts as iteration 0, hence the range starts at 1.
        for _ in range(1, self.max_iterations):
            try:
                # Text form of the best evaluation, reported in generator_inputs.
                feedback_text = format_evaluation_for_generator(top_eval)

                generator_inputs = {
                    "input_text": initial_text,
                    "current_tweet": top_tweet,
                    "previous_evaluation": feedback_text
                }

                candidate = self.generator(
                    input_text=initial_text,
                    current_tweet=top_tweet,
                    previous_evaluation=top_eval
                )

                evaluator_inputs = {
                    "original_text": initial_text,
                    "current_best_tweet": top_tweet,
                    "tweet_text": candidate
                }
                candidate_eval = self.evaluator(
                    tweet_text=candidate,
                    categories=self.categories,
                    original_text=initial_text,
                    current_best_tweet=top_tweet
                )

                # Hill-climbing acceptance test: strictly better only.
                if candidate_eval > top_eval:
                    top_tweet, top_eval = candidate, candidate_eval
                    stale_rounds = 0
                    yield (candidate, candidate_eval, True, stale_rounds, generator_inputs, evaluator_inputs)
                else:
                    stale_rounds += 1
                    yield (top_tweet, candidate_eval, False, stale_rounds, generator_inputs, evaluator_inputs)

                # Stop once 'patience' consecutive rounds brought no gain.
                if stale_rounds >= self.patience:
                    break

            except Exception:
                # A failed round counts as non-improving; report the best so far.
                stale_rounds += 1
                evaluator_inputs = {
                    "original_text": initial_text,
                    "current_best_tweet": top_tweet,
                    "tweet_text": top_tweet
                }
                yield (top_tweet, top_eval, False, stale_rounds, generator_inputs, evaluator_inputs)

                if stale_rounds >= self.patience:
                    break
|
|
||||||
|
|
||||||
60
models.py
60
models.py
@@ -1,60 +0,0 @@
|
|||||||
from pydantic import BaseModel, Field, validator
|
|
||||||
from typing import List
|
|
||||||
from constants import MIN_SCORE, MAX_SCORE
|
|
||||||
|
|
||||||
class CategoryEvaluation(BaseModel):
    """Pydantic model for a single category evaluation with reasoning."""

    category: str = Field(description="The evaluation category name")
    reasoning: str = Field(description="Explanation for the score")
    # Bounds are declared on the field and re-checked by the validator below.
    score: int = Field(
        description=f"Score for this category ({MIN_SCORE}-{MAX_SCORE})",
        ge=MIN_SCORE,
        le=MAX_SCORE,
    )

    @validator('score')
    def validate_score(cls, score):
        """Accept only integer scores inside [MIN_SCORE, MAX_SCORE]."""
        if isinstance(score, int) and MIN_SCORE <= score <= MAX_SCORE:
            return score
        raise ValueError(f"Score {score} must be an integer between {MIN_SCORE} and {MAX_SCORE}")
|
|
||||||
|
|
||||||
class EvaluationResult(BaseModel):
    """Pydantic model for tweet evaluation results."""

    evaluations: List[CategoryEvaluation] = Field(
        description="List of category evaluations with reasoning and scores"
    )

    @validator('evaluations')
    def validate_evaluations(cls, evals):
        """Reject an empty list of category evaluations."""
        if evals:
            return evals
        raise ValueError("Must have at least one category evaluation")

    @property
    def category_scores(self) -> List[int]:
        """Scores of all category evaluations, in order (kept for backwards compatibility)."""
        return [entry.score for entry in self.evaluations]

    def total_score(self) -> float:
        """Sum of the scores across all categories."""
        return sum(entry.score for entry in self.evaluations)

    def average_score(self) -> float:
        """Mean score across all categories."""
        count = len(self.evaluations)
        return self.total_score() / count

    def __gt__(self, other):
        """Order evaluation results by total score."""
        if isinstance(other, EvaluationResult):
            return self.total_score() > other.total_score()
        return NotImplemented

    def __eq__(self, other):
        """Two results are equal when their total scores match."""
        if isinstance(other, EvaluationResult):
            return self.total_score() == other.total_score()
        return NotImplemented
|
|
||||||
128
modules.py
128
modules.py
@@ -1,128 +0,0 @@
|
|||||||
import dspy
|
|
||||||
from typing import List, Optional
|
|
||||||
from models import EvaluationResult, CategoryEvaluation
|
|
||||||
from constants import (
|
|
||||||
TWEET_MAX_LENGTH,
|
|
||||||
TWEET_TRUNCATION_SUFFIX,
|
|
||||||
DEFAULT_SCORE,
|
|
||||||
ERROR_PARSING,
|
|
||||||
ERROR_VALIDATION,
|
|
||||||
ERROR_GENERATION,
|
|
||||||
ERROR_EVALUATION,
|
|
||||||
MIN_SCORE,
|
|
||||||
MAX_SCORE
|
|
||||||
)
|
|
||||||
from helpers import format_evaluation_for_generator, truncate_tweet
|
|
||||||
|
|
||||||
class TweetGenerator(dspy.Signature):
    """Generate or improve a tweet based on input text and detailed evaluation feedback with reasoning."""

    # --- inputs ---
    input_text: str = dspy.InputField(
        desc="Original text or current tweet to improve",
    )
    current_tweet: str = dspy.InputField(
        desc="Current best tweet version (empty for first generation)",
    )
    previous_evaluation: str = dspy.InputField(
        desc="Previous evaluation with category-by-category reasoning and scores (empty for first generation)",
    )

    # --- output ---
    improved_tweet: str = dspy.OutputField(
        desc=f"Generated or improved tweet text (max {TWEET_MAX_LENGTH} characters)",
    )
|
|
||||||
|
|
||||||
class TweetEvaluator(dspy.Signature):
    """Evaluate a tweet across multiple custom categories. For each category, provide detailed reasoning explaining the score, then assign a score. Ensure the tweet maintains the same meaning as the original text."""

    # --- inputs ---
    original_text: str = dspy.InputField(
        desc="Original input text that started the optimization",
    )
    current_best_tweet: str = dspy.InputField(
        desc="Current best tweet version for comparison (empty for first evaluation)",
    )
    tweet_text: str = dspy.InputField(
        desc="Tweet text to evaluate",
    )
    categories: str = dspy.InputField(
        desc="Comma-separated list of evaluation category descriptions",
    )

    # --- output ---
    evaluations: List[CategoryEvaluation] = dspy.OutputField(
        desc=f"List of evaluations with category name, detailed reasoning, and score ({MIN_SCORE}-{MAX_SCORE}) for each category. Ensure the tweet conveys the same meaning as the original text."
    )
|
|
||||||
|
|
||||||
class TweetGeneratorModule(dspy.Module):
    """DSPy module for generating and improving tweets."""

    def __init__(self):
        super().__init__()
        self.generate = dspy.ChainOfThought(TweetGenerator)

    def forward(self, input_text: str, current_tweet: str = "", previous_evaluation: Optional[EvaluationResult] = None) -> str:
        """Produce a new or improved tweet, capped at TWEET_MAX_LENGTH characters.

        Any failure is re-raised as ``Exception`` prefixed with ERROR_GENERATION.
        """
        try:
            prediction = self.generate(
                input_text=input_text,
                current_tweet=current_tweet,
                # the signature expects the previous evaluation as plain text
                previous_evaluation=format_evaluation_for_generator(previous_evaluation),
            )
            # enforce the character limit on whatever the model returned
            return truncate_tweet(prediction.improved_tweet, TWEET_MAX_LENGTH, TWEET_TRUNCATION_SUFFIX)
        except Exception as e:
            raise Exception(f"{ERROR_GENERATION}: {str(e)}")
|
|
||||||
|
|
||||||
class TweetEvaluatorModule(dspy.Module):
    """DSPy module for evaluating tweets across custom categories."""

    def __init__(self):
        super().__init__()
        self.evaluate = dspy.ChainOfThought(TweetEvaluator)

    @staticmethod
    def _default_evaluations(categories: List[str], reasoning: str) -> List[CategoryEvaluation]:
        """Build one DEFAULT_SCORE evaluation per category with the given reasoning."""
        return [
            CategoryEvaluation(category=cat, reasoning=reasoning, score=DEFAULT_SCORE)
            for cat in categories
        ]

    def forward(self, tweet_text: str, categories: List[str], original_text: str = "", current_best_tweet: str = "") -> EvaluationResult:
        """Evaluate a tweet across specified categories.

        Args:
            tweet_text: Tweet to evaluate.
            categories: Category descriptions; joined with ", " for the model.
            original_text: Text the optimization started from.
            current_best_tweet: Best tweet so far, for comparison.

        Returns:
            An ``EvaluationResult`` with one entry per category. Scores are
            clamped to [MIN_SCORE, MAX_SCORE]. If the model returns a different
            number of entries than categories, or anything else fails, every
            category gets DEFAULT_SCORE with an explanatory reasoning string.
        """
        try:
            result = self.evaluate(
                original_text=original_text,
                current_best_tweet=current_best_tweet,
                tweet_text=tweet_text,
                categories=", ".join(categories),
            )
            raw_evaluations = result.evaluations

            # A count mismatch means the output cannot be mapped to categories.
            if len(raw_evaluations) != len(categories):
                evaluations = self._default_evaluations(categories, ERROR_PARSING)
            else:
                evaluations = []
                # Lengths match here, so pairing by position is always valid.
                for cat, item in zip(categories, raw_evaluations):
                    try:
                        # clamp rather than reject out-of-range scores
                        score = max(MIN_SCORE, min(MAX_SCORE, int(item.score)))
                        evaluations.append(CategoryEvaluation(
                            category=cat,
                            reasoning=item.reasoning if item.reasoning else "No reasoning provided",
                            score=score,
                        ))
                    except (ValueError, TypeError, AttributeError):
                        evaluations.append(CategoryEvaluation(
                            category=cat,
                            reasoning=ERROR_VALIDATION,
                            score=DEFAULT_SCORE,
                        ))

            return EvaluationResult(evaluations=evaluations)
        except Exception as e:
            # Best-effort: fall back to neutral default scores on any error.
            return EvaluationResult(
                evaluations=self._default_evaluations(categories, f"{ERROR_EVALUATION}: {str(e)}")
            )
|
|
||||||
@@ -3,16 +3,7 @@ name = "tweet-optimizer-v2"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
description = "CLI tool for optimizing tweets using DSPy and hill-climbing algorithm"
|
description = "CLI tool for optimizing tweets using DSPy and hill-climbing algorithm"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
dependencies = [
|
dependencies = ["dspy>=3.0.3", "dspy-ai>=3.0.3", "modaic>=0.3.0", "pandas>=2.3.3", "pydantic>=2.12.2", "pytest>=8.4.2", "pytest-mock>=3.15.1", "requests>=2.32.5"]
|
||||||
"dspy>=3.0.3",
|
|
||||||
"dspy-ai>=3.0.3",
|
|
||||||
"modaic>=0.1.1",
|
|
||||||
"pandas>=2.3.3",
|
|
||||||
"pydantic>=2.12.2",
|
|
||||||
"pytest>=8.4.2",
|
|
||||||
"pytest-mock>=3.15.1",
|
|
||||||
"requests>=2.32.5",
|
|
||||||
]
|
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
tweet-optimizer = "cli:main"
|
tweet-optimizer = "cli:main"
|
||||||
|
|||||||
192
utils.py
192
utils.py
@@ -1,192 +0,0 @@
|
|||||||
import json
|
|
||||||
import os
|
|
||||||
import dspy
|
|
||||||
from typing import List, Dict, Any
|
|
||||||
from constants import (
|
|
||||||
CATEGORIES_FILE,
|
|
||||||
SETTINGS_FILE,
|
|
||||||
HISTORY_FILE,
|
|
||||||
DEFAULT_CATEGORIES,
|
|
||||||
DEFAULT_MODEL,
|
|
||||||
DEFAULT_ITERATIONS,
|
|
||||||
DEFAULT_PATIENCE,
|
|
||||||
DEFAULT_USE_CACHE,
|
|
||||||
MAX_HISTORY_ITEMS,
|
|
||||||
OPENROUTER_API_BASE,
|
|
||||||
OPENROUTER_MAX_TOKENS,
|
|
||||||
OPENROUTER_TEMPERATURE,
|
|
||||||
ERROR_NO_API_KEY,
|
|
||||||
ERROR_SAVE_CATEGORIES,
|
|
||||||
ERROR_LOAD_CATEGORIES,
|
|
||||||
ERROR_SAVE_SETTINGS,
|
|
||||||
ERROR_LOAD_SETTINGS,
|
|
||||||
ERROR_SAVE_HISTORY,
|
|
||||||
ERROR_LOAD_HISTORY,
|
|
||||||
ERROR_DSPy_INIT,
|
|
||||||
TWEET_MAX_LENGTH
|
|
||||||
)
|
|
||||||
|
|
||||||
def save_categories(categories: List[str]) -> None:
    """Save categories to the JSON file named by CATEGORIES_FILE.

    Failures are reported via ``print`` and not raised (best-effort save).
    """
    try:
        # Serialize before opening: mode 'w' truncates the file, so a
        # serialization error must not be able to wipe the existing contents.
        payload = json.dumps(categories, indent=2)
        with open(CATEGORIES_FILE, 'w') as f:
            f.write(payload)
    except Exception as e:
        print(f"{ERROR_SAVE_CATEGORIES}: {str(e)}")
|
|
||||||
|
|
||||||
def load_categories() -> List[str]:
    """Load categories from the JSON file named by CATEGORIES_FILE.

    Returns:
        The stored list; an empty list if the stored value is not a list or
        loading fails (the error is printed). When the file does not exist,
        the defaults are written to it and a fresh copy of them is returned.
    """
    try:
        if os.path.exists(CATEGORIES_FILE):
            with open(CATEGORIES_FILE, 'r') as f:
                categories = json.load(f)
            return categories if isinstance(categories, list) else []

        save_categories(DEFAULT_CATEGORIES)
        # Return a copy so callers editing the result cannot mutate the
        # shared module-level default list.
        return list(DEFAULT_CATEGORIES)
    except Exception as e:
        print(f"{ERROR_LOAD_CATEGORIES}: {str(e)}")
        return []
|
|
||||||
|
|
||||||
def get_dspy_lm(model_name: str):
    """Create a new DSPy LM instance for the specified model via OpenRouter.

    A new instance is built on every call (nothing is cached here).

    Args:
        model_name: Model identifier passed to ``dspy.LM``.

    Returns:
        The configured ``dspy.LM`` instance.

    Raises:
        Exception: If OPENROUTER_API_KEY is unset or LM construction fails;
            the original error is attached as the cause.
    """
    try:
        openrouter_key = os.getenv("OPENROUTER_API_KEY")
        if not openrouter_key:
            raise ValueError(ERROR_NO_API_KEY)

        # GPT-5 models get a larger token budget and temperature 1.0;
        # every other model uses the OpenRouter defaults.
        is_gpt5 = "openai/gpt-5" in model_name
        max_tokens = 16000 if is_gpt5 else OPENROUTER_MAX_TOKENS
        temperature = 1.0 if is_gpt5 else OPENROUTER_TEMPERATURE

        lm = dspy.LM(
            model=model_name,
            api_key=openrouter_key,
            api_base=OPENROUTER_API_BASE,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        return lm
    except Exception as e:
        raise Exception(f"Failed to create LM: {str(e)}") from e
|
|
||||||
|
|
||||||
def initialize_dspy(model_name: str = DEFAULT_MODEL, use_cache: bool = DEFAULT_USE_CACHE) -> bool:
    """Configure DSPy's cache and, once per process, its global LM.

    Returns True on success; raises ``Exception`` prefixed with
    ERROR_DSPy_INIT when the global LM cannot be set up.
    """
    # Cache setup is best-effort: it may fail in some environments.
    try:
        dspy.configure_cache(
            enable_memory_cache=use_cache,
            enable_disk_cache=use_cache,
        )
    except Exception:
        pass

    # A marker attribute on the dspy module records that the global
    # configuration already happened; skip it on later calls.
    if hasattr(dspy, '_replit_configured'):
        return True

    try:
        dspy.configure(lm=get_dspy_lm(model_name))
        dspy._replit_configured = True  # type: ignore
    except Exception as e:
        raise Exception(f"{ERROR_DSPy_INIT}: {str(e)}")

    return True
|
|
||||||
|
|
||||||
def format_tweet_for_display(tweet: str) -> str:
    """Return the tweet with leading and trailing whitespace removed."""
    display_text = tweet.strip()
    return display_text
|
|
||||||
|
|
||||||
def calculate_tweet_length(tweet: str) -> int:
    """Return the character count of the tweet, ignoring surrounding whitespace."""
    trimmed = tweet.strip()
    return len(trimmed)
|
|
||||||
|
|
||||||
def is_valid_tweet(tweet: str) -> bool:
    """Return True when the stripped tweet is non-empty and at most TWEET_MAX_LENGTH characters."""
    text = tweet.strip()
    if not text:
        return False
    return len(text) <= TWEET_MAX_LENGTH
|
|
||||||
|
|
||||||
def save_settings(settings: Dict[str, Any]) -> None:
    """Save settings to the JSON file named by SETTINGS_FILE.

    Failures are reported via ``print`` and not raised (best-effort save).
    """
    try:
        # Serialize before opening: mode 'w' truncates the file, so a
        # serialization error must not be able to wipe the existing contents.
        payload = json.dumps(settings, indent=2)
        with open(SETTINGS_FILE, 'w') as f:
            f.write(payload)
    except Exception as e:
        print(f"{ERROR_SAVE_SETTINGS}: {str(e)}")
|
|
||||||
|
|
||||||
def load_settings() -> Dict[str, Any]:
    """Read settings from SETTINGS_FILE, falling back to the defaults.

    A missing file is created with the default settings. A stored value that
    is not a dict, or any error (which is printed), yields the defaults.
    """
    try:
        if not os.path.exists(SETTINGS_FILE):
            # First run: persist the defaults so the file exists next time.
            defaults = get_default_settings()
            save_settings(defaults)
            return defaults

        with open(SETTINGS_FILE, 'r') as f:
            loaded = json.load(f)
        if isinstance(loaded, dict):
            return loaded
        return get_default_settings()
    except Exception as e:
        print(f"{ERROR_LOAD_SETTINGS}: {str(e)}")
        return get_default_settings()
|
|
||||||
|
|
||||||
def get_default_settings() -> Dict[str, Any]:
    """Build a fresh settings dictionary populated from the DEFAULT_* constants."""
    defaults: Dict[str, Any] = dict(
        selected_model=DEFAULT_MODEL,
        iterations=DEFAULT_ITERATIONS,
        patience=DEFAULT_PATIENCE,
        use_cache=DEFAULT_USE_CACHE,
    )
    return defaults
|
|
||||||
|
|
||||||
def save_input_history(history: List[str]) -> None:
    """Save input history to the JSON file named by HISTORY_FILE.

    Failures are reported via ``print`` and not raised (best-effort save).
    """
    try:
        # Serialize before opening: mode 'w' truncates the file, so a
        # serialization error must not be able to wipe the existing contents.
        payload = json.dumps(history, indent=2)
        with open(HISTORY_FILE, 'w') as f:
            f.write(payload)
    except Exception as e:
        print(f"{ERROR_SAVE_HISTORY}: {str(e)}")
|
|
||||||
|
|
||||||
def load_input_history() -> List[str]:
    """Read the input history from HISTORY_FILE.

    Returns an empty list when the file is missing, the stored value is not
    a list, or loading fails (the error is printed).
    """
    try:
        if not os.path.exists(HISTORY_FILE):
            return []

        with open(HISTORY_FILE, 'r') as f:
            stored = json.load(f)
        if isinstance(stored, list):
            return stored
        return []
    except Exception as e:
        print(f"{ERROR_LOAD_HISTORY}: {str(e)}")
        return []
|
|
||||||
|
|
||||||
def add_to_input_history(history: List[str], new_input: str) -> List[str]:
    """
    Add a new input to history, maintaining max size and avoiding duplicates.

    The passed-in list is never modified; a new list is returned whenever
    something is added.

    Args:
        history: Current history list
        new_input: New input text to add

    Returns:
        Updated history list with the stripped new input at the beginning,
        any earlier occurrences of it removed, trimmed to MAX_HISTORY_ITEMS.
        If the input is empty after stripping, ``history`` is returned as is.
    """
    new_input = new_input.strip()

    # Don't add empty strings
    if not new_input:
        return history

    # Build a new list instead of calling history.remove(): the caller's list
    # stays untouched, and every earlier duplicate is dropped, not only the first.
    remaining = [item for item in history if item != new_input]

    # Newest entry first, capped at the maximum size.
    return ([new_input] + remaining)[:MAX_HISTORY_ITEMS]
|
|
||||||
Reference in New Issue
Block a user