Complete Migration
README.md (217 lines changed)
@@ -1,2 +1,217 @@
-# tweet-optimizer-v2

# DSPy Tweet Optimizer - Modaic Agent

A composable DSPy agent that optimizes tweets using a reflective generate-evaluate algorithm, packaged for the [Modaic Hub](https://modaic.dev). Generate, evaluate, and iteratively improve tweets with configurable evaluation categories and automated optimization.

## Features

- **Modaic Agent**: Deployable on Modaic Hub for easy sharing and reuse
- **Hill-Climbing Optimization**: Iteratively improves tweets through automated evaluation
- **Customizable Categories**: Define evaluation criteria (engagement, clarity, tone, etc.)
- **Multiple Usage Modes**: Single generation, full optimization, or standalone evaluation
- **Structured Evaluation**: 1-9 scoring with detailed reasoning per category
- **CLI Compatibility**: Same functionality as the original CLI tool
- **Easy Configuration**: Flexible model, iteration, and patience settings

## Installation

### Prerequisites

- Python 3.11+
- OpenRouter API key ([Get one here](https://openrouter.ai/))
- Modaic account (for hub deployment)

### Setup

1. **Clone the repository:**
   ```bash
   git clone https://git.modaic.dev/farouk1/tweet-optimizer-v2.git
   cd tweet-optimizer-v2
   ```

2. **Install dependencies:**
   ```bash
   uv sync
   ```

3. **Set up your API key and Modaic token:**
   ```bash
   export OPENROUTER_API_KEY='your-api-key-here'
   export MODAIC_TOKEN='your-modaic-token'
   ```

## Usage

### Basic Agent Usage

```python
from tweet_optimizer_agent import TweetOptimizerAgent, TweetOptimizerConfig

# Create agent with default settings
config = TweetOptimizerConfig()
agent = TweetOptimizerAgent(config)

# Single tweet generation
tweet = agent(
    input_text="Create a tweet about HuggingFace transformers",
    current_tweet="",
    previous_evaluation=None
)
print(f"Generated: {tweet}")
```

### Full Optimization Process

```python
# Run complete optimization (like the CLI)
results = agent.optimize(
    input_text="Create a tweet about HuggingFace transformers",
    iterations=10,
    patience=5
)

print(f"Original: {results['initial_text']}")
print(f"Optimized: {results['final_tweet']}")
print(f"Score: {results['best_score']:.2f}")
print(f"Iterations: {results['iterations_run']}")
```

### Custom Configuration

```python
# Custom evaluation categories and settings
config = TweetOptimizerConfig(
    lm="openrouter/anthropic/claude-sonnet-4.5",
    categories=[
        "Engagement potential",
        "Clarity and readability",
        "Professional tone",
        "Call-to-action strength"
    ],
    max_iterations=15,
    patience=8
)

agent = TweetOptimizerAgent(config)
```

### Tweet Evaluation

```python
# Evaluate a specific tweet
evaluation = agent.evaluate_tweet(
    tweet_text="Excited to share our new AI model!",
    original_text="We released a new AI model",
    current_best_tweet=""
)

for eval in evaluation.evaluations:
    print(f"{eval.category}: {eval.score}/9 - {eval.reasoning}")
```

### Deploy to Modaic Hub

```python
# Push your trained agent to Modaic Hub
agent.push_to_hub(
    "your-username/tweet-optimizer",
    commit_message="Deploy tweet optimizer agent",
    with_code=True
)
```

### Load from Hub

```python
# Load a pre-trained agent from Modaic Hub
agent = TweetOptimizerAgent.from_precompiled("your-username/tweet-optimizer")

# Use immediately
optimized = agent("Your tweet content here")
```

## CLI Tool

The original CLI functionality is still available:

```bash
# Basic usage
python cli.py "Create a tweet about AI breakthroughs"

# With custom settings
python cli.py "Product launch announcement" \
    --model "Claude Sonnet 4.5" \
    --iterations 15 \
    --patience 8 \
    --categories "Excitement" "Clarity" "Call-to-action"

# List available models
python cli.py --list-models

# Quiet mode (output only the final tweet)
python cli.py "Content here" --quiet
```

## Configuration Options

### TweetOptimizerConfig

| Parameter        | Type      | Default                                | Description                                 |
|------------------|-----------|----------------------------------------|---------------------------------------------|
| `lm`             | str       | `"openrouter/google/gemini-2.5-flash"` | Language model to use                       |
| `eval_lm`        | str       | `"openrouter/openai/gpt-5"`            | Evaluator language model to use             |
| `categories`     | List[str] | Default evaluation categories          | Custom evaluation criteria                  |
| `max_iterations` | int       | 10                                     | Maximum optimization iterations             |
| `patience`       | int       | 5                                      | Stop after N iterations without improvement |

### Default Categories

1. **Engagement potential** - How likely users are to like, retweet, or reply
2. **Clarity and readability** - How easy the tweet is to understand
3. **Emotional impact** - How well the tweet evokes feelings or reactions
4. **Relevance to target audience** - How well it resonates with intended readers

## Architecture

```
dspy-tweet-optimizer/
├── tweet_optimizer_agent.py   # Main Modaic agent implementation
├── cli.py                     # Command-line interface
├── modules.py                 # DSPy generator and evaluator modules
├── hill_climbing.py           # Optimization algorithm
├── models.py                  # Pydantic data models
├── helpers.py                 # Utility functions
├── utils.py                   # File I/O and DSPy utilities
├── constants.py               # Configuration constants
└── tests/                     # Test suite
```

### Core Components

- **TweetOptimizerAgent**: Main Modaic agent with optimization methods
- **TweetGeneratorModule**: DSPy module for generating/improving tweets
- **TweetEvaluatorModule**: DSPy module for structured evaluation
- **HillClimbingOptimizer**: Iterative improvement algorithm
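For orientation, a minimal sketch of how these components are wired together, mirroring `TweetOptimizerAgent.__init__` in this commit (import paths assume the `agent/` package layout introduced below; the categories shown are illustrative):

```python
from agent.modules import TweetGeneratorModule, TweetEvaluatorModule
from agent.hill_climbing import HillClimbingOptimizer

# The agent owns one generator and one evaluator, and hands both to the
# hill-climbing optimizer, which drives the generate-evaluate loop.
generator = TweetGeneratorModule()
evaluator = TweetEvaluatorModule()
optimizer = HillClimbingOptimizer(
    generator=generator,
    evaluator=evaluator,
    categories=["Engagement potential", "Clarity and readability"],
    max_iterations=10,
    patience=5,
)
```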

## License

MIT License - see the [LICENSE](LICENSE) file for details.

## Credits

This Modaic agent implementation is based on the original DSPy Tweet Optimizer by [tom-doerr](https://github.com/tom-doerr/dspy-tweet-optimizer), licensed under MIT. The original project provided the foundation, including:

- Core DSPy modules (TweetGeneratorModule, TweetEvaluatorModule)
- Hill-climbing optimization algorithm
- CLI interface and utilities
- Comprehensive testing framework

**Original Author**: Tom Doerr ([@tom-doerr](https://github.com/tom-doerr))
**Original Repository**: [dspy-tweet-optimizer](https://github.com/tom-doerr/dspy-tweet-optimizer)

### Modifications for Modaic

- Packaged as a Modaic PrecompiledAgent
- Added hub deployment functionality
- Enhanced configuration options
- Maintained CLI compatibility
- Extended usage examples
agent.json (99 lines, new file)
@@ -0,0 +1,99 @@
{
  "tweet_generator.generate.predict": {
    "traces": [],
    "train": [],
    "demos": [],
    "signature": {
      "instructions": "Generate or improve a tweet based on input text and detailed evaluation feedback with reasoning.",
      "fields": [
        {
          "prefix": "Input Text:",
          "description": "Original text or current tweet to improve"
        },
        {
          "prefix": "Current Tweet:",
          "description": "Current best tweet version (empty for first generation)"
        },
        {
          "prefix": "Previous Evaluation:",
          "description": "Previous evaluation with category-by-category reasoning and scores (empty for first generation)"
        },
        {
          "prefix": "Reasoning: Let's think step by step in order to",
          "description": "${reasoning}"
        },
        {
          "prefix": "Improved Tweet:",
          "description": "Generated or improved tweet text (max 280 characters)"
        }
      ]
    },
    "lm": {
      "model": "openrouter/google/gemini-2.5-flash",
      "model_type": "chat",
      "cache": true,
      "num_retries": 3,
      "finetuning_model": null,
      "launch_kwargs": {},
      "train_kwargs": {},
      "temperature": 0.7,
      "max_tokens": 4096,
      "api_key": "sk-or-v1-<redacted>",
      "api_base": "https://openrouter.ai/api/v1"
    }
  },
  "tweet_evaluator.evaluate.predict": {
    "traces": [],
    "train": [],
    "demos": [],
    "signature": {
      "instructions": "Evaluate a tweet across multiple custom categories. For each category, provide detailed reasoning explaining the score, then assign a score. Ensure the tweet maintains the same meaning as the original text.",
      "fields": [
        {
          "prefix": "Original Text:",
          "description": "Original input text that started the optimization"
        },
        {
          "prefix": "Current Best Tweet:",
          "description": "Current best tweet version for comparison (empty for first evaluation)"
        },
        {
          "prefix": "Tweet Text:",
          "description": "Tweet text to evaluate"
        },
        {
          "prefix": "Categories:",
          "description": "Comma-separated list of evaluation category descriptions"
        },
        {
          "prefix": "Reasoning: Let's think step by step in order to",
          "description": "${reasoning}"
        },
        {
          "prefix": "Evaluations:",
          "description": "List of evaluations with category name, detailed reasoning, and score (1-9) for each category. Ensure the tweet conveys the same meaning as the original text."
        }
      ]
    },
    "lm": {
      "model": "openrouter/openai/gpt-5",
      "model_type": "chat",
      "cache": true,
      "num_retries": 3,
      "finetuning_model": null,
      "launch_kwargs": {},
      "train_kwargs": {},
      "temperature": 1.0,
      "max_completion_tokens": 16000,
      "api_key": "sk-or-v1-<redacted>",
      "api_base": "https://openrouter.ai/api/v1"
    }
  },
  "metadata": {
    "dependency_versions": {
      "python": "3.13",
      "dspy": "3.0.3",
      "cloudpickle": "3.1"
    }
  }
}
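The serialized program state above is plain JSON, so it can be inspected without any framework code; a small sketch (assuming `agent.json` sits in the working directory):

```python
import json

with open("agent.json") as f:
    state = json.load(f)

# Each non-metadata entry is one DSPy predictor: its signature plus LM settings.
for name, predictor in state.items():
    if name == "metadata":
        continue
    print(name)
    print("  instructions:", predictor["signature"]["instructions"])
    print("  lm:", predictor["lm"]["model"])
```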
agent.py (172 lines, new file)
@@ -0,0 +1,172 @@
from modaic import PrecompiledAgent, PrecompiledConfig
from modules import TweetGeneratorModule, TweetEvaluatorModule
from models import EvaluationResult
from hill_climbing import HillClimbingOptimizer
from typing import Optional, List, Dict, Any
from utils import get_dspy_lm
from constants import DEFAULT_CATEGORIES, DEFAULT_ITERATIONS, DEFAULT_PATIENCE


class TweetOptimizerConfig(PrecompiledConfig):
    lm: str = "openrouter/google/gemini-2.5-flash"
    eval_lm: str = "openrouter/openai/gpt-5"
    categories: List[str] = DEFAULT_CATEGORIES
    max_iterations: int = DEFAULT_ITERATIONS
    patience: int = DEFAULT_PATIENCE


class TweetOptimizerAgent(PrecompiledAgent):
    config: TweetOptimizerConfig

    def __init__(self, config: TweetOptimizerConfig):
        super().__init__(config)
        self.tweet_generator = TweetGeneratorModule()
        self.tweet_evaluator = TweetEvaluatorModule()

        # set up optimizer
        self.optimizer = HillClimbingOptimizer(
            generator=self.tweet_generator,
            evaluator=self.tweet_evaluator,
            categories=config.categories,
            max_iterations=config.max_iterations,
            patience=config.patience
        )

        self.lm = config.lm
        self.eval_lm = config.eval_lm

        # initialize DSPy with the specified model
        self.tweet_generator.set_lm(get_dspy_lm(config.lm))
        self.tweet_evaluator.set_lm(get_dspy_lm(config.eval_lm))

    def forward(
        self,
        input_text: str,
        current_tweet: str = "",
        previous_evaluation: Optional[EvaluationResult] = None,
    ) -> str:
        """Generate a single optimized tweet (single iteration)."""
        tweet = self.tweet_generator(input_text, current_tweet, previous_evaluation)
        return tweet

    def optimize(
        self,
        input_text: str,
        iterations: Optional[int] = None,
        patience: Optional[int] = None
    ) -> Dict[str, Any]:
        """Run full optimization process like the CLI."""
        max_iterations = iterations or self.config.max_iterations
        patience_limit = patience or self.config.patience

        results = {
            'initial_text': input_text,
            'final_tweet': '',
            'best_score': 0.0,
            'iterations_run': 0,
            'early_stopped': False,
            'scores_history': [],
            'improvement_count': 0
        }

        best_tweet = ""
        best_score = 0.0

        for iteration, (current_tweet, scores, is_improvement, patience_counter, _, _) in enumerate(
            self.optimizer.optimize(input_text)
        ):
            iteration_num = iteration + 1
            results['iterations_run'] = iteration_num
            results['scores_history'].append(scores)

            if is_improvement:
                best_tweet = current_tweet
                best_score = sum(scores.category_scores) / len(scores.category_scores)
                results['improvement_count'] += 1

            # check for early stopping
            if patience_counter >= patience_limit:
                results['early_stopped'] = True
                break

            # stop at max iterations
            if iteration_num >= max_iterations:
                break

        results.update({
            'final_tweet': best_tweet,
            'best_score': best_score
        })

        return results

    def evaluate_tweet(
        self,
        tweet_text: str,
        original_text: str = "",
        current_best_tweet: str = ""
    ) -> EvaluationResult:
        """Evaluate a tweet using the configured categories."""
        return self.tweet_evaluator(tweet_text, self.config.categories, original_text, current_best_tweet)


if __name__ == "__main__":
    # create agent with default config
    config = TweetOptimizerConfig()
    tweet_optimizer = TweetOptimizerAgent(config)
    """
    import os

    # set up test environment (replace with real API key for actual usage)
    if not os.getenv("OPENROUTER_API_KEY"):
        raise ValueError("OPENROUTER_API_KEY environment variable is not set")

    # single tweet generation
    print("=== Single Tweet Generation ===")
    try:
        single_tweet = tweet_optimizer(
            input_text="Anthropic added a new OSS model on HuggingFace.",
            current_tweet="",
            previous_evaluation=None,
        )
        print(f"Generated tweet: {single_tweet}")
    except Exception as e:
        print(f"Error in single generation: {e}")

    # full optimization process
    print("\n=== Full Optimization Process ===")
    try:
        results = tweet_optimizer.optimize(
            input_text="Anthropic added a new OSS model on HuggingFace.",
            iterations=10,  # Reduced for testing
            patience=8
        )
        print(f"Initial text: {results['initial_text']}")
        print(f"Final tweet: {results['final_tweet']}")
        print(f"Best score: {results['best_score']:.2f}")
        print(f"Iterations run: {results['iterations_run']}")
        print(f"Improvements found: {results['improvement_count']}")
        print(f"Early stopped: {results['early_stopped']}")
    except Exception as e:
        print(f"Error in optimization: {e}")
    """
    # push to hub
    print("\n=== Push to Hub ===")
    try:
        tweet_optimizer.push_to_hub(
            "farouk1/tweet-optimizer-v2",
            commit_message="Complete Migration",
            with_code=True
        )
        print("Successfully pushed to hub!")
    except Exception as e:
        print(f"Error pushing to hub: {e}")
    """
    print("\n=== Agent Configuration ===")
    print(f"Model: {config.lm}")
    print(f"Categories: {config.categories}")
    print(f"Max iterations: {config.max_iterations}")
    print(f"Patience: {config.patience}")
    """
agent/__init__.py (0 lines, new file)
agent/__pycache__/__init__.cpython-310.pyc (binary, new file; not shown)
agent/__pycache__/agent.cpython-310.pyc (binary, new file; not shown)
agent/__pycache__/constants.cpython-310.pyc (binary, new file; not shown)
agent/__pycache__/helpers.cpython-310.pyc (binary, new file; not shown)
agent/__pycache__/hill_climbing.cpython-310.pyc (binary, new file; not shown)
agent/__pycache__/models.cpython-310.pyc (binary, new file; not shown)
agent/__pycache__/modules.cpython-310.pyc (binary, new file; not shown)
agent/__pycache__/utils.cpython-310.pyc (binary, new file; not shown)
agent/agent.py (146 lines, new file)
@@ -0,0 +1,146 @@
from modaic import PrecompiledAgent, PrecompiledConfig
from .modules import TweetGeneratorModule, TweetEvaluatorModule
from .models import EvaluationResult
from .hill_climbing import HillClimbingOptimizer
from typing import Optional, List, Dict, Any
from .utils import get_dspy_lm
from .constants import DEFAULT_CATEGORIES, DEFAULT_ITERATIONS, DEFAULT_PATIENCE


class TweetOptimizerConfig(PrecompiledConfig):
    lm: str = "openrouter/google/gemini-2.5-flash"
    eval_lm: str = "openrouter/openai/gpt-5"
    categories: List[str] = DEFAULT_CATEGORIES
    max_iterations: int = DEFAULT_ITERATIONS
    patience: int = DEFAULT_PATIENCE


class TweetOptimizerAgent(PrecompiledAgent):
    config: TweetOptimizerConfig

    current_tweet: str = ""
    previous_evaluation: Optional[EvaluationResult] = None

    def __init__(self, config: TweetOptimizerConfig, **kwargs):
        super().__init__(config, **kwargs)
        self.tweet_generator = TweetGeneratorModule()
        self.tweet_evaluator = TweetEvaluatorModule()

        # set up optimizer
        self.optimizer = HillClimbingOptimizer(
            generator=self.tweet_generator,
            evaluator=self.tweet_evaluator,
            categories=config.categories,
            max_iterations=config.max_iterations,
            patience=config.patience
        )

        self.lm = config.lm
        self.eval_lm = config.eval_lm

        # initialize DSPy with the specified model
        self.tweet_generator.set_lm(get_dspy_lm(config.lm))
        self.tweet_evaluator.set_lm(get_dspy_lm(config.eval_lm))

    def forward(
        self,
        input_text: str,
        iterations: Optional[int] = None,
        patience: Optional[int] = None
    ) -> Dict[str, Any]:
        """Run full optimization process."""
        max_iterations = iterations or self.config.max_iterations
        patience_limit = patience or self.config.patience

        results = {
            'initial_text': input_text,
            'final_tweet': '',
            'best_score': 0.0,
            'iterations_run': 0,
            'early_stopped': False,
            'scores_history': [],
            'improvement_count': 0
        }

        best_tweet = ""
        best_score = 0.0

        for iteration, (current_tweet, scores, is_improvement, patience_counter, _, _) in enumerate(
            self.optimizer.optimize(input_text)
        ):
            iteration_num = iteration + 1
            results['iterations_run'] = iteration_num
            results['scores_history'].append(scores)

            if is_improvement:
                best_tweet = current_tweet
                best_score = sum(scores.category_scores) / len(scores.category_scores)
                results['improvement_count'] += 1

            # check for early stopping
            if patience_counter >= patience_limit:
                results['early_stopped'] = True
                break

            # stop at max iterations
            if iteration_num >= max_iterations:
                break

        results.update({
            'final_tweet': best_tweet,
            'best_score': best_score
        })

        self.reset()

        return results

    def reset(self):
        self.current_tweet = ""
        self.previous_evaluation = None


if __name__ == "__main__":
    # create agent with default config
    config = TweetOptimizerConfig()
    tweet_optimizer = TweetOptimizerAgent(config)
    import os

    # set up test environment (replace with real API key for actual usage)
    if not os.getenv("OPENROUTER_API_KEY"):
        raise ValueError("OPENROUTER_API_KEY environment variable is not set")

    # full optimization process
    print("\n=== Full Optimization Process ===")
    try:
        results = tweet_optimizer(
            input_text="Anthropic added a new OSS model on HuggingFace.",
            iterations=10,  # Reduced for testing
            patience=8
        )
        print(f"Initial text: {results['initial_text']}")
        print(f"Final tweet: {results['final_tweet']}")
        print(f"Best score: {results['best_score']:.2f}")
        print(f"Iterations run: {results['iterations_run']}")
        print(f"Improvements found: {results['improvement_count']}")
        print(f"Early stopped: {results['early_stopped']}")
    except Exception as e:
        print(f"Error in optimization: {e}")

    # push to hub
    print("\n=== Push to Hub ===")
    try:
        tweet_optimizer.push_to_hub(
            "farouk1/tweet-optimizer-v2",
            commit_message="Complete Migration",
            with_code=True
        )
        print("Successfully pushed to hub!")
    except Exception as e:
        print(f"Error pushing to hub: {e}")

    print("\n=== Agent Configuration ===")
    print(f"Model: {config.lm}")
    print(f"Categories: {config.categories}")
    print(f"Max iterations: {config.max_iterations}")
    print(f"Patience: {config.patience}")
agent/constants.py (75 lines, new file)
@@ -0,0 +1,75 @@
from typing import Dict, List

# tweet configuration
TWEET_MAX_LENGTH = 280
TWEET_TRUNCATION_SUFFIX = "..."
TWEET_TRUNCATION_LENGTH = TWEET_MAX_LENGTH - len(TWEET_TRUNCATION_SUFFIX)

# score configuration
MIN_SCORE = 1
MAX_SCORE = 9
DEFAULT_SCORE = 5

# file paths
CATEGORIES_FILE = "categories.json"
SETTINGS_FILE = "settings.json"
HISTORY_FILE = "input_history.json"

# history configuration
MAX_HISTORY_ITEMS = 50  # maximum number of historical inputs to store

# model configuration
DEFAULT_MODEL = "openrouter/anthropic/claude-sonnet-4.5"

AVAILABLE_MODELS: Dict[str, str] = {
    "Claude Sonnet 4.5": "openrouter/anthropic/claude-sonnet-4.5",
    "Opus 4.1": "openrouter/anthropic/claude-opus-4.1",
    "Gemini 2.5 Flash": "openrouter/google/gemini-2.5-flash",
    "Gemini 2.5 Flash Lite": "openrouter/google/gemini-2.5-flash-lite",
    "Gemini 2.5 Pro": "openrouter/google/gemini-2.5-pro",
    "GPT-5": "openrouter/openai/gpt-5"
}

# openrouter API configuration
OPENROUTER_API_BASE = "https://openrouter.ai/api/v1"
OPENROUTER_MAX_TOKENS = 4096
OPENROUTER_TEMPERATURE = 0.7

# optimization defaults
DEFAULT_ITERATIONS = 10
DEFAULT_PATIENCE = 5
DEFAULT_USE_CACHE = True

# default evaluation categories
DEFAULT_CATEGORIES: List[str] = [
    "Engagement potential - how likely users are to like, retweet, or reply",
    "Clarity and readability - how easy the tweet is to understand",
    "Emotional impact - how well the tweet evokes feelings or reactions",
    "Relevance to target audience - how well it resonates with intended readers"
]

# error messages
ERROR_PARSING = "Default evaluation due to parsing error"
ERROR_VALIDATION = "Default evaluation due to validation error"
ERROR_GENERATION = "Tweet generation failed"
ERROR_EVALUATION = "Tweet evaluation failed"
ERROR_DSPy_INIT = "DSPy initialization failed"
ERROR_NO_API_KEY = "OPENROUTER_API_KEY environment variable is required"
ERROR_SAVE_CATEGORIES = "Failed to save categories"
ERROR_LOAD_CATEGORIES = "Failed to load categories"
ERROR_SAVE_SETTINGS = "Failed to save settings"
ERROR_LOAD_SETTINGS = "Failed to load settings"
ERROR_SAVE_HISTORY = "Failed to save input history"
ERROR_LOAD_HISTORY = "Failed to load input history"

# cache configuration
CACHE_ENABLE_MEMORY = True
CACHE_ENABLE_DISK = True

# iteration display
ITERATION_SLEEP_TIME = 0.1  # seconds

# truncation display
CATEGORY_DISPLAY_MAX_LENGTH = 30
CATEGORY_DISPLAY_TRUNCATION = "..."
CATEGORY_IMPROVEMENT_MAX_LENGTH = 50
agent/helpers.py (85 lines, new file)
@@ -0,0 +1,85 @@
from typing import Optional, Dict, Any
from .models import EvaluationResult
from .constants import MAX_SCORE


def format_evaluation_for_generator(evaluation: Optional[EvaluationResult]) -> str:
    """
    Format an evaluation result as text for the generator module.

    Args:
        evaluation: The evaluation result to format

    Returns:
        Formatted string with category-by-category reasoning and scores
    """
    if not evaluation or not evaluation.evaluations:
        return ""

    eval_lines = []
    for eval in evaluation.evaluations:
        eval_lines.append(f"{eval.category} (Score: {eval.score}/{MAX_SCORE}): {eval.reasoning}")

    return "\n".join(eval_lines)


def build_settings_dict(
    selected_model: str,
    iterations: int,
    patience: int,
    use_cache: bool
) -> Dict[str, Any]:
    """
    Build a settings dictionary for saving.

    Args:
        selected_model: The selected model name
        iterations: Number of optimization iterations
        patience: Patience threshold for early stopping
        use_cache: Whether to use DSPy cache

    Returns:
        Dictionary containing all settings
    """
    return {
        "selected_model": selected_model,
        "iterations": iterations,
        "patience": patience,
        "use_cache": use_cache
    }


def truncate_tweet(tweet: str, max_length: int, suffix: str = "...") -> str:
    """
    Truncate a tweet to the maximum length with a suffix.

    Args:
        tweet: The tweet text to truncate
        max_length: Maximum allowed length
        suffix: Suffix to add when truncating (default: "...")

    Returns:
        Truncated tweet text
    """
    tweet = tweet.strip()
    if len(tweet) <= max_length:
        return tweet

    truncation_point = max_length - len(suffix)
    return tweet[:truncation_point] + suffix


def truncate_category_display(category: str, max_length: int = 30) -> str:
    """
    Truncate a category name for display purposes.

    Args:
        category: The category name
        max_length: Maximum display length (default: 30)

    Returns:
        Truncated category name with "..." if needed
    """
    if len(category) <= max_length:
        return category
    return category[:max_length] + "..."
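The truncation helpers above are pure string functions, so their behavior is easy to pin down; a quick sketch with illustrative values:

```python
from agent.helpers import truncate_tweet, truncate_category_display

long_text = "x" * 300
short = truncate_tweet(long_text, max_length=280)
assert len(short) == 280 and short.endswith("...")  # 277 chars + "..."

print(truncate_category_display("Relevance to target audience", max_length=10))
# -> "Relevance ..."
```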
agent/hill_climbing.py (119 lines, new file)
@@ -0,0 +1,119 @@
from typing import List, Iterator, Tuple, Dict
from .models import EvaluationResult
from .modules import TweetGeneratorModule, TweetEvaluatorModule
from .helpers import format_evaluation_for_generator


class HillClimbingOptimizer:
    """Hill climbing optimizer for tweet improvement."""

    def __init__(
        self,
        generator: TweetGeneratorModule,
        evaluator: TweetEvaluatorModule,
        categories: List[str],
        max_iterations: int = 10,
        patience: int = 5
    ):
        self.generator = generator
        self.evaluator = evaluator
        self.categories = categories
        self.max_iterations = max_iterations
        self.patience = patience

    def optimize(self, initial_text: str) -> Iterator[Tuple[str, EvaluationResult, bool, int, Dict[str, str], Dict[str, str]]]:
        """
        Optimize tweet using hill climbing algorithm.

        Yields:
            Tuple of (current_tweet, evaluation_result, is_improvement, patience_counter, generator_inputs, evaluator_inputs)
        """
        # Generate initial tweet
        generator_inputs = {
            "input_text": initial_text,
            "current_tweet": "",
            "previous_evaluation": ""
        }
        current_tweet = self.generator(
            input_text=initial_text,
            current_tweet="",
            previous_evaluation=None
        )

        evaluator_inputs = {
            "original_text": initial_text,
            "current_best_tweet": "",
            "tweet_text": current_tweet
        }
        current_score = self.evaluator(
            tweet_text=current_tweet,
            categories=self.categories,
            original_text=initial_text,
            current_best_tweet=""
        )

        best_tweet = current_tweet
        best_score = current_score
        patience_counter = 0

        yield (current_tweet, current_score, True, patience_counter, generator_inputs, evaluator_inputs)

        for iteration in range(1, self.max_iterations):
            # Generate improved tweet with previous evaluation as feedback
            try:
                # Format evaluation for display in generator inputs
                eval_text = format_evaluation_for_generator(best_score)

                generator_inputs = {
                    "input_text": initial_text,
                    "current_tweet": best_tweet,
                    "previous_evaluation": eval_text
                }

                candidate_tweet = self.generator(
                    input_text=initial_text,
                    current_tweet=best_tweet,
                    previous_evaluation=best_score
                )

                # Evaluate candidate
                evaluator_inputs = {
                    "original_text": initial_text,
                    "current_best_tweet": best_tweet,
                    "tweet_text": candidate_tweet
                }
                candidate_score = self.evaluator(
                    tweet_text=candidate_tweet,
                    categories=self.categories,
                    original_text=initial_text,
                    current_best_tweet=best_tweet
                )

                # Check if candidate is better (hill climbing condition)
                is_improvement = candidate_score > best_score

                if is_improvement:
                    best_tweet = candidate_tweet
                    best_score = candidate_score
                    patience_counter = 0
                    yield (candidate_tweet, candidate_score, True, patience_counter, generator_inputs, evaluator_inputs)
                else:
                    patience_counter += 1
                    yield (best_tweet, candidate_score, False, patience_counter, generator_inputs, evaluator_inputs)

                # Early stopping if no improvement for 'patience' iterations
                if patience_counter >= self.patience:
                    break

            except Exception:
                # If generation fails, yield current best
                patience_counter += 1
                evaluator_inputs = {
                    "original_text": initial_text,
                    "current_best_tweet": best_tweet,
                    "tweet_text": best_tweet
                }
                yield (best_tweet, best_score, False, patience_counter, generator_inputs, evaluator_inputs)

                if patience_counter >= self.patience:
                    break
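The optimizer above is a plain Python generator, so the driving loop in `agent/agent.py` can be reproduced in a few lines; a sketch (the model choice and input text are illustrative, and `OPENROUTER_API_KEY` must be set):

```python
import dspy
from agent.modules import TweetGeneratorModule, TweetEvaluatorModule
from agent.hill_climbing import HillClimbingOptimizer
from agent.constants import DEFAULT_CATEGORIES

# Configure a default LM shared by both DSPy modules.
dspy.configure(lm=dspy.LM("openrouter/google/gemini-2.5-flash"))

optimizer = HillClimbingOptimizer(
    generator=TweetGeneratorModule(),
    evaluator=TweetEvaluatorModule(),
    categories=DEFAULT_CATEGORIES,
    max_iterations=10,
    patience=3,
)

best = None
for tweet, evaluation, improved, waited, _, _ in optimizer.optimize("We shipped v2 today"):
    if improved:
        best = tweet
    # The generator stops on its own once `waited` reaches `patience`.
    print(f"improved={improved} waited={waited} avg={evaluation.average_score():.2f}")
print("Best tweet:", best)
```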
agent/models.py (60 lines, new file)
@@ -0,0 +1,60 @@
from pydantic import BaseModel, Field, validator
from typing import List
from .constants import MIN_SCORE, MAX_SCORE


class CategoryEvaluation(BaseModel):
    """Pydantic model for a single category evaluation with reasoning."""

    category: str = Field(description="The evaluation category name")
    reasoning: str = Field(description="Explanation for the score")
    score: int = Field(
        description=f"Score for this category ({MIN_SCORE}-{MAX_SCORE})",
        ge=MIN_SCORE,
        le=MAX_SCORE
    )

    @validator('score')
    def validate_score(cls, score):
        """Ensure score is within the valid range."""
        if not isinstance(score, int) or score < MIN_SCORE or score > MAX_SCORE:
            raise ValueError(f"Score {score} must be an integer between {MIN_SCORE} and {MAX_SCORE}")
        return score


class EvaluationResult(BaseModel):
    """Pydantic model for tweet evaluation results."""

    evaluations: List[CategoryEvaluation] = Field(
        description="List of category evaluations with reasoning and scores"
    )

    @validator('evaluations')
    def validate_evaluations(cls, evals):
        """Ensure we have at least one evaluation."""
        if not evals or len(evals) < 1:
            raise ValueError("Must have at least one category evaluation")
        return evals

    @property
    def category_scores(self) -> List[int]:
        """Get list of scores for backwards compatibility."""
        return [eval.score for eval in self.evaluations]

    def total_score(self) -> float:
        """Calculate the total score across all categories."""
        return sum(eval.score for eval in self.evaluations)

    def average_score(self) -> float:
        """Calculate the average score across all categories."""
        return self.total_score() / len(self.evaluations)

    def __gt__(self, other):
        """Compare evaluation results based on total score."""
        if not isinstance(other, EvaluationResult):
            return NotImplemented
        return self.total_score() > other.total_score()

    def __eq__(self, other):
        """Check equality based on total score."""
        if not isinstance(other, EvaluationResult):
            return NotImplemented
        return self.total_score() == other.total_score()
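The comparison operators above are what make the hill-climbing acceptance test a plain `>` check between evaluation results; a small illustration:

```python
from agent.models import CategoryEvaluation, EvaluationResult

a = EvaluationResult(evaluations=[
    CategoryEvaluation(category="Clarity", reasoning="Easy to read", score=6),
    CategoryEvaluation(category="Engagement", reasoning="Weak hook", score=4),
])
b = EvaluationResult(evaluations=[
    CategoryEvaluation(category="Clarity", reasoning="Crisp", score=7),
    CategoryEvaluation(category="Engagement", reasoning="Strong hook", score=6),
])

assert b > a                       # __gt__ compares total_score()
assert a.total_score() == 10
assert b.average_score() == 6.5
assert a.category_scores == [6, 4]
```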
agent/modules.py (128 lines, new file)
@@ -0,0 +1,128 @@
import dspy
from typing import List, Optional
from .models import EvaluationResult, CategoryEvaluation
from .constants import (
    TWEET_MAX_LENGTH,
    TWEET_TRUNCATION_SUFFIX,
    DEFAULT_SCORE,
    ERROR_PARSING,
    ERROR_VALIDATION,
    ERROR_GENERATION,
    ERROR_EVALUATION,
    MIN_SCORE,
    MAX_SCORE
)
from .helpers import format_evaluation_for_generator, truncate_tweet


class TweetGenerator(dspy.Signature):
    """Generate or improve a tweet based on input text and detailed evaluation feedback with reasoning."""

    input_text: str = dspy.InputField(desc="Original text or current tweet to improve")
    current_tweet: str = dspy.InputField(desc="Current best tweet version (empty for first generation)")
    previous_evaluation: str = dspy.InputField(desc="Previous evaluation with category-by-category reasoning and scores (empty for first generation)")
    improved_tweet: str = dspy.OutputField(desc=f"Generated or improved tweet text (max {TWEET_MAX_LENGTH} characters)")


class TweetEvaluator(dspy.Signature):
    """Evaluate a tweet across multiple custom categories. For each category, provide detailed reasoning explaining the score, then assign a score. Ensure the tweet maintains the same meaning as the original text."""

    original_text: str = dspy.InputField(desc="Original input text that started the optimization")
    current_best_tweet: str = dspy.InputField(desc="Current best tweet version for comparison (empty for first evaluation)")
    tweet_text: str = dspy.InputField(desc="Tweet text to evaluate")
    categories: str = dspy.InputField(desc="Comma-separated list of evaluation category descriptions")
    evaluations: List[CategoryEvaluation] = dspy.OutputField(
        desc=f"List of evaluations with category name, detailed reasoning, and score ({MIN_SCORE}-{MAX_SCORE}) for each category. Ensure the tweet conveys the same meaning as the original text."
    )


class TweetGeneratorModule(dspy.Module):
    """DSPy module for generating and improving tweets."""

    def __init__(self):
        super().__init__()
        self.generate = dspy.ChainOfThought(TweetGenerator)

    def forward(self, input_text: str, current_tweet: str = "", previous_evaluation: Optional[EvaluationResult] = None) -> str:
        """Generate or improve a tweet."""
        try:
            # Format previous evaluation as text
            eval_text = format_evaluation_for_generator(previous_evaluation)

            result = self.generate(
                input_text=input_text,
                current_tweet=current_tweet,
                previous_evaluation=eval_text
            )

            # Ensure tweet doesn't exceed character limit
            tweet = truncate_tweet(result.improved_tweet, TWEET_MAX_LENGTH, TWEET_TRUNCATION_SUFFIX)

            return tweet
        except Exception as e:
            raise Exception(f"{ERROR_GENERATION}: {str(e)}")


class TweetEvaluatorModule(dspy.Module):
    """DSPy module for evaluating tweets across custom categories."""

    def __init__(self):
        super().__init__()
        self.evaluate = dspy.ChainOfThought(TweetEvaluator)

    def forward(self, tweet_text: str, categories: List[str], original_text: str = "", current_best_tweet: str = "") -> EvaluationResult:
        """Evaluate a tweet across specified categories."""
        try:
            # Join categories into comma-separated string
            categories_str = ", ".join(categories)

            result = self.evaluate(
                original_text=original_text,
                current_best_tweet=current_best_tweet,
                tweet_text=tweet_text,
                categories=categories_str
            )

            # Extract and validate evaluations
            evaluations = result.evaluations

            # Ensure we have the right number of evaluations
            if len(evaluations) != len(categories):
                # Create default evaluations if mismatch
                evaluations = [
                    CategoryEvaluation(
                        category=cat,
                        reasoning=ERROR_PARSING,
                        score=DEFAULT_SCORE
                    ) for cat in categories
                ]
            else:
                # Validate each evaluation
                validated_evals = []
                for i, eval in enumerate(evaluations):
                    try:
                        # Ensure score is valid
                        score = max(MIN_SCORE, min(MAX_SCORE, int(eval.score)))
                        validated_evals.append(CategoryEvaluation(
                            category=categories[i] if i < len(categories) else eval.category,
                            reasoning=eval.reasoning if eval.reasoning else "No reasoning provided",
                            score=score
                        ))
                    except (ValueError, TypeError, AttributeError):
                        validated_evals.append(CategoryEvaluation(
                            category=categories[i] if i < len(categories) else "Unknown",
                            reasoning=ERROR_VALIDATION,
                            score=DEFAULT_SCORE
                        ))
                evaluations = validated_evals

            # Create validated result
            validated_result = EvaluationResult(evaluations=evaluations)

            return validated_result
        except Exception as e:
            # Return default evaluations on error
            default_evals = [
                CategoryEvaluation(
                    category=cat,
                    reasoning=f"{ERROR_EVALUATION}: {str(e)}",
                    score=DEFAULT_SCORE
                ) for cat in categories
            ]
            return EvaluationResult(evaluations=default_evals)
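The modules above are self-contained DSPy programs, so they can also be driven outside the agent; a minimal sketch (model choice is illustrative, and `OPENROUTER_API_KEY` must be set):

```python
import dspy
from agent.modules import TweetEvaluatorModule

dspy.configure(lm=dspy.LM("openrouter/google/gemini-2.5-flash"))

evaluator = TweetEvaluatorModule()
result = evaluator(
    tweet_text="Excited to share our new AI model!",
    categories=["Engagement potential", "Clarity and readability"],
    original_text="We released a new AI model",
)
for ev in result.evaluations:
    print(f"{ev.category}: {ev.score}/9 - {ev.reasoning}")
```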
agent/utils.py (192 lines, new file)
@@ -0,0 +1,192 @@
import json
import os
import dspy
from typing import List, Dict, Any
from .constants import (
    CATEGORIES_FILE,
    SETTINGS_FILE,
    HISTORY_FILE,
    DEFAULT_CATEGORIES,
    DEFAULT_MODEL,
    DEFAULT_ITERATIONS,
    DEFAULT_PATIENCE,
    DEFAULT_USE_CACHE,
    MAX_HISTORY_ITEMS,
    OPENROUTER_API_BASE,
    OPENROUTER_MAX_TOKENS,
    OPENROUTER_TEMPERATURE,
    ERROR_NO_API_KEY,
    ERROR_SAVE_CATEGORIES,
    ERROR_LOAD_CATEGORIES,
    ERROR_SAVE_SETTINGS,
    ERROR_LOAD_SETTINGS,
    ERROR_SAVE_HISTORY,
    ERROR_LOAD_HISTORY,
    ERROR_DSPy_INIT,
    TWEET_MAX_LENGTH
)


def save_categories(categories: List[str]) -> None:
    """Save categories to JSON file."""
    try:
        with open(CATEGORIES_FILE, 'w') as f:
            json.dump(categories, f, indent=2)
    except Exception as e:
        print(f"{ERROR_SAVE_CATEGORIES}: {str(e)}")


def load_categories() -> List[str]:
    """Load categories from JSON file."""
    try:
        if os.path.exists(CATEGORIES_FILE):
            with open(CATEGORIES_FILE, 'r') as f:
                categories = json.load(f)
                return categories if isinstance(categories, list) else []
        else:
            save_categories(DEFAULT_CATEGORIES)
            return DEFAULT_CATEGORIES
    except Exception as e:
        print(f"{ERROR_LOAD_CATEGORIES}: {str(e)}")
        return []


def get_dspy_lm(model_name: str):
    """Get a DSPy LM instance for the specified model."""
    try:
        openrouter_key = os.getenv("OPENROUTER_API_KEY")
        if not openrouter_key:
            raise ValueError(ERROR_NO_API_KEY)

        max_tokens = 16000 if "openai/gpt-5" in model_name else OPENROUTER_MAX_TOKENS
        temperature = 1.0 if "openai/gpt-5" in model_name else OPENROUTER_TEMPERATURE

        lm = dspy.LM(
            model=model_name,
            api_key=openrouter_key,
            api_base=OPENROUTER_API_BASE,
            max_tokens=max_tokens,
            temperature=temperature
        )
        return lm
    except Exception as e:
        raise Exception(f"Failed to create LM: {str(e)}")


def initialize_dspy(model_name: str = DEFAULT_MODEL, use_cache: bool = DEFAULT_USE_CACHE) -> bool:
    """Initialize DSPy with OpenRouter and selected model."""
    # Configure cache settings
    try:
        dspy.configure_cache(
            enable_memory_cache=use_cache,
            enable_disk_cache=use_cache
        )
    except Exception:
        # Cache configuration might fail in some environments, continue anyway
        pass

    # Only configure DSPy once globally
    if not hasattr(dspy, '_replit_configured'):
        try:
            # Get the LM for the default model
            default_lm = get_dspy_lm(model_name)
            dspy.configure(lm=default_lm)
            dspy._replit_configured = True  # type: ignore
        except Exception as e:
            raise Exception(f"{ERROR_DSPy_INIT}: {str(e)}")

    return True


def format_tweet_for_display(tweet: str) -> str:
    """Format tweet text for better display."""
    return tweet.strip()


def calculate_tweet_length(tweet: str) -> int:
    """Calculate tweet length."""
    return len(tweet.strip())


def is_valid_tweet(tweet: str) -> bool:
    """Check if tweet is valid (not empty and within character limit)."""
    cleaned_tweet = tweet.strip()
    return bool(cleaned_tweet) and len(cleaned_tweet) <= TWEET_MAX_LENGTH


def save_settings(settings: Dict[str, Any]) -> None:
    """Save settings to JSON file."""
    try:
        with open(SETTINGS_FILE, 'w') as f:
            json.dump(settings, f, indent=2)
    except Exception as e:
        print(f"{ERROR_SAVE_SETTINGS}: {str(e)}")


def load_settings() -> Dict[str, Any]:
    """Load settings from JSON file."""
    try:
        if os.path.exists(SETTINGS_FILE):
            with open(SETTINGS_FILE, 'r') as f:
                settings = json.load(f)
                return settings if isinstance(settings, dict) else get_default_settings()
        else:
            # Return default settings if file doesn't exist
            default_settings = get_default_settings()
            save_settings(default_settings)
            return default_settings
    except Exception as e:
        print(f"{ERROR_LOAD_SETTINGS}: {str(e)}")
        return get_default_settings()


def get_default_settings() -> Dict[str, Any]:
    """Get default settings."""
    return {
        "selected_model": DEFAULT_MODEL,
        "iterations": DEFAULT_ITERATIONS,
        "patience": DEFAULT_PATIENCE,
        "use_cache": DEFAULT_USE_CACHE
    }


def save_input_history(history: List[str]) -> None:
    """Save input history to JSON file."""
    try:
        with open(HISTORY_FILE, 'w') as f:
            json.dump(history, f, indent=2)
    except Exception as e:
        print(f"{ERROR_SAVE_HISTORY}: {str(e)}")


def load_input_history() -> List[str]:
    """Load input history from JSON file."""
    try:
        if os.path.exists(HISTORY_FILE):
            with open(HISTORY_FILE, 'r') as f:
                history = json.load(f)
                return history if isinstance(history, list) else []
        else:
            return []
    except Exception as e:
        print(f"{ERROR_LOAD_HISTORY}: {str(e)}")
        return []


def add_to_input_history(history: List[str], new_input: str) -> List[str]:
    """
    Add a new input to history, maintaining max size and avoiding duplicates.

    Args:
        history: Current history list
        new_input: New input text to add

    Returns:
        Updated history list with new input at the beginning
    """
    # Strip whitespace from input
    new_input = new_input.strip()

    # Don't add empty strings
    if not new_input:
        return history

    # Remove duplicate if it exists
    if new_input in history:
        history.remove(new_input)

    # Add to beginning of list
    updated_history = [new_input] + history

    # Trim to max size
    if len(updated_history) > MAX_HISTORY_ITEMS:
        updated_history = updated_history[:MAX_HISTORY_ITEMS]

    return updated_history
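`add_to_input_history` above is a pure function over lists, so its most-recent-first and de-duplication behavior is easy to check; a brief sketch:

```python
from agent.utils import add_to_input_history

history = ["older prompt", "some prompt"]
history = add_to_input_history(history, "some prompt")
# A duplicate is moved to the front rather than repeated.
assert history == ["some prompt", "older prompt"]

history = add_to_input_history(history, "   ")  # blank input is ignored
assert history == ["some prompt", "older prompt"]
```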
auto_classes.json (4 lines, new file)
@@ -0,0 +1,4 @@
{
  "AutoConfig": "agent.agent.TweetOptimizerConfig",
  "AutoAgent": "agent.agent.TweetOptimizerAgent"
}
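`auto_classes.json` maps the hub's auto-class names to dotted import paths inside the pushed code. Presumably the hub loader resolves them along these lines; an illustrative sketch only, not Modaic's actual implementation:

```python
import importlib
import json

with open("auto_classes.json") as f:
    auto_classes = json.load(f)

# Split "agent.agent.TweetOptimizerAgent" into module path and class name.
module_path, class_name = auto_classes["AutoAgent"].rsplit(".", 1)
agent_cls = getattr(importlib.import_module(module_path), class_name)
print(agent_cls)  # <class 'agent.agent.TweetOptimizerAgent'>
```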
config.json (12 lines, new file)
@@ -0,0 +1,12 @@
{
  "lm": "openrouter/google/gemini-2.5-flash",
  "eval_lm": "openrouter/openai/gpt-5",
  "categories": [
    "Engagement potential - how likely users are to like, retweet, or reply",
    "Clarity and readability - how easy the tweet is to understand",
    "Emotional impact - how well the tweet evokes feelings or reactions",
    "Relevance to target audience - how well it resonates with intended readers"
  ],
  "max_iterations": 10,
  "patience": 5
}
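The fields in `config.json` mirror the `TweetOptimizerConfig` attributes one-to-one, so the saved configuration can be rebuilt directly; a sketch, assuming `PrecompiledConfig` accepts keyword fields as its pydantic-style declaration suggests:

```python
import json
from agent.agent import TweetOptimizerAgent, TweetOptimizerConfig

with open("config.json") as f:
    data = json.load(f)

# Keys in config.json match the config's field names exactly.
config = TweetOptimizerConfig(**data)
agent = TweetOptimizerAgent(config)
print(config.max_iterations, config.patience)
```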
constants.py (75 lines, new file)
@@ -0,0 +1,75 @@
from typing import Dict, List

# tweet configuration
TWEET_MAX_LENGTH = 280
TWEET_TRUNCATION_SUFFIX = "..."
TWEET_TRUNCATION_LENGTH = TWEET_MAX_LENGTH - len(TWEET_TRUNCATION_SUFFIX)

# score configuration
MIN_SCORE = 1
MAX_SCORE = 9
DEFAULT_SCORE = 5

# file paths
CATEGORIES_FILE = "categories.json"
SETTINGS_FILE = "settings.json"
HISTORY_FILE = "input_history.json"

# history configuration
MAX_HISTORY_ITEMS = 50  # maximum number of historical inputs to store

# model configuration
DEFAULT_MODEL = "openrouter/anthropic/claude-sonnet-4.5"

AVAILABLE_MODELS: Dict[str, str] = {
    "Claude Sonnet 4.5": "openrouter/anthropic/claude-sonnet-4.5",
    "Opus 4.1": "openrouter/anthropic/claude-opus-4.1",
    "Gemini 2.5 Flash": "openrouter/google/gemini-2.5-flash",
    "Gemini 2.5 Flash Lite": "openrouter/google/gemini-2.5-flash-lite",
    "Gemini 2.5 Pro": "openrouter/google/gemini-2.5-pro",
    "GPT-5": "openrouter/openai/gpt-5"
}

# openrouter API configuration
OPENROUTER_API_BASE = "https://openrouter.ai/api/v1"
OPENROUTER_MAX_TOKENS = 4096
OPENROUTER_TEMPERATURE = 0.7

# optimization defaults
DEFAULT_ITERATIONS = 10
DEFAULT_PATIENCE = 5
DEFAULT_USE_CACHE = True

# default evaluation categories
DEFAULT_CATEGORIES: List[str] = [
    "Engagement potential - how likely users are to like, retweet, or reply",
    "Clarity and readability - how easy the tweet is to understand",
    "Emotional impact - how well the tweet evokes feelings or reactions",
    "Relevance to target audience - how well it resonates with intended readers"
]

# error messages
ERROR_PARSING = "Default evaluation due to parsing error"
ERROR_VALIDATION = "Default evaluation due to validation error"
ERROR_GENERATION = "Tweet generation failed"
ERROR_EVALUATION = "Tweet evaluation failed"
ERROR_DSPy_INIT = "DSPy initialization failed"
ERROR_NO_API_KEY = "OPENROUTER_API_KEY environment variable is required"
ERROR_SAVE_CATEGORIES = "Failed to save categories"
ERROR_LOAD_CATEGORIES = "Failed to load categories"
ERROR_SAVE_SETTINGS = "Failed to save settings"
ERROR_LOAD_SETTINGS = "Failed to load settings"
ERROR_SAVE_HISTORY = "Failed to save input history"
ERROR_LOAD_HISTORY = "Failed to load input history"

# cache configuration
CACHE_ENABLE_MEMORY = True
CACHE_ENABLE_DISK = True

# iteration display
ITERATION_SLEEP_TIME = 0.1  # seconds

# truncation display
CATEGORY_DISPLAY_MAX_LENGTH = 30
CATEGORY_DISPLAY_TRUNCATION = "..."
CATEGORY_IMPROVEMENT_MAX_LENGTH = 50
helpers.py (85 lines, new file)
@@ -0,0 +1,85 @@
from typing import Optional, Dict, Any
from models import EvaluationResult
from constants import MAX_SCORE


def format_evaluation_for_generator(evaluation: Optional[EvaluationResult]) -> str:
    """
    Format an evaluation result as text for the generator module.

    Args:
        evaluation: The evaluation result to format

    Returns:
        Formatted string with category-by-category reasoning and scores
    """
    if not evaluation or not evaluation.evaluations:
        return ""

    eval_lines = []
    for eval in evaluation.evaluations:
        eval_lines.append(f"{eval.category} (Score: {eval.score}/{MAX_SCORE}): {eval.reasoning}")

    return "\n".join(eval_lines)


def build_settings_dict(
    selected_model: str,
    iterations: int,
    patience: int,
    use_cache: bool
) -> Dict[str, Any]:
    """
    Build a settings dictionary for saving.

    Args:
        selected_model: The selected model name
        iterations: Number of optimization iterations
        patience: Patience threshold for early stopping
        use_cache: Whether to use DSPy cache

    Returns:
        Dictionary containing all settings
    """
    return {
        "selected_model": selected_model,
        "iterations": iterations,
        "patience": patience,
        "use_cache": use_cache
    }


def truncate_tweet(tweet: str, max_length: int, suffix: str = "...") -> str:
    """
    Truncate a tweet to the maximum length with a suffix.

    Args:
        tweet: The tweet text to truncate
        max_length: Maximum allowed length
        suffix: Suffix to add when truncating (default: "...")

    Returns:
        Truncated tweet text
    """
    tweet = tweet.strip()
    if len(tweet) <= max_length:
        return tweet

    truncation_point = max_length - len(suffix)
    return tweet[:truncation_point] + suffix


def truncate_category_display(category: str, max_length: int = 30) -> str:
    """
    Truncate a category name for display purposes.

    Args:
        category: The category name
        max_length: Maximum display length (default: 30)

    Returns:
        Truncated category name with "..." if needed
    """
    if len(category) <= max_length:
        return category
    return category[:max_length] + "..."
hill_climbing.py (119 lines, new file)
@@ -0,0 +1,119 @@
from typing import List, Iterator, Tuple, Dict

from models import EvaluationResult
from modules import TweetGeneratorModule, TweetEvaluatorModule
from helpers import format_evaluation_for_generator


class HillClimbingOptimizer:
    """Hill-climbing optimizer for tweet improvement."""

    def __init__(
        self,
        generator: TweetGeneratorModule,
        evaluator: TweetEvaluatorModule,
        categories: List[str],
        max_iterations: int = 10,
        patience: int = 5
    ):
        self.generator = generator
        self.evaluator = evaluator
        self.categories = categories
        self.max_iterations = max_iterations
        self.patience = patience

    def optimize(self, initial_text: str) -> Iterator[Tuple[str, EvaluationResult, bool, int, Dict[str, str], Dict[str, str]]]:
        """
        Optimize a tweet using the hill-climbing algorithm.

        Yields:
            Tuple of (current_tweet, evaluation_result, is_improvement, patience_counter, generator_inputs, evaluator_inputs)
        """
        # Generate the initial tweet
        generator_inputs = {
            "input_text": initial_text,
            "current_tweet": "",
            "previous_evaluation": ""
        }
        current_tweet = self.generator(
            input_text=initial_text,
            current_tweet="",
            previous_evaluation=None
        )

        evaluator_inputs = {
            "original_text": initial_text,
            "current_best_tweet": "",
            "tweet_text": current_tweet
        }
        current_score = self.evaluator(
            tweet_text=current_tweet,
            categories=self.categories,
            original_text=initial_text,
            current_best_tweet=""
        )

        best_tweet = current_tweet
        best_score = current_score
        patience_counter = 0

        yield (current_tweet, current_score, True, patience_counter, generator_inputs, evaluator_inputs)

        for iteration in range(1, self.max_iterations):
            # Generate an improved tweet with the previous evaluation as feedback
            try:
                # Format the evaluation for display in the generator inputs
                eval_text = format_evaluation_for_generator(best_score)

                generator_inputs = {
                    "input_text": initial_text,
                    "current_tweet": best_tweet,
                    "previous_evaluation": eval_text
                }

                candidate_tweet = self.generator(
                    input_text=initial_text,
                    current_tweet=best_tweet,
                    previous_evaluation=best_score
                )

                # Evaluate the candidate
                evaluator_inputs = {
                    "original_text": initial_text,
                    "current_best_tweet": best_tweet,
                    "tweet_text": candidate_tweet
                }
                candidate_score = self.evaluator(
                    tweet_text=candidate_tweet,
                    categories=self.categories,
                    original_text=initial_text,
                    current_best_tweet=best_tweet
                )

                # Accept the candidate only if it scores higher (hill-climbing condition)
                is_improvement = candidate_score > best_score

                if is_improvement:
                    best_tweet = candidate_tweet
                    best_score = candidate_score
                    patience_counter = 0
                    yield (candidate_tweet, candidate_score, True, patience_counter, generator_inputs, evaluator_inputs)
                else:
                    patience_counter += 1
                    yield (best_tweet, candidate_score, False, patience_counter, generator_inputs, evaluator_inputs)

                # Stop early if there has been no improvement for 'patience' iterations
                if patience_counter >= self.patience:
                    break

            except Exception:
                # If generation fails, count it against patience and yield the current best
                patience_counter += 1
                evaluator_inputs = {
                    "original_text": initial_text,
                    "current_best_tweet": best_tweet,
                    "tweet_text": best_tweet
                }
                yield (best_tweet, best_score, False, patience_counter, generator_inputs, evaluator_inputs)

                if patience_counter >= self.patience:
                    break
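A minimal sketch of driving the optimizer, assuming the generator and evaluator modules from modules.py below and a DSPy LM already configured via initialize_dspy from utils.py; the input text and categories are examples:

```python
from modules import TweetGeneratorModule, TweetEvaluatorModule

optimizer = HillClimbingOptimizer(
    generator=TweetGeneratorModule(),
    evaluator=TweetEvaluatorModule(),
    categories=["Engagement potential", "Clarity and readability"],
    max_iterations=10,
    patience=5,
)

# Each yielded step reports the tweet, its evaluation, whether it improved,
# the patience counter, and the raw inputs that produced it.
for tweet, evaluation, improved, waited, gen_in, eval_in in optimizer.optimize(
    "Anthropic added a new OSS model on HuggingFace."
):
    status = "improved" if improved else "no gain"
    print(f"[{status}] total={evaluation.total_score():.0f} patience={waited}: {tweet}")
```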
49 main.py Normal file
@@ -0,0 +1,49 @@
import os

from agent.agent import TweetOptimizerAgent, TweetOptimizerConfig


def main():
    # create agent with default config
    config = TweetOptimizerConfig()
    tweet_optimizer = TweetOptimizerAgent(config)

    # require an OpenRouter API key before running
    if not os.getenv("OPENROUTER_API_KEY"):
        raise ValueError("OPENROUTER_API_KEY environment variable is not set")

    # full optimization process
    print("\n=== Full Optimization Process ===")
    try:
        results = tweet_optimizer(
            input_text="Anthropic added a new OSS model on HuggingFace.",
            iterations=10,  # reduced for testing
            patience=8
        )
        print(f"Initial text: {results['initial_text']}")
        print(f"Final tweet: {results['final_tweet']}")
        print(f"Best score: {results['best_score']:.2f}")
        print(f"Iterations run: {results['iterations_run']}")
        print(f"Improvements found: {results['improvement_count']}")
        print(f"Early stopped: {results['early_stopped']}")
    except Exception as e:
        print(f"Error in optimization: {e}")

    # push to hub
    print("\n=== Push to Hub ===")
    try:
        tweet_optimizer.push_to_hub(
            "farouk1/tweet-optimizer-v2",
            commit_message="Complete Migration",
            with_code=True
        )
        print("Successfully pushed to hub!")
    except Exception as e:
        print(f"Error pushing to hub: {e}")

    print("\n=== Agent Configuration ===")
    print(f"Model: {config.lm}")
    print(f"Categories: {config.categories}")
    print(f"Max iterations: {config.max_iterations}")
    print(f"Patience: {config.patience}")


if __name__ == "__main__":
    main()
60 models.py Normal file
@@ -0,0 +1,60 @@
from pydantic import BaseModel, Field, field_validator
from typing import List

from constants import MIN_SCORE, MAX_SCORE


class CategoryEvaluation(BaseModel):
    """Pydantic model for a single category evaluation with reasoning."""

    category: str = Field(description="The evaluation category name")
    reasoning: str = Field(description="Explanation for the score")
    score: int = Field(
        description=f"Score for this category ({MIN_SCORE}-{MAX_SCORE})",
        ge=MIN_SCORE,
        le=MAX_SCORE
    )

    @field_validator('score')
    @classmethod
    def validate_score(cls, score):
        """Ensure the score is within the valid range."""
        if not isinstance(score, int) or score < MIN_SCORE or score > MAX_SCORE:
            raise ValueError(f"Score {score} must be an integer between {MIN_SCORE} and {MAX_SCORE}")
        return score


class EvaluationResult(BaseModel):
    """Pydantic model for tweet evaluation results."""

    evaluations: List[CategoryEvaluation] = Field(
        description="List of category evaluations with reasoning and scores"
    )

    @field_validator('evaluations')
    @classmethod
    def validate_evaluations(cls, evals):
        """Ensure we have at least one evaluation."""
        if not evals or len(evals) < 1:
            raise ValueError("Must have at least one category evaluation")
        return evals

    @property
    def category_scores(self) -> List[int]:
        """Get the list of scores, for backwards compatibility."""
        return [evaluation.score for evaluation in self.evaluations]

    def total_score(self) -> float:
        """Calculate the total score across all categories."""
        return sum(evaluation.score for evaluation in self.evaluations)

    def average_score(self) -> float:
        """Calculate the average score across all categories."""
        return self.total_score() / len(self.evaluations)

    def __gt__(self, other):
        """Compare evaluation results based on total score."""
        if not isinstance(other, EvaluationResult):
            return NotImplemented
        return self.total_score() > other.total_score()

    def __eq__(self, other):
        """Check equality based on total score."""
        if not isinstance(other, EvaluationResult):
            return NotImplemented
        return self.total_score() == other.total_score()
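A small sketch of how these models compose; the scores below are hand-picked on the README's 1-9 scale, whereas in practice the evaluator module constructs the results:

```python
# Hand-built results for illustration only.
good = EvaluationResult(evaluations=[
    CategoryEvaluation(category="Clarity", reasoning="Reads cleanly.", score=8),
    CategoryEvaluation(category="Engagement", reasoning="Strong hook.", score=7),
])
weak = EvaluationResult(evaluations=[
    CategoryEvaluation(category="Clarity", reasoning="Wordy.", score=5),
    CategoryEvaluation(category="Engagement", reasoning="Flat opener.", score=4),
])

print(good.total_score())    # 15
print(good.average_score())  # 7.5
print(good > weak)           # True -- the hill climber's acceptance test
```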
128 modules.py Normal file
@@ -0,0 +1,128 @@
import dspy
from typing import List, Optional

from models import EvaluationResult, CategoryEvaluation
from constants import (
    TWEET_MAX_LENGTH,
    TWEET_TRUNCATION_SUFFIX,
    DEFAULT_SCORE,
    ERROR_PARSING,
    ERROR_VALIDATION,
    ERROR_GENERATION,
    ERROR_EVALUATION,
    MIN_SCORE,
    MAX_SCORE
)
from helpers import format_evaluation_for_generator, truncate_tweet


class TweetGenerator(dspy.Signature):
    """Generate or improve a tweet based on input text and detailed evaluation feedback with reasoning."""

    input_text: str = dspy.InputField(desc="Original text or current tweet to improve")
    current_tweet: str = dspy.InputField(desc="Current best tweet version (empty for first generation)")
    previous_evaluation: str = dspy.InputField(desc="Previous evaluation with category-by-category reasoning and scores (empty for first generation)")
    improved_tweet: str = dspy.OutputField(desc=f"Generated or improved tweet text (max {TWEET_MAX_LENGTH} characters)")


class TweetEvaluator(dspy.Signature):
    """Evaluate a tweet across multiple custom categories. For each category, provide detailed reasoning explaining the score, then assign a score. Ensure the tweet maintains the same meaning as the original text."""

    original_text: str = dspy.InputField(desc="Original input text that started the optimization")
    current_best_tweet: str = dspy.InputField(desc="Current best tweet version for comparison (empty for first evaluation)")
    tweet_text: str = dspy.InputField(desc="Tweet text to evaluate")
    categories: str = dspy.InputField(desc="Comma-separated list of evaluation category descriptions")
    evaluations: List[CategoryEvaluation] = dspy.OutputField(
        desc=f"List of evaluations with category name, detailed reasoning, and score ({MIN_SCORE}-{MAX_SCORE}) for each category. Ensure the tweet conveys the same meaning as the original text."
    )


class TweetGeneratorModule(dspy.Module):
    """DSPy module for generating and improving tweets."""

    def __init__(self):
        super().__init__()
        self.generate = dspy.ChainOfThought(TweetGenerator)

    def forward(self, input_text: str, current_tweet: str = "", previous_evaluation: Optional[EvaluationResult] = None) -> str:
        """Generate or improve a tweet."""
        try:
            # Format the previous evaluation as text
            eval_text = format_evaluation_for_generator(previous_evaluation)

            result = self.generate(
                input_text=input_text,
                current_tweet=current_tweet,
                previous_evaluation=eval_text
            )

            # Ensure the tweet doesn't exceed the character limit
            tweet = truncate_tweet(result.improved_tweet, TWEET_MAX_LENGTH, TWEET_TRUNCATION_SUFFIX)

            return tweet
        except Exception as e:
            raise Exception(f"{ERROR_GENERATION}: {str(e)}") from e


class TweetEvaluatorModule(dspy.Module):
    """DSPy module for evaluating tweets across custom categories."""

    def __init__(self):
        super().__init__()
        self.evaluate = dspy.ChainOfThought(TweetEvaluator)

    def forward(self, tweet_text: str, categories: List[str], original_text: str = "", current_best_tweet: str = "") -> EvaluationResult:
        """Evaluate a tweet across the specified categories."""
        try:
            # Join the categories into a comma-separated string
            categories_str = ", ".join(categories)

            result = self.evaluate(
                original_text=original_text,
                current_best_tweet=current_best_tweet,
                tweet_text=tweet_text,
                categories=categories_str
            )

            # Extract and validate the evaluations
            evaluations = result.evaluations

            # Ensure we have the right number of evaluations
            if len(evaluations) != len(categories):
                # Fall back to default evaluations on a count mismatch
                evaluations = [
                    CategoryEvaluation(
                        category=cat,
                        reasoning=ERROR_PARSING,
                        score=DEFAULT_SCORE
                    ) for cat in categories
                ]
            else:
                # Validate each evaluation
                validated_evals = []
                for i, evaluation in enumerate(evaluations):
                    try:
                        # Clamp the score into the valid range
                        score = max(MIN_SCORE, min(MAX_SCORE, int(evaluation.score)))
                        validated_evals.append(CategoryEvaluation(
                            category=categories[i] if i < len(categories) else evaluation.category,
                            reasoning=evaluation.reasoning if evaluation.reasoning else "No reasoning provided",
                            score=score
                        ))
                    except (ValueError, TypeError, AttributeError):
                        validated_evals.append(CategoryEvaluation(
                            category=categories[i] if i < len(categories) else "Unknown",
                            reasoning=ERROR_VALIDATION,
                            score=DEFAULT_SCORE
                        ))
                evaluations = validated_evals

            # Build the validated result
            return EvaluationResult(evaluations=evaluations)
        except Exception as e:
            # Return default evaluations on error
            default_evals = [
                CategoryEvaluation(
                    category=cat,
                    reasoning=f"{ERROR_EVALUATION}: {str(e)}",
                    score=DEFAULT_SCORE
                ) for cat in categories
            ]
            return EvaluationResult(evaluations=default_evals)
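A minimal one-shot sketch of the two modules together (assumes OPENROUTER_API_KEY is set and uses initialize_dspy from utils.py below; the input text and categories are examples):

```python
from utils import initialize_dspy

initialize_dspy()  # configures dspy with the default OpenRouter model

generator = TweetGeneratorModule()
evaluator = TweetEvaluatorModule()

draft = generator(input_text="We released a new AI model")
result = evaluator(
    tweet_text=draft,
    categories=["Engagement potential", "Clarity and readability"],
    original_text="We released a new AI model",
)
for evaluation in result.evaluations:
    print(f"{evaluation.category}: {evaluation.score} - {evaluation.reasoning}")
```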
18 pyproject.toml Normal file
@@ -0,0 +1,18 @@
[project]
name = "tweet-optimizer-v2"
version = "0.1.0"
description = "CLI tool for optimizing tweets using DSPy and a hill-climbing algorithm"
requires-python = ">=3.11"
dependencies = [
    "dspy>=3.0.3",
    "dspy-ai>=3.0.3",
    "modaic>=0.1.1",
    "pandas>=2.3.3",
    "pydantic>=2.12.2",
    "pytest>=8.4.2",
    "pytest-mock>=3.15.1",
    "requests>=2.32.5",
]

[project.scripts]
tweet-optimizer = "cli:main"
192 utils.py Normal file
@@ -0,0 +1,192 @@
import json
import os
import dspy
from typing import List, Dict, Any

from constants import (
    CATEGORIES_FILE,
    SETTINGS_FILE,
    HISTORY_FILE,
    DEFAULT_CATEGORIES,
    DEFAULT_MODEL,
    DEFAULT_ITERATIONS,
    DEFAULT_PATIENCE,
    DEFAULT_USE_CACHE,
    MAX_HISTORY_ITEMS,
    OPENROUTER_API_BASE,
    OPENROUTER_MAX_TOKENS,
    OPENROUTER_TEMPERATURE,
    ERROR_NO_API_KEY,
    ERROR_SAVE_CATEGORIES,
    ERROR_LOAD_CATEGORIES,
    ERROR_SAVE_SETTINGS,
    ERROR_LOAD_SETTINGS,
    ERROR_SAVE_HISTORY,
    ERROR_LOAD_HISTORY,
    ERROR_DSPy_INIT,
    TWEET_MAX_LENGTH
)


def save_categories(categories: List[str]) -> None:
    """Save categories to the JSON file."""
    try:
        with open(CATEGORIES_FILE, 'w') as f:
            json.dump(categories, f, indent=2)
    except Exception as e:
        print(f"{ERROR_SAVE_CATEGORIES}: {str(e)}")


def load_categories() -> List[str]:
    """Load categories from the JSON file, seeding defaults on first run."""
    try:
        if os.path.exists(CATEGORIES_FILE):
            with open(CATEGORIES_FILE, 'r') as f:
                categories = json.load(f)
                return categories if isinstance(categories, list) else []
        else:
            save_categories(DEFAULT_CATEGORIES)
            return DEFAULT_CATEGORIES
    except Exception as e:
        print(f"{ERROR_LOAD_CATEGORIES}: {str(e)}")
        return []


def get_dspy_lm(model_name: str):
    """Create a DSPy LM instance for the specified OpenRouter model."""
    try:
        openrouter_key = os.getenv("OPENROUTER_API_KEY")
        if not openrouter_key:
            raise ValueError(ERROR_NO_API_KEY)

        # GPT-5 models need a larger token budget and a fixed temperature
        max_tokens = 16000 if "openai/gpt-5" in model_name else OPENROUTER_MAX_TOKENS
        temperature = 1.0 if "openai/gpt-5" in model_name else OPENROUTER_TEMPERATURE

        lm = dspy.LM(
            model=model_name,
            api_key=openrouter_key,
            api_base=OPENROUTER_API_BASE,
            max_tokens=max_tokens,
            temperature=temperature
        )
        return lm
    except Exception as e:
        raise Exception(f"Failed to create LM: {str(e)}") from e


def initialize_dspy(model_name: str = DEFAULT_MODEL, use_cache: bool = DEFAULT_USE_CACHE) -> bool:
    """Initialize DSPy with OpenRouter and the selected model."""
    # Configure cache settings
    try:
        dspy.configure_cache(
            enable_memory_cache=use_cache,
            enable_disk_cache=use_cache
        )
    except Exception:
        # Cache configuration might fail in some environments; continue anyway
        pass

    # Only configure DSPy once globally
    if not hasattr(dspy, '_replit_configured'):
        try:
            # Get the LM for the requested model
            default_lm = get_dspy_lm(model_name)
            dspy.configure(lm=default_lm)
            dspy._replit_configured = True  # type: ignore
        except Exception as e:
            raise Exception(f"{ERROR_DSPy_INIT}: {str(e)}") from e

    return True


def format_tweet_for_display(tweet: str) -> str:
    """Format tweet text for display."""
    return tweet.strip()


def calculate_tweet_length(tweet: str) -> int:
    """Calculate the tweet length after stripping whitespace."""
    return len(tweet.strip())


def is_valid_tweet(tweet: str) -> bool:
    """Check that the tweet is non-empty and within the character limit."""
    cleaned_tweet = tweet.strip()
    return bool(cleaned_tweet) and len(cleaned_tweet) <= TWEET_MAX_LENGTH


def save_settings(settings: Dict[str, Any]) -> None:
    """Save settings to the JSON file."""
    try:
        with open(SETTINGS_FILE, 'w') as f:
            json.dump(settings, f, indent=2)
    except Exception as e:
        print(f"{ERROR_SAVE_SETTINGS}: {str(e)}")


def load_settings() -> Dict[str, Any]:
    """Load settings from the JSON file."""
    try:
        if os.path.exists(SETTINGS_FILE):
            with open(SETTINGS_FILE, 'r') as f:
                settings = json.load(f)
                return settings if isinstance(settings, dict) else get_default_settings()
        else:
            # Write and return defaults if the file doesn't exist
            default_settings = get_default_settings()
            save_settings(default_settings)
            return default_settings
    except Exception as e:
        print(f"{ERROR_LOAD_SETTINGS}: {str(e)}")
        return get_default_settings()


def get_default_settings() -> Dict[str, Any]:
    """Get the default settings."""
    return {
        "selected_model": DEFAULT_MODEL,
        "iterations": DEFAULT_ITERATIONS,
        "patience": DEFAULT_PATIENCE,
        "use_cache": DEFAULT_USE_CACHE
    }


def save_input_history(history: List[str]) -> None:
    """Save input history to the JSON file."""
    try:
        with open(HISTORY_FILE, 'w') as f:
            json.dump(history, f, indent=2)
    except Exception as e:
        print(f"{ERROR_SAVE_HISTORY}: {str(e)}")


def load_input_history() -> List[str]:
    """Load input history from the JSON file."""
    try:
        if os.path.exists(HISTORY_FILE):
            with open(HISTORY_FILE, 'r') as f:
                history = json.load(f)
                return history if isinstance(history, list) else []
        else:
            return []
    except Exception as e:
        print(f"{ERROR_LOAD_HISTORY}: {str(e)}")
        return []


def add_to_input_history(history: List[str], new_input: str) -> List[str]:
    """
    Add a new input to history, maintaining max size and avoiding duplicates.

    Args:
        history: Current history list
        new_input: New input text to add

    Returns:
        Updated history list with the new input at the beginning
    """
    # Strip whitespace from the input
    new_input = new_input.strip()

    # Don't add empty strings
    if not new_input:
        return history

    # Remove the duplicate if it exists
    if new_input in history:
        history.remove(new_input)

    # Add to the beginning of the list
    updated_history = [new_input] + history

    # Trim to the max size
    if len(updated_history) > MAX_HISTORY_ITEMS:
        updated_history = updated_history[:MAX_HISTORY_ITEMS]

    return updated_history
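A toy walk-through of the history helper (invented inputs; assumes MAX_HISTORY_ITEMS is at least 3):

```python
history = ["older idea", "tweet about DSPy"]

# Re-adding an existing entry moves it to the front instead of duplicating it
history = add_to_input_history(history, "tweet about DSPy")
print(history)  # ['tweet about DSPy', 'older idea']

# Whitespace is stripped and empty inputs are ignored
history = add_to_input_history(history, "  new draft  ")
print(history)  # ['new draft', 'tweet about DSPy', 'older idea']
```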