diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/agent.py b/agent.py
deleted file mode 100644
index 4d48c4b..0000000
--- a/agent.py
+++ /dev/null
@@ -1,172 +0,0 @@
-from modaic import PrecompiledAgent, PrecompiledConfig
-from modules import TweetGeneratorModule, TweetEvaluatorModule
-from models import EvaluationResult
-from hill_climbing import HillClimbingOptimizer
-from typing import Optional, List, Dict, Any
-from utils import get_dspy_lm
-from constants import DEFAULT_CATEGORIES, DEFAULT_ITERATIONS, DEFAULT_PATIENCE
-
-
-class TweetOptimizerConfig(PrecompiledConfig):
-    lm: str = "openrouter/google/gemini-2.5-flash"
-    eval_lm: str = "openrouter/openai/gpt-5"
-    categories: List[str] = DEFAULT_CATEGORIES
-    max_iterations: int = DEFAULT_ITERATIONS
-    patience: int = DEFAULT_PATIENCE
-
-
-class TweetOptimizerAgent(PrecompiledAgent):
-    config: TweetOptimizerConfig
-
-    def __init__(self, config: TweetOptimizerConfig):
-        super().__init__(config)
-        self.tweet_generator = TweetGeneratorModule()
-        self.tweet_evaluator = TweetEvaluatorModule()
-
-        # set up optimizer
-        self.optimizer = HillClimbingOptimizer(
-            generator=self.tweet_generator,
-            evaluator=self.tweet_evaluator,
-            categories=config.categories,
-            max_iterations=config.max_iterations,
-            patience=config.patience
-        )
-
-        self.lm = config.lm
-        self.eval_lm = config.eval_lm
-
-        # initialize DSPy with the specified model
-        self.tweet_generator.set_lm(get_dspy_lm(config.lm))
-        self.tweet_evaluator.set_lm(get_dspy_lm(config.eval_lm))
-
-    def forward(
-        self,
-        input_text: str,
-        current_tweet: str = "",
-        previous_evaluation: Optional[EvaluationResult] = None,
-    ) -> str:
-        """Generate a single optimized tweet (single iteration)."""
-        tweet = self.tweet_generator(input_text, current_tweet, previous_evaluation)
-        return tweet
-
-    def optimize(
-        self,
-        input_text: str,
-        iterations: Optional[int] = None,
-        patience: Optional[int] = None
-    ) -> Dict[str, Any]:
-        """Run full optimization process like the CLI."""
-        max_iterations = iterations or self.config.max_iterations
-        patience_limit = patience or self.config.patience
-
-        results = {
-            'initial_text': input_text,
-            'final_tweet': '',
-            'best_score': 0.0,
-            'iterations_run': 0,
-            'early_stopped': False,
-            'scores_history': [],
-            'improvement_count': 0
-        }
-
-        best_tweet = ""
-        best_score = 0.0
-
-        for iteration, (current_tweet, scores, is_improvement, patience_counter, _, _) in enumerate(
-            self.optimizer.optimize(input_text)
-        ):
-            iteration_num = iteration + 1
-            results['iterations_run'] = iteration_num
-            results['scores_history'].append(scores)
-
-            if is_improvement:
-                best_tweet = current_tweet
-                best_score = sum(scores.category_scores) / len(scores.category_scores)
-                results['improvement_count'] += 1
-
-            # check for early stopping
-            if patience_counter >= patience_limit:
-                results['early_stopped'] = True
-                break
-
-            # stop at max iterations
-            if iteration_num >= max_iterations:
-                break
-
-        results.update({
-            'final_tweet': best_tweet,
-            'best_score': best_score
-        })
-
-        return results
-
-    def evaluate_tweet(
-        self,
-        tweet_text: str,
-        original_text: str = "",
-        current_best_tweet: str = ""
-    ) -> EvaluationResult:
-        """Evaluate a tweet using the configured categories."""
-        return self.tweet_evaluator(tweet_text, self.config.categories, original_text, current_best_tweet)
-
-
-if __name__ == "__main__":
-    # create agent with default config
-    config = TweetOptimizerConfig()
-    tweet_optimizer = TweetOptimizerAgent(config)
-    """
-    import os
-
-    # set up test environment (replace with real API key for actual usage)
-    if not os.getenv("OPENROUTER_API_KEY"):
-        raise ValueError("OPENROUTER_API_KEY environment variable is not set")
-
-
-
-    # single tweet generation
-    print("=== Single Tweet Generation ===")
-    try:
-        single_tweet = tweet_optimizer(
-            input_text="Anthropic added a new OSS model on HuggingFace.",
-            current_tweet="",
-            previous_evaluation=None,
-        )
-        print(f"Generated tweet: {single_tweet}")
-    except Exception as e:
-        print(f"Error in single generation: {e}")
-
-    # full optimization process
-    print("\n=== Full Optimization Process ===")
-    try:
-        results = tweet_optimizer.optimize(
-            input_text="Anthropic added a new OSS model on HuggingFace.",
-            iterations=10,  # Reduced for testing
-            patience=8
-        )
-        print(f"Initial text: {results['initial_text']}")
-        print(f"Final tweet: {results['final_tweet']}")
-        print(f"Best score: {results['best_score']:.2f}")
-        print(f"Iterations run: {results['iterations_run']}")
-        print(f"Improvements found: {results['improvement_count']}")
-        print(f"Early stopped: {results['early_stopped']}")
-    except Exception as e:
-        print(f"Error in optimization: {e}")
-    """
-    # push to hub
-    print("\n=== Push to Hub ===")
-    try:
-        tweet_optimizer.push_to_hub(
-            "farouk1/tweet-optimizer-v2",
-            commit_message="Complete Migration",
-            with_code=True
-        )
-        print("Successfully pushed to hub!")
-    except Exception as e:
-        print(f"Error pushing to hub: {e}")
-    """
-    print("\n=== Agent Configuration ===")
-    print(f"Model: {config.lm}")
-    print(f"Categories: {config.categories}")
-    print(f"Max iterations: {config.max_iterations}")
-    print(f"Patience: {config.patience}")
-    """
\ No newline at end of file
diff --git a/agent/__pycache__/__init__.cpython-310.pyc b/agent/__pycache__/__init__.cpython-310.pyc
deleted file mode 100644
index 992044a..0000000
Binary files a/agent/__pycache__/__init__.cpython-310.pyc and /dev/null differ
diff --git a/agent/__pycache__/agent.cpython-310.pyc b/agent/__pycache__/agent.cpython-310.pyc
deleted file mode 100644
index 39d31ee..0000000
Binary files a/agent/__pycache__/agent.cpython-310.pyc and /dev/null differ
diff --git a/agent/__pycache__/constants.cpython-310.pyc b/agent/__pycache__/constants.cpython-310.pyc
deleted file mode 100644
index 8b7a0a1..0000000
Binary files a/agent/__pycache__/constants.cpython-310.pyc and /dev/null differ
diff --git a/agent/__pycache__/helpers.cpython-310.pyc b/agent/__pycache__/helpers.cpython-310.pyc
deleted file mode 100644
index 8824895..0000000
Binary files a/agent/__pycache__/helpers.cpython-310.pyc and /dev/null differ
diff --git a/agent/__pycache__/hill_climbing.cpython-310.pyc b/agent/__pycache__/hill_climbing.cpython-310.pyc
deleted file mode 100644
index 3329097..0000000
Binary files a/agent/__pycache__/hill_climbing.cpython-310.pyc and /dev/null differ
diff --git a/agent/__pycache__/index.cpython-310.pyc b/agent/__pycache__/index.cpython-310.pyc
deleted file mode 100644
index f2316f1..0000000
Binary files a/agent/__pycache__/index.cpython-310.pyc and /dev/null differ
diff --git a/agent/__pycache__/models.cpython-310.pyc b/agent/__pycache__/models.cpython-310.pyc
deleted file mode 100644
index 372dac3..0000000
Binary files a/agent/__pycache__/models.cpython-310.pyc and /dev/null differ
diff --git a/agent/__pycache__/modules.cpython-310.pyc b/agent/__pycache__/modules.cpython-310.pyc
deleted file mode 100644
index 47e0184..0000000
Binary files a/agent/__pycache__/modules.cpython-310.pyc and /dev/null differ
diff --git a/agent/__pycache__/utils.cpython-310.pyc b/agent/__pycache__/utils.cpython-310.pyc
deleted file mode 100644
index b003ec8..0000000
Binary files a/agent/__pycache__/utils.cpython-310.pyc and /dev/null differ
diff --git a/constants.py b/constants.py
deleted file mode 100644
index 1eb4de6..0000000
--- a/constants.py
+++ /dev/null
@@ -1,75 +0,0 @@
-from typing import Dict, List
-
-# tweet configuration
-TWEET_MAX_LENGTH = 280
-TWEET_TRUNCATION_SUFFIX = "..."
-TWEET_TRUNCATION_LENGTH = TWEET_MAX_LENGTH - len(TWEET_TRUNCATION_SUFFIX)
-
-# score configuration
-MIN_SCORE = 1
-MAX_SCORE = 9
-DEFAULT_SCORE = 5
-
-# file paths
-CATEGORIES_FILE = "categories.json"
-SETTINGS_FILE = "settings.json"
-HISTORY_FILE = "input_history.json"
-
-# history configuration
-MAX_HISTORY_ITEMS = 50  # maximum number of historical inputs to store
-
-# model configuration
-DEFAULT_MODEL = "openrouter/anthropic/claude-sonnet-4.5"
-
-AVAILABLE_MODELS: Dict[str, str] = {
-    "Claude Sonnet 4.5": "openrouter/anthropic/claude-sonnet-4.5",
-    "Opus 4.1": "openrouter/anthropic/claude-opus-4.1",
-    "Gemini 2.5 Flash": "openrouter/google/gemini-2.5-flash",
-    "Gemini 2.5 Flash Lite": "openrouter/google/gemini-2.5-flash-lite",
-    "Gemini 2.5 Pro": "openrouter/google/gemini-2.5-pro",
-    "GPT-5": "openrouter/openai/gpt-5"
-}
-
-# openrouter API configuration
-OPENROUTER_API_BASE = "https://openrouter.ai/api/v1"
-OPENROUTER_MAX_TOKENS = 4096
-OPENROUTER_TEMPERATURE = 0.7
-
-# optimization defaults
-DEFAULT_ITERATIONS = 10
-DEFAULT_PATIENCE = 5
-DEFAULT_USE_CACHE = True
-
-# default evaluation categories
-DEFAULT_CATEGORIES: List[str] = [
-    "Engagement potential - how likely users are to like, retweet, or reply",
-    "Clarity and readability - how easy the tweet is to understand",
-    "Emotional impact - how well the tweet evokes feelings or reactions",
-    "Relevance to target audience - how well it resonates with intended readers"
-]
-
-# error messages
-ERROR_PARSING = "Default evaluation due to parsing error"
-ERROR_VALIDATION = "Default evaluation due to validation error"
-ERROR_GENERATION = "Tweet generation failed"
-ERROR_EVALUATION = "Tweet evaluation failed"
-ERROR_DSPy_INIT = "DSPy initialization failed"
-ERROR_NO_API_KEY = "OPENROUTER_API_KEY environment variable is required"
-ERROR_SAVE_CATEGORIES = "Failed to save categories"
-ERROR_LOAD_CATEGORIES = "Failed to load categories"
-ERROR_SAVE_SETTINGS = "Failed to save settings"
-ERROR_LOAD_SETTINGS = "Failed to load settings"
-ERROR_SAVE_HISTORY = "Failed to save input history"
-ERROR_LOAD_HISTORY = "Failed to load input history"
-
-# cache configuration
-CACHE_ENABLE_MEMORY = True
-CACHE_ENABLE_DISK = True
-
-# iteration display
-ITERATION_SLEEP_TIME = 0.1  # seconds
-
-# truncation display
-CATEGORY_DISPLAY_MAX_LENGTH = 30
-CATEGORY_DISPLAY_TRUNCATION = "..."
-CATEGORY_IMPROVEMENT_MAX_LENGTH = 50
diff --git a/helpers.py b/helpers.py
deleted file mode 100644
index 5be03ed..0000000
--- a/helpers.py
+++ /dev/null
@@ -1,85 +0,0 @@
-from typing import Optional, Dict, Any
-from models import EvaluationResult
-from constants import MAX_SCORE
-
-
-def format_evaluation_for_generator(evaluation: Optional[EvaluationResult]) -> str:
-    """
-    Format an evaluation result as text for the generator module.
-
-    Args:
-        evaluation: The evaluation result to format
-
-    Returns:
-        Formatted string with category-by-category reasoning and scores
-    """
-    if not evaluation or not evaluation.evaluations:
-        return ""
-
-    eval_lines = []
-    for eval in evaluation.evaluations:
-        eval_lines.append(f"{eval.category} (Score: {eval.score}/{MAX_SCORE}): {eval.reasoning}")
-
-    return "\n".join(eval_lines)
-
-
-def build_settings_dict(
-    selected_model: str,
-    iterations: int,
-    patience: int,
-    use_cache: bool
-) -> Dict[str, Any]:
-    """
-    Build a settings dictionary for saving.
-
-    Args:
-        selected_model: The selected model name
-        iterations: Number of optimization iterations
-        patience: Patience threshold for early stopping
-        use_cache: Whether to use DSPy cache
-
-    Returns:
-        Dictionary containing all settings
-    """
-    return {
-        "selected_model": selected_model,
-        "iterations": iterations,
-        "patience": patience,
-        "use_cache": use_cache
-    }
-
-
-def truncate_tweet(tweet: str, max_length: int, suffix: str = "...") -> str:
-    """
-    Truncate a tweet to the maximum length with a suffix.
-
-    Args:
-        tweet: The tweet text to truncate
-        max_length: Maximum allowed length
-        suffix: Suffix to add when truncating (default: "...")
-
-    Returns:
-        Truncated tweet text
-    """
-    tweet = tweet.strip()
-    if len(tweet) <= max_length:
-        return tweet
-
-    truncation_point = max_length - len(suffix)
-    return tweet[:truncation_point] + suffix
-
-
-def truncate_category_display(category: str, max_length: int = 30) -> str:
-    """
-    Truncate a category name for display purposes.
-
-    Args:
-        category: The category name
-        max_length: Maximum display length (default: 30)
-
-    Returns:
-        Truncated category name with "..." if needed
-    """
-    if len(category) <= max_length:
-        return category
-    return category[:max_length] + "..."
diff --git a/hill_climbing.py b/hill_climbing.py
deleted file mode 100644
index 0b5a1e5..0000000
--- a/hill_climbing.py
+++ /dev/null
@@ -1,119 +0,0 @@
-from typing import List, Iterator, Tuple, Dict
-from models import EvaluationResult
-from agent.modules import TweetGeneratorModule, TweetEvaluatorModule
-from helpers import format_evaluation_for_generator
-
-class HillClimbingOptimizer:
-    """Hill climbing optimizer for tweet improvement."""
-
-    def __init__(
-        self,
-        generator: TweetGeneratorModule,
-        evaluator: TweetEvaluatorModule,
-        categories: List[str],
-        max_iterations: int = 10,
-        patience: int = 5
-    ):
-        self.generator = generator
-        self.evaluator = evaluator
-        self.categories = categories
-        self.max_iterations = max_iterations
-        self.patience = patience
-
-    def optimize(self, initial_text: str) -> Iterator[Tuple[str, EvaluationResult, bool, int, Dict[str, str], Dict[str, str]]]:
-        """
-        Optimize tweet using hill climbing algorithm.
-
-        Yields:
-            Tuple of (current_tweet, evaluation_result, is_improvement, patience_counter, generator_inputs, evaluator_inputs)
-        """
-        # Generate initial tweet
-        generator_inputs = {
-            "input_text": initial_text,
-            "current_tweet": "",
-            "previous_evaluation": ""
-        }
-        current_tweet = self.generator(
-            input_text=initial_text,
-            current_tweet="",
-            previous_evaluation=None
-        )
-
-        evaluator_inputs = {
-            "original_text": initial_text,
-            "current_best_tweet": "",
-            "tweet_text": current_tweet
-        }
-        current_score = self.evaluator(
-            tweet_text=current_tweet,
-            categories=self.categories,
-            original_text=initial_text,
-            current_best_tweet=""
-        )
-
-        best_tweet = current_tweet
-        best_score = current_score
-        patience_counter = 0
-
-        yield (current_tweet, current_score, True, patience_counter, generator_inputs, evaluator_inputs)
-
-        for iteration in range(1, self.max_iterations):
-            # Generate improved tweet with previous evaluation as feedback
-            try:
-                # Format evaluation for display in generator inputs
-                eval_text = format_evaluation_for_generator(best_score)
-
-                generator_inputs = {
-                    "input_text": initial_text,
-                    "current_tweet": best_tweet,
-                    "previous_evaluation": eval_text
-                }
-
-                candidate_tweet = self.generator(
-                    input_text=initial_text,
-                    current_tweet=best_tweet,
-                    previous_evaluation=best_score
-                )
-
-                # Evaluate candidate
-                evaluator_inputs = {
-                    "original_text": initial_text,
-                    "current_best_tweet": best_tweet,
-                    "tweet_text": candidate_tweet
-                }
-                candidate_score = self.evaluator(
-                    tweet_text=candidate_tweet,
-                    categories=self.categories,
-                    original_text=initial_text,
-                    current_best_tweet=best_tweet
-                )
-
-                # Check if candidate is better (hill climbing condition)
-                is_improvement = candidate_score > best_score
-
-                if is_improvement:
-                    best_tweet = candidate_tweet
-                    best_score = candidate_score
-                    patience_counter = 0
-                    yield (candidate_tweet, candidate_score, True, patience_counter, generator_inputs, evaluator_inputs)
-                else:
-                    patience_counter += 1
-                    yield (best_tweet, candidate_score, False, patience_counter, generator_inputs, evaluator_inputs)
-
-                # Early stopping if no improvement for 'patience' iterations
-                if patience_counter >= self.patience:
-                    break
-
-            except Exception as e:
-                # If generation fails, yield current best
-                patience_counter += 1
-                evaluator_inputs = {
-                    "original_text": initial_text,
-                    "current_best_tweet": best_tweet,
-                    "tweet_text": best_tweet
-                }
-                yield (best_tweet, best_score, False, patience_counter, generator_inputs, evaluator_inputs)
-
-                if patience_counter >= self.patience:
-                    break
-
diff --git a/models.py b/models.py
deleted file mode 100644
index 08dd15e..0000000
--- a/models.py
+++ /dev/null
@@ -1,60 +0,0 @@
-from pydantic import BaseModel, Field, validator
-from typing import List
-from constants import MIN_SCORE, MAX_SCORE
-
-class CategoryEvaluation(BaseModel):
-    """Pydantic model for a single category evaluation with reasoning."""
-
-    category: str = Field(description="The evaluation category name")
-    reasoning: str = Field(description="Explanation for the score")
-    score: int = Field(
-        description=f"Score for this category ({MIN_SCORE}-{MAX_SCORE})",
-        ge=MIN_SCORE,
-        le=MAX_SCORE
-    )
-
-    @validator('score')
-    def validate_score(cls, score):
-        """Ensure score is within the valid range."""
-        if not isinstance(score, int) or score < MIN_SCORE or score > MAX_SCORE:
-            raise ValueError(f"Score {score} must be an integer between {MIN_SCORE} and {MAX_SCORE}")
-        return score
-
-class EvaluationResult(BaseModel):
-    """Pydantic model for tweet evaluation results."""
-
-    evaluations: List[CategoryEvaluation] = Field(
-        description="List of category evaluations with reasoning and scores"
-    )
-
-    @validator('evaluations')
-    def validate_evaluations(cls, evals):
-        """Ensure we have at least one evaluation."""
-        if not evals or len(evals) < 1:
-            raise ValueError("Must have at least one category evaluation")
-        return evals
-
-    @property
-    def category_scores(self) -> List[int]:
-        """Get list of scores for backwards compatibility."""
-        return [eval.score for eval in self.evaluations]
-
-    def total_score(self) -> float:
-        """Calculate the total score across all categories."""
-        return sum(eval.score for eval in self.evaluations)
-
-    def average_score(self) -> float:
-        """Calculate the average score across all categories."""
-        return self.total_score() / len(self.evaluations)
-
-    def __gt__(self, other):
-        """Compare evaluation results based on total score."""
-        if not isinstance(other, EvaluationResult):
-            return NotImplemented
-        return self.total_score() > other.total_score()
-
-    def __eq__(self, other):
-        """Check equality based on total score."""
-        if not isinstance(other, EvaluationResult):
-            return NotImplemented
-        return self.total_score() == other.total_score()
diff --git a/modules.py b/modules.py
deleted file mode 100644
index f4d034e..0000000
--- a/modules.py
+++ /dev/null
@@ -1,128 +0,0 @@
-import dspy
-from typing import List, Optional
-from models import EvaluationResult, CategoryEvaluation
-from constants import (
-    TWEET_MAX_LENGTH,
-    TWEET_TRUNCATION_SUFFIX,
-    DEFAULT_SCORE,
-    ERROR_PARSING,
-    ERROR_VALIDATION,
-    ERROR_GENERATION,
-    ERROR_EVALUATION,
-    MIN_SCORE,
-    MAX_SCORE
-)
-from helpers import format_evaluation_for_generator, truncate_tweet
-
-class TweetGenerator(dspy.Signature):
-    """Generate or improve a tweet based on input text and detailed evaluation feedback with reasoning."""
-
-    input_text: str = dspy.InputField(desc="Original text or current tweet to improve")
-    current_tweet: str = dspy.InputField(desc="Current best tweet version (empty for first generation)")
-    previous_evaluation: str = dspy.InputField(desc="Previous evaluation with category-by-category reasoning and scores (empty for first generation)")
-    improved_tweet: str = dspy.OutputField(desc=f"Generated or improved tweet text (max {TWEET_MAX_LENGTH} characters)")
-
-class TweetEvaluator(dspy.Signature):
-    """Evaluate a tweet across multiple custom categories. For each category, provide detailed reasoning explaining the score, then assign a score. Ensure the tweet maintains the same meaning as the original text."""
-
-    original_text: str = dspy.InputField(desc="Original input text that started the optimization")
-    current_best_tweet: str = dspy.InputField(desc="Current best tweet version for comparison (empty for first evaluation)")
-    tweet_text: str = dspy.InputField(desc="Tweet text to evaluate")
-    categories: str = dspy.InputField(desc="Comma-separated list of evaluation category descriptions")
-    evaluations: List[CategoryEvaluation] = dspy.OutputField(
-        desc=f"List of evaluations with category name, detailed reasoning, and score ({MIN_SCORE}-{MAX_SCORE}) for each category. Ensure the tweet conveys the same meaning as the original text."
-    )
-
-class TweetGeneratorModule(dspy.Module):
-    """DSPy module for generating and improving tweets."""
-
-    def __init__(self):
-        super().__init__()
-        self.generate = dspy.ChainOfThought(TweetGenerator)
-
-    def forward(self, input_text: str, current_tweet: str = "", previous_evaluation: Optional[EvaluationResult] = None) -> str:
-        """Generate or improve a tweet."""
-        try:
-            # Format previous evaluation as text
-            eval_text = format_evaluation_for_generator(previous_evaluation)
-
-            result = self.generate(
-                input_text=input_text,
-                current_tweet=current_tweet,
-                previous_evaluation=eval_text
-            )
-
-            # Ensure tweet doesn't exceed character limit
-            tweet = truncate_tweet(result.improved_tweet, TWEET_MAX_LENGTH, TWEET_TRUNCATION_SUFFIX)
-
-            return tweet
-        except Exception as e:
-            raise Exception(f"{ERROR_GENERATION}: {str(e)}")
-
-class TweetEvaluatorModule(dspy.Module):
-    """DSPy module for evaluating tweets across custom categories."""
-
-    def __init__(self):
-        super().__init__()
-        self.evaluate = dspy.ChainOfThought(TweetEvaluator)
-
-    def forward(self, tweet_text: str, categories: List[str], original_text: str = "", current_best_tweet: str = "") -> EvaluationResult:
-        """Evaluate a tweet across specified categories."""
-        try:
-            # Join categories into comma-separated string
-            categories_str = ", ".join(categories)
-
-            result = self.evaluate(
-                original_text=original_text,
-                current_best_tweet=current_best_tweet,
-                tweet_text=tweet_text,
-                categories=categories_str
-            )
-
-            # Extract and validate evaluations
-            evaluations = result.evaluations
-
-            # Ensure we have the right number of evaluations
-            if len(evaluations) != len(categories):
-                # Create default evaluations if mismatch
-                evaluations = [
-                    CategoryEvaluation(
-                        category=cat,
-                        reasoning=ERROR_PARSING,
-                        score=DEFAULT_SCORE
-                    ) for cat in categories
-                ]
-            else:
-                # Validate each evaluation
-                validated_evals = []
-                for i, eval in enumerate(evaluations):
-                    try:
-                        # Ensure score is valid
-                        score = max(MIN_SCORE, min(MAX_SCORE, int(eval.score)))
-                        validated_evals.append(CategoryEvaluation(
-                            category=categories[i] if i < len(categories) else eval.category,
-                            reasoning=eval.reasoning if eval.reasoning else "No reasoning provided",
-                            score=score
-                        ))
-                    except (ValueError, TypeError, AttributeError):
-                        validated_evals.append(CategoryEvaluation(
-                            category=categories[i] if i < len(categories) else "Unknown",
-                            reasoning=ERROR_VALIDATION,
-                            score=DEFAULT_SCORE
-                        ))
-                evaluations = validated_evals
-
-            # Create validated result
-            validated_result = EvaluationResult(evaluations=evaluations)
-
-            return validated_result
-        except Exception as e:
-            # Return default evaluations on error
-            default_evals = [
-                CategoryEvaluation(
-                    category=cat,
-                    reasoning=f"{ERROR_EVALUATION}: {str(e)}",
-                    score=DEFAULT_SCORE
-                ) for cat in categories
-            ]
-            return EvaluationResult(evaluations=default_evals)
diff --git a/pyproject.toml b/pyproject.toml
index e523742..5109fe9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,16 +3,7 @@ name = "tweet-optimizer-v2"
 version = "0.1.0"
 description = "CLI tool for optimizing tweets using DSPy and hill-climbing algorithm"
 requires-python = ">=3.11"
-dependencies = [
-    "dspy>=3.0.3",
-    "dspy-ai>=3.0.3",
-    "modaic>=0.1.1",
-    "pandas>=2.3.3",
-    "pydantic>=2.12.2",
-    "pytest>=8.4.2",
-    "pytest-mock>=3.15.1",
-    "requests>=2.32.5",
-]
+dependencies = ["dspy>=3.0.3", "dspy-ai>=3.0.3", "modaic>=0.3.0", "pandas>=2.3.3", "pydantic>=2.12.2", "pytest>=8.4.2", "pytest-mock>=3.15.1", "requests>=2.32.5"]
 
 [project.scripts]
 tweet-optimizer = "cli:main"
diff --git a/utils.py b/utils.py
deleted file mode 100644
index c99aea2..0000000
--- a/utils.py
+++ /dev/null
@@ -1,192 +0,0 @@
-import json
-import os
-import dspy
-from typing import List, Dict, Any
-from constants import (
-    CATEGORIES_FILE,
-    SETTINGS_FILE,
-    HISTORY_FILE,
-    DEFAULT_CATEGORIES,
-    DEFAULT_MODEL,
-    DEFAULT_ITERATIONS,
-    DEFAULT_PATIENCE,
-    DEFAULT_USE_CACHE,
-    MAX_HISTORY_ITEMS,
-    OPENROUTER_API_BASE,
-    OPENROUTER_MAX_TOKENS,
-    OPENROUTER_TEMPERATURE,
-    ERROR_NO_API_KEY,
-    ERROR_SAVE_CATEGORIES,
-    ERROR_LOAD_CATEGORIES,
-    ERROR_SAVE_SETTINGS,
-    ERROR_LOAD_SETTINGS,
-    ERROR_SAVE_HISTORY,
-    ERROR_LOAD_HISTORY,
-    ERROR_DSPy_INIT,
-    TWEET_MAX_LENGTH
-)
-
-def save_categories(categories: List[str]) -> None:
-    """Save categories to JSON file."""
-    try:
-        with open(CATEGORIES_FILE, 'w') as f:
-            json.dump(categories, f, indent=2)
-    except Exception as e:
-        print(f"{ERROR_SAVE_CATEGORIES}: {str(e)}")
-
-def load_categories() -> List[str]:
-    """Load categories from JSON file."""
-    try:
-        if os.path.exists(CATEGORIES_FILE):
-            with open(CATEGORIES_FILE, 'r') as f:
-                categories = json.load(f)
-                return categories if isinstance(categories, list) else []
-        else:
-            save_categories(DEFAULT_CATEGORIES)
-            return DEFAULT_CATEGORIES
-    except Exception as e:
-        print(f"{ERROR_LOAD_CATEGORIES}: {str(e)}")
-        return []
-
-def get_dspy_lm(model_name: str):
-    """Get a DSPy LM instance for the specified model (cached per model)."""
-    try:
-        openrouter_key = os.getenv("OPENROUTER_API_KEY")
-        if not openrouter_key:
-            raise ValueError(ERROR_NO_API_KEY)
-
-        max_tokens = 16000 if "openai/gpt-5" in model_name else OPENROUTER_MAX_TOKENS
-        temperature = 1.0 if "openai/gpt-5" in model_name else OPENROUTER_TEMPERATURE
-
-        lm = dspy.LM(
-            model=model_name,
-            api_key=openrouter_key,
-            api_base=OPENROUTER_API_BASE,
-            max_tokens=max_tokens,
-            temperature=temperature
-        )
-        return lm
-    except Exception as e:
-        raise Exception(f"Failed to create LM: {str(e)}")
-
-def initialize_dspy(model_name: str = DEFAULT_MODEL, use_cache: bool = DEFAULT_USE_CACHE) -> bool:
-    """Initialize DSPy with OpenRouter and selected model."""
-    # Configure cache settings
-    try:
-        dspy.configure_cache(
-            enable_memory_cache=use_cache,
-            enable_disk_cache=use_cache
-        )
-    except Exception:
-        # Cache configuration might fail in some environments, continue anyway
-        pass
-
-    # Only configure DSPy once globally
-    if not hasattr(dspy, '_replit_configured'):
-        try:
-            # Get the LM for the default model
-            default_lm = get_dspy_lm(model_name)
-            dspy.configure(lm=default_lm)
-            dspy._replit_configured = True  # type: ignore
-        except Exception as e:
-            raise Exception(f"{ERROR_DSPy_INIT}: {str(e)}")
-
-    return True
-
-def format_tweet_for_display(tweet: str) -> str:
-    """Format tweet text for better display."""
-    return tweet.strip()
-
-def calculate_tweet_length(tweet: str) -> int:
-    """Calculate tweet length."""
-    return len(tweet.strip())
-
-def is_valid_tweet(tweet: str) -> bool:
-    """Check if tweet is valid (not empty and within character limit)."""
-    cleaned_tweet = tweet.strip()
-    return bool(cleaned_tweet) and len(cleaned_tweet) <= TWEET_MAX_LENGTH
-
-def save_settings(settings: Dict[str, Any]) -> None:
-    """Save settings to JSON file."""
-    try:
-        with open(SETTINGS_FILE, 'w') as f:
-            json.dump(settings, f, indent=2)
-    except Exception as e:
-        print(f"{ERROR_SAVE_SETTINGS}: {str(e)}")
-
-def load_settings() -> Dict[str, Any]:
-    """Load settings from JSON file."""
-    try:
-        if os.path.exists(SETTINGS_FILE):
-            with open(SETTINGS_FILE, 'r') as f:
-                settings = json.load(f)
-                return settings if isinstance(settings, dict) else get_default_settings()
-        else:
-            # Return default settings if file doesn't exist
-            default_settings = get_default_settings()
-            save_settings(default_settings)
-            return default_settings
-    except Exception as e:
-        print(f"{ERROR_LOAD_SETTINGS}: {str(e)}")
-        return get_default_settings()
-
-def get_default_settings() -> Dict[str, Any]:
-    """Get default settings."""
-    return {
-        "selected_model": DEFAULT_MODEL,
-        "iterations": DEFAULT_ITERATIONS,
-        "patience": DEFAULT_PATIENCE,
-        "use_cache": DEFAULT_USE_CACHE
-    }
-
-def save_input_history(history: List[str]) -> None:
-    """Save input history to JSON file."""
-    try:
-        with open(HISTORY_FILE, 'w') as f:
-            json.dump(history, f, indent=2)
-    except Exception as e:
-        print(f"{ERROR_SAVE_HISTORY}: {str(e)}")
-
-def load_input_history() -> List[str]:
-    """Load input history from JSON file."""
-    try:
-        if os.path.exists(HISTORY_FILE):
-            with open(HISTORY_FILE, 'r') as f:
-                history = json.load(f)
-                return history if isinstance(history, list) else []
-        else:
-            return []
-    except Exception as e:
-        print(f"{ERROR_LOAD_HISTORY}: {str(e)}")
-        return []
-
-def add_to_input_history(history: List[str], new_input: str) -> List[str]:
-    """
-    Add a new input to history, maintaining max size and avoiding duplicates.
-
-    Args:
-        history: Current history list
-        new_input: New input text to add
-
-    Returns:
-        Updated history list with new input at the beginning
-    """
-    # Strip whitespace from input
-    new_input = new_input.strip()
-
-    # Don't add empty strings
-    if not new_input:
-        return history
-
-    # Remove duplicate if it exists
-    if new_input in history:
-        history.remove(new_input)
-
-    # Add to beginning of list
-    updated_history = [new_input] + history
-
-    # Trim to max size
-    if len(updated_history) > MAX_HISTORY_ITEMS:
-        updated_history = updated_history[:MAX_HISTORY_ITEMS]
-
-    return updated_history