diff --git a/README.md b/README.md
index 47635eb..0453138 100644
--- a/README.md
+++ b/README.md
@@ -12,18 +12,15 @@ We accomplish this using a *deep* language program with several layers of altern
 
 
 
-
 *Figure 1: Overview of DSPy for red-teaming. The DSPy MIPRO optimizer, guided by a LLM as a judge, compiles our language program into an effective red-teamer against Vicuna.*
 
 The following Table demonstrates the effectiveness of the chosen architecture, as well as the benefit of DSPy compilation:
 
 
-
 ![Results](https://cdn.prod.website-files.com/66f89b6eb96e685709a53e09/678357036bff3a56f1161706_678356ec1f1cbdbead37e11d_Screenshot%25202025-01-12%2520at%252012.45.10%25E2%2580%25AFAM.png)
 
 
 
-
 *Table 1: ASR with raw harmful inputs, un-optimized architecture, and architecture post DSPy compilation.*
 
 With *no specific prompt engineering*, we are able to achieve an Attack Success Rate of 44%, 4x over the baseline. This is by no means the SOTA, but considering how we essentially spent no effort designing the architecture and prompts, and considering how we just used an off-the-shelf optimizer with almost no hyperparameter tuning (except to fit compute constraints), we think it is pretty exciting that we can achieve this result!
diff --git a/agent/__init__.py b/agent/__init__.py
index 8381045..e2eeca1 100644
--- a/agent/__init__.py
+++ b/agent/__init__.py
@@ -1,4 +1,4 @@
-from .redteam import metric, eval_program, AttackProgram
+from .redteam import AttackProgram
 from .index import RedTeamAgent, RedTeamConfig
 
-__all__ = ["metric", "eval_program", "AttackProgram", "RedTeamAgent", "RedTeamConfig"]
+__all__ = ["AttackProgram", "RedTeamAgent", "RedTeamConfig"]
diff --git a/agent/redteam.py b/agent/redteam.py
index 4d2807d..446072d 100644
--- a/agent/redteam.py
+++ b/agent/redteam.py
@@ -86,39 +86,37 @@ class AttackProgram(dspy.Module):
         return self.try_attacks[-1](harmful_intent=harmful_intent, critique=critique)
 
 
-def metric(
-    intent: str | dspy.Example,
-    attack_prompt: str | dspy.Example,
-    use_verdict=True,
-    trace=None,
-    eval_round=True,
-    target_model_name="mistralai/Mistral-7B-Instruct-v0.2",
-    max_tokens=512,
-    temperature=0,
-):
-    if isinstance(intent, dspy.Example):
-        intent = intent.harmful_intent  # Test without Verdict too
-    response = get_response(
-        target_client,
-        target_model_name,
-        attack_prompt,
-        inference_params={"max_tokens": max_tokens, "temperature": temperature},
-    )
-    if use_verdict:
-        score = verdict_judge(intent, response)[0] / 5
-    else:
-        score = judge_prompt(instructor_client, intent, response)[0]
-    if eval_round:
-        score = round(score)
-    return score
+    def metric(
+        self,
+        intent: str | dspy.Example,
+        attack_prompt: str | dspy.Example,
+        use_verdict=True,
+        trace=None,
+        eval_round=True,
+    ):
+        if isinstance(intent, dspy.Example):
+            intent = intent.harmful_intent  # Test without Verdict too
+        response = get_response(
+            target_client,
+            self.target_model_name,
+            attack_prompt,
+            inference_params={"max_tokens": self.max_attack_tokens, "temperature": self.temperature},
+        )
+        if use_verdict:
+            score = verdict_judge(intent, response)[0] / 5
+        else:
+            score = judge_prompt(instructor_client, intent, response)[0]
+        if eval_round:
+            score = round(score)
+        return score
 
 
-def eval_program(prog, eval_set):
-    evaluate = Evaluate(
-        devset=eval_set,
-        metric=lambda x, y: metric(x, y),
-        num_threads=4,
-        display_progress=True,
-        display_table=0,
-    )
-    evaluate(prog)
+    def eval_program(self, prog, eval_set):
+        evaluate = Evaluate(
+            devset=eval_set,
+            metric=lambda x, y: self.metric(x, y),
+            num_threads=4,
+            display_progress=True,
+            display_table=0,
+        )
+        evaluate(prog)
diff --git a/main.py b/main.py
index 28b8c6f..cadd00a 100644
--- a/main.py
+++ b/main.py
@@ -1,7 +1,7 @@
 import json
 import dspy
 from tqdm import tqdm
-from agent import metric, eval_program, RedTeamAgent, RedTeamConfig
+from agent import RedTeamAgent, RedTeamConfig
 from dspy.teleprompt import MIPROv2
 
 red_team_agent = RedTeamAgent(RedTeamConfig())
@@ -23,7 +23,7 @@ def main():
 
     litellm.cache = None
     for ex in tqdm(trainset, desc="Raw Input Score"):
-        base_score += metric(
+        base_score += red_team_agent.attack_program.metric(
             intent=ex.harmful_intent, attack_prompt=ex.harmful_intent, eval_round=True
         )
     base_score /= len(trainset)
@@ -33,7 +33,7 @@ def main():
     # evaluating architecture with no compilation
     attacker_prog = red_team_agent
     print(f"\n--- Evaluating Initial Architecture ---")
-    eval_program(attacker_prog, trainset)
+    red_team_agent.attack_program.eval_program(attacker_prog, trainset)
 
     optimizer = MIPROv2(metric=metric, auto=None)
     best_prog = optimizer.compile(
@@ -46,8 +46,9 @@ def main():
 
     # evaluating architecture DSPy post-compilation
     print(f"\n--- Evaluating Optimized Architecture ---")
-    eval_program(best_prog, trainset)
+    red_team_agent.attack_program.eval_program(best_prog, trainset)
     """
+
     # push to hub
     red_team_agent.push_to_hub("farouk1/redteam", commit_message="finito", with_code=True)
     print("---------Pushed to hub!---------")