use new modaic nomenclature

This commit is contained in:
2025-12-28 12:11:38 -08:00
parent 7775134235
commit eef0c53100
2 changed files with 15 additions and 11 deletions

View File

@@ -28,7 +28,7 @@
"finetuning_model": null, "finetuning_model": null,
"launch_kwargs": {}, "launch_kwargs": {},
"train_kwargs": {}, "train_kwargs": {},
"temperature": null, "temperature": 0,
"max_tokens": 512 "max_tokens": 512
} }
}, },
@@ -61,7 +61,7 @@
"finetuning_model": null, "finetuning_model": null,
"launch_kwargs": {}, "launch_kwargs": {},
"train_kwargs": {}, "train_kwargs": {},
"temperature": null, "temperature": 0,
"max_tokens": 512 "max_tokens": 512
} }
}, },
@@ -94,7 +94,7 @@
"finetuning_model": null, "finetuning_model": null,
"launch_kwargs": {}, "launch_kwargs": {},
"train_kwargs": {}, "train_kwargs": {},
"temperature": null, "temperature": 0,
"max_tokens": 512 "max_tokens": 512
} }
}, },
@@ -127,7 +127,7 @@
"finetuning_model": null, "finetuning_model": null,
"launch_kwargs": {}, "launch_kwargs": {},
"train_kwargs": {}, "train_kwargs": {},
"temperature": null, "temperature": 0,
"max_tokens": 512 "max_tokens": 512
} }
}, },
@@ -160,7 +160,7 @@
"finetuning_model": null, "finetuning_model": null,
"launch_kwargs": {}, "launch_kwargs": {},
"train_kwargs": {}, "train_kwargs": {},
"temperature": null, "temperature": 0,
"max_tokens": 512 "max_tokens": 512
} }
}, },
@@ -197,7 +197,7 @@
"finetuning_model": null, "finetuning_model": null,
"launch_kwargs": {}, "launch_kwargs": {},
"train_kwargs": {}, "train_kwargs": {},
"temperature": null, "temperature": 0,
"max_tokens": 512 "max_tokens": 512
} }
}, },
@@ -234,7 +234,7 @@
"finetuning_model": null, "finetuning_model": null,
"launch_kwargs": {}, "launch_kwargs": {},
"train_kwargs": {}, "train_kwargs": {},
"temperature": null, "temperature": 0,
"max_tokens": 512 "max_tokens": 512
} }
}, },
@@ -271,7 +271,7 @@
"finetuning_model": null, "finetuning_model": null,
"launch_kwargs": {}, "launch_kwargs": {},
"train_kwargs": {}, "train_kwargs": {},
"temperature": null, "temperature": 0,
"max_tokens": 512 "max_tokens": 512
} }
}, },
@@ -308,7 +308,7 @@
"finetuning_model": null, "finetuning_model": null,
"launch_kwargs": {}, "launch_kwargs": {},
"train_kwargs": {}, "train_kwargs": {},
"temperature": null, "temperature": 0,
"max_tokens": 512 "max_tokens": 512
} }
}, },
@@ -345,7 +345,7 @@
"finetuning_model": null, "finetuning_model": null,
"launch_kwargs": {}, "launch_kwargs": {},
"train_kwargs": {}, "train_kwargs": {},
"temperature": null, "temperature": 0,
"max_tokens": 512 "max_tokens": 512
} }
}, },

View File

@@ -63,14 +63,18 @@ class AttackProgram(PrecompiledProgram):
**kwargs, **kwargs,
): ):
super().__init__(config, **kwargs) super().__init__(config, **kwargs)
attack_model = dspy.LM(model=config.lm, max_tokens=config.max_attack_tokens)
attack_model = dspy.LM(model=config.lm, max_tokens=config.max_attack_tokens, temperature=config.temperature)
self.get_response = get_response self.get_response = get_response
self.layers = config.num_layers self.layers = config.num_layers
self.try_attacks = [dspy.Predict(Attack) for _ in range(self.layers)] self.try_attacks = [dspy.Predict(Attack) for _ in range(self.layers)]
self.critique_attacks = [dspy.Predict(Refine) for _ in range(self.layers)] self.critique_attacks = [dspy.Predict(Refine) for _ in range(self.layers)]
self.target_model_name = config.target_lm self.target_model_name = config.target_lm
self.max_attack_tokens = config.max_attack_tokens self.max_attack_tokens = config.max_attack_tokens
self.temperature = config.temperature self.temperature = config.temperature
self.set_lm(attack_model) self.set_lm(attack_model)
def forward(self, harmful_intent, critique=""): def forward(self, harmful_intent, critique=""):