from modaic import PrecompiledAgent, PrecompiledConfig
from agent.modules import SignatureGenerator
import dspy


class PromptToSignatureConfig(PrecompiledConfig):
    lm: str = "gemini/gemini-2.5-pro-preview-03-25"
    refine_lm: str = "gemini/gemini-2.5-pro-preview-03-25"
    max_tokens: int = 4096
    temperature: float = 0.7
    max_attempts_to_refine: int = 5


class PromptToSignatureAgent(PrecompiledAgent):
    config: PromptToSignatureConfig

    def __init__(self, config: PromptToSignatureConfig, **kwargs):
        super().__init__(config, **kwargs)
        self.signature_generator = SignatureGenerator()
        # Re-run the generator until the reward function returns 1.0 (user approval)
        # or the attempt budget is exhausted
        self.signature_refiner = dspy.Refine(
            module=self.signature_generator,
            N=config.max_attempts_to_refine,
            reward_fn=PromptToSignatureAgent.validate_signature_with_feedback,
            threshold=1.0,
        )

        # Separate LMs for initial generation and for refinement
        lm = dspy.LM(
            model=config.lm,
            max_tokens=config.max_tokens,
            temperature=config.temperature,
        )
        refine_lm = dspy.LM(
            model=config.refine_lm,
            max_tokens=config.max_tokens,
            temperature=config.temperature,
        )

        self.signature_generator.set_lm(lm)
        self.signature_refiner.set_lm(refine_lm)

    def forward(self, prompt: str, refine: bool = False) -> dspy.Prediction:
        if not prompt:
            raise ValueError("Prompt is required!")

        if refine:
            try:
                result = self.signature_refiner(prompt=prompt)
            except Exception as e:
                print(f"Refinement failed: {e}")
                print("💡 Try adjusting your prompt or increasing max attempts")
                return None
        else:
            result = self.signature_generator(prompt)

        return result

    def generate_code(self, prediction: dspy.Prediction) -> str:
        return self.signature_generator.generate_code(prediction)

    @staticmethod  # attached metric for refinement
    def validate_signature_with_feedback(args, pred):
        """Validation function for dspy.Refine that asks the user for feedback"""

        # display the generated signature
        print("\n" + "=" * 60)
        print("🔍 Review Generated Signature")
        print("=" * 60)

        # show the signature name and description
        print(f"Signature Name: {pred.signature_name}")
        print(f"Description: {pred.task_description}")

        # show the fields in a simple format
        print(f"\nFields ({len(pred.signature_fields)}):")
        for i, field in enumerate(pred.signature_fields, 1):
            role_emoji = "📥" if field.role.value == "input" else "📤"
            print(
                f" {i}. {role_emoji} {field.name} ({field.type.value}) - {field.description}"
            )

        # ask for user approval (in an app, this would be a state variable)
        is_satisfied = input("Are you satisfied with this signature? (y/n): ")
        is_satisfied = is_satisfied.lower() == "y"

        if is_satisfied:
            print("✓ Signature approved!")
            return 1.0
        else:
            # ask for feedback (in an app, this would be a state variable)
            feedback = input("Please provide feedback for improvement: ")
            if not feedback:
                raise ValueError(
                    "Feedback is required if you are not satisfied with the signature!"
                )

            print(f"📝 Feedback recorded: {feedback}")
            return dspy.Prediction(score=0.0, feedback=feedback)
agent = PromptToSignatureAgent(PromptToSignatureConfig())
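# Example usage (sketch): generate a signature from a plain-text prompt, optionally
# refining it with interactive feedback, then render the result as code. The exact
# fields on the returned Prediction depend on SignatureGenerator; the sample prompt
# below is illustrative only.
#
#   prediction = agent(prompt="Summarize a support ticket into a category and a reply", refine=True)
#   if prediction is not None:
#       print(agent.generate_code(prediction))
#
# CR_PROMPT below is a longer, real-world prompt used to exercise the agent.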
CR_PROMPT = """ You are Charlotte, an advanced knowledge graph connection reasoning agent operating at an expert cognitive level. Your task is to discover profound, non-trivial connections between documents in a user's knowledge web that might not be immediately obvious.
|
||
|
||
Input Context:
|
||
- Primary Document (FROM): {candidate_document["model_candidate"]}
|
||
- Potential Connection Documents (TO): {candidate_document["candidates_to_link"]}
|
||
- Knowledge Web ID: {self.webId}
|
||
- Previously Mapped Connections: {self.staged_connections} as a list of dicts
|
||
- Source ID: {self.sourceId}
|
||
- Similarity scores indicate text similarity but DO NOT indicate connection quality
|
||
- Content sources include: youtube transcripts, notes, PDFs, websites, and other knowledge artifacts
|
||
|
||
CONNECTION QUALITY HIERARCHY (from lowest to highest value):
|
||
1. AVOID: Surface keyword matching ("both mention AI")
|
||
2. AVOID: Topical similarity ("both discuss machine learning")
|
||
3. MINIMAL: Direct referential links ("cites the same paper")
|
||
4. BETTER: Complementary information ("provides examples of concepts introduced in...")
|
||
5. VALUABLE: Sequential development ("builds upon the framework by adding...")
|
||
6. EXCELLENT: Conceptual bridges ("connects theoretical principles from X with practical applications in Y")
|
||
7. IDEAL: Intellectual synthesis ("reveals how these seemingly disparate ideas form a coherent perspective on...")
|
||
|
||
Advanced Connection Criteria (MUST satisfy at least one):
|
||
• Reveals multi-hop intellectual pathways (A → B → C reasoning chains)
|
||
• Exposes non-obvious causal relationships
|
||
• Identifies conceptual frameworks shared across different domains
|
||
• Uncovers temporal development of ideas across sources
|
||
• Bridges theoretical propositions with empirical evidence
|
||
• Reveals complementary perspectives on the same phenomenon
|
||
• Identifies methodological parallels across different contexts
|
||
|
||
STRICT CONSTRAINTS:
|
||
• Generate 1-2 connections ONLY if they meet the quality threshold (levels 5-7)
|
||
• No connections is better than low-quality connections
|
||
• Never refer to documents by ID or as "candidate document"/"source document"
|
||
• Use natural language that references specific content details
|
||
• Each connection must illuminate something that would be valuable for deeper understanding
|
||
• Prioritize precision over quantity
|
||
|
||
Location-Specific References:
|
||
• For videos: Convert timestamps to <a href="URL&t=TIME_IN_SECONDS" target="_blank">MM:SS</a> format
|
||
• For documents: Reference specific page numbers, sections, or paragraphs
|
||
• For websites: Reference specific headings or content sections
|
||
|
||
Output Format:
|
||
Structured JSON matching the CreateConnection model with:
|
||
1. fromSourceId (provided)
|
||
2. toSourceId (from candidates. ALWAYS REFER TO "sourceId" on the object)
|
||
3. webId (provided)
|
||
4. connection description
|
||
|
||
## Style guide for `connection description`:
|
||
- Casual, present-tense, ~15 words, proper punctuation.
|
||
- Start with the speaker or doc (“Marques says…”, “Paper X shows…”).
|
||
- Capture the **direction** implicitly: *the description should read naturally from the FROM doc’s perspective.*
|
||
- **Outgoing** example: “Marq mentions this concept → Trinetix explainer.”
|
||
- **Incoming** example: “Verge review slams it as half-baked.”
|
||
- No IDs, no quotation marks unless they are real quotes, no boilerplate.
|
||
|
||
Before finalizing each connection, verify it meets these criteria:
|
||
1. Would a subject matter expert find this connection insightful?
|
||
2. Does this connection reveal something non-obvious?
|
||
3. Would this connection enhance understanding of either document?
|
||
4. Is the connection specific enough to be meaningful?
|
||
|
||
If the answer to ANY of these questions is "no," do not create the connection.
|
||
"""
def main():
    # Optionally try refinement on the example prompt before publishing:
    # refined_result = agent(prompt=CR_PROMPT, refine=True)

    agent.push_to_hub("fadeleke/prompt-to-signature", with_code=True)


if __name__ == "__main__":
    main()