"""Text-to-Cypher DSPy program packaged as a Modaic precompiled program.

Defines the ``CypherFromText`` signature and the ``GenerateCypher`` program
that wraps it in a chain-of-thought predictor, and creates a default
``generate_cypher`` instance. Running this file as a script calls
``push_to_hub`` on that instance.
"""

import os

import dspy
from dotenv import load_dotenv
from modaic import PrecompiledProgram, PrecompiledConfig

from src.neo4j import Neo4j

# Must run before the os.getenv() calls below so values from a .env file
# are visible to them.
load_dotenv()

# Module-level Neo4j client configured from NEO4J_URI, NEO4J_USER and
# NEO4J_PASSWORD.
# NOTE(review): this is built at import time and os.getenv() yields None for
# any unset variable; confirm Neo4j() tolerates that for importers that only
# need GenerateCypher.
neo4j = Neo4j(
    uri=os.getenv("NEO4J_URI"),
    user=os.getenv("NEO4J_USER"),
    password=os.getenv("NEO4J_PASSWORD"),
)


# NOTE: the docstring of a DSPy signature is used as the instructions given
# to the language model, so its wording is part of the program's behavior.
# Do not reword it for documentation purposes.
class CypherFromText(dspy.Signature):
    """Instructions:
    Create a Cypher MERGE statement to model all entities and relationships found in the text following these guidelines:
    - Refer to the provided schema and use existing or similar nodes, properties or relationships before creating new ones.
    - Use generic categories for node and relationship labels."""

    text = dspy.InputField(
        desc="Text to model using nodes, properties and relationships."
    )
    neo4j_schema = dspy.InputField(
        desc="Current graph schema in Neo4j as a list of NODES and RELATIONSHIPS."
    )
    statement = dspy.OutputField(
        desc="Cypher statement to merge nodes and relationships found in the text."
    )


# Configuration for GenerateCypher: which model to call and the token limit
# passed to dspy.LM.
class GenerateCypherConfig(PrecompiledConfig):
    model: str = "openai/gpt-4o"
    max_tokens: int = 1024


class GenerateCypher(PrecompiledProgram):
    """Program that turns free text into a Cypher statement.

    Wraps the ``CypherFromText`` signature in ``dspy.ChainOfThought`` and
    binds it to a ``dspy.LM`` built from the supplied config.
    """

    config: GenerateCypherConfig

    def __init__(self, config: GenerateCypherConfig, **kwargs):
        """Build the language model and predictor from ``config``.

        Args:
            config: Supplies ``model`` and ``max_tokens`` for ``dspy.LM``.
            **kwargs: Forwarded unchanged to ``PrecompiledProgram.__init__``.
        """
        super().__init__(config=config, **kwargs)
        self.lm = dspy.LM(
            model=config.model,
            max_tokens=config.max_tokens,
        )
        self.generate_cypher = dspy.ChainOfThought(CypherFromText)
        # Bind the LM to this predictor explicitly instead of relying on a
        # globally configured dspy LM.
        self.generate_cypher.set_lm(self.lm)

    def forward(self, text: str, neo4j_schema: list[str]) -> dspy.Prediction:
        """Run the predictor on ``text`` against the given schema.

        Args:
            text: Text to model as nodes, properties and relationships.
            neo4j_schema: Current graph schema.
                NOTE(review): the disabled example in the ``__main__``
                section passes ``neo4j.fmt_schema()`` here; confirm that it
                actually returns ``list[str]``.

        Returns:
            The predictor's result; the generated Cypher is read from its
            ``statement`` field.
        """
        return self.generate_cypher(text=text, neo4j_schema=neo4j_schema)


# Default program instance built from the default configuration.
generate_cypher = GenerateCypher(GenerateCypherConfig())


if __name__ == "__main__":
    # Disabled example: generate a Cypher statement for every record of an
    # NDJSON examples file and run it against Neo4j. Kept for reference;
    # uncomment (including the two imports) to use it.
    #
    # from pathlib import Path
    # import json
    #
    # examples_path = Path(__file__).parent / "examples" / "wikipedia-abstracts-v0_0_1.ndjson"
    # with open(examples_path, "r") as f:
    #     for line in f:
    #         data = json.loads(line)
    #         text = data["text"]
    #         print("TEXT TO PROCESS:\n", text[:50])
    #         cypher = generate_cypher(text=text, neo4j_schema=neo4j.fmt_schema())
    #         neo4j.query(cypher.statement.replace('```', ''))
    #         print("CYPHER STATEMENT:\n", cypher.statement)
    #         schema = neo4j.fmt_schema()
    #         print("SCHEMA:\n", schema)

    generate_cypher.push_to_hub(
        "farouk1/text-to-cypher",
        with_code=True,
        tag="v0.0.6",
        commit_message="Update README.md",
    )