Files
text-to-cypher/main.py
2025-12-27 03:18:24 -08:00

73 lines
2.5 KiB
Python

import os
import dspy
from dotenv import load_dotenv
from modaic import PrecompiledProgram, PrecompiledConfig
from src.neo4j import Neo4j
# Load variables from a local .env file (if any) before the environment is read.
load_dotenv()

# Module-level Neo4j client. Connection settings come from the NEO4J_URI,
# NEO4J_USER and NEO4J_PASSWORD environment variables; a variable that is not
# set is passed through as None.
neo4j = Neo4j(
    uri=os.environ.get("NEO4J_URI"),
    user=os.environ.get("NEO4J_USER"),
    password=os.environ.get("NEO4J_PASSWORD"),
)
# DSPy signature for the text-to-Cypher task: two inputs (the text and the
# current graph schema) and one output (the Cypher statement).
# NOTE(review): the class docstring reads as the task instructions given to the
# model, so treat its wording as runtime text rather than documentation --
# confirm against the DSPy signature docs before editing it.
class CypherFromText(dspy.Signature):
    """Instructions:
    Create a Cypher MERGE statement to model all entities and relationships found in the text following these guidelines:
    - Refer to the provided schema and use existing or similar nodes, properties or relationships before creating new ones.
    - Use generic categories for node and relationship labels."""

    # Input: the free text to be modelled as a graph.
    text = dspy.InputField(
        desc="Text to model using nodes, properties and relationships."
    )
    # Input: description of the graph schema currently in Neo4j.
    neo4j_schema = dspy.InputField(
        desc="Current graph schema in Neo4j as a list of NODES and RELATIONSHIPS."
    )
    # Output: the generated Cypher statement.
    statement = dspy.OutputField(
        desc="Cypher statement to merge nodes and relationships found in the text."
    )
# Configuration consumed by GenerateCypher.__init__ to build its language model.
class GenerateCypherConfig(PrecompiledConfig):
    # Model identifier forwarded to dspy.LM as `model`.
    model: str = "openai/gpt-4o"
    # Value forwarded to dspy.LM as `max_tokens`.
    max_tokens: int = 1024
class GenerateCypher(PrecompiledProgram):
    """Program that turns text into a Cypher statement.

    Wraps a ``dspy.ChainOfThought`` predictor over the ``CypherFromText``
    signature and binds it to a language model built from the configuration.
    """

    config: GenerateCypherConfig

    def __init__(self, config: GenerateCypherConfig, **kwargs):
        """Build the language model and the predictor.

        Args:
            config: Supplies ``model`` and ``max_tokens`` for ``dspy.LM``.
            **kwargs: Forwarded unchanged to ``PrecompiledProgram.__init__``.
        """
        super().__init__(config=config, **kwargs)

        language_model = dspy.LM(
            model=config.model,
            max_tokens=config.max_tokens,
        )
        self.lm = language_model

        predictor = dspy.ChainOfThought(CypherFromText)
        # Bind the predictor to this program's own LM.
        predictor.set_lm(language_model)
        self.generate_cypher = predictor

    def forward(self, text: str, neo4j_schema: list[str]):
        """Run the predictor on ``text`` and ``neo4j_schema``.

        Args:
            text: Text to model as nodes and relationships.
            neo4j_schema: Current graph schema passed to the signature.

        Returns:
            Whatever the underlying predictor returns; callers are expected
            to read the ``statement`` output field from it.
        """
        signature_inputs = {"text": text, "neo4j_schema": neo4j_schema}
        return self.generate_cypher(**signature_inputs)
# Module-level program instance built from the default configuration.
generate_cypher = GenerateCypher(GenerateCypherConfig())


def _ingest_examples(examples_path: "str | os.PathLike[str] | None" = None) -> None:
    """Generate and execute a Cypher statement for every record of an NDJSON file.

    This is the ingestion loop that used to sit inside a bare string literal in
    the ``__main__`` block. It is kept as a real (but uncalled) helper so it
    stays syntax-checked and can be enabled with a single call.

    Args:
        examples_path: NDJSON file whose records each carry a ``"text"`` key.
            Defaults to ``examples/wikipedia-abstracts-v0_0_1.ndjson`` next to
            this file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``"text"`` key.
    """
    # Local imports: only this helper needs them.
    import json
    from pathlib import Path

    if examples_path is None:
        examples_path = Path(__file__).parent / "examples" / "wikipedia-abstracts-v0_0_1.ndjson"

    # Explicit encoding so reading does not depend on the platform default.
    with open(examples_path, "r", encoding="utf-8") as f:
        for line in f:
            # Skip blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            data = json.loads(line)
            text = data["text"]
            print("TEXT TO PROCESS:\n", text[:50])
            # The schema is re-read per record so earlier merges are visible.
            cypher = generate_cypher(text=text, neo4j_schema=neo4j.fmt_schema())
            # Remove triple-backtick fence markers before running the query.
            # NOTE(review): a language tag after the fence (e.g. "cypher")
            # would not be removed by this -- confirm model output format.
            neo4j.query(cypher.statement.replace('```', ''))
            print("CYPHER STATEMENT:\n", cypher.statement)


if __name__ == "__main__":
    # To ingest the bundled examples before publishing, call _ingest_examples().
    schema = neo4j.fmt_schema()
    print("SCHEMA:\n", schema)
    generate_cypher.push_to_hub(
        "farouk1/text-to-cypher",
        with_code=True,
        tag="v0.0.5",
        commit_message="Update README.md",
    )