74 lines
2.5 KiB
Python
74 lines
2.5 KiB
Python
import os
|
|
import dspy
|
|
from dotenv import load_dotenv
|
|
from modaic import PrecompiledProgram, PrecompiledConfig
|
|
from src.neo4j import Neo4j
|
|
|
|
# Load environment variables (python-dotenv) before the connection settings
# below are looked up.
load_dotenv()

# Module-level Neo4j client configured from NEO4J_URI, NEO4J_USER and
# NEO4J_PASSWORD. A variable that is not set is passed through as None.
neo4j = Neo4j(
    uri=os.environ.get("NEO4J_URI"),
    user=os.environ.get("NEO4J_USER"),
    password=os.environ.get("NEO4J_PASSWORD"),
)
|
|
|
|
class CypherFromText(dspy.Signature):
    """Instructions:
    Create a Cypher MERGE statement to model all entities and relationships found in the text following these guidelines:
    - Refer to the provided schema and use existing or similar nodes, properties or relationships before creating new ones.
    - Use generic categories for node and relationship labels."""

    # NOTE: the docstring above is kept word-for-word on purpose. DSPy reads a
    # Signature's docstring as the task instructions, so rewording it would
    # change what the model is asked to do.

    # --- inputs -----------------------------------------------------------
    # Free text from which entities and relationships are extracted.
    text = dspy.InputField(
        desc="Text to model using nodes, properties and relationships.",
    )
    # Description of what already exists in the graph, so the model can reuse it.
    neo4j_schema = dspy.InputField(
        desc="Current graph schema in Neo4j as a list of NODES and RELATIONSHIPS.",
    )

    # --- output -----------------------------------------------------------
    # The generated Cypher text.
    statement = dspy.OutputField(
        desc="Cypher statement to merge nodes and relationships found in the text.",
    )
|
|
|
|
class GenerateCypherConfig(PrecompiledConfig):
    # Settings that GenerateCypher reads when it constructs its dspy.LM.

    # Model identifier handed to dspy.LM as `model`.
    model: str = "openai/gpt-4o"

    # Value handed to dspy.LM as `max_tokens`.
    max_tokens: int = 1024
|
|
|
|
|
|
class GenerateCypher(PrecompiledProgram):
    """Program that asks a language model for a Cypher statement.

    Holds a ``dspy.ChainOfThought`` predictor over the ``CypherFromText``
    signature, bound to a ``dspy.LM`` built from the supplied config.
    """

    config: GenerateCypherConfig

    def __init__(self, config: GenerateCypherConfig, **kwargs):
        """Create the language model and the predictor.

        Args:
            config: Provides ``model`` and ``max_tokens`` for ``dspy.LM``.
            **kwargs: Passed through unchanged to the parent constructor.
        """
        super().__init__(config=config, **kwargs)

        # Language model settings come straight from the config object.
        lm_options = {
            "model": config.model,
            "max_tokens": config.max_tokens,
        }
        self.lm = dspy.LM(**lm_options)

        # Bind the predictor to this instance's LM explicitly.
        self.generate_cypher = dspy.ChainOfThought(CypherFromText)
        self.generate_cypher.set_lm(self.lm)

    def forward(self, text: str, neo4j_schema: list[str]):
        """Run the predictor for ``text`` against ``neo4j_schema``.

        Args:
            text: Passage to model as nodes, properties and relationships.
            neo4j_schema: Current graph schema, forwarded as-is to the
                predictor.

        Returns:
            The predictor's result, returned unchanged.
        """
        prediction = self.generate_cypher(text=text, neo4j_schema=neo4j_schema)
        return prediction
|
|
|
|
# Module-level program instance built from the default configuration values.
generate_cypher = GenerateCypher(config=GenerateCypherConfig())


if __name__ == "__main__":
    import json
    from pathlib import Path

    # The ingestion loop below is switched off by keeping it inside a bare
    # string literal; only the push_to_hub call at the end actually runs.
    """
    examples_path = Path(__file__).parent / "examples" / "wikipedia-abstracts-v0_0_1.ndjson"
    with open(examples_path, "r") as f:
        for line in f:
            data = json.loads(line)
            text = data["text"]
            print("TEXT TO PROCESS:\n", text[:50])
            cypher = generate_cypher(text=text, neo4j_schema=neo4j.fmt_schema())
            neo4j.query(cypher.statement.replace('```', ''))
            print("CYPHER STATEMENT:\n", cypher.statement)

    schema = neo4j.fmt_schema()
    print("SCHEMA:\n", schema)
    """

    # Publish the program, including its code, under the given tag.
    generate_cypher.push_to_hub(
        "farouk1/text-to-cypher",
        with_code=True,
        tag="v0.0.7",
        commit_message="Update README.md",
    )
|
|
|
|
|
|
|