2 Commits

Author SHA1 Message Date
194597adbc set LM 2025-12-27 02:32:33 -08:00
38e9965d2e set LM 2025-12-27 02:18:33 -08:00
4 changed files with 191 additions and 4 deletions

View File

@@ -1,5 +1,5 @@
{ {
"model": "gpt-4", "model": "openai/gpt-4o",
"neo4j_schema": [], "neo4j_schema": [],
"max_tokens": 1024 "max_tokens": 1024
} }

16
main.py
View File

@@ -1,9 +1,18 @@
from dotenv import load_dotenv import os
import dspy import dspy
from dotenv import load_dotenv
from modaic import PrecompiledProgram, PrecompiledConfig from modaic import PrecompiledProgram, PrecompiledConfig
from src.neo4j import Neo4j
load_dotenv() load_dotenv()
# set up Neo4j using NEO4J_URI
neo4j = Neo4j(
uri=os.getenv("NEO4J_URI"),
user=os.getenv("NEO4J_USER"),
password=os.getenv("NEO4J_PASSWORD"),
)
class CypherFromText(dspy.Signature): class CypherFromText(dspy.Signature):
"""Instructions: """Instructions:
Create a Cypher MERGE statement to model all entities and relationships found in the text following these guidelines: Create a Cypher MERGE statement to model all entities and relationships found in the text following these guidelines:
@@ -21,7 +30,7 @@ class CypherFromText(dspy.Signature):
class GenerateCypherConfig(PrecompiledConfig): class GenerateCypherConfig(PrecompiledConfig):
neo4j_schema: list[str] = [] neo4j_schema: list[str] = []
model: str = "gpt-4" model: str = "openai/gpt-4o"
max_tokens: int = 1024 max_tokens: int = 1024
@@ -35,6 +44,7 @@ class GenerateCypher(PrecompiledProgram):
max_tokens=config.max_tokens, max_tokens=config.max_tokens,
) )
self.generate_cypher = dspy.ChainOfThought(CypherFromText) self.generate_cypher = dspy.ChainOfThought(CypherFromText)
self.generate_cypher.set_lm(self.lm)
def forward(self, text: str, neo4j_schema: list[str]): def forward(self, text: str, neo4j_schema: list[str]):
return self.generate_cypher(text=text, neo4j_schema=neo4j_schema) return self.generate_cypher(text=text, neo4j_schema=neo4j_schema)
@@ -42,6 +52,6 @@ class GenerateCypher(PrecompiledProgram):
if __name__ == "__main__": if __name__ == "__main__":
generate_cypher = GenerateCypher(GenerateCypherConfig()) generate_cypher = GenerateCypher(GenerateCypherConfig())
generate_cypher.push_to_hub("farouk1/text-to-cypher", with_code=True, tag="v0.0.1", commit_message="init") generate_cypher.push_to_hub("farouk1/text-to-cypher", with_code=True, tag="v0.0.3", commit_message="set LM")

0
src/__init__.py Normal file
View File

177
src/neo4j.py Normal file
View File

@@ -0,0 +1,177 @@
import json
import neo4j
def parse_relationships(schema: "str | list[dict]") -> str:
    """Format the relationships of a schema result as ``start-TYPE->end`` lines.

    Args:
        schema: The schema rows, either already decoded or as a JSON string.
            Only the first row is read; its ``"relationships"`` entry must be
            an iterable of ``(start_node, relation, end_node)`` triples in
            which both nodes are mappings with a ``"name"`` key.

    Returns:
        One ``start-relation->end`` line per relationship, joined by
        newlines. An empty string is returned when there are no rows or the
        first row holds no relationships.
    """
    # Accept the raw JSON text as well as the decoded structure.
    rows = json.loads(schema) if isinstance(schema, str) else schema
    if not rows:
        # Nothing to index into; avoid an IndexError on rows[0].
        return ""
    return "\n".join(
        f"{start['name']}-{relation}->{end['name']}"
        for start, relation, end in rows[0]["relationships"]
    )
def parse_nodes(schema: "str | list[dict]") -> str:
    """Return the node names of a schema result, one per line.

    Args:
        schema: The schema rows, either already decoded or as a JSON string
            (the same inputs ``parse_relationships`` accepts). Only the first
            row is read; its ``"nodes"`` entry must be an iterable of
            mappings with a ``"name"`` key.

    Returns:
        The node names joined by newlines, or an empty string when there are
        no rows or no nodes.
    """
    rows = json.loads(schema) if isinstance(schema, str) else schema
    if not rows:
        # Nothing to index into; avoid an IndexError on rows[0].
        return ""
    return "\n".join(node["name"] for node in rows[0]["nodes"])
def parse_node_properties(node_properties):
    """Group node properties by label and render them as an indented listing.

    Args:
        node_properties: Iterable of mappings with the keys ``"nodeLabels"``
            (a sequence; only its first element is used), ``"propertyName"``
            and ``"mandatory"``.

    Returns:
        A newline-joined string with one line per label, each followed by a
        `` - property`` line per property. Mandatory properties are suffixed
        with ``(required)``; optional ones are listed by name only. Labels
        whose only row has no property name are listed without property
        lines. An empty input yields an empty string.
    """
    node_details = {}
    for item in node_properties:
        # Only the first label is used as the grouping key.
        node_label = item["nodeLabels"][0]
        properties = node_details.setdefault(node_label, [])

        prop_name = item["propertyName"]
        if prop_name is None:
            # A row without a property name carries no property. Keep the
            # label in the listing (as parse_rel_properties does for
            # relationship types) instead of storing None, which would
            # break the formatting step below.
            continue

        properties.append(
            f"{prop_name} (required)" if item["mandatory"] else prop_name
        )

    output_lines = []
    for node_label, properties in node_details.items():
        output_lines.append(f"{node_label}")
        output_lines.extend(f" - {prop}" for prop in properties)
    return "\n".join(output_lines)
def parse_rel_properties(rel_properties):
    """Group relationship properties by type and render an indented listing.

    Args:
        rel_properties: Iterable of mappings with the keys ``"relType"``,
            ``"propertyName"`` and ``"mandatory"``.

    Returns:
        A newline-joined string with one line per relationship type, each
        followed by a `` - name (required|optional)`` line per property.
        Types that only appear with a ``None`` property name are listed
        without property lines.
    """
    grouped = {}
    for entry in rel_properties:
        # Drop the two leading characters of relType, then any backticks
        # left at either end.
        type_name = entry["relType"][2:].strip("`")
        prop = entry["propertyName"]
        requirement = "required" if entry["mandatory"] else "optional"

        # Register the type even when this row contributes no property.
        bucket = grouped.setdefault(type_name, [])
        if prop is not None:
            bucket.append(f"{prop} ({requirement})")

    lines = []
    for type_name, props in grouped.items():
        lines.append(f"{type_name}")
        lines.extend(f" - {p}" for p in props)
    return "\n".join(lines)
class Neo4j:
    """Thin wrapper around a Neo4j driver for running queries and reading the schema.

    The driver is created and its connectivity verified in ``__init__``; call
    ``close()`` when the instance is no longer needed.
    """

    def __init__(self, uri, user: str = None, password: str = None):
        """Create the driver for ``uri`` and verify that it can connect.

        Args:
            uri: Connection URI handed to ``neo4j.GraphDatabase.driver``.
            user: User name, or None.
            password: Password, or None.
        """
        self._uri = uri
        self._user = user
        self._password = password
        # No credentials at all means "connect without auth"; otherwise pass
        # the (user, password) pair through unchanged.
        self._auth = (
            None
            if (self._user is None and self._password is None)
            else (self._user, self._password)
        )
        # Use the computed auth value; passing (None, None) here would send
        # an empty credential pair instead of disabling authentication.
        self._driver = neo4j.GraphDatabase.driver(self._uri, auth=self._auth)
        self._verify_connection()

    def close(self):
        """Close the underlying driver."""
        self._driver.close()

    def _verify_connection(self):
        """Check connectivity without giving up the driver.

        The driver must NOT be entered as a context manager here: leaving the
        ``with`` block closes it, and every later ``query`` would then run
        against a closed driver.
        """
        self._driver.verify_connectivity()

    def query(self, query, parameters=None, db=None):
        """Run ``query`` in a new session and return all records as dicts.

        Args:
            query: Query text passed to ``session.run``.
            parameters: Optional query parameters passed to ``session.run``.
            db: Must be None; any other value fails the assertion below.

        Returns:
            The value of ``result.data()`` for the executed query.
        """
        assert db is None, (
            "The Neo4j implementation does not support multiple databases."
        )
        with self._driver.session(database=db) as session:
            result = session.run(query, parameters)
            # Materialise the records while the session is still open.
            return result.data()

    def schema(self, parsed=False):
        """Return the result of ``db.schema.visualization()``.

        Args:
            parsed: When true, return a ``(nodes, relationships)`` pair of
                strings built by ``parse_nodes`` and ``parse_relationships``
                instead of the raw query result.
        """
        query = """
        CALL db.schema.visualization()
        """
        schema = self.query(query)
        if parsed:
            return parse_nodes(schema), parse_relationships(schema)
        return schema

    def schema_properties(self, parsed=False):
        """Return node and relationship property metadata as a pair.

        Args:
            parsed: When true, each element is rendered to a string by
                ``parse_node_properties`` / ``parse_rel_properties``;
                otherwise the raw query results are returned.
        """
        node_props = self._schema_node_properties()
        rel_props = self._schema_relationship_properties()
        if parsed:
            return parse_node_properties(node_props), parse_rel_properties(rel_props)
        return node_props, rel_props

    def _schema_node_properties(self):
        """Return the result of ``db.schema.nodeTypeProperties()``."""
        query = """
        CALL db.schema.nodeTypeProperties()
        """
        return self.query(query)

    def _schema_relationship_properties(self):
        """Return the result of ``db.schema.relTypeProperties()``."""
        query = """
        CALL db.schema.relTypeProperties()
        """
        return self.query(query)

    def fmt_schema(self):
        """Render the schema as three titled text sections.

        Returns:
            A string containing, in order, the node labels with their
            properties, the relationship types with their properties, and
            the relationships themselves. Each section is formatted as
            ``TITLE:\\n<body>\\n`` and sections are joined by a newline.
        """
        # The parsed node list is not part of the output; only the
        # relationships from the visualization are used.
        _, relationships = self.schema(parsed=True)
        node_props, rel_props = self.schema_properties(parsed=True)
        sections = (
            ("NODE LABELS & PROPERTIES", node_props),
            ("RELATIONSHIP LABELS & PROPERTIES", rel_props),
            ("RELATIONSHIPS", relationships),
        )
        return "\n".join(f"{title}:\n{body}\n" for title, body in sections)