2 Commits

Author SHA1 Message Date
5cdedc3403 Syntax fix 2025-12-27 02:59:43 -08:00
194597adbc set LM 2025-12-27 02:32:33 -08:00
5 changed files with 248 additions and 9 deletions

View File

@@ -1,5 +1,4 @@
{ {
"model": "gpt-4", "model": "openai/gpt-4o",
"neo4j_schema": [],
"max_tokens": 1024 "max_tokens": 1024
} }

37
main.py
View File

@@ -1,9 +1,18 @@
from dotenv import load_dotenv import os
import dspy import dspy
from dotenv import load_dotenv
from modaic import PrecompiledProgram, PrecompiledConfig from modaic import PrecompiledProgram, PrecompiledConfig
from src.neo4j import Neo4j
load_dotenv() load_dotenv()
# set up Neo4j using NEO4J_URI
neo4j = Neo4j(
uri=os.getenv("NEO4J_URI"),
user=os.getenv("NEO4J_USER"),
password=os.getenv("NEO4J_PASSWORD"),
)
class CypherFromText(dspy.Signature): class CypherFromText(dspy.Signature):
"""Instructions: """Instructions:
Create a Cypher MERGE statement to model all entities and relationships found in the text following these guidelines: Create a Cypher MERGE statement to model all entities and relationships found in the text following these guidelines:
@@ -20,16 +29,15 @@ class CypherFromText(dspy.Signature):
) )
class GenerateCypherConfig(PrecompiledConfig): class GenerateCypherConfig(PrecompiledConfig):
neo4j_schema: list[str] = [] model: str = "openai/gpt-4o"
model: str = "gpt-4"
max_tokens: int = 1024 max_tokens: int = 1024
class GenerateCypher(PrecompiledProgram): class GenerateCypher(PrecompiledProgram):
config: GenerateCypherConfig config: GenerateCypherConfig
def _init_(self, config: GenerateCypherConfig, **kwargs): def __init__(self, config: GenerateCypherConfig, **kwargs):
super()._init_(**kwargs) super().__init__(config=config, **kwargs)
self.lm = dspy.LM( self.lm = dspy.LM(
model=config.model, model=config.model,
max_tokens=config.max_tokens, max_tokens=config.max_tokens,
@@ -40,9 +48,24 @@ class GenerateCypher(PrecompiledProgram):
def forward(self, text: str, neo4j_schema: list[str]): def forward(self, text: str, neo4j_schema: list[str]):
return self.generate_cypher(text=text, neo4j_schema=neo4j_schema) return self.generate_cypher(text=text, neo4j_schema=neo4j_schema)
generate_cypher = GenerateCypher(GenerateCypherConfig())
if __name__ == "__main__": if __name__ == "__main__":
generate_cypher = GenerateCypher(GenerateCypherConfig()) """
generate_cypher.push_to_hub("farouk1/text-to-cypher", with_code=True, tag="v0.0.2", commit_message="set LM") from pathlib import Path
import json
examples_path = Path(__file__).parent / "examples" / "wikipedia-abstracts-v0_0_1.ndjson"
with open(examples_path, "r") as f:
for line in f:
data = json.loads(line)
text = data["text"]
print(text[:50])
cypher = generate_cypher(text=text, neo4j_schema=neo4j.fmt_schema())
neo4j.query(cypher.statement.replace('```', ''))
"""
schema = neo4j.fmt_schema()
print(schema)
generate_cypher.push_to_hub("farouk1/text-to-cypher", with_code=True, tag="v0.0.4", commit_message="Syntax fix")

View File

@@ -1,4 +1,41 @@
{ {
"generate_cypher.predict": {
"traces": [],
"train": [],
"demos": [],
"signature": {
"instructions": "Instructions:\nCreate a Cypher MERGE statement to model all entities and relationships found in the text following these guidelines:\n- Refer to the provided schema and use existing or similar nodes, properties or relationships before creating new ones.\n- Use generic categories for node and relationship labels.",
"fields": [
{
"prefix": "Text:",
"description": "Text to model using nodes, properties and relationships."
},
{
"prefix": "Neo 4 J Schema:",
"description": "Current graph schema in Neo4j as a list of NODES and RELATIONSHIPS."
},
{
"prefix": "Reasoning: Let's think step by step in order to",
"description": "${reasoning}"
},
{
"prefix": "Statement:",
"description": "Cypher statement to merge nodes and relationships found in the text."
}
]
},
"lm": {
"model": "openai/gpt-4o",
"model_type": "chat",
"cache": true,
"num_retries": 3,
"finetuning_model": null,
"launch_kwargs": {},
"train_kwargs": {},
"temperature": null,
"max_tokens": 1024
}
},
"metadata": { "metadata": {
"dependency_versions": { "dependency_versions": {
"python": "3.13", "python": "3.13",

0
src/__init__.py Normal file
View File

180
src/neo4j.py Normal file
View File

@@ -0,0 +1,180 @@
import json
import neo4j
def parse_relationships(schema: "list[dict] | str") -> str:
    """Format the relationships of a schema-visualization result as text.

    Args:
        schema: The result rows of the schema query (a list whose first
            element has a ``"relationships"`` key), or a JSON string that
            decodes to such a list.

    Returns:
        One ``start-TYPE->end`` line per relationship, joined with newlines.
        An empty string when ``schema`` contains no rows or no relationships.
    """
    # Accept either the already-decoded rows or their JSON serialization.
    rows = json.loads(schema) if isinstance(schema, str) else schema
    if not rows:
        # No result row at all: nothing to format (avoids IndexError on rows[0]).
        return ""

    # Each relationship is a (start_node, relation, end_node) triple whose
    # node entries expose their label under the "name" key.
    return "\n".join(
        f"{start['name']}-{relation}->{end['name']}"
        for start, relation, end in rows[0]["relationships"]
    )
def parse_nodes(schema: "list[dict] | str") -> str:
    """Format the node names of a schema-visualization result as text.

    Args:
        schema: The result rows of the schema query (a list whose first
            element has a ``"nodes"`` key), or a JSON string that decodes to
            such a list, mirroring what ``parse_relationships`` accepts.

    Returns:
        The ``"name"`` of every node, one per line. An empty string when
        ``schema`` contains no rows or no nodes.
    """
    # Accept either the already-decoded rows or their JSON serialization.
    rows = json.loads(schema) if isinstance(schema, str) else schema
    if not rows:
        # No result row at all: nothing to format (avoids IndexError on rows[0]).
        return ""
    return "\n".join(node["name"] for node in rows[0]["nodes"])
def parse_node_properties(node_properties):
    """Group node properties by label and render them as an indented list.

    Args:
        node_properties: Iterable of mappings with the keys ``"nodeLabels"``
            (a non-empty list), ``"propertyName"`` (a string, or ``None`` for
            a label without properties) and ``"mandatory"`` (truthy when the
            property is required).

    Returns:
        A newline-joined string: each label on its own line, followed by one
        `` - <property>`` line per property. Required properties carry a
        `` (required)`` suffix; optional ones are listed by name only.
        Labels without properties are listed with no property lines.
    """
    # Label -> property strings, kept in first-seen order.
    node_details = {}
    for item in node_properties:
        # Only the first label is used; rows are grouped under it.
        node_label = item["nodeLabels"][0]
        prop_name = item["propertyName"]

        # Register the label even when it turns out to have no properties.
        properties = node_details.setdefault(node_label, [])
        if prop_name is None:
            # A label without properties: previously this stored None and
            # crashed later while formatting the output.
            continue

        properties.append(
            f"{prop_name} (required)" if item["mandatory"] else prop_name
        )

    output_lines = []
    for node, properties in node_details.items():
        output_lines.append(f"{node}")
        output_lines.extend(f" - {prop}" for prop in properties)
    return "\n".join(output_lines)
def parse_rel_properties(rel_properties):
    """Group relationship properties by type and render them as an indented list.

    Each input row supplies ``"relType"``, ``"propertyName"`` and
    ``"mandatory"``. The output lists every relationship type on its own
    line, followed by one `` - <name> (required|optional)`` line per
    property. Types whose ``"propertyName"`` is ``None`` are still listed,
    just without property lines.
    """
    # Relationship type -> property strings, kept in first-seen order.
    props_by_type = {}
    for row in rel_properties:
        # Drop the two leading characters (":`") and any surrounding backticks.
        type_name = row["relType"][2:].strip("`")
        name = row["propertyName"]
        requirement = "required" if row["mandatory"] else "optional"

        # Always register the type so property-less relationships show up too.
        bucket = props_by_type.setdefault(type_name, [])
        if name is not None:
            bucket.append(f"{name} ({requirement})")

    lines = []
    for type_name, props in props_by_type.items():
        lines.append(f"{type_name}")
        lines.extend(f" - {entry}" for entry in props)
    return "\n".join(lines)
class Neo4j:
    """Small wrapper around a Neo4j driver for running queries and reading the schema."""

    def __init__(self, uri, user: str = None, password: str = None):
        """Create the driver for ``uri`` and verify the server is reachable.

        Args:
            uri: Connection URI handed to ``neo4j.GraphDatabase.driver``.
            user: User name, or ``None``.
            password: Password, or ``None``.

        Raises:
            Whatever the driver raises when connectivity cannot be verified;
            the driver is closed before the error propagates.
        """
        self._uri = uri
        self._user = user
        self._password = password
        # With no credentials at all, connect without auth instead of
        # sending a (None, None) pair.
        self._auth = (
            None
            if (self._user is None and self._password is None)
            else (self._user, self._password)
        )
        self._driver = neo4j.GraphDatabase.driver(self._uri, auth=self._auth)
        try:
            self._verify_connection()
        except Exception:
            # Do not leak the driver when the server is unreachable.
            self._driver.close()
            raise
        print("CONNECTION ESTABLISHED")

    def close(self):
        """Close the underlying driver."""
        self._driver.close()
        print("CONNECTION CLOSED")

    def _verify_connection(self):
        """Check connectivity while leaving the driver open for later queries."""
        # Deliberately not `with self._driver`: leaving a driver's context
        # manager closes it, which would break every subsequent query.
        self._driver.verify_connectivity()
        print("CONNECTION VERIFIED")

    def query(self, query, parameters=None, db=None):
        """Run ``query`` in a new session and return ``result.data()``.

        Args:
            query: Query text to run.
            parameters: Optional query parameters passed to ``session.run``.
            db: Must be ``None``; selecting a database is not supported.
        """
        assert db is None, (
            "The Neo4j implementation does not support multiple databases."
        )
        with self._driver.session(database=db) as session:
            result = session.run(query, parameters)
            # Materialize inside the session; the result is tied to it.
            return result.data()

    def schema(self, parsed=False):
        """Return the schema visualization, raw or parsed.

        Returns:
            The raw query result, or, when ``parsed`` is true, a
            ``(nodes_text, relationships_text)`` tuple.
        """
        query = """
        CALL db.schema.visualization()
        """
        schema = self.query(query)
        if parsed:
            return parse_nodes(schema), parse_relationships(schema)
        return schema

    def schema_properties(self, parsed=False):
        """Return ``(node_properties, relationship_properties)``, raw or parsed."""
        props = self._schema_node_properties(), self._schema_relationship_properties()
        if parsed:
            return parse_node_properties(props[0]), parse_rel_properties(props[1])
        return props

    def _schema_node_properties(self):
        """Return the raw result of ``db.schema.nodeTypeProperties()``."""
        query = """
        CALL db.schema.nodeTypeProperties()
        """
        return self.query(query)

    def _schema_relationship_properties(self):
        """Return the raw result of ``db.schema.relTypeProperties()``."""
        query = """
        CALL db.schema.relTypeProperties()
        """
        return self.query(query)

    def fmt_schema(self):
        """Render the schema as three titled text sections.

        The sections are node labels with properties, relationship labels
        with properties, and the relationships themselves. Each section is
        ``"<title>:\\n<body>\\n"`` and sections are joined with a newline.
        """
        # Only the relationships text is used from the parsed schema.
        _, relationships = self.schema(parsed=True)
        node_props, rel_props = self.schema_properties(parsed=True)
        sections = (
            ("NODE LABELS & PROPERTIES", node_props),
            ("RELATIONSHIP LABELS & PROPERTIES", rel_props),
            ("RELATIONSHIPS", relationships),
        )
        return "\n".join(f"{title}:\n{body}\n" for title, body in sections)