Compare commits
2 Commits
| Author | SHA1 | Date |
|---|---|---|
| | fc9560cc50 | |
| | 57e7b1fd36 | |
```diff
@@ -1,4 +1,4 @@
 {
-    "AutoConfig": "main.GenerateCypherConfig",
-    "AutoProgram": "main.GenerateCypher"
+    "AutoConfig": "modules.GenerateCypherConfig",
+    "AutoProgram": "modules.GenerateCypher"
 }
```
```diff
@@ -1,4 +1,5 @@
 {
-    "model": "openai/gpt-4o",
-    "max_tokens": 1024
+    "model": "openrouter/openai/gpt-4o",
+    "max_tokens": 1024,
+    "cache": false
 }
```
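For orientation, a small sketch (not part of the diff) of how these JSON defaults line up with the `GenerateCypherConfig` class changed further down in this compare. It assumes `PrecompiledConfig` subclasses can be instantiated with no arguments and expose these fields as plain attributes, which the diff itself does not show:

```python
# Hypothetical check, assuming a no-argument constructor on PrecompiledConfig.
cfg = GenerateCypherConfig()  # class defined later in this diff
assert cfg.model == "openrouter/openai/gpt-4o"
assert cfg.max_tokens == 1024
assert cfg.cache is False
```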
```diff
@@ -2,17 +2,9 @@ import os
 import dspy
 from dotenv import load_dotenv
 from modaic import PrecompiledProgram, PrecompiledConfig
-from src.neo4j import Neo4j

 load_dotenv()

-# set up Neo4j using NEO4J_URI
-neo4j = Neo4j(
-    uri=os.getenv("NEO4J_URI"),
-    user=os.getenv("NEO4J_USER"),
-    password=os.getenv("NEO4J_PASSWORD"),
-)
-

 class CypherFromQuestion(dspy.Signature):
     """Task: Generate Cypher statement to query a graph database.
```
```diff
@@ -33,8 +25,9 @@ class CypherFromQuestion(dspy.Signature):


 class GenerateCypherConfig(PrecompiledConfig):
-    model: str = "openai/gpt-4o"
+    model: str = "openrouter/openai/gpt-4o"  # OPENROUTER ONLY
     max_tokens: int = 1024
+    cache: bool = False


 class GenerateCypher(PrecompiledProgram):
```
```diff
@@ -45,6 +38,8 @@ class GenerateCypher(PrecompiledProgram):
         self.lm = dspy.LM(
             model=config.model,
             max_tokens=config.max_tokens,
+            api_base="https://openrouter.ai/api/v1",
+            cache=config.cache,
         )
         self.generate_cypher = dspy.ChainOfThought(CypherFromQuestion)
         self.generate_cypher.set_lm(self.lm)
```
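For reference, a minimal standalone sketch (not part of the diff) of the LM these new defaults produce. It assumes an `OPENROUTER_API_KEY` environment variable is available for authentication; the model id, token limit, endpoint, and cache setting mirror the values above.

```python
import dspy

# Sketch only: same settings as the updated GenerateCypherConfig defaults.
# Assumes OPENROUTER_API_KEY is set in the environment.
lm = dspy.LM(
    model="openrouter/openai/gpt-4o",
    max_tokens=1024,
    api_base="https://openrouter.ai/api/v1",
    cache=False,  # matches the new "don't cache results" behavior
)

# Calling the LM directly returns a list of completions.
print(lm("Say OK.")[0])
```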
@@ -76,6 +71,6 @@ if __name__ == "__main__":
|
||||
generate_cypher.push_to_hub(
|
||||
"farouk1/text-to-cypher",
|
||||
with_code=True,
|
||||
tag="v0.0.9",
|
||||
commit_message="Update README.md",
|
||||
tag="v1.0.1",
|
||||
commit_message="Don't cache results",
|
||||
)
|
||||
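Put together, the publish step now looks roughly like the sketch below. The `GenerateCypher(GenerateCypherConfig())` construction is an assumption about the `PrecompiledProgram` interface; only the `push_to_hub` arguments are taken from the diff.

```python
if __name__ == "__main__":
    # Assumed construction; the diff only shows the push_to_hub call itself.
    generate_cypher = GenerateCypher(GenerateCypherConfig())
    generate_cypher.push_to_hub(
        "farouk1/text-to-cypher",
        with_code=True,  # upload the defining code alongside the compiled program
        tag="v1.0.1",
        commit_message="Don't cache results",
    )
```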
```diff
@@ -25,15 +25,16 @@
             ]
         },
         "lm": {
-            "model": "openai/gpt-4o",
+            "model": "openrouter/openai/gpt-4o",
             "model_type": "chat",
-            "cache": true,
+            "cache": false,
             "num_retries": 3,
             "finetuning_model": null,
             "launch_kwargs": {},
             "train_kwargs": {},
             "temperature": null,
-            "max_tokens": 1024
+            "max_tokens": 1024,
+            "api_base": "https://openrouter.ai/api/v1"
         }
     },
     "metadata": {
```
```diff
@@ -4,4 +4,4 @@ version = "0.1.0"
 description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.13"
-dependencies = ["datasets>=4.4.2", "dspy>=3.0.4", "modaic>=0.8.2", "neo4j~=5.18.0", "python-dotenv~=1.0.1", "sacrebleu>=2.5.1"]
+dependencies = ["datasets>=4.4.2", "dspy>=3.0.4", "modaic>=0.8.3", "neo4j~=5.18.0", "python-dotenv~=1.0.1", "sacrebleu>=2.5.1"]
```
src/neo4j.py (deleted, 180 lines)
```diff
@@ -1,180 +0,0 @@
-import json
-import neo4j
-
-
-def parse_relationships(schema: dict) -> str:
-    # Parse the JSON string into a Python object if it's not already
-    if isinstance(schema, str):
-        data = json.loads(schema)
-    else:
-        data = schema
-
-    data = data[0]["relationships"]
-
-    # Initialize a list to hold the formatted relationship strings
-    relationships = []
-
-    # Iterate through each relationship in the data
-    for relationship in data:
-        entity1, relation, entity2 = relationship
-        # Extract the names of the entities and the relationship
-        entity1_name = entity1["name"]
-        entity2_name = entity2["name"]
-        # Format the string as specified and add it to the list
-        formatted_relationship = f"{entity1_name}-{relation}->{entity2_name}"
-        relationships.append(formatted_relationship)
-
-    # Join all formatted strings with a newline character
-    result = "\n".join(relationships)
-    return result
-
-
-def parse_nodes(schema):
-    schema = schema
-    nodes = [node["name"] for node in schema[0]["nodes"]]
-    return "\n".join(nodes)
-
-
-def parse_node_properties(node_properties):
-    # Initialize a dictionary to accumulate node details
-    node_details = {}
-
-    # Iterate through each item in the input JSON
-    for item in node_properties:
-        node_label = item["nodeLabels"][0]  # Assuming there's always one label
-        prop_name = item["propertyName"]
-        mandatory = "required" if item["mandatory"] else "optional"
-
-        # Prepare the property string
-        property_str = f"{prop_name} ({mandatory})" if item["mandatory"] else prop_name
-
-        # If the node label exists, append the property; otherwise, create a new entry
-        if node_label in node_details:
-            node_details[node_label].append(property_str)
-        else:
-            node_details[node_label] = [property_str]
-
-    # Format the output
-    output_lines = []
-    for node, properties in node_details.items():
-        output_lines.append(f"{node}")
-        for prop in properties:
-            prop_line = f" - {prop}" if "required" in prop else f" - {prop}"
-            output_lines.append(prop_line)
-
-    return "\n".join(output_lines)
-
-
-def parse_rel_properties(rel_properties):
-    # Initialize a dictionary to accumulate relationship details
-    rel_details = {}
-
-    # Iterate through each item in the input JSON
-    for item in rel_properties:
-        # Extract relationship type name, removing :` and `
-        rel_type = item["relType"][2:].strip("`")
-        prop_name = item["propertyName"]
-        mandatory = "required" if item["mandatory"] else "optional"
-
-        # If propertyName is not None, prepare the property string
-        if prop_name is not None:
-            property_str = f"{prop_name} ({mandatory})"
-            # If the relationship type exists, append the property; otherwise, create a new entry
-            if rel_type in rel_details:
-                rel_details[rel_type].append(property_str)
-            else:
-                rel_details[rel_type] = [property_str]
-        else:
-            # For relationships without properties, ensure the relationship is listed
-            rel_details.setdefault(rel_type, [])
-
-    # Format the output
-    output_lines = []
-    for rel_type, properties in rel_details.items():
-        output_lines.append(f"{rel_type}")
-        for prop in properties:
-            output_lines.append(f" - {prop}")
-
-    return "\n".join(output_lines)
-
-
-class Neo4j:
-    def __init__(self, uri, user: str = None, password: str = None):
-        self._uri = uri
-        self._user = user
-        self._password = password
-        self._auth = (
-            None
-            if (self._user is None and self._password is None)
-            else (self._user, self._password)
-        )
-        self._driver = neo4j.GraphDatabase.driver(
-            self._uri, auth=(self._user, self._password)
-        )
-
-        self._verify_connection()
-        print("CONNECTION ESTABLISHED")
-
-    def close(self):
-        self._driver.close()
-        print("CONNECTION CLOSED")
-
-    def _verify_connection(self):
-        with self._driver as driver:
-            driver.verify_connectivity()
-            print("CONNECTION VERIFIED")
-
-    def query(self, query, parameters=None, db=None):
-        assert db is None, (
-            "The Neo4j implementation does not support multiple databases."
-        )
-        with self._driver.session(database=db) as session:
-            result = session.run(query, parameters)
-            return result.data()
-
-    def schema(self, parsed=False):
-        query = """
-        CALL db.schema.visualization()
-        """
-        schema = self.query(query)
-
-        if parsed:
-            return parse_nodes(schema), parse_relationships(schema)
-
-        return schema
-
-    def schema_properties(self, parsed=False):
-        props = self._schema_node_properties(), self._schema_relationship_properties()
-        if parsed:
-            return parse_node_properties(props[0]), parse_rel_properties(props[1])
-
-        return props
-
-    def _schema_node_properties(self):
-        query = """
-        CALL db.schema.nodeTypeProperties()
-        """
-        return self.query(query)
-
-    def _schema_relationship_properties(self):
-        query = """
-        CALL db.schema.relTypeProperties()
-        """
-        return self.query(query)
-
-    def fmt_schema(self):
-        parsed_schema = self.schema(parsed=True)
-        parsed_props = self.schema_properties(parsed=True)
-        parsed = (*parsed_props, parsed_schema[1])
-        return "\n".join(
-            [
-                f"{element}:\n{parsed[idx]}\n"
-                for idx, element in enumerate(
-                    [
-                        "NODE LABELS & PROPERTIES",
-                        "RELATIONSHIP LABELS & PROPERTIES",
-                        "RELATIONSHIPS",
-                    ]
-                )
-            ]
-        )
```
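Since this helper module is removed by the change, here is a brief, self-contained sketch (not part of the diff) of what its parsing functions produced, run against the pre-change revision where `src/neo4j.py` still exists. The `Person`, `ACTED_IN`, `Movie`, `name`, and `released` values are hypothetical sample data, not something from the repository:

```python
from src.neo4j import parse_node_properties, parse_relationships

# Hypothetical shapes mirroring what db.schema.visualization() and
# db.schema.nodeTypeProperties() return; the labels are made up.
schema = [{
    "nodes": [{"name": "Person"}, {"name": "Movie"}],
    "relationships": [[{"name": "Person"}, "ACTED_IN", {"name": "Movie"}]],
}]
node_props = [
    {"nodeLabels": ["Person"], "propertyName": "name", "mandatory": True},
    {"nodeLabels": ["Movie"], "propertyName": "released", "mandatory": False},
]

print(parse_relationships(schema))
# Person-ACTED_IN->Movie
print(parse_node_properties(node_props))
# Person
#  - name (required)
# Movie
#  - released
```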