Syntax fix

set LM
2025-12-27 02:59:43 -08:00 · 2025-12-27 02:32:33 -08:00
5 changed files with 248 additions and 9 deletions
--- a/config.json
+++ b/config.json
@@ -1,5 +1,4 @@
 {
-  "model": "gpt-4",
+  "model": "openai/gpt-4o",
  "neo4j_schema": [],
  "max_tokens": 1024
 }
--- a/main.py
+++ b/main.py
@@ -1,9 +1,18 @@
-from dotenv import load_dotenv
+import os
 import dspy
 from dotenv import load_dotenv
 from modaic import PrecompiledProgram, PrecompiledConfig
 from src.neo4j import Neo4j
 load_dotenv()
 # set up Neo4j using NEO4J_URI
 neo4j = Neo4j(
    uri=os.getenv("NEO4J_URI"),
    user=os.getenv("NEO4J_USER"),
    password=os.getenv("NEO4J_PASSWORD"),
 )
 class CypherFromText(dspy.Signature):
    """Instructions:
    Create a Cypher MERGE statement to model all entities and relationships found in the text following these guidelines:
@@ -20,16 +29,15 @@ class CypherFromText(dspy.Signature):
    )
 class GenerateCypherConfig(PrecompiledConfig):
-    neo4j_schema: list[str] = []
+    model: str = "openai/gpt-4o"
    model: str = "gpt-4"
    max_tokens: int = 1024
 class GenerateCypher(PrecompiledProgram):
    config: GenerateCypherConfig
-    def _init_(self, config: GenerateCypherConfig, **kwargs):
+    def __init__(self, config: GenerateCypherConfig, **kwargs):
-        super()._init_(**kwargs)
+        super().__init__(config=config, **kwargs)
        self.lm = dspy.LM(
            model=config.model,
            max_tokens=config.max_tokens,
@@ -40,9 +48,24 @@ class GenerateCypher(PrecompiledProgram):
    def forward(self, text: str, neo4j_schema: list[str]):
        return self.generate_cypher(text=text, neo4j_schema=neo4j_schema)
 generate_cypher = GenerateCypher(GenerateCypherConfig())
 if __name__ == "__main__":
-    generate_cypher = GenerateCypher(GenerateCypherConfig())
+    """
-    generate_cypher.push_to_hub("farouk1/text-to-cypher", with_code=True, tag="v0.0.2", commit_message="set LM")
+    from pathlib import Path
    import json
    examples_path = Path(__file__).parent / "examples" / "wikipedia-abstracts-v0_0_1.ndjson"
    with open(examples_path, "r") as f:
         for line in f:
             data = json.loads(line)
             text = data["text"]
             print(text[:50])
             cypher = generate_cypher(text=text, neo4j_schema=neo4j.fmt_schema())
             neo4j.query(cypher.statement.replace('```', ''))
    """
    schema = neo4j.fmt_schema() 
    print(schema)
    generate_cypher.push_to_hub("farouk1/text-to-cypher", with_code=True, tag="v0.0.4", commit_message="Syntax fix")
--- a/program.json
+++ b/program.json
@@ -1,4 +1,41 @@
 {
  "generate_cypher.predict": {
    "traces": [],
    "train": [],
    "demos": [],
    "signature": {
      "instructions": "Instructions:\nCreate a Cypher MERGE statement to model all entities and relationships found in the text following these guidelines:\n- Refer to the provided schema and use existing or similar nodes, properties or relationships before creating new ones.\n- Use generic categories for node and relationship labels.",
      "fields": [
        {
          "prefix": "Text:",
          "description": "Text to model using nodes, properties and relationships."
        },
        {
          "prefix": "Neo 4 J Schema:",
          "description": "Current graph schema in Neo4j as a list of NODES and RELATIONSHIPS."
        },
        {
          "prefix": "Reasoning: Let's think step by step in order to",
          "description": "${reasoning}"
        },
        {
          "prefix": "Statement:",
          "description": "Cypher statement to merge nodes and relationships found in the text."
        }
      ]
    },
    "lm": {
      "model": "openai/gpt-4o",
      "model_type": "chat",
      "cache": true,
      "num_retries": 3,
      "finetuning_model": null,
      "launch_kwargs": {},
      "train_kwargs": {},
      "temperature": null,
      "max_tokens": 1024
    }
  },
  "metadata": {
    "dependency_versions": {
      "python": "3.13",
--- a/src/init.py
+++ b/src/init.py
--- a/src/neo4j.py
+++ b/src/neo4j.py
@@ -0,0 +1,180 @@
 import json
 import neo4j
 def parse_relationships(schema: dict) -> str:
     """Render the relationships of a schema result as ``start-TYPE->end`` lines.

     ``schema`` may be an already-decoded object or a JSON string; a string is
     decoded with ``json.loads`` first. Despite the annotation, the value is
     indexed as ``schema[0]["relationships"]``, so it must be a sequence whose
     first element is a mapping with a ``"relationships"`` key.

     Each relationship must unpack into exactly three items
     ``(start, relation, end)`` where ``start`` and ``end`` expose a ``"name"``
     key. The rendered lines are joined with newlines; no relationships yields
     an empty string.
     """
     payload = json.loads(schema) if isinstance(schema, str) else schema
     triples = payload[0]["relationships"]

     rendered = [
         f"{start['name']}-{relation}->{end['name']}"
         for start, relation, end in triples
     ]
     return "\n".join(rendered)
 def parse_nodes(schema):
     """Return the node names of a schema result, one per line.

     Reads ``schema[0]["nodes"]`` and joins each entry's ``"name"`` value with
     newlines. Unlike ``parse_relationships`` this function does not decode
     JSON strings; ``schema`` must already be an indexable object.
     """
     first_row = schema[0]
     return "\n".join(entry["name"] for entry in first_row["nodes"])
 def parse_node_properties(node_properties):
     """Format node property rows as an indented, per-label listing.

     Each row must provide ``"nodeLabels"`` (only the first label is used),
     ``"propertyName"`` and ``"mandatory"``. Rows are grouped by label in
     first-seen order. Mandatory properties are rendered as
     ``"name (required)"``; optional ones as just ``"name"``.

     A row whose ``"propertyName"`` is ``None`` still registers its label but
     contributes no property line, mirroring how ``parse_rel_properties``
     treats relationship types without properties. Previously such a row
     either raised ``TypeError`` or was rendered as the literal text "None".

     Returns:
         The listing as a single newline-joined string: a label line followed
         by one ``"  - <property>"`` line per property. Empty input yields
         an empty string.
     """
     node_details = {}
     for item in node_properties:
         node_label = item["nodeLabels"][0]  # only the first label is considered
         prop_name = item["propertyName"]
         # Register the label even when there is nothing to list under it.
         properties = node_details.setdefault(node_label, [])
         if prop_name is None:
             continue
         properties.append(
             f"{prop_name} (required)" if item["mandatory"] else prop_name
         )

     output_lines = []
     for node, properties in node_details.items():
         output_lines.append(f"{node}")
         output_lines.extend(f"  - {prop}" for prop in properties)
     return "\n".join(output_lines)
 def parse_rel_properties(rel_properties):
     """Format relationship property rows as an indented, per-type listing.

     Each row must provide ``"relType"``, ``"propertyName"`` and
     ``"mandatory"``. The type name is obtained by dropping the first two
     characters of ``"relType"`` and stripping backticks from both ends.
     Rows are grouped by type in first-seen order, and every property is
     rendered as ``"name (required)"`` or ``"name (optional)"``.

     A row whose ``"propertyName"`` is ``None`` only makes sure its type
     appears in the output, without adding a property line.

     Returns:
         A newline-joined string: a type line followed by one
         ``"  - <property>"`` line per property.
     """
     grouped = {}
     for row in rel_properties:
         type_name = row["relType"][2:].strip("`")
         prop = row["propertyName"]
         flag = "required" if row["mandatory"] else "optional"

         # Creating the bucket up front also covers types without properties.
         bucket = grouped.setdefault(type_name, [])
         if prop is not None:
             bucket.append(f"{prop} ({flag})")

     rendered = []
     for type_name, props in grouped.items():
         rendered.append(f"{type_name}")
         rendered.extend(f"  - {entry}" for entry in props)
     return "\n".join(rendered)
 class Neo4j:
     """Small wrapper around a Neo4j driver for queries and schema inspection.

     The constructor creates the driver, verifies connectivity and leaves the
     driver open; call ``close`` when finished.
     """

     def __init__(self, uri, user: str = None, password: str = None):
         """Create the driver for ``uri`` and verify the connection.

         Args:
             uri: Connection URI passed to ``neo4j.GraphDatabase.driver``.
             user: Optional user name.
             password: Optional password.
         """
         self._uri = uri
         self._user = user
         self._password = password
         # With no credentials at all, authenticate with ``None`` rather than
         # the pair ``(None, None)``.
         self._auth = (
             None
             if (self._user is None and self._password is None)
             else (self._user, self._password)
         )
         # Pass the computed auth; it was previously built but never used.
         self._driver = neo4j.GraphDatabase.driver(self._uri, auth=self._auth)
         self._verify_connection()
         print("CONNECTION ESTABLISHED")

     def close(self):
         """Close the underlying driver."""
         self._driver.close()
         print("CONNECTION CLOSED")

     def _verify_connection(self):
         """Check that the server is reachable without closing the driver."""
         # Deliberately not ``with self._driver``: leaving that block closes
         # the driver, so every later ``query`` would use a closed driver.
         self._driver.verify_connectivity()
         print("CONNECTION VERIFIED")

     def query(self, query, parameters=None, db=None):
         """Run ``query`` in a new session and return ``result.data()``.

         Args:
             query: Query text to execute.
             parameters: Optional parameters forwarded to ``session.run``.
             db: Must be ``None``; selecting a database is not supported.
         """
         assert db is None, (
             "The Neo4j implementation does not support multiple databases."
         )
         with self._driver.session(database=db) as session:
             result = session.run(query, parameters)
             return result.data()

     def schema(self, parsed=False):
         """Return the result of ``db.schema.visualization()``.

         With ``parsed=True`` the result is returned as the tuple
         ``(parse_nodes(schema), parse_relationships(schema))``.
         """
         query = """
         CALL db.schema.visualization()
         """
         schema = self.query(query)
         if parsed:
             return parse_nodes(schema), parse_relationships(schema)
         return schema

     def schema_properties(self, parsed=False):
         """Return ``(node_properties, relationship_properties)``.

         With ``parsed=True`` each element is formatted by
         ``parse_node_properties`` / ``parse_rel_properties`` respectively.
         """
         props = self._schema_node_properties(), self._schema_relationship_properties()
         if parsed:
             return parse_node_properties(props[0]), parse_rel_properties(props[1])
         return props

     def _schema_node_properties(self):
         """Return the rows of ``db.schema.nodeTypeProperties()``."""
         query = """
         CALL db.schema.nodeTypeProperties()
         """
         return self.query(query)

     def _schema_relationship_properties(self):
         """Return the rows of ``db.schema.relTypeProperties()``."""
         query = """
         CALL db.schema.relTypeProperties()
         """
         return self.query(query)

     def fmt_schema(self):
         """Build one text block describing the current graph schema.

         The block has three titled sections, in this order: node labels and
         properties, relationship labels and properties, and the parsed
         relationships from ``schema(parsed=True)``.
         """
         parsed_schema = self.schema(parsed=True)
         parsed_props = self.schema_properties(parsed=True)
         # Only the relationship part of the parsed schema is included.
         parsed = (*parsed_props, parsed_schema[1])
         return "\n".join(
             [
                 f"{element}:\n{parsed[idx]}\n"
                 for idx, element in enumerate(
                     [
                         "NODE LABELS & PROPERTIES",
                         "RELATIONSHIP LABELS & PROPERTIES",
                         "RELATIONSHIPS",
                     ]
                 )
             ]
         )
Author	SHA1	Message	Date
Farouk Adeleke	5cdedc3403	Syntax fix	2025-12-27 02:59:43 -08:00
Farouk Adeleke	194597adbc	set LM	2025-12-27 02:32:33 -08:00