3 Commits

Author SHA1 Message Date
501c224540 Update README.md 2025-12-27 05:07:01 -08:00
64c45ee66c Update README.md 2025-12-27 04:33:07 -08:00
b221ae4b42 Update README.md 2025-12-27 03:30:28 -08:00
4 changed files with 38 additions and 35 deletions

View File

@@ -1,15 +1,10 @@
# dspy-neo4j-knowledge-graph # text-to-cypher
LLM-driven automated knowledge graph construction from text using DSPy and Neo4j. LLM-driven automated knowledge graph construction from text using DSPy and Neo4j.
![Knowledge Graph](img/kg.png)
## Project Structure ## Project Structure
```sh ```sh
dspy-neo4j-knowledge-graph/ text-to-cypher/
├── README.md ├── README.md
├── examples/
│ └── wikipedia-abstracts-v0_0_1.ndjson
├── img/
├── main.py ├── main.py
├── pyproject.toml ├── pyproject.toml
├── uv.lock ├── uv.lock
@@ -79,7 +74,7 @@ MODAIC_TOKEN=<your-modaic-token>
Run Neo4j locally using Docker: Run Neo4j locally using Docker:
```sh ```sh
docker run \ docker run \
--name dspy-kg \ --name text-to-cypher \
--publish=7474:7474 \ --publish=7474:7474 \
--publish=7687:7687 \ --publish=7687:7687 \
--env "NEO4J_AUTH=none" \ --env "NEO4J_AUTH=none" \
@@ -153,8 +148,8 @@ neo4j.query(cypher.statement.replace('```', ''))
### Stop Neo4j Docker Container ### Stop Neo4j Docker Container
```sh ```sh
docker stop dspy-kg docker stop text-to-cypher
docker rm dspy-kg docker rm text-to-cypher
``` ```
### Remove Virtual Environment ### Remove Virtual Environment

48
main.py
View File

@@ -13,25 +13,29 @@ neo4j = Neo4j(
password=os.getenv("NEO4J_PASSWORD"), password=os.getenv("NEO4J_PASSWORD"),
) )
class CypherFromText(dspy.Signature):
"""Instructions: class CypherFromQuestion(dspy.Signature):
Create a Cypher MERGE statement to model all entities and relationships found in the text following these guidelines: """Task: Generate Cypher statement to query a graph database.
- Refer to the provided schema and use existing or similar nodes, properties or relationships before creating new ones. Instructions: Use only the provided relationship types and properties in the schema.
- Use generic categories for node and relationship labels.""" Do not use any other relationship types or properties that are not provided in the schema.
text = dspy.InputField( Do not include any explanations or apologies in your responses.
desc="Text to model using nodes, properties and relationships." Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
"""
question = dspy.InputField(
desc="Question to model using a cypher statement. Use only the provided relationship types and properties in the schema."
) )
neo4j_schema = dspy.InputField( neo4j_schema = dspy.InputField(
desc="Current graph schema in Neo4j as a list of NODES and RELATIONSHIPS." desc="Current graph schema in Neo4j as a list of NODES and RELATIONSHIPS."
) )
statement = dspy.OutputField( statement = dspy.OutputField(desc="Cypher statement to query the graph database.")
desc="Cypher statement to merge nodes and relationships found in the text."
)
class GenerateCypherConfig(PrecompiledConfig): class GenerateCypherConfig(PrecompiledConfig):
model: str = "openai/gpt-4o" model: str = "openai/gpt-4o"
max_tokens: int = 1024 max_tokens: int = 1024
class GenerateCypher(PrecompiledProgram): class GenerateCypher(PrecompiledProgram):
config: GenerateCypherConfig config: GenerateCypherConfig
@@ -42,18 +46,20 @@ class GenerateCypher(PrecompiledProgram):
model=config.model, model=config.model,
max_tokens=config.max_tokens, max_tokens=config.max_tokens,
) )
self.generate_cypher = dspy.ChainOfThought(CypherFromText) self.generate_cypher = dspy.ChainOfThought(CypherFromQuestion)
self.generate_cypher.set_lm(self.lm) self.generate_cypher.set_lm(self.lm)
def forward(self, text: str, neo4j_schema: list[str]): def forward(self, question: str, neo4j_schema: list[str]):
return self.generate_cypher(text=text, neo4j_schema=neo4j_schema) return self.generate_cypher(question=question, neo4j_schema=neo4j_schema)
generate_cypher = GenerateCypher(GenerateCypherConfig()) generate_cypher = GenerateCypher(GenerateCypherConfig())
if __name__ == "__main__": if __name__ == "__main__":
"""
from pathlib import Path from pathlib import Path
import json import json
"""
examples_path = Path(__file__).parent / "examples" / "wikipedia-abstracts-v0_0_1.ndjson" examples_path = Path(__file__).parent / "examples" / "wikipedia-abstracts-v0_0_1.ndjson"
with open(examples_path, "r") as f: with open(examples_path, "r") as f:
for line in f: for line in f:
@@ -67,7 +73,9 @@ if __name__ == "__main__":
schema = neo4j.fmt_schema() schema = neo4j.fmt_schema()
print("SCHEMA:\n", schema) print("SCHEMA:\n", schema)
""" """
generate_cypher.push_to_hub("farouk1/text-to-cypher", with_code=True, tag="v0.0.6", commit_message="Update README.md") generate_cypher.push_to_hub(
"farouk1/text-to-cypher",
with_code=True,
tag="v0.0.9",
commit_message="Update README.md",
)

View File

@@ -4,11 +4,11 @@
"train": [], "train": [],
"demos": [], "demos": [],
"signature": { "signature": {
"instructions": "Instructions:\nCreate a Cypher MERGE statement to model all entities and relationships found in the text following these guidelines:\n- Refer to the provided schema and use existing or similar nodes, properties or relationships before creating new ones.\n- Use generic categories for node and relationship labels.", "instructions": "Task: Generate Cypher statement to query a graph database.\nInstructions: Use only the provided relationship types and properties in the schema.\nDo not use any other relationship types or properties that are not provided in the schema.\nDo not include any explanations or apologies in your responses.\nDo not respond to any questions that might ask anything else than for you to construct a Cypher statement.\nDo not include any text except the generated Cypher statement.",
"fields": [ "fields": [
{ {
"prefix": "Text:", "prefix": "Question:",
"description": "Text to model using nodes, properties and relationships." "description": "Question to model using a cypher statement. Use only the provided relationship types and properties in the schema."
}, },
{ {
"prefix": "Neo 4 J Schema:", "prefix": "Neo 4 J Schema:",
@@ -20,7 +20,7 @@
}, },
{ {
"prefix": "Statement:", "prefix": "Statement:",
"description": "Cypher statement to merge nodes and relationships found in the text." "description": "Cypher statement to query the graph database."
} }
] ]
}, },

View File

@@ -4,4 +4,4 @@ version = "0.1.0"
description = "Add your description here" description = "Add your description here"
readme = "README.md" readme = "README.md"
requires-python = ">=3.13" requires-python = ">=3.13"
dependencies = ["datasets>=4.4.2", "dspy>=3.0.4", "modaic>=0.8.2", "neo4j~=5.18.0", "python-dotenv~=1.0.1"] dependencies = ["datasets>=4.4.2", "dspy>=3.0.4", "modaic>=0.8.2", "neo4j~=5.18.0", "python-dotenv~=1.0.1", "sacrebleu>=2.5.1"]