Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | 64c45ee66c | | |
| | b221ae4b42 | | |
| | 31e1186573 | | |
12
README.md
12
README.md
@@ -1,11 +1,9 @@
|
|||||||
# dspy-neo4j-knowledge-graph
|
# text-to-cypher
|
||||||
LLM-driven automated knowledge graph construction from text using DSPy and Neo4j.
|
LLM-driven automated knowledge graph construction from text using DSPy and Neo4j.
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
## Project Structure
|
## Project Structure
|
||||||
```sh
|
```sh
|
||||||
dspy-neo4j-knowledge-graph/
|
text-to-cypher/
|
||||||
├── README.md
|
├── README.md
|
||||||
├── examples/
|
├── examples/
|
||||||
│ └── wikipedia-abstracts-v0_0_1.ndjson
|
│ └── wikipedia-abstracts-v0_0_1.ndjson
|
||||||
@@ -79,7 +77,7 @@ MODAIC_TOKEN=<your-modaic-token>
|
|||||||
Run Neo4j locally using Docker:
|
Run Neo4j locally using Docker:
|
||||||
```sh
|
```sh
|
||||||
docker run \
|
docker run \
|
||||||
--name dspy-kg \
|
--name text-to-cypher \
|
||||||
--publish=7474:7474 \
|
--publish=7474:7474 \
|
||||||
--publish=7687:7687 \
|
--publish=7687:7687 \
|
||||||
--env "NEO4J_AUTH=none" \
|
--env "NEO4J_AUTH=none" \
|
||||||
@@ -153,8 +151,8 @@ neo4j.query(cypher.statement.replace('```', ''))
|
|||||||
|
|
||||||
### Stop Neo4j Docker Container
|
### Stop Neo4j Docker Container
|
||||||
```sh
|
```sh
|
||||||
docker stop dspy-kg
|
docker stop text-to-cypher
|
||||||
docker rm dspy-kg
|
docker rm text-to-cypher
|
||||||
```
|
```
|
||||||
|
|
||||||
### Remove Virtual Environment
|
### Remove Virtual Environment
|
||||||
|
|||||||
38
main.py
38
main.py
@@ -13,13 +13,17 @@ neo4j = Neo4j(
|
|||||||
password=os.getenv("NEO4J_PASSWORD"),
|
password=os.getenv("NEO4J_PASSWORD"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CypherFromText(dspy.Signature):
|
class CypherFromText(dspy.Signature):
|
||||||
"""Instructions:
|
"""Task: Generate Cypher statement to query a graph database.
|
||||||
Create a Cypher MERGE statement to model all entities and relationships found in the text following these guidelines:
|
Instructions: Use only the provided relationship types and properties in the schema.
|
||||||
- Refer to the provided schema and use existing or similar nodes, properties or relationships before creating new ones.
|
Do not use any other relationship types or properties that are not provided in the schema.
|
||||||
- Use generic categories for node and relationship labels."""
|
Do not include any explanations or apologies in your responses.
|
||||||
text = dspy.InputField(
|
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
|
||||||
desc="Text to model using nodes, properties and relationships."
|
Do not include any text except the generated Cypher statement.
|
||||||
|
"""
|
||||||
|
question = dspy.InputField(
|
||||||
|
desc="Question to model using a cypher statement."
|
||||||
)
|
)
|
||||||
neo4j_schema = dspy.InputField(
|
neo4j_schema = dspy.InputField(
|
||||||
desc="Current graph schema in Neo4j as a list of NODES and RELATIONSHIPS."
|
desc="Current graph schema in Neo4j as a list of NODES and RELATIONSHIPS."
|
||||||
@@ -28,10 +32,11 @@ class CypherFromText(dspy.Signature):
|
|||||||
desc="Cypher statement to merge nodes and relationships found in the text."
|
desc="Cypher statement to merge nodes and relationships found in the text."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class GenerateCypherConfig(PrecompiledConfig):
|
class GenerateCypherConfig(PrecompiledConfig):
|
||||||
model: str = "openai/gpt-4o"
|
model: str = "openai/gpt-4o"
|
||||||
max_tokens: int = 1024
|
max_tokens: int = 1024
|
||||||
|
|
||||||
|
|
||||||
class GenerateCypher(PrecompiledProgram):
|
class GenerateCypher(PrecompiledProgram):
|
||||||
config: GenerateCypherConfig
|
config: GenerateCypherConfig
|
||||||
@@ -44,16 +49,18 @@ class GenerateCypher(PrecompiledProgram):
|
|||||||
)
|
)
|
||||||
self.generate_cypher = dspy.ChainOfThought(CypherFromText)
|
self.generate_cypher = dspy.ChainOfThought(CypherFromText)
|
||||||
self.generate_cypher.set_lm(self.lm)
|
self.generate_cypher.set_lm(self.lm)
|
||||||
|
|
||||||
def forward(self, text: str, neo4j_schema: list[str]):
|
def forward(self, text: str, neo4j_schema: list[str]):
|
||||||
return self.generate_cypher(text=text, neo4j_schema=neo4j_schema)
|
return self.generate_cypher(text=text, neo4j_schema=neo4j_schema)
|
||||||
|
|
||||||
|
|
||||||
generate_cypher = GenerateCypher(GenerateCypherConfig())
|
generate_cypher = GenerateCypher(GenerateCypherConfig())
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
"""
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import json
|
import json
|
||||||
"""
|
|
||||||
examples_path = Path(__file__).parent / "examples" / "wikipedia-abstracts-v0_0_1.ndjson"
|
examples_path = Path(__file__).parent / "examples" / "wikipedia-abstracts-v0_0_1.ndjson"
|
||||||
with open(examples_path, "r") as f:
|
with open(examples_path, "r") as f:
|
||||||
for line in f:
|
for line in f:
|
||||||
@@ -63,10 +70,13 @@ if __name__ == "__main__":
|
|||||||
cypher = generate_cypher(text=text, neo4j_schema=neo4j.fmt_schema())
|
cypher = generate_cypher(text=text, neo4j_schema=neo4j.fmt_schema())
|
||||||
neo4j.query(cypher.statement.replace('```', ''))
|
neo4j.query(cypher.statement.replace('```', ''))
|
||||||
print("CYPHER STATEMENT:\n", cypher.statement)
|
print("CYPHER STATEMENT:\n", cypher.statement)
|
||||||
"""
|
|
||||||
schema = neo4j.fmt_schema()
|
schema = neo4j.fmt_schema()
|
||||||
print("SCHEMA:\n", schema)
|
print("SCHEMA:\n", schema)
|
||||||
generate_cypher.push_to_hub("farouk1/text-to-cypher", with_code=True, tag="v0.0.5", commit_message="Update README.md")
|
"""
|
||||||
|
generate_cypher.push_to_hub(
|
||||||
|
"farouk1/text-to-cypher",
|
||||||
|
with_code=True,
|
||||||
|
tag="v0.0.8",
|
||||||
|
commit_message="Update README.md",
|
||||||
|
)
|
||||||
|
|||||||
@@ -4,11 +4,11 @@
|
|||||||
"train": [],
|
"train": [],
|
||||||
"demos": [],
|
"demos": [],
|
||||||
"signature": {
|
"signature": {
|
||||||
"instructions": "Instructions:\nCreate a Cypher MERGE statement to model all entities and relationships found in the text following these guidelines:\n- Refer to the provided schema and use existing or similar nodes, properties or relationships before creating new ones.\n- Use generic categories for node and relationship labels.",
|
"instructions": "Task: Generate Cypher statement to query a graph database. \nInstructions: Use only the provided relationship types and properties in the schema. \nDo not use any other relationship types or properties that are not provided in the schema. \nDo not include any explanations or apologies in your responses. \nDo not respond to any questions that might ask anything else than for you to construct a Cypher statement. \nDo not include any text except the generated Cypher statement.",
|
||||||
"fields": [
|
"fields": [
|
||||||
{
|
{
|
||||||
"prefix": "Text:",
|
"prefix": "Question:",
|
||||||
"description": "Text to model using nodes, properties and relationships."
|
"description": "Question to model using a cypher statement."
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"prefix": "Neo 4 J Schema:",
|
"prefix": "Neo 4 J Schema:",
|
||||||
|
|||||||
@@ -4,4 +4,4 @@ version = "0.1.0"
|
|||||||
description = "Add your description here"
|
description = "Add your description here"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.13"
|
requires-python = ">=3.13"
|
||||||
dependencies = ["dspy>=3.0.4", "modaic>=0.8.2", "neo4j~=5.18.0", "python-dotenv~=1.0.1"]
|
dependencies = ["datasets>=4.4.2", "dspy>=3.0.4", "modaic>=0.8.2", "neo4j~=5.18.0", "python-dotenv~=1.0.1"]
|
||||||
|
|||||||
Reference in New Issue
Block a user