From eeb1ccab64ffb979ee9801d786578de83a81123f Mon Sep 17 00:00:00 2001 From: Farouk Adeleke Date: Sat, 27 Dec 2025 02:17:22 -0800 Subject: [PATCH] init --- README.md | 85 ++++++++++++++++++++++++++++++++++++++++++++++- auto_classes.json | 4 +++ config.json | 5 +++ main.py | 47 ++++++++++++++++++++++++++ program.json | 9 +++++ pyproject.toml | 7 ++++ 6 files changed, 156 insertions(+), 1 deletion(-) create mode 100644 auto_classes.json create mode 100644 config.json create mode 100644 main.py create mode 100644 program.json create mode 100644 pyproject.toml diff --git a/README.md b/README.md index 462da40..f6fcf51 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,85 @@ -# text-to-cypher +# dspy-neo4j-knowledge-graph +LLM-driven automated knowledge graph construction from text using DSPy and Neo4j. +![Knowledge Graph](img/kg.png) + +## Project Structure +```sh +dspy-neo4j-knowledge-graph/ +├── README.md +├── examples +├── requirements.txt +├── run.py +└── src +``` + +## Description +Model entities and relationships and build a Knowledge Graph using DSPy, Neo4j, and OpenAI's GPT-4. When given a paragraph or block of text, the app uses the DSPy library and OpenAI's GPT-4 to extract entities and relationships and generate a Cypher statement which is run in Neo4j to create the Knowledge Graph. + +### Optimized Schema Context +The current graph schema is passed to the model as a list of nodes, relationships and properties in the context of the prompt. This allows the model to use elements from the existing schema and make connections between existing entities and relationships. + +## Quick Start +1. Clone the repository. +2. Create a [Python virtual environment](#python-virtual-environment) and install the required packages. +3. Create a `.env` file and add the required [environment variables](#environment-variables). +4. [Run Neo4j using Docker](#usage). +5. Run `python3 run.py` and paste your text in the prompt. +6. 
Navigate to `http://localhost:7474/browser/` to view the Knowledge Graph in Neo4j Browser. + +## Installation + +### Prerequisites +* Python 3.13 or later +* OpenAI API Key +* Docker + +### Environment Variables +Before you begin, create a `.env` file containing your Neo4j connection URI and your OpenAI API key. +```sh +NEO4J_URI=bolt://localhost:7687 +OPENAI_API_KEY= +``` + +### Python Virtual Environment +Create a Python virtual environment and install the required packages. +```sh +python3 -m venv .venv +source .venv/bin/activate +pip install --upgrade pip +pip install -r requirements.txt +``` + +## Usage +Run Neo4j using Docker. +```sh +docker run \ + --name dspy-kg \ + --publish=7474:7474 \ + --publish=7687:7687 \ + --env "NEO4J_AUTH=none" \ + neo4j:5.15 +``` + +## Clean Up +Stop and remove the Neo4j container. +```sh +docker stop dspy-kg +docker rm dspy-kg +``` + +Deactivate and remove the Python virtual environment. +```sh +deactivate +rm -rf .venv +``` + +## License +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 
+
+## References
+- [DSPy docs](https://dspy-docs.vercel.app/docs/intro)
+- [Neo4j docs](https://neo4j.com/docs/)
+
+## Contact
+**Primary Contact:** [@chrisammon3000](https://github.com/chrisammon3000)
diff --git a/auto_classes.json b/auto_classes.json
new file mode 100644
index 0000000..5d1a5e1
--- /dev/null
+++ b/auto_classes.json
@@ -0,0 +1,4 @@
+{
+  "AutoConfig": "main.GenerateCypherConfig",
+  "AutoProgram": "main.GenerateCypher"
+}
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..5cec6cd
--- /dev/null
+++ b/config.json
@@ -0,0 +1,5 @@
+{
+  "model": "gpt-4",
+  "neo4j_schema": [],
+  "max_tokens": 1024
+}
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..4c7a59b
--- /dev/null
+++ b/main.py
@@ -0,0 +1,71 @@
+from dotenv import load_dotenv
+import dspy
+from modaic import PrecompiledProgram, PrecompiledConfig
+
+load_dotenv()
+
+class CypherFromText(dspy.Signature):
+    """Instructions:
+    Create a Cypher MERGE statement to model all entities and relationships found in the text following these guidelines:
+    - Refer to the provided schema and use existing or similar nodes, properties or relationships before creating new ones.
+    - Use generic categories for node and relationship labels."""
+    text = dspy.InputField(
+        desc="Text to model using nodes, properties and relationships."
+    )
+    neo4j_schema = dspy.InputField(
+        desc="Current graph schema in Neo4j as a list of NODES and RELATIONSHIPS."
+    )
+    statement = dspy.OutputField(
+        desc="Cypher statement to merge nodes and relationships found in the text."
+    )
+
+class GenerateCypherConfig(PrecompiledConfig):
+    """Settings consumed by GenerateCypher."""
+
+    # Schema that GenerateCypher.forward falls back to when the caller passes none.
+    neo4j_schema: list[str] = []
+    # Model identifier passed to dspy.LM.
+    model: str = "gpt-4"
+    # Token limit passed to dspy.LM as max_tokens.
+    max_tokens: int = 1024
+
+
+class GenerateCypher(PrecompiledProgram):
+    """Program that turns text into a Cypher statement via dspy.ChainOfThought."""
+
+    config: GenerateCypherConfig
+
+    def __init__(self, config: GenerateCypherConfig, **kwargs):
+        """Create the language model and predictor described by ``config``.
+
+        Args:
+            config: Supplies ``model`` and ``max_tokens`` for ``dspy.LM``.
+            **kwargs: Forwarded unchanged to the parent initializer.
+        """
+        super().__init__(config, **kwargs)
+        self.lm = dspy.LM(
+            model=config.model,
+            max_tokens=config.max_tokens,
+        )
+        self.generate_cypher = dspy.ChainOfThought(CypherFromText)
+        # Bind the configured LM to the predictor; without this the LM built
+        # above is never used and dspy's globally configured LM applies.
+        self.generate_cypher.set_lm(self.lm)
+
+    def forward(self, text: str, neo4j_schema: list[str] | None = None):
+        """Run the predictor on ``text`` and return its prediction.
+
+        Args:
+            text: Text to model as nodes, properties and relationships.
+            neo4j_schema: Current graph schema. When omitted, the schema
+                stored on ``self.config`` is used.
+        """
+        # NOTE(review): assumes PrecompiledProgram.__init__ stores the config as self.config - confirm.
+        if neo4j_schema is None:
+            neo4j_schema = self.config.neo4j_schema
+        return self.generate_cypher(text=text, neo4j_schema=neo4j_schema)
+
+
+if __name__ == "__main__":
+    generate_cypher = GenerateCypher(GenerateCypherConfig())
+    generate_cypher.push_to_hub("farouk1/text-to-cypher", with_code=True, tag="v0.0.1", commit_message="init")
diff --git a/program.json b/program.json
new file mode 100644
index 0000000..ac56cba
--- /dev/null
+++ b/program.json
@@ -0,0 +1,9 @@
+{
+  "metadata": {
+    "dependency_versions": {
+      "python": "3.13",
+      "dspy": "3.0.4",
+      "cloudpickle": "3.1"
+    }
+  }
+}
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..131df11
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,7 @@
+[project]
+name = "text-to-cypher"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.13"
+dependencies = ["dspy>=3.0.4", "modaic>=0.8.2", "neo4j~=5.18.0", "python-dotenv~=1.0.1"]