diff --git a/auto_classes.json b/auto_classes.json
new file mode 100644
index 0000000..52efeb3
--- /dev/null
+++ b/auto_classes.json
@@ -0,0 +1,4 @@
+{
+  "AutoConfig": "modules.GenerateCypherConfig",
+  "AutoProgram": "modules.GenerateCypher"
+}
\ No newline at end of file
diff --git a/modules.py b/modules.py
new file mode 100644
index 0000000..c1f6285
--- /dev/null
+++ b/modules.py
@@ -0,0 +1,74 @@
+import os
+import dspy
+from dotenv import load_dotenv
+from modaic import PrecompiledProgram, PrecompiledConfig
+
+load_dotenv()
+
+
+class CypherFromQuestion(dspy.Signature):
+    """Task: Generate Cypher statement to query a graph database.
+    Instructions: Use only the provided relationship types and properties in the schema.
+    Do not use any other relationship types or properties that are not provided in the schema.
+    Do not include any explanations or apologies in your responses.
+    Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
+    Do not include any text except the generated Cypher statement.
+    """
+
+    question = dspy.InputField(
+        desc="Question to model using a cypher statement. Use only the provided relationship types and properties in the schema."
+    )
+    neo4j_schema = dspy.InputField(
+        desc="Current graph schema in Neo4j as a list of NODES and RELATIONSHIPS."
+    )
+    statement = dspy.OutputField(desc="Cypher statement to query the graph database.")
+
+
+class GenerateCypherConfig(PrecompiledConfig):
+    model: str = "openrouter/openai/gpt-4o"  # OPENROUTER ONLY
+    max_tokens: int = 1024
+
+
+class GenerateCypher(PrecompiledProgram):
+    config: GenerateCypherConfig
+
+    def __init__(self, config: GenerateCypherConfig, **kwargs):
+        super().__init__(config=config, **kwargs)
+        self.lm = dspy.LM(
+            model=config.model,
+            max_tokens=config.max_tokens,
+            api_base="https://openrouter.ai/api/v1",
+        )
+        self.generate_cypher = dspy.ChainOfThought(CypherFromQuestion)
+        self.generate_cypher.set_lm(self.lm)
+
+    def forward(self, question: str, neo4j_schema: list[str]):
+        return self.generate_cypher(question=question, neo4j_schema=neo4j_schema)
+
+
+generate_cypher = GenerateCypher(GenerateCypherConfig())
+
+if __name__ == "__main__":
+    """
+    from pathlib import Path
+    import json
+
+    examples_path = Path(__file__).parent / "examples" / "wikipedia-abstracts-v0_0_1.ndjson"
+    with open(examples_path, "r") as f:
+         for line in f:
+             data = json.loads(line)
+             text = data["text"]
+             print("TEXT TO PROCESS:\n", text[:50])
+             cypher = generate_cypher(text=text, neo4j_schema=neo4j.fmt_schema())
+             neo4j.query(cypher.statement.replace('```', ''))
+             print("CYPHER STATEMENT:\n", cypher.statement)
+    
+    schema = neo4j.fmt_schema()   
+    print("SCHEMA:\n", schema)
+    """
+    generate_cypher.push_to_hub(
+        "farouk1/text-to-cypher",
+        with_code=True,
+        tag="v0.0.9",
+        commit_message="Update README.md",
+    )
diff --git a/optimize.py b/optimize.py
new file mode 100644
index 0000000..a37f1f2
--- /dev/null
+++ b/optimize.py
@@ -0,0 +1,78 @@
+import dspy
+from dspy import GEPA
+from modules import generate_cypher
+from datasets import load_dataset
+
+
+def process_dataset():
+    train_split = load_dataset("neo4j/text2cypher-2025v1")["train"]
+    train_split = [
+        dspy.Example(
+            {
+                "question": x["question"],
+                "neo4j_schema": x["schema"],
+                "expected_cypher": x["cypher"],
+            }
+        ).with_inputs("question", "neo4j_schema")
+        for x in train_split
+    ]
+    import random
+
+    random.Random(0).shuffle(train_split)
+    train_split = train_split[:200]
+    tot_num = len(train_split)
+
+    test_split = load_dataset("neo4j/text2cypher-2025v1")["test"]
+    test_split = [
+        dspy.Example(
+            {
+                "question": x["question"],
+                "neo4j_schema": x["schema"],
+                "expected_cypher": x["cypher"],
+            }
+        ).with_inputs("question", "neo4j_schema")
+        for x in test_split
+    ]
+
+    train_set = train_split[: int(0.5 * tot_num)]
+    val_set = train_split[int(0.5 * tot_num) :]
+    test_set = test_split[:200]
+
+    return train_set, val_set, test_set
+
+
+def metric(example, prediction, trace=None, pred_name=None, pred_trace=None):
+    from sacrebleu import sentence_bleu
+
+    expected_cypher = example["expected_cypher"]
+    generated_cypher = prediction.statement
+
+    # Calculate sentence-level BLEU (Google BLEU)
+    bleu_score = sentence_bleu(
+        generated_cypher,
+        [expected_cypher],  # Reference as a list
+    ).score / 100.0  # Normalize to 0-1
+
+    feedback = f"BLEU score: {bleu_score:.3f}"
+    return dspy.Prediction(score=bleu_score, feedback=feedback)
+
+
+train_set, val_set, test_set = process_dataset()
+
+optimizer = GEPA(
+    metric=metric,
+    auto="light",
+    num_threads=32,
+    track_stats=True,
+    reflection_minibatch_size=3,
+    reflection_lm=dspy.LM(model="gpt-5.2", temperature=1.0, max_tokens=32000),
+)
+
+
+if __name__ == "__main__":
+    optimized_program = optimizer.compile(
+        generate_cypher,
+        trainset=train_set,
+        valset=val_set,
+    )
+    optimized_program.push_to_hub("farouk1/text-to-cypher-gepa", tag="v1.0.1", commit_message="Optimized program with code")
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..84a5c0d
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,7 @@
+[project]
+name = "text-to-cypher-gepa"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.13"
+dependencies = ["datasets>=4.4.2", "dspy>=3.0.4", "modaic>=0.8.3", "neo4j~=5.18.0", "python-dotenv~=1.0.1", "sacrebleu>=2.5.1"]