Optimized program with code

2025-12-27 19:59:14 -08:00
4 changed files with 6 additions and 6 deletions
--- a/config.json
+++ b/config.json
@@ -1,5 +1,5 @@
 {
  "model": "openrouter/openai/gpt-4o",
  "max_tokens": 1024,
-  "cache": false
+  "cache": true
 }
--- a/modules.py
+++ b/modules.py
@@ -27,7 +27,7 @@ class CypherFromQuestion(dspy.Signature):
 class GenerateCypherConfig(PrecompiledConfig):
    model: str = "openrouter/openai/gpt-4o"  # OPENROUTER ONLY
    max_tokens: int = 1024
-    cache: bool = False
+    cache: bool = True
 class GenerateCypher(PrecompiledProgram):
--- a/optimize.py
+++ b/optimize.py
@@ -61,7 +61,7 @@ train_set, val_set, test_set = process_dataset()
 optimizer = GEPA(
    metric=metric,
-    auto="light",
+    auto="medium",
    num_threads=32,
    track_stats=True,
    reflection_minibatch_size=3,
@@ -75,4 +75,4 @@ if __name__ == "__main__":
        trainset=train_set,
        valset=val_set,
    )
-    optimized_program.push_to_hub("farouk1/text-to-cypher-gepa", tag="v1.0.2", commit_message="Optimized program with code")
+    optimized_program.push_to_hub("farouk1/text-to-cypher-gepa", tag="v1.0.4", commit_message="Optimized program with code")
--- a/program.json
+++ b/program.json
@@ -4,7 +4,7 @@
    "train": [],
    "demos": [],
    "signature": {
-      "instructions": "Task: Generate Cypher statement to query a graph database.\nInstructions: Use only the provided relationship types and properties in the schema.\nDo not use any other relationship types or properties that are not provided in the schema.\nDo not include any explanations or apologies in your responses.\nDo not respond to any questions that might ask anything else than for you to construct a Cypher statement.\nDo not include any text except the generated Cypher statement.",
+      "instructions": "text\nTask: Given (1) a natural-language question and (2) a Neo4j schema description, output exactly ONE Cypher query that answers the question.\n\nINPUTS\n- question: the user request in natural language.\n- neo4j_schema: schema info given either as:\n  (a) JSON-like dict describing node labels, relationship types, directions, and properties, OR\n  (b) a textual summary listing node labels with properties and a list of allowed relationships as {start, type, end}, plus any relationship properties.\n\nABSOLUTE REQUIREMENTS (must follow)\n1) Output ONLY the Cypher query text.\n   - No reasoning, no explanations, no markdown/code fences, no headings, no extra characters.\n2) Use ONLY labels, relationship types, directions, and properties that appear in neo4j_schema.\n   - Do NOT invent labels/properties/relationships.\n   - If the question asks for something not representable, produce the closest possible query using only the schema.\n3) Respect relationship direction exactly as specified.\n   - If schema says Article -[:PUBLISHED_IN]-> Journal, do not reverse it.\n   - In JSON-like schemas, relationship direction may be expressed as \"in\" or \"out\" under a node\u2019s \"relationships\"; interpret it relative to that node.\n4) Return ONLY what the question asks for.\n   - If it asks for \u201ctitle values\u201d, return a.title (not whole nodes).\n   - If it asks for counts, return counts with clear aliases.\n   - Use DISTINCT when the question implies uniqueness.\n5) Produce exactly one valid Cypher statement.\n\nQUERY CONSTRUCTION RULES / COMMON PITFALLS TO AVOID\nA) Filtering on relationship properties:\n   - Put relationship property predicates on the relationship pattern or in WHERE, using correct Cypher syntax.\n   - Example: MATCH (a)-[r:PUBLISHED_IN]->(j) WHERE r.meta = '220'\n   - IMPORTANT: use Cypher string literals with single quotes (e.g., '220'), not JSON-style quotes.\nB) \u201cFirst N\u201d / \u201cN items\u201d semantics:\n   - If the question requests \u201cfirst 3\u201d or \u201c20 Article\u201d, include LIMIT N.\n   - If \u201cfirst\u201d implies ordering but no explicit sort key is given in schema/question, you may use LIMIT without ORDER BY.\n   - Do NOT return more columns than asked just to justify \u201cfirst\u201d.\nC) Aggregations and grouping:\n   - When returning both a field and a count, group by the non-aggregated field via WITH/RETURN.\n   - Apply HAVING-like filters using WITH ... WHERE (e.g., cities with >1 student).\n   - Example pattern:\n     MATCH (s:Student)\n     WITH s.city_code AS city, count(*) AS student_count\n     WHERE student_count > 1\n     RETURN city, student_count\nD) Date/time duration questions:\n   - Use only functions that work with the property datatypes shown.\n   - If begin/end are DATE_TIME, you may use duration/between logic; prefer robust checks:\n     - If asked \u201cexactly one month\u201d, check the full duration equals duration({months:1}) when possible, or use duration.between(f.begin, f.end) and compare appropriately.\n   - Do not introduce alternative date properties that aren\u2019t requested unless necessary and present in schema.\nE) String matching:\n   - For prefix constraints, use STARTS WITH.\n   - For exact text match, use equality.\nF) Combining strings/properties:\n   - Use `+` for concatenation and alias with AS as requested.\n\nOUTPUT\n- Exactly one Cypher query, and nothing else.",
      "fields": [
        {
          "prefix": "Question:",
@@ -27,7 +27,7 @@
    "lm": {
      "model": "openrouter/openai/gpt-4o",
      "model_type": "chat",
-      "cache": false,
+      "cache": true,
      "num_retries": 3,
      "finetuning_model": null,
      "launch_kwargs": {},