From ec07718eb30680e6249fef64227374f3d8f8f588 Mon Sep 17 00:00:00 2001
From: Farouk Adeleke <fadeleke57@gmail.com>
Date: Tue, 4 Nov 2025 22:51:27 -0500
Subject: [PATCH] Switch gpt-4o model settings to claude-haiku-4.5; replace sample prompt with product-review analysis

---
 agent.json     |   2 +-
 agent/index.py |   2 +-
 config.json    |   2 +-
 main.py        | 108 +++++++++++++++++++++----------------------------
 4 files changed, 50 insertions(+), 64 deletions(-)

diff --git a/agent.json b/agent.json
index 8464c61..99bb0b0 100644
--- a/agent.json
+++ b/agent.json
@@ -25,7 +25,7 @@
       ]
     },
     "lm": {
-      "model": "openai/gpt-4o",
+      "model": "openrouter/anthropic/claude-haiku-4.5",
       "model_type": "chat",
       "cache": true,
       "num_retries": 3,
diff --git a/agent/index.py b/agent/index.py
index fc2b1a0..fdeb921 100644
--- a/agent/index.py
+++ b/agent/index.py
@@ -5,7 +5,7 @@ from modaic import PrecompiledAgent, PrecompiledConfig
 
 class PromptToSignatureConfig(PrecompiledConfig):
     lm: str = "openrouter/anthropic/claude-haiku-4.5"
-    refine_lm: str = "openai/gpt-4o"
+    refine_lm: str = "openrouter/anthropic/claude-haiku-4.5"
     max_tokens: int = 16000
     temperature: float = 1.0
     max_attempts_to_refine: int = 5
diff --git a/config.json b/config.json
index ca5585d..f494070 100644
--- a/config.json
+++ b/config.json
@@ -1,6 +1,6 @@
 {
   "lm": "openrouter/anthropic/claude-haiku-4.5",
-  "refine_lm": "openai/gpt-4o",
+  "refine_lm": "openrouter/anthropic/claude-haiku-4.5",
   "max_tokens": 16000,
   "temperature": 1.0,
   "max_attempts_to_refine": 5
diff --git a/main.py b/main.py
index f1ab713..b697804 100644
--- a/main.py
+++ b/main.py
@@ -1,87 +1,73 @@
 from agent import PromptToSignatureAgent, PromptToSignatureConfig
 
 agent = PromptToSignatureAgent(PromptToSignatureConfig())
-PROMPT = """**INPUT FORMAT**: Raw clinical notes in free-text format, typically 200-2000 words, containing unstructured medical documentation from patient encounters including history, examination findings, diagnoses, treatment plans, and follow-up instructions.
+PROMPT = """**INPUT FORMAT**: Customer product reviews in natural language text, ranging from 50-1000 words. May include informal language, slang, misspellings, emojis, and mixed sentiments. Reviews may be from various e-commerce platforms (Amazon, eBay, specialized retail sites).
 
-**TASK DESCRIPTION**: You are a medical information extraction system. Analyze the provided clinical notes and perform comprehensive structured data extraction along with risk stratification. Your task involves multiple sub-tasks executed simultaneously.
+**TASK DESCRIPTION**: You are an advanced sentiment analysis and feature extraction system for e-commerce platforms. Your goal is to parse customer reviews and extract granular insights about both overall sentiment and feature-specific opinions to help businesses understand customer satisfaction at a detailed level.
 
 **EXTRACTION REQUIREMENTS**:
-- **Patient Demographics**: Extract full legal name, age (in years), biological sex/gender, date of birth (format: YYYY-MM-DD), medical record number (MRN) if present
-- **Primary Diagnosis**: Identify the main diagnosis with corresponding ICD-10 code, include diagnostic certainty level (confirmed/suspected/rule-out)
-- **Secondary Diagnoses**: List all comorbidities and additional conditions mentioned, each with ICD-10 codes where applicable
-- **Medications**: Extract complete medication list including generic and brand names, dosages with units (mg, mcg, mL), frequency (BID, TID, QID, PRN), route of administration (PO, IV, IM, topical), and duration if specified
-- **Allergies**: Document all allergies with allergen name, reaction type (rash, anaphylaxis, nausea, etc.), and severity classification (mild/moderate/severe/life-threatening)
-- **Vital Signs**: Extract most recent measurements - blood pressure (systolic/diastolic in mmHg), heart rate (bpm), temperature (°F or °C with unit), respiratory rate (breaths/min), oxygen saturation (%), and pain score (0-10 scale)
-- **Laboratory Results**: Identify all lab values mentioned with test name, numerical result, unit of measurement, reference range, and flag if abnormal (high/low/critical)
-- **Appointments**: Extract scheduled follow-up dates, appointment types (follow-up, specialist referral, procedure), and provider names
+- **Overall Sentiment**: Determine the general sentiment of the entire review using a 4-category classification (positive/negative/mixed/neutral). Mixed indicates both positive and negative sentiments present; neutral indicates factual statements without emotional valence.
+- **Product Features Mentioned**: Identify which of these standard e-commerce features are discussed: product quality, value for money, delivery/shipping experience, customer service interactions, packaging quality, product description accuracy, ease of use, durability, aesthetics/appearance, size/fit accuracy
+- **Feature-Specific Sentiment**: For each feature mentioned, assign a sentiment score (-2 very negative, -1 negative, 0 neutral, +1 positive, +2 very positive)
+- **Star Rating Prediction**: Based on the text sentiment and feature analysis, predict what star rating (1-5 stars) the customer likely gave, with confidence score (0-100%)
+- **Purchase Verification**: Detect phrases indicating verified purchase ("verified purchase", "bought this", "received this product", "purchased from") vs. uncertain authenticity
+- **Competitor Comparison**: Identify if the review compares the product to competitors, extract competitor names/products mentioned, and determine if comparison is favorable or unfavorable
+- **Specific Issues**: Extract concrete complaints (e.g., "broke after 2 weeks", "arrived damaged", "wrong color sent", "missing parts")
+- **Specific Praise**: Extract concrete positive mentions (e.g., "excellent battery life", "fast shipping", "exceeded expectations")
+- **Review Helpfulness Indicators**: Classify review as "likely_helpful" or "not_helpful" based on linguistic markers (specificity, length, balanced perspective, constructive criticism vs. pure emotion)
 
 **CLASSIFICATION REQUIREMENTS**:
-- **Urgency Level**: Classify the case into one of four categories:
-  - ROUTINE: Stable patient, chronic condition management, no acute concerns
-  - URGENT: Requires attention within 24-48 hours, acute but not life-threatening condition
-  - EMERGENCY: Immediate intervention required, potentially life-threatening presentation
-  - CRITICAL: Life-threatening emergency requiring immediate intervention (ICU-level care)
+- **Review Quality Score**: Rate the review quality from 1-5 where:
+  - 1 = Unhelpful (pure emotion, no details, spam-like)
+  - 2 = Minimal info (very brief, vague)
+  - 3 = Moderate (some useful details)
+  - 4 = Good (specific, balanced, informative)
+  - 5 = Excellent (detailed, comprehensive, fair assessment)
 
 **OUTPUT FORMAT**: Return the following schema:
 {
-  "patient_demographics": {
-    "name": "string",
-    "age": "integer",
-    "gender": "string",
-    "dob": "YYYY-MM-DD",
-    "mrn": "string or null"
+  "overall_sentiment": {
+    "classification": "positive|negative|mixed|neutral",
+    "confidence_score": "float (0-1)"
   },
-  "primary_diagnosis": {
-    "condition": "string",
-    "icd10_code": "string",
-    "certainty": "confirmed|suspected|rule-out"
+  "predicted_star_rating": {
+    "stars": "integer (1-5)",
+    "confidence": "integer (0-100)"
   },
-  "secondary_diagnoses": [
-    {"condition": "string", "icd10_code": "string"}
-  ],
-  "medications": [
+  "features_analyzed": [
     {
-      "name": "string",
-      "dosage": "string",
-      "frequency": "string",
-      "route": "string",
-      "duration": "string or null"
+      "feature_name": "string",
+      "mentioned": "boolean",
+      "sentiment_score": "integer (-2 to +2)",
+      "supporting_quote": "string (relevant excerpt from review)"
     }
   ],
-  "allergies": [
-    {
-      "allergen": "string",
-      "reaction": "string",
-      "severity": "mild|moderate|severe|life-threatening"
-    }
-  ],
-  "vital_signs": {
-    "blood_pressure": "string (systolic/diastolic)",
-    "heart_rate": "integer",
-    "temperature": "float with unit",
-    "respiratory_rate": "integer",
-    "oxygen_saturation": "integer",
-    "pain_score": "integer (0-10)"
+  "purchase_verification": {
+    "appears_verified": "boolean",
+    "indicators": ["array of strings showing evidence"]
   },
-  "lab_results": [
+  "competitor_comparison": {
+    "present": "boolean",
+    "competitors_mentioned": ["array of strings"],
+    "comparison_favorability": "favorable|unfavorable|neutral"
+  },
+  "specific_issues": [
     {
-      "test_name": "string",
-      "value": "float",
-      "unit": "string",
-      "reference_range": "string",
-      "flag": "normal|high|low|critical"
+      "issue": "string",
+      "severity": "minor|moderate|major",
+      "quote": "string"
     }
   ],
-  "appointments": [
+  "specific_praise": [
     {
-      "date": "YYYY-MM-DD",
-      "type": "string",
-      "provider": "string"
+      "praise_point": "string",
+      "quote": "string"
     }
   ],
-  "urgency_classification": {
-    "level": "ROUTINE|URGENT|EMERGENCY|CRITICAL",
-    "reasoning": "string (brief explanation for classification)"
+  "review_helpfulness": {
+    "classification": "likely_helpful|not_helpful",
+    "quality_score": "integer (1-5)",
+    "reasoning": "string"
   }
 }
 """