Use structured outputs with Pydantic models instead of text parsing

2025-12-08 12:21:51 -05:00
parent 6a6d2caa4a
commit 4df7bb42b4
3 changed files with 287 additions and 51 deletions
--- a/claude_dspy/agent.py
+++ b/claude_dspy/agent.py
@@ -290,7 +290,11 @@ class ClaudeCode(PrecompiledProgram):
        return ClaudeSDKClient(options=options)

    def _build_prompt(self, input_value: str) -> str:
-        """Build prompt from signature docstring, field descriptions, and input value."""
+        """Build prompt from signature docstring, field descriptions, and input value.
+
+        Note: When using structured outputs, the SDK handles JSON formatting automatically
+        via the output_format parameter, so we don't add JSON instructions to the prompt.
+        """
        prompt_parts = []

        # add signature docstring if present
@@ -325,20 +329,18 @@ class ClaudeCode(PrecompiledProgram):
        if output_desc:
            prompt_parts.append(f"\nPlease produce the following output: {output_desc}")

-        # for Pydantic outputs, add explicit JSON instructions
-        if self.output_format:
-            schema = self.output_format["schema"]
-            prompt_parts.append(
-                f"\nYou MUST respond with ONLY valid JSON matching this schema:\n"
-                f"{json.dumps(schema, indent=2)}\n\n"
-                f"Do not include any explanatory text, markdown formatting, or code blocks. "
-                f"Return ONLY the raw JSON object."
-            )
+        # Don't add JSON instructions - the SDK handles structured outputs via output_format
+        # The schema is passed through ClaudeAgentOptions and enforced by the SDK

        return "\n\n".join(prompt_parts)

    def _get_output_format(self) -> Optional[dict[str, Any]]:
-        """Get output format configuration for structured outputs."""
+        """Get output format configuration for structured outputs.
+
+        Supports:
+        - Direct Pydantic models: MyModel
+        - Generic types: list[MyModel], dict[str, MyModel]
+        """
        output_type = self.output_field.annotation

        if is_pydantic_model(output_type):
@@ -350,43 +352,68 @@ class ClaudeCode(PrecompiledProgram):

        return None

-    async def _run_async(self, prompt: str) -> tuple[str, list[TraceItem], Usage]:
-        """Run the agent asynchronously and collect results."""
+    async def _run_async(
+        self, prompt: str
+    ) -> tuple[str | dict | list | None, list[TraceItem], Usage]:
+        """Run the agent asynchronously and collect results.
+
+        Returns:
+            - response: For structured outputs, returns dict/list from structured_output.
+                       For text outputs, returns string from result or text blocks.
+            - trace: Execution trace items
+            - usage: Token usage statistics
+        """
+        print(f"[ClaudeCode._run_async] Initializing client (connected={self._is_connected})")
+
        # create client if needed
        if self._client is None:
+            print(f"[ClaudeCode._run_async] Creating new ClaudeSDKClient")
            self._client = self._create_client()

        # connect if not already connected
        if not self._is_connected:
+            print(f"[ClaudeCode._run_async] Connecting to Claude SDK...")
            await self._client.connect()
            self._is_connected = True
+            print(f"[ClaudeCode._run_async] Connected successfully")

        # send query (output_format already configured in options)
+        print(f"[ClaudeCode._run_async] Sending query to agent...")
        await self._client.query(prompt)
+        print(f"[ClaudeCode._run_async] Query sent, waiting for response...")

        # collect messages and build trace
        trace: list[TraceItem] = []
        usage = Usage()
        response_text = ""
+        structured_output = None
+        message_count = 0

        async for message in self._client.receive_response():
+            message_count += 1
+
            # handle assistant messages
            if isinstance(message, AssistantMessage):
+                print(f"[ClaudeCode._run_async] Received AssistantMessage #{message_count} with {len(message.content)} blocks")
                for block in message.content:
                    if isinstance(block, TextBlock):
+                        print(f"[ClaudeCode._run_async]   - TextBlock: {len(block.text)} chars")
                        response_text += block.text
                        trace.append(
                            AgentMessageItem(text=block.text, model=message.model)
                        )
                    elif isinstance(block, ThinkingBlock):
+                        print(f"[ClaudeCode._run_async]   - ThinkingBlock: {len(block.thinking)} chars")
                        trace.append(
                            ThinkingItem(text=block.thinking, model=message.model)
                        )
                    elif isinstance(block, ToolUseBlock):
-                        # Handle StructuredOutput tool (contains JSON response)
+                        print(f"[ClaudeCode._run_async]   - ToolUseBlock: {block.name} (id={block.id})")
+                        # handle StructuredOutput tool (contains JSON response)
                        if block.name == "StructuredOutput":
-                            # The JSON is directly in the tool input (already a dict)
+                            # the JSON is directly in the tool input (already a dict)
                            response_text = json.dumps(block.input)
+                            print(f"[ClaudeCode._run_async]     StructuredOutput captured ({len(response_text)} chars)")

                        trace.append(
                            ToolUseItem(
@@ -396,11 +423,12 @@ class ClaudeCode(PrecompiledProgram):
                            )
                        )
                    elif isinstance(block, ToolResultBlock):
+                        print(f"[ClaudeCode._run_async]   - ToolResultBlock: tool_use_id={block.tool_use_id}, is_error={block.is_error}")
                        content_str = ""
                        if isinstance(block.content, str):
                            content_str = block.content
                        elif isinstance(block.content, list):
-                            # Extract text from content blocks
+                            # extract text from content blocks
                            for item in block.content:
                                if (
                                    isinstance(item, dict)
@@ -410,7 +438,7 @@ class ClaudeCode(PrecompiledProgram):

                        trace.append(
                            ToolResultItem(
-                                tool_name="",  # Tool name not in ToolResultBlock
+                                tool_name="",  # tool name not in ToolResultBlock
                                tool_use_id=block.tool_use_id,
                                content=content_str,
                                is_error=block.is_error or False,
@@ -419,9 +447,11 @@ class ClaudeCode(PrecompiledProgram):

            # handle result messages (final message with usage info)
            elif isinstance(message, ResultMessage):
+                print(f"[ClaudeCode._run_async] Received ResultMessage (is_error={getattr(message, 'is_error', False)})")
                # store session ID
                if hasattr(message, "session_id"):
                    self._session_id = message.session_id
+                    print(f"[ClaudeCode._run_async]   - Session ID: {self._session_id}")

                # extract usage
                if hasattr(message, "usage") and message.usage:
@@ -433,6 +463,7 @@ class ClaudeCode(PrecompiledProgram):
                        ),
                        output_tokens=usage_data.get("output_tokens", 0),
                    )
+                    print(f"[ClaudeCode._run_async]   - Usage: {usage.input_tokens} in, {usage.output_tokens} out, {usage.cached_input_tokens} cached")

                # check for errors
                if hasattr(message, "is_error") and message.is_error:
@@ -441,26 +472,41 @@ class ClaudeCode(PrecompiledProgram):
                        if hasattr(message, "result")
                        else "Unknown error"
                    )
+                    print(f"[ClaudeCode._run_async]   - ERROR: {error_msg}")
                    trace.append(
                        ErrorItem(message=error_msg, error_type="execution_error")
                    )
                    raise RuntimeError(f"Agent execution failed: {error_msg}")

-                # extract result if present (for structured outputs from result field)
-                # Note: structured outputs may come from StructuredOutput tool instead
-                if hasattr(message, "result") and message.result:
+                # Prefer structured_output over result (when using output_format)
+                if hasattr(message, "structured_output") and message.structured_output is not None:
+                    structured_output = message.structured_output
+                    print(f"[ClaudeCode._run_async]   - Structured output captured: {type(structured_output).__name__} ({len(str(structured_output))} chars)")
+                # Fallback to result field for text outputs
+                elif hasattr(message, "result") and message.result:
                    response_text = message.result
+                    print(f"[ClaudeCode._run_async]   - Result extracted from message ({len(response_text)} chars)")

            # handle system messages
            elif isinstance(message, SystemMessage):
+                print(f"[ClaudeCode._run_async] Received SystemMessage")
                # log system messages to trace but don't error
                if hasattr(message, "data") and message.data:
                    data_str = str(message.data)
+                    print(f"[ClaudeCode._run_async]   - Data: {data_str[:100]}..." if len(data_str) > 100 else f"[ClaudeCode._run_async]   - Data: {data_str}")
                    trace.append(
                        AgentMessageItem(text=f"[System: {data_str}]", model="system")
                    )

-        return response_text, trace, usage
+        print(f"[ClaudeCode._run_async] Completed: {message_count} messages processed, {len(trace)} trace items")
+
+        # Return structured_output if available (for Pydantic outputs), otherwise text
+        if structured_output is not None:
+            print(f"[ClaudeCode._run_async] Returning structured output")
+            return structured_output, trace, usage
+        else:
+            print(f"[ClaudeCode._run_async] Returning text response")
+            return response_text, trace, usage

    def forward(self, **kwargs: Any) -> Prediction:
        """Execute the agent with an input message.
@@ -480,6 +526,8 @@ class ClaudeCode(PrecompiledProgram):
            >>> print(result.trace)      # List of execution items
            >>> print(result.usage)      # Token usage stats
        """
+        print(f"\n[ClaudeCode.forward] Called with fields: {list(kwargs.keys())}")
+
        # extract input value
        if self.input_field_name not in kwargs:
            raise ValueError(
@@ -488,11 +536,14 @@ class ClaudeCode(PrecompiledProgram):
            )

        input_value = kwargs[self.input_field_name]
+        print(f"[ClaudeCode.forward] Input field '{self.input_field_name}': {input_value[:100]}..." if len(str(input_value)) > 100 else f"[ClaudeCode.forward] Input field '{self.input_field_name}': {input_value}")

        # build prompt
        prompt = self._build_prompt(input_value)
+        print(f"[ClaudeCode.forward] Built prompt ({len(prompt)} chars): {prompt[:200]}...")

        # run async execution in event loop
+        print(f"[ClaudeCode.forward] Starting async execution (model={self.model})")
        try:
            loop = asyncio.get_event_loop()
            if loop.is_running():
@@ -511,19 +562,37 @@ class ClaudeCode(PrecompiledProgram):
            # no event loop, create one
            response_text, trace, usage = asyncio.run(self._run_async(prompt))

+        # Log response details
+        response_type = type(response_text).__name__
+        response_len = len(str(response_text)) if response_text else 0
+        print(f"[ClaudeCode.forward] Received response (type={response_type}, {response_len} chars, {len(trace)} trace items)")
+        print(f"[ClaudeCode.forward] Token usage: {usage.input_tokens} input, {usage.output_tokens} output, {usage.cached_input_tokens} cached")
+
        # parse response based on output type
        output_type = self.output_field.annotation
        if is_pydantic_model(output_type):
+            print(f"[ClaudeCode.forward] Parsing structured output (type: {output_type})")
            try:
+                # response_text can be dict/list (from structured_output) or str (legacy)
                parsed_output = parse_json_response(response_text, output_type)
+                print(f"[ClaudeCode.forward] Successfully parsed structured output")
            except Exception as e:
+                print(f"[ClaudeCode.forward] ERROR: Failed to parse structured output")
                raise ValueError(
-                    f"Failed to parse Claude response as {output_type.__name__}: {e}\n"
+                    f"Failed to parse Claude response as {output_type}: {e}\n"
+                    f"Response type: {type(response_text)}\n"
                    f"Response: {response_text}"
                )
        else:
+            print(f"[ClaudeCode.forward] Extracting text response (output type: {output_type})")
            # string output - extract text
-            parsed_output = extract_text_from_response(response_text)
+            if isinstance(response_text, str):
+                parsed_output = extract_text_from_response(response_text)
+            else:
+                # Shouldn't happen, but handle gracefully
+                parsed_output = str(response_text)
+
+        print(f"[ClaudeCode.forward] Returning Prediction with session_id={self._session_id}\n")

        # return prediction with typed output, trace, and usage
        return Prediction(
@@ -545,6 +614,8 @@ class ClaudeCode(PrecompiledProgram):
        Returns:
            Prediction with typed output, trace, and usage
        """
+        print(f"\n[ClaudeCode.aforward] Called with fields: {list(kwargs.keys())}")
+
        # extract input value
        if self.input_field_name not in kwargs:
            raise ValueError(
@@ -553,26 +624,47 @@ class ClaudeCode(PrecompiledProgram):
            )

        input_value = kwargs[self.input_field_name]
+        print(f"[ClaudeCode.aforward] Input field '{self.input_field_name}': {input_value[:100]}..." if len(str(input_value)) > 100 else f"[ClaudeCode.aforward] Input field '{self.input_field_name}': {input_value}")

        # build prompt
        prompt = self._build_prompt(input_value)
+        print(f"[ClaudeCode.aforward] Built prompt ({len(prompt)} chars)")

        # run async execution
+        print(f"[ClaudeCode.aforward] Starting async execution (model={self.model})")
        response_text, trace, usage = await self._run_async(prompt)

+        # Log response details
+        response_type = type(response_text).__name__
+        response_len = len(str(response_text)) if response_text else 0
+        print(f"[ClaudeCode.aforward] Received response (type={response_type}, {response_len} chars, {len(trace)} trace items)")
+        print(f"[ClaudeCode.aforward] Token usage: {usage.input_tokens} input, {usage.output_tokens} output, {usage.cached_input_tokens} cached")
+
        # parse response based on output type
        output_type = self.output_field.annotation
        if is_pydantic_model(output_type):
+            print(f"[ClaudeCode.aforward] Parsing structured output (type: {output_type})")
            try:
+                # response_text can be dict/list (from structured_output) or str (legacy)
                parsed_output = parse_json_response(response_text, output_type)
+                print(f"[ClaudeCode.aforward] Successfully parsed structured output")
            except Exception as e:
+                print(f"[ClaudeCode.aforward] ERROR: Failed to parse structured output")
                raise ValueError(
-                    f"Failed to parse Claude response as {output_type.__name__}: {e}\n"
+                    f"Failed to parse Claude response as {output_type}: {e}\n"
+                    f"Response type: {type(response_text)}\n"
                    f"Response: {response_text}"
                )
        else:
+            print(f"[ClaudeCode.aforward] Extracting text response (output type: {output_type})")
            # string output - extract text
-            parsed_output = extract_text_from_response(response_text)
+            if isinstance(response_text, str):
+                parsed_output = extract_text_from_response(response_text)
+            else:
+                # Shouldn't happen, but handle gracefully
+                parsed_output = str(response_text)
+
+        print(f"[ClaudeCode.aforward] Returning Prediction with session_id={self._session_id}\n")

        # return prediction with typed output, trace, and usage
        return Prediction(