diff --git a/backend/internal/pkg/apicompat/anthropic_to_responses_response.go b/backend/internal/pkg/apicompat/anthropic_to_responses_response.go
new file mode 100644
index 00000000..0c5e330c
--- /dev/null
+++ b/backend/internal/pkg/apicompat/anthropic_to_responses_response.go
@@ -0,0 +1,526 @@
+package apicompat
+
+import (
+	"crypto/rand"
+	"encoding/hex"
+	"encoding/json"
+	"fmt"
+	"time"
+)
+
+// ---------------------------------------------------------------------------
+// Non-streaming: AnthropicResponse → ResponsesResponse
+// ---------------------------------------------------------------------------
+
+// AnthropicToResponsesResponse converts an Anthropic Messages response into a
+// Responses API response. This is the reverse of ResponsesToAnthropic and
+// enables Anthropic upstream responses to be returned in OpenAI Responses format.
+func AnthropicToResponsesResponse(resp *AnthropicResponse) *ResponsesResponse {
+	id := resp.ID
+	if id == "" {
+		id = generateResponsesID()
+	}
+
+	out := &ResponsesResponse{
+		ID:     id,
+		Object: "response",
+		Model:  resp.Model,
+	}
+
+	var outputs []ResponsesOutput
+	var msgParts []ResponsesContentPart
+
+	for _, block := range resp.Content {
+		switch block.Type {
+		case "thinking":
+			if block.Thinking != "" {
+				outputs = append(outputs, ResponsesOutput{
+					Type: "reasoning",
+					ID:   generateItemID(),
+					Summary: []ResponsesSummary{{
+						Type: "summary_text",
+						Text: block.Thinking,
+					}},
+				})
+			}
+		case "text":
+			if block.Text != "" {
+				msgParts = append(msgParts, ResponsesContentPart{
+					Type: "output_text",
+					Text: block.Text,
+				})
+			}
+		case "tool_use":
+			args := "{}"
+			if len(block.Input) > 0 {
+				args = string(block.Input)
+			}
+			outputs = append(outputs, ResponsesOutput{
+				Type:      "function_call",
+				ID:        generateItemID(),
+				CallID:    toResponsesCallID(block.ID),
+				Name:      block.Name,
+				Arguments: args,
+				Status:    "completed",
+			})
+		}
+	}
+
+	// Assemble message output item from text parts
+	if len(msgParts) > 0 {
+		outputs = append(outputs, ResponsesOutput{
+			Type:    "message",
+			ID:      generateItemID(),
+			Role:    "assistant",
+			Content: msgParts,
+			Status:  "completed",
+		})
+	}
+
+	// The Responses API always has at least one output item; synthesize an
+	// empty message when the Anthropic response had no usable content.
+	if len(outputs) == 0 {
+		outputs = append(outputs, ResponsesOutput{
+			Type:    "message",
+			ID:      generateItemID(),
+			Role:    "assistant",
+			Content: []ResponsesContentPart{{Type: "output_text", Text: ""}},
+			Status:  "completed",
+		})
+	}
+	out.Output = outputs
+
+	// Map stop_reason → status
+	out.Status = anthropicStopReasonToResponsesStatus(resp.StopReason, resp.Content)
+	if out.Status == "incomplete" {
+		out.IncompleteDetails = &ResponsesIncompleteDetails{Reason: "max_output_tokens"}
+	}
+
+	// Usage
+	out.Usage = &ResponsesUsage{
+		InputTokens:  resp.Usage.InputTokens,
+		OutputTokens: resp.Usage.OutputTokens,
+		TotalTokens:  resp.Usage.InputTokens + resp.Usage.OutputTokens,
+	}
+	if resp.Usage.CacheReadInputTokens > 0 {
+		out.Usage.InputTokensDetails = &ResponsesInputTokensDetails{
+			CachedTokens: resp.Usage.CacheReadInputTokens,
+		}
+	}
+
+	return out
+}
+
+// anthropicStopReasonToResponsesStatus maps Anthropic stop_reason to Responses status.
+// The content blocks are not currently consulted; the parameter is kept so the
+// package-level signature stays stable.
+func anthropicStopReasonToResponsesStatus(stopReason string, _ []AnthropicContentBlock) string {
+	// Only max_tokens maps to incomplete; end_turn, tool_use, stop_sequence
+	// and any unknown value are treated as a normal completion.
+	if stopReason == "max_tokens" {
+		return "incomplete"
+	}
+	return "completed"
+}
+
+// ---------------------------------------------------------------------------
+// Streaming: AnthropicStreamEvent → []ResponsesStreamEvent (stateful converter)
+// ---------------------------------------------------------------------------
+
+// AnthropicEventToResponsesState tracks state for converting a sequence of
+// Anthropic SSE events into Responses SSE events.
+type AnthropicEventToResponsesState struct {
+	ResponseID     string
+	Model          string
+	Created        int64
+	SequenceNumber int
+
+	// CreatedSent tracks whether response.created has been emitted.
+	CreatedSent bool
+	// CompletedSent tracks whether the terminal event has been emitted.
+	CompletedSent bool
+
+	// Current output tracking
+	OutputIndex     int
+	CurrentItemID   string
+	CurrentItemType string // "message" | "function_call" | "reasoning"
+
+	// For message output: accumulate text parts
+	ContentIndex int
+
+	// For function_call: track per-output info
+	CurrentCallID string
+	CurrentName   string
+
+	// Usage from message_delta
+	InputTokens          int
+	OutputTokens         int
+	CacheReadInputTokens int
+
+	// StopReason records the stop_reason delivered via message_delta so the
+	// terminal event (message_stop or finalization) can map it to a status.
+	StopReason string
+}
+
+// NewAnthropicEventToResponsesState returns an initialised stream state.
+func NewAnthropicEventToResponsesState() *AnthropicEventToResponsesState {
+	return &AnthropicEventToResponsesState{
+		Created: time.Now().Unix(),
+	}
+}
+
+// AnthropicEventToResponsesEvents converts a single Anthropic SSE event into
+// zero or more Responses SSE events, updating state as it goes.
+func AnthropicEventToResponsesEvents(
+	evt *AnthropicStreamEvent,
+	state *AnthropicEventToResponsesState,
+) []ResponsesStreamEvent {
+	switch evt.Type {
+	case "message_start":
+		return anthToResHandleMessageStart(evt, state)
+	case "content_block_start":
+		return anthToResHandleContentBlockStart(evt, state)
+	case "content_block_delta":
+		return anthToResHandleContentBlockDelta(evt, state)
+	case "content_block_stop":
+		return anthToResHandleContentBlockStop(evt, state)
+	case "message_delta":
+		return anthToResHandleMessageDelta(evt, state)
+	case "message_stop":
+		return anthToResHandleMessageStop(state)
+	default:
+		return nil
+	}
+}
+
+// FinalizeAnthropicResponsesStream emits synthetic termination events if the
+// stream ended without a proper message_stop.
+func FinalizeAnthropicResponsesStream(state *AnthropicEventToResponsesState) []ResponsesStreamEvent {
+	if !state.CreatedSent || state.CompletedSent {
+		return nil
+	}
+
+	var events []ResponsesStreamEvent
+
+	// Close any open item
+	events = append(events, closeCurrentResponsesItem(state)...)
+
+	// Honour a stop_reason that arrived before the stream was cut off.
+	status, incompleteDetails := responsesTerminalStatus(state)
+	events = append(events, makeResponsesCompletedEvent(state, status, incompleteDetails))
+	state.CompletedSent = true
+	return events
+}
+
+// ResponsesEventToSSE formats a ResponsesStreamEvent as an SSE data line.
+func ResponsesEventToSSE(evt ResponsesStreamEvent) (string, error) {
+	data, err := json.Marshal(evt)
+	if err != nil {
+		return "", err
+	}
+	return fmt.Sprintf("event: %s\ndata: %s\n\n", evt.Type, data), nil
+}
+
+// --- internal handlers ---
+
+func anthToResHandleMessageStart(evt *AnthropicStreamEvent, state *AnthropicEventToResponsesState) []ResponsesStreamEvent {
+	if evt.Message != nil {
+		state.ResponseID = evt.Message.ID
+		if state.Model == "" {
+			state.Model = evt.Message.Model
+		}
+		if evt.Message.Usage.InputTokens > 0 {
+			state.InputTokens = evt.Message.Usage.InputTokens
+		}
+	}
+
+	if state.CreatedSent {
+		return nil
+	}
+	state.CreatedSent = true
+
+	// Emit response.created
+	return []ResponsesStreamEvent{makeResponsesCreatedEvent(state)}
+}
+
+func anthToResHandleContentBlockStart(evt *AnthropicStreamEvent, state *AnthropicEventToResponsesState) []ResponsesStreamEvent {
+	if evt.ContentBlock == nil {
+		return nil
+	}
+
+	var events []ResponsesStreamEvent
+
+	switch evt.ContentBlock.Type {
+	case "thinking":
+		// Close any previously open item so output indices never collide.
+		events = append(events, closeCurrentResponsesItem(state)...)
+
+		state.CurrentItemID = generateItemID()
+		state.CurrentItemType = "reasoning"
+		state.ContentIndex = 0
+
+		events = append(events, makeResponsesEvent(state, "response.output_item.added", &ResponsesStreamEvent{
+			OutputIndex: state.OutputIndex,
+			Item: &ResponsesOutput{
+				Type: "reasoning",
+				ID:   state.CurrentItemID,
+			},
+		}))
+
+	case "text":
+		// If we don't have an open message item, close whatever is open and
+		// start one; consecutive text blocks share a single message item.
+		if state.CurrentItemType != "message" {
+			events = append(events, closeCurrentResponsesItem(state)...)
+
+			state.CurrentItemID = generateItemID()
+			state.CurrentItemType = "message"
+			state.ContentIndex = 0
+
+			events = append(events, makeResponsesEvent(state, "response.output_item.added", &ResponsesStreamEvent{
+				OutputIndex: state.OutputIndex,
+				Item: &ResponsesOutput{
+					Type:   "message",
+					ID:     state.CurrentItemID,
+					Role:   "assistant",
+					Status: "in_progress",
+				},
+			}))
+		}
+
+	case "tool_use":
+		// Close previous item if any
+		events = append(events, closeCurrentResponsesItem(state)...)
+
+		state.CurrentItemID = generateItemID()
+		state.CurrentItemType = "function_call"
+		state.CurrentCallID = toResponsesCallID(evt.ContentBlock.ID)
+		state.CurrentName = evt.ContentBlock.Name
+
+		events = append(events, makeResponsesEvent(state, "response.output_item.added", &ResponsesStreamEvent{
+			OutputIndex: state.OutputIndex,
+			Item: &ResponsesOutput{
+				Type:   "function_call",
+				ID:     state.CurrentItemID,
+				CallID: state.CurrentCallID,
+				Name:   state.CurrentName,
+				Status: "in_progress",
+			},
+		}))
+	}
+
+	return events
+}
+
+func anthToResHandleContentBlockDelta(evt *AnthropicStreamEvent, state *AnthropicEventToResponsesState) []ResponsesStreamEvent {
+	if evt.Delta == nil {
+		return nil
+	}
+
+	switch evt.Delta.Type {
+	case "text_delta":
+		if evt.Delta.Text == "" {
+			return nil
+		}
+		return []ResponsesStreamEvent{makeResponsesEvent(state, "response.output_text.delta", &ResponsesStreamEvent{
+			OutputIndex:  state.OutputIndex,
+			ContentIndex: state.ContentIndex,
+			Delta:        evt.Delta.Text,
+			ItemID:       state.CurrentItemID,
+		})}
+
+	case "thinking_delta":
+		if evt.Delta.Thinking == "" {
+			return nil
+		}
+		return []ResponsesStreamEvent{makeResponsesEvent(state, "response.reasoning_summary_text.delta", &ResponsesStreamEvent{
+			OutputIndex:  state.OutputIndex,
+			SummaryIndex: 0,
+			Delta:        evt.Delta.Thinking,
+			ItemID:       state.CurrentItemID,
+		})}
+
+	case "input_json_delta":
+		if evt.Delta.PartialJSON == "" {
+			return nil
+		}
+		return []ResponsesStreamEvent{makeResponsesEvent(state, "response.function_call_arguments.delta", &ResponsesStreamEvent{
+			OutputIndex: state.OutputIndex,
+			Delta:       evt.Delta.PartialJSON,
+			ItemID:      state.CurrentItemID,
+			CallID:      state.CurrentCallID,
+			Name:        state.CurrentName,
+		})}
+
+	case "signature_delta":
+		// Anthropic signature deltas have no Responses equivalent; skip
+		return nil
+	}
+
+	return nil
+}
+
+func anthToResHandleContentBlockStop(evt *AnthropicStreamEvent, state *AnthropicEventToResponsesState) []ResponsesStreamEvent {
+	switch state.CurrentItemType {
+	case "reasoning":
+		// Emit reasoning summary done + output item done
+		events := []ResponsesStreamEvent{
+			makeResponsesEvent(state, "response.reasoning_summary_text.done", &ResponsesStreamEvent{
+				OutputIndex:  state.OutputIndex,
+				SummaryIndex: 0,
+				ItemID:       state.CurrentItemID,
+			}),
+		}
+		events = append(events, closeCurrentResponsesItem(state)...)
+		return events
+
+	case "function_call":
+		// Emit function_call_arguments.done + output item done
+		events := []ResponsesStreamEvent{
+			makeResponsesEvent(state, "response.function_call_arguments.done", &ResponsesStreamEvent{
+				OutputIndex: state.OutputIndex,
+				ItemID:      state.CurrentItemID,
+				CallID:      state.CurrentCallID,
+				Name:        state.CurrentName,
+			}),
+		}
+		events = append(events, closeCurrentResponsesItem(state)...)
+		return events
+
+	case "message":
+		// The text part is finished, but the message item stays open so later
+		// text blocks can join it as additional content parts.
+		done := makeResponsesEvent(state, "response.output_text.done", &ResponsesStreamEvent{
+			OutputIndex:  state.OutputIndex,
+			ContentIndex: state.ContentIndex,
+			ItemID:       state.CurrentItemID,
+		})
+		state.ContentIndex++ // the next text block becomes a new content part
+		return []ResponsesStreamEvent{done}
+	}
+
+	return nil
+}
+
+func anthToResHandleMessageDelta(evt *AnthropicStreamEvent, state *AnthropicEventToResponsesState) []ResponsesStreamEvent {
+	// Update usage
+	if evt.Usage != nil {
+		state.OutputTokens = evt.Usage.OutputTokens
+		if evt.Usage.CacheReadInputTokens > 0 {
+			state.CacheReadInputTokens = evt.Usage.CacheReadInputTokens
+		}
+	}
+
+	// Record stop_reason; it is consumed when message_stop arrives (or when
+	// the stream is finalized without one).
+	if evt.Delta != nil && evt.Delta.StopReason != "" {
+		state.StopReason = evt.Delta.StopReason
+	}
+
+	return nil
+}
+
+func anthToResHandleMessageStop(state *AnthropicEventToResponsesState) []ResponsesStreamEvent {
+	if state.CompletedSent {
+		return nil
+	}
+
+	var events []ResponsesStreamEvent
+
+	// Close any open item
+	events = append(events, closeCurrentResponsesItem(state)...)
+
+	// Emit response.completed with the status derived from the recorded
+	// stop_reason (max_tokens → incomplete, everything else → completed),
+	// matching the non-streaming conversion.
+	status, incompleteDetails := responsesTerminalStatus(state)
+	events = append(events, makeResponsesCompletedEvent(state, status, incompleteDetails))
+	state.CompletedSent = true
+	return events
+}
+
+// --- helper functions ---
+
+// responsesTerminalStatus maps the recorded stop_reason to a terminal
+// Responses status plus optional incomplete details.
+func responsesTerminalStatus(state *AnthropicEventToResponsesState) (string, *ResponsesIncompleteDetails) {
+	if state.StopReason == "max_tokens" {
+		return "incomplete", &ResponsesIncompleteDetails{Reason: "max_output_tokens"}
+	}
+	return "completed", nil
+}
+
+func closeCurrentResponsesItem(state *AnthropicEventToResponsesState) []ResponsesStreamEvent {
+	if state.CurrentItemType == "" {
+		return nil
+	}
+
+	itemType := state.CurrentItemType
+	itemID := state.CurrentItemID
+
+	// Reset
+	state.CurrentItemType = ""
+	state.CurrentItemID = ""
+	state.CurrentCallID = ""
+	state.CurrentName = ""
+	state.OutputIndex++
+	state.ContentIndex = 0
+
+	return []ResponsesStreamEvent{makeResponsesEvent(state, "response.output_item.done", &ResponsesStreamEvent{
+		OutputIndex: state.OutputIndex - 1, // Use the index before increment
+		Item: &ResponsesOutput{
+			Type:   itemType,
+			ID:     itemID,
+			Status: "completed",
+		},
+	})}
+}
+
+func makeResponsesCreatedEvent(state *AnthropicEventToResponsesState) ResponsesStreamEvent {
+	seq := state.SequenceNumber
+	state.SequenceNumber++
+	return ResponsesStreamEvent{
+		Type:           "response.created",
+		SequenceNumber: seq,
+		Response: &ResponsesResponse{
+			ID:     state.ResponseID,
+			Object: "response",
+			Model:  state.Model,
+			Status: "in_progress",
+			Output: []ResponsesOutput{},
+		},
+	}
+}
+
+func makeResponsesCompletedEvent(
+	state *AnthropicEventToResponsesState,
+	status string,
+	incompleteDetails *ResponsesIncompleteDetails,
+) ResponsesStreamEvent {
+	seq := state.SequenceNumber
+	state.SequenceNumber++
+
+	usage := &ResponsesUsage{
+		InputTokens:  state.InputTokens,
+		OutputTokens: state.OutputTokens,
+		TotalTokens:  state.InputTokens + state.OutputTokens,
+	}
+	if state.CacheReadInputTokens > 0 {
+		usage.InputTokensDetails = &ResponsesInputTokensDetails{
+			CachedTokens: state.CacheReadInputTokens,
+		}
+	}
+
+	return ResponsesStreamEvent{
+		Type:           "response.completed",
+		SequenceNumber: seq,
+		Response: &ResponsesResponse{
+			ID:                state.ResponseID,
+			Object:            "response",
+			Model:             state.Model,
+			Status:            status,
+			Output:            []ResponsesOutput{}, // Simplified; full output tracking would add complexity
+			Usage:             usage,
+			IncompleteDetails: incompleteDetails,
+		},
+	}
+}
+
+func makeResponsesEvent(state *AnthropicEventToResponsesState, eventType string, template *ResponsesStreamEvent) ResponsesStreamEvent {
+	seq := state.SequenceNumber
+	state.SequenceNumber++
+
+	evt := *template
+	evt.Type = eventType
+	evt.SequenceNumber = seq
+	return evt
+}
+
+func generateResponsesID() string {
+	b := make([]byte, 12)
+	_, _ = rand.Read(b)
+	return "resp_" + hex.EncodeToString(b)
+}
+
+func generateItemID() string {
+	b := make([]byte, 12)
+	_, _ = rand.Read(b)
+	return "item_" + hex.EncodeToString(b)
+}
diff --git a/backend/internal/pkg/apicompat/responses_to_anthropic_request.go b/backend/internal/pkg/apicompat/responses_to_anthropic_request.go
new file mode 100644
index 00000000..f0a5b07e
--- /dev/null
+++ b/backend/internal/pkg/apicompat/responses_to_anthropic_request.go
@@ -0,0 +1,464 @@
+package apicompat
+
+import (
+	"encoding/json"
+	"fmt"
+	"strings"
+)
+
+// ResponsesToAnthropicRequest converts a Responses API request into an
+// Anthropic Messages request. This is the reverse of AnthropicToResponses and
+// enables Anthropic platform groups to accept OpenAI Responses API requests
+// by converting them to the native /v1/messages format before forwarding upstream.
+func ResponsesToAnthropicRequest(req *ResponsesRequest) (*AnthropicRequest, error) {
+	system, messages, err := convertResponsesInputToAnthropic(req.Input)
+	if err != nil {
+		return nil, err
+	}
+
+	out := &AnthropicRequest{
+		Model:       req.Model,
+		Messages:    messages,
+		Temperature: req.Temperature,
+		TopP:        req.TopP,
+		Stream:      req.Stream,
+	}
+
+	if len(system) > 0 {
+		out.System = system
+	}
+
+	// max_output_tokens → max_tokens
+	if req.MaxOutputTokens != nil && *req.MaxOutputTokens > 0 {
+		out.MaxTokens = *req.MaxOutputTokens
+	}
+	if out.MaxTokens == 0 {
+		// Anthropic requires max_tokens; default to a sensible value.
+		out.MaxTokens = 8192
+	}
+
+	// Convert tools
+	if len(req.Tools) > 0 {
+		out.Tools = convertResponsesToAnthropicTools(req.Tools)
+	}
+
+	// Convert tool_choice (reverse of convertAnthropicToolChoiceToResponses)
+	if len(req.ToolChoice) > 0 {
+		tc, err := convertResponsesToAnthropicToolChoice(req.ToolChoice)
+		if err != nil {
+			return nil, fmt.Errorf("convert tool_choice: %w", err)
+		}
+		out.ToolChoice = tc
+	}
+
+	// reasoning.effort → output_config.effort + thinking
+	if req.Reasoning != nil && req.Reasoning.Effort != "" {
+		effort := mapResponsesEffortToAnthropic(req.Reasoning.Effort)
+		out.OutputConfig = &AnthropicOutputConfig{Effort: effort}
+		// Enable thinking for non-low efforts
+		if effort != "low" {
+			out.Thinking = &AnthropicThinking{
+				Type:         "enabled",
+				BudgetTokens: defaultThinkingBudget(effort),
+			}
+		}
+	}
+
+	return out, nil
+}
+
+// defaultThinkingBudget returns a sensible thinking budget based on effort level.
+func defaultThinkingBudget(effort string) int {
+	switch effort {
+	case "low":
+		return 1024
+	case "medium":
+		return 4096
+	case "high":
+		return 10240
+	case "max":
+		return 32768
+	default:
+		return 10240
+	}
+}
+
+// mapResponsesEffortToAnthropic converts OpenAI Responses reasoning effort to
+// Anthropic effort levels. Reverse of mapAnthropicEffortToResponses.
+//
+// low    → low
+// medium → medium
+// high   → high
+// xhigh  → max
+func mapResponsesEffortToAnthropic(effort string) string {
+	if effort == "xhigh" {
+		return "max"
+	}
+	return effort // low→low, medium→medium, high→high, unknown→passthrough
+}
+
+// convertResponsesInputToAnthropic extracts system prompt and messages from
+// a Responses API input array. Returns the system as raw JSON (for Anthropic's
+// polymorphic system field) and a list of Anthropic messages.
+func convertResponsesInputToAnthropic(inputRaw json.RawMessage) (json.RawMessage, []AnthropicMessage, error) {
+	// Try as plain string input.
+	var inputStr string
+	if err := json.Unmarshal(inputRaw, &inputStr); err == nil {
+		content, _ := json.Marshal(inputStr)
+		return nil, []AnthropicMessage{{Role: "user", Content: content}}, nil
+	}
+
+	var items []ResponsesInputItem
+	if err := json.Unmarshal(inputRaw, &items); err != nil {
+		return nil, nil, fmt.Errorf("parse responses input: %w", err)
+	}
+
+	// Multiple system items are concatenated rather than last-one-wins so no
+	// instructions are silently dropped.
+	var systemParts []string
+	var messages []AnthropicMessage
+
+	for _, item := range items {
+		switch {
+		case item.Role == "system":
+			// System prompt → Anthropic system field
+			if text := extractTextFromContent(item.Content); text != "" {
+				systemParts = append(systemParts, text)
+			}
+
+		case item.Type == "function_call":
+			// function_call → assistant message with tool_use block.
+			// Guard against invalid argument JSON: embedding it verbatim would
+			// make the later json.Marshal of the block fail silently.
+			input := json.RawMessage("{}")
+			if item.Arguments != "" && json.Valid([]byte(item.Arguments)) {
+				input = json.RawMessage(item.Arguments)
+			}
+			block := AnthropicContentBlock{
+				Type:  "tool_use",
+				ID:    fromResponsesCallIDToAnthropic(item.CallID),
+				Name:  item.Name,
+				Input: input,
+			}
+			blockJSON, _ := json.Marshal([]AnthropicContentBlock{block})
+			messages = append(messages, AnthropicMessage{
+				Role:    "assistant",
+				Content: blockJSON,
+			})
+
+		case item.Type == "function_call_output":
+			// function_call_output → user message with tool_result block
+			outputContent := item.Output
+			if outputContent == "" {
+				outputContent = "(empty)"
+			}
+			contentJSON, _ := json.Marshal(outputContent)
+			block := AnthropicContentBlock{
+				Type:      "tool_result",
+				ToolUseID: fromResponsesCallIDToAnthropic(item.CallID),
+				Content:   contentJSON,
+			}
+			blockJSON, _ := json.Marshal([]AnthropicContentBlock{block})
+			messages = append(messages, AnthropicMessage{
+				Role:    "user",
+				Content: blockJSON,
+			})
+
+		case item.Role == "user":
+			content, err := convertResponsesUserToAnthropicContent(item.Content)
+			if err != nil {
+				return nil, nil, err
+			}
+			messages = append(messages, AnthropicMessage{
+				Role:    "user",
+				Content: content,
+			})
+
+		case item.Role == "assistant":
+			content, err := convertResponsesAssistantToAnthropicContent(item.Content)
+			if err != nil {
+				return nil, nil, err
+			}
+			messages = append(messages, AnthropicMessage{
+				Role:    "assistant",
+				Content: content,
+			})
+
+		default:
+			// Unknown role/type — attempt as user message
+			if item.Content != nil {
+				messages = append(messages, AnthropicMessage{
+					Role:    "user",
+					Content: item.Content,
+				})
+			}
+		}
+	}
+
+	var system json.RawMessage
+	if len(systemParts) > 0 {
+		system, _ = json.Marshal(strings.Join(systemParts, "\n\n"))
+	}
+
+	// Merge consecutive same-role messages (Anthropic requires alternating roles)
+	messages = mergeConsecutiveMessages(messages)
+
+	return system, messages, nil
+}
+
+// extractTextFromContent extracts text from a content field that may be a
+// plain string or an array of content parts.
+func extractTextFromContent(raw json.RawMessage) string {
+	if len(raw) == 0 {
+		return ""
+	}
+	var s string
+	if err := json.Unmarshal(raw, &s); err == nil {
+		return s
+	}
+	var parts []ResponsesContentPart
+	if err := json.Unmarshal(raw, &parts); err == nil {
+		var texts []string
+		for _, p := range parts {
+			if (p.Type == "input_text" || p.Type == "output_text" || p.Type == "text") && p.Text != "" {
+				texts = append(texts, p.Text)
+			}
+		}
+		return strings.Join(texts, "\n\n")
+	}
+	return ""
+}
+
+// convertResponsesUserToAnthropicContent converts a Responses user message
+// content field into Anthropic content blocks JSON.
+func convertResponsesUserToAnthropicContent(raw json.RawMessage) (json.RawMessage, error) {
+	if len(raw) == 0 {
+		// Empty content becomes an empty string body.
+		return json.Marshal("")
+	}
+
+	// A bare JSON string passes through as a string body.
+	var text string
+	if json.Unmarshal(raw, &text) == nil {
+		return json.Marshal(text)
+	}
+
+	// Otherwise expect an array of content parts; anything unparseable is
+	// forwarded untouched.
+	var parts []ResponsesContentPart
+	if err := json.Unmarshal(raw, &parts); err != nil {
+		return raw, nil
+	}
+
+	var blocks []AnthropicContentBlock
+	for _, part := range parts {
+		switch part.Type {
+		case "input_text", "text":
+			if part.Text != "" {
+				blocks = append(blocks, AnthropicContentBlock{Type: "text", Text: part.Text})
+			}
+		case "input_image":
+			if src := dataURIToAnthropicImageSource(part.ImageURL); src != nil {
+				blocks = append(blocks, AnthropicContentBlock{Type: "image", Source: src})
+			}
+		}
+	}
+
+	if len(blocks) == 0 {
+		return json.Marshal("")
+	}
+	return json.Marshal(blocks)
+}
+
+// convertResponsesAssistantToAnthropicContent converts a Responses assistant
+// message content field into Anthropic content blocks JSON.
+func convertResponsesAssistantToAnthropicContent(raw json.RawMessage) (json.RawMessage, error) {
+	if len(raw) == 0 {
+		// Anthropic assistant turns always carry at least one block.
+		return json.Marshal([]AnthropicContentBlock{{Type: "text", Text: ""}})
+	}
+
+	// A bare JSON string becomes a single text block.
+	var text string
+	if json.Unmarshal(raw, &text) == nil {
+		return json.Marshal([]AnthropicContentBlock{{Type: "text", Text: text}})
+	}
+
+	// Otherwise expect an array of content parts; pass through if unparseable.
+	var parts []ResponsesContentPart
+	if err := json.Unmarshal(raw, &parts); err != nil {
+		return raw, nil
+	}
+
+	var blocks []AnthropicContentBlock
+	for _, part := range parts {
+		if (part.Type == "output_text" || part.Type == "text") && part.Text != "" {
+			blocks = append(blocks, AnthropicContentBlock{Type: "text", Text: part.Text})
+		}
+	}
+
+	if len(blocks) == 0 {
+		blocks = []AnthropicContentBlock{{Type: "text", Text: ""}}
+	}
+	return json.Marshal(blocks)
+}
+
+// fromResponsesCallIDToAnthropic converts an OpenAI function call ID back to
+// Anthropic format. Reverses toResponsesCallID.
+func fromResponsesCallIDToAnthropic(id string) string {
+	// Strip our "fc_" wrapper when it hides a native provider prefix.
+	if inner, ok := strings.CutPrefix(id, "fc_"); ok {
+		if strings.HasPrefix(inner, "toolu_") || strings.HasPrefix(inner, "call_") {
+			return inner
+		}
+	}
+	// Already in a recognised format — leave untouched.
+	if strings.HasPrefix(id, "toolu_") || strings.HasPrefix(id, "call_") {
+		return id
+	}
+	// Otherwise synthesise an Anthropic-style tool ID.
+	return "toolu_" + id
+}
+
+// dataURIToAnthropicImageSource parses a data URI into an AnthropicImageSource.
+func dataURIToAnthropicImageSource(dataURI string) *AnthropicImageSource {
+	// Expected shape: data:<media-type>;base64,<payload>
+	rest, ok := strings.CutPrefix(dataURI, "data:")
+	if !ok {
+		return nil
+	}
+	mediaType, encoded, ok := strings.Cut(rest, ";")
+	if !ok {
+		return nil
+	}
+	payload, ok := strings.CutPrefix(encoded, "base64,")
+	if !ok {
+		return nil
+	}
+	return &AnthropicImageSource{
+		Type:      "base64",
+		MediaType: mediaType,
+		Data:      payload,
+	}
+}
+
+// mergeConsecutiveMessages merges consecutive messages with the same role
+// because Anthropic requires alternating user/assistant turns.
+func mergeConsecutiveMessages(messages []AnthropicMessage) []AnthropicMessage {
+	if len(messages) <= 1 {
+		return messages
+	}
+
+	merged := make([]AnthropicMessage, 0, len(messages))
+	for _, msg := range messages {
+		n := len(merged)
+		if n == 0 || merged[n-1].Role != msg.Role {
+			merged = append(merged, msg)
+			continue
+		}
+
+		// Same role as the previous message — fold the content blocks together.
+		// parseContentBlocks returns freshly allocated slices, so the append
+		// cannot alias another message's backing array.
+		prev := &merged[n-1]
+		blocks := append(parseContentBlocks(prev.Content), parseContentBlocks(msg.Content)...)
+		prev.Content, _ = json.Marshal(blocks)
+	}
+	return merged
+}
+
+// parseContentBlocks attempts to parse content as []AnthropicContentBlock.
+// If it's a string, wraps it in a text block.
+func parseContentBlocks(raw json.RawMessage) []AnthropicContentBlock {
+	var blocks []AnthropicContentBlock
+	if json.Unmarshal(raw, &blocks) == nil {
+		return blocks
+	}
+	var text string
+	if json.Unmarshal(raw, &text) == nil {
+		return []AnthropicContentBlock{{Type: "text", Text: text}}
+	}
+	return nil
+}
+
+// convertResponsesToAnthropicTools maps Responses API tools to Anthropic format.
+// Reverse of convertAnthropicToolsToResponses.
+func convertResponsesToAnthropicTools(tools []ResponsesTool) []AnthropicTool {
+	var out []AnthropicTool
+	for _, tool := range tools {
+		switch tool.Type {
+		case "web_search":
+			// Server-side tool: pin the dated Anthropic tool type.
+			out = append(out, AnthropicTool{
+				Type: "web_search_20250305",
+				Name: "web_search",
+			})
+		case "function":
+			out = append(out, AnthropicTool{
+				Name:        tool.Name,
+				Description: tool.Description,
+				InputSchema: normalizeAnthropicInputSchema(tool.Parameters),
+			})
+		default:
+			// Unknown tool types are forwarded untouched.
+			out = append(out, AnthropicTool{
+				Type:        tool.Type,
+				Name:        tool.Name,
+				Description: tool.Description,
+				InputSchema: tool.Parameters,
+			})
+		}
+	}
+	return out
+}
+
+// normalizeAnthropicInputSchema ensures the input_schema has a "type" field.
+func normalizeAnthropicInputSchema(schema json.RawMessage) json.RawMessage {
+	// Anthropic rejects tools without an object schema; supply a minimal one
+	// when the Responses tool carried no parameters.
+	if len(schema) == 0 || string(schema) == "null" {
+		return json.RawMessage(`{"type":"object","properties":{}}`)
+	}
+	return schema
+}
+
+// convertResponsesToAnthropicToolChoice maps Responses tool_choice to Anthropic format.
+// Reverse of convertAnthropicToolChoiceToResponses.
+//
+// "auto"     → {"type":"auto"}
+// "required" → {"type":"any"}
+// "none"     → {"type":"none"}
+// {"type":"function","name":"X"}              → {"type":"tool","name":"X"}  (Responses API shape)
+// {"type":"function","function":{"name":"X"}} → {"type":"tool","name":"X"}  (Chat Completions shape)
+func convertResponsesToAnthropicToolChoice(raw json.RawMessage) (json.RawMessage, error) {
+	// Try as string first
+	var s string
+	if err := json.Unmarshal(raw, &s); err == nil {
+		switch s {
+		case "auto":
+			return json.Marshal(map[string]string{"type": "auto"})
+		case "required":
+			return json.Marshal(map[string]string{"type": "any"})
+		case "none":
+			return json.Marshal(map[string]string{"type": "none"})
+		default:
+			return raw, nil
+		}
+	}
+
+	// Try as an object selecting a specific function. The Responses API puts
+	// the name at the top level; the Chat Completions shape nests it under
+	// "function" — accept both for compatibility.
+	var tc struct {
+		Type     string `json:"type"`
+		Name     string `json:"name"`
+		Function struct {
+			Name string `json:"name"`
+		} `json:"function"`
+	}
+	if err := json.Unmarshal(raw, &tc); err == nil && tc.Type == "function" {
+		name := tc.Name
+		if name == "" {
+			name = tc.Function.Name
+		}
+		if name != "" {
+			return json.Marshal(map[string]string{
+				"type": "tool",
+				"name": name,
+			})
+		}
+	}
+
+	// Pass through unknown
+	return raw, nil
+}