fix(apicompat): support array content for system and tool messages
This commit is contained in:
@@ -181,6 +181,35 @@ func TestChatCompletionsToResponses_ImageURL(t *testing.T) {
|
||||
assert.Equal(t, "data:image/png;base64,abc123", parts[1].ImageURL)
|
||||
}
|
||||
|
||||
func TestChatCompletionsToResponses_SystemArrayContent(t *testing.T) {
|
||||
req := &ChatCompletionsRequest{
|
||||
Model: "gpt-4o",
|
||||
Messages: []ChatMessage{
|
||||
{Role: "system", Content: json.RawMessage(`[{"type":"text","text":"You are a careful visual assistant."}]`)},
|
||||
{Role: "user", Content: json.RawMessage(`[{"type":"text","text":"Describe this image"},{"type":"image_url","image_url":{"url":"data:image/png;base64,abc123"}}]`)},
|
||||
},
|
||||
}
|
||||
|
||||
resp, err := ChatCompletionsToResponses(req)
|
||||
require.NoError(t, err)
|
||||
|
||||
var items []ResponsesInputItem
|
||||
require.NoError(t, json.Unmarshal(resp.Input, &items))
|
||||
require.Len(t, items, 2)
|
||||
|
||||
var systemParts []ResponsesContentPart
|
||||
require.NoError(t, json.Unmarshal(items[0].Content, &systemParts))
|
||||
require.Len(t, systemParts, 1)
|
||||
assert.Equal(t, "input_text", systemParts[0].Type)
|
||||
assert.Equal(t, "You are a careful visual assistant.", systemParts[0].Text)
|
||||
|
||||
var userParts []ResponsesContentPart
|
||||
require.NoError(t, json.Unmarshal(items[1].Content, &userParts))
|
||||
require.Len(t, userParts, 2)
|
||||
assert.Equal(t, "input_image", userParts[1].Type)
|
||||
assert.Equal(t, "data:image/png;base64,abc123", userParts[1].ImageURL)
|
||||
}
|
||||
|
||||
func TestChatCompletionsToResponses_LegacyFunctions(t *testing.T) {
|
||||
req := &ChatCompletionsRequest{
|
||||
Model: "gpt-4o",
|
||||
@@ -398,6 +427,45 @@ func TestResponsesToChatCompletions_Reasoning(t *testing.T) {
|
||||
assert.Equal(t, "I thought about it.", chat.Choices[0].Message.ReasoningContent)
|
||||
}
|
||||
|
||||
func TestChatCompletionsToResponses_ToolArrayContent(t *testing.T) {
|
||||
req := &ChatCompletionsRequest{
|
||||
Model: "gpt-4o",
|
||||
Messages: []ChatMessage{
|
||||
{Role: "user", Content: json.RawMessage(`"Use the tool"`)},
|
||||
{
|
||||
Role: "assistant",
|
||||
ToolCalls: []ChatToolCall{
|
||||
{
|
||||
ID: "call_1",
|
||||
Type: "function",
|
||||
Function: ChatFunctionCall{
|
||||
Name: "inspect_image",
|
||||
Arguments: `{}`,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Role: "tool",
|
||||
ToolCallID: "call_1",
|
||||
Content: json.RawMessage(
|
||||
`[{"type":"text","text":"image width: 100"},{"type":"image_url","image_url":{"url":"data:image/png;base64,ignored"}},{"type":"text","text":"; image height: 200"}]`,
|
||||
),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
resp, err := ChatCompletionsToResponses(req)
|
||||
require.NoError(t, err)
|
||||
|
||||
var items []ResponsesInputItem
|
||||
require.NoError(t, json.Unmarshal(resp.Input, &items))
|
||||
require.Len(t, items, 3)
|
||||
assert.Equal(t, "function_call_output", items[2].Type)
|
||||
assert.Equal(t, "call_1", items[2].CallID)
|
||||
assert.Equal(t, "image width: 100; image height: 200", items[2].Output)
|
||||
}
|
||||
|
||||
func TestResponsesToChatCompletions_Incomplete(t *testing.T) {
|
||||
resp := &ResponsesResponse{
|
||||
ID: "resp_inc",
|
||||
|
||||
@@ -6,6 +6,11 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
type chatMessageContent struct {
|
||||
Text *string
|
||||
Parts []ChatContentPart
|
||||
}
|
||||
|
||||
// ChatCompletionsToResponses converts a Chat Completions request into a
|
||||
// Responses API request. The upstream always streams, so Stream is forced to
|
||||
// true. store is always false and reasoning.encrypted_content is always
|
||||
@@ -113,11 +118,11 @@ func chatMessageToResponsesItems(m ChatMessage) ([]ResponsesInputItem, error) {
|
||||
|
||||
// chatSystemToResponses converts a system message.
|
||||
func chatSystemToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
|
||||
text, err := parseChatContent(m.Content)
|
||||
parsed, err := parseChatMessageContent(m.Content)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
content, err := json.Marshal(text)
|
||||
content, err := marshalChatInputContent(parsed)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -127,39 +132,11 @@ func chatSystemToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
|
||||
// chatUserToResponses converts a user message, handling both plain strings and
|
||||
// multi-modal content arrays.
|
||||
func chatUserToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
|
||||
// Try plain string first.
|
||||
var s string
|
||||
if err := json.Unmarshal(m.Content, &s); err == nil {
|
||||
content, _ := json.Marshal(s)
|
||||
return []ResponsesInputItem{{Role: "user", Content: content}}, nil
|
||||
}
|
||||
|
||||
var parts []ChatContentPart
|
||||
if err := json.Unmarshal(m.Content, &parts); err != nil {
|
||||
parsed, err := parseChatMessageContent(m.Content)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse user content: %w", err)
|
||||
}
|
||||
|
||||
var responseParts []ResponsesContentPart
|
||||
for _, p := range parts {
|
||||
switch p.Type {
|
||||
case "text":
|
||||
if p.Text != "" {
|
||||
responseParts = append(responseParts, ResponsesContentPart{
|
||||
Type: "input_text",
|
||||
Text: p.Text,
|
||||
})
|
||||
}
|
||||
case "image_url":
|
||||
if p.ImageURL != nil && p.ImageURL.URL != "" {
|
||||
responseParts = append(responseParts, ResponsesContentPart{
|
||||
Type: "input_image",
|
||||
ImageURL: p.ImageURL.URL,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
content, err := json.Marshal(responseParts)
|
||||
content, err := marshalChatInputContent(parsed)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -312,16 +289,79 @@ func chatFunctionToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
|
||||
}
|
||||
|
||||
// parseChatContent returns the string value of a ChatMessage Content field.
|
||||
// Content must be a JSON string. Returns "" if content is null or empty.
|
||||
// Content can be a JSON string or an array of typed parts. Array content is
|
||||
// flattened to text by concatenating text parts and ignoring non-text parts.
|
||||
func parseChatContent(raw json.RawMessage) (string, error) {
|
||||
parsed, err := parseChatMessageContent(raw)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if parsed.Text != nil {
|
||||
return *parsed.Text, nil
|
||||
}
|
||||
return flattenChatContentParts(parsed.Parts), nil
|
||||
}
|
||||
|
||||
func parseChatMessageContent(raw json.RawMessage) (chatMessageContent, error) {
|
||||
if len(raw) == 0 {
|
||||
return "", nil
|
||||
return chatMessageContent{Text: stringPtr("")}, nil
|
||||
}
|
||||
|
||||
var s string
|
||||
if err := json.Unmarshal(raw, &s); err != nil {
|
||||
return "", fmt.Errorf("parse content as string: %w", err)
|
||||
if err := json.Unmarshal(raw, &s); err == nil {
|
||||
return chatMessageContent{Text: &s}, nil
|
||||
}
|
||||
return s, nil
|
||||
|
||||
var parts []ChatContentPart
|
||||
if err := json.Unmarshal(raw, &parts); err == nil {
|
||||
return chatMessageContent{Parts: parts}, nil
|
||||
}
|
||||
|
||||
return chatMessageContent{}, fmt.Errorf("parse content as string or parts array")
|
||||
}
|
||||
|
||||
func marshalChatInputContent(content chatMessageContent) (json.RawMessage, error) {
|
||||
if content.Text != nil {
|
||||
return json.Marshal(*content.Text)
|
||||
}
|
||||
return json.Marshal(convertChatContentPartsToResponses(content.Parts))
|
||||
}
|
||||
|
||||
func convertChatContentPartsToResponses(parts []ChatContentPart) []ResponsesContentPart {
|
||||
var responseParts []ResponsesContentPart
|
||||
for _, p := range parts {
|
||||
switch p.Type {
|
||||
case "text":
|
||||
if p.Text != "" {
|
||||
responseParts = append(responseParts, ResponsesContentPart{
|
||||
Type: "input_text",
|
||||
Text: p.Text,
|
||||
})
|
||||
}
|
||||
case "image_url":
|
||||
if p.ImageURL != nil && p.ImageURL.URL != "" {
|
||||
responseParts = append(responseParts, ResponsesContentPart{
|
||||
Type: "input_image",
|
||||
ImageURL: p.ImageURL.URL,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
return responseParts
|
||||
}
|
||||
|
||||
func flattenChatContentParts(parts []ChatContentPart) string {
|
||||
var textParts []string
|
||||
for _, p := range parts {
|
||||
if p.Type == "text" && p.Text != "" {
|
||||
textParts = append(textParts, p.Text)
|
||||
}
|
||||
}
|
||||
return strings.Join(textParts, "")
|
||||
}
|
||||
|
||||
func stringPtr(s string) *string {
|
||||
return &s
|
||||
}
|
||||
|
||||
// convertChatToolsToResponses maps Chat Completions tool definitions and legacy
|
||||
|
||||
Reference in New Issue
Block a user