Merge pull request #1197 from mutuyihao/fix/apicompat-array-content
fix(apicompat): support array content for system and tool messages
This commit is contained in:
@@ -181,6 +181,35 @@ func TestChatCompletionsToResponses_ImageURL(t *testing.T) {
|
|||||||
assert.Equal(t, "data:image/png;base64,abc123", parts[1].ImageURL)
|
assert.Equal(t, "data:image/png;base64,abc123", parts[1].ImageURL)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestChatCompletionsToResponses_SystemArrayContent(t *testing.T) {
|
||||||
|
req := &ChatCompletionsRequest{
|
||||||
|
Model: "gpt-4o",
|
||||||
|
Messages: []ChatMessage{
|
||||||
|
{Role: "system", Content: json.RawMessage(`[{"type":"text","text":"You are a careful visual assistant."}]`)},
|
||||||
|
{Role: "user", Content: json.RawMessage(`[{"type":"text","text":"Describe this image"},{"type":"image_url","image_url":{"url":"data:image/png;base64,abc123"}}]`)},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := ChatCompletionsToResponses(req)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
var items []ResponsesInputItem
|
||||||
|
require.NoError(t, json.Unmarshal(resp.Input, &items))
|
||||||
|
require.Len(t, items, 2)
|
||||||
|
|
||||||
|
var systemParts []ResponsesContentPart
|
||||||
|
require.NoError(t, json.Unmarshal(items[0].Content, &systemParts))
|
||||||
|
require.Len(t, systemParts, 1)
|
||||||
|
assert.Equal(t, "input_text", systemParts[0].Type)
|
||||||
|
assert.Equal(t, "You are a careful visual assistant.", systemParts[0].Text)
|
||||||
|
|
||||||
|
var userParts []ResponsesContentPart
|
||||||
|
require.NoError(t, json.Unmarshal(items[1].Content, &userParts))
|
||||||
|
require.Len(t, userParts, 2)
|
||||||
|
assert.Equal(t, "input_image", userParts[1].Type)
|
||||||
|
assert.Equal(t, "data:image/png;base64,abc123", userParts[1].ImageURL)
|
||||||
|
}
|
||||||
|
|
||||||
func TestChatCompletionsToResponses_LegacyFunctions(t *testing.T) {
|
func TestChatCompletionsToResponses_LegacyFunctions(t *testing.T) {
|
||||||
req := &ChatCompletionsRequest{
|
req := &ChatCompletionsRequest{
|
||||||
Model: "gpt-4o",
|
Model: "gpt-4o",
|
||||||
@@ -398,6 +427,45 @@ func TestResponsesToChatCompletions_Reasoning(t *testing.T) {
|
|||||||
assert.Equal(t, "I thought about it.", chat.Choices[0].Message.ReasoningContent)
|
assert.Equal(t, "I thought about it.", chat.Choices[0].Message.ReasoningContent)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestChatCompletionsToResponses_ToolArrayContent(t *testing.T) {
|
||||||
|
req := &ChatCompletionsRequest{
|
||||||
|
Model: "gpt-4o",
|
||||||
|
Messages: []ChatMessage{
|
||||||
|
{Role: "user", Content: json.RawMessage(`"Use the tool"`)},
|
||||||
|
{
|
||||||
|
Role: "assistant",
|
||||||
|
ToolCalls: []ChatToolCall{
|
||||||
|
{
|
||||||
|
ID: "call_1",
|
||||||
|
Type: "function",
|
||||||
|
Function: ChatFunctionCall{
|
||||||
|
Name: "inspect_image",
|
||||||
|
Arguments: `{}`,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Role: "tool",
|
||||||
|
ToolCallID: "call_1",
|
||||||
|
Content: json.RawMessage(
|
||||||
|
`[{"type":"text","text":"image width: 100"},{"type":"image_url","image_url":{"url":"data:image/png;base64,ignored"}},{"type":"text","text":"; image height: 200"}]`,
|
||||||
|
),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := ChatCompletionsToResponses(req)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
var items []ResponsesInputItem
|
||||||
|
require.NoError(t, json.Unmarshal(resp.Input, &items))
|
||||||
|
require.Len(t, items, 3)
|
||||||
|
assert.Equal(t, "function_call_output", items[2].Type)
|
||||||
|
assert.Equal(t, "call_1", items[2].CallID)
|
||||||
|
assert.Equal(t, "image width: 100; image height: 200", items[2].Output)
|
||||||
|
}
|
||||||
|
|
||||||
func TestResponsesToChatCompletions_Incomplete(t *testing.T) {
|
func TestResponsesToChatCompletions_Incomplete(t *testing.T) {
|
||||||
resp := &ResponsesResponse{
|
resp := &ResponsesResponse{
|
||||||
ID: "resp_inc",
|
ID: "resp_inc",
|
||||||
|
|||||||
@@ -6,6 +6,11 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type chatMessageContent struct {
|
||||||
|
Text *string
|
||||||
|
Parts []ChatContentPart
|
||||||
|
}
|
||||||
|
|
||||||
// ChatCompletionsToResponses converts a Chat Completions request into a
|
// ChatCompletionsToResponses converts a Chat Completions request into a
|
||||||
// Responses API request. The upstream always streams, so Stream is forced to
|
// Responses API request. The upstream always streams, so Stream is forced to
|
||||||
// true. store is always false and reasoning.encrypted_content is always
|
// true. store is always false and reasoning.encrypted_content is always
|
||||||
@@ -113,11 +118,11 @@ func chatMessageToResponsesItems(m ChatMessage) ([]ResponsesInputItem, error) {
|
|||||||
|
|
||||||
// chatSystemToResponses converts a system message.
|
// chatSystemToResponses converts a system message.
|
||||||
func chatSystemToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
|
func chatSystemToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
|
||||||
text, err := parseChatContent(m.Content)
|
parsed, err := parseChatMessageContent(m.Content)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
content, err := json.Marshal(text)
|
content, err := marshalChatInputContent(parsed)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -127,39 +132,11 @@ func chatSystemToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
|
|||||||
// chatUserToResponses converts a user message, handling both plain strings and
|
// chatUserToResponses converts a user message, handling both plain strings and
|
||||||
// multi-modal content arrays.
|
// multi-modal content arrays.
|
||||||
func chatUserToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
|
func chatUserToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
|
||||||
// Try plain string first.
|
parsed, err := parseChatMessageContent(m.Content)
|
||||||
var s string
|
if err != nil {
|
||||||
if err := json.Unmarshal(m.Content, &s); err == nil {
|
|
||||||
content, _ := json.Marshal(s)
|
|
||||||
return []ResponsesInputItem{{Role: "user", Content: content}}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
var parts []ChatContentPart
|
|
||||||
if err := json.Unmarshal(m.Content, &parts); err != nil {
|
|
||||||
return nil, fmt.Errorf("parse user content: %w", err)
|
return nil, fmt.Errorf("parse user content: %w", err)
|
||||||
}
|
}
|
||||||
|
content, err := marshalChatInputContent(parsed)
|
||||||
var responseParts []ResponsesContentPart
|
|
||||||
for _, p := range parts {
|
|
||||||
switch p.Type {
|
|
||||||
case "text":
|
|
||||||
if p.Text != "" {
|
|
||||||
responseParts = append(responseParts, ResponsesContentPart{
|
|
||||||
Type: "input_text",
|
|
||||||
Text: p.Text,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
case "image_url":
|
|
||||||
if p.ImageURL != nil && p.ImageURL.URL != "" {
|
|
||||||
responseParts = append(responseParts, ResponsesContentPart{
|
|
||||||
Type: "input_image",
|
|
||||||
ImageURL: p.ImageURL.URL,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
content, err := json.Marshal(responseParts)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -312,16 +289,79 @@ func chatFunctionToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// parseChatContent returns the string value of a ChatMessage Content field.
|
// parseChatContent returns the string value of a ChatMessage Content field.
|
||||||
// Content must be a JSON string. Returns "" if content is null or empty.
|
// Content can be a JSON string or an array of typed parts. Array content is
|
||||||
|
// flattened to text by concatenating text parts and ignoring non-text parts.
|
||||||
func parseChatContent(raw json.RawMessage) (string, error) {
|
func parseChatContent(raw json.RawMessage) (string, error) {
|
||||||
|
parsed, err := parseChatMessageContent(raw)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
if parsed.Text != nil {
|
||||||
|
return *parsed.Text, nil
|
||||||
|
}
|
||||||
|
return flattenChatContentParts(parsed.Parts), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseChatMessageContent(raw json.RawMessage) (chatMessageContent, error) {
|
||||||
if len(raw) == 0 {
|
if len(raw) == 0 {
|
||||||
return "", nil
|
return chatMessageContent{Text: stringPtr("")}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var s string
|
var s string
|
||||||
if err := json.Unmarshal(raw, &s); err != nil {
|
if err := json.Unmarshal(raw, &s); err == nil {
|
||||||
return "", fmt.Errorf("parse content as string: %w", err)
|
return chatMessageContent{Text: &s}, nil
|
||||||
}
|
}
|
||||||
return s, nil
|
|
||||||
|
var parts []ChatContentPart
|
||||||
|
if err := json.Unmarshal(raw, &parts); err == nil {
|
||||||
|
return chatMessageContent{Parts: parts}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return chatMessageContent{}, fmt.Errorf("parse content as string or parts array")
|
||||||
|
}
|
||||||
|
|
||||||
|
func marshalChatInputContent(content chatMessageContent) (json.RawMessage, error) {
|
||||||
|
if content.Text != nil {
|
||||||
|
return json.Marshal(*content.Text)
|
||||||
|
}
|
||||||
|
return json.Marshal(convertChatContentPartsToResponses(content.Parts))
|
||||||
|
}
|
||||||
|
|
||||||
|
func convertChatContentPartsToResponses(parts []ChatContentPart) []ResponsesContentPart {
|
||||||
|
var responseParts []ResponsesContentPart
|
||||||
|
for _, p := range parts {
|
||||||
|
switch p.Type {
|
||||||
|
case "text":
|
||||||
|
if p.Text != "" {
|
||||||
|
responseParts = append(responseParts, ResponsesContentPart{
|
||||||
|
Type: "input_text",
|
||||||
|
Text: p.Text,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
case "image_url":
|
||||||
|
if p.ImageURL != nil && p.ImageURL.URL != "" {
|
||||||
|
responseParts = append(responseParts, ResponsesContentPart{
|
||||||
|
Type: "input_image",
|
||||||
|
ImageURL: p.ImageURL.URL,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return responseParts
|
||||||
|
}
|
||||||
|
|
||||||
|
func flattenChatContentParts(parts []ChatContentPart) string {
|
||||||
|
var textParts []string
|
||||||
|
for _, p := range parts {
|
||||||
|
if p.Type == "text" && p.Text != "" {
|
||||||
|
textParts = append(textParts, p.Text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return strings.Join(textParts, "")
|
||||||
|
}
|
||||||
|
|
||||||
|
// stringPtr returns a pointer to a fresh copy of s. It exists so that string
// literals and other non-addressable values can be used where a *string is
// required.
func stringPtr(s string) *string {
	return &s
}
|
||||||
|
|
||||||
// convertChatToolsToResponses maps Chat Completions tool definitions and legacy
|
// convertChatToolsToResponses maps Chat Completions tool definitions and legacy
|
||||||
|
|||||||
Reference in New Issue
Block a user