Fix Codex OAuth tool mapping

2026-01-09 18:35:58 +08:00
parent 0a9c17b9d1
commit 7a06c4873e
6 changed files with 1434 additions and 114 deletions
--- a/backend/internal/service/codex_prompts.go
+++ b/backend/internal/service/codex_prompts.go
@@ -0,0 +1,9 @@
+package service
+
+// The blank import enables the //go:embed directives below.
+import _ "embed"
+
+// codexOpenCodeBridge holds the text of prompts/codex_opencode_bridge.txt,
+// embedded into the binary at build time.
+//
+//go:embed prompts/codex_opencode_bridge.txt
+var codexOpenCodeBridge string
+
+// codexToolRemapMessage holds the text of prompts/tool_remap_message.txt,
+// embedded into the binary at build time.
+//
+//go:embed prompts/tool_remap_message.txt
+var codexToolRemapMessage string
--- a/backend/internal/service/openai_codex_transform.go
+++ b/backend/internal/service/openai_codex_transform.go
--- a/backend/internal/service/openai_gateway_service.go
+++ b/backend/internal/service/openai_gateway_service.go
@@ -12,6 +12,7 @@ import (
 	"io"
 	"log"
 	"net/http"
+	"os"
 	"regexp"
 	"sort"
 	"strconv"
@@ -528,6 +529,7 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
 	// Extract model and stream from parsed body
 	reqModel, _ := reqBody["model"].(string)
 	reqStream, _ := reqBody["stream"].(bool)
+	promptCacheKey := ""

 	// Track if body needs re-serialization
 	bodyModified := false
@@ -540,19 +542,17 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
 		bodyModified = true
 	}

-	// For OAuth accounts using ChatGPT internal API:
-	// 1. Add store: false
-	// 2. Normalize input format for Codex API compatibility
 	if account.Type == AccountTypeOAuth {
-		reqBody["store"] = false
-		bodyModified = true
-
-		// Normalize input format: convert AI SDK multi-part content format to simplified format
-		// AI SDK sends: {"content": [{"type": "input_text", "text": "..."}]}
-		// Codex API expects: {"content": "..."}
-		if normalizeInputForCodexAPI(reqBody) {
+		codexResult := applyCodexOAuthTransform(reqBody, codexModeEnabled())
+		if codexResult.Modified {
 			bodyModified = true
 		}
+		if codexResult.NormalizedModel != "" {
+			mappedModel = codexResult.NormalizedModel
+		}
+		if codexResult.PromptCacheKey != "" {
+			promptCacheKey = codexResult.PromptCacheKey
+		}
 	}

 	// Re-serialize body only if modified
@@ -571,7 +571,7 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
 	}

 	// Build upstream request
-	upstreamReq, err := s.buildUpstreamRequest(ctx, c, account, body, token, reqStream)
+	upstreamReq, err := s.buildUpstreamRequest(ctx, c, account, body, token, reqStream, promptCacheKey)
 	if err != nil {
 		return nil, err
 	}
@@ -632,7 +632,7 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
 	}, nil
 }

-func (s *OpenAIGatewayService) buildUpstreamRequest(ctx context.Context, c *gin.Context, account *Account, body []byte, token string, isStream bool) (*http.Request, error) {
+func (s *OpenAIGatewayService) buildUpstreamRequest(ctx context.Context, c *gin.Context, account *Account, body []byte, token string, isStream bool, promptCacheKey string) (*http.Request, error) {
 	// Determine target URL based on account type
 	var targetURL string
 	switch account.Type {
@@ -672,12 +672,6 @@ func (s *OpenAIGatewayService) buildUpstreamRequest(ctx context.Context, c *gin.
 		if chatgptAccountID != "" {
 			req.Header.Set("chatgpt-account-id", chatgptAccountID)
 		}
-		// Set accept header based on stream mode
-		if isStream {
-			req.Header.Set("accept", "text/event-stream")
-		} else {
-			req.Header.Set("accept", "application/json")
-		}
 	}

 	// Whitelist passthrough headers
@@ -689,6 +683,18 @@ func (s *OpenAIGatewayService) buildUpstreamRequest(ctx context.Context, c *gin.
 			}
 		}
 	}
+	if account.Type == AccountTypeOAuth {
+		req.Header.Set("OpenAI-Beta", "responses=experimental")
+		req.Header.Set("originator", "codex_cli_rs")
+		req.Header.Set("accept", "text/event-stream")
+		if promptCacheKey != "" {
+			req.Header.Set("conversation_id", promptCacheKey)
+			req.Header.Set("session_id", promptCacheKey)
+		} else {
+			req.Header.Del("conversation_id")
+			req.Header.Del("session_id")
+		}
+	}

 	// Apply custom User-Agent if configured
 	customUA := account.GetOpenAIUserAgent()
@@ -706,6 +712,7 @@ func (s *OpenAIGatewayService) buildUpstreamRequest(ctx context.Context, c *gin.

 func (s *OpenAIGatewayService) handleErrorResponse(ctx context.Context, resp *http.Response, c *gin.Context, account *Account) (*OpenAIForwardResult, error) {
 	body, _ := io.ReadAll(resp.Body)
+	logUpstreamErrorBody(account.ID, resp.StatusCode, body)

 	// Check custom error codes
 	if !account.ShouldHandleErrorCode(resp.StatusCode) {
@@ -764,6 +771,24 @@ func (s *OpenAIGatewayService) handleErrorResponse(ctx context.Context, resp *ht
 	return nil, fmt.Errorf("upstream error: %d", resp.StatusCode)
 }

+// logUpstreamErrorBody writes the body of a failed upstream response to the
+// standard logger for debugging.
+//
+// Logging is opt-in: nothing is written unless the environment variable
+// GATEWAY_LOG_UPSTREAM_ERROR_BODY equals "true" (case-insensitive, surrounding
+// whitespace ignored). At most GATEWAY_LOG_UPSTREAM_ERROR_BODY_MAX_BYTES bytes
+// of the body are logged (default 2048; non-numeric or non-positive values
+// fall back to the default). When the body is cut, the cut is moved back to a
+// UTF-8 rune boundary and the log line carries truncated=true.
+func logUpstreamErrorBody(accountID int64, statusCode int, body []byte) {
+	if !strings.EqualFold(strings.TrimSpace(os.Getenv("GATEWAY_LOG_UPSTREAM_ERROR_BODY")), "true") {
+		return
+	}
+
+	maxBytes := 2048
+	if rawMax := strings.TrimSpace(os.Getenv("GATEWAY_LOG_UPSTREAM_ERROR_BODY_MAX_BYTES")); rawMax != "" {
+		if parsed, err := strconv.Atoi(rawMax); err == nil && parsed > 0 {
+			maxBytes = parsed
+		}
+	}
+
+	truncated := len(body) > maxBytes
+	if truncated {
+		cut := maxBytes
+		// Step back over UTF-8 continuation bytes (10xxxxxx) so the cut never
+		// lands inside a multi-byte rune. A rune has at most three
+		// continuation bytes, which bounds the walk on non-UTF-8 input.
+		for i := 0; i < 3 && cut > 0 && body[cut]&0xC0 == 0x80; i++ {
+			cut--
+		}
+		body = body[:cut]
+	}
+
+	log.Printf("Upstream error body: account=%d status=%d truncated=%t body=%q", accountID, statusCode, truncated, string(body))
+}
+
+
 // openaiStreamingResult streaming response result
 type openaiStreamingResult struct {
 	usage        *OpenAIUsage
@@ -1016,6 +1041,13 @@ func (s *OpenAIGatewayService) handleNonStreamingResponse(ctx context.Context, r
 		return nil, err
 	}

+	if account.Type == AccountTypeOAuth {
+		bodyLooksLikeSSE := bytes.Contains(body, []byte("data:")) || bytes.Contains(body, []byte("event:"))
+		if isEventStreamResponse(resp.Header) || bodyLooksLikeSSE {
+			return s.handleOAuthSSEToJSON(resp, c, body, originalModel, mappedModel)
+		}
+	}
+
 	// Parse usage
 	var response struct {
 		Usage struct {
@@ -1055,6 +1087,110 @@ func (s *OpenAIGatewayService) handleNonStreamingResponse(ctx context.Context, r
 	return usage, nil
 }

+// isEventStreamResponse reports whether the Content-Type header names the
+// text/event-stream media type. The match is case-insensitive and tolerates
+// extra parameters such as a charset.
+func isEventStreamResponse(header http.Header) bool {
+	const sseMediaType = "text/event-stream"
+	return strings.Contains(strings.ToLower(header.Get("Content-Type")), sseMediaType)
+}
+
+// handleOAuthSSEToJSON answers a non-streaming client request from a fully
+// buffered SSE body.
+//
+// If extractCodexFinalResponse finds a final response event, its "response"
+// object is sent to the client as JSON and token usage is read from that
+// object's "usage" field. Otherwise the SSE text is passed through with the
+// upstream Content-Type (falling back to text/event-stream) and usage is
+// gathered by parseSSEUsageFromBody. In both cases the mapped model name is
+// rewritten back to the originally requested one when the two differ.
+func (s *OpenAIGatewayService) handleOAuthSSEToJSON(resp *http.Response, c *gin.Context, body []byte, originalModel, mappedModel string) (*OpenAIUsage, error) {
+	sseText := string(body)
+	rewriteModel := originalModel != mappedModel
+
+	var (
+		usage   *OpenAIUsage
+		payload []byte
+	)
+
+	final, found := extractCodexFinalResponse(sseText)
+	if found {
+		usage = &OpenAIUsage{}
+
+		var parsed struct {
+			Usage struct {
+				InputTokens       int `json:"input_tokens"`
+				OutputTokens      int `json:"output_tokens"`
+				InputTokenDetails struct {
+					CachedTokens int `json:"cached_tokens"`
+				} `json:"input_tokens_details"`
+			} `json:"usage"`
+		}
+		// A final response that does not decode simply leaves usage at zero.
+		if json.Unmarshal(final, &parsed) == nil {
+			usage.InputTokens = parsed.Usage.InputTokens
+			usage.OutputTokens = parsed.Usage.OutputTokens
+			usage.CacheReadInputTokens = parsed.Usage.InputTokenDetails.CachedTokens
+		}
+
+		payload = final
+		if rewriteModel {
+			payload = s.replaceModelInResponseBody(payload, mappedModel, originalModel)
+		}
+	} else {
+		usage = s.parseSSEUsageFromBody(sseText)
+		if rewriteModel {
+			sseText = s.replaceModelInSSEBody(sseText, mappedModel, originalModel)
+		}
+		payload = []byte(sseText)
+	}
+
+	responseheaders.WriteFilteredHeaders(c.Writer.Header(), resp.Header, s.cfg.Security.ResponseHeaders)
+
+	// JSON when the final response was extracted; otherwise keep the
+	// upstream media type so the client sees the SSE body as-is.
+	var contentType string
+	switch {
+	case found:
+		contentType = "application/json; charset=utf-8"
+	default:
+		contentType = resp.Header.Get("Content-Type")
+		if contentType == "" {
+			contentType = "text/event-stream"
+		}
+	}
+	c.Data(resp.StatusCode, contentType, payload)
+
+	return usage, nil
+}
+
+// extractCodexFinalResponse scans a buffered SSE body line by line for the
+// first "response.done" or "response.completed" event that carries a
+// non-empty "response" field, and returns that field's raw JSON.
+//
+// Lines that do not match openaiSSEDataRe, empty payloads, the "[DONE]"
+// sentinel and payloads that are not valid JSON are skipped. The boolean is
+// false when no such event exists.
+func extractCodexFinalResponse(body string) ([]byte, bool) {
+	for _, rawLine := range strings.Split(body, "\n") {
+		if !openaiSSEDataRe.MatchString(rawLine) {
+			continue
+		}
+
+		payload := openaiSSEDataRe.ReplaceAllString(rawLine, "")
+		switch payload {
+		case "", "[DONE]":
+			continue
+		}
+
+		var evt struct {
+			Type     string          `json:"type"`
+			Response json.RawMessage `json:"response"`
+		}
+		if err := json.Unmarshal([]byte(payload), &evt); err != nil {
+			continue
+		}
+
+		isFinal := evt.Type == "response.done" || evt.Type == "response.completed"
+		if isFinal && len(evt.Response) > 0 {
+			return evt.Response, true
+		}
+	}
+	return nil, false
+}
+
+// parseSSEUsageFromBody walks a buffered SSE body and feeds every data
+// payload (lines matching openaiSSEDataRe, excluding empty payloads and the
+// "[DONE]" sentinel) to parseSSEUsage, accumulating into one OpenAIUsage.
+// The returned pointer is never nil.
+func (s *OpenAIGatewayService) parseSSEUsageFromBody(body string) *OpenAIUsage {
+	total := new(OpenAIUsage)
+	for _, rawLine := range strings.Split(body, "\n") {
+		if !openaiSSEDataRe.MatchString(rawLine) {
+			continue
+		}
+		if payload := openaiSSEDataRe.ReplaceAllString(rawLine, ""); payload != "" && payload != "[DONE]" {
+			s.parseSSEUsage(payload, total)
+		}
+	}
+	return total
+}
+
+// replaceModelInSSEBody applies replaceModelInSSELine to every line of a
+// buffered SSE body that matches openaiSSEDataRe and returns the re-joined
+// text. Lines that do not match are left untouched, and the "\n" separators
+// are preserved.
+func (s *OpenAIGatewayService) replaceModelInSSEBody(body, fromModel, toModel string) string {
+	segments := strings.Split(body, "\n")
+	for idx := range segments {
+		if openaiSSEDataRe.MatchString(segments[idx]) {
+			segments[idx] = s.replaceModelInSSELine(segments[idx], fromModel, toModel)
+		}
+	}
+	return strings.Join(segments, "\n")
+}
+
 func (s *OpenAIGatewayService) validateUpstreamBaseURL(raw string) (string, error) {
 	if s.cfg != nil && !s.cfg.Security.URLAllowlist.Enabled {
 		normalized, err := urlvalidator.ValidateURLFormat(raw, s.cfg.Security.URLAllowlist.AllowInsecureHTTP)
@@ -1094,101 +1230,6 @@ func (s *OpenAIGatewayService) replaceModelInResponseBody(body []byte, fromModel
 	return newBody
 }

-// normalizeInputForCodexAPI converts AI SDK multi-part content format to simplified format
-// that the ChatGPT internal Codex API expects.
-//
-// AI SDK sends content as an array of typed objects:
-//
-//	{"content": [{"type": "input_text", "text": "hello"}]}
-//
-// ChatGPT Codex API expects content as a simple string:
-//
-//	{"content": "hello"}
-//
-// This function modifies reqBody in-place and returns true if any modification was made.
-func normalizeInputForCodexAPI(reqBody map[string]any) bool {
-	input, ok := reqBody["input"]
-	if !ok {
-		return false
-	}
-
-	// Handle case where input is a simple string (already compatible)
-	if _, isString := input.(string); isString {
-		return false
-	}
-
-	// Handle case where input is an array of messages
-	inputArray, ok := input.([]any)
-	if !ok {
-		return false
-	}
-
-	modified := false
-	for _, item := range inputArray {
-		message, ok := item.(map[string]any)
-		if !ok {
-			continue
-		}
-
-		content, ok := message["content"]
-		if !ok {
-			continue
-		}
-
-		// If content is already a string, no conversion needed
-		if _, isString := content.(string); isString {
-			continue
-		}
-
-		// If content is an array (AI SDK format), convert to string
-		contentArray, ok := content.([]any)
-		if !ok {
-			continue
-		}
-
-		// Extract text from content array
-		var textParts []string
-		for _, part := range contentArray {
-			partMap, ok := part.(map[string]any)
-			if !ok {
-				continue
-			}
-
-			// Handle different content types
-			partType, _ := partMap["type"].(string)
-			switch partType {
-			case "input_text", "text":
-				// Extract text from input_text or text type
-				if text, ok := partMap["text"].(string); ok {
-					textParts = append(textParts, text)
-				}
-			case "input_image", "image":
-				// For images, we need to preserve the original format
-				// as ChatGPT Codex API may support images in a different way
-				// For now, skip image parts (they will be lost in conversion)
-				// TODO: Consider preserving image data or handling it separately
-				continue
-			case "input_file", "file":
-				// Similar to images, file inputs may need special handling
-				continue
-			default:
-				// For unknown types, try to extract text if available
-				if text, ok := partMap["text"].(string); ok {
-					textParts = append(textParts, text)
-				}
-			}
-		}
-
-		// Convert content array to string
-		if len(textParts) > 0 {
-			message["content"] = strings.Join(textParts, "\n")
-			modified = true
-		}
-	}
-
-	return modified
-}
-
 // OpenAIRecordUsageInput input for recording usage
 type OpenAIRecordUsageInput struct {
 	Result       *OpenAIForwardResult
--- a/backend/internal/service/openai_gateway_service_test.go
+++ b/backend/internal/service/openai_gateway_service_test.go
@@ -220,7 +220,7 @@ func TestOpenAIInvalidBaseURLWhenAllowlistDisabled(t *testing.T) {
 		Credentials: map[string]any{"base_url": "://invalid-url"},
 	}

-	_, err := svc.buildUpstreamRequest(c.Request.Context(), c, account, []byte("{}"), "token", false)
+	_, err := svc.buildUpstreamRequest(c.Request.Context(), c, account, []byte("{}"), "token", false, "")
 	if err == nil {
 		t.Fatalf("expected error for invalid base_url when allowlist disabled")
 	}
--- a/backend/internal/service/prompts/codex_opencode_bridge.txt
+++ b/backend/internal/service/prompts/codex_opencode_bridge.txt
@@ -0,0 +1,122 @@
+# Codex Running in OpenCode
+
+You are running Codex through OpenCode, an open-source terminal coding assistant. OpenCode provides different tools but follows Codex operating principles.
+
+## CRITICAL: Tool Replacements
+
+<critical_rule priority="0">
+❌ APPLY_PATCH DOES NOT EXIST → ✅ USE "edit" INSTEAD
+- NEVER use: apply_patch, applyPatch
+- ALWAYS use: edit tool for ALL file modifications
+- Before modifying files: Verify you're using "edit", NOT "apply_patch"
+</critical_rule>
+
+<critical_rule priority="0">
+❌ UPDATE_PLAN DOES NOT EXIST → ✅ USE "todowrite" INSTEAD
+- NEVER use: update_plan, updatePlan, read_plan, readPlan
+- ALWAYS use: todowrite for task/plan updates, todoread to read plans
+- Before plan operations: Verify you're using "todowrite", NOT "update_plan"
+</critical_rule>
+
+## Available OpenCode Tools
+
+**File Operations:**
+- `write`  - Create new files
+  - Overwriting existing files requires a prior Read in this session; default to ASCII unless the file already uses Unicode.
+- `edit`   - Modify existing files (REPLACES apply_patch)
+  - Requires a prior Read in this session; preserve exact indentation; ensure `oldString` uniquely matches or use `replaceAll`; edit fails if ambiguous or missing.
+- `read`   - Read file contents
+
+**Search/Discovery:**
+- `grep`   - Search file contents (tool, not bash grep); use `include` to filter patterns; set `path` only when not searching workspace root; for cross-file match counts use bash with `rg`.
+- `glob`   - Find files by pattern; defaults to workspace cwd unless `path` is set.
+- `list`   - List directories (requires absolute paths)
+
+**Execution:**
+- `bash`   - Run shell commands
+  - No workdir parameter; do not include it in tool calls.
+  - Always include a short description for the command.
+  - Do not use cd; use absolute paths in commands.
+  - Quote paths containing spaces with double quotes.
+  - Chain multiple commands with ';' or '&&'; avoid newlines.
+  - Use Grep/Glob tools for searches; only use bash with `rg` when you need counts or advanced features.
+  - Do not use `ls`/`cat` in bash; use `list`/`read` tools instead.
+  - For deletions (rm), verify by listing parent dir with `list`.
+
+**Network:**
+- `webfetch` - Fetch web content
+  - Use fully-formed URLs (http/https; http auto-upgrades to https).
+  - Always set `format` to one of: text | markdown | html; prefer markdown unless otherwise required.
+  - Read-only; short cache window.
+
+**Task Management:**
+- `todowrite` - Manage tasks/plans (REPLACES update_plan)
+- `todoread`  - Read current plan
+
+## Substitution Rules
+
+Base instruction says:    You MUST use instead:
+apply_patch           →   edit
+update_plan           →   todowrite
+read_plan             →   todoread
+
+**Path Usage:** Use per-tool conventions to avoid conflicts:
+- Tool calls: `read`, `edit`, `write`, `list` require absolute paths.
+- Searches: `grep`/`glob` default to the workspace cwd; prefer relative include patterns; set `path` only when a different root is needed.
+- Presentation: In assistant messages, show workspace-relative paths; use absolute paths only inside tool calls.
+- Tool schema overrides general path preferences—do not convert required absolute paths to relative.
+
+## Verification Checklist
+
+Before file/plan modifications:
+1. Am I using "edit" NOT "apply_patch"?
+2. Am I using "todowrite" NOT "update_plan"?
+3. Is this tool in the approved list above?
+4. Am I following each tool's path requirements?
+
+If ANY answer is NO → STOP and correct before proceeding.
+
+## OpenCode Working Style
+
+**Communication:**
+- Send brief preambles (8-12 words) before tool calls, building on prior context
+- Provide progress updates during longer tasks
+
+**Execution:**
+- Keep working autonomously until query is fully resolved before yielding
+- Don't return to user with partial solutions
+
+**Code Approach:**
+- New projects: Be ambitious and creative
+- Existing codebases: Surgical precision - modify only what's requested unless explicitly instructed to do otherwise
+
+**Testing:**
+- If tests exist: Start specific to your changes, then broader validation
+
+## Advanced Tools
+
+**Task Tool (Sub-Agents):**
+- Use the Task tool (functions.task) to launch sub-agents
+- Check the Task tool description for current agent types and their capabilities
+- Useful for complex analysis, specialized workflows, or tasks requiring isolated context
+- The agent list is dynamically generated - refer to tool schema for available agents
+
+**Parallelization:**
+- When multiple independent tool calls are needed, use multi_tool_use.parallel to run them concurrently.
+- Reserve sequential calls for ordered or data-dependent steps.
+
+**MCP Tools:**
+- Model Context Protocol servers provide additional capabilities
+- MCP tools are prefixed: `mcp__<server-name>__<tool-name>`
+- Check your available tools for MCP integrations
+- Use when the tool's functionality matches your task needs
+
+## What Remains from Codex
+ 
+Sandbox policies, approval mechanisms, final answer formatting, git commit protocols, and file reference formats all follow Codex instructions. In approval policy "never", never request escalations.
+
+## Approvals & Safety
+- Assume workspace-write filesystem, network enabled, approval on-failure unless explicitly stated otherwise.
+- When a command fails due to sandboxing or permissions, retry with escalated permissions if allowed by policy, including a one-line justification.
+- Treat destructive commands (e.g., `rm`, `git reset --hard`) as requiring explicit user request or approval.
+- When uncertain, prefer non-destructive verification first (e.g., confirm file existence with `list`, then delete with `bash`).
--- a/backend/internal/service/prompts/tool_remap_message.txt
+++ b/backend/internal/service/prompts/tool_remap_message.txt
@@ -0,0 +1,63 @@
+<user_instructions priority="0">
+<environment_override priority="0">
+YOU ARE IN A DIFFERENT ENVIRONMENT. These instructions override ALL previous tool references.
+</environment_override>
+
+<tool_replacements priority="0">
+<critical_rule priority="0">
+❌ APPLY_PATCH DOES NOT EXIST → ✅ USE "edit" INSTEAD
+- NEVER use: apply_patch, applyPatch
+- ALWAYS use: edit tool for ALL file modifications
+- Before modifying files: Verify you're using "edit", NOT "apply_patch"
+</critical_rule>
+
+<critical_rule priority="0">
+❌ UPDATE_PLAN DOES NOT EXIST → ✅ USE "todowrite" INSTEAD
+- NEVER use: update_plan, updatePlan
+- ALWAYS use: todowrite for ALL task/plan operations
+- Use todoread to read current plan
+- Before plan operations: Verify you're using "todowrite", NOT "update_plan"
+</critical_rule>
+</tool_replacements>
+
+<available_tools priority="0">
+File Operations:
+  • write  - Create new files
+  • edit   - Modify existing files (REPLACES apply_patch)
+  • patch  - Apply diff patches
+  • read   - Read file contents
+
+Search/Discovery:
+  • grep   - Search file contents
+  • glob   - Find files by pattern
+  • list   - List directories (use relative paths)
+
+Execution:
+  • bash   - Run shell commands
+
+Network:
+  • webfetch - Fetch web content
+
+Task Management:
+  • todowrite - Manage tasks/plans (REPLACES update_plan)
+  • todoread  - Read current plan
+</available_tools>
+
+<substitution_rules priority="0">
+Base instruction says:    You MUST use instead:
+apply_patch           →   edit
+update_plan           →   todowrite
+read_plan             →   todoread
+absolute paths        →   relative paths
+</substitution_rules>
+
+<verification_checklist priority="0">
+Before file/plan modifications:
+1. Am I using "edit" NOT "apply_patch"?
+2. Am I using "todowrite" NOT "update_plan"?
+3. Is this tool in the approved list above?
+4. Am I using relative paths?
+
+If ANY answer is NO → STOP and correct before proceeding.
+</verification_checklist>
+</user_instructions>