From ce006a7a9130ce2042625e4cf67ee318a19fa9f6 Mon Sep 17 00:00:00 2001 From: alfadb Date: Sat, 28 Feb 2026 10:37:06 +0800 Subject: [PATCH 1/2] fix(ops): validate error_type against known whitelist before classification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream proxies (account 4, 112) return `""` as the error.type in their JSON responses — a Go fmt.Sprintf("%v", nil) artifact. Since `normalizeOpsErrorType` only checked for empty string, the literal "" passed through and poisoned the entire classification chain: error_phase was misclassified as "internal" (instead of "request"), severity was inflated to P2, and the stored error_type was meaningless. Add `isKnownOpsErrorType` whitelist so any unrecognised type falls through to the code-based or default "api_error" classification. Co-Authored-By: Claude Opus 4.6 --- backend/internal/handler/ops_error_logger.go | 23 +++++++- .../internal/handler/ops_error_logger_test.go | 57 +++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/backend/internal/handler/ops_error_logger.go b/backend/internal/handler/ops_error_logger.go index 6fbf7952..42fe33fd 100644 --- a/backend/internal/handler/ops_error_logger.go +++ b/backend/internal/handler/ops_error_logger.go @@ -939,8 +939,29 @@ func guessPlatformFromPath(path string) string { } } +// isKnownOpsErrorType returns true if t is a recognized error type used by the +// ops classification pipeline. Upstream proxies sometimes return garbage values +// (e.g. the Go-serialized literal "") which would pollute phase/severity +// classification if accepted blindly. +func isKnownOpsErrorType(t string) bool { + switch t { + case "invalid_request_error", + "authentication_error", + "rate_limit_error", + "billing_error", + "subscription_error", + "upstream_error", + "overloaded_error", + "api_error", + "not_found_error", + "forbidden_error": + return true + } + return false +} + func normalizeOpsErrorType(errType string, code string) string { - if errType != "" { + if errType != "" && isKnownOpsErrorType(errType) { return errType } switch strings.TrimSpace(code) { diff --git a/backend/internal/handler/ops_error_logger_test.go b/backend/internal/handler/ops_error_logger_test.go index 731b36ab..74d847e3 100644 --- a/backend/internal/handler/ops_error_logger_test.go +++ b/backend/internal/handler/ops_error_logger_test.go @@ -214,3 +214,60 @@ func TestOpsErrorLoggerMiddleware_DoesNotBreakOuterMiddlewares(t *testing.T) { }) require.Equal(t, http.StatusNoContent, rec.Code) } + +func TestIsKnownOpsErrorType(t *testing.T) { + known := []string{ + "invalid_request_error", + "authentication_error", + "rate_limit_error", + "billing_error", + "subscription_error", + "upstream_error", + "overloaded_error", + "api_error", + "not_found_error", + "forbidden_error", + } + for _, k := range known { + require.True(t, isKnownOpsErrorType(k), "expected known: %s", k) + } + + unknown := []string{"", "null", "", "random_error", "some_new_type", "\u003e"} + for _, u := range unknown { + require.False(t, isKnownOpsErrorType(u), "expected unknown: %q", u) + } +} + +func TestNormalizeOpsErrorType(t *testing.T) { + tests := []struct { + name string + errType string + code string + want string + }{ + // Known types pass through. + {"known invalid_request_error", "invalid_request_error", "", "invalid_request_error"}, + {"known rate_limit_error", "rate_limit_error", "", "rate_limit_error"}, + {"known upstream_error", "upstream_error", "", "upstream_error"}, + + // Unknown/garbage types are rejected and fall through to code-based or default. + {"nil literal from upstream", "", "", "api_error"}, + {"null string", "null", "", "api_error"}, + {"random string", "something_weird", "", "api_error"}, + + // Unknown type but known code still maps correctly. + {"nil with INSUFFICIENT_BALANCE code", "", "INSUFFICIENT_BALANCE", "billing_error"}, + {"nil with USAGE_LIMIT_EXCEEDED code", "", "USAGE_LIMIT_EXCEEDED", "subscription_error"}, + + // Empty type falls through to code-based mapping. + {"empty type with balance code", "", "INSUFFICIENT_BALANCE", "billing_error"}, + {"empty type with subscription code", "", "SUBSCRIPTION_NOT_FOUND", "subscription_error"}, + {"empty type no code", "", "", "api_error"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := normalizeOpsErrorType(tt.errType, tt.code) + require.Equal(t, tt.want, got) + }) + } +} From 093d7ba8583d14b081c2f5a96f19855b24c6e911 Mon Sep 17 00:00:00 2001 From: alfadb Date: Sat, 28 Feb 2026 14:55:07 +0800 Subject: [PATCH 2/2] fix(ops): use normalized error type for all classification functions - Compute normalizedType once and pass to classifyOpsPhase, classifyOpsSeverity, classifyOpsIsBusinessLimited, classifyOpsIsRetryable instead of raw parsed.ErrorType - Add test case verifying known type takes precedence over conflicting code Addresses Copilot review feedback on PR #680. --- backend/internal/handler/ops_error_logger.go | 12 +++++++----- backend/internal/handler/ops_error_logger_test.go | 3 +++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/backend/internal/handler/ops_error_logger.go b/backend/internal/handler/ops_error_logger.go index 42fe33fd..2f53d655 100644 --- a/backend/internal/handler/ops_error_logger.go +++ b/backend/internal/handler/ops_error_logger.go @@ -662,8 +662,10 @@ func OpsErrorLoggerMiddleware(ops *service.OpsService) gin.HandlerFunc { requestID = c.Writer.Header().Get("x-request-id") } - phase := classifyOpsPhase(parsed.ErrorType, parsed.Message, parsed.Code) - isBusinessLimited := classifyOpsIsBusinessLimited(parsed.ErrorType, phase, parsed.Code, status, parsed.Message) + normalizedType := normalizeOpsErrorType(parsed.ErrorType, parsed.Code) + + phase := classifyOpsPhase(normalizedType, parsed.Message, parsed.Code) + isBusinessLimited := classifyOpsIsBusinessLimited(normalizedType, phase, parsed.Code, status, parsed.Message) errorOwner := classifyOpsErrorOwner(phase, parsed.Message) errorSource := classifyOpsErrorSource(phase, parsed.Message) @@ -685,8 +687,8 @@ func OpsErrorLoggerMiddleware(ops *service.OpsService) gin.HandlerFunc { UserAgent: c.GetHeader("User-Agent"), ErrorPhase: phase, - ErrorType: normalizeOpsErrorType(parsed.ErrorType, parsed.Code), - Severity: classifyOpsSeverity(parsed.ErrorType, status), + ErrorType: normalizedType, + Severity: classifyOpsSeverity(normalizedType, status), StatusCode: status, IsBusinessLimited: isBusinessLimited, IsCountTokens: isCountTokensRequest(c), @@ -698,7 +700,7 @@ func OpsErrorLoggerMiddleware(ops *service.OpsService) gin.HandlerFunc { ErrorSource: errorSource, ErrorOwner: errorOwner, - IsRetryable: classifyOpsIsRetryable(parsed.ErrorType, status), + IsRetryable: classifyOpsIsRetryable(normalizedType, status), RetryCount: 0, CreatedAt: time.Now(), } diff --git a/backend/internal/handler/ops_error_logger_test.go b/backend/internal/handler/ops_error_logger_test.go index 74d847e3..679dd4ce 100644 --- a/backend/internal/handler/ops_error_logger_test.go +++ b/backend/internal/handler/ops_error_logger_test.go @@ -263,6 +263,9 @@ func TestNormalizeOpsErrorType(t *testing.T) { {"empty type with balance code", "", "INSUFFICIENT_BALANCE", "billing_error"}, {"empty type with subscription code", "", "SUBSCRIPTION_NOT_FOUND", "subscription_error"}, {"empty type no code", "", "", "api_error"}, + + // Known type overrides conflicting code-based mapping. + {"known type overrides conflicting code", "rate_limit_error", "INSUFFICIENT_BALANCE", "rate_limit_error"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) {