From 2b29616723888894153664e97cde4961d6764134 Mon Sep 17 00:00:00 2001 From: huangzhenpc Date: Tue, 12 May 2026 10:42:32 +0800 Subject: [PATCH] refactor: compact kiro log format (status-first, per-request tracing) Old format had 9+ lines per request with key=value noise, and concurrent requests interleaved without any way to tell which line belongs to which call. The new format: - Each line starts with the status code/outcome (200, 400, 429, FAIL, TIMEOUT, ERR) so success/failure is visible at a glance. - Every request gets a 6-char hex req_id; all lines for that request share it, disambiguating interleaved concurrent traffic. - Endpoint abbreviated to 2 chars (CW/Q), model stripped of "claude-" prefix, attempt compacted to "a1"/"a2". - Successful requests collapse to 2 lines (REQ start + 200 done with first_byte and total elapsed). Retries/errors add one line each. - Durations use fmtMs: <1s -> "235ms", >=1s -> "2.8s" (one sig fig). Sample successful request: [KiroAPI] REQ a3f2b1 model=opus-4.7 account=x@y endpoints=CW,Q [KiroAPI] 200 a3f2b1 CW/a1 first_byte=1.2s total=2.8s Sample fallback chain: [KiroAPI] REQ b8e3c4 model=opus-4.6 account=x@y endpoints=Q,CW [KiroAPI] 400 b8e3c4 Q /a1 INVALID_MODEL_ID 325ms retry 1/3 [KiroAPI] 400 b8e3c4 Q /a2 INVALID_MODEL_ID 242ms retry 2/3 [KiroAPI] 400 b8e3c4 Q /a4 INVALID_MODEL_ID 216ms exhausted -> fallback [KiroAPI] 400 b8e3c4 CW/a1 INVALID_MODEL_ID 452ms retry 1/3 ... [KiroAPI] FAIL b8e3c4 all endpoints failed 2.1s last=400 --- proxy/kiro.go | 101 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 77 insertions(+), 24 deletions(-) diff --git a/proxy/kiro.go b/proxy/kiro.go index b64c6bd..bb064bd 100644 --- a/proxy/kiro.go +++ b/proxy/kiro.go @@ -5,6 +5,7 @@ package proxy import ( "bytes" "context" + cryptoRand "crypto/rand" "encoding/json" "fmt" "io" @@ -202,20 +203,21 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt accountLabel = account.ID } - endpointNames := make([]string, 0, len(endpoints)) + reqID := shortReqID() + epNames := make([]string, 0, len(endpoints)) for _, ep := range endpoints { - endpointNames = append(endpointNames, ep.Name) + epNames = append(epNames, shortEndpoint(ep.Name)) } - log.Printf("[KiroAPI] request start account=%s model=%q endpoints=[%s] invalid_model_retries=%d first_byte_timeout=%ds first_byte_retries=%d", accountLabel, modelID, strings.Join(endpointNames, ","), invalidModelRetries, firstByteTimeoutSec, firstByteRetries) + log.Printf("[KiroAPI] REQ %s model=%s account=%s endpoints=%s", reqID, shortModel(modelID), accountLabel, strings.Join(epNames, ",")) requestStart := time.Now() var lastErr error + var lastStatus string // 用于 FAIL 行总结 for _, ep := range endpoints { - // 更新 payload 中的 origin payload.ConversationState.CurrentMessage.UserInputMessage.Origin = ep.Origin + epShort := shortEndpoint(ep.Name) - // 同端点重试上限取两种策略中更大的那个,分别记数 maxAttempts := invalidModelRetries + 1 if firstByteRetries+1 > maxAttempts { maxAttempts = firstByteRetries + 1 @@ -232,6 +234,8 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt if err != nil { cancel() lastErr = err + lastStatus = "ERR" + log.Printf("[KiroAPI] ERR %s %s/a%d new_request %v", reqID, epShort, attempt, err) shouldFallback = true break } @@ -252,13 +256,13 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt req.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String()) attemptStart := time.Now() - log.Printf("[KiroAPI] try endpoint=%s attempt=%d/%d account=%s model=%q origin=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, ep.Origin) resp, err := kiroHttpStore.Load().Do(req) if err != nil { cancel() lastErr = err - log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q transport_error elapsed=%s err=%v", ep.Name, attempt, maxAttempts, accountLabel, modelID, time.Since(attemptStart), err) + lastStatus = "ERR" + log.Printf("[KiroAPI] ERR %s %s/a%d transport %s %v", reqID, epShort, attempt, fmtMs(time.Since(attemptStart)), err) shouldFallback = true break } @@ -266,8 +270,9 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt if resp.StatusCode == 429 { resp.Body.Close() cancel() - log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q status=429 quota_exhausted elapsed=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, time.Since(attemptStart)) lastErr = fmt.Errorf("quota exhausted on %s", ep.Name) + lastStatus = "429" + log.Printf("[KiroAPI] 429 %s %s/a%d quota_exhausted %s", reqID, epShort, attempt, fmtMs(time.Since(attemptStart))) shouldFallback = true break } @@ -277,42 +282,40 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt resp.Body.Close() cancel() lastErr = fmt.Errorf("HTTP %d from %s: %s", resp.StatusCode, ep.Name, string(errBody)) + lastStatus = fmt.Sprintf("%d", resp.StatusCode) bodyStr := string(errBody) - // 认证错误不继续尝试 if resp.StatusCode == 401 || resp.StatusCode == 403 { - log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q status=%d auth_error elapsed=%s body=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, resp.StatusCode, time.Since(attemptStart), truncateForLog(bodyStr, 300)) + log.Printf("[KiroAPI] %d %s %s/a%d auth_error %s %s", resp.StatusCode, reqID, epShort, attempt, fmtMs(time.Since(attemptStart)), truncateForLog(bodyStr, 200)) + log.Printf("[KiroAPI] FAIL %s all endpoints failed %s last=%s", reqID, fmtMs(time.Since(requestStart)), lastStatus) return lastErr } - // INVALID_MODEL_ID: 同端点再试 if resp.StatusCode == 400 && strings.Contains(bodyStr, "INVALID_MODEL_ID") { if invalidModelUsed < invalidModelRetries { invalidModelUsed++ - log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q status=400 INVALID_MODEL_ID retrying (%d/%d) elapsed=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, invalidModelUsed, invalidModelRetries, time.Since(attemptStart)) + log.Printf("[KiroAPI] 400 %s %s/a%d INVALID_MODEL_ID %s retry %d/%d", reqID, epShort, attempt, fmtMs(time.Since(attemptStart)), invalidModelUsed, invalidModelRetries) continue } - log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q status=400 INVALID_MODEL_ID exhausted, fallback elapsed=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, time.Since(attemptStart)) + log.Printf("[KiroAPI] 400 %s %s/a%d INVALID_MODEL_ID %s exhausted → fallback", reqID, epShort, attempt, fmtMs(time.Since(attemptStart))) shouldFallback = true break } - log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q status=%d elapsed=%s body=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, resp.StatusCode, time.Since(attemptStart), truncateForLog(bodyStr, 300)) + log.Printf("[KiroAPI] %d %s %s/a%d %s %s", resp.StatusCode, reqID, epShort, attempt, fmtMs(time.Since(attemptStart)), truncateForLog(bodyStr, 200)) shouldFallback = true break } - log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q status=200 headers_elapsed=%s, streaming...", ep.Name, attempt, maxAttempts, accountLabel, modelID, time.Since(attemptStart)) - - // 首字节超时:启动 AfterFunc 在超时时 cancel context;收到首包后 stop timer + // 首字节超时 var firstByteReceived atomic.Bool var firstByteTimedOut atomic.Bool + var firstByteAt time.Duration var timer *time.Timer if firstByteTimeoutSec > 0 { timer = time.AfterFunc(time.Duration(firstByteTimeoutSec)*time.Second, func() { if !firstByteReceived.Load() { firstByteTimedOut.Store(true) - log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q first_byte_timeout=%ds, cancelling", ep.Name, attempt, maxAttempts, accountLabel, modelID, firstByteTimeoutSec) cancel() } }) @@ -320,10 +323,10 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt onFirstByte := func() { firstByteReceived.Store(true) + firstByteAt = time.Since(attemptStart) if timer != nil { timer.Stop() } - log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q first_byte elapsed=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, time.Since(attemptStart)) } err = parseEventStream(resp.Body, callback, onFirstByte) @@ -333,21 +336,25 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt } cancel() - // 首字节超时触发且无数据到达 → 同端点重试 if err != nil && firstByteTimedOut.Load() && !firstByteReceived.Load() { + lastStatus = "TIMEOUT" if firstByteUsed < firstByteRetries { firstByteUsed++ lastErr = fmt.Errorf("first-byte timeout after %ds", firstByteTimeoutSec) - log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q first_byte_timeout retrying (%d/%d)", ep.Name, attempt, maxAttempts, accountLabel, modelID, firstByteUsed, firstByteRetries) + log.Printf("[KiroAPI] TIMEOUT %s %s/a%d first_byte>%ds retry %d/%d", reqID, epShort, attempt, firstByteTimeoutSec, firstByteUsed, firstByteRetries) continue } lastErr = fmt.Errorf("first-byte timeout after %ds on %s", firstByteTimeoutSec, ep.Name) - log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q first_byte_timeout exhausted, fallback", ep.Name, attempt, maxAttempts, accountLabel, modelID) + log.Printf("[KiroAPI] TIMEOUT %s %s/a%d first_byte>%ds exhausted → fallback", reqID, epShort, attempt, firstByteTimeoutSec) shouldFallback = true break } - log.Printf("[KiroAPI] endpoint=%s account=%s model=%q done total_elapsed=%s err=%v", ep.Name, accountLabel, modelID, time.Since(requestStart), err) + status := "200" + if err != nil { + status = "ERR" + } + log.Printf("[KiroAPI] %s %s %s/a%d first_byte=%s total=%s", status, reqID, epShort, attempt, fmtMs(firstByteAt), fmtMs(time.Since(requestStart))) return err } @@ -356,13 +363,59 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt } } - log.Printf("[KiroAPI] all endpoints failed account=%s model=%q total_elapsed=%s last_err=%v", accountLabel, modelID, time.Since(requestStart), lastErr) + log.Printf("[KiroAPI] FAIL %s all endpoints failed %s last=%s", reqID, fmtMs(time.Since(requestStart)), lastStatus) if lastErr != nil { return lastErr } return fmt.Errorf("all endpoints failed") } +// shortReqID 生成 6 字符请求标识(base36) +func shortReqID() string { + var buf [3]byte + if _, err := cryptoRand.Read(buf[:]); err != nil { + return fmt.Sprintf("%06d", time.Now().UnixNano()%1000000) + } + return fmt.Sprintf("%02x%02x%02x", buf[0], buf[1], buf[2]) +} + +// shortEndpoint 把端点名缩短到 2 字符便于视觉对齐 +func shortEndpoint(name string) string { + switch name { + case "CodeWhisperer": + return "CW" + case "AmazonQ": + return "Q " + default: + if len(name) >= 2 { + return name[:2] + } + return name + } +} + +// shortModel 把长模型名截短:claude-opus-4.7 → opus-4.7 +func shortModel(m string) string { + if strings.HasPrefix(m, "claude-") { + return m[len("claude-"):] + } + if m == "" { + return "-" + } + return m +} + +// fmtMs 把耗时格式化成紧凑字符串:<1s 用 ms,>=1s 用 1 位小数 s +func fmtMs(d time.Duration) string { + if d <= 0 { + return "0ms" + } + if d < time.Second { + return fmt.Sprintf("%dms", d.Milliseconds()) + } + return fmt.Sprintf("%.1fs", d.Seconds()) +} + func truncateForLog(s string, max int) string { s = strings.ReplaceAll(s, "\n", " ") if len(s) <= max {