refactor: compact kiro log format (status-first, per-request tracing)
Some checks failed
Build Docker Image / build (push) Has been cancelled

Old format had 9+ lines per request with key=value noise, and concurrent
requests interleaved without any way to tell which line belongs to which
call. The new format:

- Each line starts with the status code/outcome (200, 400, 429, FAIL,
  TIMEOUT, ERR) so success/failure is visible at a glance.
- Every request gets a 6-char hex req_id; all lines for that request
  share it, disambiguating interleaved concurrent traffic.
- Endpoint abbreviated to 2 chars (CW/Q), model stripped of "claude-"
  prefix, attempt compacted to "a1"/"a2".
- Successful requests collapse to 2 lines (REQ start + 200 done with
  first_byte and total elapsed). Retries/errors add one line each.
- Durations use fmtMs: <1s -> "235ms", >=1s -> "2.8s" (one sig fig).

Sample successful request:
  [KiroAPI] REQ  a3f2b1  model=opus-4.7  account=x@y  endpoints=CW,Q
  [KiroAPI] 200  a3f2b1  CW/a1  first_byte=1.2s  total=2.8s

Sample fallback chain:
  [KiroAPI] REQ  b8e3c4  model=opus-4.6  account=x@y  endpoints=Q,CW
  [KiroAPI] 400  b8e3c4  Q /a1  INVALID_MODEL_ID  325ms  retry 1/3
  [KiroAPI] 400  b8e3c4  Q /a2  INVALID_MODEL_ID  242ms  retry 2/3
  [KiroAPI] 400  b8e3c4  Q /a4  INVALID_MODEL_ID  216ms  exhausted -> fallback
  [KiroAPI] 400  b8e3c4  CW/a1  INVALID_MODEL_ID  452ms  retry 1/3
  ...
  [KiroAPI] FAIL b8e3c4  all endpoints failed  2.1s  last=400
This commit is contained in:
2026-05-12 10:42:32 +08:00
parent 89f731cb19
commit 2b29616723

View File

@@ -5,6 +5,7 @@ package proxy
import (
"bytes"
"context"
cryptoRand "crypto/rand"
"encoding/json"
"fmt"
"io"
@@ -202,20 +203,21 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
accountLabel = account.ID
}
endpointNames := make([]string, 0, len(endpoints))
reqID := shortReqID()
epNames := make([]string, 0, len(endpoints))
for _, ep := range endpoints {
endpointNames = append(endpointNames, ep.Name)
epNames = append(epNames, shortEndpoint(ep.Name))
}
log.Printf("[KiroAPI] request start account=%s model=%q endpoints=[%s] invalid_model_retries=%d first_byte_timeout=%ds first_byte_retries=%d", accountLabel, modelID, strings.Join(endpointNames, ","), invalidModelRetries, firstByteTimeoutSec, firstByteRetries)
log.Printf("[KiroAPI] REQ %s model=%s account=%s endpoints=%s", reqID, shortModel(modelID), accountLabel, strings.Join(epNames, ","))
requestStart := time.Now()
var lastErr error
var lastStatus string // 用于 FAIL 行总结
for _, ep := range endpoints {
// 更新 payload 中的 origin
payload.ConversationState.CurrentMessage.UserInputMessage.Origin = ep.Origin
epShort := shortEndpoint(ep.Name)
// 同端点重试上限取两种策略中更大的那个,分别记数
maxAttempts := invalidModelRetries + 1
if firstByteRetries+1 > maxAttempts {
maxAttempts = firstByteRetries + 1
@@ -232,6 +234,8 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
if err != nil {
cancel()
lastErr = err
lastStatus = "ERR"
log.Printf("[KiroAPI] ERR %s %s/a%d new_request %v", reqID, epShort, attempt, err)
shouldFallback = true
break
}
@@ -252,13 +256,13 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
req.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())
attemptStart := time.Now()
log.Printf("[KiroAPI] try endpoint=%s attempt=%d/%d account=%s model=%q origin=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, ep.Origin)
resp, err := kiroHttpStore.Load().Do(req)
if err != nil {
cancel()
lastErr = err
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q transport_error elapsed=%s err=%v", ep.Name, attempt, maxAttempts, accountLabel, modelID, time.Since(attemptStart), err)
lastStatus = "ERR"
log.Printf("[KiroAPI] ERR %s %s/a%d transport %s %v", reqID, epShort, attempt, fmtMs(time.Since(attemptStart)), err)
shouldFallback = true
break
}
@@ -266,8 +270,9 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
if resp.StatusCode == 429 {
resp.Body.Close()
cancel()
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q status=429 quota_exhausted elapsed=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, time.Since(attemptStart))
lastErr = fmt.Errorf("quota exhausted on %s", ep.Name)
lastStatus = "429"
log.Printf("[KiroAPI] 429 %s %s/a%d quota_exhausted %s", reqID, epShort, attempt, fmtMs(time.Since(attemptStart)))
shouldFallback = true
break
}
@@ -277,42 +282,40 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
resp.Body.Close()
cancel()
lastErr = fmt.Errorf("HTTP %d from %s: %s", resp.StatusCode, ep.Name, string(errBody))
lastStatus = fmt.Sprintf("%d", resp.StatusCode)
bodyStr := string(errBody)
// 认证错误不继续尝试
if resp.StatusCode == 401 || resp.StatusCode == 403 {
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q status=%d auth_error elapsed=%s body=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, resp.StatusCode, time.Since(attemptStart), truncateForLog(bodyStr, 300))
log.Printf("[KiroAPI] %d %s %s/a%d auth_error %s %s", resp.StatusCode, reqID, epShort, attempt, fmtMs(time.Since(attemptStart)), truncateForLog(bodyStr, 200))
log.Printf("[KiroAPI] FAIL %s all endpoints failed %s last=%s", reqID, fmtMs(time.Since(requestStart)), lastStatus)
return lastErr
}
// INVALID_MODEL_ID: 同端点再试
if resp.StatusCode == 400 && strings.Contains(bodyStr, "INVALID_MODEL_ID") {
if invalidModelUsed < invalidModelRetries {
invalidModelUsed++
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q status=400 INVALID_MODEL_ID retrying (%d/%d) elapsed=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, invalidModelUsed, invalidModelRetries, time.Since(attemptStart))
log.Printf("[KiroAPI] 400 %s %s/a%d INVALID_MODEL_ID %s retry %d/%d", reqID, epShort, attempt, fmtMs(time.Since(attemptStart)), invalidModelUsed, invalidModelRetries)
continue
}
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q status=400 INVALID_MODEL_ID exhausted, fallback elapsed=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, time.Since(attemptStart))
log.Printf("[KiroAPI] 400 %s %s/a%d INVALID_MODEL_ID %s exhausted fallback", reqID, epShort, attempt, fmtMs(time.Since(attemptStart)))
shouldFallback = true
break
}
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q status=%d elapsed=%s body=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, resp.StatusCode, time.Since(attemptStart), truncateForLog(bodyStr, 300))
log.Printf("[KiroAPI] %d %s %s/a%d %s %s", resp.StatusCode, reqID, epShort, attempt, fmtMs(time.Since(attemptStart)), truncateForLog(bodyStr, 200))
shouldFallback = true
break
}
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q status=200 headers_elapsed=%s, streaming...", ep.Name, attempt, maxAttempts, accountLabel, modelID, time.Since(attemptStart))
// 首字节超时:启动 AfterFunc 在超时时 cancel context收到首包后 stop timer
// 首字节超时
var firstByteReceived atomic.Bool
var firstByteTimedOut atomic.Bool
var firstByteAt time.Duration
var timer *time.Timer
if firstByteTimeoutSec > 0 {
timer = time.AfterFunc(time.Duration(firstByteTimeoutSec)*time.Second, func() {
if !firstByteReceived.Load() {
firstByteTimedOut.Store(true)
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q first_byte_timeout=%ds, cancelling", ep.Name, attempt, maxAttempts, accountLabel, modelID, firstByteTimeoutSec)
cancel()
}
})
@@ -320,10 +323,10 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
onFirstByte := func() {
firstByteReceived.Store(true)
firstByteAt = time.Since(attemptStart)
if timer != nil {
timer.Stop()
}
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q first_byte elapsed=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, time.Since(attemptStart))
}
err = parseEventStream(resp.Body, callback, onFirstByte)
@@ -333,21 +336,25 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
}
cancel()
// 首字节超时触发且无数据到达 → 同端点重试
if err != nil && firstByteTimedOut.Load() && !firstByteReceived.Load() {
lastStatus = "TIMEOUT"
if firstByteUsed < firstByteRetries {
firstByteUsed++
lastErr = fmt.Errorf("first-byte timeout after %ds", firstByteTimeoutSec)
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q first_byte_timeout retrying (%d/%d)", ep.Name, attempt, maxAttempts, accountLabel, modelID, firstByteUsed, firstByteRetries)
log.Printf("[KiroAPI] TIMEOUT %s %s/a%d first_byte>%ds retry %d/%d", reqID, epShort, attempt, firstByteTimeoutSec, firstByteUsed, firstByteRetries)
continue
}
lastErr = fmt.Errorf("first-byte timeout after %ds on %s", firstByteTimeoutSec, ep.Name)
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q first_byte_timeout exhausted, fallback", ep.Name, attempt, maxAttempts, accountLabel, modelID)
log.Printf("[KiroAPI] TIMEOUT %s %s/a%d first_byte>%ds exhausted fallback", reqID, epShort, attempt, firstByteTimeoutSec)
shouldFallback = true
break
}
log.Printf("[KiroAPI] endpoint=%s account=%s model=%q done total_elapsed=%s err=%v", ep.Name, accountLabel, modelID, time.Since(requestStart), err)
status := "200"
if err != nil {
status = "ERR"
}
log.Printf("[KiroAPI] %s %s %s/a%d first_byte=%s total=%s", status, reqID, epShort, attempt, fmtMs(firstByteAt), fmtMs(time.Since(requestStart)))
return err
}
@@ -356,13 +363,59 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
}
}
log.Printf("[KiroAPI] all endpoints failed account=%s model=%q total_elapsed=%s last_err=%v", accountLabel, modelID, time.Since(requestStart), lastErr)
log.Printf("[KiroAPI] FAIL %s all endpoints failed %s last=%s", reqID, fmtMs(time.Since(requestStart)), lastStatus)
if lastErr != nil {
return lastErr
}
return fmt.Errorf("all endpoints failed")
}
// shortReqID 生成 6 字符请求标识base36
func shortReqID() string {
var buf [3]byte
if _, err := cryptoRand.Read(buf[:]); err != nil {
return fmt.Sprintf("%06d", time.Now().UnixNano()%1000000)
}
return fmt.Sprintf("%02x%02x%02x", buf[0], buf[1], buf[2])
}
// shortEndpoint 把端点名缩短到 2 字符便于视觉对齐
func shortEndpoint(name string) string {
switch name {
case "CodeWhisperer":
return "CW"
case "AmazonQ":
return "Q "
default:
if len(name) >= 2 {
return name[:2]
}
return name
}
}
// shortModel 把长模型名截短claude-opus-4.7 → opus-4.7
func shortModel(m string) string {
if strings.HasPrefix(m, "claude-") {
return m[len("claude-"):]
}
if m == "" {
return "-"
}
return m
}
// fmtMs 把耗时格式化成紧凑字符串:<1s 用 ms>=1s 用 1 位小数 s
func fmtMs(d time.Duration) string {
if d <= 0 {
return "0ms"
}
if d < time.Second {
return fmt.Sprintf("%dms", d.Milliseconds())
}
return fmt.Sprintf("%.1fs", d.Seconds())
}
func truncateForLog(s string, max int) string {
s = strings.ReplaceAll(s, "\n", " ")
if len(s) <= max {