refactor: compact kiro log format (status-first, per-request tracing)
Some checks failed
Build Docker Image / build (push) Has been cancelled
Some checks failed
Build Docker Image / build (push) Has been cancelled
Old format had 9+ lines per request with key=value noise, and concurrent requests interleaved without any way to tell which line belongs to which call. The new format: - Each line starts with the status code/outcome (200, 400, 429, FAIL, TIMEOUT, ERR) so success/failure is visible at a glance. - Every request gets a 6-char hex req_id; all lines for that request share it, disambiguating interleaved concurrent traffic. - Endpoint abbreviated to 2 chars (CW/Q), model stripped of "claude-" prefix, attempt compacted to "a1"/"a2". - Successful requests collapse to 2 lines (REQ start + 200 done with first_byte and total elapsed). Retries/errors add one line each. - Durations use fmtMs: <1s -> "235ms", >=1s -> "2.8s" (one sig fig). Sample successful request: [KiroAPI] REQ a3f2b1 model=opus-4.7 account=x@y endpoints=CW,Q [KiroAPI] 200 a3f2b1 CW/a1 first_byte=1.2s total=2.8s Sample fallback chain: [KiroAPI] REQ b8e3c4 model=opus-4.6 account=x@y endpoints=Q,CW [KiroAPI] 400 b8e3c4 Q /a1 INVALID_MODEL_ID 325ms retry 1/3 [KiroAPI] 400 b8e3c4 Q /a2 INVALID_MODEL_ID 242ms retry 2/3 [KiroAPI] 400 b8e3c4 Q /a4 INVALID_MODEL_ID 216ms exhausted -> fallback [KiroAPI] 400 b8e3c4 CW/a1 INVALID_MODEL_ID 452ms retry 1/3 ... [KiroAPI] FAIL b8e3c4 all endpoints failed 2.1s last=400
This commit is contained in:
101
proxy/kiro.go
101
proxy/kiro.go
@@ -5,6 +5,7 @@ package proxy
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
cryptoRand "crypto/rand"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
@@ -202,20 +203,21 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
|
||||
accountLabel = account.ID
|
||||
}
|
||||
|
||||
endpointNames := make([]string, 0, len(endpoints))
|
||||
reqID := shortReqID()
|
||||
epNames := make([]string, 0, len(endpoints))
|
||||
for _, ep := range endpoints {
|
||||
endpointNames = append(endpointNames, ep.Name)
|
||||
epNames = append(epNames, shortEndpoint(ep.Name))
|
||||
}
|
||||
log.Printf("[KiroAPI] request start account=%s model=%q endpoints=[%s] invalid_model_retries=%d first_byte_timeout=%ds first_byte_retries=%d", accountLabel, modelID, strings.Join(endpointNames, ","), invalidModelRetries, firstByteTimeoutSec, firstByteRetries)
|
||||
log.Printf("[KiroAPI] REQ %s model=%s account=%s endpoints=%s", reqID, shortModel(modelID), accountLabel, strings.Join(epNames, ","))
|
||||
|
||||
requestStart := time.Now()
|
||||
|
||||
var lastErr error
|
||||
var lastStatus string // 用于 FAIL 行总结
|
||||
for _, ep := range endpoints {
|
||||
// 更新 payload 中的 origin
|
||||
payload.ConversationState.CurrentMessage.UserInputMessage.Origin = ep.Origin
|
||||
epShort := shortEndpoint(ep.Name)
|
||||
|
||||
// 同端点重试上限取两种策略中更大的那个,分别记数
|
||||
maxAttempts := invalidModelRetries + 1
|
||||
if firstByteRetries+1 > maxAttempts {
|
||||
maxAttempts = firstByteRetries + 1
|
||||
@@ -232,6 +234,8 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
|
||||
if err != nil {
|
||||
cancel()
|
||||
lastErr = err
|
||||
lastStatus = "ERR"
|
||||
log.Printf("[KiroAPI] ERR %s %s/a%d new_request %v", reqID, epShort, attempt, err)
|
||||
shouldFallback = true
|
||||
break
|
||||
}
|
||||
@@ -252,13 +256,13 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
|
||||
req.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())
|
||||
|
||||
attemptStart := time.Now()
|
||||
log.Printf("[KiroAPI] try endpoint=%s attempt=%d/%d account=%s model=%q origin=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, ep.Origin)
|
||||
|
||||
resp, err := kiroHttpStore.Load().Do(req)
|
||||
if err != nil {
|
||||
cancel()
|
||||
lastErr = err
|
||||
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q transport_error elapsed=%s err=%v", ep.Name, attempt, maxAttempts, accountLabel, modelID, time.Since(attemptStart), err)
|
||||
lastStatus = "ERR"
|
||||
log.Printf("[KiroAPI] ERR %s %s/a%d transport %s %v", reqID, epShort, attempt, fmtMs(time.Since(attemptStart)), err)
|
||||
shouldFallback = true
|
||||
break
|
||||
}
|
||||
@@ -266,8 +270,9 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
|
||||
if resp.StatusCode == 429 {
|
||||
resp.Body.Close()
|
||||
cancel()
|
||||
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q status=429 quota_exhausted elapsed=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, time.Since(attemptStart))
|
||||
lastErr = fmt.Errorf("quota exhausted on %s", ep.Name)
|
||||
lastStatus = "429"
|
||||
log.Printf("[KiroAPI] 429 %s %s/a%d quota_exhausted %s", reqID, epShort, attempt, fmtMs(time.Since(attemptStart)))
|
||||
shouldFallback = true
|
||||
break
|
||||
}
|
||||
@@ -277,42 +282,40 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
|
||||
resp.Body.Close()
|
||||
cancel()
|
||||
lastErr = fmt.Errorf("HTTP %d from %s: %s", resp.StatusCode, ep.Name, string(errBody))
|
||||
lastStatus = fmt.Sprintf("%d", resp.StatusCode)
|
||||
bodyStr := string(errBody)
|
||||
|
||||
// 认证错误不继续尝试
|
||||
if resp.StatusCode == 401 || resp.StatusCode == 403 {
|
||||
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q status=%d auth_error elapsed=%s body=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, resp.StatusCode, time.Since(attemptStart), truncateForLog(bodyStr, 300))
|
||||
log.Printf("[KiroAPI] %d %s %s/a%d auth_error %s %s", resp.StatusCode, reqID, epShort, attempt, fmtMs(time.Since(attemptStart)), truncateForLog(bodyStr, 200))
|
||||
log.Printf("[KiroAPI] FAIL %s all endpoints failed %s last=%s", reqID, fmtMs(time.Since(requestStart)), lastStatus)
|
||||
return lastErr
|
||||
}
|
||||
|
||||
// INVALID_MODEL_ID: 同端点再试
|
||||
if resp.StatusCode == 400 && strings.Contains(bodyStr, "INVALID_MODEL_ID") {
|
||||
if invalidModelUsed < invalidModelRetries {
|
||||
invalidModelUsed++
|
||||
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q status=400 INVALID_MODEL_ID retrying (%d/%d) elapsed=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, invalidModelUsed, invalidModelRetries, time.Since(attemptStart))
|
||||
log.Printf("[KiroAPI] 400 %s %s/a%d INVALID_MODEL_ID %s retry %d/%d", reqID, epShort, attempt, fmtMs(time.Since(attemptStart)), invalidModelUsed, invalidModelRetries)
|
||||
continue
|
||||
}
|
||||
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q status=400 INVALID_MODEL_ID exhausted, fallback elapsed=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, time.Since(attemptStart))
|
||||
log.Printf("[KiroAPI] 400 %s %s/a%d INVALID_MODEL_ID %s exhausted → fallback", reqID, epShort, attempt, fmtMs(time.Since(attemptStart)))
|
||||
shouldFallback = true
|
||||
break
|
||||
}
|
||||
|
||||
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q status=%d elapsed=%s body=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, resp.StatusCode, time.Since(attemptStart), truncateForLog(bodyStr, 300))
|
||||
log.Printf("[KiroAPI] %d %s %s/a%d %s %s", resp.StatusCode, reqID, epShort, attempt, fmtMs(time.Since(attemptStart)), truncateForLog(bodyStr, 200))
|
||||
shouldFallback = true
|
||||
break
|
||||
}
|
||||
|
||||
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q status=200 headers_elapsed=%s, streaming...", ep.Name, attempt, maxAttempts, accountLabel, modelID, time.Since(attemptStart))
|
||||
|
||||
// 首字节超时:启动 AfterFunc 在超时时 cancel context;收到首包后 stop timer
|
||||
// 首字节超时
|
||||
var firstByteReceived atomic.Bool
|
||||
var firstByteTimedOut atomic.Bool
|
||||
var firstByteAt time.Duration
|
||||
var timer *time.Timer
|
||||
if firstByteTimeoutSec > 0 {
|
||||
timer = time.AfterFunc(time.Duration(firstByteTimeoutSec)*time.Second, func() {
|
||||
if !firstByteReceived.Load() {
|
||||
firstByteTimedOut.Store(true)
|
||||
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q first_byte_timeout=%ds, cancelling", ep.Name, attempt, maxAttempts, accountLabel, modelID, firstByteTimeoutSec)
|
||||
cancel()
|
||||
}
|
||||
})
|
||||
@@ -320,10 +323,10 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
|
||||
|
||||
onFirstByte := func() {
|
||||
firstByteReceived.Store(true)
|
||||
firstByteAt = time.Since(attemptStart)
|
||||
if timer != nil {
|
||||
timer.Stop()
|
||||
}
|
||||
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q first_byte elapsed=%s", ep.Name, attempt, maxAttempts, accountLabel, modelID, time.Since(attemptStart))
|
||||
}
|
||||
|
||||
err = parseEventStream(resp.Body, callback, onFirstByte)
|
||||
@@ -333,21 +336,25 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
|
||||
}
|
||||
cancel()
|
||||
|
||||
// 首字节超时触发且无数据到达 → 同端点重试
|
||||
if err != nil && firstByteTimedOut.Load() && !firstByteReceived.Load() {
|
||||
lastStatus = "TIMEOUT"
|
||||
if firstByteUsed < firstByteRetries {
|
||||
firstByteUsed++
|
||||
lastErr = fmt.Errorf("first-byte timeout after %ds", firstByteTimeoutSec)
|
||||
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q first_byte_timeout retrying (%d/%d)", ep.Name, attempt, maxAttempts, accountLabel, modelID, firstByteUsed, firstByteRetries)
|
||||
log.Printf("[KiroAPI] TIMEOUT %s %s/a%d first_byte>%ds retry %d/%d", reqID, epShort, attempt, firstByteTimeoutSec, firstByteUsed, firstByteRetries)
|
||||
continue
|
||||
}
|
||||
lastErr = fmt.Errorf("first-byte timeout after %ds on %s", firstByteTimeoutSec, ep.Name)
|
||||
log.Printf("[KiroAPI] endpoint=%s attempt=%d/%d account=%s model=%q first_byte_timeout exhausted, fallback", ep.Name, attempt, maxAttempts, accountLabel, modelID)
|
||||
log.Printf("[KiroAPI] TIMEOUT %s %s/a%d first_byte>%ds exhausted → fallback", reqID, epShort, attempt, firstByteTimeoutSec)
|
||||
shouldFallback = true
|
||||
break
|
||||
}
|
||||
|
||||
log.Printf("[KiroAPI] endpoint=%s account=%s model=%q done total_elapsed=%s err=%v", ep.Name, accountLabel, modelID, time.Since(requestStart), err)
|
||||
status := "200"
|
||||
if err != nil {
|
||||
status = "ERR"
|
||||
}
|
||||
log.Printf("[KiroAPI] %s %s %s/a%d first_byte=%s total=%s", status, reqID, epShort, attempt, fmtMs(firstByteAt), fmtMs(time.Since(requestStart)))
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -356,13 +363,59 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
|
||||
}
|
||||
}
|
||||
|
||||
log.Printf("[KiroAPI] all endpoints failed account=%s model=%q total_elapsed=%s last_err=%v", accountLabel, modelID, time.Since(requestStart), lastErr)
|
||||
log.Printf("[KiroAPI] FAIL %s all endpoints failed %s last=%s", reqID, fmtMs(time.Since(requestStart)), lastStatus)
|
||||
if lastErr != nil {
|
||||
return lastErr
|
||||
}
|
||||
return fmt.Errorf("all endpoints failed")
|
||||
}
|
||||
|
||||
// shortReqID 生成 6 字符请求标识(base36)
|
||||
func shortReqID() string {
|
||||
var buf [3]byte
|
||||
if _, err := cryptoRand.Read(buf[:]); err != nil {
|
||||
return fmt.Sprintf("%06d", time.Now().UnixNano()%1000000)
|
||||
}
|
||||
return fmt.Sprintf("%02x%02x%02x", buf[0], buf[1], buf[2])
|
||||
}
|
||||
|
||||
// shortEndpoint 把端点名缩短到 2 字符便于视觉对齐
|
||||
func shortEndpoint(name string) string {
|
||||
switch name {
|
||||
case "CodeWhisperer":
|
||||
return "CW"
|
||||
case "AmazonQ":
|
||||
return "Q "
|
||||
default:
|
||||
if len(name) >= 2 {
|
||||
return name[:2]
|
||||
}
|
||||
return name
|
||||
}
|
||||
}
|
||||
|
||||
// shortModel 把长模型名截短:claude-opus-4.7 → opus-4.7
|
||||
func shortModel(m string) string {
|
||||
if strings.HasPrefix(m, "claude-") {
|
||||
return m[len("claude-"):]
|
||||
}
|
||||
if m == "" {
|
||||
return "-"
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// fmtMs 把耗时格式化成紧凑字符串:<1s 用 ms,>=1s 用 1 位小数 s
|
||||
func fmtMs(d time.Duration) string {
|
||||
if d <= 0 {
|
||||
return "0ms"
|
||||
}
|
||||
if d < time.Second {
|
||||
return fmt.Sprintf("%dms", d.Milliseconds())
|
||||
}
|
||||
return fmt.Sprintf("%.1fs", d.Seconds())
|
||||
}
|
||||
|
||||
func truncateForLog(s string, max int) string {
|
||||
s = strings.ReplaceAll(s, "\n", " ")
|
||||
if len(s) <= max {
|
||||
|
||||
Reference in New Issue
Block a user