Files
kirogo/proxy/token_estimator.go
edxeth 6151888df5 fix: stabilize thinking streams, multimodal parsing, and token accounting (#20)
* fix: stabilize multimodal image compatibility across OpenCode flows

Advertise vision-capable metadata in /v1/models and make model matching deterministic so OpenCode does not downgrade image support or route 4.6 models incorrectly. Expand request translation to accept OpenCode/OpenAI attachment shapes, sanitize [Image N] placeholders safely, keep image-only follow-up turns non-empty, and improve token accounting so base64 image bytes no longer inflate prompt token usage and trigger premature compaction.

* fix: deduplicate thinking streams and trim injected prompt noise

* fix: align /v1/messages thinking blocks and message_start usage

* fix: reduce repetitive thinking across tool turns

Select a single reasoning stream source, prevent chunk replay, and preserve structured tool-loop context so the model keeps continuity instead of re-planning each turn.

* fix: unify token counting on existing API endpoints

Compute usage deterministically on /v1/messages and /v1/chat/completions even when upstream omits tokenUsage.

- remove roo-only token path and keep behavior on existing endpoints
- add proxy/token_estimator.go with shared Claude/OpenAI estimators (input/system/messages/tools + output/thinking/tool calls)
- wire stream/non-stream handlers to use estimator-derived input/output usage
- update /v1/messages/count_tokens to reuse the same estimator
- keep robust upstream usage parsing/normalization in proxy/kiro.go while dropping parser-level estimate fallback

Why: direct upstream tests show metering/context events frequently arrive without tokenUsage in this environment; this made usage zero or inconsistent. Local deterministic accounting keeps reported usage stable and explicit.
2026-02-23 20:33:53 +08:00

197 lines
4.1 KiB
Go

package proxy
import (
"encoding/json"
"math"
)
// estimateApproxTokens returns a rough, deterministic token count for text
// based on per-character-class weights instead of a real tokenizer.
//
// Characters are counted as Unicode code points; each invalid UTF-8 byte
// decodes to U+FFFD and therefore counts as one non-ASCII character.
//
//   - Empty text yields 0.
//   - Text shorter than five characters yields ceil(chars/3), i.e. 1 or 2.
//   - Longer text yields the rounded-up sum of chars/weight per class:
//     non-ASCII 1.5, ASCII digits 2.0, ASCII punctuation/symbols 1.5, and all
//     remaining ASCII (letters, whitespace, control characters) 4.5.
func estimateApproxTokens(text string) int {
	// Count and classify in a single pass. Ranging over the string decodes the
	// same code points a []rune conversion would, without allocating a copy of
	// the whole input.
	var length, regularASCII, digits, symbols, nonASCII int
	for _, r := range text {
		length++
		switch {
		case r >= 0x80:
			nonASCII++
		case r >= '0' && r <= '9':
			digits++
		case (r >= '!' && r <= '/') || (r >= ':' && r <= '@') || (r >= '[' && r <= '`') || (r >= '{' && r <= '~'):
			symbols++
		default:
			regularASCII++
		}
	}

	if length == 0 {
		return 0
	}
	if length < 5 {
		// Integer form of ceil(length/3); always at least 1 for length >= 1.
		return (length + 2) / 3
	}

	// With five or more characters the weighted sum is at least 5/4.5, so the
	// rounded-up result is always positive and needs no lower clamp.
	return int(math.Ceil(
		float64(regularASCII)/4.5 +
			float64(digits)/2.0 +
			float64(symbols)/1.5 +
			float64(nonASCII)/1.5,
	))
}
// estimateClaudeRequestInputTokens estimates the input token count of a
// Claude-format request by adding up the estimates for its system value, the
// content of every message, and each tool's name, description and input
// schema. Only those fields are read. A nil request yields 0.
func estimateClaudeRequestInputTokens(req *ClaudeRequest) int {
	if req == nil {
		return 0
	}

	// System prompt and message bodies go through the Claude content walker.
	sum := estimateClaudeValueTokens(req.System)
	for i := range req.Messages {
		sum += estimateClaudeValueTokens(req.Messages[i].Content)
	}

	// Tool definitions: plain-text fields plus the schema's serialized JSON.
	for i := range req.Tools {
		sum += estimateApproxTokens(req.Tools[i].Name)
		sum += estimateApproxTokens(req.Tools[i].Description)
		sum += estimateJSONTokens(req.Tools[i].InputSchema)
	}
	return sum
}
// estimateClaudeOutputTokens estimates the output token count of a response
// as the sum of the estimates for the visible content, the thinking content,
// and, for every tool use, its name plus its JSON-serialized input.
func estimateClaudeOutputTokens(content, thinkingContent string, toolUses []KiroToolUse) int {
	sum := estimateApproxTokens(content) + estimateApproxTokens(thinkingContent)
	for i := range toolUses {
		sum += estimateApproxTokens(toolUses[i].Name)
		sum += estimateJSONTokens(toolUses[i].Input)
	}
	return sum
}
// estimateClaudeValueTokens estimates the tokens of a decoded Claude content
// value, recursing through its structure:
//
//   - nil yields 0.
//   - A string is estimated directly.
//   - A []interface{} is the sum of its elements.
//   - A map[string]interface{} is treated as a content block (see below).
//   - Anything else is estimated from its serialized JSON.
//
// For a block, the "type" field selects a dedicated rule: "text" and
// "thinking" use the string field of the same name, "tool_use" uses the name
// plus the JSON of "input" (only when that total is positive), and
// "tool_result" recurses into "content". When the rule does not apply (field
// missing or of the wrong type) the block falls back to the sum of whatever
// "text", "thinking" and "content" fields it has, and finally, if that is
// zero, to the size of the whole block's serialized JSON. A block carrying a
// large inline payload under any other key is therefore counted in full.
func estimateClaudeValueTokens(v interface{}) int {
	if v == nil {
		return 0
	}
	if plain, isString := v.(string); isString {
		return estimateApproxTokens(plain)
	}
	if parts, isList := v.([]interface{}); isList {
		sum := 0
		for i := range parts {
			sum += estimateClaudeValueTokens(parts[i])
		}
		return sum
	}
	block, isBlock := v.(map[string]interface{})
	if !isBlock {
		return estimateJSONTokens(v)
	}

	// Look up the shared fields once; both the type-specific rules and the
	// generic fallback below consult them.
	textField, hasText := block["text"].(string)
	thinkingField, hasThinking := block["thinking"].(string)
	nested, hasContent := block["content"]

	switch kind, _ := block["type"].(string); kind {
	case "text":
		if hasText {
			return estimateApproxTokens(textField)
		}
	case "thinking":
		if hasThinking {
			return estimateApproxTokens(thinkingField)
		}
	case "tool_use":
		sum := 0
		if name, isString := block["name"].(string); isString {
			sum += estimateApproxTokens(name)
		}
		if input, present := block["input"]; present {
			sum += estimateJSONTokens(input)
		}
		// A tool_use block that contributed nothing drops to the fallback.
		if sum > 0 {
			return sum
		}
	case "tool_result":
		if hasContent {
			return estimateClaudeValueTokens(nested)
		}
	}

	// Generic fallback: untyped or unrecognized blocks, or typed blocks whose
	// expected field was absent.
	fallback := 0
	if hasText {
		fallback += estimateApproxTokens(textField)
	}
	if hasThinking {
		fallback += estimateApproxTokens(thinkingField)
	}
	if hasContent {
		fallback += estimateClaudeValueTokens(nested)
	}
	if fallback > 0 {
		return fallback
	}
	return estimateJSONTokens(block)
}
// estimateJSONTokens estimates the tokens of v from its JSON encoding. It
// yields 0 for a nil interface value and, as a best-effort choice, also 0
// when v cannot be marshaled.
func estimateJSONTokens(v interface{}) int {
	if v != nil {
		if encoded, err := json.Marshal(v); err == nil {
			return estimateApproxTokens(string(encoded))
		}
	}
	return 0
}
// estimateOpenAIRequestInputTokens estimates the input token count of an
// OpenAI-format request. For every message it counts the content, the tool
// call ID, and the function name and arguments of each tool call; for every
// tool definition it counts the function name, description and the JSON of
// its parameters. Only those fields are read. A nil request yields 0.
func estimateOpenAIRequestInputTokens(req *OpenAIRequest) int {
	if req == nil {
		return 0
	}

	sum := 0
	for i := range req.Messages {
		msg := req.Messages[i]
		sum += estimateOpenAIContentTokens(msg.Content)
		sum += estimateApproxTokens(msg.ToolCallID)
		for j := range msg.ToolCalls {
			fn := msg.ToolCalls[j].Function
			sum += estimateApproxTokens(fn.Name)
			sum += estimateApproxTokens(fn.Arguments)
		}
	}

	for i := range req.Tools {
		fn := req.Tools[i].Function
		sum += estimateApproxTokens(fn.Name)
		sum += estimateApproxTokens(fn.Description)
		sum += estimateJSONTokens(fn.Parameters)
	}
	return sum
}
// estimateOpenAIContentTokens estimates the tokens of an OpenAI message
// content value. nil yields 0 and a string is estimated directly. Any other
// value is passed to extractOpenAIMessageText; when that returns non-empty
// text only the text is counted, otherwise the estimate falls back to the
// size of the value's serialized JSON.
//
// NOTE(review): extractOpenAIMessageText is defined outside this view; it
// presumably flattens the text parts of structured content — confirm. When it
// returns "", the JSON fallback counts every field of the value in full.
func estimateOpenAIContentTokens(content interface{}) int {
	if content == nil {
		return 0
	}
	if plain, isString := content.(string); isString {
		return estimateApproxTokens(plain)
	}
	if extracted := extractOpenAIMessageText(content); extracted != "" {
		return estimateApproxTokens(extracted)
	}
	return estimateJSONTokens(content)
}
// estimateOpenAIOutputTokens estimates the output token count of an
// OpenAI-format response. It delegates to estimateClaudeOutputTokens, passing
// reasoningContent in the thinking-content position, so both API flavors
// share one accounting rule.
func estimateOpenAIOutputTokens(content, reasoningContent string, toolUses []KiroToolUse) int {
	total := estimateClaudeOutputTokens(content, reasoningContent, toolUses)
	return total
}