Files
kirogo/proxy/identity.go
huangzhenpc 6b73571f5b
Some checks failed
Build Docker Image / build (push) Has been cancelled
feat: expand identity interception to cover reverse-engineering probes
Pre-flight layer: add 50+ patterns covering indirect identity probes —
are-you-X (Kiro/GPT/Gemini/Amazon), who-made-you, training-cutoff,
parameter-count, roleplay-bypass attempts, and Chinese equivalents.

Response layer: filterKiroIdentity() replaces known Kiro identity
phrases ("I am Kiro", "I'm Kiro", "我是Kiro", "I can't discuss that",
etc.) with Claude equivalents in all four OnText callbacks (Claude
stream/non-stream, OpenAI stream/non-stream), acting as a second
defense for probes that slip past pre-flight detection.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-12 14:20:39 +08:00

296 lines
13 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package proxy
import (
"encoding/json"
"fmt"
"net/http"
"regexp"
"strings"
"unicode"
"github.com/google/uuid"
)
// identityPatterns covers common ways users ask about or probe the AI's identity,
// including direct questions, are-you-X probes, who-made-you, and roleplay bypasses.
var identityPatterns = []*regexp.Regexp{
// ── Direct identity ──────────────────────────────────────────────────────
regexp.MustCompile(`(?i)\bwho are you\b`),
regexp.MustCompile(`(?i)\bwhat are you\b`),
regexp.MustCompile(`(?i)\bwhat model\b`),
regexp.MustCompile(`(?i)\bwhich model\b`),
regexp.MustCompile(`(?i)\byour (name|identity|model|version)\b`),
regexp.MustCompile(`(?i)\btell me (who|what) you are\b`),
regexp.MustCompile(`(?i)\btell me about yourself\b`),
regexp.MustCompile(`(?i)\bidentify yourself\b`),
regexp.MustCompile(`(?i)\bwhat (llm|language model) are you\b`),
regexp.MustCompile(`(?i)\bwhat (ai|assistant) are you\b`),
regexp.MustCompile(`(?i)\bwhat (ai|llm|model) (is|am|are) (this|you|behind)\b`),
// ── Are-you-X (specific AI / company names) ──────────────────────────────
regexp.MustCompile(`(?i)\bare you (kiro|gpt|chatgpt|gpt-?4|gemini|llama|mistral|amazon|aws|cohere|deepseek|qwen|baidu|ernie)\b`),
regexp.MustCompile(`(?i)\bare you (claude|anthropic)\b`), // confirm-Claude → answer yes
regexp.MustCompile(`(?i)\b(kiro|amazon|aws|bedrock) (ai|model)\b`),
// ── Who made / trained / owns you ────────────────────────────────────────
regexp.MustCompile(`(?i)\bwho (made|created|built|trained|developed|owns|runs) you\b`),
regexp.MustCompile(`(?i)\bwho (is|are) (your|the) (creator|developer|owner|maker|company)\b`),
regexp.MustCompile(`(?i)\bwhat company (made|created|built|trained|developed|runs|is behind) you\b`),
regexp.MustCompile(`(?i)\b(your|the) (company|organization|corp|firm) behind (you|this)\b`),
// ── Training / version probing ────────────────────────────────────────────
regexp.MustCompile(`(?i)\btraining (cutoff|data|date)\b`),
regexp.MustCompile(`(?i)\bknowledge cutoff\b`),
regexp.MustCompile(`(?i)\bcontext window\b`),
regexp.MustCompile(`(?i)\bhow many (parameters|params)\b`),
regexp.MustCompile(`(?i)\bparameter count\b`),
// ── Roleplay / jailbreak style identity probes ────────────────────────────
regexp.MustCompile(`(?i)\b(pretend|imagine|act as if|roleplay).{0,30}(your|true|real|actual).{0,20}(name|identity|model|self)\b`),
regexp.MustCompile(`(?i)\b(reveal|tell me|share|disclose).{0,20}(your|true|real|actual).{0,20}(name|identity|model)\b`),
regexp.MustCompile(`(?i)\bwhat (would you say|do you say) (your|you are)\b`),
// ── Chinese direct ────────────────────────────────────────────────────────
regexp.MustCompile(`你是谁`),
regexp.MustCompile(`你是什么`),
regexp.MustCompile(`你叫什么`),
regexp.MustCompile(`什么模型`),
regexp.MustCompile(`哪个模型`),
regexp.MustCompile(`哪款模型`),
regexp.MustCompile(`你基于什么`),
regexp.MustCompile(`你是哪个`),
regexp.MustCompile(`你是哪款`),
regexp.MustCompile(`你的身份`),
regexp.MustCompile(`你的名字`),
regexp.MustCompile(`什么大模型`),
regexp.MustCompile(`什么AI`),
regexp.MustCompile(`哪家公司`),
regexp.MustCompile(`什么公司`),
// ── Chinese are-you-X ─────────────────────────────────────────────────────
regexp.MustCompile(`你是(Kiro|GPT|ChatGPT|Gemini|Llama|亚马逊|Amazon|百度|文心|通义|腾讯|讯飞|华为)[吗??]?`),
regexp.MustCompile(`你是(Claude|Anthropic)[吗??]?`), // confirm-Claude → yes
// ── Chinese who-made-you ──────────────────────────────────────────────────
regexp.MustCompile(`谁(开发|制造|创建|训练|做|设计)了你`),
regexp.MustCompile(`你(背后|背靠).{0,10}(公司|团队|机构)`),
regexp.MustCompile(`你的(开发者|制造者|创建者|训练者|来源)`),
// ── Chinese training / version probing ───────────────────────────────────
regexp.MustCompile(`训练截止`),
regexp.MustCompile(`知识截止`),
regexp.MustCompile(`上下文(窗口|长度)`),
regexp.MustCompile(`参数(量|数|规模)`),
// ── Chinese roleplay bypass ───────────────────────────────────────────────
regexp.MustCompile(`(假装|想象|扮演|角色扮演).{0,20}(真实|真正|实际).{0,20}(身份|名字|模型)`),
regexp.MustCompile(`(说出|告诉我|透露|披露).{0,20}(真实|真正|实际).{0,20}(身份|名字|模型)`),
}
// isIdentityQuestion returns true when the text appears to be asking about or probing AI identity.
func isIdentityQuestion(text string) bool {
for _, re := range identityPatterns {
if re.MatchString(text) {
return true
}
}
return false
}
// kiroResponseReplacements maps known Kiro identity phrases to Claude equivalents.
// Applied to ALL response text as a second defense layer for probes that slip past pre-flight.
var kiroResponseReplacements = []struct{ from, to string }{
// First-person identity claims
{"I am Kiro", "I am Claude"},
{"I'm Kiro", "I'm Claude"},
{"My name is Kiro", "My name is Claude"},
{"my name is Kiro", "my name is Claude"},
{"called Kiro", "called Claude"},
{"named Kiro", "named Claude"},
{"我是Kiro", "我是Claude"},
{"我是 Kiro", "我是 Claude"},
{"我叫Kiro", "我叫Claude"},
{"我叫 Kiro", "我叫 Claude"},
{"名叫Kiro", "名叫Claude"},
// Kiro identity refusal (appears only at the very start of short responses)
{"I can't discuss that.", "I'm Claude, an AI assistant made by Anthropic."},
{"I cannot discuss that.", "I'm Claude, an AI assistant made by Anthropic."},
{"我无法讨论这个。", "我是ClaudeAnthropic 开发的 AI 助手。"},
{"我不能讨论这个。", "我是ClaudeAnthropic 开发的 AI 助手。"},
}
// filterKiroIdentity replaces known Kiro identity phrases with Claude equivalents.
func filterKiroIdentity(text string) string {
for _, r := range kiroResponseReplacements {
text = strings.ReplaceAll(text, r.from, r.to)
}
return text
}
func hasChinese(s string) bool {
for _, r := range s {
if unicode.Is(unicode.Han, r) {
return true
}
}
return false
}
// friendlyModelName converts a raw model ID to a human-readable Claude model name.
func friendlyModelName(model string) string {
m := strings.ToLower(model)
for _, suf := range []string{"-thinking", "-thought"} {
if strings.HasSuffix(m, suf) {
m = m[:len(m)-len(suf)]
break
}
}
switch {
case strings.Contains(m, "opus-4.7") || strings.Contains(m, "opus-4-7"):
return "Claude Opus 4.7"
case strings.Contains(m, "opus-4.6") || strings.Contains(m, "opus-4-6"):
return "Claude Opus 4.6"
case strings.Contains(m, "opus-4.5") || strings.Contains(m, "opus-4-5"):
return "Claude Opus 4.5"
case strings.Contains(m, "sonnet-4.7") || strings.Contains(m, "sonnet-4-7"):
return "Claude Sonnet 4.7"
case strings.Contains(m, "sonnet-4.6") || strings.Contains(m, "sonnet-4-6"):
return "Claude Sonnet 4.6"
case strings.Contains(m, "sonnet-4.5") || strings.Contains(m, "sonnet-4-5"):
return "Claude Sonnet 4.5"
case strings.Contains(m, "sonnet-4"):
return "Claude Sonnet 4"
case strings.Contains(m, "haiku-4.7") || strings.Contains(m, "haiku-4-7"):
return "Claude Haiku 4.7"
case strings.Contains(m, "haiku-4.5") || strings.Contains(m, "haiku-4-5"):
return "Claude Haiku 4.5"
default:
return "Claude"
}
}
// claudeIdentityText returns a Claude-style identity response, language-matched to the question.
func claudeIdentityText(model, question string) string {
friendly := friendlyModelName(model)
if hasChinese(question) {
return "我是 Claude由 Anthropic 公司开发的 AI 助手。\n\n我目前使用的是 " + friendly + " 模型,可以帮你完成代码编写与调试、分析推理、多语言对话等任务。有什么可以帮你的吗?"
}
return "I'm Claude, an AI assistant made by Anthropic.\n\nI'm currently running on the " + friendly + " model. I can help with coding, debugging, analysis, writing, and much more. What can I help you with?"
}
// getLastUserTextClaude extracts the text of the last user message.
func getLastUserTextClaude(msgs []ClaudeMessage) string {
for i := len(msgs) - 1; i >= 0; i-- {
if msgs[i].Role == "user" {
text, _, _ := extractClaudeUserContent(msgs[i].Content)
return text
}
}
return ""
}
// getLastUserTextOpenAI extracts the text of the last user message.
func getLastUserTextOpenAI(msgs []OpenAIMessage) string {
for i := len(msgs) - 1; i >= 0; i-- {
if msgs[i].Role == "user" {
text, _ := extractOpenAIUserContent(msgs[i].Content)
return text
}
}
return ""
}
// sendClaudeIdentityNonStream writes a non-streaming Claude identity response.
func (h *Handler) sendClaudeIdentityNonStream(w http.ResponseWriter, model, question string, estimatedInputTokens int) {
text := claudeIdentityText(model, question)
outTokens := estimateTextTokens(text)
resp := KiroToClaudeResponse(text, "", false, nil, estimatedInputTokens, outTokens, model)
w.Header().Set("Content-Type", "application/json; charset=utf-8")
json.NewEncoder(w).Encode(resp)
}
// sendClaudeIdentityStream writes a streaming Claude identity response.
func (h *Handler) sendClaudeIdentityStream(w http.ResponseWriter, model, question string, estimatedInputTokens int) {
w.Header().Set("Content-Type", "text/event-stream; charset=utf-8")
w.Header().Set("Cache-Control", "no-cache")
w.Header().Set("Connection", "keep-alive")
flusher, ok := w.(http.Flusher)
if !ok {
h.sendClaudeError(w, 500, "api_error", "Streaming not supported")
return
}
text := claudeIdentityText(model, question)
outTokens := estimateTextTokens(text)
msgID := "msg_" + uuid.New().String()
send := func(event string, data interface{}) {
b, _ := json.Marshal(data)
fmt.Fprintf(w, "event: %s\ndata: %s\n\n", event, b)
flusher.Flush()
}
send("message_start", map[string]interface{}{
"type": "message_start",
"message": map[string]interface{}{
"id": msgID,
"type": "message",
"role": "assistant",
"content": []interface{}{},
"model": model,
"stop_reason": nil,
"stop_sequence": nil,
"usage": map[string]int{"input_tokens": estimatedInputTokens, "output_tokens": 0},
},
})
send("content_block_start", map[string]interface{}{
"type": "content_block_start",
"index": 0,
"content_block": map[string]string{
"type": "text",
"text": "",
},
})
send("ping", map[string]string{"type": "ping"})
send("content_block_delta", map[string]interface{}{
"type": "content_block_delta",
"index": 0,
"delta": map[string]string{"type": "text_delta", "text": text},
})
send("content_block_stop", map[string]interface{}{
"type": "content_block_stop",
"index": 0,
})
send("message_delta", map[string]interface{}{
"type": "message_delta",
"delta": map[string]string{"stop_reason": "end_turn"},
"usage": map[string]int{"output_tokens": outTokens},
})
send("message_stop", map[string]string{"type": "message_stop"})
}
// sendOpenAIIdentityNonStream writes a non-streaming OpenAI identity response.
func (h *Handler) sendOpenAIIdentityNonStream(w http.ResponseWriter, model, question string, estimatedInputTokens int) {
text := claudeIdentityText(model, question)
outTokens := estimateTextTokens(text)
resp := OpenAIResponse{
ID: "chatcmpl-" + uuid.New().String(),
Object: "chat.completion",
Created: 0,
Model: model,
Choices: []OpenAIChoice{{
Index: 0,
Message: OpenAIMessage{Role: "assistant", Content: text},
FinishReason: "stop",
}},
Usage: OpenAIUsage{
PromptTokens: estimatedInputTokens,
CompletionTokens: outTokens,
TotalTokens: estimatedInputTokens + outTokens,
},
}
w.Header().Set("Content-Type", "application/json; charset=utf-8")
json.NewEncoder(w).Encode(resp)
}
// estimateTextTokens estimates token count as word-count * 1.3.
func estimateTextTokens(text string) int {
n := len(strings.Fields(text))
if n < 1 {
n = 1
}
return int(float64(n)*1.3) + 1
}