// kirogo/proxy/identity.go

package proxy

import (
	"encoding/json"
	"fmt"
	"net/http"
	"regexp"
	"strings"
	"unicode"

	"github.com/google/uuid"
)

// identityPatterns lists the regular expressions that isIdentityQuestion
// tries against a user message. The groups cover direct identity questions,
// "are you X" probes, who-made-you questions, training/version probing and
// roleplay-style bypass attempts, in both English and Chinese.
//
// Every pattern that contains Latin letters is case-insensitive, including
// the Chinese patterns that embed product or company names, so that
// "你是chatgpt吗" is caught just like "你是ChatGPT吗".
var identityPatterns = []*regexp.Regexp{
	// ── Direct identity ──────────────────────────────────────────────────────
	regexp.MustCompile(`(?i)\bwho are you\b`),
	regexp.MustCompile(`(?i)\bwhat are you\b`),
	regexp.MustCompile(`(?i)\bwhat model\b`),
	regexp.MustCompile(`(?i)\bwhich model\b`),
	regexp.MustCompile(`(?i)\byour (name|identity|model|version)\b`),
	regexp.MustCompile(`(?i)\btell me (who|what) you are\b`),
	regexp.MustCompile(`(?i)\btell me about yourself\b`),
	regexp.MustCompile(`(?i)\bidentify yourself\b`),
	regexp.MustCompile(`(?i)\bwhat (llm|language model) are you\b`),
	regexp.MustCompile(`(?i)\bwhat (ai|assistant) are you\b`),
	regexp.MustCompile(`(?i)\bwhat (ai|llm|model) (is|am|are) (this|you|behind)\b`),
	// ── Are-you-X (specific AI / company names) ──────────────────────────────
	regexp.MustCompile(`(?i)\bare you (kiro|gpt|chatgpt|gpt-?4|gemini|llama|mistral|amazon|aws|cohere|deepseek|qwen|baidu|ernie)\b`),
	regexp.MustCompile(`(?i)\bare you (claude|anthropic)\b`), // confirm-Claude → answer yes
	regexp.MustCompile(`(?i)\b(kiro|amazon|aws|bedrock) (ai|model)\b`),
	// ── Who made / trained / owns you ────────────────────────────────────────
	regexp.MustCompile(`(?i)\bwho (made|created|built|trained|developed|owns|runs) you\b`),
	regexp.MustCompile(`(?i)\bwho (is|are) (your|the) (creator|developer|owner|maker|company)\b`),
	regexp.MustCompile(`(?i)\bwhat company (made|created|built|trained|developed|runs|is behind) you\b`),
	regexp.MustCompile(`(?i)\b(your|the) (company|organization|corp|firm) behind (you|this)\b`),
	// ── Training / version probing ────────────────────────────────────────────
	regexp.MustCompile(`(?i)\btraining (cutoff|data|date)\b`),
	regexp.MustCompile(`(?i)\bknowledge cutoff\b`),
	regexp.MustCompile(`(?i)\bcontext window\b`),
	regexp.MustCompile(`(?i)\bhow many (parameters|params)\b`),
	regexp.MustCompile(`(?i)\bparameter count\b`),
	// ── Roleplay / jailbreak style identity probes ────────────────────────────
	regexp.MustCompile(`(?i)\b(pretend|imagine|act as if|roleplay).{0,30}(your|true|real|actual).{0,20}(name|identity|model|self)\b`),
	regexp.MustCompile(`(?i)\b(reveal|tell me|share|disclose).{0,20}(your|true|real|actual).{0,20}(name|identity|model)\b`),
	regexp.MustCompile(`(?i)\bwhat (would you say|do you say) (your|you are)\b`),
	// ── Chinese direct ────────────────────────────────────────────────────────
	regexp.MustCompile(`你是谁`),
	regexp.MustCompile(`你是什么`),
	regexp.MustCompile(`你叫什么`),
	regexp.MustCompile(`什么模型`),
	regexp.MustCompile(`哪个模型`),
	regexp.MustCompile(`哪款模型`),
	regexp.MustCompile(`你基于什么`),
	regexp.MustCompile(`你是哪个`),
	regexp.MustCompile(`你是哪款`),
	regexp.MustCompile(`你的身份`),
	regexp.MustCompile(`你的名字`),
	regexp.MustCompile(`什么大模型`),
	regexp.MustCompile(`(?i)什么AI`), // case-insensitive: also catches "什么ai"
	regexp.MustCompile(`哪家公司`),
	regexp.MustCompile(`什么公司`),
	// ── Chinese are-you-X ─────────────────────────────────────────────────────
	// Case-insensitive so lower-cased names ("你是gpt吗") are still detected.
	regexp.MustCompile(`(?i)你是(Kiro|GPT|ChatGPT|Gemini|Llama|亚马逊|Amazon|百度|文心|通义|腾讯|讯飞|华为)[吗？?]?`),
	regexp.MustCompile(`(?i)你是(Claude|Anthropic)[吗？?]?`), // confirm-Claude → yes
	// ── Chinese who-made-you ──────────────────────────────────────────────────
	regexp.MustCompile(`谁(开发|制造|创建|训练|做|设计)了你`),
	regexp.MustCompile(`你(背后|背靠).{0,10}(公司|团队|机构)`),
	regexp.MustCompile(`你的(开发者|制造者|创建者|训练者|来源)`),
	// ── Chinese training / version probing ───────────────────────────────────
	regexp.MustCompile(`训练截止`),
	regexp.MustCompile(`知识截止`),
	regexp.MustCompile(`上下文(窗口|长度)`),
	regexp.MustCompile(`参数(量|数|规模)`),
	// ── Chinese roleplay bypass ───────────────────────────────────────────────
	regexp.MustCompile(`(假装|想象|扮演|角色扮演).{0,20}(真实|真正|实际).{0,20}(身份|名字|模型)`),
	regexp.MustCompile(`(说出|告诉我|透露|披露).{0,20}(真实|真正|实际).{0,20}(身份|名字|模型)`),
}

// isIdentityQuestion reports whether text matches at least one entry of
// identityPatterns, i.e. whether it looks like a question about (or a probe
// of) the assistant's identity. Scanning stops at the first hit.
func isIdentityQuestion(text string) bool {
	for i := 0; i < len(identityPatterns); i++ {
		if identityPatterns[i].MatchString(text) {
			return true
		}
	}
	return false
}

// kiroResponseReplacements is the ordered substitution table consumed by
// filterKiroIdentity: each entry rewrites a literal Kiro identity phrase
// (from) into its Claude counterpart (to). It acts as a second line of
// defense on response text for probes that were not caught before the
// request was forwarded.
var kiroResponseReplacements = []struct{ from, to string }{
	// First-person identity statements, English.
	{from: "I am Kiro", to: "I am Claude"},
	{from: "I'm Kiro", to: "I'm Claude"},
	{from: "My name is Kiro", to: "My name is Claude"},
	{from: "my name is Kiro", to: "my name is Claude"},
	{from: "called Kiro", to: "called Claude"},
	{from: "named Kiro", to: "named Claude"},
	// First-person identity statements, Chinese (with and without a space).
	{from: "我是Kiro", to: "我是Claude"},
	{from: "我是 Kiro", to: "我是 Claude"},
	{from: "我叫Kiro", to: "我叫Claude"},
	{from: "我叫 Kiro", to: "我叫 Claude"},
	{from: "名叫Kiro", to: "名叫Claude"},
	// Identity refusal sentences. The original author notes these show up
	// only at the very start of short responses; the table itself carries no
	// position information.
	{from: "I can't discuss that.", to: "I'm Claude, an AI assistant made by Anthropic."},
	{from: "I cannot discuss that.", to: "I'm Claude, an AI assistant made by Anthropic."},
	{from: "我无法讨论这个。", to: "我是Claude，Anthropic 开发的 AI 助手。"},
	{from: "我不能讨论这个。", to: "我是Claude，Anthropic 开发的 AI 助手。"},
}

// filterKiroIdentity returns text with every occurrence of each phrase in
// kiroResponseReplacements swapped for its Claude equivalent. Entries are
// applied one after another in table order, each over the output of the
// previous one.
func filterKiroIdentity(text string) string {
	out := text
	for i := range kiroResponseReplacements {
		pair := kiroResponseReplacements[i]
		out = strings.ReplaceAll(out, pair.from, pair.to)
	}
	return out
}

// hasChinese reports whether s contains at least one rune from the Unicode
// Han script. Fullwidth punctuation and other scripts do not count.
func hasChinese(s string) bool {
	for _, ch := range s {
		if !unicode.In(ch, unicode.Han) {
			continue
		}
		return true
	}
	return false
}

// friendlyModelName turns a raw model ID into a display name such as
// "Claude Sonnet 4.5".
//
// The ID is lower-cased and at most one trailing "-thinking" or "-thought"
// suffix is removed. The result is then searched for known family/version
// substrings, in the order listed below; the first hit wins. IDs that match
// nothing yield the generic name "Claude".
func friendlyModelName(model string) string {
	id := strings.ToLower(model)

	for _, tail := range []string{"-thinking", "-thought"} {
		if trimmed := strings.TrimSuffix(id, tail); trimmed != id {
			id = trimmed
			break
		}
	}

	// Order matters: more specific versions must precede the bare
	// "sonnet-4" entry, which would otherwise shadow them.
	known := []struct {
		label   string
		needles []string
	}{
		{"Claude Opus 4.7", []string{"opus-4.7", "opus-4-7"}},
		{"Claude Opus 4.6", []string{"opus-4.6", "opus-4-6"}},
		{"Claude Opus 4.5", []string{"opus-4.5", "opus-4-5"}},
		{"Claude Sonnet 4.7", []string{"sonnet-4.7", "sonnet-4-7"}},
		{"Claude Sonnet 4.6", []string{"sonnet-4.6", "sonnet-4-6"}},
		{"Claude Sonnet 4.5", []string{"sonnet-4.5", "sonnet-4-5"}},
		{"Claude Sonnet 4", []string{"sonnet-4"}},
		{"Claude Haiku 4.7", []string{"haiku-4.7", "haiku-4-7"}},
		{"Claude Haiku 4.5", []string{"haiku-4.5", "haiku-4-5"}},
	}
	for _, entry := range known {
		for _, needle := range entry.needles {
			if strings.Contains(id, needle) {
				return entry.label
			}
		}
	}
	return "Claude"
}

// claudeIdentityText builds the canned "I am Claude" self-introduction.
// The display name comes from friendlyModelName(model). The reply is written
// in Chinese when question contains Han characters and in English otherwise.
func claudeIdentityText(model, question string) string {
	name := friendlyModelName(model)
	switch {
	case hasChinese(question):
		return "我是 Claude，由 Anthropic 公司开发的 AI 助手。\n\n我目前使用的是 " + name + " 模型，可以帮你完成代码编写与调试、分析推理、多语言对话等任务。有什么可以帮你的吗？"
	default:
		return "I'm Claude, an AI assistant made by Anthropic.\n\nI'm currently running on the " + name + " model. I can help with coding, debugging, analysis, writing, and much more. What can I help you with?"
	}
}

// getLastUserTextClaude walks msgs from the end and returns the text that
// extractClaudeUserContent yields for the most recent message whose role is
// "user". It returns "" when there is no such message.
func getLastUserTextClaude(msgs []ClaudeMessage) string {
	for idx := len(msgs); idx > 0; idx-- {
		msg := &msgs[idx-1]
		if msg.Role != "user" {
			continue
		}
		// Only the text part is needed; the other two results are ignored.
		text, _, _ := extractClaudeUserContent(msg.Content)
		return text
	}
	return ""
}

// getLastUserTextOpenAI walks msgs from the end and returns the text that
// extractOpenAIUserContent yields for the most recent message whose role is
// "user". It returns "" when there is no such message.
func getLastUserTextOpenAI(msgs []OpenAIMessage) string {
	for idx := len(msgs); idx > 0; idx-- {
		msg := &msgs[idx-1]
		if msg.Role != "user" {
			continue
		}
		// Only the text part is needed; the second result is ignored.
		text, _ := extractOpenAIUserContent(msg.Content)
		return text
	}
	return ""
}

// sendClaudeIdentityNonStream answers an identity question with a single
// JSON body. The canned text from claudeIdentityText is wrapped with
// KiroToClaudeResponse, using the caller's input-token estimate and an
// output-token estimate derived from the text.
func (h *Handler) sendClaudeIdentityNonStream(w http.ResponseWriter, model, question string, estimatedInputTokens int) {
	answer := claudeIdentityText(model, question)
	completionTokens := estimateTextTokens(answer)
	payload := KiroToClaudeResponse(answer, "", false, nil, estimatedInputTokens, completionTokens, model)

	w.Header().Set("Content-Type", "application/json; charset=utf-8")
	enc := json.NewEncoder(w)
	// Best effort: the encode/write error is deliberately not reported.
	_ = enc.Encode(payload)
}

// sendClaudeIdentityStream answers an identity question as a server-sent
// event stream (Content-Type text/event-stream).
//
// The canned text from claudeIdentityText is delivered in a single
// text_delta, framed by message_start, content_block_start, ping,
// content_block_stop, message_delta and message_stop events. Each event is
// flushed as soon as it is written. estimatedInputTokens is reported in
// message_start and an output-token estimate in message_delta.
//
// If w does not implement http.Flusher, a 500 "api_error" is sent through
// h.sendClaudeError and no streaming headers are set.
func (h *Handler) sendClaudeIdentityStream(w http.ResponseWriter, model, question string, estimatedInputTokens int) {
	// Check for streaming support BEFORE touching the headers, so the error
	// response does not go out carrying event-stream headers.
	flusher, ok := w.(http.Flusher)
	if !ok {
		h.sendClaudeError(w, 500, "api_error", "Streaming not supported")
		return
	}

	w.Header().Set("Content-Type", "text/event-stream; charset=utf-8")
	w.Header().Set("Cache-Control", "no-cache")
	w.Header().Set("Connection", "keep-alive")

	text := claudeIdentityText(model, question)
	outTokens := estimateTextTokens(text)
	msgID := "msg_" + uuid.New().String()

	// send writes one SSE frame and flushes it immediately.
	send := func(event string, data interface{}) {
		b, err := json.Marshal(data)
		if err != nil {
			// Never emit a frame with an empty data field.
			return
		}
		// Write errors (e.g. client went away) are ignored on purpose: there
		// is nothing useful left to do with the connection at that point.
		fmt.Fprintf(w, "event: %s\ndata: %s\n\n", event, b)
		flusher.Flush()
	}

	send("message_start", map[string]interface{}{
		"type": "message_start",
		"message": map[string]interface{}{
			"id":            msgID,
			"type":          "message",
			"role":          "assistant",
			"content":       []interface{}{},
			"model":         model,
			"stop_reason":   nil,
			"stop_sequence": nil,
			"usage":         map[string]int{"input_tokens": estimatedInputTokens, "output_tokens": 0},
		},
	})
	send("content_block_start", map[string]interface{}{
		"type":  "content_block_start",
		"index": 0,
		"content_block": map[string]string{
			"type": "text",
			"text": "",
		},
	})
	send("ping", map[string]string{"type": "ping"})
	// The whole answer goes out as one delta.
	send("content_block_delta", map[string]interface{}{
		"type":  "content_block_delta",
		"index": 0,
		"delta": map[string]string{"type": "text_delta", "text": text},
	})
	send("content_block_stop", map[string]interface{}{
		"type":  "content_block_stop",
		"index": 0,
	})
	send("message_delta", map[string]interface{}{
		"type":  "message_delta",
		"delta": map[string]string{"stop_reason": "end_turn"},
		"usage": map[string]int{"output_tokens": outTokens},
	})
	send("message_stop", map[string]string{"type": "message_stop"})
}

// sendOpenAIIdentityNonStream answers an identity question with a single
// JSON body shaped as an OpenAIResponse ("chat.completion"). It carries one
// assistant choice holding the text from claudeIdentityText and a usage
// section built from the caller's input-token estimate plus an output-token
// estimate for the text.
func (h *Handler) sendOpenAIIdentityNonStream(w http.ResponseWriter, model, question string, estimatedInputTokens int) {
	answer := claudeIdentityText(model, question)
	completionTokens := estimateTextTokens(answer)

	choice := OpenAIChoice{
		Index:        0,
		Message:      OpenAIMessage{Role: "assistant", Content: answer},
		FinishReason: "stop",
	}
	usage := OpenAIUsage{
		PromptTokens:     estimatedInputTokens,
		CompletionTokens: completionTokens,
		TotalTokens:      estimatedInputTokens + completionTokens,
	}
	payload := OpenAIResponse{
		ID:     "chatcmpl-" + uuid.New().String(),
		Object: "chat.completion",
		// NOTE(review): Created is always 0 here; presumably clients expect
		// a Unix timestamp. Confirm the field type before changing.
		Created: 0,
		Model:   model,
		Choices: []OpenAIChoice{choice},
		Usage:   usage,
	}

	w.Header().Set("Content-Type", "application/json; charset=utf-8")
	enc := json.NewEncoder(w)
	// Best effort: the encode/write error is deliberately not reported.
	_ = enc.Encode(payload)
}

// estimateTextTokens returns a rough token estimate for text, computed as
// int(units*1.3)+1 with units clamped to at least 1.
//
// A unit is either a run of non-space, non-Han runes (a whitespace-delimited
// word, as strings.Fields would split it) or a single Han rune. Counting Han
// runes one by one keeps Chinese text, which has no spaces between words,
// from being estimated as a handful of tokens. For text without Han runes
// the result equals the plain word-count formula.
func estimateTextTokens(text string) int {
	units := 0
	inWord := false
	for _, r := range text {
		switch {
		case unicode.IsSpace(r):
			inWord = false
		case unicode.Is(unicode.Han, r):
			// Each Han rune is its own unit and ends any word in progress.
			units++
			inWord = false
		default:
			if !inWord {
				units++
				inWord = true
			}
		}
	}
	if units < 1 {
		units = 1
	}
	return int(float64(units)*1.3) + 1
}