fix(test): fix tests and add database migrations

Test fixes:
- Fix duplicate-key conflicts in the integration tests
- Remove extraneous trailing commas in JSON
- Add inprocess_transport_test.go
- Update the haiku model-mapping test cases

Database migrations:
- 026: ops metrics aggregation tables
- 027: usage/billing consistency constraints
ianshaw
2026-01-03 06:36:35 -08:00
parent ff3f514f6b
commit b1702de522
16 changed files with 495 additions and 244 deletions


@@ -0,0 +1,104 @@
-- Ops monitoring: pre-aggregation tables for dashboard queries
--
-- Problem:
-- The ops dashboard currently runs percentile_cont + GROUP BY queries over large raw tables
-- (usage_logs, ops_error_logs). These will get slower as data grows.
--
-- This migration adds schema-only aggregation tables that can be populated by a future background job.
-- No triggers/functions/jobs are created here (schema only).
-- ============================================
-- Hourly aggregates (per provider/platform)
-- ============================================
CREATE TABLE IF NOT EXISTS ops_metrics_hourly (
-- Start of the hour bucket (recommended: UTC).
bucket_start TIMESTAMPTZ NOT NULL,
-- Provider/platform label (e.g. anthropic/openai/gemini). Mirrors ops_* queries that GROUP BY platform.
platform VARCHAR(50) NOT NULL,
-- Traffic counts. Compute rates over a range from these counts rather than
-- averaging per-bucket rates (see the example query after the COMMENT statements).
request_count BIGINT NOT NULL DEFAULT 0,
success_count BIGINT NOT NULL DEFAULT 0,
error_count BIGINT NOT NULL DEFAULT 0,
-- Error breakdown used by provider health UI.
error_4xx_count BIGINT NOT NULL DEFAULT 0,
error_5xx_count BIGINT NOT NULL DEFAULT 0,
timeout_count BIGINT NOT NULL DEFAULT 0,
-- Latency aggregates (ms).
avg_latency_ms DOUBLE PRECISION,
p99_latency_ms DOUBLE PRECISION,
-- Convenience rate (percentage, 0-100); counts remain the source of truth.
error_rate DOUBLE PRECISION NOT NULL DEFAULT 0,
-- When this row was last (re)computed by the background job.
computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
PRIMARY KEY (bucket_start, platform)
);
CREATE INDEX IF NOT EXISTS idx_ops_metrics_hourly_platform_bucket_start
ON ops_metrics_hourly (platform, bucket_start DESC);
COMMENT ON TABLE ops_metrics_hourly IS 'Pre-aggregated hourly ops metrics by provider/platform to speed up dashboard queries.';
COMMENT ON COLUMN ops_metrics_hourly.bucket_start IS 'Start timestamp of the hour bucket (recommended UTC).';
COMMENT ON COLUMN ops_metrics_hourly.platform IS 'Provider/platform label (anthropic/openai/gemini, etc).';
COMMENT ON COLUMN ops_metrics_hourly.error_rate IS 'Error rate percentage for the bucket (0-100). Counts remain the source of truth.';
COMMENT ON COLUMN ops_metrics_hourly.computed_at IS 'When the row was last computed/refreshed.';
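-- Example read path (illustrative only): derive rates over a window from the
-- counts rather than averaging per-bucket error_rate values, so buckets with
-- more traffic carry proportionally more weight.
--
--   SELECT platform,
--          SUM(error_count)::DOUBLE PRECISION
--              / NULLIF(SUM(request_count), 0) * 100 AS error_rate_pct
--   FROM ops_metrics_hourly
--   WHERE bucket_start >= NOW() - INTERVAL '24 hours'
--   GROUP BY platform;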
-- ============================================
-- Daily aggregates (per provider/platform)
-- ============================================
CREATE TABLE IF NOT EXISTS ops_metrics_daily (
-- Day bucket (recommended: UTC date).
bucket_date DATE NOT NULL,
platform VARCHAR(50) NOT NULL,
request_count BIGINT NOT NULL DEFAULT 0,
success_count BIGINT NOT NULL DEFAULT 0,
error_count BIGINT NOT NULL DEFAULT 0,
error_4xx_count BIGINT NOT NULL DEFAULT 0,
error_5xx_count BIGINT NOT NULL DEFAULT 0,
timeout_count BIGINT NOT NULL DEFAULT 0,
avg_latency_ms DOUBLE PRECISION,
p99_latency_ms DOUBLE PRECISION,
error_rate DOUBLE PRECISION NOT NULL DEFAULT 0,
computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
PRIMARY KEY (bucket_date, platform)
);
CREATE INDEX IF NOT EXISTS idx_ops_metrics_daily_platform_bucket_date
ON ops_metrics_daily (platform, bucket_date DESC);
COMMENT ON TABLE ops_metrics_daily IS 'Pre-aggregated daily ops metrics by provider/platform for longer-term trends.';
COMMENT ON COLUMN ops_metrics_daily.bucket_date IS 'UTC date of the day bucket (recommended).';
-- ============================================
-- Population strategy (future background job)
-- ============================================
--
-- Suggested approach:
-- 1) Compute hourly buckets from raw logs using UTC time-bucketing, then UPSERT into ops_metrics_hourly.
-- 2) Compute daily buckets either directly from raw logs or by rolling up ops_metrics_hourly.
--
-- Notes:
-- - Ensure the job uses a consistent timezone (recommended: SET TIME ZONE 'UTC') to avoid bucket drift.
-- - Derive the provider/platform similarly to existing dashboard queries:
-- usage_logs: COALESCE(NULLIF(groups.platform, ''), accounts.platform, '')
-- ops_error_logs: COALESCE(NULLIF(ops_error_logs.platform, ''), groups.platform, accounts.platform, '')
-- - Keep request_count/success_count/error_count as the authoritative values; compute error_rate from counts.
--
-- Example (hourly) shape (pseudo-SQL); a more concrete sketch follows:
-- INSERT INTO ops_metrics_hourly (...)
-- SELECT date_trunc('hour', created_at) AS bucket_start, platform, ...
-- FROM (/* aggregate usage_logs + ops_error_logs */) s
-- ON CONFLICT (bucket_start, platform) DO UPDATE SET ...;
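--
-- A more concrete sketch of the hourly UPSERT (illustrative only; the join keys
-- group_id/account_id and the status_code column are assumptions; mirror the
-- existing dashboard queries when implementing, and fold in ops_error_logs for
-- the error/timeout breakdowns):
--
--   INSERT INTO ops_metrics_hourly
--       (bucket_start, platform, request_count, success_count, error_count,
--        error_rate, computed_at)
--   SELECT date_trunc('hour', ul.created_at) AS bucket_start,
--          COALESCE(NULLIF(g.platform, ''), a.platform, '') AS platform,
--          COUNT(*) AS request_count,
--          COUNT(*) FILTER (WHERE ul.status_code < 400) AS success_count,
--          COUNT(*) FILTER (WHERE ul.status_code >= 400) AS error_count,
--          COUNT(*) FILTER (WHERE ul.status_code >= 400)::DOUBLE PRECISION
--              / COUNT(*) * 100 AS error_rate,
--          NOW() AS computed_at
--   FROM usage_logs ul
--   LEFT JOIN groups g ON g.id = ul.group_id
--   LEFT JOIN accounts a ON a.id = ul.account_id
--   WHERE ul.created_at >= date_trunc('hour', NOW()) - INTERVAL '1 hour'
--     AND ul.created_at <  date_trunc('hour', NOW())
--   GROUP BY 1, 2
--   ON CONFLICT (bucket_start, platform) DO UPDATE SET
--       request_count = EXCLUDED.request_count,
--       success_count = EXCLUDED.success_count,
--       error_count   = EXCLUDED.error_count,
--       error_rate    = EXCLUDED.error_rate,
--       computed_at   = EXCLUDED.computed_at;
--
-- Daily rollups can SUM the hourly counts, but p99_latency_ms cannot be derived
-- from hourly p99 values; compute it from raw logs (or a stored sketch) instead.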


@@ -0,0 +1,58 @@
-- 027_usage_billing_consistency.sql
-- Enforce usage_logs idempotency on (request_id, api_key_id) and add reconciliation infrastructure.
-- -----------------------------------------------------------------------------
-- 1) Normalize legacy request_id values
-- -----------------------------------------------------------------------------
-- Historically, request_id may have been inserted as an empty string. Convert it
-- to NULL so the upcoming unique index does not fail on repeated '' values.
UPDATE usage_logs
SET request_id = NULL
WHERE request_id = '';
-- If duplicates already exist for the same (request_id, api_key_id), keep the
-- first row and NULL-out request_id for the rest so the unique index can be
-- created without deleting historical logs.
WITH ranked AS (
SELECT
id,
ROW_NUMBER() OVER (PARTITION BY api_key_id, request_id ORDER BY id) AS rn
FROM usage_logs
WHERE request_id IS NOT NULL
)
UPDATE usage_logs ul
SET request_id = NULL
FROM ranked r
WHERE ul.id = r.id
AND r.rn > 1;
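-- Sanity check (illustrative; can be run manually to confirm the cleanup):
--
--   SELECT api_key_id, request_id, COUNT(*)
--   FROM usage_logs
--   WHERE request_id IS NOT NULL
--   GROUP BY api_key_id, request_id
--   HAVING COUNT(*) > 1;
--
-- This should return zero rows before the unique index below is created.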
-- -----------------------------------------------------------------------------
-- 2) Idempotency constraint for usage_logs
-- -----------------------------------------------------------------------------
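-- NOTE: PostgreSQL treats NULLs as distinct in unique indexes, so the rows
-- normalized to request_id = NULL above do not conflict with one another.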
CREATE UNIQUE INDEX IF NOT EXISTS idx_usage_logs_request_id_api_key_unique
ON usage_logs (request_id, api_key_id);
-- -----------------------------------------------------------------------------
-- 3) Reconciliation infrastructure: billing ledger for usage charges
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS billing_usage_entries (
id BIGSERIAL PRIMARY KEY,
usage_log_id BIGINT NOT NULL REFERENCES usage_logs(id) ON DELETE CASCADE,
user_id BIGINT NOT NULL REFERENCES users(id) ON DELETE CASCADE,
api_key_id BIGINT NOT NULL REFERENCES api_keys(id) ON DELETE CASCADE,
subscription_id BIGINT REFERENCES user_subscriptions(id) ON DELETE SET NULL,
billing_type SMALLINT NOT NULL,
applied BOOLEAN NOT NULL DEFAULT TRUE,
delta_usd DECIMAL(20, 10) NOT NULL DEFAULT 0,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE UNIQUE INDEX IF NOT EXISTS billing_usage_entries_usage_log_id_unique
ON billing_usage_entries (usage_log_id);
CREATE INDEX IF NOT EXISTS idx_billing_usage_entries_user_time
ON billing_usage_entries (user_id, created_at);
CREATE INDEX IF NOT EXISTS idx_billing_usage_entries_created_at
ON billing_usage_entries (created_at);
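-- Example write path (illustrative only; the parameter list is an assumption):
-- the billing job can insert exactly one ledger row per usage log and lean on
-- the unique index on usage_log_id for idempotency:
--
--   INSERT INTO billing_usage_entries
--       (usage_log_id, user_id, api_key_id, subscription_id, billing_type,
--        delta_usd)
--   VALUES ($1, $2, $3, $4, $5, $6)
--   ON CONFLICT (usage_log_id) DO NOTHING;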