From fff1d548583bead12bafbdbdca576984b82739a2 Mon Sep 17 00:00:00 2001 From: yangjianbo Date: Thu, 12 Feb 2026 16:27:29 +0800 Subject: [PATCH] =?UTF-8?q?feat(log):=20=E8=90=BD=E5=9C=B0=E7=BB=9F?= =?UTF-8?q?=E4=B8=80=E6=97=A5=E5=BF=97=E5=BA=95=E5=BA=A7=E4=B8=8E=E7=B3=BB?= =?UTF-8?q?=E7=BB=9F=E6=97=A5=E5=BF=97=E8=BF=90=E7=BB=B4=E8=83=BD=E5=8A=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/cmd/server/main.go | 22 +- backend/cmd/server/wire.go | 7 + backend/cmd/server/wire_gen.go | 12 +- backend/go.mod | 6 +- backend/go.sum | 18 +- backend/internal/config/config.go | 106 ++++ backend/internal/config/config_test.go | 31 ++ .../admin/ops_runtime_logging_handler_test.go | 173 +++++++ .../handler/admin/ops_settings_handler.go | 79 +++ .../handler/admin/ops_system_log_handler.go | 174 +++++++ backend/internal/handler/gateway_handler.go | 6 +- .../internal/handler/gemini_v1beta_handler.go | 2 +- .../handler/openai_gateway_handler.go | 2 +- backend/internal/handler/ops_error_logger.go | 17 +- .../internal/handler/sora_gateway_handler.go | 2 +- backend/internal/pkg/ctxkey/ctxkey.go | 12 + backend/internal/pkg/logger/config_adapter.go | 31 ++ backend/internal/pkg/logger/logger.go | 373 ++++++++++++++ backend/internal/pkg/logger/logger_test.go | 129 +++++ backend/internal/pkg/logger/options.go | 161 ++++++ backend/internal/pkg/logger/options_test.go | 102 ++++ backend/internal/pkg/logger/slog_handler.go | 133 +++++ backend/internal/repository/ops_repo.go | 327 ++++++++++++ .../repository/ops_repo_system_logs_test.go | 86 ++++ .../server/middleware/client_request_id.go | 8 +- backend/internal/server/middleware/logger.go | 51 +- .../middleware/request_access_logger_test.go | 193 ++++++++ .../server/middleware/request_logger.go | 45 ++ backend/internal/server/router.go | 1 + backend/internal/server/routes/admin.go | 8 + backend/internal/service/domain_constants.go | 3 + .../internal/service/ops_cleanup_service.go | 18 +- 
backend/internal/service/ops_log_runtime.go | 267 ++++++++++ backend/internal/service/ops_models.go | 15 + backend/internal/service/ops_port.go | 67 +++ .../internal/service/ops_repo_mock_test.go | 196 ++++++++ backend/internal/service/ops_service.go | 7 +- .../service/ops_system_log_service.go | 124 +++++ .../internal/service/ops_system_log_sink.go | 302 ++++++++++++ .../service/ops_system_log_sink_test.go | 254 ++++++++++ backend/internal/service/wire.go | 9 + backend/migrations/054_ops_system_logs.sql | 55 +++ deploy/.env.example | 46 ++ deploy/config.example.yaml | 64 +++ frontend/src/api/admin/ops.ts | 109 +++- frontend/src/views/admin/ops/OpsDashboard.vue | 8 + .../ops/components/OpsSystemLogTable.vue | 464 ++++++++++++++++++ frontend/src/views/admin/ops/types.ts | 5 +- 48 files changed, 4265 insertions(+), 65 deletions(-) create mode 100644 backend/internal/handler/admin/ops_runtime_logging_handler_test.go create mode 100644 backend/internal/handler/admin/ops_system_log_handler.go create mode 100644 backend/internal/pkg/logger/config_adapter.go create mode 100644 backend/internal/pkg/logger/logger.go create mode 100644 backend/internal/pkg/logger/logger_test.go create mode 100644 backend/internal/pkg/logger/options.go create mode 100644 backend/internal/pkg/logger/options_test.go create mode 100644 backend/internal/pkg/logger/slog_handler.go create mode 100644 backend/internal/repository/ops_repo_system_logs_test.go create mode 100644 backend/internal/server/middleware/request_access_logger_test.go create mode 100644 backend/internal/server/middleware/request_logger.go create mode 100644 backend/internal/service/ops_log_runtime.go create mode 100644 backend/internal/service/ops_repo_mock_test.go create mode 100644 backend/internal/service/ops_system_log_service.go create mode 100644 backend/internal/service/ops_system_log_sink.go create mode 100644 backend/internal/service/ops_system_log_sink_test.go create mode 100644 
backend/migrations/054_ops_system_logs.sql create mode 100644 frontend/src/views/admin/ops/components/OpsSystemLogTable.vue diff --git a/backend/cmd/server/main.go b/backend/cmd/server/main.go index 4d632b6f..31f2e1d1 100644 --- a/backend/cmd/server/main.go +++ b/backend/cmd/server/main.go @@ -8,7 +8,6 @@ import ( "errors" "flag" "log" - "log/slog" "net/http" "os" "os/signal" @@ -19,6 +18,7 @@ import ( _ "github.com/Wei-Shaw/sub2api/ent/runtime" "github.com/Wei-Shaw/sub2api/internal/config" "github.com/Wei-Shaw/sub2api/internal/handler" + "github.com/Wei-Shaw/sub2api/internal/pkg/logger" "github.com/Wei-Shaw/sub2api/internal/server/middleware" "github.com/Wei-Shaw/sub2api/internal/setup" "github.com/Wei-Shaw/sub2api/internal/web" @@ -49,22 +49,9 @@ func init() { // initLogger configures the default slog handler based on gin.Mode(). // In non-release mode, Debug level logs are enabled. -func initLogger() { - var level slog.Level - if gin.Mode() == gin.ReleaseMode { - level = slog.LevelInfo - } else { - level = slog.LevelDebug - } - handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{ - Level: level, - }) - slog.SetDefault(slog.New(handler)) -} - func main() { - // Initialize slog logger based on gin mode - initLogger() + logger.InitBootstrap() + defer logger.Sync() // Parse command line flags setupMode := flag.Bool("setup", false, "Run setup wizard in CLI mode") @@ -141,6 +128,9 @@ func runMainServer() { if err != nil { log.Fatalf("Failed to load config: %v", err) } + if err := logger.Init(logger.OptionsFromConfig(cfg.Log)); err != nil { + log.Fatalf("Failed to initialize logger: %v", err) + } if cfg.RunMode == config.RunModeSimple { log.Println("⚠️ WARNING: Running in SIMPLE mode - billing and quota checks are DISABLED") } diff --git a/backend/cmd/server/wire.go b/backend/cmd/server/wire.go index 18515236..c426eec1 100644 --- a/backend/cmd/server/wire.go +++ b/backend/cmd/server/wire.go @@ -67,6 +67,7 @@ func provideCleanup( opsAlertEvaluator 
*service.OpsAlertEvaluatorService, opsCleanup *service.OpsCleanupService, opsScheduledReport *service.OpsScheduledReportService, + opsSystemLogSink *service.OpsSystemLogSink, soraMediaCleanup *service.SoraMediaCleanupService, schedulerSnapshot *service.SchedulerSnapshotService, tokenRefresh *service.TokenRefreshService, @@ -103,6 +104,12 @@ func provideCleanup( } return nil }}, + {"OpsSystemLogSink", func() error { + if opsSystemLogSink != nil { + opsSystemLogSink.Stop() + } + return nil + }}, {"SoraMediaCleanupService", func() error { if soraMediaCleanup != nil { soraMediaCleanup.Stop() diff --git a/backend/cmd/server/wire_gen.go b/backend/cmd/server/wire_gen.go index 5c870934..be17fb01 100644 --- a/backend/cmd/server/wire_gen.go +++ b/backend/cmd/server/wire_gen.go @@ -160,7 +160,8 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) { openAITokenProvider := service.NewOpenAITokenProvider(accountRepository, geminiTokenCache, openAIOAuthService) openAIGatewayService := service.NewOpenAIGatewayService(accountRepository, usageLogRepository, userRepository, userSubscriptionRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, httpUpstream, deferredService, openAITokenProvider) geminiMessagesCompatService := service.NewGeminiMessagesCompatService(accountRepository, groupRepository, gatewayCache, schedulerSnapshotService, geminiTokenProvider, rateLimitService, httpUpstream, antigravityGatewayService, configConfig) - opsService := service.NewOpsService(opsRepository, settingRepository, configConfig, accountRepository, userRepository, concurrencyService, gatewayService, openAIGatewayService, geminiMessagesCompatService, antigravityGatewayService) + opsSystemLogSink := service.ProvideOpsSystemLogSink(opsRepository) + opsService := service.NewOpsService(opsRepository, settingRepository, configConfig, accountRepository, userRepository, concurrencyService, 
gatewayService, openAIGatewayService, geminiMessagesCompatService, antigravityGatewayService, opsSystemLogSink) settingHandler := admin.NewSettingHandler(settingService, emailService, turnstileService, opsService) opsHandler := admin.NewOpsHandler(opsService) updateCache := repository.NewUpdateCache(redisClient) @@ -204,7 +205,7 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) { tokenRefreshService := service.ProvideTokenRefreshService(accountRepository, soraAccountRepository, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService, compositeTokenCacheInvalidator, schedulerCache, configConfig) accountExpiryService := service.ProvideAccountExpiryService(accountRepository) subscriptionExpiryService := service.ProvideSubscriptionExpiryService(userSubscriptionRepository) - v := provideCleanup(client, redisClient, opsMetricsCollector, opsAggregationService, opsAlertEvaluatorService, opsCleanupService, opsScheduledReportService, soraMediaCleanupService, schedulerSnapshotService, tokenRefreshService, accountExpiryService, subscriptionExpiryService, usageCleanupService, pricingService, emailQueueService, billingCacheService, subscriptionService, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService) + v := provideCleanup(client, redisClient, opsMetricsCollector, opsAggregationService, opsAlertEvaluatorService, opsCleanupService, opsScheduledReportService, opsSystemLogSink, soraMediaCleanupService, schedulerSnapshotService, tokenRefreshService, accountExpiryService, subscriptionExpiryService, usageCleanupService, pricingService, emailQueueService, billingCacheService, subscriptionService, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService) application := &Application{ Server: httpServer, Cleanup: v, @@ -234,6 +235,7 @@ func provideCleanup( opsAlertEvaluator *service.OpsAlertEvaluatorService, opsCleanup *service.OpsCleanupService, opsScheduledReport 
*service.OpsScheduledReportService, + opsSystemLogSink *service.OpsSystemLogSink, soraMediaCleanup *service.SoraMediaCleanupService, schedulerSnapshot *service.SchedulerSnapshotService, tokenRefresh *service.TokenRefreshService, @@ -269,6 +271,12 @@ func provideCleanup( } return nil }}, + {"OpsSystemLogSink", func() error { + if opsSystemLogSink != nil { + opsSystemLogSink.Stop() + } + return nil + }}, {"SoraMediaCleanupService", func() error { if soraMediaCleanup != nil { soraMediaCleanup.Stop() diff --git a/backend/go.mod b/backend/go.mod index 30a0041c..2a79c203 100644 --- a/backend/go.mod +++ b/backend/go.mod @@ -5,6 +5,7 @@ go 1.25.7 require ( entgo.io/ent v0.14.5 github.com/DATA-DOG/go-sqlmock v1.5.2 + github.com/cespare/xxhash/v2 v2.3.0 github.com/dgraph-io/ristretto v0.2.0 github.com/gin-gonic/gin v1.9.1 github.com/golang-jwt/jwt/v5 v5.2.2 @@ -13,6 +14,7 @@ require ( github.com/gorilla/websocket v1.5.3 github.com/imroc/req/v3 v3.57.0 github.com/lib/pq v1.10.9 + github.com/patrickmn/go-cache v2.1.0+incompatible github.com/pquerna/otp v1.5.0 github.com/redis/go-redis/v9 v9.17.2 github.com/refraction-networking/utls v1.8.1 @@ -25,10 +27,12 @@ require ( github.com/tidwall/gjson v1.18.0 github.com/tidwall/sjson v1.2.5 github.com/zeromicro/go-zero v1.9.4 + go.uber.org/zap v1.24.0 golang.org/x/crypto v0.47.0 golang.org/x/net v0.49.0 golang.org/x/sync v0.19.0 golang.org/x/term v0.39.0 + gopkg.in/natefinch/lumberjack.v2 v2.2.1 gopkg.in/yaml.v3 v3.0.1 modernc.org/sqlite v1.44.3 ) @@ -45,7 +49,6 @@ require ( github.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc // indirect github.com/bytedance/sonic v1.9.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect - github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect github.com/containerd/errdefs v1.0.0 // indirect github.com/containerd/errdefs/pkg v0.3.0 // indirect @@ -104,7 +107,6 @@ require ( github.com/ncruces/go-strftime 
v1.0.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect - github.com/patrickmn/go-cache v2.1.0+incompatible // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect diff --git a/backend/go.sum b/backend/go.sum index f6fdb851..eda2af99 100644 --- a/backend/go.sum +++ b/backend/go.sum @@ -18,6 +18,8 @@ github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwTo github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY= github.com/apparentlymart/go-textseg/v15 v15.0.0 h1:uYvfpb3DyLSCGWnctWKGj857c6ew1u1fNQOlOtuGxQY= github.com/apparentlymart/go-textseg/v15 v15.0.0/go.mod h1:K8XmNZdhEBkdlyDdvbmmsvpAG721bKi0joRfFdHIWJ4= +github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= +github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/bmatcuk/doublestar v1.3.4 h1:gPypJ5xD31uhX6Tf54sDPUOBXTqKH4c9aPY66CyQrS0= github.com/bmatcuk/doublestar v1.3.4/go.mod h1:wiQtGV+rzVYxB7WIlirSN++5HPtPlXEo9MEoZQC/PmE= github.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc h1:biVzkmvwrH8WK8raXaxBx6fRVTlJILwEwQGL1I/ByEI= @@ -137,8 +139,6 @@ github.com/icholy/digest v1.1.0 h1:HfGg9Irj7i+IX1o1QAmPfIBNu/Q5A5Tu3n/MED9k9H4= github.com/icholy/digest v1.1.0/go.mod h1:QNrsSGQ5v7v9cReDI0+eyjsXGUoRSUZQHeQ5C4XLa0Y= github.com/imroc/req/v3 v3.57.0 h1:LMTUjNRUybUkTPn8oJDq8Kg3JRBOBTcnDhKu7mzupKI= github.com/imroc/req/v3 v3.57.0/go.mod h1:JL62ey1nvSLq81HORNcosvlf7SxZStONNqOprg0Pz00= -github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= -github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= 
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= @@ -174,8 +174,6 @@ github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovk github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= -github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mattn/go-sqlite3 v1.14.17 h1:mCRHCLDUBXgpKAqIKsaAaAsrAlbkeomtRFKXh2L6YIM= github.com/mattn/go-sqlite3 v1.14.17/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= github.com/mdelapenya/tlscert v0.2.0 h1:7H81W6Z/4weDvZBNOfQte5GpIMo0lGYEeWbkGp5LJHI= @@ -209,8 +207,6 @@ github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= -github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= -github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= @@ -240,8 +236,6 @@ github.com/refraction-networking/utls v1.8.1 h1:yNY1kapmQU8JeM1sSw2H2asfTIwWxIkr github.com/refraction-networking/utls v1.8.1/go.mod 
h1:jkSOEkLqn+S/jtpEHPOsVv/4V4EVnelwbMQl4vCWXAM= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= -github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= -github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= @@ -264,8 +258,6 @@ github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY= github.com/spf13/cast v1.6.0 h1:GEiTHELF+vaR5dhz3VqZfFSzZjYbgeKDpBxQVS4GYJ0= github.com/spf13/cast v1.6.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= -github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= -github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.18.2 h1:LUXCnvUvSM6FXAsj6nnfc8Q2tp1dIgUfY9Kc8GsSOiQ= @@ -342,10 +334,14 @@ go.uber.org/atomic v1.10.0 h1:9qC72Qh0+3MqyJbAn8YU5xVq1frD8bn3JtD2oXtafVQ= go.uber.org/atomic v1.10.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/mock v0.6.0 
h1:hyF9dfmbgIX5EfOdasqLsWD6xqpNZlXblLB/Dbnwv3Y= go.uber.org/mock v0.6.0/go.mod h1:KiVJ4BqZJaMj4svdfmHM0AUx4NJYO8ZNpPnZn1Z+BBU= go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= +go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60= +go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= @@ -393,6 +389,8 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntN gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc= +gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/backend/internal/config/config.go b/backend/internal/config/config.go index 3f3deefc..3706e5e3 100644 --- a/backend/internal/config/config.go +++ b/backend/internal/config/config.go @@ -39,6 +39,7 @@ const ( type Config struct { Server ServerConfig `mapstructure:"server"` + Log LogConfig `mapstructure:"log"` CORS CORSConfig `mapstructure:"cors"` Security SecurityConfig `mapstructure:"security"` Billing BillingConfig `mapstructure:"billing"` @@ -68,6 +69,38 @@ type Config struct 
{ Update UpdateConfig `mapstructure:"update"` } +type LogConfig struct { + Level string `mapstructure:"level"` + Format string `mapstructure:"format"` + ServiceName string `mapstructure:"service_name"` + Environment string `mapstructure:"env"` + Caller bool `mapstructure:"caller"` + StacktraceLevel string `mapstructure:"stacktrace_level"` + Output LogOutputConfig `mapstructure:"output"` + Rotation LogRotationConfig `mapstructure:"rotation"` + Sampling LogSamplingConfig `mapstructure:"sampling"` +} + +type LogOutputConfig struct { + ToStdout bool `mapstructure:"to_stdout"` + ToFile bool `mapstructure:"to_file"` + FilePath string `mapstructure:"file_path"` +} + +type LogRotationConfig struct { + MaxSizeMB int `mapstructure:"max_size_mb"` + MaxBackups int `mapstructure:"max_backups"` + MaxAgeDays int `mapstructure:"max_age_days"` + Compress bool `mapstructure:"compress"` + LocalTime bool `mapstructure:"local_time"` +} + +type LogSamplingConfig struct { + Enabled bool `mapstructure:"enabled"` + Initial int `mapstructure:"initial"` + Thereafter int `mapstructure:"thereafter"` +} + type GeminiConfig struct { OAuth GeminiOAuthConfig `mapstructure:"oauth"` Quota GeminiQuotaConfig `mapstructure:"quota"` @@ -756,6 +789,12 @@ func load(allowMissingJWTSecret bool) (*Config, error) { cfg.Security.ResponseHeaders.AdditionalAllowed = normalizeStringSlice(cfg.Security.ResponseHeaders.AdditionalAllowed) cfg.Security.ResponseHeaders.ForceRemove = normalizeStringSlice(cfg.Security.ResponseHeaders.ForceRemove) cfg.Security.CSP.Policy = strings.TrimSpace(cfg.Security.CSP.Policy) + cfg.Log.Level = strings.ToLower(strings.TrimSpace(cfg.Log.Level)) + cfg.Log.Format = strings.ToLower(strings.TrimSpace(cfg.Log.Format)) + cfg.Log.ServiceName = strings.TrimSpace(cfg.Log.ServiceName) + cfg.Log.Environment = strings.TrimSpace(cfg.Log.Environment) + cfg.Log.StacktraceLevel = strings.ToLower(strings.TrimSpace(cfg.Log.StacktraceLevel)) + cfg.Log.Output.FilePath = 
strings.TrimSpace(cfg.Log.Output.FilePath) // Auto-generate TOTP encryption key if not set (32 bytes = 64 hex chars for AES-256) cfg.Totp.EncryptionKey = strings.TrimSpace(cfg.Totp.EncryptionKey) @@ -825,6 +864,25 @@ func setDefaults() { viper.SetDefault("server.h2c.max_upload_buffer_per_connection", 2<<20) // 2MB viper.SetDefault("server.h2c.max_upload_buffer_per_stream", 512<<10) // 512KB + // Log + viper.SetDefault("log.level", "info") + viper.SetDefault("log.format", "json") + viper.SetDefault("log.service_name", "sub2api") + viper.SetDefault("log.env", "production") + viper.SetDefault("log.caller", true) + viper.SetDefault("log.stacktrace_level", "error") + viper.SetDefault("log.output.to_stdout", true) + viper.SetDefault("log.output.to_file", true) + viper.SetDefault("log.output.file_path", "") + viper.SetDefault("log.rotation.max_size_mb", 100) + viper.SetDefault("log.rotation.max_backups", 10) + viper.SetDefault("log.rotation.max_age_days", 7) + viper.SetDefault("log.rotation.compress", true) + viper.SetDefault("log.rotation.local_time", true) + viper.SetDefault("log.sampling.enabled", false) + viper.SetDefault("log.sampling.initial", 100) + viper.SetDefault("log.sampling.thereafter", 100) + // CORS viper.SetDefault("cors.allowed_origins", []string{}) viper.SetDefault("cors.allow_credentials", true) @@ -1098,6 +1156,54 @@ func (c *Config) Validate() error { if len([]byte(jwtSecret)) < 32 { return fmt.Errorf("jwt.secret must be at least 32 bytes") } + switch c.Log.Level { + case "debug", "info", "warn", "error": + case "": + return fmt.Errorf("log.level is required") + default: + return fmt.Errorf("log.level must be one of: debug/info/warn/error") + } + switch c.Log.Format { + case "json", "console": + case "": + return fmt.Errorf("log.format is required") + default: + return fmt.Errorf("log.format must be one of: json/console") + } + switch c.Log.StacktraceLevel { + case "none", "error", "fatal": + case "": + return fmt.Errorf("log.stacktrace_level is 
required") + default: + return fmt.Errorf("log.stacktrace_level must be one of: none/error/fatal") + } + if !c.Log.Output.ToStdout && !c.Log.Output.ToFile { + return fmt.Errorf("log.output.to_stdout and log.output.to_file cannot both be false") + } + if c.Log.Rotation.MaxSizeMB <= 0 { + return fmt.Errorf("log.rotation.max_size_mb must be positive") + } + if c.Log.Rotation.MaxBackups < 0 { + return fmt.Errorf("log.rotation.max_backups must be non-negative") + } + if c.Log.Rotation.MaxAgeDays < 0 { + return fmt.Errorf("log.rotation.max_age_days must be non-negative") + } + if c.Log.Sampling.Enabled { + if c.Log.Sampling.Initial <= 0 { + return fmt.Errorf("log.sampling.initial must be positive when sampling is enabled") + } + if c.Log.Sampling.Thereafter <= 0 { + return fmt.Errorf("log.sampling.thereafter must be positive when sampling is enabled") + } + } else { + if c.Log.Sampling.Initial < 0 { + return fmt.Errorf("log.sampling.initial must be non-negative") + } + if c.Log.Sampling.Thereafter < 0 { + return fmt.Errorf("log.sampling.thereafter must be non-negative") + } + } if c.SubscriptionMaintenance.WorkerCount < 0 { return fmt.Errorf("subscription_maintenance.worker_count must be non-negative") diff --git a/backend/internal/config/config_test.go b/backend/internal/config/config_test.go index cbefb465..a3c65c41 100644 --- a/backend/internal/config/config_test.go +++ b/backend/internal/config/config_test.go @@ -965,6 +965,37 @@ func TestValidateConfigErrors(t *testing.T) { }, wantErr: "gateway.scheduling.outbox_lag_rebuild_seconds", }, + { + name: "log level invalid", + mutate: func(c *Config) { c.Log.Level = "trace" }, + wantErr: "log.level", + }, + { + name: "log format invalid", + mutate: func(c *Config) { c.Log.Format = "plain" }, + wantErr: "log.format", + }, + { + name: "log output disabled", + mutate: func(c *Config) { + c.Log.Output.ToStdout = false + c.Log.Output.ToFile = false + }, + wantErr: "log.output.to_stdout and log.output.to_file cannot both be 
false", + }, + { + name: "log rotation size", + mutate: func(c *Config) { c.Log.Rotation.MaxSizeMB = 0 }, + wantErr: "log.rotation.max_size_mb", + }, + { + name: "log sampling enabled invalid", + mutate: func(c *Config) { + c.Log.Sampling.Enabled = true + c.Log.Sampling.Initial = 0 + }, + wantErr: "log.sampling.initial", + }, { name: "ops metrics collector ttl", mutate: func(c *Config) { c.Ops.MetricsCollectorCache.TTL = -1 }, diff --git a/backend/internal/handler/admin/ops_runtime_logging_handler_test.go b/backend/internal/handler/admin/ops_runtime_logging_handler_test.go new file mode 100644 index 00000000..0e84b4f9 --- /dev/null +++ b/backend/internal/handler/admin/ops_runtime_logging_handler_test.go @@ -0,0 +1,173 @@ +package admin + +import ( + "bytes" + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/Wei-Shaw/sub2api/internal/config" + "github.com/Wei-Shaw/sub2api/internal/pkg/logger" + "github.com/Wei-Shaw/sub2api/internal/server/middleware" + "github.com/Wei-Shaw/sub2api/internal/service" + "github.com/gin-gonic/gin" +) + +type testSettingRepo struct { + values map[string]string +} + +func newTestSettingRepo() *testSettingRepo { + return &testSettingRepo{values: map[string]string{}} +} + +func (s *testSettingRepo) Get(ctx context.Context, key string) (*service.Setting, error) { + v, err := s.GetValue(ctx, key) + if err != nil { + return nil, err + } + return &service.Setting{Key: key, Value: v}, nil +} +func (s *testSettingRepo) GetValue(ctx context.Context, key string) (string, error) { + v, ok := s.values[key] + if !ok { + return "", service.ErrSettingNotFound + } + return v, nil +} +func (s *testSettingRepo) Set(ctx context.Context, key, value string) error { + s.values[key] = value + return nil +} +func (s *testSettingRepo) GetMultiple(ctx context.Context, keys []string) (map[string]string, error) { + out := make(map[string]string, len(keys)) + for _, k := range keys { + if v, ok := s.values[k]; ok { + out[k] = 
v + } + } + return out, nil +} +func (s *testSettingRepo) SetMultiple(ctx context.Context, settings map[string]string) error { + for k, v := range settings { + s.values[k] = v + } + return nil +} +func (s *testSettingRepo) GetAll(ctx context.Context) (map[string]string, error) { + out := make(map[string]string, len(s.values)) + for k, v := range s.values { + out[k] = v + } + return out, nil +} +func (s *testSettingRepo) Delete(ctx context.Context, key string) error { + delete(s.values, key) + return nil +} + +func newOpsRuntimeRouter(handler *OpsHandler, withUser bool) *gin.Engine { + gin.SetMode(gin.TestMode) + r := gin.New() + if withUser { + r.Use(func(c *gin.Context) { + c.Set(string(middleware.ContextKeyUser), middleware.AuthSubject{UserID: 7}) + c.Next() + }) + } + r.GET("/runtime/logging", handler.GetRuntimeLogConfig) + r.PUT("/runtime/logging", handler.UpdateRuntimeLogConfig) + r.POST("/runtime/logging/reset", handler.ResetRuntimeLogConfig) + return r +} + +func newRuntimeOpsService(t *testing.T) *service.OpsService { + t.Helper() + if err := logger.Init(logger.InitOptions{ + Level: "info", + Format: "json", + ServiceName: "sub2api", + Environment: "test", + Output: logger.OutputOptions{ + ToStdout: false, + ToFile: false, + }, + }); err != nil { + t.Fatalf("init logger: %v", err) + } + + settingRepo := newTestSettingRepo() + cfg := &config.Config{ + Ops: config.OpsConfig{Enabled: true}, + Log: config.LogConfig{ + Level: "info", + Caller: true, + StacktraceLevel: "error", + Sampling: config.LogSamplingConfig{ + Enabled: false, + Initial: 100, + Thereafter: 100, + }, + }, + } + return service.NewOpsService(nil, settingRepo, cfg, nil, nil, nil, nil, nil, nil, nil, nil) +} + +func TestOpsRuntimeLoggingHandler_GetConfig(t *testing.T) { + h := NewOpsHandler(newRuntimeOpsService(t)) + r := newOpsRuntimeRouter(h, false) + + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/runtime/logging", nil) + r.ServeHTTP(w, req) + if w.Code != 
http.StatusOK { + t.Fatalf("status=%d, want 200", w.Code) + } +} + +func TestOpsRuntimeLoggingHandler_UpdateUnauthorized(t *testing.T) { + h := NewOpsHandler(newRuntimeOpsService(t)) + r := newOpsRuntimeRouter(h, false) + + body := `{"level":"debug","enable_sampling":false,"sampling_initial":100,"sampling_thereafter":100,"caller":true,"stacktrace_level":"error","retention_days":30}` + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPut, "/runtime/logging", bytes.NewBufferString(body)) + req.Header.Set("Content-Type", "application/json") + r.ServeHTTP(w, req) + if w.Code != http.StatusUnauthorized { + t.Fatalf("status=%d, want 401", w.Code) + } +} + +func TestOpsRuntimeLoggingHandler_UpdateAndResetSuccess(t *testing.T) { + h := NewOpsHandler(newRuntimeOpsService(t)) + r := newOpsRuntimeRouter(h, true) + + payload := map[string]any{ + "level": "debug", + "enable_sampling": false, + "sampling_initial": 100, + "sampling_thereafter": 100, + "caller": true, + "stacktrace_level": "error", + "retention_days": 30, + } + raw, _ := json.Marshal(payload) + + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPut, "/runtime/logging", bytes.NewReader(raw)) + req.Header.Set("Content-Type", "application/json") + r.ServeHTTP(w, req) + if w.Code != http.StatusOK { + t.Fatalf("update status=%d, want 200, body=%s", w.Code, w.Body.String()) + } + + w = httptest.NewRecorder() + req = httptest.NewRequest(http.MethodPost, "/runtime/logging/reset", nil) + r.ServeHTTP(w, req) + if w.Code != http.StatusOK { + t.Fatalf("reset status=%d, want 200, body=%s", w.Code, w.Body.String()) + } +} diff --git a/backend/internal/handler/admin/ops_settings_handler.go b/backend/internal/handler/admin/ops_settings_handler.go index ebc8bf49..226b89f3 100644 --- a/backend/internal/handler/admin/ops_settings_handler.go +++ b/backend/internal/handler/admin/ops_settings_handler.go @@ -4,6 +4,7 @@ import ( "net/http" "github.com/Wei-Shaw/sub2api/internal/pkg/response" + 
"github.com/Wei-Shaw/sub2api/internal/server/middleware" "github.com/Wei-Shaw/sub2api/internal/service" "github.com/gin-gonic/gin" ) @@ -101,6 +102,84 @@ func (h *OpsHandler) UpdateAlertRuntimeSettings(c *gin.Context) { response.Success(c, updated) } +// GetRuntimeLogConfig returns runtime log config (DB-backed). +// GET /api/v1/admin/ops/runtime/logging +func (h *OpsHandler) GetRuntimeLogConfig(c *gin.Context) { + if h.opsService == nil { + response.Error(c, http.StatusServiceUnavailable, "Ops service not available") + return + } + if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil { + response.ErrorFrom(c, err) + return + } + + cfg, err := h.opsService.GetRuntimeLogConfig(c.Request.Context()) + if err != nil { + response.Error(c, http.StatusInternalServerError, "Failed to get runtime log config") + return + } + response.Success(c, cfg) +} + +// UpdateRuntimeLogConfig updates runtime log config and applies changes immediately. +// PUT /api/v1/admin/ops/runtime/logging +func (h *OpsHandler) UpdateRuntimeLogConfig(c *gin.Context) { + if h.opsService == nil { + response.Error(c, http.StatusServiceUnavailable, "Ops service not available") + return + } + if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil { + response.ErrorFrom(c, err) + return + } + + var req service.OpsRuntimeLogConfig + if err := c.ShouldBindJSON(&req); err != nil { + response.BadRequest(c, "Invalid request body") + return + } + + subject, ok := middleware.GetAuthSubjectFromContext(c) + if !ok || subject.UserID <= 0 { + response.Error(c, http.StatusUnauthorized, "Unauthorized") + return + } + + updated, err := h.opsService.UpdateRuntimeLogConfig(c.Request.Context(), &req, subject.UserID) + if err != nil { + response.Error(c, http.StatusBadRequest, err.Error()) + return + } + response.Success(c, updated) +} + +// ResetRuntimeLogConfig removes runtime override and falls back to env/yaml baseline. 
+// POST /api/v1/admin/ops/runtime/logging/reset +func (h *OpsHandler) ResetRuntimeLogConfig(c *gin.Context) { + if h.opsService == nil { + response.Error(c, http.StatusServiceUnavailable, "Ops service not available") + return + } + if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil { + response.ErrorFrom(c, err) + return + } + + subject, ok := middleware.GetAuthSubjectFromContext(c) + if !ok || subject.UserID <= 0 { + response.Error(c, http.StatusUnauthorized, "Unauthorized") + return + } + + updated, err := h.opsService.ResetRuntimeLogConfig(c.Request.Context(), subject.UserID) + if err != nil { + response.Error(c, http.StatusBadRequest, err.Error()) + return + } + response.Success(c, updated) +} + // GetAdvancedSettings returns Ops advanced settings (DB-backed). // GET /api/v1/admin/ops/advanced-settings func (h *OpsHandler) GetAdvancedSettings(c *gin.Context) { diff --git a/backend/internal/handler/admin/ops_system_log_handler.go b/backend/internal/handler/admin/ops_system_log_handler.go new file mode 100644 index 00000000..31fd51eb --- /dev/null +++ b/backend/internal/handler/admin/ops_system_log_handler.go @@ -0,0 +1,174 @@ +package admin + +import ( + "net/http" + "strconv" + "strings" + "time" + + "github.com/Wei-Shaw/sub2api/internal/pkg/response" + "github.com/Wei-Shaw/sub2api/internal/server/middleware" + "github.com/Wei-Shaw/sub2api/internal/service" + "github.com/gin-gonic/gin" +) + +type opsSystemLogCleanupRequest struct { + StartTime string `json:"start_time"` + EndTime string `json:"end_time"` + + Level string `json:"level"` + Component string `json:"component"` + RequestID string `json:"request_id"` + ClientRequestID string `json:"client_request_id"` + UserID *int64 `json:"user_id"` + AccountID *int64 `json:"account_id"` + Platform string `json:"platform"` + Model string `json:"model"` + Query string `json:"q"` +} + +// ListSystemLogs returns indexed system logs. 
+// GET /api/v1/admin/ops/system-logs +func (h *OpsHandler) ListSystemLogs(c *gin.Context) { + if h.opsService == nil { + response.Error(c, http.StatusServiceUnavailable, "Ops service not available") + return + } + if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil { + response.ErrorFrom(c, err) + return + } + + page, pageSize := response.ParsePagination(c) + if pageSize > 200 { + pageSize = 200 + } + + start, end, err := parseOpsTimeRange(c, "1h") + if err != nil { + response.BadRequest(c, err.Error()) + return + } + + filter := &service.OpsSystemLogFilter{ + Page: page, + PageSize: pageSize, + StartTime: &start, + EndTime: &end, + Level: strings.TrimSpace(c.Query("level")), + Component: strings.TrimSpace(c.Query("component")), + RequestID: strings.TrimSpace(c.Query("request_id")), + ClientRequestID: strings.TrimSpace(c.Query("client_request_id")), + Platform: strings.TrimSpace(c.Query("platform")), + Model: strings.TrimSpace(c.Query("model")), + Query: strings.TrimSpace(c.Query("q")), + } + if v := strings.TrimSpace(c.Query("user_id")); v != "" { + id, parseErr := strconv.ParseInt(v, 10, 64) + if parseErr != nil || id <= 0 { + response.BadRequest(c, "Invalid user_id") + return + } + filter.UserID = &id + } + if v := strings.TrimSpace(c.Query("account_id")); v != "" { + id, parseErr := strconv.ParseInt(v, 10, 64) + if parseErr != nil || id <= 0 { + response.BadRequest(c, "Invalid account_id") + return + } + filter.AccountID = &id + } + + result, err := h.opsService.ListSystemLogs(c.Request.Context(), filter) + if err != nil { + response.ErrorFrom(c, err) + return + } + response.Paginated(c, result.Logs, int64(result.Total), result.Page, result.PageSize) +} + +// CleanupSystemLogs deletes indexed system logs by filter. 
+// POST /api/v1/admin/ops/system-logs/cleanup +func (h *OpsHandler) CleanupSystemLogs(c *gin.Context) { + if h.opsService == nil { + response.Error(c, http.StatusServiceUnavailable, "Ops service not available") + return + } + if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil { + response.ErrorFrom(c, err) + return + } + + subject, ok := middleware.GetAuthSubjectFromContext(c) + if !ok || subject.UserID <= 0 { + response.Error(c, http.StatusUnauthorized, "Unauthorized") + return + } + + var req opsSystemLogCleanupRequest + if err := c.ShouldBindJSON(&req); err != nil { + response.BadRequest(c, "Invalid request body") + return + } + + parseTS := func(raw string) (*time.Time, error) { + raw = strings.TrimSpace(raw) + if raw == "" { + return nil, nil + } + if t, err := time.Parse(time.RFC3339Nano, raw); err == nil { + return &t, nil + } + t, err := time.Parse(time.RFC3339, raw) + if err != nil { + return nil, err + } + return &t, nil + } + start, err := parseTS(req.StartTime) + if err != nil { + response.BadRequest(c, "Invalid start_time") + return + } + end, err := parseTS(req.EndTime) + if err != nil { + response.BadRequest(c, "Invalid end_time") + return + } + + filter := &service.OpsSystemLogCleanupFilter{ + StartTime: start, + EndTime: end, + Level: strings.TrimSpace(req.Level), + Component: strings.TrimSpace(req.Component), + RequestID: strings.TrimSpace(req.RequestID), + ClientRequestID: strings.TrimSpace(req.ClientRequestID), + UserID: req.UserID, + AccountID: req.AccountID, + Platform: strings.TrimSpace(req.Platform), + Model: strings.TrimSpace(req.Model), + Query: strings.TrimSpace(req.Query), + } + + deleted, err := h.opsService.CleanupSystemLogs(c.Request.Context(), filter, subject.UserID) + if err != nil { + response.ErrorFrom(c, err) + return + } + response.Success(c, gin.H{"deleted": deleted}) +} + +// GetSystemLogIngestionHealth returns sink health metrics. 
+// GET /api/v1/admin/ops/system-logs/health +func (h *OpsHandler) GetSystemLogIngestionHealth(c *gin.Context) { + if h.opsService == nil { + response.Error(c, http.StatusServiceUnavailable, "Ops service not available") + return + } + if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil { + response.ErrorFrom(c, err) + return + } + response.Success(c, h.opsService.GetSystemLogSinkHealth()) +} diff --git a/backend/internal/handler/gateway_handler.go b/backend/internal/handler/gateway_handler.go index 415a38de..ce297aa8 100644 --- a/backend/internal/handler/gateway_handler.go +++ b/backend/internal/handler/gateway_handler.go @@ -276,7 +276,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) { return } account := selection.Account - setOpsSelectedAccount(c, account.ID) + setOpsSelectedAccount(c, account.ID, account.Platform) // 检查请求拦截(预热请求、SUGGESTION MODE等) if account.IsInterceptWarmupEnabled() { @@ -462,7 +462,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) { return } account := selection.Account - setOpsSelectedAccount(c, account.ID) + setOpsSelectedAccount(c, account.ID, account.Platform) // 检查请求拦截(预热请求、SUGGESTION MODE等) if account.IsInterceptWarmupEnabled() { @@ -1087,7 +1087,7 @@ func (h *GatewayHandler) CountTokens(c *gin.Context) { h.errorResponse(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable") return } - setOpsSelectedAccount(c, account.ID) + setOpsSelectedAccount(c, account.ID, account.Platform) // 转发请求(不记录使用量) if err := h.gatewayService.ForwardCountTokens(c.Request.Context(), c, account, parsedReq); err != nil { diff --git a/backend/internal/handler/gemini_v1beta_handler.go b/backend/internal/handler/gemini_v1beta_handler.go index f8fb0dcb..66a800d4 100644 --- a/backend/internal/handler/gemini_v1beta_handler.go +++ b/backend/internal/handler/gemini_v1beta_handler.go @@ -358,7 +358,7 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { return } account := selection.Account - 
setOpsSelectedAccount(c, account.ID) + setOpsSelectedAccount(c, account.ID, account.Platform) // 检测账号切换:如果粘性会话绑定的账号与当前选择的账号不同,清除 thoughtSignature // 注意:Gemini 原生 API 的 thoughtSignature 与具体上游账号强相关;跨账号透传会导致 400。 diff --git a/backend/internal/handler/openai_gateway_handler.go b/backend/internal/handler/openai_gateway_handler.go index fce3fc1c..948baa64 100644 --- a/backend/internal/handler/openai_gateway_handler.go +++ b/backend/internal/handler/openai_gateway_handler.go @@ -240,7 +240,7 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) { } account := selection.Account log.Printf("[OpenAI Handler] Selected account: id=%d name=%s", account.ID, account.Name) - setOpsSelectedAccount(c, account.ID) + setOpsSelectedAccount(c, account.ID, account.Platform) // 3. Acquire account concurrency slot accountReleaseFunc := selection.ReleaseFunc diff --git a/backend/internal/handler/ops_error_logger.go b/backend/internal/handler/ops_error_logger.go index 697078a1..f2b15f7b 100644 --- a/backend/internal/handler/ops_error_logger.go +++ b/backend/internal/handler/ops_error_logger.go @@ -255,18 +255,33 @@ func setOpsRequestContext(c *gin.Context, model string, stream bool, requestBody if c == nil { return } + model = strings.TrimSpace(model) c.Set(opsModelKey, model) c.Set(opsStreamKey, stream) if len(requestBody) > 0 { c.Set(opsRequestBodyKey, requestBody) } + if c.Request != nil && model != "" { + ctx := context.WithValue(c.Request.Context(), ctxkey.Model, model) + c.Request = c.Request.WithContext(ctx) + } } -func setOpsSelectedAccount(c *gin.Context, accountID int64) { +func setOpsSelectedAccount(c *gin.Context, accountID int64, platform ...string) { if c == nil || accountID <= 0 { return } c.Set(opsAccountIDKey, accountID) + if c.Request != nil { + ctx := context.WithValue(c.Request.Context(), ctxkey.AccountID, accountID) + if len(platform) > 0 { + p := strings.TrimSpace(platform[0]) + if p != "" { + ctx = context.WithValue(ctx, ctxkey.Platform, p) + } + } + c.Request = 
c.Request.WithContext(ctx) + } } type opsCaptureWriter struct { diff --git a/backend/internal/handler/sora_gateway_handler.go b/backend/internal/handler/sora_gateway_handler.go index aed54167..2bf43b06 100644 --- a/backend/internal/handler/sora_gateway_handler.go +++ b/backend/internal/handler/sora_gateway_handler.go @@ -215,7 +215,7 @@ func (h *SoraGatewayHandler) ChatCompletions(c *gin.Context) { return } account := selection.Account - setOpsSelectedAccount(c, account.ID) + setOpsSelectedAccount(c, account.ID, account.Platform) accountReleaseFunc := selection.ReleaseFunc if !selection.Acquired { diff --git a/backend/internal/pkg/ctxkey/ctxkey.go b/backend/internal/pkg/ctxkey/ctxkey.go index 0c4d82f7..54add8a0 100644 --- a/backend/internal/pkg/ctxkey/ctxkey.go +++ b/backend/internal/pkg/ctxkey/ctxkey.go @@ -8,9 +8,21 @@ const ( // ForcePlatform 强制平台(用于 /antigravity 路由),由 middleware.ForcePlatform 设置 ForcePlatform Key = "ctx_force_platform" + // RequestID 为服务端生成/透传的请求 ID。 + RequestID Key = "ctx_request_id" + // ClientRequestID 客户端请求的唯一标识,用于追踪请求全生命周期(用于 Ops 监控与排障)。 ClientRequestID Key = "ctx_client_request_id" + // Model 请求模型标识(用于统一请求链路日志字段)。 + Model Key = "ctx_model" + + // Platform 当前请求最终命中的平台(用于统一请求链路日志字段)。 + Platform Key = "ctx_platform" + + // AccountID 当前请求最终命中的账号 ID(用于统一请求链路日志字段)。 + AccountID Key = "ctx_account_id" + // RetryCount 表示当前请求在网关层的重试次数(用于 Ops 记录与排障)。 RetryCount Key = "ctx_retry_count" diff --git a/backend/internal/pkg/logger/config_adapter.go b/backend/internal/pkg/logger/config_adapter.go new file mode 100644 index 00000000..c34e448b --- /dev/null +++ b/backend/internal/pkg/logger/config_adapter.go @@ -0,0 +1,31 @@ +package logger + +import "github.com/Wei-Shaw/sub2api/internal/config" + +func OptionsFromConfig(cfg config.LogConfig) InitOptions { + return InitOptions{ + Level: cfg.Level, + Format: cfg.Format, + ServiceName: cfg.ServiceName, + Environment: cfg.Environment, + Caller: cfg.Caller, + StacktraceLevel: cfg.StacktraceLevel, + Output: 
OutputOptions{ + ToStdout: cfg.Output.ToStdout, + ToFile: cfg.Output.ToFile, + FilePath: cfg.Output.FilePath, + }, + Rotation: RotationOptions{ + MaxSizeMB: cfg.Rotation.MaxSizeMB, + MaxBackups: cfg.Rotation.MaxBackups, + MaxAgeDays: cfg.Rotation.MaxAgeDays, + Compress: cfg.Rotation.Compress, + LocalTime: cfg.Rotation.LocalTime, + }, + Sampling: SamplingOptions{ + Enabled: cfg.Sampling.Enabled, + Initial: cfg.Sampling.Initial, + Thereafter: cfg.Sampling.Thereafter, + }, + } +} diff --git a/backend/internal/pkg/logger/logger.go b/backend/internal/pkg/logger/logger.go new file mode 100644 index 00000000..57e6fd1f --- /dev/null +++ b/backend/internal/pkg/logger/logger.go @@ -0,0 +1,373 @@ +package logger + +import ( + "context" + "fmt" + "log" + "log/slog" + "os" + "path/filepath" + "strings" + "sync" + "time" + + "go.uber.org/zap" + "go.uber.org/zap/zapcore" + "gopkg.in/natefinch/lumberjack.v2" +) + +type Level = zapcore.Level + +const ( + LevelDebug = zapcore.DebugLevel + LevelInfo = zapcore.InfoLevel + LevelWarn = zapcore.WarnLevel + LevelError = zapcore.ErrorLevel + LevelFatal = zapcore.FatalLevel +) + +type Sink interface { + WriteLogEvent(event *LogEvent) +} + +type LogEvent struct { + Time time.Time + Level string + Component string + Message string + LoggerName string + Fields map[string]any +} + +var ( + mu sync.RWMutex + global *zap.Logger + sugar *zap.SugaredLogger + atomicLevel zap.AtomicLevel + initOptions InitOptions + currentSink Sink + stdLogUndo func() + bootstrapOnce sync.Once +) + +func InitBootstrap() { + bootstrapOnce.Do(func() { + if err := Init(bootstrapOptions()); err != nil { + _, _ = fmt.Fprintf(os.Stderr, "logger bootstrap init failed: %v\n", err) + } + }) +} + +func Init(options InitOptions) error { + mu.Lock() + defer mu.Unlock() + return initLocked(options) +} + +func initLocked(options InitOptions) error { + normalized := options.normalized() + zl, al, err := buildLogger(normalized) + if err != nil { + return err + } + + prev := global + 
global = zl + sugar = zl.Sugar() + atomicLevel = al + initOptions = normalized + + bridgeStdLogLocked() + bridgeSlogLocked() + + if prev != nil { + _ = prev.Sync() + } + return nil +} + +func Reconfigure(mutator func(*InitOptions) error) error { + mu.Lock() + defer mu.Unlock() + next := initOptions + if mutator != nil { + if err := mutator(&next); err != nil { + return err + } + } + return initLocked(next) +} + +func SetLevel(level string) error { + lv, ok := parseLevel(level) + if !ok { + return fmt.Errorf("invalid log level: %s", level) + } + + mu.Lock() + defer mu.Unlock() + atomicLevel.SetLevel(lv) + initOptions.Level = strings.ToLower(strings.TrimSpace(level)) + return nil +} + +func CurrentLevel() string { + mu.RLock() + defer mu.RUnlock() + if global == nil { + return "info" + } + return atomicLevel.Level().String() +} + +func SetSink(sink Sink) { + mu.Lock() + defer mu.Unlock() + currentSink = sink +} + +func L() *zap.Logger { + mu.RLock() + defer mu.RUnlock() + if global != nil { + return global + } + return zap.NewNop() +} + +func S() *zap.SugaredLogger { + mu.RLock() + defer mu.RUnlock() + if sugar != nil { + return sugar + } + return zap.NewNop().Sugar() +} + +func With(fields ...zap.Field) *zap.Logger { + return L().With(fields...) 
+} + +func Sync() { + mu.RLock() + l := global + mu.RUnlock() + if l != nil { + _ = l.Sync() + } +} + +func bridgeStdLogLocked() { + if stdLogUndo != nil { + stdLogUndo() + stdLogUndo = nil + } + + log.SetFlags(0) + log.SetPrefix("") + undo, err := zap.RedirectStdLogAt(global.Named("stdlog"), zap.InfoLevel) + if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "logger redirect stdlog failed: %v\n", err) + return + } + stdLogUndo = undo +} + +func bridgeSlogLocked() { + slog.SetDefault(slog.New(newSlogZapHandler(global.Named("slog")))) +} + +func buildLogger(options InitOptions) (*zap.Logger, zap.AtomicLevel, error) { + level, _ := parseLevel(options.Level) + atomic := zap.NewAtomicLevelAt(level) + + encoderCfg := zapcore.EncoderConfig{ + TimeKey: "time", + LevelKey: "level", + NameKey: "logger", + CallerKey: "caller", + MessageKey: "msg", + StacktraceKey: "stacktrace", + LineEnding: zapcore.DefaultLineEnding, + EncodeLevel: zapcore.CapitalLevelEncoder, + EncodeTime: zapcore.ISO8601TimeEncoder, + EncodeDuration: zapcore.MillisDurationEncoder, + EncodeCaller: zapcore.ShortCallerEncoder, + } + + var enc zapcore.Encoder + if options.Format == "console" { + enc = zapcore.NewConsoleEncoder(encoderCfg) + } else { + enc = zapcore.NewJSONEncoder(encoderCfg) + } + + sinkCore := newSinkCore() + cores := make([]zapcore.Core, 0, 3) + + if options.Output.ToStdout { + infoPriority := zap.LevelEnablerFunc(func(lvl zapcore.Level) bool { + return lvl >= atomic.Level() && lvl < zapcore.WarnLevel + }) + errPriority := zap.LevelEnablerFunc(func(lvl zapcore.Level) bool { + return lvl >= atomic.Level() && lvl >= zapcore.WarnLevel + }) + cores = append(cores, zapcore.NewCore(enc, zapcore.Lock(os.Stdout), infoPriority)) + cores = append(cores, zapcore.NewCore(enc, zapcore.Lock(os.Stderr), errPriority)) + } + + if options.Output.ToFile { + fileCore, filePath, fileErr := buildFileCore(enc, atomic, options) + if fileErr != nil { + _, _ = fmt.Fprintf(os.Stderr, "time=%s level=WARN 
msg=\"日志文件输出初始化失败,降级为仅标准输出\" path=%s err=%v\n", + time.Now().Format(time.RFC3339Nano), + filePath, + fileErr, + ) + } else { + cores = append(cores, fileCore) + } + } + + if len(cores) == 0 { + cores = append(cores, zapcore.NewCore(enc, zapcore.Lock(os.Stdout), atomic)) + } + + core := zapcore.NewTee(cores...) + if options.Sampling.Enabled { + core = zapcore.NewSamplerWithOptions(core, samplingTick(), options.Sampling.Initial, options.Sampling.Thereafter) + } + core = sinkCore.Wrap(core) + + stacktraceLevel, _ := parseStacktraceLevel(options.StacktraceLevel) + zapOpts := make([]zap.Option, 0, 5) + if options.Caller { + zapOpts = append(zapOpts, zap.AddCaller()) + } + if stacktraceLevel <= zapcore.FatalLevel { + zapOpts = append(zapOpts, zap.AddStacktrace(stacktraceLevel)) + } + zapOpts = append(zapOpts, zap.AddCallerSkip(1)) + + logger := zap.New(core, zapOpts...).With( + zap.String("service", options.ServiceName), + zap.String("env", options.Environment), + ) + return logger, atomic, nil +} + +func buildFileCore(enc zapcore.Encoder, atomic zap.AtomicLevel, options InitOptions) (zapcore.Core, string, error) { + filePath := options.Output.FilePath + if strings.TrimSpace(filePath) == "" { + filePath = resolveLogFilePath("") + } + + dir := filepath.Dir(filePath) + if err := os.MkdirAll(dir, 0o755); err != nil { + return nil, filePath, err + } + lj := &lumberjack.Logger{ + Filename: filePath, + MaxSize: options.Rotation.MaxSizeMB, + MaxBackups: options.Rotation.MaxBackups, + MaxAge: options.Rotation.MaxAgeDays, + Compress: options.Rotation.Compress, + LocalTime: options.Rotation.LocalTime, + } + return zapcore.NewCore(enc, zapcore.AddSync(lj), atomic), filePath, nil +} + +type sinkCore struct { + core zapcore.Core + fields []zapcore.Field +} + +func newSinkCore() *sinkCore { + return &sinkCore{} +} + +func (s *sinkCore) Wrap(core zapcore.Core) zapcore.Core { + cp := *s + cp.core = core + return &cp +} + +func (s *sinkCore) Enabled(level zapcore.Level) bool { + return 
s.core.Enabled(level) +} + +func (s *sinkCore) With(fields []zapcore.Field) zapcore.Core { + nextFields := append([]zapcore.Field{}, s.fields...) + nextFields = append(nextFields, fields...) + return &sinkCore{ + core: s.core.With(fields), + fields: nextFields, + } +} + +func (s *sinkCore) Check(entry zapcore.Entry, ce *zapcore.CheckedEntry) *zapcore.CheckedEntry { + if s.Enabled(entry.Level) { + return ce.AddCore(entry, s) + } + return ce +} + +func (s *sinkCore) Write(entry zapcore.Entry, fields []zapcore.Field) error { + if err := s.core.Write(entry, fields); err != nil { + return err + } + + mu.RLock() + sink := currentSink + mu.RUnlock() + if sink == nil { + return nil + } + + enc := zapcore.NewMapObjectEncoder() + for _, f := range s.fields { + f.AddTo(enc) + } + for _, f := range fields { + f.AddTo(enc) + } + + event := &LogEvent{ + Time: entry.Time, + Level: strings.ToLower(entry.Level.String()), + Component: entry.LoggerName, + Message: entry.Message, + LoggerName: entry.LoggerName, + Fields: enc.Fields, + } + sink.WriteLogEvent(event) + return nil +} + +func (s *sinkCore) Sync() error { + return s.core.Sync() +} + +type contextKey string + +const loggerContextKey contextKey = "ctx_logger" + +func IntoContext(ctx context.Context, l *zap.Logger) context.Context { + if ctx == nil { + ctx = context.Background() + } + if l == nil { + l = L() + } + return context.WithValue(ctx, loggerContextKey, l) +} + +func FromContext(ctx context.Context) *zap.Logger { + if ctx == nil { + return L() + } + if l, ok := ctx.Value(loggerContextKey).(*zap.Logger); ok && l != nil { + return l + } + return L() +} diff --git a/backend/internal/pkg/logger/logger_test.go b/backend/internal/pkg/logger/logger_test.go new file mode 100644 index 00000000..75c85a9d --- /dev/null +++ b/backend/internal/pkg/logger/logger_test.go @@ -0,0 +1,129 @@ +package logger + +import ( + "io" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestInit_DualOutput(t *testing.T) { + tmpDir := 
t.TempDir() + logPath := filepath.Join(tmpDir, "logs", "sub2api.log") + + origStdout := os.Stdout + origStderr := os.Stderr + stdoutR, stdoutW, err := os.Pipe() + if err != nil { + t.Fatalf("create stdout pipe: %v", err) + } + stderrR, stderrW, err := os.Pipe() + if err != nil { + t.Fatalf("create stderr pipe: %v", err) + } + os.Stdout = stdoutW + os.Stderr = stderrW + t.Cleanup(func() { + os.Stdout = origStdout + os.Stderr = origStderr + _ = stdoutR.Close() + _ = stderrR.Close() + _ = stdoutW.Close() + _ = stderrW.Close() + }) + + err = Init(InitOptions{ + Level: "debug", + Format: "json", + ServiceName: "sub2api", + Environment: "test", + Output: OutputOptions{ + ToStdout: true, + ToFile: true, + FilePath: logPath, + }, + Rotation: RotationOptions{ + MaxSizeMB: 10, + MaxBackups: 2, + MaxAgeDays: 1, + }, + Sampling: SamplingOptions{Enabled: false}, + }) + if err != nil { + t.Fatalf("Init() error: %v", err) + } + + L().Info("dual-output-info") + L().Warn("dual-output-warn") + Sync() + + _ = stdoutW.Close() + _ = stderrW.Close() + stdoutBytes, _ := io.ReadAll(stdoutR) + stderrBytes, _ := io.ReadAll(stderrR) + stdoutText := string(stdoutBytes) + stderrText := string(stderrBytes) + + if !strings.Contains(stdoutText, "dual-output-info") { + t.Fatalf("stdout missing info log: %s", stdoutText) + } + if !strings.Contains(stderrText, "dual-output-warn") { + t.Fatalf("stderr missing warn log: %s", stderrText) + } + + fileBytes, err := os.ReadFile(logPath) + if err != nil { + t.Fatalf("read log file: %v", err) + } + fileText := string(fileBytes) + if !strings.Contains(fileText, "dual-output-info") || !strings.Contains(fileText, "dual-output-warn") { + t.Fatalf("file missing logs: %s", fileText) + } +} + +func TestInit_FileOutputFailureDowngrade(t *testing.T) { + origStdout := os.Stdout + origStderr := os.Stderr + _, stdoutW, err := os.Pipe() + if err != nil { + t.Fatalf("create stdout pipe: %v", err) + } + stderrR, stderrW, err := os.Pipe() + if err != nil { + 
t.Fatalf("create stderr pipe: %v", err) + } + os.Stdout = stdoutW + os.Stderr = stderrW + t.Cleanup(func() { + os.Stdout = origStdout + os.Stderr = origStderr + _ = stdoutW.Close() + _ = stderrR.Close() + _ = stderrW.Close() + }) + + err = Init(InitOptions{ + Level: "info", + Format: "json", + Output: OutputOptions{ + ToStdout: true, + ToFile: true, + FilePath: filepath.Join(os.DevNull, "logs", "sub2api.log"), + }, + Rotation: RotationOptions{ + MaxSizeMB: 10, + MaxBackups: 1, + MaxAgeDays: 1, + }, + }) + if err != nil { + t.Fatalf("Init() should downgrade instead of failing, got: %v", err) + } + + _ = stderrW.Close() + stderrBytes, _ := io.ReadAll(stderrR) + if !strings.Contains(string(stderrBytes), "日志文件输出初始化失败") { + t.Fatalf("stderr should contain fallback warning, got: %s", string(stderrBytes)) + } +} diff --git a/backend/internal/pkg/logger/options.go b/backend/internal/pkg/logger/options.go new file mode 100644 index 00000000..c14bb41a --- /dev/null +++ b/backend/internal/pkg/logger/options.go @@ -0,0 +1,161 @@ +package logger + +import ( + "os" + "path/filepath" + "strings" + "time" +) + +const ( + // DefaultContainerLogPath 为容器内默认日志文件路径。 + DefaultContainerLogPath = "/app/data/logs/sub2api.log" + defaultLogFilename = "sub2api.log" +) + +type InitOptions struct { + Level string + Format string + ServiceName string + Environment string + Caller bool + StacktraceLevel string + Output OutputOptions + Rotation RotationOptions + Sampling SamplingOptions +} + +type OutputOptions struct { + ToStdout bool + ToFile bool + FilePath string +} + +type RotationOptions struct { + MaxSizeMB int + MaxBackups int + MaxAgeDays int + Compress bool + LocalTime bool +} + +type SamplingOptions struct { + Enabled bool + Initial int + Thereafter int +} + +func (o InitOptions) normalized() InitOptions { + out := o + out.Level = strings.ToLower(strings.TrimSpace(out.Level)) + if out.Level == "" { + out.Level = "info" + } + out.Format = strings.ToLower(strings.TrimSpace(out.Format)) + 
if out.Format == "" { + out.Format = "json" + } + out.ServiceName = strings.TrimSpace(out.ServiceName) + if out.ServiceName == "" { + out.ServiceName = "sub2api" + } + out.Environment = strings.TrimSpace(out.Environment) + if out.Environment == "" { + out.Environment = "production" + } + out.StacktraceLevel = strings.ToLower(strings.TrimSpace(out.StacktraceLevel)) + if out.StacktraceLevel == "" { + out.StacktraceLevel = "error" + } + if !out.Output.ToStdout && !out.Output.ToFile { + out.Output.ToStdout = true + } + out.Output.FilePath = resolveLogFilePath(out.Output.FilePath) + if out.Rotation.MaxSizeMB <= 0 { + out.Rotation.MaxSizeMB = 100 + } + if out.Rotation.MaxBackups < 0 { + out.Rotation.MaxBackups = 10 + } + if out.Rotation.MaxAgeDays < 0 { + out.Rotation.MaxAgeDays = 7 + } + if out.Sampling.Enabled { + if out.Sampling.Initial <= 0 { + out.Sampling.Initial = 100 + } + if out.Sampling.Thereafter <= 0 { + out.Sampling.Thereafter = 100 + } + } + return out +} + +func resolveLogFilePath(explicit string) string { + explicit = strings.TrimSpace(explicit) + if explicit != "" { + return explicit + } + dataDir := strings.TrimSpace(os.Getenv("DATA_DIR")) + if dataDir != "" { + return filepath.Join(dataDir, "logs", defaultLogFilename) + } + return DefaultContainerLogPath +} + +func bootstrapOptions() InitOptions { + return InitOptions{ + Level: "info", + Format: "console", + ServiceName: "sub2api", + Environment: "bootstrap", + Output: OutputOptions{ + ToStdout: true, + ToFile: false, + }, + Rotation: RotationOptions{ + MaxSizeMB: 100, + MaxBackups: 10, + MaxAgeDays: 7, + Compress: true, + LocalTime: true, + }, + Sampling: SamplingOptions{ + Enabled: false, + Initial: 100, + Thereafter: 100, + }, + } +} + +func parseLevel(level string) (Level, bool) { + switch strings.ToLower(strings.TrimSpace(level)) { + case "debug": + return LevelDebug, true + case "info": + return LevelInfo, true + case "warn": + return LevelWarn, true + case "error": + return LevelError, true + 
default: + return LevelInfo, false + } +} + +func parseStacktraceLevel(level string) (Level, bool) { + switch strings.ToLower(strings.TrimSpace(level)) { + case "none": + return LevelFatal + 1, true + case "error": + return LevelError, true + case "fatal": + return LevelFatal, true + default: + return LevelError, false + } +} + +func samplingTick() time.Duration { + return time.Second +} diff --git a/backend/internal/pkg/logger/options_test.go b/backend/internal/pkg/logger/options_test.go new file mode 100644 index 00000000..10d50d72 --- /dev/null +++ b/backend/internal/pkg/logger/options_test.go @@ -0,0 +1,102 @@ +package logger + +import ( + "os" + "path/filepath" + "testing" + + "go.uber.org/zap" + "go.uber.org/zap/zapcore" +) + +func TestResolveLogFilePath_Default(t *testing.T) { + t.Setenv("DATA_DIR", "") + got := resolveLogFilePath("") + if got != DefaultContainerLogPath { + t.Fatalf("resolveLogFilePath() = %q, want %q", got, DefaultContainerLogPath) + } +} + +func TestResolveLogFilePath_WithDataDir(t *testing.T) { + t.Setenv("DATA_DIR", "/tmp/sub2api-data") + got := resolveLogFilePath("") + want := filepath.Join("/tmp/sub2api-data", "logs", "sub2api.log") + if got != want { + t.Fatalf("resolveLogFilePath() = %q, want %q", got, want) + } +} + +func TestResolveLogFilePath_ExplicitPath(t *testing.T) { + t.Setenv("DATA_DIR", "/tmp/ignore") + got := resolveLogFilePath("/var/log/custom.log") + if got != "/var/log/custom.log" { + t.Fatalf("resolveLogFilePath() = %q, want explicit path", got) + } +} + +func TestNormalizedOptions_InvalidFallback(t *testing.T) { + t.Setenv("DATA_DIR", "") + opts := InitOptions{ + Level: "TRACE", + Format: "TEXT", + ServiceName: "", + Environment: "", + StacktraceLevel: "panic", + Output: OutputOptions{ + ToStdout: false, + ToFile: false, + }, + Rotation: RotationOptions{ + MaxSizeMB: 0, + MaxBackups: -1, + MaxAgeDays: -1, + }, + Sampling: SamplingOptions{ + Enabled: true, + Initial: 0, + Thereafter: 0, + }, + } + out := 
opts.normalized()
+	if out.Level != "trace" {
+		// normalized 仅做 trim/lower,不做校验;校验在 config 层。
+		t.Fatalf("normalized level should preserve value for upstream validation, got %q", out.Level)
+	}
+	if !out.Output.ToStdout {
+		t.Fatalf("normalized output should fallback to stdout")
+	}
+	if out.Output.FilePath != DefaultContainerLogPath {
+		t.Fatalf("normalized file path = %q", out.Output.FilePath)
+	}
+	if out.Rotation.MaxSizeMB != 100 {
+		t.Fatalf("normalized max_size_mb = %d", out.Rotation.MaxSizeMB)
+	}
+	if out.Rotation.MaxBackups != 10 {
+		t.Fatalf("normalized max_backups = %d", out.Rotation.MaxBackups)
+	}
+	if out.Rotation.MaxAgeDays != 7 {
+		t.Fatalf("normalized max_age_days = %d", out.Rotation.MaxAgeDays)
+	}
+	if out.Sampling.Initial != 100 || out.Sampling.Thereafter != 100 {
+		t.Fatalf("normalized sampling defaults invalid: %+v", out.Sampling)
+	}
+}
+
+func TestBuildFileCore_InvalidPathFallback(t *testing.T) {
+	t.Setenv("DATA_DIR", "")
+	opts := bootstrapOptions()
+	opts.Output.ToFile = true
+	opts.Output.FilePath = filepath.Join(os.DevNull, "logs", "sub2api.log")
+	encoderCfg := zapcore.EncoderConfig{
+		TimeKey:     "time",
+		LevelKey:    "level",
+		MessageKey:  "msg",
+		EncodeTime:  zapcore.ISO8601TimeEncoder,
+		EncodeLevel: zapcore.CapitalLevelEncoder,
+	}
+	encoder := zapcore.NewJSONEncoder(encoderCfg)
+	_, _, err := buildFileCore(encoder, zap.NewAtomicLevel(), opts)
+	if err == nil {
+		t.Fatalf("buildFileCore() expected error for invalid path")
+	}
+}
diff --git a/backend/internal/pkg/logger/slog_handler.go b/backend/internal/pkg/logger/slog_handler.go
new file mode 100644
index 00000000..47c80a6d
--- /dev/null
+++ b/backend/internal/pkg/logger/slog_handler.go
@@ -0,0 +1,172 @@
+package logger
+
+import (
+	"context"
+	"log/slog"
+	"strings"
+	"time"
+
+	"go.uber.org/zap"
+	"go.uber.org/zap/zapcore"
+)
+
+// slogZapHandler is a slog.Handler that forwards records to a zap.Logger.
+type slogZapHandler struct {
+	logger *zap.Logger
+	// fields holds the attributes added through WithAttrs, already converted
+	// to zap fields and qualified with the groups open when they were added.
+	fields []zap.Field
+	// groups lists the open group names that qualify subsequent attributes.
+	groups []string
+}
+
+// newSlogZapHandler returns a slog.Handler backed by logger. A nil logger is
+// replaced with a no-op logger.
+func newSlogZapHandler(logger *zap.Logger) slog.Handler {
+	if logger == nil {
+		logger = zap.NewNop()
+	}
+	return &slogZapHandler{
+		logger: logger,
+		fields: make([]zap.Field, 0, 8),
+		groups: make([]string, 0, 4),
+	}
+}
+
+// zapLevel maps a slog level onto the package's zap level constants.
+func (h *slogZapHandler) zapLevel(level slog.Level) zapcore.Level {
+	switch {
+	case level >= slog.LevelError:
+		return LevelError
+	case level >= slog.LevelWarn:
+		return LevelWarn
+	case level <= slog.LevelDebug:
+		return LevelDebug
+	default:
+		return LevelInfo
+	}
+}
+
+// Enabled reports whether the underlying zap core accepts the mapped level.
+func (h *slogZapHandler) Enabled(_ context.Context, level slog.Level) bool {
+	return h.logger.Core().Enabled(h.zapLevel(level))
+}
+
+// Handle converts record into a zap entry and writes it. It always returns nil.
+func (h *slogZapHandler) Handle(_ context.Context, record slog.Record) error {
+	entry := h.logger.Check(h.zapLevel(record.Level), record.Message)
+	if entry == nil {
+		// No core accepted the entry, so there is nothing to write.
+		return nil
+	}
+
+	fields := make([]zap.Field, 0, len(h.fields)+record.NumAttrs()+1)
+	// slog asks handlers to ignore a zero record time.
+	if !record.Time.IsZero() {
+		fields = append(fields, zap.Time("time", record.Time))
+	}
+	fields = append(fields, h.fields...)
+	record.Attrs(func(attr slog.Attr) bool {
+		fields = append(fields, slogAttrToZapField(h.groups, attr))
+		return true
+	})
+	entry.Write(fields...)
+	return nil
+}
+
+// WithAttrs returns a handler that also emits attrs on every record. The keys
+// are qualified with the groups open on h at the time of the call, so groups
+// opened later do not rename them.
+func (h *slogZapHandler) WithAttrs(attrs []slog.Attr) slog.Handler {
+	if len(attrs) == 0 {
+		return h
+	}
+	next := *h
+	// Copy so handlers derived from the same parent never share a backing array.
+	next.fields = make([]zap.Field, 0, len(h.fields)+len(attrs))
+	next.fields = append(next.fields, h.fields...)
+	next.fields = append(next.fields, slogAttrsToZapFields(h.groups, attrs)...)
+	return &next
+}
+
+// WithGroup returns a handler that qualifies the keys of all attributes added
+// afterwards with name. A blank name returns h unchanged.
+func (h *slogZapHandler) WithGroup(name string) slog.Handler {
+	name = strings.TrimSpace(name)
+	if name == "" {
+		return h
+	}
+	next := *h
+	next.groups = append(append([]string{}, h.groups...), name)
+	return &next
+}
+
+// slogAttrsToZapFields converts attrs to zap fields, prefixing each key with groups.
+func slogAttrsToZapFields(groups []string, attrs []slog.Attr) []zap.Field {
+	fields := make([]zap.Field, 0, len(attrs))
+	for _, attr := range attrs {
+		fields = append(fields, slogAttrToZapField(groups, attr))
+	}
+	return fields
+}
+
+// slogAttrToZapField converts one attribute to a zap field. The key is prefixed
+// with the dot-joined groups. Following the slog.Handler contract, a zero Attr
+// and an empty group are skipped, and a group with an empty key is inlined.
+func slogAttrToZapField(groups []string, attr slog.Attr) zap.Field {
+	if attr.Equal(slog.Attr{}) {
+		return zap.Skip()
+	}
+	value := attr.Value.Resolve()
+	if value.Kind() == slog.KindGroup {
+		members := value.Group()
+		if len(members) == 0 {
+			return zap.Skip()
+		}
+		if attr.Key == "" {
+			return zap.Inline(zapObjectFields(slogAttrsToZapFields(groups, members)))
+		}
+	}
+
+	key := attr.Key
+	if len(groups) > 0 {
+		key = strings.Join(groups, ".") + "." + key
+	}
+	switch value.Kind() {
+	case slog.KindBool:
+		return zap.Bool(key, value.Bool())
+	case slog.KindInt64:
+		return zap.Int64(key, value.Int64())
+	case slog.KindUint64:
+		return zap.Uint64(key, value.Uint64())
+	case slog.KindFloat64:
+		return zap.Float64(key, value.Float64())
+	case slog.KindDuration:
+		return zap.Duration(key, value.Duration())
+	case slog.KindTime:
+		return zap.Time(key, value.Time())
+	case slog.KindString:
+		return zap.String(key, value.String())
+	case slog.KindGroup:
+		// Members nest inside the object, so they carry no group prefix.
+		return zap.Object(key, zapObjectFields(slogAttrsToZapFields(nil, value.Group())))
+	case slog.KindAny:
+		if t, ok := value.Any().(time.Time); ok {
+			return zap.Time(key, t)
+		}
+		return zap.Any(key, value.Any())
+	default:
+		return zap.String(key, value.String())
+	}
+}
+
+// zapObjectFields adapts a list of zap fields to zapcore.ObjectMarshaler.
+type zapObjectFields []zap.Field
+
+// MarshalLogObject adds every field to enc.
+func (z zapObjectFields) MarshalLogObject(enc zapcore.ObjectEncoder) error {
+	for _, field := range z {
+		field.AddTo(enc)
+	}
+	return nil
+}
diff --git a/backend/internal/repository/ops_repo.go b/backend/internal/repository/ops_repo.go
index 8f2c30c0..2705d429 100644
--- 
a/backend/internal/repository/ops_repo.go
+++ b/backend/internal/repository/ops_repo.go
@@ -3,6 +3,7 @@ package repository
 import (
 	"context"
 	"database/sql"
+	"encoding/json"
 	"fmt"
 	"strings"
 	"time"
@@ -938,6 +939,252 @@ WHERE id = $1`
 	return err
 }
 
+// BatchInsertSystemLogs bulk-loads inputs into ops_system_logs with a single
+// COPY statement inside one transaction. Nil entries and entries with an empty
+// level or message are skipped; an empty component defaults to "app" and empty
+// extra JSON to "{}". It returns the number of rows written. On any error the
+// transaction is rolled back and the count is 0, because no row was persisted.
+func (r *opsRepository) BatchInsertSystemLogs(ctx context.Context, inputs []*service.OpsInsertSystemLogInput) (int64, error) {
+	if r == nil || r.db == nil {
+		return 0, fmt.Errorf("nil ops repository")
+	}
+	if len(inputs) == 0 {
+		return 0, nil
+	}
+
+	tx, err := r.db.BeginTx(ctx, nil)
+	if err != nil {
+		return 0, err
+	}
+	stmt, err := tx.PrepareContext(ctx, pq.CopyIn(
+		"ops_system_logs",
+		"created_at",
+		"level",
+		"component",
+		"message",
+		"request_id",
+		"client_request_id",
+		"user_id",
+		"account_id",
+		"platform",
+		"model",
+		"extra",
+	))
+	if err != nil {
+		_ = tx.Rollback()
+		return 0, err
+	}
+	// abort releases the statement, rolls the transaction back and reports
+	// that nothing was inserted.
+	abort := func(cause error) (int64, error) {
+		_ = stmt.Close()
+		_ = tx.Rollback()
+		return 0, cause
+	}
+
+	var inserted int64
+	for _, input := range inputs {
+		if input == nil {
+			continue
+		}
+		createdAt := input.CreatedAt
+		if createdAt.IsZero() {
+			createdAt = time.Now().UTC()
+		}
+		component := strings.TrimSpace(input.Component)
+		level := strings.ToLower(strings.TrimSpace(input.Level))
+		message := strings.TrimSpace(input.Message)
+		if level == "" || message == "" {
+			continue
+		}
+		if component == "" {
+			component = "app"
+		}
+		extra := strings.TrimSpace(input.ExtraJSON)
+		if extra == "" {
+			extra = "{}"
+		}
+		if _, err := stmt.ExecContext(
+			ctx,
+			createdAt.UTC(),
+			level,
+			component,
+			message,
+			opsNullString(input.RequestID),
+			opsNullString(input.ClientRequestID),
+			opsNullInt64(input.UserID),
+			opsNullInt64(input.AccountID),
+			opsNullString(input.Platform),
+			opsNullString(input.Model),
+			extra,
+		); err != nil {
+			return abort(err)
+		}
+		inserted++
+	}
+
+	// An ExecContext call without arguments flushes the buffered COPY data.
+	if _, err := stmt.ExecContext(ctx); err != nil {
+		return abort(err)
+	}
+	if err := stmt.Close(); err != nil {
+		_ = tx.Rollback()
+		return 0, err
+	}
+	if err := tx.Commit(); err != nil {
+		return 0, err
+	}
+	return inserted, nil
+}
+
+func (r *opsRepository) ListSystemLogs(ctx context.Context, filter *service.OpsSystemLogFilter) (*service.OpsSystemLogList, error) {
+	if r == nil || r.db == nil {
+		return nil, fmt.Errorf("nil ops repository")
+	}
+	if filter == nil {
+		filter = &service.OpsSystemLogFilter{}
+	}
+
+	page := filter.Page
+	if page <= 0 {
+		page = 1
+	}
+	pageSize := filter.PageSize
+	if pageSize <= 0 {
+		pageSize = 50
+	}
+	if pageSize > 200 {
+		pageSize = 200
+	}
+
+	where, args, _ := buildOpsSystemLogsWhere(filter)
+	countSQL := "SELECT COUNT(*) FROM ops_system_logs l " + where
+	var total int
+	if err := r.db.QueryRowContext(ctx, countSQL, args...).Scan(&total); err != nil {
+		return nil, err
+	}
+
+	offset := (page - 1) * pageSize
+	argsWithLimit := append(args, pageSize, offset)
+	query := `
+SELECT
+  l.id,
+  l.created_at,
+  l.level,
+  COALESCE(l.component, ''),
+  COALESCE(l.message, ''),
+  COALESCE(l.request_id, ''),
+  COALESCE(l.client_request_id, ''),
+  l.user_id,
+  l.account_id,
+  COALESCE(l.platform, ''),
+  COALESCE(l.model, ''),
+  COALESCE(l.extra::text, '{}')
+FROM ops_system_logs l
+` + where + `
+ORDER BY l.created_at DESC, l.id DESC
+LIMIT $` + itoa(len(args)+1) + ` OFFSET $` + itoa(len(args)+2)
+
+	rows, err := r.db.QueryContext(ctx, query, argsWithLimit...)
+ if err != nil { + return nil, err + } + defer func() { _ = rows.Close() }() + + logs := make([]*service.OpsSystemLog, 0, pageSize) + for rows.Next() { + item := &service.OpsSystemLog{} + var userID sql.NullInt64 + var accountID sql.NullInt64 + var extraRaw string + if err := rows.Scan( + &item.ID, + &item.CreatedAt, + &item.Level, + &item.Component, + &item.Message, + &item.RequestID, + &item.ClientRequestID, + &userID, + &accountID, + &item.Platform, + &item.Model, + &extraRaw, + ); err != nil { + return nil, err + } + if userID.Valid { + v := userID.Int64 + item.UserID = &v + } + if accountID.Valid { + v := accountID.Int64 + item.AccountID = &v + } + extraRaw = strings.TrimSpace(extraRaw) + if extraRaw != "" && extraRaw != "null" && extraRaw != "{}" { + extra := make(map[string]any) + if err := json.Unmarshal([]byte(extraRaw), &extra); err == nil { + item.Extra = extra + } + } + logs = append(logs, item) + } + if err := rows.Err(); err != nil { + return nil, err + } + + return &service.OpsSystemLogList{ + Logs: logs, + Total: total, + Page: page, + PageSize: pageSize, + }, nil +} + +func (r *opsRepository) DeleteSystemLogs(ctx context.Context, filter *service.OpsSystemLogCleanupFilter) (int64, error) { + if r == nil || r.db == nil { + return 0, fmt.Errorf("nil ops repository") + } + if filter == nil { + filter = &service.OpsSystemLogCleanupFilter{} + } + + where, args, hasConstraint := buildOpsSystemLogsCleanupWhere(filter) + if !hasConstraint { + return 0, fmt.Errorf("cleanup requires at least one filter condition") + } + + query := "DELETE FROM ops_system_logs l " + where + res, err := r.db.ExecContext(ctx, query, args...) 
+	if err != nil {
+		return 0, err
+	}
+	return res.RowsAffected()
+}
+
+func (r *opsRepository) InsertSystemLogCleanupAudit(ctx context.Context, input *service.OpsSystemLogCleanupAudit) error {
+	if r == nil || r.db == nil {
+		return fmt.Errorf("nil ops repository")
+	}
+	if input == nil {
+		return fmt.Errorf("nil input")
+	}
+	createdAt := input.CreatedAt
+	if createdAt.IsZero() {
+		createdAt = time.Now().UTC()
+	}
+	_, err := r.db.ExecContext(ctx, `
+INSERT INTO ops_system_log_cleanup_audits (
+  created_at,
+  operator_id,
+  conditions,
+  deleted_rows
+) VALUES ($1,$2,$3,$4)
+`, createdAt.UTC(), input.OperatorID, input.Conditions, input.DeletedRows)
+	return err
+}
+
 func buildOpsErrorLogsWhere(filter *service.OpsErrorLogFilter) (string, []any) {
 	clauses := make([]string, 0, 12)
 	args := make([]any, 0, 12)
@@ -1053,6 +1291,88 @@ func buildOpsErrorLogsWhere(filter *service.OpsErrorLogFilter) (string, []any) {
 	return "WHERE " + strings.Join(clauses, " AND "), args
 }
 
+// buildOpsSystemLogsWhere builds the WHERE clause (table alias "l") and the
+// positional arguments for ops_system_logs queries. The clause always starts
+// with "1=1"; the boolean reports whether at least one real filter condition
+// was added. String filters are trimmed, the level is matched
+// case-insensitively, and Query is matched as a literal, case-insensitive
+// substring of message, request_id, client_request_id and extra.
+func buildOpsSystemLogsWhere(filter *service.OpsSystemLogFilter) (string, []any, bool) {
+	clauses := make([]string, 0, 12)
+	args := make([]any, 0, 11)
+	clauses = append(clauses, "1=1")
+	// where appends "<expr>$n" with value bound to the next placeholder.
+	where := func(expr string, value any) {
+		args = append(args, value)
+		clauses = append(clauses, expr+"$"+itoa(len(args)))
+	}
+
+	if filter != nil {
+		if filter.StartTime != nil && !filter.StartTime.IsZero() {
+			where("l.created_at >= ", filter.StartTime.UTC())
+		}
+		if filter.EndTime != nil && !filter.EndTime.IsZero() {
+			where("l.created_at < ", filter.EndTime.UTC())
+		}
+		if v := strings.ToLower(strings.TrimSpace(filter.Level)); v != "" {
+			where("LOWER(COALESCE(l.level,'')) = ", v)
+		}
+		if v := strings.TrimSpace(filter.Component); v != "" {
+			where("COALESCE(l.component,'') = ", v)
+		}
+		if v := strings.TrimSpace(filter.RequestID); v != "" {
+			where("COALESCE(l.request_id,'') = ", v)
+		}
+		if v := strings.TrimSpace(filter.ClientRequestID); v != "" {
+			where("COALESCE(l.client_request_id,'') = ", v)
+		}
+		if filter.UserID != nil && *filter.UserID > 0 {
+			where("l.user_id = ", *filter.UserID)
+		}
+		if filter.AccountID != nil && *filter.AccountID > 0 {
+			where("l.account_id = ", *filter.AccountID)
+		}
+		if v := strings.TrimSpace(filter.Platform); v != "" {
+			where("COALESCE(l.platform,'') = ", v)
+		}
+		if v := strings.TrimSpace(filter.Model); v != "" {
+			where("COALESCE(l.model,'') = ", v)
+		}
+		if v := strings.TrimSpace(filter.Query); v != "" {
+			// Escape LIKE metacharacters so user input is matched literally;
+			// otherwise a query such as "%" would match every row, which also
+			// defeats the "at least one filter" guard of the cleanup path.
+			escaped := strings.NewReplacer(`\`, `\\`, `%`, `\%`, `_`, `\_`).Replace(v)
+			args = append(args, "%"+escaped+"%")
+			n := itoa(len(args))
+			clauses = append(clauses, "(l.message ILIKE $"+n+" OR COALESCE(l.request_id,'') ILIKE $"+n+" OR COALESCE(l.client_request_id,'') ILIKE $"+n+" OR COALESCE(l.extra::text,'') ILIKE $"+n+")")
+		}
+	}
+
+	// Every condition binds exactly one argument.
+	return "WHERE " + strings.Join(clauses, " AND "), args, len(args) > 0
+}
+
+func buildOpsSystemLogsCleanupWhere(filter *service.OpsSystemLogCleanupFilter) (string, []any, bool) {
+	if filter == nil {
+		filter = &service.OpsSystemLogCleanupFilter{}
+	}
+	listFilter := &service.OpsSystemLogFilter{
+		StartTime:       filter.StartTime,
+		EndTime:         filter.EndTime,
+		Level:           filter.Level,
+		Component:       filter.Component,
+		RequestID:       filter.RequestID,
+		ClientRequestID: filter.ClientRequestID,
+		UserID:          filter.UserID, + 
AccountID: filter.AccountID, + Platform: filter.Platform, + Model: filter.Model, + Query: filter.Query, + } + return buildOpsSystemLogsWhere(listFilter) +} + // Helpers for nullable args func opsNullString(v any) any { switch s := v.(type) { diff --git a/backend/internal/repository/ops_repo_system_logs_test.go b/backend/internal/repository/ops_repo_system_logs_test.go new file mode 100644 index 00000000..c3524fe4 --- /dev/null +++ b/backend/internal/repository/ops_repo_system_logs_test.go @@ -0,0 +1,86 @@ +package repository + +import ( + "strings" + "testing" + "time" + + "github.com/Wei-Shaw/sub2api/internal/service" +) + +func TestBuildOpsSystemLogsWhere_WithClientRequestIDAndUserID(t *testing.T) { + start := time.Date(2026, 2, 1, 0, 0, 0, 0, time.UTC) + end := time.Date(2026, 2, 2, 0, 0, 0, 0, time.UTC) + userID := int64(12) + accountID := int64(34) + + filter := &service.OpsSystemLogFilter{ + StartTime: &start, + EndTime: &end, + Level: "warn", + Component: "http.access", + RequestID: "req-1", + ClientRequestID: "creq-1", + UserID: &userID, + AccountID: &accountID, + Platform: "openai", + Model: "gpt-5", + Query: "timeout", + } + + where, args, hasConstraint := buildOpsSystemLogsWhere(filter) + if !hasConstraint { + t.Fatalf("expected hasConstraint=true") + } + if where == "" { + t.Fatalf("where should not be empty") + } + if len(args) != 11 { + t.Fatalf("args len = %d, want 11", len(args)) + } + if !contains(where, "COALESCE(l.client_request_id,'') = $") { + t.Fatalf("where should include client_request_id condition: %s", where) + } + if !contains(where, "l.user_id = $") { + t.Fatalf("where should include user_id condition: %s", where) + } +} + +func TestBuildOpsSystemLogsCleanupWhere_RequireConstraint(t *testing.T) { + where, args, hasConstraint := buildOpsSystemLogsCleanupWhere(&service.OpsSystemLogCleanupFilter{}) + if hasConstraint { + t.Fatalf("expected hasConstraint=false") + } + if where == "" { + t.Fatalf("where should not be empty") + } + if 
len(args) != 0 { + t.Fatalf("args len = %d, want 0", len(args)) + } +} + +func TestBuildOpsSystemLogsCleanupWhere_WithClientRequestIDAndUserID(t *testing.T) { + userID := int64(9) + filter := &service.OpsSystemLogCleanupFilter{ + ClientRequestID: "creq-9", + UserID: &userID, + } + + where, args, hasConstraint := buildOpsSystemLogsCleanupWhere(filter) + if !hasConstraint { + t.Fatalf("expected hasConstraint=true") + } + if len(args) != 2 { + t.Fatalf("args len = %d, want 2", len(args)) + } + if !contains(where, "COALESCE(l.client_request_id,'') = $") { + t.Fatalf("where should include client_request_id condition: %s", where) + } + if !contains(where, "l.user_id = $") { + t.Fatalf("where should include user_id condition: %s", where) + } +} + +func contains(s string, sub string) bool { + return strings.Contains(s, sub) +} diff --git a/backend/internal/server/middleware/client_request_id.go b/backend/internal/server/middleware/client_request_id.go index d22b6cc5..6838d6af 100644 --- a/backend/internal/server/middleware/client_request_id.go +++ b/backend/internal/server/middleware/client_request_id.go @@ -2,10 +2,13 @@ package middleware import ( "context" + "strings" "github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey" + "github.com/Wei-Shaw/sub2api/internal/pkg/logger" "github.com/gin-gonic/gin" "github.com/google/uuid" + "go.uber.org/zap" ) // ClientRequestID ensures every request has a unique client_request_id in request.Context(). 
@@ -24,7 +27,10 @@ func ClientRequestID() gin.HandlerFunc { } id := uuid.New().String() - c.Request = c.Request.WithContext(context.WithValue(c.Request.Context(), ctxkey.ClientRequestID, id)) + ctx := context.WithValue(c.Request.Context(), ctxkey.ClientRequestID, id) + requestLogger := logger.FromContext(ctx).With(zap.String("client_request_id", strings.TrimSpace(id))) + ctx = logger.IntoContext(ctx, requestLogger) + c.Request = c.Request.WithContext(ctx) c.Next() } } diff --git a/backend/internal/server/middleware/logger.go b/backend/internal/server/middleware/logger.go index 27d4f51c..b14a3a21 100644 --- a/backend/internal/server/middleware/logger.go +++ b/backend/internal/server/middleware/logger.go @@ -1,10 +1,12 @@ package middleware import ( - "log" "time" + "github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey" + "github.com/Wei-Shaw/sub2api/internal/pkg/logger" "github.com/gin-gonic/gin" + "go.uber.org/zap" ) // Logger 请求日志中间件 @@ -24,38 +26,41 @@ func Logger() gin.HandlerFunc { return } - // 结束时间 endTime := time.Now() - - // 执行时间 latency := endTime.Sub(startTime) - // 请求方法 method := c.Request.Method - - // 状态码 statusCode := c.Writer.Status() - - // 客户端IP clientIP := c.ClientIP() - - // 协议版本 protocol := c.Request.Proto + accountID, hasAccountID := c.Request.Context().Value(ctxkey.AccountID).(int64) + platform, _ := c.Request.Context().Value(ctxkey.Platform).(string) + model, _ := c.Request.Context().Value(ctxkey.Model).(string) - // 日志格式: [时间] 状态码 | 延迟 | IP | 协议 | 方法 路径 - log.Printf("[GIN] %v | %3d | %13v | %15s | %-6s | %-7s %s", - endTime.Format("2006/01/02 - 15:04:05"), - statusCode, - latency, - clientIP, - protocol, - method, - path, - ) + fields := []zap.Field{ + zap.String("component", "http.access"), + zap.Int("status_code", statusCode), + zap.Int64("latency_ms", latency.Milliseconds()), + zap.String("client_ip", clientIP), + zap.String("protocol", protocol), + zap.String("method", method), + zap.String("path", path), + } + if hasAccountID && accountID > 
0 { + fields = append(fields, zap.Int64("account_id", accountID)) + } + if platform != "" { + fields = append(fields, zap.String("platform", platform)) + } + if model != "" { + fields = append(fields, zap.String("model", model)) + } + + l := logger.FromContext(c.Request.Context()).With(fields...) + l.Info("http request completed", zap.Time("completed_at", endTime)) - // 如果有错误,额外记录错误信息 if len(c.Errors) > 0 { - log.Printf("[GIN] Errors: %v", c.Errors.String()) + l.Warn("http request contains gin errors", zap.String("errors", c.Errors.String())) } } } diff --git a/backend/internal/server/middleware/request_access_logger_test.go b/backend/internal/server/middleware/request_access_logger_test.go new file mode 100644 index 00000000..07faa32f --- /dev/null +++ b/backend/internal/server/middleware/request_access_logger_test.go @@ -0,0 +1,193 @@ +package middleware + +import ( + "context" + "net/http" + "net/http/httptest" + "sync" + "testing" + + "github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey" + "github.com/Wei-Shaw/sub2api/internal/pkg/logger" + "github.com/gin-gonic/gin" +) + +type testLogSink struct { + mu sync.Mutex + events []*logger.LogEvent +} + +func (s *testLogSink) WriteLogEvent(event *logger.LogEvent) { + s.mu.Lock() + defer s.mu.Unlock() + s.events = append(s.events, event) +} + +func (s *testLogSink) list() []*logger.LogEvent { + s.mu.Lock() + defer s.mu.Unlock() + out := make([]*logger.LogEvent, len(s.events)) + copy(out, s.events) + return out +} + +func initMiddlewareTestLogger(t *testing.T) *testLogSink { + t.Helper() + if err := logger.Init(logger.InitOptions{ + Level: "debug", + Format: "json", + ServiceName: "sub2api", + Environment: "test", + Output: logger.OutputOptions{ + ToStdout: false, + ToFile: false, + }, + }); err != nil { + t.Fatalf("init logger: %v", err) + } + sink := &testLogSink{} + logger.SetSink(sink) + t.Cleanup(func() { + logger.SetSink(nil) + }) + return sink +} + +func TestRequestLogger_GenerateAndPropagateRequestID(t *testing.T) { 
+ gin.SetMode(gin.TestMode) + r := gin.New() + r.Use(RequestLogger()) + r.GET("/t", func(c *gin.Context) { + reqID, ok := c.Request.Context().Value(ctxkey.RequestID).(string) + if !ok || reqID == "" { + t.Fatalf("request_id missing in context") + } + if got := c.Writer.Header().Get(requestIDHeader); got != reqID { + t.Fatalf("response header request_id mismatch, header=%q ctx=%q", got, reqID) + } + c.Status(http.StatusOK) + }) + + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/t", nil) + r.ServeHTTP(w, req) + if w.Code != http.StatusOK { + t.Fatalf("status=%d", w.Code) + } + if w.Header().Get(requestIDHeader) == "" { + t.Fatalf("X-Request-ID should be set") + } +} + +func TestRequestLogger_KeepIncomingRequestID(t *testing.T) { + gin.SetMode(gin.TestMode) + r := gin.New() + r.Use(RequestLogger()) + r.GET("/t", func(c *gin.Context) { + reqID, _ := c.Request.Context().Value(ctxkey.RequestID).(string) + if reqID != "rid-fixed" { + t.Fatalf("request_id=%q, want rid-fixed", reqID) + } + c.Status(http.StatusOK) + }) + + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/t", nil) + req.Header.Set(requestIDHeader, "rid-fixed") + r.ServeHTTP(w, req) + if w.Code != http.StatusOK { + t.Fatalf("status=%d", w.Code) + } + if got := w.Header().Get(requestIDHeader); got != "rid-fixed" { + t.Fatalf("header=%q, want rid-fixed", got) + } +} + +func TestLogger_AccessLogIncludesCoreFields(t *testing.T) { + gin.SetMode(gin.TestMode) + sink := initMiddlewareTestLogger(t) + + r := gin.New() + r.Use(Logger()) + r.Use(func(c *gin.Context) { + ctx := c.Request.Context() + ctx = context.WithValue(ctx, ctxkey.AccountID, int64(101)) + ctx = context.WithValue(ctx, ctxkey.Platform, "openai") + ctx = context.WithValue(ctx, ctxkey.Model, "gpt-5") + c.Request = c.Request.WithContext(ctx) + c.Next() + }) + r.GET("/api/test", func(c *gin.Context) { + c.Status(http.StatusCreated) + }) + + w := httptest.NewRecorder() + req := 
httptest.NewRequest(http.MethodGet, "/api/test", nil) + r.ServeHTTP(w, req) + if w.Code != http.StatusCreated { + t.Fatalf("status=%d", w.Code) + } + + events := sink.list() + if len(events) == 0 { + t.Fatalf("expected at least one log event") + } + found := false + for _, event := range events { + if event == nil || event.Message != "http request completed" { + continue + } + found = true + switch v := event.Fields["status_code"].(type) { + case int: + if v != http.StatusCreated { + t.Fatalf("status_code field mismatch: %v", v) + } + case int64: + if v != int64(http.StatusCreated) { + t.Fatalf("status_code field mismatch: %v", v) + } + default: + t.Fatalf("status_code type mismatch: %T", v) + } + switch v := event.Fields["account_id"].(type) { + case int64: + if v != 101 { + t.Fatalf("account_id field mismatch: %v", v) + } + case int: + if v != 101 { + t.Fatalf("account_id field mismatch: %v", v) + } + default: + t.Fatalf("account_id type mismatch: %T", v) + } + if event.Fields["platform"] != "openai" || event.Fields["model"] != "gpt-5" { + t.Fatalf("platform/model mismatch: %+v", event.Fields) + } + } + if !found { + t.Fatalf("access log event not found") + } +} + +func TestLogger_HealthPathSkipped(t *testing.T) { + gin.SetMode(gin.TestMode) + sink := initMiddlewareTestLogger(t) + + r := gin.New() + r.Use(Logger()) + r.GET("/health", func(c *gin.Context) { + c.Status(http.StatusOK) + }) + + w := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/health", nil) + r.ServeHTTP(w, req) + if w.Code != http.StatusOK { + t.Fatalf("status=%d", w.Code) + } + if len(sink.list()) != 0 { + t.Fatalf("health endpoint should not write access log") + } +} diff --git a/backend/internal/server/middleware/request_logger.go b/backend/internal/server/middleware/request_logger.go new file mode 100644 index 00000000..0fb2feca --- /dev/null +++ b/backend/internal/server/middleware/request_logger.go @@ -0,0 +1,45 @@ +package middleware + +import ( + "context" + 
"strings" + + "github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey" + "github.com/Wei-Shaw/sub2api/internal/pkg/logger" + "github.com/gin-gonic/gin" + "github.com/google/uuid" + "go.uber.org/zap" +) + +const requestIDHeader = "X-Request-ID" + +// RequestLogger 在请求入口注入 request-scoped logger。 +func RequestLogger() gin.HandlerFunc { + return func(c *gin.Context) { + if c.Request == nil { + c.Next() + return + } + + requestID := strings.TrimSpace(c.GetHeader(requestIDHeader)) + if requestID == "" { + requestID = uuid.NewString() + } + c.Header(requestIDHeader, requestID) + + ctx := context.WithValue(c.Request.Context(), ctxkey.RequestID, requestID) + clientRequestID, _ := ctx.Value(ctxkey.ClientRequestID).(string) + + requestLogger := logger.With( + zap.String("component", "http"), + zap.String("request_id", requestID), + zap.String("client_request_id", strings.TrimSpace(clientRequestID)), + zap.String("path", c.Request.URL.Path), + zap.String("method", c.Request.Method), + ) + + ctx = logger.IntoContext(ctx, requestLogger) + c.Request = c.Request.WithContext(ctx) + c.Next() + } +} diff --git a/backend/internal/server/router.go b/backend/internal/server/router.go index cf9015e4..fb91bc0e 100644 --- a/backend/internal/server/router.go +++ b/backend/internal/server/router.go @@ -29,6 +29,7 @@ func SetupRouter( redisClient *redis.Client, ) *gin.Engine { // 应用中间件 + r.Use(middleware2.RequestLogger()) r.Use(middleware2.Logger()) r.Use(middleware2.CORS(cfg.CORS)) r.Use(middleware2.SecurityHeaders(cfg.Security.CSP)) diff --git a/backend/internal/server/routes/admin.go b/backend/internal/server/routes/admin.go index ca1fdf98..e865ae31 100644 --- a/backend/internal/server/routes/admin.go +++ b/backend/internal/server/routes/admin.go @@ -101,6 +101,9 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) { { runtime.GET("/alert", h.Admin.Ops.GetAlertRuntimeSettings) runtime.PUT("/alert", h.Admin.Ops.UpdateAlertRuntimeSettings) + runtime.GET("/logging", 
h.Admin.Ops.GetRuntimeLogConfig) + runtime.PUT("/logging", h.Admin.Ops.UpdateRuntimeLogConfig) + runtime.POST("/logging/reset", h.Admin.Ops.ResetRuntimeLogConfig) } // Advanced settings (DB-backed) @@ -144,6 +147,11 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) { // Request drilldown (success + error) ops.GET("/requests", h.Admin.Ops.ListRequestDetails) + // Indexed system logs + ops.GET("/system-logs", h.Admin.Ops.ListSystemLogs) + ops.POST("/system-logs/cleanup", h.Admin.Ops.CleanupSystemLogs) + ops.GET("/system-logs/health", h.Admin.Ops.GetSystemLogIngestionHealth) + // Dashboard (vNext - raw path for MVP) ops.GET("/dashboard/overview", h.Admin.Ops.GetDashboardOverview) ops.GET("/dashboard/throughput-trend", h.Admin.Ops.GetDashboardThroughputTrend) diff --git a/backend/internal/service/domain_constants.go b/backend/internal/service/domain_constants.go index 8a4f69b8..ceae443f 100644 --- a/backend/internal/service/domain_constants.go +++ b/backend/internal/service/domain_constants.go @@ -161,6 +161,9 @@ const ( // SettingKeyOpsAdvancedSettings stores JSON config for ops advanced settings (data retention, aggregation). SettingKeyOpsAdvancedSettings = "ops_advanced_settings" + // SettingKeyOpsRuntimeLogConfig stores JSON config for runtime log settings. 
+ SettingKeyOpsRuntimeLogConfig = "ops_runtime_log_config" + // ========================= // Stream Timeout Handling // ========================= diff --git a/backend/internal/service/ops_cleanup_service.go b/backend/internal/service/ops_cleanup_service.go index 1ade7176..f4e32433 100644 --- a/backend/internal/service/ops_cleanup_service.go +++ b/backend/internal/service/ops_cleanup_service.go @@ -157,6 +157,8 @@ type opsCleanupDeletedCounts struct { errorLogs int64 retryAttempts int64 alertEvents int64 + systemLogs int64 + logAudits int64 systemMetrics int64 hourlyPreagg int64 dailyPreagg int64 @@ -164,10 +166,12 @@ type opsCleanupDeletedCounts struct { func (c opsCleanupDeletedCounts) String() string { return fmt.Sprintf( - "error_logs=%d retry_attempts=%d alert_events=%d system_metrics=%d hourly_preagg=%d daily_preagg=%d", + "error_logs=%d retry_attempts=%d alert_events=%d system_logs=%d log_audits=%d system_metrics=%d hourly_preagg=%d daily_preagg=%d", c.errorLogs, c.retryAttempts, c.alertEvents, + c.systemLogs, + c.logAudits, c.systemMetrics, c.hourlyPreagg, c.dailyPreagg, @@ -204,6 +208,18 @@ func (s *OpsCleanupService) runCleanupOnce(ctx context.Context) (opsCleanupDelet return out, err } out.alertEvents = n + + n, err = deleteOldRowsByID(ctx, s.db, "ops_system_logs", "created_at", cutoff, batchSize, false) + if err != nil { + return out, err + } + out.systemLogs = n + + n, err = deleteOldRowsByID(ctx, s.db, "ops_system_log_cleanup_audits", "created_at", cutoff, batchSize, false) + if err != nil { + return out, err + } + out.logAudits = n } // Minute-level metrics snapshots. 
diff --git a/backend/internal/service/ops_log_runtime.go b/backend/internal/service/ops_log_runtime.go
new file mode 100644
index 00000000..ed8aefa9
--- /dev/null
+++ b/backend/internal/service/ops_log_runtime.go
@@ -0,0 +1,279 @@
+package service
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/config"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
+	"go.uber.org/zap"
+)
+
+// defaultOpsRuntimeLogConfig returns the baseline runtime log config: built-in
+// defaults when cfg is nil, otherwise the values taken from cfg.Log. The
+// retention comes from cfg.Ops.Cleanup.ErrorLogRetentionDays when positive.
+func defaultOpsRuntimeLogConfig(cfg *config.Config) *OpsRuntimeLogConfig {
+	out := &OpsRuntimeLogConfig{
+		Level:           "info",
+		EnableSampling:  false,
+		SamplingInitial: 100,
+		SamplingNext:    100,
+		Caller:          true,
+		StacktraceLevel: "error",
+		RetentionDays:   30,
+	}
+	if cfg == nil {
+		return out
+	}
+	out.Level = strings.ToLower(strings.TrimSpace(cfg.Log.Level))
+	out.EnableSampling = cfg.Log.Sampling.Enabled
+	out.SamplingInitial = cfg.Log.Sampling.Initial
+	out.SamplingNext = cfg.Log.Sampling.Thereafter
+	out.Caller = cfg.Log.Caller
+	out.StacktraceLevel = strings.ToLower(strings.TrimSpace(cfg.Log.StacktraceLevel))
+	if cfg.Ops.Cleanup.ErrorLogRetentionDays > 0 {
+		out.RetentionDays = cfg.Ops.Cleanup.ErrorLogRetentionDays
+	}
+	return out
+}
+
+// normalizeOpsRuntimeLogConfig lower-cases and trims the level fields of cfg in
+// place and fills empty or non-positive fields from defaults.
+func normalizeOpsRuntimeLogConfig(cfg *OpsRuntimeLogConfig, defaults *OpsRuntimeLogConfig) {
+	if cfg == nil || defaults == nil {
+		return
+	}
+	cfg.Level = strings.ToLower(strings.TrimSpace(cfg.Level))
+	if cfg.Level == "" {
+		cfg.Level = defaults.Level
+	}
+	cfg.StacktraceLevel = strings.ToLower(strings.TrimSpace(cfg.StacktraceLevel))
+	if cfg.StacktraceLevel == "" {
+		cfg.StacktraceLevel = defaults.StacktraceLevel
+	}
+	if cfg.SamplingInitial <= 0 {
+		cfg.SamplingInitial = defaults.SamplingInitial
+	}
+	if cfg.SamplingNext <= 0 {
+		cfg.SamplingNext = defaults.SamplingNext
+	}
+	if cfg.RetentionDays <= 0 {
+		cfg.RetentionDays = defaults.RetentionDays
+	}
+}
+
+// validateOpsRuntimeLogConfig reports the first invalid field of cfg, or nil
+// when the config is acceptable.
+func validateOpsRuntimeLogConfig(cfg *OpsRuntimeLogConfig) error {
+	if cfg == nil {
+		return errors.New("invalid config")
+	}
+	switch strings.ToLower(strings.TrimSpace(cfg.Level)) {
+	case "debug", "info", "warn", "error":
+	default:
+		return errors.New("level must be one of: debug/info/warn/error")
+	}
+	switch strings.ToLower(strings.TrimSpace(cfg.StacktraceLevel)) {
+	case "none", "error", "fatal":
+	default:
+		return errors.New("stacktrace_level must be one of: none/error/fatal")
+	}
+	if cfg.SamplingInitial <= 0 {
+		return errors.New("sampling_initial must be positive")
+	}
+	if cfg.SamplingNext <= 0 {
+		return errors.New("sampling_thereafter must be positive")
+	}
+	if cfg.RetentionDays < 1 || cfg.RetentionDays > 3650 {
+		return errors.New("retention_days must be between 1 and 3650")
+	}
+	return nil
+}
+
+// GetRuntimeLogConfig returns the effective runtime log configuration. A value
+// stored under SettingKeyOpsRuntimeLogConfig takes precedence (empty or
+// non-positive fields are filled from the env/yaml baseline); when no value is
+// stored, or the stored value is not valid JSON, the baseline is returned.
+//
+// The baseline is deliberately not written back to the setting store: a stored
+// copy would shadow later env/yaml changes and would undo
+// ResetRuntimeLogConfig, which deletes the key to fall back to the baseline.
+func (s *OpsService) GetRuntimeLogConfig(ctx context.Context) (*OpsRuntimeLogConfig, error) {
+	if s == nil {
+		return defaultOpsRuntimeLogConfig(nil), nil
+	}
+	defaultCfg := defaultOpsRuntimeLogConfig(s.cfg)
+	if s.settingRepo == nil {
+		return defaultCfg, nil
+	}
+	if ctx == nil {
+		ctx = context.Background()
+	}
+
+	raw, err := s.settingRepo.GetValue(ctx, SettingKeyOpsRuntimeLogConfig)
+	if err != nil {
+		if errors.Is(err, ErrSettingNotFound) {
+			return defaultCfg, nil
+		}
+		return nil, err
+	}
+
+	cfg := &OpsRuntimeLogConfig{}
+	if err := json.Unmarshal([]byte(raw), cfg); err != nil {
+		// A corrupt stored value must not break logging; use the baseline.
+		return defaultCfg, nil
+	}
+	normalizeOpsRuntimeLogConfig(cfg, defaultCfg)
+	return cfg, nil
+}
+
+func (s *OpsService) UpdateRuntimeLogConfig(ctx context.Context, req *OpsRuntimeLogConfig, operatorID int64) (*OpsRuntimeLogConfig, error) {
+	if s == nil || s.settingRepo == nil {
+		return nil, errors.New("setting repository not initialized")
+	}
+	if req == nil {
+		return nil, errors.New("invalid config")
+	}
+	if ctx == nil {
+		ctx = context.Background()
+	}
+	if operatorID <= 0 {
+		return nil, errors.New("invalid operator 
id") + } + + oldCfg, err := s.GetRuntimeLogConfig(ctx) + if err != nil { + return nil, err + } + next := *req + normalizeOpsRuntimeLogConfig(&next, defaultOpsRuntimeLogConfig(s.cfg)) + if err := validateOpsRuntimeLogConfig(&next); err != nil { + s.auditRuntimeLogConfigFailure(operatorID, oldCfg, &next, "validation_failed: "+err.Error()) + return nil, err + } + + if err := applyOpsRuntimeLogConfig(&next); err != nil { + s.auditRuntimeLogConfigFailure(operatorID, oldCfg, &next, "apply_failed: "+err.Error()) + return nil, err + } + + next.Source = "runtime_setting" + next.UpdatedAt = time.Now().UTC().Format(time.RFC3339Nano) + next.UpdatedByUserID = operatorID + + encoded, err := json.Marshal(&next) + if err != nil { + return nil, err + } + if err := s.settingRepo.Set(ctx, SettingKeyOpsRuntimeLogConfig, string(encoded)); err != nil { + // 存储失败时回滚到旧配置,避免内存状态与持久化状态不一致。 + _ = applyOpsRuntimeLogConfig(oldCfg) + s.auditRuntimeLogConfigFailure(operatorID, oldCfg, &next, "persist_failed: "+err.Error()) + return nil, err + } + + s.auditRuntimeLogConfigChange(operatorID, oldCfg, &next, "updated") + + return &next, nil +} + +func (s *OpsService) ResetRuntimeLogConfig(ctx context.Context, operatorID int64) (*OpsRuntimeLogConfig, error) { + if s == nil || s.settingRepo == nil { + return nil, errors.New("setting repository not initialized") + } + if ctx == nil { + ctx = context.Background() + } + if operatorID <= 0 { + return nil, errors.New("invalid operator id") + } + + oldCfg, err := s.GetRuntimeLogConfig(ctx) + if err != nil { + return nil, err + } + + resetCfg := defaultOpsRuntimeLogConfig(s.cfg) + normalizeOpsRuntimeLogConfig(resetCfg, defaultOpsRuntimeLogConfig(s.cfg)) + if err := validateOpsRuntimeLogConfig(resetCfg); err != nil { + s.auditRuntimeLogConfigFailure(operatorID, oldCfg, resetCfg, "reset_validation_failed: "+err.Error()) + return nil, err + } + if err := applyOpsRuntimeLogConfig(resetCfg); err != nil { + s.auditRuntimeLogConfigFailure(operatorID, oldCfg, 
resetCfg, "reset_apply_failed: "+err.Error()) + return nil, err + } + + // 清理 runtime 覆盖配置,回退到 env/yaml baseline。 + if err := s.settingRepo.Delete(ctx, SettingKeyOpsRuntimeLogConfig); err != nil && !errors.Is(err, ErrSettingNotFound) { + _ = applyOpsRuntimeLogConfig(oldCfg) + s.auditRuntimeLogConfigFailure(operatorID, oldCfg, resetCfg, "reset_persist_failed: "+err.Error()) + return nil, err + } + + now := time.Now().UTC().Format(time.RFC3339Nano) + resetCfg.Source = "baseline" + resetCfg.UpdatedAt = now + resetCfg.UpdatedByUserID = operatorID + + s.auditRuntimeLogConfigChange(operatorID, oldCfg, resetCfg, "reset") + return resetCfg, nil +} + +func applyOpsRuntimeLogConfig(cfg *OpsRuntimeLogConfig) error { + if cfg == nil { + return fmt.Errorf("nil runtime log config") + } + if err := logger.Reconfigure(func(opts *logger.InitOptions) error { + opts.Level = strings.ToLower(strings.TrimSpace(cfg.Level)) + opts.Caller = cfg.Caller + opts.StacktraceLevel = strings.ToLower(strings.TrimSpace(cfg.StacktraceLevel)) + opts.Sampling.Enabled = cfg.EnableSampling + opts.Sampling.Initial = cfg.SamplingInitial + opts.Sampling.Thereafter = cfg.SamplingNext + return nil + }); err != nil { + return err + } + return nil +} + +func (s *OpsService) applyRuntimeLogConfigOnStartup(ctx context.Context) { + if s == nil { + return + } + cfg, err := s.GetRuntimeLogConfig(ctx) + if err != nil { + return + } + _ = applyOpsRuntimeLogConfig(cfg) +} + +func (s *OpsService) auditRuntimeLogConfigChange(operatorID int64, oldCfg *OpsRuntimeLogConfig, newCfg *OpsRuntimeLogConfig, action string) { + oldRaw, _ := json.Marshal(oldCfg) + newRaw, _ := json.Marshal(newCfg) + logger.With( + zap.String("component", "audit.log_config_change"), + zap.String("action", strings.TrimSpace(action)), + zap.Int64("operator_id", operatorID), + zap.String("old", string(oldRaw)), + zap.String("new", string(newRaw)), + ).Info("runtime log config changed") +} + +func (s *OpsService) auditRuntimeLogConfigFailure(operatorID 
int64, oldCfg *OpsRuntimeLogConfig, newCfg *OpsRuntimeLogConfig, reason string) { + oldRaw, _ := json.Marshal(oldCfg) + newRaw, _ := json.Marshal(newCfg) + logger.With( + zap.String("component", "audit.log_config_change"), + zap.String("action", "failed"), + zap.Int64("operator_id", operatorID), + zap.String("reason", strings.TrimSpace(reason)), + zap.String("old", string(oldRaw)), + zap.String("new", string(newRaw)), + ).Warn("runtime log config change failed") +} diff --git a/backend/internal/service/ops_models.go b/backend/internal/service/ops_models.go index 347cd52b..2ed06d90 100644 --- a/backend/internal/service/ops_models.go +++ b/backend/internal/service/ops_models.go @@ -2,6 +2,21 @@ package service import "time" +type OpsSystemLog struct { + ID int64 `json:"id"` + CreatedAt time.Time `json:"created_at"` + Level string `json:"level"` + Component string `json:"component"` + Message string `json:"message"` + RequestID string `json:"request_id"` + ClientRequestID string `json:"client_request_id"` + UserID *int64 `json:"user_id"` + AccountID *int64 `json:"account_id"` + Platform string `json:"platform"` + Model string `json:"model"` + Extra map[string]any `json:"extra,omitempty"` +} + type OpsErrorLog struct { ID int64 `json:"id"` CreatedAt time.Time `json:"created_at"` diff --git a/backend/internal/service/ops_port.go b/backend/internal/service/ops_port.go index 7a00988c..f3633eae 100644 --- a/backend/internal/service/ops_port.go +++ b/backend/internal/service/ops_port.go @@ -10,6 +10,10 @@ type OpsRepository interface { ListErrorLogs(ctx context.Context, filter *OpsErrorLogFilter) (*OpsErrorLogList, error) GetErrorLogByID(ctx context.Context, id int64) (*OpsErrorLogDetail, error) ListRequestDetails(ctx context.Context, filter *OpsRequestDetailFilter) ([]*OpsRequestDetail, int64, error) + BatchInsertSystemLogs(ctx context.Context, inputs []*OpsInsertSystemLogInput) (int64, error) + ListSystemLogs(ctx context.Context, filter *OpsSystemLogFilter) 
(*OpsSystemLogList, error) + DeleteSystemLogs(ctx context.Context, filter *OpsSystemLogCleanupFilter) (int64, error) + InsertSystemLogCleanupAudit(ctx context.Context, input *OpsSystemLogCleanupAudit) error InsertRetryAttempt(ctx context.Context, input *OpsInsertRetryAttemptInput) (int64, error) UpdateRetryAttempt(ctx context.Context, input *OpsUpdateRetryAttemptInput) error @@ -205,6 +209,69 @@ type OpsInsertSystemMetricsInput struct { ConcurrencyQueueDepth *int } +type OpsInsertSystemLogInput struct { + CreatedAt time.Time + Level string + Component string + Message string + RequestID string + ClientRequestID string + UserID *int64 + AccountID *int64 + Platform string + Model string + ExtraJSON string +} + +type OpsSystemLogFilter struct { + StartTime *time.Time + EndTime *time.Time + + Level string + Component string + + RequestID string + ClientRequestID string + UserID *int64 + AccountID *int64 + Platform string + Model string + Query string + + Page int + PageSize int +} + +type OpsSystemLogCleanupFilter struct { + StartTime *time.Time + EndTime *time.Time + + Level string + Component string + + RequestID string + ClientRequestID string + UserID *int64 + AccountID *int64 + Platform string + Model string + Query string +} + +type OpsSystemLogList struct { + Logs []*OpsSystemLog `json:"logs"` + Total int `json:"total"` + Page int `json:"page"` + PageSize int `json:"page_size"` +} + +type OpsSystemLogCleanupAudit struct { + CreatedAt time.Time + OperatorID int64 + Conditions string + DeletedRows int64 +} + type OpsSystemMetricsSnapshot struct { ID int64 `json:"id"` CreatedAt time.Time `json:"created_at"` diff --git a/backend/internal/service/ops_repo_mock_test.go b/backend/internal/service/ops_repo_mock_test.go new file mode 100644 index 00000000..e250dea3 --- /dev/null +++ b/backend/internal/service/ops_repo_mock_test.go @@ -0,0 +1,196 @@ +package service + +import ( + "context" + "time" +) + +// opsRepoMock is a test-only OpsRepository implementation with 
optional function hooks. +type opsRepoMock struct { + BatchInsertSystemLogsFn func(ctx context.Context, inputs []*OpsInsertSystemLogInput) (int64, error) + ListSystemLogsFn func(ctx context.Context, filter *OpsSystemLogFilter) (*OpsSystemLogList, error) + DeleteSystemLogsFn func(ctx context.Context, filter *OpsSystemLogCleanupFilter) (int64, error) + InsertSystemLogCleanupAuditFn func(ctx context.Context, input *OpsSystemLogCleanupAudit) error +} + +func (m *opsRepoMock) InsertErrorLog(ctx context.Context, input *OpsInsertErrorLogInput) (int64, error) { + return 0, nil +} + +func (m *opsRepoMock) ListErrorLogs(ctx context.Context, filter *OpsErrorLogFilter) (*OpsErrorLogList, error) { + return &OpsErrorLogList{Errors: []*OpsErrorLog{}, Page: 1, PageSize: 20}, nil +} + +func (m *opsRepoMock) GetErrorLogByID(ctx context.Context, id int64) (*OpsErrorLogDetail, error) { + return &OpsErrorLogDetail{}, nil +} + +func (m *opsRepoMock) ListRequestDetails(ctx context.Context, filter *OpsRequestDetailFilter) ([]*OpsRequestDetail, int64, error) { + return []*OpsRequestDetail{}, 0, nil +} + +func (m *opsRepoMock) BatchInsertSystemLogs(ctx context.Context, inputs []*OpsInsertSystemLogInput) (int64, error) { + if m.BatchInsertSystemLogsFn != nil { + return m.BatchInsertSystemLogsFn(ctx, inputs) + } + return int64(len(inputs)), nil +} + +func (m *opsRepoMock) ListSystemLogs(ctx context.Context, filter *OpsSystemLogFilter) (*OpsSystemLogList, error) { + if m.ListSystemLogsFn != nil { + return m.ListSystemLogsFn(ctx, filter) + } + return &OpsSystemLogList{Logs: []*OpsSystemLog{}, Total: 0, Page: 1, PageSize: 50}, nil +} + +func (m *opsRepoMock) DeleteSystemLogs(ctx context.Context, filter *OpsSystemLogCleanupFilter) (int64, error) { + if m.DeleteSystemLogsFn != nil { + return m.DeleteSystemLogsFn(ctx, filter) + } + return 0, nil +} + +func (m *opsRepoMock) InsertSystemLogCleanupAudit(ctx context.Context, input *OpsSystemLogCleanupAudit) error { + if m.InsertSystemLogCleanupAuditFn 
!= nil { + return m.InsertSystemLogCleanupAuditFn(ctx, input) + } + return nil +} + +func (m *opsRepoMock) InsertRetryAttempt(ctx context.Context, input *OpsInsertRetryAttemptInput) (int64, error) { + return 0, nil +} + +func (m *opsRepoMock) UpdateRetryAttempt(ctx context.Context, input *OpsUpdateRetryAttemptInput) error { + return nil +} + +func (m *opsRepoMock) GetLatestRetryAttemptForError(ctx context.Context, sourceErrorID int64) (*OpsRetryAttempt, error) { + return nil, nil +} + +func (m *opsRepoMock) ListRetryAttemptsByErrorID(ctx context.Context, sourceErrorID int64, limit int) ([]*OpsRetryAttempt, error) { + return []*OpsRetryAttempt{}, nil +} + +func (m *opsRepoMock) UpdateErrorResolution(ctx context.Context, errorID int64, resolved bool, resolvedByUserID *int64, resolvedRetryID *int64, resolvedAt *time.Time) error { + return nil +} + +func (m *opsRepoMock) GetWindowStats(ctx context.Context, filter *OpsDashboardFilter) (*OpsWindowStats, error) { + return &OpsWindowStats{}, nil +} + +func (m *opsRepoMock) GetRealtimeTrafficSummary(ctx context.Context, filter *OpsDashboardFilter) (*OpsRealtimeTrafficSummary, error) { + return &OpsRealtimeTrafficSummary{}, nil +} + +func (m *opsRepoMock) GetDashboardOverview(ctx context.Context, filter *OpsDashboardFilter) (*OpsDashboardOverview, error) { + return &OpsDashboardOverview{}, nil +} + +func (m *opsRepoMock) GetThroughputTrend(ctx context.Context, filter *OpsDashboardFilter, bucketSeconds int) (*OpsThroughputTrendResponse, error) { + return &OpsThroughputTrendResponse{}, nil +} + +func (m *opsRepoMock) GetLatencyHistogram(ctx context.Context, filter *OpsDashboardFilter) (*OpsLatencyHistogramResponse, error) { + return &OpsLatencyHistogramResponse{}, nil +} + +func (m *opsRepoMock) GetErrorTrend(ctx context.Context, filter *OpsDashboardFilter, bucketSeconds int) (*OpsErrorTrendResponse, error) { + return &OpsErrorTrendResponse{}, nil +} + +func (m *opsRepoMock) GetErrorDistribution(ctx context.Context, filter 
*OpsDashboardFilter) (*OpsErrorDistributionResponse, error) { + return &OpsErrorDistributionResponse{}, nil +} + +func (m *opsRepoMock) GetOpenAITokenStats(ctx context.Context, filter *OpsOpenAITokenStatsFilter) (*OpsOpenAITokenStatsResponse, error) { + return &OpsOpenAITokenStatsResponse{}, nil +} + +func (m *opsRepoMock) InsertSystemMetrics(ctx context.Context, input *OpsInsertSystemMetricsInput) error { + return nil +} + +func (m *opsRepoMock) GetLatestSystemMetrics(ctx context.Context, windowMinutes int) (*OpsSystemMetricsSnapshot, error) { + return &OpsSystemMetricsSnapshot{}, nil +} + +func (m *opsRepoMock) UpsertJobHeartbeat(ctx context.Context, input *OpsUpsertJobHeartbeatInput) error { + return nil +} + +func (m *opsRepoMock) ListJobHeartbeats(ctx context.Context) ([]*OpsJobHeartbeat, error) { + return []*OpsJobHeartbeat{}, nil +} + +func (m *opsRepoMock) ListAlertRules(ctx context.Context) ([]*OpsAlertRule, error) { + return []*OpsAlertRule{}, nil +} + +func (m *opsRepoMock) CreateAlertRule(ctx context.Context, input *OpsAlertRule) (*OpsAlertRule, error) { + return input, nil +} + +func (m *opsRepoMock) UpdateAlertRule(ctx context.Context, input *OpsAlertRule) (*OpsAlertRule, error) { + return input, nil +} + +func (m *opsRepoMock) DeleteAlertRule(ctx context.Context, id int64) error { + return nil +} + +func (m *opsRepoMock) ListAlertEvents(ctx context.Context, filter *OpsAlertEventFilter) ([]*OpsAlertEvent, error) { + return []*OpsAlertEvent{}, nil +} + +func (m *opsRepoMock) GetAlertEventByID(ctx context.Context, eventID int64) (*OpsAlertEvent, error) { + return &OpsAlertEvent{}, nil +} + +func (m *opsRepoMock) GetActiveAlertEvent(ctx context.Context, ruleID int64) (*OpsAlertEvent, error) { + return nil, nil +} + +func (m *opsRepoMock) GetLatestAlertEvent(ctx context.Context, ruleID int64) (*OpsAlertEvent, error) { + return nil, nil +} + +func (m *opsRepoMock) CreateAlertEvent(ctx context.Context, event *OpsAlertEvent) (*OpsAlertEvent, error) { + 
return event, nil +} + +func (m *opsRepoMock) UpdateAlertEventStatus(ctx context.Context, eventID int64, status string, resolvedAt *time.Time) error { + return nil +} + +func (m *opsRepoMock) UpdateAlertEventEmailSent(ctx context.Context, eventID int64, emailSent bool) error { + return nil +} + +func (m *opsRepoMock) CreateAlertSilence(ctx context.Context, input *OpsAlertSilence) (*OpsAlertSilence, error) { + return input, nil +} + +func (m *opsRepoMock) IsAlertSilenced(ctx context.Context, ruleID int64, platform string, groupID *int64, region *string, now time.Time) (bool, error) { + return false, nil +} + +func (m *opsRepoMock) UpsertHourlyMetrics(ctx context.Context, startTime, endTime time.Time) error { + return nil +} + +func (m *opsRepoMock) UpsertDailyMetrics(ctx context.Context, startTime, endTime time.Time) error { + return nil +} + +func (m *opsRepoMock) GetLatestHourlyBucketStart(ctx context.Context) (time.Time, bool, error) { + return time.Time{}, false, nil +} + +func (m *opsRepoMock) GetLatestDailyBucketDate(ctx context.Context) (time.Time, bool, error) { + return time.Time{}, false, nil +} + +var _ OpsRepository = (*opsRepoMock)(nil) diff --git a/backend/internal/service/ops_service.go b/backend/internal/service/ops_service.go index 9c121b8b..ed54bf6a 100644 --- a/backend/internal/service/ops_service.go +++ b/backend/internal/service/ops_service.go @@ -37,6 +37,7 @@ type OpsService struct { openAIGatewayService *OpenAIGatewayService geminiCompatService *GeminiMessagesCompatService antigravityGatewayService *AntigravityGatewayService + systemLogSink *OpsSystemLogSink } func NewOpsService( @@ -50,8 +51,9 @@ func NewOpsService( openAIGatewayService *OpenAIGatewayService, geminiCompatService *GeminiMessagesCompatService, antigravityGatewayService *AntigravityGatewayService, + systemLogSink *OpsSystemLogSink, ) *OpsService { - return &OpsService{ + svc := &OpsService{ opsRepo: opsRepo, settingRepo: settingRepo, cfg: cfg, @@ -64,7 +66,10 @@ func 
NewOpsService( openAIGatewayService: openAIGatewayService, geminiCompatService: geminiCompatService, antigravityGatewayService: antigravityGatewayService, + systemLogSink: systemLogSink, } + svc.applyRuntimeLogConfigOnStartup(context.Background()) + return svc } func (s *OpsService) RequireMonitoringEnabled(ctx context.Context) error { diff --git a/backend/internal/service/ops_system_log_service.go b/backend/internal/service/ops_system_log_service.go new file mode 100644 index 00000000..f5a64803 --- /dev/null +++ b/backend/internal/service/ops_system_log_service.go @@ -0,0 +1,124 @@ +package service + +import ( + "context" + "database/sql" + "encoding/json" + "errors" + "log" + "strings" + "time" + + infraerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors" +) + +func (s *OpsService) ListSystemLogs(ctx context.Context, filter *OpsSystemLogFilter) (*OpsSystemLogList, error) { + if err := s.RequireMonitoringEnabled(ctx); err != nil { + return nil, err + } + if s.opsRepo == nil { + return &OpsSystemLogList{ + Logs: []*OpsSystemLog{}, + Total: 0, + Page: 1, + PageSize: 50, + }, nil + } + if filter == nil { + filter = &OpsSystemLogFilter{} + } + if filter.Page <= 0 { + filter.Page = 1 + } + if filter.PageSize <= 0 { + filter.PageSize = 50 + } + if filter.PageSize > 200 { + filter.PageSize = 200 + } + + result, err := s.opsRepo.ListSystemLogs(ctx, filter) + if err != nil { + return nil, infraerrors.InternalServer("OPS_SYSTEM_LOG_LIST_FAILED", "Failed to list system logs").WithCause(err) + } + return result, nil +} + +func (s *OpsService) CleanupSystemLogs(ctx context.Context, filter *OpsSystemLogCleanupFilter, operatorID int64) (int64, error) { + if err := s.RequireMonitoringEnabled(ctx); err != nil { + return 0, err + } + if s.opsRepo == nil { + return 0, infraerrors.ServiceUnavailable("OPS_REPO_UNAVAILABLE", "Ops repository not available") + } + if operatorID <= 0 { + return 0, infraerrors.BadRequest("OPS_SYSTEM_LOG_CLEANUP_INVALID_OPERATOR", "invalid operator") + 
} + if filter == nil { + filter = &OpsSystemLogCleanupFilter{} + } + if filter.EndTime != nil && filter.StartTime != nil && filter.StartTime.After(*filter.EndTime) { + return 0, infraerrors.BadRequest("OPS_SYSTEM_LOG_CLEANUP_INVALID_RANGE", "invalid time range") + } + + deletedRows, err := s.opsRepo.DeleteSystemLogs(ctx, filter) + if err != nil { + if errors.Is(err, sql.ErrNoRows) { + return 0, nil + } + if strings.Contains(strings.ToLower(err.Error()), "requires at least one filter") { + return 0, infraerrors.BadRequest("OPS_SYSTEM_LOG_CLEANUP_FILTER_REQUIRED", "cleanup requires at least one filter condition") + } + return 0, infraerrors.InternalServer("OPS_SYSTEM_LOG_CLEANUP_FAILED", "Failed to cleanup system logs").WithCause(err) + } + + if auditErr := s.opsRepo.InsertSystemLogCleanupAudit(ctx, &OpsSystemLogCleanupAudit{ + CreatedAt: time.Now().UTC(), + OperatorID: operatorID, + Conditions: marshalSystemLogCleanupConditions(filter), + DeletedRows: deletedRows, + }); auditErr != nil { + // 审计失败不影响主流程,避免运维清理被阻塞。 + log.Printf("[OpsSystemLog] cleanup audit failed: %v", auditErr) + } + return deletedRows, nil +} + +func marshalSystemLogCleanupConditions(filter *OpsSystemLogCleanupFilter) string { + if filter == nil { + return "{}" + } + payload := map[string]any{ + "level": strings.TrimSpace(filter.Level), + "component": strings.TrimSpace(filter.Component), + "request_id": strings.TrimSpace(filter.RequestID), + "client_request_id": strings.TrimSpace(filter.ClientRequestID), + "platform": strings.TrimSpace(filter.Platform), + "model": strings.TrimSpace(filter.Model), + "query": strings.TrimSpace(filter.Query), + } + if filter.UserID != nil { + payload["user_id"] = *filter.UserID + } + if filter.AccountID != nil { + payload["account_id"] = *filter.AccountID + } + if filter.StartTime != nil && !filter.StartTime.IsZero() { + payload["start_time"] = filter.StartTime.UTC().Format(time.RFC3339Nano) + } + if filter.EndTime != nil && !filter.EndTime.IsZero() { + 
payload["end_time"] = filter.EndTime.UTC().Format(time.RFC3339Nano) + } + raw, err := json.Marshal(payload) + if err != nil { + return "{}" + } + return string(raw) +} + +func (s *OpsService) GetSystemLogSinkHealth() OpsSystemLogSinkHealth { + if s == nil || s.systemLogSink == nil { + return OpsSystemLogSinkHealth{} + } + return s.systemLogSink.Health() +} diff --git a/backend/internal/service/ops_system_log_sink.go b/backend/internal/service/ops_system_log_sink.go new file mode 100644 index 00000000..65fa9e3f --- /dev/null +++ b/backend/internal/service/ops_system_log_sink.go @@ -0,0 +1,302 @@ +package service + +import ( + "context" + "encoding/json" + "fmt" + "os" + "strconv" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/Wei-Shaw/sub2api/internal/pkg/logger" + "github.com/Wei-Shaw/sub2api/internal/util/logredact" +) + +type OpsSystemLogSinkHealth struct { + QueueDepth int64 `json:"queue_depth"` + QueueCapacity int64 `json:"queue_capacity"` + DroppedCount uint64 `json:"dropped_count"` + WriteFailed uint64 `json:"write_failed_count"` + WrittenCount uint64 `json:"written_count"` + AvgWriteDelayMs uint64 `json:"avg_write_delay_ms"` + LastError string `json:"last_error"` +} + +type OpsSystemLogSink struct { + opsRepo OpsRepository + + queue chan *logger.LogEvent + + batchSize int + flushInterval time.Duration + + ctx context.Context + cancel context.CancelFunc + wg sync.WaitGroup + + droppedCount uint64 + writeFailed uint64 + writtenCount uint64 + totalDelayNs uint64 + + lastError atomic.Value +} + +func NewOpsSystemLogSink(opsRepo OpsRepository) *OpsSystemLogSink { + ctx, cancel := context.WithCancel(context.Background()) + s := &OpsSystemLogSink{ + opsRepo: opsRepo, + queue: make(chan *logger.LogEvent, 5000), + batchSize: 200, + flushInterval: time.Second, + ctx: ctx, + cancel: cancel, + } + s.lastError.Store("") + return s +} + +func (s *OpsSystemLogSink) Start() { + if s == nil || s.opsRepo == nil { + return + } + s.wg.Add(1) + go s.run() +} + 
+func (s *OpsSystemLogSink) Stop() { + if s == nil { + return + } + s.cancel() + s.wg.Wait() +} + +func (s *OpsSystemLogSink) WriteLogEvent(event *logger.LogEvent) { + if s == nil || event == nil || !s.shouldIndex(event) { + return + } + + select { + case s.queue <- event: + default: + atomic.AddUint64(&s.droppedCount, 1) + } +} + +func (s *OpsSystemLogSink) shouldIndex(event *logger.LogEvent) bool { + level := strings.ToLower(strings.TrimSpace(event.Level)) + switch level { + case "warn", "warning", "error", "fatal", "panic", "dpanic": + return true + } + + component := strings.ToLower(strings.TrimSpace(event.Component)) + if strings.Contains(component, "http.access") { + return true + } + if strings.Contains(component, "audit") { + return true + } + return false +} + +func (s *OpsSystemLogSink) run() { + defer s.wg.Done() + + ticker := time.NewTicker(s.flushInterval) + defer ticker.Stop() + + batch := make([]*logger.LogEvent, 0, s.batchSize) + flush := func() { + if len(batch) == 0 { + return + } + started := time.Now() + inserted, err := s.flushBatch(batch) + delay := time.Since(started) + if err != nil { + atomic.AddUint64(&s.writeFailed, uint64(len(batch))) + s.lastError.Store(err.Error()) + _, _ = fmt.Fprintf(os.Stderr, "time=%s level=WARN msg=\"ops system log sink flush failed\" err=%v batch=%d\n", + time.Now().Format(time.RFC3339Nano), err, len(batch), + ) + } else { + atomic.AddUint64(&s.writtenCount, uint64(inserted)) + atomic.AddUint64(&s.totalDelayNs, uint64(delay.Nanoseconds())) + s.lastError.Store("") + } + batch = batch[:0] + } + + for { + select { + case <-s.ctx.Done(): + flush() + return + case item := <-s.queue: + if item == nil { + continue + } + batch = append(batch, item) + if len(batch) >= s.batchSize { + flush() + } + case <-ticker.C: + flush() + } + } +} + +func (s *OpsSystemLogSink) flushBatch(batch []*logger.LogEvent) (int, error) { + inputs := make([]*OpsInsertSystemLogInput, 0, len(batch)) + for _, event := range batch { + if event == 
nil { + continue + } + createdAt := event.Time.UTC() + if createdAt.IsZero() { + createdAt = time.Now().UTC() + } + + fields := copyMap(event.Fields) + requestID := asString(fields["request_id"]) + clientRequestID := asString(fields["client_request_id"]) + platform := asString(fields["platform"]) + model := asString(fields["model"]) + component := strings.TrimSpace(event.Component) + if fieldComponent := asString(fields["component"]); fieldComponent != "" { + component = fieldComponent + } + if component == "" { + component = "app" + } + + userID := asInt64Ptr(fields["user_id"]) + accountID := asInt64Ptr(fields["account_id"]) + + // Redact message and fields uniformly before writing them to the index. + message := logredact.RedactText(strings.TrimSpace(event.Message)) + redactedExtra := logredact.RedactMap(fields) + extraJSONBytes, _ := json.Marshal(redactedExtra) + extraJSON := string(extraJSONBytes) + if strings.TrimSpace(extraJSON) == "" { + extraJSON = "{}" + } + + inputs = append(inputs, &OpsInsertSystemLogInput{ + CreatedAt: createdAt, + Level: strings.ToLower(strings.TrimSpace(event.Level)), + Component: component, + Message: message, + RequestID: requestID, + ClientRequestID: clientRequestID, + UserID: userID, + AccountID: accountID, + Platform: platform, + Model: model, + ExtraJSON: extraJSON, + }) + } + + if len(inputs) == 0 { + return 0, nil + } + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) // detached from s.ctx: run() performs its last flush after s.ctx is cancelled, and a child of a cancelled context would abort that write + defer cancel() + inserted, err := s.opsRepo.BatchInsertSystemLogs(ctx, inputs) + if err != nil { + return 0, err + } + return int(inserted), nil +} + +func (s *OpsSystemLogSink) Health() OpsSystemLogSinkHealth { + if s == nil { + return OpsSystemLogSinkHealth{} + } + written := atomic.LoadUint64(&s.writtenCount) + totalDelay := atomic.LoadUint64(&s.totalDelayNs) + var avgDelay uint64 + if written > 0 { + avgDelay = (totalDelay / written) / uint64(time.Millisecond) + } + + lastErr, _ := s.lastError.Load().(string) + return OpsSystemLogSinkHealth{ + QueueDepth: int64(len(s.queue)), + QueueCapacity: 
int64(cap(s.queue)), + DroppedCount: atomic.LoadUint64(&s.droppedCount), + WriteFailed: atomic.LoadUint64(&s.writeFailed), + WrittenCount: written, + AvgWriteDelayMs: avgDelay, + LastError: strings.TrimSpace(lastErr), + } +} + +func copyMap(in map[string]any) map[string]any { + if len(in) == 0 { + return map[string]any{} + } + out := make(map[string]any, len(in)) + for k, v := range in { + out[k] = v + } + return out +} + +func asString(v any) string { + switch t := v.(type) { + case string: + return strings.TrimSpace(t) + case fmt.Stringer: + return strings.TrimSpace(t.String()) + default: + return "" + } +} + +func asInt64Ptr(v any) *int64 { + switch t := v.(type) { + case int: + n := int64(t) + if n <= 0 { + return nil + } + return &n + case int64: + n := t + if n <= 0 { + return nil + } + return &n + case float64: + n := int64(t) + if n <= 0 { + return nil + } + return &n + case json.Number: + if n, err := t.Int64(); err == nil { + if n <= 0 { + return nil + } + return &n + } + case string: + raw := strings.TrimSpace(t) + if raw == "" { + return nil + } + if n, err := strconv.ParseInt(raw, 10, 64); err == nil { + if n <= 0 { + return nil + } + return &n + } + } + return nil +} diff --git a/backend/internal/service/ops_system_log_sink_test.go b/backend/internal/service/ops_system_log_sink_test.go new file mode 100644 index 00000000..335ffea4 --- /dev/null +++ b/backend/internal/service/ops_system_log_sink_test.go @@ -0,0 +1,254 @@ +package service + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "strings" + "sync/atomic" + "testing" + "time" + + "github.com/Wei-Shaw/sub2api/internal/pkg/logger" +) + +func TestOpsSystemLogSink_ShouldIndex(t *testing.T) { + sink := &OpsSystemLogSink{} + + cases := []struct { + name string + event *logger.LogEvent + want bool + }{ + { + name: "warn level", + event: &logger.LogEvent{Level: "warn", Component: "app"}, + want: true, + }, + { + name: "error level", + event: &logger.LogEvent{Level: "error", Component: 
"app"}, + want: true, + }, + { + name: "access component", + event: &logger.LogEvent{Level: "info", Component: "http.access"}, + want: true, + }, + { + name: "audit component", + event: &logger.LogEvent{Level: "info", Component: "audit.log_config_change"}, + want: true, + }, + { + name: "plain info", + event: &logger.LogEvent{Level: "info", Component: "app"}, + want: false, + }, + } + + for _, tc := range cases { + if got := sink.shouldIndex(tc.event); got != tc.want { + t.Fatalf("%s: shouldIndex()=%v, want %v", tc.name, got, tc.want) + } + } +} + +func TestOpsSystemLogSink_WriteLogEvent_ShouldDropWhenQueueFull(t *testing.T) { + sink := &OpsSystemLogSink{ + queue: make(chan *logger.LogEvent, 1), + } + + sink.WriteLogEvent(&logger.LogEvent{Level: "warn", Component: "app"}) + sink.WriteLogEvent(&logger.LogEvent{Level: "warn", Component: "app"}) + + if got := len(sink.queue); got != 1 { + t.Fatalf("queue len = %d, want 1", got) + } + if dropped := atomic.LoadUint64(&sink.droppedCount); dropped != 1 { + t.Fatalf("droppedCount = %d, want 1", dropped) + } +} + +func TestOpsSystemLogSink_Health(t *testing.T) { + sink := &OpsSystemLogSink{ + queue: make(chan *logger.LogEvent, 10), + } + sink.lastError.Store("db timeout") + atomic.StoreUint64(&sink.droppedCount, 3) + atomic.StoreUint64(&sink.writeFailed, 2) + atomic.StoreUint64(&sink.writtenCount, 5) + atomic.StoreUint64(&sink.totalDelayNs, uint64(5000000)) // 5ms total -> avg 1ms + sink.queue <- &logger.LogEvent{Level: "warn", Component: "app"} + sink.queue <- &logger.LogEvent{Level: "warn", Component: "app"} + + health := sink.Health() + if health.QueueDepth != 2 { + t.Fatalf("queue depth = %d, want 2", health.QueueDepth) + } + if health.QueueCapacity != 10 { + t.Fatalf("queue capacity = %d, want 10", health.QueueCapacity) + } + if health.DroppedCount != 3 { + t.Fatalf("dropped = %d, want 3", health.DroppedCount) + } + if health.WriteFailed != 2 { + t.Fatalf("write failed = %d, want 2", health.WriteFailed) + } + if 
health.WrittenCount != 5 { + t.Fatalf("written = %d, want 5", health.WrittenCount) + } + if health.AvgWriteDelayMs != 1 { + t.Fatalf("avg delay ms = %d, want 1", health.AvgWriteDelayMs) + } + if health.LastError != "db timeout" { + t.Fatalf("last error = %q, want db timeout", health.LastError) + } +} + +func TestOpsSystemLogSink_StartStopAndFlushSuccess(t *testing.T) { + done := make(chan struct{}, 1) + var captured []*OpsInsertSystemLogInput + repo := &opsRepoMock{ + BatchInsertSystemLogsFn: func(_ context.Context, inputs []*OpsInsertSystemLogInput) (int64, error) { + captured = append(captured, inputs...) + select { + case done <- struct{}{}: + default: + } + return int64(len(inputs)), nil + }, + } + + sink := NewOpsSystemLogSink(repo) + sink.batchSize = 1 + sink.flushInterval = 10 * time.Millisecond + sink.Start() + defer sink.Stop() + + sink.WriteLogEvent(&logger.LogEvent{ + Time: time.Now().UTC(), + Level: "warn", + Component: "http.access", + Message: `authorization="Bearer sk-test-123"`, + Fields: map[string]any{ + "component": "http.access", + "request_id": "req-1", + "client_request_id": "creq-1", + "user_id": "12", + "account_id": json.Number("34"), + "platform": "openai", + "model": "gpt-5", + }, + }) + + select { + case <-done: + case <-time.After(2 * time.Second): + t.Fatalf("timeout waiting for sink flush") + } + + if len(captured) != 1 { + t.Fatalf("captured len = %d, want 1", len(captured)) + } + item := captured[0] + if item.RequestID != "req-1" || item.ClientRequestID != "creq-1" { + t.Fatalf("unexpected request ids: %+v", item) + } + if item.UserID == nil || *item.UserID != 12 { + t.Fatalf("unexpected user_id: %+v", item.UserID) + } + if item.AccountID == nil || *item.AccountID != 34 { + t.Fatalf("unexpected account_id: %+v", item.AccountID) + } + if strings.TrimSpace(item.Message) == "" { + t.Fatalf("message should not be empty") + } + health := sink.Health() + if health.WrittenCount == 0 { + t.Fatalf("written_count should be >0") + } +} + +func 
TestOpsSystemLogSink_FlushFailureUpdatesHealth(t *testing.T) { + repo := &opsRepoMock{ + BatchInsertSystemLogsFn: func(_ context.Context, inputs []*OpsInsertSystemLogInput) (int64, error) { + return 0, errors.New("db unavailable") + }, + } + sink := NewOpsSystemLogSink(repo) + sink.batchSize = 1 + sink.flushInterval = 10 * time.Millisecond + sink.Start() + defer sink.Stop() + + sink.WriteLogEvent(&logger.LogEvent{ + Time: time.Now().UTC(), + Level: "warn", + Component: "app", + Message: "boom", + Fields: map[string]any{}, + }) + + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + health := sink.Health() + if health.WriteFailed > 0 { + if !strings.Contains(health.LastError, "db unavailable") { + t.Fatalf("unexpected last error: %s", health.LastError) + } + return + } + time.Sleep(20 * time.Millisecond) + } + t.Fatalf("write_failed_count not updated") +} + +type stringerValue string + +func (s stringerValue) String() string { return string(s) } + +func TestOpsSystemLogSink_HelperFunctions(t *testing.T) { + src := map[string]any{"a": 1} + cloned := copyMap(src) + src["a"] = 2 + v, ok := cloned["a"].(int) + if !ok || v != 1 { + t.Fatalf("copyMap should create copy") + } + if got := asString(stringerValue(" hello ")); got != "hello" { + t.Fatalf("asString stringer = %q", got) + } + if got := asString(fmt.Errorf("x")); got != "" { + t.Fatalf("asString error should be empty, got %q", got) + } + if got := asString(123); got != "" { + t.Fatalf("asString non-string should be empty, got %q", got) + } + + cases := []struct { + in any + want int64 + ok bool + }{ + {in: 5, want: 5, ok: true}, + {in: int64(6), want: 6, ok: true}, + {in: float64(7), want: 7, ok: true}, + {in: json.Number("8"), want: 8, ok: true}, + {in: "9", want: 9, ok: true}, + {in: "0", ok: false}, + {in: -1, ok: false}, + {in: "abc", ok: false}, + } + for _, tc := range cases { + got := asInt64Ptr(tc.in) + if tc.ok { + if got == nil || *got != tc.want { + 
t.Fatalf("asInt64Ptr(%v) = %+v, want %d", tc.in, got, tc.want) + } + } else if got != nil { + t.Fatalf("asInt64Ptr(%v) should be nil, got %d", tc.in, *got) + } + } +} diff --git a/backend/internal/service/wire.go b/backend/internal/service/wire.go index 310fac1e..5d712f75 100644 --- a/backend/internal/service/wire.go +++ b/backend/internal/service/wire.go @@ -6,6 +6,7 @@ import ( "time" "github.com/Wei-Shaw/sub2api/internal/config" + "github.com/Wei-Shaw/sub2api/internal/pkg/logger" "github.com/google/wire" "github.com/redis/go-redis/v9" ) @@ -193,6 +194,13 @@ func ProvideOpsCleanupService( return svc } +func ProvideOpsSystemLogSink(opsRepo OpsRepository) *OpsSystemLogSink { + sink := NewOpsSystemLogSink(opsRepo) + sink.Start() + logger.SetSink(sink) + return sink +} + // ProvideSoraMediaStorage 初始化 Sora 媒体存储 func ProvideSoraMediaStorage(cfg *config.Config) *SoraMediaStorage { return NewSoraMediaStorage(cfg) @@ -268,6 +276,7 @@ var ProviderSet = wire.NewSet( NewAccountUsageService, NewAccountTestService, NewSettingService, + ProvideOpsSystemLogSink, NewOpsService, ProvideOpsMetricsCollector, ProvideOpsAggregationService, diff --git a/backend/migrations/054_ops_system_logs.sql b/backend/migrations/054_ops_system_logs.sql new file mode 100644 index 00000000..9337ba69 --- /dev/null +++ b/backend/migrations/054_ops_system_logs.sql @@ -0,0 +1,55 @@ +-- 054_ops_system_logs.sql +-- 统一日志索引表与清理审计表 + +CREATE TABLE IF NOT EXISTS ops_system_logs ( + id BIGSERIAL PRIMARY KEY, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + level VARCHAR(16) NOT NULL, + component VARCHAR(128) NOT NULL DEFAULT '', + message TEXT NOT NULL, + request_id VARCHAR(128), + client_request_id VARCHAR(128), + user_id BIGINT, + account_id BIGINT, + platform VARCHAR(32), + model VARCHAR(128), + extra JSONB NOT NULL DEFAULT '{}'::jsonb +); + +CREATE INDEX IF NOT EXISTS idx_ops_system_logs_created_at_id + ON ops_system_logs (created_at DESC, id DESC); + +CREATE INDEX IF NOT EXISTS 
idx_ops_system_logs_level_created_at + ON ops_system_logs (level, created_at DESC); + +CREATE INDEX IF NOT EXISTS idx_ops_system_logs_component_created_at + ON ops_system_logs (component, created_at DESC); + +CREATE INDEX IF NOT EXISTS idx_ops_system_logs_request_id + ON ops_system_logs (request_id); + +CREATE INDEX IF NOT EXISTS idx_ops_system_logs_client_request_id + ON ops_system_logs (client_request_id); + +CREATE INDEX IF NOT EXISTS idx_ops_system_logs_user_id_created_at + ON ops_system_logs (user_id, created_at DESC); + +CREATE INDEX IF NOT EXISTS idx_ops_system_logs_account_id_created_at + ON ops_system_logs (account_id, created_at DESC); + +CREATE INDEX IF NOT EXISTS idx_ops_system_logs_platform_model_created_at + ON ops_system_logs (platform, model, created_at DESC); + +CREATE INDEX IF NOT EXISTS idx_ops_system_logs_message_search + ON ops_system_logs USING GIN (to_tsvector('simple', COALESCE(message, ''))); + +CREATE TABLE IF NOT EXISTS ops_system_log_cleanup_audits ( + id BIGSERIAL PRIMARY KEY, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + operator_id BIGINT NOT NULL, + conditions JSONB NOT NULL DEFAULT '{}'::jsonb, + deleted_rows BIGINT NOT NULL DEFAULT 0 +); + +CREATE INDEX IF NOT EXISTS idx_ops_system_log_cleanup_audits_created_at + ON ops_system_log_cleanup_audits (created_at DESC, id DESC); diff --git a/deploy/.env.example b/deploy/.env.example index ec9150e1..cdc9db37 100644 --- a/deploy/.env.example +++ b/deploy/.env.example @@ -20,6 +20,52 @@ SERVER_PORT=8080 # Server mode: release or debug SERVER_MODE=release +# ----------------------------------------------------------------------------- +# Logging Configuration +# 日志配置 +# ----------------------------------------------------------------------------- +# 日志级别:debug/info/warn/error +LOG_LEVEL=info +# 日志格式:json/console +LOG_FORMAT=json +# 每条日志附带的 service 字段 +LOG_SERVICE_NAME=sub2api +# 每条日志附带的 env 字段 +LOG_ENV=production +# 是否输出调用方位置信息 +LOG_CALLER=true +# 堆栈输出阈值:none/error/fatal 
+LOG_STACKTRACE_LEVEL=error + +# 输出开关(建议容器内保持双输出) +# 是否输出到 stdout/stderr +LOG_OUTPUT_TO_STDOUT=true +# 是否输出到文件 +LOG_OUTPUT_TO_FILE=true +# 日志文件路径(留空自动推导): +# - 设置 DATA_DIR:${DATA_DIR}/logs/sub2api.log +# - 未设置 DATA_DIR:/app/data/logs/sub2api.log +LOG_OUTPUT_FILE_PATH= + +# 滚动配置 +# 单文件最大体积(MB) +LOG_ROTATION_MAX_SIZE_MB=100 +# 保留历史文件数量(0 表示不限制) +LOG_ROTATION_MAX_BACKUPS=10 +# 历史日志保留天数(0 表示不限制) +LOG_ROTATION_MAX_AGE_DAYS=7 +# 是否压缩历史日志 +LOG_ROTATION_COMPRESS=true +# 滚动文件时间戳是否使用本地时间 +LOG_ROTATION_LOCAL_TIME=true + +# 采样配置(高频重复日志降噪) +LOG_SAMPLING_ENABLED=false +# 每秒前 N 条日志不采样 +LOG_SAMPLING_INITIAL=100 +# 之后每 N 条保留 1 条 +LOG_SAMPLING_THEREAFTER=100 + # Global max request body size in bytes (default: 100MB) # 全局最大请求体大小(字节,默认 100MB) # Applies to all requests, especially important for h2c first request memory protection diff --git a/deploy/config.example.yaml b/deploy/config.example.yaml index 9ab3bfd0..2b553321 100644 --- a/deploy/config.example.yaml +++ b/deploy/config.example.yaml @@ -286,6 +286,70 @@ gateway: # profile_2: # name: "Custom Profile 2" +# ============================================================================= +# Logging Configuration +# 日志配置 +# ============================================================================= +log: + # Log level: debug/info/warn/error + # 日志级别:debug/info/warn/error + level: "info" + # Log format: json/console + # 日志格式:json/console + format: "json" + # Service name field written into each log line + # 每条日志都会附带 service 字段 + service_name: "sub2api" + # Environment field written into each log line + # 每条日志都会附带 env 字段 + env: "production" + # Include caller information + # 是否输出调用方位置信息 + caller: true + # Stacktrace threshold: none/error/fatal + # 堆栈输出阈值:none/error/fatal + stacktrace_level: "error" + output: + # Keep stdout/stderr output for container log collection + # 保持标准输出用于容器日志采集 + to_stdout: true + # Enable file output (default path auto-derived) + # 启用文件输出(默认路径自动推导) + to_file: true + # Empty means: + # - DATA_DIR set: 
{{DATA_DIR}}/logs/sub2api.log + # - otherwise: /app/data/logs/sub2api.log + # 留空时: + # - 设置 DATA_DIR:{{DATA_DIR}}/logs/sub2api.log + # - 否则:/app/data/logs/sub2api.log + file_path: "" + rotation: + # Max file size before rotation (MB) + # 单文件滚动阈值(MB) + max_size_mb: 100 + # Number of rotated files to keep (0 means unlimited) + # 保留历史文件数量(0 表示不限制) + max_backups: 10 + # Number of days to keep old log files (0 means unlimited) + # 历史日志保留天数(0 表示不限制) + max_age_days: 7 + # Compress rotated files + # 是否压缩历史日志 + compress: true + # Use local time for timestamp in rotated filename + # 滚动文件名时间戳使用本地时区 + local_time: true + sampling: + # Enable zap sampler (reduce high-frequency repetitive logs) + # 启用 zap 采样(减少高频重复日志) + enabled: false + # Number of first entries per second to always log + # 每秒无采样保留的前 N 条日志 + initial: 100 + # Thereafter keep 1 out of N entries per second + # 之后每 N 条保留 1 条 + thereafter: 100 + # ============================================================================= # Sora Direct Client Configuration # Sora 直连配置 diff --git a/frontend/src/api/admin/ops.ts b/frontend/src/api/admin/ops.ts index 7e70aacb..33cb62f4 100644 --- a/frontend/src/api/admin/ops.ts +++ b/frontend/src/api/admin/ops.ts @@ -850,6 +850,77 @@ export interface OpsAggregationSettings { aggregation_enabled: boolean } +export interface OpsRuntimeLogConfig { + level: 'debug' | 'info' | 'warn' | 'error' + enable_sampling: boolean + sampling_initial: number + sampling_thereafter: number + caller: boolean + stacktrace_level: 'none' | 'error' | 'fatal' + retention_days: number + source?: string + updated_at?: string + updated_by_user_id?: number +} + +export interface OpsSystemLog { + id: number + created_at: string + level: string + component: string + message: string + request_id?: string + client_request_id?: string + user_id?: number | null + account_id?: number | null + platform?: string + model?: string + extra?: Record<string, any> +} + +export type OpsSystemLogListResponse = PaginatedResponse<OpsSystemLog> + +export
interface OpsSystemLogQuery { + page?: number + page_size?: number + time_range?: '5m' | '30m' | '1h' | '6h' | '24h' | '7d' | '30d' + start_time?: string + end_time?: string + level?: string + component?: string + request_id?: string + client_request_id?: string + user_id?: number | null + account_id?: number | null + platform?: string + model?: string + q?: string +} + +export interface OpsSystemLogCleanupRequest { + start_time?: string + end_time?: string + level?: string + component?: string + request_id?: string + client_request_id?: string + user_id?: number | null + account_id?: number | null + platform?: string + model?: string + q?: string +} + +export interface OpsSystemLogSinkHealth { + queue_depth: number + queue_capacity: number + dropped_count: number + write_failed_count: number + written_count: number + avg_write_delay_ms: number + last_error?: string +} + export interface OpsErrorLog { id: number created_at: string @@ -1205,6 +1276,36 @@ export async function updateAlertRuntimeSettings(config: OpsAlertRuntimeSettings return data } +export async function getRuntimeLogConfig(): Promise<OpsRuntimeLogConfig> { + const { data } = await apiClient.get<OpsRuntimeLogConfig>('/admin/ops/runtime/logging') + return data +} + +export async function updateRuntimeLogConfig(config: OpsRuntimeLogConfig): Promise<OpsRuntimeLogConfig> { + const { data } = await apiClient.put<OpsRuntimeLogConfig>('/admin/ops/runtime/logging', config) + return data +} + +export async function resetRuntimeLogConfig(): Promise<OpsRuntimeLogConfig> { + const { data } = await apiClient.post<OpsRuntimeLogConfig>('/admin/ops/runtime/logging/reset') + return data +} + +export async function listSystemLogs(params: OpsSystemLogQuery): Promise<OpsSystemLogListResponse> { + const { data } = await apiClient.get<OpsSystemLogListResponse>('/admin/ops/system-logs', { params }) + return data +} + +export async function cleanupSystemLogs(payload: OpsSystemLogCleanupRequest): Promise<{ deleted: number }> { + const { data } = await apiClient.post<{ deleted: number }>('/admin/ops/system-logs/cleanup', payload) + return data +} + +export async function getSystemLogSinkHealth():
Promise<OpsSystemLogSinkHealth> { + const { data } = await apiClient.get<OpsSystemLogSinkHealth>('/admin/ops/system-logs/health') + return data +} + // Advanced settings (DB-backed) export async function getAdvancedSettings(): Promise { const { data } = await apiClient.get('/admin/ops/advanced-settings') @@ -1272,10 +1373,16 @@ export const opsAPI = { updateEmailNotificationConfig, getAlertRuntimeSettings, updateAlertRuntimeSettings, + getRuntimeLogConfig, + updateRuntimeLogConfig, + resetRuntimeLogConfig, getAdvancedSettings, updateAdvancedSettings, getMetricThresholds, - updateMetricThresholds + updateMetricThresholds, + listSystemLogs, + cleanupSystemLogs, + getSystemLogSinkHealth } export default opsAPI diff --git a/frontend/src/views/admin/ops/OpsDashboard.vue b/frontend/src/views/admin/ops/OpsDashboard.vue index fa9d41f1..11f20f15 100644 --- a/frontend/src/views/admin/ops/OpsDashboard.vue +++ b/frontend/src/views/admin/ops/OpsDashboard.vue @@ -96,6 +96,13 @@ + + +