feat(基础设施): 添加运维监控功能的基础配置和依赖
- 更新 .gitignore 排除临时文件 - 添加 ops 监控相关配置项到 config.yaml - 更新 Go 依赖包(go.mod/go.sum) - 扩展 config.go 支持 ops 监控配置 - 新增上下文键定义(ClientRequestID)
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -123,6 +123,4 @@ backend/cmd/server/server
|
|||||||
deploy/docker-compose.override.yml
|
deploy/docker-compose.override.yml
|
||||||
.gocache/
|
.gocache/
|
||||||
vite.config.js
|
vite.config.js
|
||||||
!docs/
|
|
||||||
docs/*
|
docs/*
|
||||||
!docs/dependency-security.md
|
|
||||||
|
|||||||
@@ -8,9 +8,11 @@ require (
|
|||||||
github.com/golang-jwt/jwt/v5 v5.2.2
|
github.com/golang-jwt/jwt/v5 v5.2.2
|
||||||
github.com/google/uuid v1.6.0
|
github.com/google/uuid v1.6.0
|
||||||
github.com/google/wire v0.7.0
|
github.com/google/wire v0.7.0
|
||||||
|
github.com/gorilla/websocket v1.5.3
|
||||||
github.com/imroc/req/v3 v3.57.0
|
github.com/imroc/req/v3 v3.57.0
|
||||||
github.com/lib/pq v1.10.9
|
github.com/lib/pq v1.10.9
|
||||||
github.com/redis/go-redis/v9 v9.17.2
|
github.com/redis/go-redis/v9 v9.17.2
|
||||||
|
github.com/shirou/gopsutil/v4 v4.25.6
|
||||||
github.com/spf13/viper v1.18.2
|
github.com/spf13/viper v1.18.2
|
||||||
github.com/stretchr/testify v1.11.1
|
github.com/stretchr/testify v1.11.1
|
||||||
github.com/testcontainers/testcontainers-go/modules/postgres v0.40.0
|
github.com/testcontainers/testcontainers-go/modules/postgres v0.40.0
|
||||||
@@ -104,9 +106,9 @@ require (
|
|||||||
github.com/quic-go/quic-go v0.57.1 // indirect
|
github.com/quic-go/quic-go v0.57.1 // indirect
|
||||||
github.com/refraction-networking/utls v1.8.1 // indirect
|
github.com/refraction-networking/utls v1.8.1 // indirect
|
||||||
github.com/rivo/uniseg v0.2.0 // indirect
|
github.com/rivo/uniseg v0.2.0 // indirect
|
||||||
|
github.com/robfig/cron/v3 v3.0.1 // indirect
|
||||||
github.com/sagikazarmark/locafero v0.4.0 // indirect
|
github.com/sagikazarmark/locafero v0.4.0 // indirect
|
||||||
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
|
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
|
||||||
github.com/shirou/gopsutil/v4 v4.25.6 // indirect
|
|
||||||
github.com/sirupsen/logrus v1.9.3 // indirect
|
github.com/sirupsen/logrus v1.9.3 // indirect
|
||||||
github.com/sourcegraph/conc v0.3.0 // indirect
|
github.com/sourcegraph/conc v0.3.0 // indirect
|
||||||
github.com/spaolacci/murmur3 v1.1.0 // indirect
|
github.com/spaolacci/murmur3 v1.1.0 // indirect
|
||||||
|
|||||||
@@ -113,6 +113,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
|||||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||||
github.com/google/wire v0.7.0 h1:JxUKI6+CVBgCO2WToKy/nQk0sS+amI9z9EjVmdaocj4=
|
github.com/google/wire v0.7.0 h1:JxUKI6+CVBgCO2WToKy/nQk0sS+amI9z9EjVmdaocj4=
|
||||||
github.com/google/wire v0.7.0/go.mod h1:n6YbUQD9cPKTnHXEBN2DXlOp/mVADhVErcMFb0v3J18=
|
github.com/google/wire v0.7.0/go.mod h1:n6YbUQD9cPKTnHXEBN2DXlOp/mVADhVErcMFb0v3J18=
|
||||||
|
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
|
||||||
|
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
||||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 h1:NmZ1PKzSTQbuGHw9DGPFomqkkLWMC+vZCkfs+FHv1Vg=
|
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 h1:NmZ1PKzSTQbuGHw9DGPFomqkkLWMC+vZCkfs+FHv1Vg=
|
||||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3/go.mod h1:zQrxl1YP88HQlA6i9c63DSVPFklWpGX4OWAc9bFuaH4=
|
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3/go.mod h1:zQrxl1YP88HQlA6i9c63DSVPFklWpGX4OWAc9bFuaH4=
|
||||||
github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
|
github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
|
||||||
@@ -220,6 +222,8 @@ github.com/refraction-networking/utls v1.8.1 h1:yNY1kapmQU8JeM1sSw2H2asfTIwWxIkr
|
|||||||
github.com/refraction-networking/utls v1.8.1/go.mod h1:jkSOEkLqn+S/jtpEHPOsVv/4V4EVnelwbMQl4vCWXAM=
|
github.com/refraction-networking/utls v1.8.1/go.mod h1:jkSOEkLqn+S/jtpEHPOsVv/4V4EVnelwbMQl4vCWXAM=
|
||||||
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
|
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
|
||||||
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
|
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
|
||||||
|
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
|
||||||
|
github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
|
||||||
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
|
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
|
||||||
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
|
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
|
||||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||||
|
|||||||
@@ -42,6 +42,7 @@ type Config struct {
|
|||||||
Turnstile TurnstileConfig `mapstructure:"turnstile"`
|
Turnstile TurnstileConfig `mapstructure:"turnstile"`
|
||||||
Database DatabaseConfig `mapstructure:"database"`
|
Database DatabaseConfig `mapstructure:"database"`
|
||||||
Redis RedisConfig `mapstructure:"redis"`
|
Redis RedisConfig `mapstructure:"redis"`
|
||||||
|
Ops OpsConfig `mapstructure:"ops"`
|
||||||
JWT JWTConfig `mapstructure:"jwt"`
|
JWT JWTConfig `mapstructure:"jwt"`
|
||||||
Default DefaultConfig `mapstructure:"default"`
|
Default DefaultConfig `mapstructure:"default"`
|
||||||
RateLimit RateLimitConfig `mapstructure:"rate_limit"`
|
RateLimit RateLimitConfig `mapstructure:"rate_limit"`
|
||||||
@@ -304,6 +305,47 @@ func (r *RedisConfig) Address() string {
|
|||||||
return fmt.Sprintf("%s:%d", r.Host, r.Port)
|
return fmt.Sprintf("%s:%d", r.Host, r.Port)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type OpsConfig struct {
|
||||||
|
// Enabled controls whether ops features should run.
|
||||||
|
//
|
||||||
|
// NOTE: vNext still has a DB-backed feature flag (ops_monitoring_enabled) for runtime on/off.
|
||||||
|
// This config flag is the "hard switch" for deployments that want to disable ops completely.
|
||||||
|
Enabled bool `mapstructure:"enabled"`
|
||||||
|
|
||||||
|
// UsePreaggregatedTables prefers ops_metrics_hourly/daily for long-window dashboard queries.
|
||||||
|
UsePreaggregatedTables bool `mapstructure:"use_preaggregated_tables"`
|
||||||
|
|
||||||
|
// Cleanup controls periodic deletion of old ops data to prevent unbounded growth.
|
||||||
|
Cleanup OpsCleanupConfig `mapstructure:"cleanup"`
|
||||||
|
|
||||||
|
// MetricsCollectorCache controls Redis caching for expensive per-window collector queries.
|
||||||
|
MetricsCollectorCache OpsMetricsCollectorCacheConfig `mapstructure:"metrics_collector_cache"`
|
||||||
|
|
||||||
|
// Pre-aggregation configuration.
|
||||||
|
Aggregation OpsAggregationConfig `mapstructure:"aggregation"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type OpsCleanupConfig struct {
|
||||||
|
Enabled bool `mapstructure:"enabled"`
|
||||||
|
Schedule string `mapstructure:"schedule"`
|
||||||
|
|
||||||
|
// Retention days (0 disables that cleanup target).
|
||||||
|
//
|
||||||
|
// vNext requirement: default 30 days across ops datasets.
|
||||||
|
ErrorLogRetentionDays int `mapstructure:"error_log_retention_days"`
|
||||||
|
MinuteMetricsRetentionDays int `mapstructure:"minute_metrics_retention_days"`
|
||||||
|
HourlyMetricsRetentionDays int `mapstructure:"hourly_metrics_retention_days"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type OpsAggregationConfig struct {
|
||||||
|
Enabled bool `mapstructure:"enabled"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type OpsMetricsCollectorCacheConfig struct {
|
||||||
|
Enabled bool `mapstructure:"enabled"`
|
||||||
|
TTL time.Duration `mapstructure:"ttl"`
|
||||||
|
}
|
||||||
|
|
||||||
type JWTConfig struct {
|
type JWTConfig struct {
|
||||||
Secret string `mapstructure:"secret"`
|
Secret string `mapstructure:"secret"`
|
||||||
ExpireHour int `mapstructure:"expire_hour"`
|
ExpireHour int `mapstructure:"expire_hour"`
|
||||||
@@ -489,6 +531,20 @@ func setDefaults() {
|
|||||||
viper.SetDefault("redis.pool_size", 128)
|
viper.SetDefault("redis.pool_size", 128)
|
||||||
viper.SetDefault("redis.min_idle_conns", 10)
|
viper.SetDefault("redis.min_idle_conns", 10)
|
||||||
|
|
||||||
|
// Ops (vNext)
|
||||||
|
viper.SetDefault("ops.enabled", true)
|
||||||
|
viper.SetDefault("ops.use_preaggregated_tables", false)
|
||||||
|
viper.SetDefault("ops.cleanup.enabled", true)
|
||||||
|
viper.SetDefault("ops.cleanup.schedule", "0 2 * * *")
|
||||||
|
// Retention days: vNext defaults to 30 days across ops datasets.
|
||||||
|
viper.SetDefault("ops.cleanup.error_log_retention_days", 30)
|
||||||
|
viper.SetDefault("ops.cleanup.minute_metrics_retention_days", 30)
|
||||||
|
viper.SetDefault("ops.cleanup.hourly_metrics_retention_days", 30)
|
||||||
|
viper.SetDefault("ops.aggregation.enabled", true)
|
||||||
|
viper.SetDefault("ops.metrics_collector_cache.enabled", true)
|
||||||
|
// TTL should be slightly larger than collection interval (1m) to maximize cross-replica cache hits.
|
||||||
|
viper.SetDefault("ops.metrics_collector_cache.ttl", 65*time.Second)
|
||||||
|
|
||||||
// JWT
|
// JWT
|
||||||
viper.SetDefault("jwt.secret", "")
|
viper.SetDefault("jwt.secret", "")
|
||||||
viper.SetDefault("jwt.expire_hour", 24)
|
viper.SetDefault("jwt.expire_hour", 24)
|
||||||
@@ -687,6 +743,21 @@ func (c *Config) Validate() error {
|
|||||||
if c.Gateway.Scheduling.SlotCleanupInterval < 0 {
|
if c.Gateway.Scheduling.SlotCleanupInterval < 0 {
|
||||||
return fmt.Errorf("gateway.scheduling.slot_cleanup_interval must be non-negative")
|
return fmt.Errorf("gateway.scheduling.slot_cleanup_interval must be non-negative")
|
||||||
}
|
}
|
||||||
|
if c.Ops.MetricsCollectorCache.TTL < 0 {
|
||||||
|
return fmt.Errorf("ops.metrics_collector_cache.ttl must be non-negative")
|
||||||
|
}
|
||||||
|
if c.Ops.Cleanup.ErrorLogRetentionDays < 0 {
|
||||||
|
return fmt.Errorf("ops.cleanup.error_log_retention_days must be non-negative")
|
||||||
|
}
|
||||||
|
if c.Ops.Cleanup.MinuteMetricsRetentionDays < 0 {
|
||||||
|
return fmt.Errorf("ops.cleanup.minute_metrics_retention_days must be non-negative")
|
||||||
|
}
|
||||||
|
if c.Ops.Cleanup.HourlyMetricsRetentionDays < 0 {
|
||||||
|
return fmt.Errorf("ops.cleanup.hourly_metrics_retention_days must be non-negative")
|
||||||
|
}
|
||||||
|
if c.Ops.Cleanup.Enabled && strings.TrimSpace(c.Ops.Cleanup.Schedule) == "" {
|
||||||
|
return fmt.Errorf("ops.cleanup.schedule is required when ops.cleanup.enabled=true")
|
||||||
|
}
|
||||||
if c.Concurrency.PingInterval < 5 || c.Concurrency.PingInterval > 30 {
|
if c.Concurrency.PingInterval < 5 || c.Concurrency.PingInterval > 30 {
|
||||||
return fmt.Errorf("concurrency.ping_interval must be between 5-30 seconds")
|
return fmt.Errorf("concurrency.ping_interval must be between 5-30 seconds")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,4 +7,10 @@ type Key string
|
|||||||
const (
|
const (
|
||||||
// ForcePlatform 强制平台(用于 /antigravity 路由),由 middleware.ForcePlatform 设置
|
// ForcePlatform 强制平台(用于 /antigravity 路由),由 middleware.ForcePlatform 设置
|
||||||
ForcePlatform Key = "ctx_force_platform"
|
ForcePlatform Key = "ctx_force_platform"
|
||||||
|
|
||||||
|
// ClientRequestID 客户端请求的唯一标识,用于追踪请求全生命周期(用于 Ops 监控与排障)。
|
||||||
|
ClientRequestID Key = "ctx_client_request_id"
|
||||||
|
|
||||||
|
// RetryCount 表示当前请求在网关层的重试次数(用于 Ops 记录与排障)。
|
||||||
|
RetryCount Key = "ctx_retry_count"
|
||||||
)
|
)
|
||||||
|
|||||||
35
config.yaml
35
config.yaml
@@ -221,6 +221,41 @@ redis:
|
|||||||
# 数据库编号(0-15)
|
# 数据库编号(0-15)
|
||||||
db: 0
|
db: 0
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Ops Monitoring (Optional)
|
||||||
|
# 运维监控 (可选)
|
||||||
|
# =============================================================================
|
||||||
|
ops:
|
||||||
|
# Hard switch: disable all ops background jobs and APIs when false
|
||||||
|
# 硬开关:为 false 时禁用所有 Ops 后台任务与接口
|
||||||
|
enabled: true
|
||||||
|
|
||||||
|
# Prefer pre-aggregated tables (ops_metrics_hourly/ops_metrics_daily) for long-window dashboard queries.
|
||||||
|
# 优先使用预聚合表(用于长时间窗口查询性能)
|
||||||
|
use_preaggregated_tables: false
|
||||||
|
|
||||||
|
# Data cleanup configuration
|
||||||
|
# 数据清理配置(vNext 默认统一保留 30 天)
|
||||||
|
cleanup:
|
||||||
|
enabled: true
|
||||||
|
# Cron expression (minute hour dom month dow), e.g. "0 2 * * *" = daily at 2 AM
|
||||||
|
# Cron 表达式(分 时 日 月 周),例如 "0 2 * * *" = 每天凌晨 2 点
|
||||||
|
schedule: "0 2 * * *"
|
||||||
|
error_log_retention_days: 30
|
||||||
|
minute_metrics_retention_days: 30
|
||||||
|
hourly_metrics_retention_days: 30
|
||||||
|
|
||||||
|
# Pre-aggregation configuration
|
||||||
|
# 预聚合任务配置
|
||||||
|
aggregation:
|
||||||
|
enabled: true
|
||||||
|
|
||||||
|
# OpsMetricsCollector Redis cache (reduces duplicate expensive window aggregation in multi-replica deployments)
|
||||||
|
# 指标采集 Redis 缓存(多副本部署时减少重复计算)
|
||||||
|
metrics_collector_cache:
|
||||||
|
enabled: true
|
||||||
|
ttl: 65s
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# JWT Configuration
|
# JWT Configuration
|
||||||
# JWT 配置
|
# JWT 配置
|
||||||
|
|||||||
Reference in New Issue
Block a user