feat(backup): 备份/恢复异步化,解决 504 超时

POST /backups 和 POST /backups/:id/restore 改为异步:立即返回 HTTP 202,
后台 goroutine 独立执行 pg_dump → gzip → S3 上传,前端每 2s 轮询状态。

后端:
- 新增 StartBackup/StartRestore 方法,后台 goroutine 不依赖 HTTP 连接
- Graceful shutdown 等待活跃操作完成,启动时清理孤立 running 记录
- BackupRecord 新增 progress/restore_status 字段支持进度和恢复状态追踪

前端:
- 创建备份/恢复后轮询 GET /backups/:id 直到完成或失败
- 标签页切换暂停/恢复轮询,组件卸载清理定时器
- 正确处理 409(备份进行中)和轮询超时

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
QTom
2026-03-16 20:03:08 +08:00
parent f42c8f2abe
commit c1fab7f8d8
9 changed files with 780 additions and 68 deletions

View File

@@ -134,6 +134,30 @@ func (m *mockDumper) Restore(_ context.Context, data io.Reader) error {
return nil
}
// blockingDumper 可控延迟的 dumper用于测试异步行为
type blockingDumper struct {
blockCh chan struct{}
data []byte
restErr error
}
func (d *blockingDumper) Dump(ctx context.Context) (io.ReadCloser, error) {
select {
case <-d.blockCh:
case <-ctx.Done():
return nil, ctx.Err()
}
return io.NopCloser(bytes.NewReader(d.data)), nil
}
func (d *blockingDumper) Restore(_ context.Context, data io.Reader) error {
if d.restErr != nil {
return d.restErr
}
_, _ = io.ReadAll(data)
return nil
}
type mockObjectStore struct {
objects map[string][]byte
mu sync.Mutex
@@ -179,7 +203,7 @@ func (m *mockObjectStore) HeadBucket(_ context.Context) error {
return nil
}
func newTestBackupService(repo *mockSettingRepo, dumper *mockDumper, store *mockObjectStore) *BackupService {
func newTestBackupService(repo *mockSettingRepo, dumper DBDumper, store *mockObjectStore) *BackupService {
cfg := &config.Config{
Database: config.DatabaseConfig{
Host: "localhost",
@@ -361,9 +385,9 @@ func TestBackupService_CreateBackup_ConcurrentBlocked(t *testing.T) {
svc := newTestBackupService(repo, dumper, store)
// 手动设置 backingUp 标志
svc.mu.Lock()
svc.opMu.Lock()
svc.backingUp = true
svc.mu.Unlock()
svc.opMu.Unlock()
_, err := svc.CreateBackup(context.Background(), "manual", 14)
require.ErrorIs(t, err, ErrBackupInProgress)
@@ -526,3 +550,154 @@ func TestBackupService_LoadS3Config_Corrupted(t *testing.T) {
require.Error(t, err)
require.Nil(t, cfg)
}
// ─── Async Backup Tests ───
func TestStartBackup_ReturnsImmediately(t *testing.T) {
repo := newMockSettingRepo()
seedS3Config(t, repo)
dumper := &blockingDumper{blockCh: make(chan struct{}), data: []byte("data")}
store := newMockObjectStore()
svc := newTestBackupService(repo, dumper, store)
record, err := svc.StartBackup(context.Background(), "manual", 14)
require.NoError(t, err)
require.Equal(t, "running", record.Status)
require.NotEmpty(t, record.ID)
// 释放 dumper 让后台完成
close(dumper.blockCh)
svc.wg.Wait()
// 验证最终状态
final, err := svc.GetBackupRecord(context.Background(), record.ID)
require.NoError(t, err)
require.Equal(t, "completed", final.Status)
require.Greater(t, final.SizeBytes, int64(0))
}
func TestStartBackup_ConcurrentBlocked(t *testing.T) {
repo := newMockSettingRepo()
seedS3Config(t, repo)
dumper := &blockingDumper{blockCh: make(chan struct{}), data: []byte("data")}
store := newMockObjectStore()
svc := newTestBackupService(repo, dumper, store)
// 第一次启动
_, err := svc.StartBackup(context.Background(), "manual", 14)
require.NoError(t, err)
// 第二次应被阻塞
_, err = svc.StartBackup(context.Background(), "manual", 14)
require.ErrorIs(t, err, ErrBackupInProgress)
close(dumper.blockCh)
svc.wg.Wait()
}
func TestStartBackup_ShuttingDown(t *testing.T) {
repo := newMockSettingRepo()
seedS3Config(t, repo)
svc := newTestBackupService(repo, &mockDumper{dumpData: []byte("data")}, newMockObjectStore())
svc.shuttingDown.Store(true)
_, err := svc.StartBackup(context.Background(), "manual", 14)
require.Error(t, err)
require.Contains(t, err.Error(), "shutting down")
}
func TestRecoverStaleRecords(t *testing.T) {
repo := newMockSettingRepo()
svc := newTestBackupService(repo, &mockDumper{}, newMockObjectStore())
// 模拟一条孤立的 running 记录
_ = svc.saveRecord(context.Background(), &BackupRecord{
ID: "stale-1",
Status: "running",
StartedAt: time.Now().Add(-1 * time.Hour).Format(time.RFC3339),
})
// 模拟一条孤立的恢复中记录
_ = svc.saveRecord(context.Background(), &BackupRecord{
ID: "stale-2",
Status: "completed",
RestoreStatus: "running",
StartedAt: time.Now().Add(-1 * time.Hour).Format(time.RFC3339),
})
svc.recoverStaleRecords()
r1, _ := svc.GetBackupRecord(context.Background(), "stale-1")
require.Equal(t, "failed", r1.Status)
require.Contains(t, r1.ErrorMsg, "server restart")
r2, _ := svc.GetBackupRecord(context.Background(), "stale-2")
require.Equal(t, "failed", r2.RestoreStatus)
require.Contains(t, r2.RestoreError, "server restart")
}
func TestGracefulShutdown(t *testing.T) {
repo := newMockSettingRepo()
seedS3Config(t, repo)
dumper := &blockingDumper{blockCh: make(chan struct{}), data: []byte("data")}
store := newMockObjectStore()
svc := newTestBackupService(repo, dumper, store)
_, err := svc.StartBackup(context.Background(), "manual", 14)
require.NoError(t, err)
// Stop 应该等待备份完成
done := make(chan struct{})
go func() {
svc.Stop()
close(done)
}()
// 短暂等待确认 Stop 还在等待
select {
case <-done:
t.Fatal("Stop returned before backup finished")
case <-time.After(100 * time.Millisecond):
// 预期Stop 还在等待
}
// 释放备份
close(dumper.blockCh)
// 现在 Stop 应该完成
select {
case <-done:
// 预期
case <-time.After(5 * time.Second):
t.Fatal("Stop did not return after backup finished")
}
}
func TestStartRestore_Async(t *testing.T) {
repo := newMockSettingRepo()
seedS3Config(t, repo)
dumpContent := "-- PostgreSQL dump\nCREATE TABLE test (id int);\n"
dumper := &mockDumper{dumpData: []byte(dumpContent)}
store := newMockObjectStore()
svc := newTestBackupService(repo, dumper, store)
// 先创建一个备份(同步方式)
record, err := svc.CreateBackup(context.Background(), "manual", 14)
require.NoError(t, err)
// 异步恢复
restored, err := svc.StartRestore(context.Background(), record.ID)
require.NoError(t, err)
require.Equal(t, "running", restored.RestoreStatus)
svc.wg.Wait()
// 验证最终状态
final, err := svc.GetBackupRecord(context.Background(), record.ID)
require.NoError(t, err)
require.Equal(t, "completed", final.RestoreStatus)
}