From 75e1b40fb4f9adbeaabdae5d60d0041454db06b4 Mon Sep 17 00:00:00 2001 From: erio Date: Thu, 23 Apr 2026 18:33:28 +0800 Subject: [PATCH] fix(payment): ack unknown-order webhooks with 2xx to stop provider retries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a sentinel ErrOrderNotFound in the payment service layer so the webhook handler can distinguish "the out_trade_no does not exist in our DB" from other fulfillment failures, and downgrade the former to a WARN log + success response. Background - Providers (Stripe, Alipay, Wxpay, EasyPay, ...) retry webhooks whenever we answer non-2xx. When a webhook endpoint is misconfigured (e.g. a foreign environment points at us) or our orders table has been wiped, we return 500 forever and the provider retries for days, spamming logs. - The old code also collapsed "order not found" and "DB query failed" into the same branch — a DB blip would be reported as "order not found" and swallowed. Service layer (payment_fulfillment.go) - Add `var ErrOrderNotFound = errors.New("payment order not found")`. - In HandlePaymentNotification, distinguish the two error paths: * dbent.IsNotFound(err) → wrap with ErrOrderNotFound so callers can errors.Is(...) it. * anything else → wrap the original err with %w so it still bubbles up as 500 and the provider retries (DB hiccup should be retried). Handler layer (payment_webhook_handler.go) - Before returning 500, check errors.Is(err, service.ErrOrderNotFound): emit a WARN (with provider / outTradeNo / tradeNo for discoverability), then call writeSuccessResponse so the provider sees its expected 2xx body (Stripe empty body / Wxpay JSON / others "success"). - Other errors retain the existing 500 behavior. Monitoring note: because this path now swallows unknown-order webhooks silently from the provider's perspective, the WARN log line is the only signal. Alert on "unknown order, acking to stop retries" if you want visibility into misrouted webhooks or accidental data loss. --- .../internal/handler/payment_webhook_handler.go | 15 +++++++++++++++ backend/internal/service/payment_fulfillment.go | 14 +++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/backend/internal/handler/payment_webhook_handler.go b/backend/internal/handler/payment_webhook_handler.go index c06a5b7e..9ae799fd 100644 --- a/backend/internal/handler/payment_webhook_handler.go +++ b/backend/internal/handler/payment_webhook_handler.go @@ -2,6 +2,7 @@ package handler import ( "context" + "errors" "fmt" "io" "log/slog" @@ -114,6 +115,20 @@ func (h *PaymentWebhookHandler) handleNotify(c *gin.Context, providerKey string) } if err := h.paymentService.HandlePaymentNotification(c.Request.Context(), notification, resolvedProviderKey); err != nil { + // Unknown order: ack with 2xx so the provider stops retrying. This + // guards against foreign environments whose webhook endpoints are + // (mis)configured to point at us — without a 2xx, the provider will + // retry for days and spam our error logs. We still emit a WARN so the + // event is discoverable in logs. + if errors.Is(err, service.ErrOrderNotFound) { + slog.Warn("[Payment Webhook] unknown order, acking to stop retries", + "provider", resolvedProviderKey, + "outTradeNo", notification.OrderID, + "tradeNo", notification.TradeNo, + ) + writeSuccessResponse(c, resolvedProviderKey) + return + } slog.Error("[Payment Webhook] handle notification failed", "provider", resolvedProviderKey, "error", err) c.String(http.StatusInternalServerError, "handle failed") return diff --git a/backend/internal/service/payment_fulfillment.go b/backend/internal/service/payment_fulfillment.go index 71f1eb2f..243edff3 100644 --- a/backend/internal/service/payment_fulfillment.go +++ b/backend/internal/service/payment_fulfillment.go @@ -2,6 +2,7 @@ package service import ( "context" + "errors" "fmt" "log/slog" "math" @@ -16,6 +17,14 @@ import ( infraerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors" ) +// ErrOrderNotFound is returned by HandlePaymentNotification when the webhook +// references an out_trade_no that does not exist in our DB. Callers (webhook +// handlers) should treat this as a terminal, non-retryable condition and still +// respond with a 2xx success to the provider — otherwise the provider will keep +// retrying forever (e.g. when a foreign environment's webhook endpoint is +// misconfigured to point at us, or when our orders table has been wiped). +var ErrOrderNotFound = errors.New("payment order not found") + // --- Payment Notification & Fulfillment --- func (s *PaymentService) HandlePaymentNotification(ctx context.Context, n *payment.PaymentNotification, pk string) error { @@ -30,7 +39,10 @@ func (s *PaymentService) HandlePaymentNotification(ctx context.Context, n *payme if oid, ok := parseLegacyPaymentOrderID(n.OrderID, err); ok { return s.confirmPayment(ctx, oid, n.TradeNo, n.Amount, pk, n.Metadata) } - return fmt.Errorf("order not found for out_trade_no: %s", n.OrderID) + if dbent.IsNotFound(err) { + return fmt.Errorf("%w: out_trade_no=%s", ErrOrderNotFound, n.OrderID) + } + return fmt.Errorf("lookup order failed for out_trade_no %s: %w", n.OrderID, err) } return s.confirmPayment(ctx, order.ID, n.TradeNo, n.Amount, pk, n.Metadata) }