Production webhook retry logic requires exponential backoff, jitter, idempotency tracking, dead letter queues, and per-destination rate limiting. Most teams underestimate the scope: a production-grade retry system runs 1,500–3,000 lines of code before you add any destination-specific logic. Tools like Meshes handle retries automatically with exponential backoff and dead letter management, letting you emit events once and track delivery status through the API.
This post walks through building webhook retry logic yourself in Node.js — not because you should, but so you understand exactly what you're signing up for.
The first approach (and why it fails immediately)
Every webhook retry system starts the same way. Someone writes a function that sends an HTTP POST and wraps it in a loop:
```typescript
async function sendWebhook(url: string, payload: object, retries = 3) {
  for (let attempt = 1; attempt <= retries; attempt++) {
    try {
      const res = await fetch(url, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload),
      });
      if (res.ok) return { success: true, attempt };
    } catch (err) {
      if (attempt === retries) throw err;
    }
  }
}
```
This works in a tutorial. It fails in production for a half-dozen reasons:
No delay between retries. If a destination returns a 503, it's probably overloaded. Hammering it three times in a row makes things worse. You need backoff.
No jitter. If you have a hundred events queued for the same destination and they all retry on the same schedule, you create a thundering herd that can DDoS the destination.
No idempotency. If your process crashes between sending the webhook and recording the result, you'll retry an event that already succeeded. The destination processes it twice: two CRM contacts created, two emails sent, two charges attempted.
No persistence. If your server restarts, every in-flight retry is lost. Events disappear silently.
No distinction between retryable and permanent errors. A 503 (Service Unavailable) is worth retrying. A 401 (Unauthorized) is not — the credential is wrong, and retrying the same request a thousand times won't fix it.
No dead letter handling. When retries exhaust, the event vanishes. There's no record it existed, no way to replay it, and no alert that something broke.
Let's fix each of these. It gets complicated fast.
Step 1: Exponential backoff with jitter
The first thing your retry system needs is intelligent delays between attempts. Exponential backoff increases the wait time after each failure. Jitter adds randomness so concurrent retries don't collide.
```typescript
function calculateBackoff(
  attempt: number,
  baseDelay = 1000,
  maxDelay = 300_000
): number {
  // Exponential: 1s, 2s, 4s, 8s, 16s, 32s...
  const exponential = baseDelay * Math.pow(2, attempt - 1);
  // Cap at max delay (5 minutes)
  const capped = Math.min(exponential, maxDelay);
  // Full jitter: random value between 0 and the capped delay
  return Math.floor(Math.random() * capped);
}
```
Full jitter (randomizing between zero and the calculated delay) outperforms equal jitter and decorrelated jitter in most real-world scenarios. AWS published research on this in their architecture blog — the short version is that full jitter minimizes total work across all retrying clients while still providing reasonable retry delays.
With five retry attempts and a one-second base delay, your schedule looks roughly like this:
- Attempt 1: immediate
- Attempt 2: 0–1 second
- Attempt 3: 0–2 seconds
- Attempt 4: 0–4 seconds
- Attempt 5: 0–8 seconds
That's your first fifty lines of real code. Now you need somewhere to track these retries.
Step 2: Persistent queue
In-memory retries are worthless. If your process restarts — a deploy, a crash, an OOM kill — every pending retry is gone.
You need a durable queue. Most teams reach for one of three options:
Database-backed queue (PostgreSQL or SQLite). Simple to set up, no new infrastructure. But you're polling for work, which means either wasted queries or delayed retries.
Redis-backed queue (BullMQ). Fast, battle-tested, purpose-built for job scheduling. But now you're running Redis, which means another service to operate, monitor, and back up.
Managed queue (SQS, Cloud Tasks). Scalable, durable, and hands-off. But now you're coupled to a cloud provider and paying per-message.
Here's a minimal BullMQ implementation:
```typescript
import { Queue, Worker } from 'bullmq';
import IORedis from 'ioredis';

const connection = new IORedis({
  host: '127.0.0.1',
  port: 6379,
  maxRetriesPerRequest: null, // required by BullMQ workers
});
const webhookQueue = new Queue('webhook-delivery', { connection });

// Enqueue a webhook delivery
async function enqueueWebhook(
  destinationUrl: string,
  payload: object,
  eventId: string
) {
  await webhookQueue.add(
    'deliver',
    { destinationUrl, payload, eventId },
    {
      attempts: 5,
      backoff: { type: 'exponential', delay: 1000 },
      removeOnComplete: 1000,
      removeOnFail: false, // Keep failed jobs for dead letter inspection
    }
  );
}

// Process webhook deliveries
const worker = new Worker(
  'webhook-delivery',
  async (job) => {
    const { destinationUrl, payload, eventId } = job.data;
    const res = await fetch(destinationUrl, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'X-Event-Id': eventId,
      },
      body: JSON.stringify(payload),
      signal: AbortSignal.timeout(30_000),
    });
    if (!res.ok) {
      throw new Error(`Webhook failed: ${res.status} ${res.statusText}`);
    }
  },
  { connection, concurrency: 10 }
);
```
This looks clean. It is also missing about two hundred lines of code you'll need almost immediately.
Step 3: Error classification
Not all errors deserve retries. You need to distinguish:
Retryable errors: 408 (Request Timeout), 429 (Too Many Requests), 500 (Internal Server Error), 502 (Bad Gateway), 503 (Service Unavailable), 504 (Gateway Timeout), network errors (ECONNREFUSED, ECONNRESET, ETIMEDOUT).
Permanent errors: 400 (Bad Request — your payload is wrong), 401 (Unauthorized — credentials are invalid), 403 (Forbidden), 404 (Not Found — the endpoint doesn't exist), 410 (Gone).
Special cases: 429 with a Retry-After header. You need to parse that header and schedule the retry for the specified time, not your default backoff schedule. This one header introduces branching logic that touches your entire retry calculation.
```typescript
function isRetryable(statusCode: number): boolean {
  const retryableCodes = new Set([408, 429, 500, 502, 503, 504]);
  return retryableCodes.has(statusCode);
}

function getRetryDelay(res: Response, attempt: number): number {
  // Respect the Retry-After header if present
  const retryAfter = res.headers.get('Retry-After');
  if (retryAfter) {
    const seconds = parseInt(retryAfter, 10);
    if (!isNaN(seconds)) return seconds * 1000;
    // Retry-After can also be an HTTP date
    const date = new Date(retryAfter);
    if (!isNaN(date.getTime())) {
      return Math.max(0, date.getTime() - Date.now());
    }
  }
  return calculateBackoff(attempt);
}
```
Now your worker needs to catch errors, classify them, and either retry or send to a dead letter queue based on the classification. That's another forty lines of branching logic.
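Stripped to its core, that branching reduces to a pure decision function you can test in isolation. This is an illustrative sketch (the name `decideFailureAction` is not from any library); a network error with no status code is assumed transient:

```typescript
type FailureAction = 'retry' | 'dead-letter';

// Given the failure and the attempt count, decide whether to reschedule
// the job or give up and dead-letter it.
function decideFailureAction(
  statusCode: number | null,
  attempt: number,
  maxAttempts = 5
): FailureAction {
  const retryable = new Set([408, 429, 500, 502, 503, 504]);
  if (statusCode !== null && !retryable.has(statusCode)) {
    return 'dead-letter'; // permanent error: more attempts won't help
  }
  return attempt >= maxAttempts ? 'dead-letter' : 'retry';
}
```

With BullMQ specifically, throwing its exported `UnrecoverableError` from the processor fails the job immediately without consuming the remaining attempts, which is one way to wire up the permanent-error branch.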
Step 4: Idempotency tracking
Webhooks get delivered more than once. It happens when your process crashes after sending but before recording success. It happens when the destination is slow and your timeout fires before their 200 arrives. It happens when a network partition makes both sides think the request failed.
Your system needs to track which events have been successfully delivered to which destinations:
```typescript
// Track deliveries to prevent duplicates.
// Assumes a shared ioredis client (`redis`) is in scope.
async function hasBeenDelivered(
  eventId: string,
  destinationId: string
): Promise<boolean> {
  const key = `delivered:${eventId}:${destinationId}`;
  const exists = await redis.get(key);
  return exists !== null;
}

async function markDelivered(
  eventId: string,
  destinationId: string
): Promise<void> {
  const key = `delivered:${eventId}:${destinationId}`;
  // TTL: keep for 7 days to catch delayed retries
  await redis.set(key, '1', 'EX', 604_800);
}
```
But this only covers your side. You should also be sending an idempotency key so the destination can deduplicate on their end. Some APIs accept Idempotency-Key headers. Some use the event ID. Some don't support idempotency at all, which means duplicate delivery causes duplicate side effects and there's nothing you can do about it from the sending side.
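That per-destination variation tends to end up in a header map. A minimal sketch, assuming a hypothetical `DestinationConfig` with an `idempotencyHeader` field (whether the destination actually honors the key is entirely up to them):

```typescript
interface DestinationConfig {
  // Name of the header this destination deduplicates on, if any
  // (e.g. 'Idempotency-Key'); undefined if it supports none.
  idempotencyHeader?: string;
}

function buildWebhookHeaders(
  eventId: string,
  destination: DestinationConfig
): Record<string, string> {
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
    'X-Event-Id': eventId, // always sent, for destinations that key on the event ID
  };
  if (destination.idempotencyHeader) {
    headers[destination.idempotencyHeader] = eventId;
  }
  return headers;
}
```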
Step 5: Per-destination rate limiting
Here's where things get genuinely tricky. Different destinations have different rate limits. HubSpot allows 100 requests per 10 seconds. Salesforce varies by edition. Mailchimp has different limits for different endpoints.
If you're sending webhooks to multiple destinations, you need per-destination rate limiting to avoid getting blocked:
```typescript
import { RateLimiterRedis } from 'rate-limiter-flexible';

// Assumes the same shared ioredis client (`redis`) as above.
const rateLimiters = new Map<string, RateLimiterRedis>();

function getRateLimiter(destinationId: string, maxPerSecond = 10) {
  if (!rateLimiters.has(destinationId)) {
    rateLimiters.set(
      destinationId,
      new RateLimiterRedis({
        storeClient: redis,
        keyPrefix: `rl:${destinationId}`,
        points: maxPerSecond,
        duration: 1,
      })
    );
  }
  return rateLimiters.get(destinationId)!;
}

// In your worker, before sending:
async function sendWithRateLimit(
  destinationId: string,
  destinationUrl: string,
  payload: object
) {
  const limiter = getRateLimiter(destinationId);
  try {
    await limiter.consume(destinationId);
  } catch {
    // Rate limited — throw so the queue reschedules with backoff
    throw new Error('Rate limited, will retry');
  }
  // ... send the webhook
}
```
Now multiply this by every destination you support. Each one has its own rate limits, its own authentication patterns, its own error formats, and its own retry semantics. HubSpot returns rate limit info in response headers. Salesforce uses a daily API call allocation. Some APIs return 429 with a Retry-After header. Some return 429 with no guidance at all.
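That variation usually gets encoded in a per-destination config table. The numbers below are illustrative placeholders, not authoritative limits (check each vendor's current documentation), but the shape is representative:

```typescript
interface DestinationLimits {
  points: number;   // allowed requests...
  duration: number; // ...per this many seconds
}

// Illustrative values only; real limits vary by vendor, plan, and endpoint.
const destinationLimits: Record<string, DestinationLimits> = {
  hubspot: { points: 100, duration: 10 },
  default: { points: 10, duration: 1 },
};

function limitsFor(destinationId: string): DestinationLimits {
  return destinationLimits[destinationId] ?? destinationLimits.default;
}
```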
Step 6: Dead letter queue
When retries exhaust, you need somewhere to put events that couldn't be delivered. A dead letter queue preserves the event, the failure context, and the delivery history so someone can investigate and replay later.
```typescript
import type { Job } from 'bullmq';

interface DeadLetter {
  eventId: string;
  destination: string;
  payload: object;
  attempts: Array<{
    timestamp: Date;
    statusCode: number | null;
    error: string;
  }>;
  createdAt: Date;
  lastAttemptAt: Date;
}

// Assumes `db` is a MongoDB-style client and `alertOps` pages your on-call.
async function moveToDeadLetter(job: Job, error: Error): Promise<void> {
  const deadLetter: DeadLetter = {
    eventId: job.data.eventId,
    destination: job.data.destinationUrl,
    payload: job.data.payload,
    attempts: job.data.attemptHistory || [],
    createdAt: new Date(job.timestamp),
    lastAttemptAt: new Date(),
  };
  await db.collection('dead_letters').insertOne(deadLetter);

  // Alert if the DLQ is growing
  const recentCount = await db.collection('dead_letters').countDocuments({
    destination: job.data.destinationUrl,
    createdAt: { $gte: new Date(Date.now() - 3600_000) },
  });
  if (recentCount > 10) {
    await alertOps(
      `Dead letter spike: ${recentCount} failures for ${job.data.destinationUrl} in the last hour`
    );
  }
}
```
You also need a way to replay dead letters — individually and in bulk — once the root cause is fixed. That's another endpoint, another set of authorization checks, and another set of edge cases (what if the replayed event fails again?).
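The replay path can be sketched against an injected store so it stays testable. The `DeadLetterStore` interface and `replayDeadLetters` below are hypothetical names, not part of any library; each replayed event goes back through the normal retry pipeline, so a second failure simply dead-letters it again rather than looping forever:

```typescript
interface StoredDeadLetter {
  eventId: string;
  destination: string;
  payload: object;
}

interface DeadLetterStore {
  listForDestination(destination: string): Promise<StoredDeadLetter[]>;
  remove(eventId: string): Promise<void>;
}

// Re-enqueue every dead letter for a destination; returns how many were replayed.
async function replayDeadLetters(
  store: DeadLetterStore,
  destination: string,
  enqueue: (d: StoredDeadLetter) => Promise<void>
): Promise<number> {
  const letters = await store.listForDestination(destination);
  for (const letter of letters) {
    await enqueue(letter);
    await store.remove(letter.eventId); // only after a successful enqueue
  }
  return letters.length;
}
```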
Step 7: Observability
You're not done. You now need to know what your retry system is doing:
- How many events are pending, retrying, delivered, and dead-lettered per destination?
- What's the p50/p95/p99 delivery latency?
- Which destinations are failing and why?
- How full is your dead letter queue, and is it growing?
This means metrics, dashboards, and alerts. Prometheus counters for delivery attempts and outcomes. Grafana dashboards for per-destination health. PagerDuty alerts for dead letter spikes and queue depth anomalies.
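Before wiring real Prometheus counters, it helps to pin down the label shape. This in-memory sketch (illustrative, not a metrics library) tracks outcomes per destination the way a counter with `{ destination, outcome }` labels would:

```typescript
type Outcome = 'delivered' | 'retried' | 'dead-lettered';

// In production this would be a Prometheus counter with
// { destination, outcome } labels; the map key mirrors that label set.
const deliveryOutcomes = new Map<string, number>();

function recordOutcome(destination: string, outcome: Outcome): void {
  const key = `${destination}:${outcome}`;
  deliveryOutcomes.set(key, (deliveryOutcomes.get(key) ?? 0) + 1);
}

function outcomeCount(destination: string, outcome: Outcome): number {
  return deliveryOutcomes.get(`${destination}:${outcome}`) ?? 0;
}
```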
The real cost: maintenance
Let's say you build all of this. You've written roughly 1,500–3,000 lines of code across the queue, the worker, the retry logic, the error classification, the rate limiting, the dead letter queue, the idempotency tracker, and the observability layer. And that's for a single destination.
Now maintain it.
- BullMQ releases a new major version with breaking changes to the backoff API. Update and test.
- A destination changes their rate limits. Update your config and verify.
- A customer reports missing events. Dig through your dead letter queue, cross-reference with delivery logs, figure out why your retry classification marked a transient error as permanent.
- Redis hits memory limits during a spike. Your queue backs up. Events are delayed. Customers notice.
- Someone on your team adds a new destination. They need to understand the retry system, the rate limiter configuration, the dead letter schema, and the idempotency tracking. They have questions. You're the only one who knows how it works.
This is the real cost: not the initial build, but the permanent operational overhead of running what is effectively a message delivery system alongside your actual product.
Or skip all of this
Meshes is an event routing platform that handles retry logic, exponential backoff with jitter, per-destination rate limiting, dead letter management, and delivery tracking — so you don't build or maintain any of it.
Your code stays this simple:
```typescript
import { MeshesEventsClient } from '@mesheshq/events';

const meshes = new MeshesEventsClient('your_publishable_key');

await meshes.emit({
  event: 'user.signup',
  payload: {
    email: 'jane@example.com',
    plan: 'pro',
    source: 'website',
  },
});
```
One API call. Meshes fans the event out to every configured destination — HubSpot, Salesforce, Mailchimp, Intercom, Slack, custom webhooks — with automatic retries, exponential backoff, jitter, and dead letter management for each destination independently.
If a destination is down, Meshes retries with backoff. If retries exhaust, the event moves to a dead letter queue with full context: the original payload, every attempt timestamp, and the error chain. When the issue is resolved, you replay the dead letters and every event is delivered.
No Redis to manage. No worker processes to monitor. No rate limiter configs to tune. No 1,500–3,000 lines of retry infrastructure to maintain alongside your product.
Meshes supports 10+ integrations including HubSpot, Salesforce, Intercom, Mailchimp, Resend, Slack, and Zoom — with OAuth token management, field mappings, and workspace-per-tenant isolation included. SDKs are available for Node.js, TypeScript, and Go.
The free tier includes 100 events per month. Paid plans start at $49/month.
When to build it yourself
There are legitimate reasons to build your own retry system:
- You have a single destination and simple delivery requirements.
- Your team has deep operational experience with message queues and you already run the infrastructure.
- You need custom retry behavior that no platform supports.
- You're building the retry system as your core product.
For everyone else — and that's most SaaS teams shipping integrations to CRMs, email tools, and communication platforms — the build-vs-buy math doesn't work. You'll spend weeks building what a platform handles in minutes, and then spend months maintaining it.
The code you don't write is the code you don't debug at 3am.
Want to see delivery reliability instead of coding it? Meshes gives you a per-event delivery timeline (attempts, status codes, retries, DLQ, replay) across every destination. Try it free →