From brightdata-pack
Implements Bright Data webhook handling for Web Scraper and Datasets APIs: trigger async collections and validate deliveries with auth headers in Node.js/Express.
npx claudepluginhub jeremylongshore/claude-code-plugins-plus-skills --plugin brightdata-pack
This skill is limited to using the following tools:
Handle Bright Data webhook deliveries from the Web Scraper API and Datasets API. When you trigger an async collection, Bright Data sends the results to your webhook URL with the collected data in JSON, NDJSON, or CSV format.
Implements concurrent queues, exponential backoff, and retry logic for Bright Data rate limits on proxies, scrapers, and APIs.
Onboards coding agents to Bright Data for live web scraping, SERP results, structured data extraction, and API integration. Installs CLI, skills, and handles OAuth authentication with one command.
Implements Firecrawl webhook handlers for async crawl/page/batch events with HMAC signature verification using Express. Processes real-time scrape results without polling.
Share bugs, ideas, or general feedback.
Handle Bright Data webhook deliveries from the Web Scraper API and Datasets API. When you trigger an async collection, Bright Data sends the results to your webhook URL with the collected data in JSON, NDJSON, or CSV format.
// trigger-with-webhook.ts
const API_TOKEN = process.env.BRIGHTDATA_API_TOKEN!;

/**
 * Triggers an async Bright Data collection whose results are delivered to a webhook.
 *
 * @param datasetId - Dataset (scraper) ID sent as the `dataset_id` query parameter.
 * @param urls - Target URLs; each is posted as a `{ url }` input record.
 * @returns The parsed JSON response of the trigger call (logged field: `snapshot_id`).
 * @throws Error when `BRIGHTDATA_WEBHOOK_SECRET` is unset, or when the API
 *   answers with a non-2xx status.
 */
async function triggerWithWebhook(datasetId: string, urls: string[]) {
  // Fail fast: without this guard the literal string "Bearer undefined" would
  // be registered as the delivery auth header.
  const webhookSecret = process.env.BRIGHTDATA_WEBHOOK_SECRET;
  if (!webhookSecret) {
    throw new Error('BRIGHTDATA_WEBHOOK_SECRET is not set');
  }

  const params = new URLSearchParams({
    dataset_id: datasetId,
    format: 'json',
    endpoint: 'https://your-app.com/webhooks/brightdata', // Your webhook URL
    uncompressed_webhook: 'true', // Send uncompressed for easier handling
    auth_header: `Bearer ${webhookSecret}`, // Auth header sent with delivery
  });
  // Optional: notification URL (lightweight ping when done)
  params.set('notify', 'https://your-app.com/webhooks/brightdata-notify');

  const response = await fetch(
    `https://api.brightdata.com/datasets/v3/trigger?${params.toString()}`,
    {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${API_TOKEN}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(urls.map(url => ({ url }))),
    }
  );

  // Surface API errors instead of returning an error body as if it were a
  // successful trigger result.
  if (!response.ok) {
    const detail = await response.text().catch(() => '');
    throw new Error(
      `Bright Data trigger failed: ${response.status} ${response.statusText} ${detail}`.trim()
    );
  }

  const result = await response.json();
  console.log('Snapshot ID:', result.snapshot_id);
  return result;
}
// api/webhooks/brightdata.ts
import { timingSafeEqual } from 'node:crypto';
import express from 'express';
const app = express();
// Bright Data sends collected data as JSON array

/**
 * Checks a delivery's Authorization header against the shared webhook secret.
 *
 * Returns false when the secret is not configured, so an unset
 * BRIGHTDATA_WEBHOOK_SECRET can never be matched by sending "Bearer undefined".
 * The comparison is constant-time for equal-length values.
 */
function isAuthorizedDelivery(authHeader: string | undefined): boolean {
  const secret = process.env.BRIGHTDATA_WEBHOOK_SECRET;
  if (!secret || !authHeader) {
    return false;
  }
  const expected = Buffer.from(`Bearer ${secret}`);
  const received = Buffer.from(authHeader);
  // timingSafeEqual throws on length mismatch, so compare lengths first.
  return received.length === expected.length && timingSafeEqual(received, expected);
}

/**
 * Receives a collection delivery: validates the Authorization header,
 * logs a preview of each record, stores the records, then acknowledges.
 */
app.post('/webhooks/brightdata',
  express.json({ limit: '50mb' }), // Collections can be large
  async (req, res) => {
    // Validate Authorization header
    if (!isAuthorizedDelivery(req.headers.authorization)) {
      console.error('Invalid webhook authorization');
      return res.status(401).json({ error: 'Unauthorized' });
    }

    // The body is untrusted input; only an array of records is processed.
    const records: unknown = req.body;
    if (!Array.isArray(records)) {
      console.error('Unexpected webhook payload: expected a JSON array');
      return res.status(400).json({ error: 'Expected a JSON array of records' });
    }
    console.log(`Received ${records.length} records`);

    // Process records
    for (const record of records) {
      console.log(`URL: ${record?.url}`);
      console.log(`Title: ${record?.title}`);
      console.log(`Data: ${JSON.stringify(record).substring(0, 200)}`);
    }

    // Store results before acknowledging. Bright Data retries on non-2xx, so a
    // storage failure answers 500 rather than leaving the request unanswered.
    try {
      await saveToDatabase(records);
    } catch (err: unknown) {
      console.error('Failed to store webhook records', err);
      return res.status(500).json({ error: 'Failed to store records' });
    }

    res.status(200).json({ received: records.length });
  }
);
// api/webhooks/brightdata-notify.ts
// Notification is a small JSON with snapshot status — not the full data

/**
 * Receives a snapshot status notification and, when the snapshot is ready,
 * downloads its records from the snapshot endpoint and logs the count.
 */
app.post('/webhooks/brightdata-notify',
  express.json(),
  async (req, res) => {
    // The body may be absent if the request was not parsed as JSON.
    const { snapshot_id, status } = req.body ?? {};
    if (typeof snapshot_id !== 'string' || snapshot_id.length === 0) {
      return res.status(400).json({ error: 'Missing snapshot_id' });
    }
    console.log(`Collection ${snapshot_id}: ${status}`);

    if (status === 'ready') {
      // Option A: Data already delivered to endpoint above
      // Option B: Fetch data manually
      try {
        // snapshot_id comes from the request body: encode it so it cannot
        // change the path or query of the authenticated API call.
        const response = await fetch(
          `https://api.brightdata.com/datasets/v3/snapshot/${encodeURIComponent(snapshot_id)}?format=json`,
          { headers: { 'Authorization': `Bearer ${process.env.BRIGHTDATA_API_TOKEN}` } }
        );
        if (!response.ok) {
          throw new Error(`Snapshot fetch failed: ${response.status} ${response.statusText}`);
        }
        const records = await response.json();
        console.log(`Fetched ${records.length} records from snapshot`);
      } catch (err: unknown) {
        // Answer explicitly; an unhandled rejection in an async handler would
        // otherwise leave the request without a response.
        console.error(`Failed to fetch snapshot ${snapshot_id}`, err);
        return res.status(500).json({ error: 'Failed to fetch snapshot' });
      }
    }

    res.status(200).json({ received: true });
  }
);
// Bright Data may retry delivery — deduplicate by snapshot_id
// Holds snapshot IDs that are stored or currently being stored.
const processedSnapshots = new Set<string>();

/**
 * Stores a delivery's records at most once per snapshot ID.
 *
 * The ID is claimed before the asynchronous save starts, so a second delivery
 * of the same snapshot arriving while the first is still being stored is
 * skipped instead of stored twice. If the save fails, the claim is released
 * so a later retry can be processed.
 *
 * @param snapshotId - Snapshot ID used as the deduplication key.
 * @param records - Records passed to `saveToDatabase`.
 * @throws Whatever `saveToDatabase` rejects with.
 */
async function handleDelivery(snapshotId: string, records: any[]): Promise<void> {
  if (processedSnapshots.has(snapshotId)) {
    console.log(`Snapshot ${snapshotId} already processed, skipping`);
    return;
  }

  // Claim synchronously, before the first await, to close the race window.
  processedSnapshots.add(snapshotId);
  try {
    await saveToDatabase(records);
  } catch (err: unknown) {
    processedSnapshots.delete(snapshotId);
    throw err;
  }
  // For production, use Redis instead of in-memory Set
  // await redis.set(`bd:snapshot:${snapshotId}`, '1', 'EX', 86400 * 7);
}
# Open a public ngrok tunnel to the local server on port 3000
ngrok http 3000
# Kick off a one-URL collection that delivers to the ngrok URL
curl --request POST \
  --header "Authorization: Bearer ${BRIGHTDATA_API_TOKEN}" \
  --header "Content-Type: application/json" \
  --data '[{"url": "https://example.com"}]' \
  "https://api.brightdata.com/datasets/v3/trigger?dataset_id=YOUR_ID&format=json&endpoint=https://YOUR.ngrok.io/webhooks/brightdata&auth_header=Bearer%20test_secret"
| Parameter | Values | Default |
|---|---|---|
| format | json, ndjson, csv, jsonl | json |
| uncompressed_webhook | true, false | false (gzip) |
| endpoint | Your webhook URL | None |
| auth_header | Authorization header value | None |
| notify | Notification-only URL | None |
| Issue | Cause | Solution |
|---|---|---|
| No delivery received | Wrong endpoint URL | Check URL in trigger params |
| 413 Payload Too Large | Large collection | Increase body limit or use streaming |
| Duplicate deliveries | Retry on timeout | Implement snapshot_id deduplication |
| Auth header mismatch | Wrong secret | Check auth_header in trigger params |
For performance optimization, see brightdata-performance-tuning.