From apify-pack
Deploys Apify Actors via apify push and integrates results into web apps using ApifyClient for sync/async scraping runs and dataset access.
npx claudepluginhub jeremylongshore/claude-code-plugins-plus-skills --plugin apify-pack

This skill is limited to using the following tools:
Deploy Actors to the Apify platform and integrate their results into external applications. Covers `apify push` deployment, API-triggered runs from web apps, scheduled scraping with data pipelines, and platform-specific integration patterns.
Provides reference architectures for Apify scrapers including standalone Actors, multi-Actor pipelines, and system integrations. Use for production scraping setups.
Develop, debug, and deploy Apify Actors for web scraping, automation, and data processing. Guides CLI setup, login, and templates for JavaScript, TypeScript, Python.
Guides Apify Actor development: project creation/modification/debugging, template selection, input/output wiring, runtime logic, secure CLI setup, and deployment workflows.
Share bugs, ideas, or general feedback.
Deploy Actors to the Apify platform and integrate their results into external applications. Covers apify push deployment, API-triggered runs from web apps, scheduled scraping with data pipelines, and platform-specific integration patterns.
Test locally first (`apify run`) and make sure `apify login` has completed.
# Push Actor code to Apify
# Deploy the Actor in the current directory (uses .actor/actor.json for config)
apify push
# Push to a specific Actor (creates if doesn't exist)
apify push username/my-scraper
# Pull an existing Actor to modify
apify pull username/existing-actor
The most common pattern: trigger an Actor from your app and consume results.
// src/services/apify.ts
import { ApifyClient } from 'apify-client';
const client = new ApifyClient({ token: process.env.APIFY_TOKEN });
// Typed shape of one dataset item produced by username/product-scraper.
// NOTE(review): assumed to match the Actor's actual output schema — verify
// against the Actor's OUTPUT_SCHEMA or a sample run before relying on it.
interface ScrapeResult {
url: string;
title: string;
price: number;
inStock: boolean;
}
/**
 * Run a scraping Actor and return typed results.
 * Blocks until the Actor finishes (synchronous pattern).
 *
 * @param urls - Product page URLs to scrape.
 * @returns All items from the run's default dataset.
 * @throws Error when the run ends in any status other than SUCCEEDED.
 */
export async function scrapeProducts(urls: string[]): Promise<ScrapeResult[]> {
  const input = {
    startUrls: urls.map(url => ({ url })),
    maxItems: 500,
  };
  const runOptions = {
    memory: 2048,
    timeout: 600, // 10 minutes
  };

  // .call() starts the run and waits for it to finish.
  const run = await client.actor('username/product-scraper').call(input, runOptions);
  if (run.status !== 'SUCCEEDED') {
    throw new Error(`Scrape failed: ${run.status} — ${run.statusMessage}`);
  }

  const dataset = client.dataset(run.defaultDatasetId);
  const { items } = await dataset.listItems();
  return items as ScrapeResult[];
}
/**
 * Start a scraping Actor without waiting (async pattern).
 * Returns run ID for later polling.
 *
 * @param urls - Product page URLs to scrape.
 * @returns The run ID, to be polled via getScrapeResults.
 */
export async function startScrape(urls: string[]): Promise<string> {
  const startUrls = urls.map(url => ({ url }));
  // .start() returns immediately; the run continues on the Apify platform.
  const { id } = await client.actor('username/product-scraper').start({ startUrls });
  return id;
}
/**
 * Check if a run has finished and get results.
 *
 * @param runId - ID returned by startScrape.
 * @returns The run status, plus dataset items once the run has SUCCEEDED.
 * @throws Error when no run exists for the given ID.
 */
export async function getScrapeResults(runId: string): Promise<{
  status: string;
  items?: ScrapeResult[];
}> {
  const run = await client.run(runId).get();
  // .get() resolves to undefined for unknown run IDs — fail with a clear
  // message instead of crashing with a TypeError on run.status below.
  if (!run) {
    throw new Error(`No run found for ID: ${runId}`);
  }
  if (run.status === 'RUNNING' || run.status === 'READY') {
    return { status: run.status };
  }
  if (run.status === 'SUCCEEDED') {
    const { items } = await client.dataset(run.defaultDatasetId).listItems();
    return { status: 'SUCCEEDED', items: items as ScrapeResult[] };
  }
  // Terminal but unsuccessful states (FAILED, ABORTED, TIMED-OUT).
  return { status: run.status };
}
// app/api/scrape/route.ts (Next.js App Router)
import { NextResponse } from 'next/server';
import { ApifyClient } from 'apify-client';
const client = new ApifyClient({ token: process.env.APIFY_TOKEN });
/**
 * Kick off a scrape run (non-blocking) and return a status URL the client
 * can poll. Validates the request body before calling the Apify API.
 */
export async function POST(request: Request) {
  // request.json() rejects on a malformed body — surface that as a 400
  // instead of letting it bubble up as an unhandled 500.
  let body: { urls?: string[] };
  try {
    body = await request.json();
  } catch {
    return NextResponse.json({ error: 'invalid JSON body' }, { status: 400 });
  }
  const urls = body?.urls;
  if (!Array.isArray(urls) || urls.length === 0) {
    return NextResponse.json({ error: 'urls required' }, { status: 400 });
  }
  try {
    // Start Actor (non-blocking) so the API route returns immediately.
    const run = await client.actor('username/product-scraper').start({
      startUrls: urls.map((url: string) => ({ url })),
      maxItems: 100,
    });
    return NextResponse.json({
      runId: run.id,
      status: run.status,
      statusUrl: `/api/scrape/${run.id}`,
    });
  } catch (error) {
    return NextResponse.json(
      { error: (error as Error).message },
      { status: 500 },
    );
  }
}
// app/api/scrape/[runId]/route.ts
/**
 * Poll a run by ID. Returns items (capped at 100) once the run has
 * SUCCEEDED, the current status otherwise, or 404 for an unknown run ID.
 */
export async function GET(
  _req: Request,
  { params }: { params: { runId: string } },
) {
  const run = await client.run(params.runId).get();
  // .get() resolves to undefined for unknown run IDs — return 404 instead
  // of crashing with a TypeError on run.status.
  if (!run) {
    return NextResponse.json({ error: 'run not found' }, { status: 404 });
  }
  if (run.status === 'SUCCEEDED') {
    const { items } = await client
      .dataset(run.defaultDatasetId)
      .listItems({ limit: 100 });
    return NextResponse.json({ status: 'SUCCEEDED', items });
  }
  return NextResponse.json({
    status: run.status,
    statusMessage: run.statusMessage,
  });
}
// Receive notifications when an Actor run completes
import express from 'express';
import { ApifyClient } from 'apify-client';
const app = express();
const client = new ApifyClient({ token: process.env.APIFY_TOKEN });
app.use(express.json());
// Handle Apify run-lifecycle webhook events.
// NOTE(review): checking that the run exists is NOT real verification —
// configure a webhook secret/signature on Apify and validate it here
// before trusting the payload.
app.post('/webhooks/apify', async (req, res) => {
  // Express 4 does not catch rejections from async handlers; without this
  // try/catch a thrown error leaves the request hanging and produces an
  // unhandled promise rejection.
  try {
    const { eventType, eventData } = req.body;
    const { actorRunId } = eventData ?? {};
    const run = actorRunId ? await client.run(actorRunId).get() : undefined;
    if (!run) {
      return res.status(400).json({ error: 'Invalid run ID' });
    }
    switch (eventType) {
      case 'ACTOR.RUN.SUCCEEDED': {
        const { items } = await client
          .dataset(run.defaultDatasetId)
          .listItems();
        console.log(`Run succeeded with ${items.length} items`);
        // Process items: save to DB, send notifications, etc.
        await processScrapedData(items);
        break;
      }
      case 'ACTOR.RUN.FAILED':
      case 'ACTOR.RUN.TIMED_OUT':
        console.error(`Run ${eventType}: ${run.statusMessage}`);
        // Alert team via Slack, PagerDuty, etc.
        await sendAlert(`Apify run ${eventType}: ${run.statusMessage}`);
        break;
    }
    res.json({ received: true });
  } catch (error) {
    console.error('Webhook handling failed:', error);
    res.status(500).json({ error: 'webhook processing failed' });
  }
});
// Run daily via cron, schedule, or Apify Schedule
import { ApifyClient } from 'apify-client';
import { mkdirSync, writeFileSync } from 'fs';
const client = new ApifyClient({ token: process.env.APIFY_TOKEN });
/**
 * Run the product scraper, export the results to a timestamped CSV, and
 * append them to a named archive dataset for historical access.
 *
 * @throws Error when the run ends in any status other than SUCCEEDED.
 */
async function dailyScrapeAndExport() {
  // Run Actor and block until it finishes.
  const run = await client.actor('username/product-scraper').call({
    startUrls: [{ url: 'https://target-store.com/products' }],
    maxItems: 5000,
  });
  if (run.status !== 'SUCCEEDED') {
    throw new Error(`Run failed: ${run.status}`);
  }
  // Export as CSV. Create the target directory first so writeFileSync
  // does not fail with ENOENT on a fresh checkout.
  mkdirSync('exports', { recursive: true });
  const csvBuffer = await client
    .dataset(run.defaultDatasetId)
    .downloadItems('csv');
  writeFileSync(`exports/products-${Date.now()}.csv`, csvBuffer);
  // Also store in a named dataset for historical access, stamping each
  // item with the scrape date.
  const archive = await client.datasets().getOrCreate('product-archive');
  const { items } = await client.dataset(run.defaultDatasetId).listItems();
  await client.dataset(archive.id).pushItems(
    items.map(item => ({ ...item, scrapedDate: new Date().toISOString() })),
  );
  console.log(`Exported ${items.length} products`);
}
# Dockerfile for an app that calls Apify
FROM node:20-slim
WORKDIR /app
# Copy only the manifests first so the dependency layer is cached
# across source-code changes.
COPY package*.json ./
# Install production dependencies from the lockfile (reproducible builds).
RUN npm ci --omit=dev
COPY . .
CMD ["node", "dist/index.js"]
# Build and deploy
docker build -t apify-integration .
# Pass the Apify token as an env var — never bake it into the image.
docker run -e APIFY_TOKEN=apify_api_xxx apify-integration
# Or deploy to Cloud Run, reading the token from Secret Manager
gcloud run deploy apify-service \
--source . \
--set-secrets=APIFY_TOKEN=apify-token:latest \
--region us-central1
┌────────────────┐ ┌──────────────┐ ┌────────────────┐
│ Your App │────▶│ Apify API │────▶│ Actor Run │
│ (apify-client)│ │ │ │ (on Apify │
│ │◀────│ │◀────│ platform) │
└────────────────┘ └──────────────┘ └────────────────┘
│ │
│ Poll or Webhook │
▼ ▼
┌────────────────┐ ┌────────────────┐
│ Your DB │ │ Dataset │
│ (processed) │ │ (raw results) │
└────────────────┘ └────────────────┘
| Issue | Cause | Solution |
|---|---|---|
| `apify push` fails | Auth or build error | Check `apify login` and Dockerfile |
| Webhook not received | URL unreachable from internet | Use ngrok for dev; verify HTTPS in prod |
| Timeout in API route | Actor takes too long | Use async pattern (start + poll) |
| Memory error on platform | Actor needs more RAM | Increase memory option |
| Large dataset download | >100MB results | Use pagination or streaming |
For webhook handling, see apify-webhooks-events.