From aidotnet-moyucode
Scrapes and processes web data using Puppeteer for dynamic sites, Cheerio for static HTML, CSS selectors, XPath, rate limiting, and error handling. Useful for extracting product info, articles, or lists ethically.
npx claudepluginhub joshuarweaver/cascade-data-analytics --plugin aidotnet-moyucode. This skill uses the workspace's default tool permissions.
Extract and process data from web pages with intelligent parsing capabilities.
Conducts multi-round deep research on GitHub repos via API and web searches, generating markdown reports with executive summaries, timelines, metrics, and Mermaid diagrams.
Dynamically discovers and combines enabled skills into cohesive, unexpectedly delightful experiences such as interactive HTML or themed artifacts. Activates on "surprise me", inspiration, or boredom cues.
Generates images from structured JSON prompts via Python script execution. Supports reference images and aspect ratios for characters, scenes, products, and other visuals.
Extract and process data from web pages with intelligent parsing capabilities.
/scrape command. You are a web scraping expert that extracts data efficiently and ethically.
import puppeteer from 'puppeteer';
// One product entry as extracted from a '.product-card' element by scrapeProducts.
interface Product {
  name: string;   // text content of '.product-name', trimmed; '' when the element is absent
  price: number;  // parsed from '.price' text with the leading '$' stripped; 0 when absent
  rating: number; // parsed from the '.rating' element's data-rating attribute; 0 when absent
  url: string;    // href of the first <a> inside the card; '' when absent
}
/**
 * Scrape product listings from a JavaScript-rendered page using Puppeteer.
 *
 * @param url - Page to navigate to. Must render elements matching '.product-card'.
 * @returns One {@link Product} per '.product-card' element found on the page.
 * @throws Propagates navigation/timeout errors from Puppeteer (e.g. when
 *   '.product-card' never appears); the browser is still closed in that case.
 */
async function scrapeProducts(url: string): Promise<Product[]> {
  const browser = await puppeteer.launch({ headless: 'new' });
  try {
    const page = await browser.newPage();
    // Spoof a common desktop UA so naive bot detection doesn't block the request.
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');
    await page.goto(url, { waitUntil: 'networkidle2' });
    // Block until at least one product card has rendered.
    await page.waitForSelector('.product-card');
    // Runs in the page context: serialize each card into a plain object.
    return await page.evaluate(() => {
      const items = document.querySelectorAll('.product-card');
      return Array.from(items).map(item => ({
        name: item.querySelector('.product-name')?.textContent?.trim() ?? '',
        price: parseFloat(item.querySelector('.price')?.textContent?.replace('$', '') ?? '0'),
        rating: parseFloat(item.querySelector('.rating')?.getAttribute('data-rating') ?? '0'),
        url: item.querySelector('a')?.href ?? '',
      }));
    });
  } finally {
    // Bug fix: the original only closed the browser on the success path, so any
    // failure in goto/waitForSelector/evaluate leaked a headless Chromium process.
    await browser.close();
  }
}
import axios from 'axios';
import * as cheerio from 'cheerio';
async function parseArticle(url: string) {
const { data } = await axios.get(url, {
headers: { 'User-Agent': 'Mozilla/5.0' }
});
const $ = cheerio.load(data);
return {
title: $('h1.article-title').text().trim(),
author: $('span.author-name').text().trim(),
date: $('time').attr('datetime'),
content: $('article.content p').map((_, el) => $(el).text()).get().join('\n\n'),
tags: $('a.tag').map((_, el) => $(el).text()).get(),
};
}
/**
 * Serializes async jobs with a fixed pause between them.
 *
 * Jobs run strictly one at a time in FIFO order; after every job settles
 * (success or failure) the limiter waits `delayMs` before starting the next —
 * including after the final job in the queue.
 */
class RateLimiter {
  private pending: (() => Promise<void>)[] = [];
  private draining = false;

  constructor(private delayMs: number = 1000) {}

  /**
   * Enqueue a job and kick the drain loop.
   * The returned promise settles with the job's own outcome.
   */
  async add<T>(task: () => Promise<T>): Promise<T> {
    return new Promise<T>((resolve, reject) => {
      // Promise.resolve().then(task) routes even a synchronous throw from
      // `task` into the rejection path, so the caller's promise always settles.
      this.pending.push(() => Promise.resolve().then(task).then(resolve, reject));
      void this.drain();
    });
  }

  /** Work the queue one job at a time; a no-op if a drain is already running. */
  private async drain(): Promise<void> {
    if (this.draining) return;
    this.draining = true;
    for (let job = this.pending.shift(); job !== undefined; job = this.pending.shift()) {
      await job();
      // Fixed pause after every job (jobs never reject here — errors were
      // already forwarded to the caller via `reject`).
      await new Promise<void>(wake => setTimeout(wake, this.delayMs));
    }
    this.draining = false;
  }
}
// Usage example: scrape many URLs through the limiter, ~2 s between requests.
// NOTE(review): `urls` is not declared in this file — assumes a `urls: string[]`
// in scope and a context where top-level `await` is legal (ESM module or an
// async wrapper). Confirm before copying verbatim.
const limiter = new RateLimiter(2000); // 2 seconds between requests
const results = await Promise.all(
urls.map(url => limiter.add(() => scrapeProducts(url)))
);
web-scraping, data-extraction, parsing, automation, html