Build computer vision solutions - image classification, object detection, and transfer learning
Build image classification, object detection, and segmentation models using PyTorch and YOLOv8. Use this when creating or fine-tuning visual AI systems with transfer learning and data augmentation.
/plugin marketplace add pluginagentmarketplace/custom-plugin-machine-learning
/plugin install machine-learning-assistant@pluginagentmarketplace-machine-learning
This skill inherits all available tools. When active, it can use any tool Claude has access to.
assets/config.yaml
assets/schema.json
references/GUIDE.md
references/PATTERNS.md
scripts/validate.py
Build visual AI systems from classification to detection.
import torch
import timm
from PIL import Image
from torchvision import transforms
# Quick start: classify a single image with a timm model.
# NOTE: requesting num_classes=10 replaces the ImageNet classification head
# with a freshly initialised 10-way layer, so the predicted index is only
# meaningful after fine-tuning or after loading a fine-tuned checkpoint.
model = timm.create_model('efficientnet_b0', pretrained=True, num_classes=10)
model.eval()  # put dropout / batch-norm layers into inference behaviour

# Preprocessing: resize, centre-crop to 224x224, convert to a tensor, then
# normalise each channel with the mean / std values below.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
])

# Inference: force RGB, add the batch dimension with unsqueeze(0), and run
# the forward pass without building an autograd graph.
image = Image.open('image.jpg').convert('RGB')
input_tensor = transform(image).unsqueeze(0)
with torch.no_grad():
    output = model(input_tensor)
predicted_class = output.argmax(dim=1).item()
import albumentations as A
from albumentations.pytorch import ToTensorV2
def _imagenet_tail():
    """Return the final steps shared by both pipelines: normalise, then tensorise."""
    return [
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]


# Training pipeline: random crop / flip / affine / colour perturbations,
# followed by the shared normalisation + tensor conversion.
# NOTE(review): the positional (224, 224) form of RandomResizedCrop is the
# older (height, width) signature; newer albumentations releases expect
# size=(224, 224) -- confirm against the pinned albumentations version.
train_transform = A.Compose([
    A.RandomResizedCrop(224, 224, scale=(0.8, 1.0)),
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15),
    A.ColorJitter(brightness=0.2, contrast=0.2),
    *_imagenet_tail(),
])

# Validation pipeline: deterministic resize + centre crop only, so metrics
# are not affected by random augmentation.
val_transform = A.Compose([
    A.Resize(256, 256),
    A.CenterCrop(224, 224),
    *_imagenet_tail(),
])
| Model | Params | ImageNet Acc | Speed |
|---|---|---|---|
| EfficientNet-B0 | 5.3M | 77% | Fast |
| ResNet-50 | 25.6M | 76% | Fast |
| ViT-B/16 | 86M | 84% | Slow |
import timm
class TransferClassifier(torch.nn.Module):
    """Transfer-learning classifier: a timm backbone plus a new linear head.

    The backbone starts frozen so only the linear head receives gradient
    updates; call ``unfreeze()`` later to fine-tune the whole network.

    Args:
        backbone: Name of the timm architecture to use as feature extractor.
        num_classes: Number of output classes for the linear head.
        pretrained: Whether timm should load pretrained backbone weights.
            Defaults to True (the original behaviour); pass False to avoid
            the weight download, e.g. in offline unit tests.
    """

    def __init__(self, backbone='efficientnet_b0', num_classes=10,
                 pretrained=True):
        super().__init__()
        # num_classes=0 asks timm for the model without its classifier, so
        # the backbone output has ``num_features`` channels per sample.
        self.backbone = timm.create_model(
            backbone, pretrained=pretrained, num_classes=0
        )
        self.classifier = torch.nn.Linear(self.backbone.num_features, num_classes)
        # Start with a frozen backbone.
        self.freeze()

    def _set_backbone_trainable(self, trainable):
        """Set ``requires_grad`` on every backbone parameter."""
        for param in self.backbone.parameters():
            param.requires_grad = trainable

    def freeze(self):
        """Stop gradient updates to the backbone; the head stays trainable.

        NOTE: this only disables gradients. Batch-norm running statistics in
        the backbone still update while the module is in train() mode; call
        ``self.backbone.eval()`` as well if they must stay fixed.
        """
        self._set_backbone_trainable(False)

    def unfreeze(self):
        """Re-enable gradient updates for every backbone parameter."""
        self._set_backbone_trainable(True)

    def forward(self, x):
        """Return class logits for the image batch ``x``."""
        features = self.backbone(x)
        return self.classifier(features)
from ultralytics import YOLO
# Load pretrained YOLOv8n weights
model = YOLO('yolov8n.pt')

# Train on the dataset described by dataset.yaml
results = model.train(
    data='dataset.yaml',
    epochs=100,
    imgsz=640,
    batch=16,
)

# Inference: the call returns one result object per input image
results = model('image.jpg')
for r in results:
    for box in r.boxes:
        # box.cls and box.conf are one-element tensors. Convert them to
        # Python scalars first: formatting a non-scalar tensor with a
        # format spec such as ":.2f" raises TypeError.
        class_name = r.names[int(box.cls)]
        confidence = float(box.conf)
        print(f"Class: {class_name}, Conf: {confidence:.2f}")
import segmentation_models_pytorch as smp
# U-Net configuration: ResNet-50 encoder with ImageNet weights,
# 3 input channels and 21 output classes.
unet_config = dict(
    encoder_name='resnet50',
    encoder_weights='imagenet',
    in_channels=3,
    classes=21,
)
model = smp.Unet(**unet_config)

# Dice loss configured for the multiclass case
loss_fn = smp.losses.DiceLoss(mode='multiclass')
from sklearn.metrics import classification_report, confusion_matrix
def evaluate_classifier(model, dataloader, device):
    """Run ``model`` over ``dataloader`` and summarise its predictions.

    The model is switched to eval mode and is left in that mode on return.
    It is not moved to ``device`` here; only the image batches are, so the
    caller must have placed the model on ``device`` already.

    Args:
        model: Classifier whose output has the class scores on dim 1.
        dataloader: Iterable yielding ``(images, labels)`` tensor pairs.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        dict with ``'report'`` (sklearn classification report) and
        ``'confusion_matrix'`` (sklearn confusion matrix).
    """
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for images, labels in dataloader:
            outputs = model(images.to(device))
            preds = outputs.argmax(dim=1)
            all_preds.extend(preds.cpu().numpy())
            # .cpu() is a no-op for CPU labels but is required when the
            # loader yields labels on an accelerator, where .numpy() fails.
            all_labels.extend(labels.cpu().numpy())
    return {
        'report': classification_report(all_labels, all_preds),
        'confusion_matrix': confusion_matrix(all_labels, all_preds),
    }
# TODO: Fine-tune EfficientNet on CIFAR-10
# Freeze backbone first, then unfreeze
# TODO: Train YOLOv8 on custom dataset
# Create dataset.yaml and train
import pytest
import torch
def test_model_output_shape():
    """Check that the classifier emits one row of class scores per image."""
    batch_size, num_classes = 4, 10
    classifier = TransferClassifier(num_classes=num_classes)
    batch = torch.randn(batch_size, 3, 224, 224)
    logits = classifier(batch)
    assert logits.shape == (batch_size, num_classes)
def test_augmentation_preserves_shape():
    """Check that the training pipeline turns an HWC uint8 image into a 3x224x224 tensor."""
    import numpy as np
    # randint's upper bound is exclusive, so 256 is needed to cover the
    # full uint8 range including 255.
    image = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)
    augmented = train_transform(image=image)['image']
    assert augmented.shape == (3, 224, 224)
| Problem | Cause | Solution |
|---|---|---|
| Overfitting | Small dataset | More augmentation |
| Slow training | Large images | Resize, use AMP |
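| Poor detection | Wrong anchors (anchor-based detectors only; YOLOv8 is anchor-free) | Adjust anchor sizes; for YOLOv8, check labels and image size instead |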
| Memory error | Batch too large | Reduce batch size |
06-computer-vision
nlp-basics
ml-deployment
Version: 1.4.0 | Status: Production Ready
Use when working with Payload CMS projects (payload.config.ts, collections, fields, hooks, access control, Payload API). Use when debugging validation errors, security issues, relationship queries, transactions, or hook behavior.
Applies Anthropic's official brand colors and typography to any sort of artifact that may benefit from having Anthropic's look-and-feel. Use it when brand colors or style guidelines, visual formatting, or company design standards apply.
Creating algorithmic art using p5.js with seeded randomness and interactive parameter exploration. Use this when users request creating art using code, generative art, algorithmic art, flow fields, or particle systems. Create original algorithmic art rather than copying existing artists' work to avoid copyright violations.