Builds and trains ML models using scikit-learn, PyTorch, and TensorFlow for classification, regression, and clustering. Covers data preparation, model evaluation, and Python code examples.
Install: npx claudepluginhub joshuarweaver/cascade-code-languages-misc-1 --plugin aj-geddes-useful-ai-prompts-4
This skill uses the workspace's default tool permissions.
Training machine learning models involves selecting appropriate algorithms, preparing the data, and optimizing model parameters to achieve strong predictive performance. The script below walks through this workflow on a synthetic binary classification task, training and comparing scikit-learn, PyTorch, and TensorFlow/Keras models.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, confusion_matrix, roc_auc_score)
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import tensorflow as tf
from tensorflow import keras
# 1. Generate synthetic dataset
np.random.seed(42)
n_samples = 1000
n_features = 20
X = np.random.randn(n_samples, n_features)
y = (X[:, 0] + X[:, 1] - X[:, 2] + np.random.randn(n_samples) * 0.5 > 0).astype(int)
# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Normalize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
print("Dataset shapes:")
print(f"Training: {X_train_scaled.shape}, Testing: {X_test_scaled.shape}")
print(f"Class distribution: {np.bincount(y_train)}")
# 2. Scikit-learn models
print("\n=== Scikit-learn Models ===")
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(n_estimators=100, random_state=42),
}
sklearn_results = {}
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    y_pred_proba = model.predict_proba(X_test_scaled)[:, 1]
    sklearn_results[name] = {
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
        'f1': f1_score(y_test, y_pred),
        'roc_auc': roc_auc_score(y_test, y_pred_proba)
    }
    print(f"\n{name}:")
    for metric, value in sklearn_results[name].items():
        print(f"  {metric}: {value:.4f}")
# 3. PyTorch neural network
print("\n=== PyTorch Model ===")
class NeuralNetPyTorch(nn.Module):
    """Feed-forward binary classifier: two hidden layers with dropout."""

    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = torch.sigmoid(self.fc3(x))  # probability output, paired with BCELoss
        return x
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
pytorch_model = NeuralNetPyTorch(n_features).to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(pytorch_model.parameters(), lr=0.001)
# Create data loaders
# Targets reshaped to (N, 1) to match the model's output shape for BCELoss
train_dataset = TensorDataset(torch.FloatTensor(X_train_scaled),
                              torch.FloatTensor(y_train).unsqueeze(1))
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# Train PyTorch model
epochs = 50
pytorch_losses = []
for epoch in range(epochs):
    total_loss = 0
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        optimizer.zero_grad()
        outputs = pytorch_model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    pytorch_losses.append(total_loss / len(train_loader))
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {pytorch_losses[-1]:.4f}")
# Evaluate PyTorch
pytorch_model.eval()
with torch.no_grad():
    y_pred_pytorch = pytorch_model(torch.FloatTensor(X_test_scaled).to(device))
    y_pred_pytorch = (y_pred_pytorch.cpu().numpy() > 0.5).astype(int).flatten()
print(f"\nPyTorch Accuracy: {accuracy_score(y_test, y_pred_pytorch):.4f}")
# 4. TensorFlow/Keras model
print("\n=== TensorFlow/Keras Model ===")
tf_model = keras.Sequential([
    keras.layers.Input(shape=(n_features,)),  # explicit Input layer, preferred over passing input_shape to Dense
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(1, activation='sigmoid')
])
tf_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)
history = tf_model.fit(
    X_train_scaled, y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
    verbose=0
)
y_pred_tf = (tf_model.predict(X_test_scaled) > 0.5).astype(int).flatten()
print(f"TensorFlow Accuracy: {accuracy_score(y_test, y_pred_tf):.4f}")
# 5. Visualization
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
# Model comparison
models_names = list(sklearn_results.keys()) + ['PyTorch', 'TensorFlow']
accuracies = [sklearn_results[m]['accuracy'] for m in sklearn_results] + \
             [accuracy_score(y_test, y_pred_pytorch),
              accuracy_score(y_test, y_pred_tf)]
axes[0, 0].bar(range(len(models_names)), accuracies, color='steelblue')
axes[0, 0].set_xticks(range(len(models_names)))
axes[0, 0].set_xticklabels(models_names, rotation=45)
axes[0, 0].set_ylabel('Accuracy')
axes[0, 0].set_title('Model Comparison')
axes[0, 0].set_ylim([0, 1])
# Training loss curves
axes[0, 1].plot(pytorch_losses, label='PyTorch', linewidth=2)
axes[0, 1].plot(history.history['loss'], label='TensorFlow', linewidth=2)
axes[0, 1].set_xlabel('Epoch')
axes[0, 1].set_ylabel('Loss')
axes[0, 1].set_title('Training Loss Comparison')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)
# Scikit-learn metrics
metrics = ['accuracy', 'precision', 'recall', 'f1']
rf_metrics = [sklearn_results['Random Forest'][m] for m in metrics]
axes[1, 0].bar(metrics, rf_metrics, color='coral')
axes[1, 0].set_ylabel('Score')
axes[1, 0].set_title('Random Forest Metrics')
axes[1, 0].set_ylim([0, 1])
# Validation accuracy over epochs
axes[1, 1].plot(history.history['accuracy'], label='Training', linewidth=2)
axes[1, 1].plot(history.history['val_accuracy'], label='Validation', linewidth=2)
axes[1, 1].set_xlabel('Epoch')
axes[1, 1].set_ylabel('Accuracy')
axes[1, 1].set_title('TensorFlow Training History')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('model_training_comparison.png', dpi=100, bbox_inches='tight')
print("\nVisualization saved as 'model_training_comparison.png'")
print("\nModel training completed!")