From r-tidy-modeling
Comprehensive model evaluation using yardstick and related packages. Covers metrics for classification, regression, and survival outcomes, plus calibration and uncertainty quantification.
npx claudepluginhub choxos/biostatagent --plugin r-tidy-modeling
This skill uses the workspace's default tool permissions.
Comprehensive model evaluation using yardstick and related packages. Covers metrics for classification, regression, and survival outcomes, plus calibration and uncertainty quantification.
Creates new Angular apps using Angular CLI with flags for routing, SSR, SCSS, prefixes, and AI config. Follows best practices for modern TypeScript/Angular development. Use when starting Angular projects.
Generates Angular code and provides architectural guidance for projects, components, services, reactivity with signals, forms, dependency injection, routing, SSR, ARIA accessibility, animations, Tailwind styling, testing, and CLI tooling.
Provides UI/UX resources: 50+ styles, color palettes, font pairings, guidelines, charts for web/mobile across React, Next.js, Vue, Svelte, Tailwind, React Native, Flutter. Aids planning, building, reviewing interfaces.
Comprehensive model evaluation using yardstick and related packages. Covers metrics for classification, regression, and survival outcomes, plus calibration and uncertainty quantification.
library(yardstick)
# Metrics on hard class predictions ----
# Every call compares the predicted label in `.pred_class` with `outcome`.
accuracy(predictions, truth = outcome, estimate = .pred_class)
# Sensitivity (recall)
sens(predictions, truth = outcome, estimate = .pred_class)
# Specificity
spec(predictions, truth = outcome, estimate = .pred_class)
# Positive predictive value (precision)
ppv(predictions, truth = outcome, estimate = .pred_class)
# Negative predictive value
npv(predictions, truth = outcome, estimate = .pred_class)
# F-measure (F1 score)
f_meas(predictions, truth = outcome, estimate = .pred_class)
# Cohen's kappa
kap(predictions, truth = outcome, estimate = .pred_class)
# Matthews correlation coefficient
mcc(predictions, truth = outcome, estimate = .pred_class)
# Metrics on predicted class probabilities ----
# Each call scores the probability column `.pred_positive_class` against
# `outcome`.
# Area under the ROC curve
roc_auc(predictions, truth = outcome, .pred_positive_class)
# Area under the precision-recall curve (better for imbalanced data)
pr_auc(predictions, truth = outcome, .pred_positive_class)
# Brier score
brier_class(predictions, truth = outcome, .pred_positive_class)
# Mean log loss
mn_log_loss(predictions, truth = outcome, .pred_positive_class)
# Gain capture (lift)
gain_capture(predictions, truth = outcome, .pred_positive_class)
# Multi-class metrics ----
# Accuracy has no averaging variants: for a multi-class outcome it is simply
# the proportion of correctly classified rows.
predictions |>
  accuracy(truth = outcome, estimate = .pred_class)
# Macro-averaged F1 (compute per class, then take the unweighted mean)
predictions |>
  f_meas(truth = outcome, estimate = .pred_class, estimator = "macro")
# Micro-averaged F1 (pool the counts across classes, then calculate)
predictions |>
  f_meas(truth = outcome, estimate = .pred_class, estimator = "micro")
# Macro average weighted by class prevalence
predictions |>
  f_meas(truth = outcome, estimate = .pred_class, estimator = "macro_weighted")
# Multi-class ROC AUC. `.pred_class1:.pred_classN` stands for the full range
# of class-probability columns. With no `estimator` given, yardstick uses the
# Hand-Till method for multi-class outcomes, which is NOT one-vs-all.
predictions |>
  roc_auc(truth = outcome, .pred_class1:.pred_classN)
# One-vs-all ROC AUC: request macro averaging explicitly
# (use "macro_weighted" to weight each class by its prevalence).
predictions |>
  roc_auc(truth = outcome, .pred_class1:.pred_classN, estimator = "macro")
# Bundle the metrics once so every evaluation reports the same set
class_metrics <- metric_set(accuracy, sens, spec, ppv, f_meas, roc_auc)
# Hand the bundle to the tuner
tune_results <- tune_grid(
  workflow,
  resamples = cv_folds,
  metrics = class_metrics
)
# Or score a data frame of predictions directly: the class metrics read
# `estimate`, the probability metric reads the unnamed probability column.
class_metrics(
  predictions,
  truth = outcome,
  estimate = .pred_class,
  .pred_positive
)
# Regression metrics ----
# Each call compares the numeric prediction `.pred` with `outcome`.
# Root mean squared error (penalizes large errors)
rmse(predictions, truth = outcome, estimate = .pred)
# Mean absolute error (robust to outliers)
mae(predictions, truth = outcome, estimate = .pred)
# R-squared
rsq(predictions, truth = outcome, estimate = .pred)
# Traditional R-squared (can be negative)
rsq_trad(predictions, truth = outcome, estimate = .pred)
# Mean absolute percentage error
mape(predictions, truth = outcome, estimate = .pred)
# Symmetric mean absolute percentage error
smape(predictions, truth = outcome, estimate = .pred)
# Huber loss (robust to outliers)
huber_loss(predictions, truth = outcome, estimate = .pred)
# Concordance correlation coefficient
ccc(predictions, truth = outcome, estimate = .pred)
# Index of ideality of correlation
iic(predictions, truth = outcome, estimate = .pred)
# Bundle the regression metrics, then score the predictions in one call
reg_metrics <- metric_set(rmse, mae, rsq, mape)
reg_metrics(predictions, truth = outcome, estimate = .pred)
# ROC curve: compute the curve data, then plot it
roc_data <- roc_curve(predictions, truth = outcome, .pred_positive)
autoplot(roc_data)
# Several models at once: group by `model` before computing the curves
all_predictions |>
  group_by(model) |>
  roc_curve(truth = outcome, .pred_positive) |>
  autoplot()
# Precision-recall curve
pr_data <- pr_curve(predictions, truth = outcome, .pred_positive)
autoplot(pr_data)
# Gain curve
gain_data <- gain_curve(predictions, truth = outcome, .pred_positive)
autoplot(gain_data)
# Lift curve
lift_data <- lift_curve(predictions, truth = outcome, .pred_positive)
autoplot(lift_data)
# Calibration plots ----
# cal_plot_breaks() and cal_plot_windowed() live in the probably package, so
# attach it before calling them.
library(probably)
# Binned calibration plot. The function returns a ggplot object directly, so
# print it (or add ggplot2 layers); autoplot() has no method for ggplot
# objects and would fail here.
cal_plot <- predictions |>
  cal_plot_breaks(truth = outcome, .pred_positive, num_breaks = 10)
cal_plot
# Windowed calibration plot (also returned as a ggplot object)
cal_plot <- predictions |>
  cal_plot_windowed(truth = outcome, .pred_positive)
cal_plot
# Cross-tabulate predicted classes against the truth
conf_mat <- conf_mat(predictions, truth = outcome, estimate = .pred_class)
# Plot the matrix as a heatmap and as a mosaic
conf_mat |> autoplot(type = "heatmap")
conf_mat |> autoplot(type = "mosaic")
# Summarise the confusion matrix into metrics
conf_mat |> summary()
library(probably)
# Logistic calibration (Platt scaling): estimate the calibrator, then apply
# it to the predictions
cal_obj <- cal_estimate_logistic(predictions, truth = outcome, .pred_positive)
calibrated <- cal_apply(predictions, cal_obj)
# Isotonic regression calibrator
cal_obj <- cal_estimate_isotonic(predictions, truth = outcome, .pred_positive)
# Beta calibrator
cal_obj <- cal_estimate_beta(predictions, truth = outcome, .pred_positive)
# Workflow that a calibration step would be attached to.
# `workflow()` has to be called: piping the bare function `workflow` into
# add_model() fails because add_model() needs a workflow object.
calibrated_wf <- workflow() |>
  add_model(model_spec) |>
  add_recipe(recipe)
# NOTE: there is no `add_calibration()` function in workflows, so the
# conceptual step is left out of the runnable code. Calibration can be done
# after fitting with cal_estimate_*() and cal_apply() from probably.
# NOTE(review): recent workflows releases appear to support post-processing
# via add_tailor() with a tailor object (for example
# tailor::adjust_probability_calibration()) - confirm against the installed
# package versions before relying on it.
library(probably)
# Evaluate J-index (sens + spec - 1), sensitivity and specificity over a grid
# of candidate probability thresholds
threshold_perf <- predictions |>
  threshold_perf(
    truth = outcome,
    .pred_positive,
    thresholds = seq(0.1, 0.9, by = 0.05),
    metrics = metric_set(j_index, sens, spec)
  )
# Keep the single threshold with the highest J-index. `with_ties = FALSE`
# guarantees exactly one row even when several thresholds share the maximum.
best_threshold <- threshold_perf |>
  filter(.metric == "j_index") |>
  slice_max(.estimate, n = 1, with_ties = FALSE)
# Re-derive the hard class predictions using the selected threshold rather
# than a hard-coded value
predictions |>
  mutate(
    .pred_class = make_two_class_pred(
      .pred_positive,
      levels(outcome),
      threshold = best_threshold$.threshold
    )
  )
# Cost-sensitive evaluation with different misclassification costs.
# classification_cost() expects `costs` as a data frame with the columns
# `truth`, `estimate` and `cost` (one row per truth/estimate pair); a plain
# matrix is not accepted.
# Assumes the first factor level of `outcome` is the event ("positive")
# class, matching yardstick's default `event_level = "first"` - TODO confirm.
outcome_levels <- levels(predictions$outcome)
misclass_costs <- data.frame(
  truth = c(outcome_levels[1], outcome_levels[2]),
  estimate = c(outcome_levels[2], outcome_levels[1]),
  cost = c(5, 1) # a false negative costs 5x a false positive
)
predictions |>
  classification_cost(
    truth = outcome,
    .pred_positive,
    costs = misclass_costs
  )
# Recompute accuracy on each of 1000 bootstrap resamples of the predictions
boot_metrics <- bootstraps(predictions, times = 1000) |>
  mutate(
    metrics = map(
      splits,
      \(split) accuracy(analysis(split), truth = outcome, estimate = .pred_class)
    )
  ) |>
  unnest(metrics)
# Percentile interval: 2.5% and 97.5% quantiles of the bootstrap estimates
quantile(boot_metrics$.estimate, probs = c(0.025, 0.975))
library(probably)
# Conformal prediction intervals (numeric outcomes / regression workflows).
# probably has no `conformal_cv()`; the CV-based constructor is
# int_conformal_cv(), which takes resampling results rather than a formula.
# The resampling run must keep its out-of-sample predictions and the fitted
# workflow of every resample, hence `save_pred = TRUE` and `extract = I`.
cv_ctrl <- control_resamples(save_pred = TRUE, extract = I)
cv_results <- workflow |>
  fit_resamples(resamples = cv_folds, control = cv_ctrl)
conf_obj <- int_conformal_cv(cv_results)
# Predict with intervals at the requested confidence level
predict(conf_obj, new_data, level = 0.95)
# Run every workflow in the set over the same resamples
wf_results <- workflow_map(workflow_set, resamples = cv_folds)
# Visual and ranked comparison
autoplot(wf_results)
rank_results(wf_results, rank_metric = "roc_auc")
# Informal statistical comparison: mean ROC AUC per workflow with a range of
# one standard error either side
collect_metrics(wf_results) |>
  filter(.metric == "roc_auc") |>
  ggplot(
    aes(
      x = wflow_id,
      y = mean,
      ymin = mean - std_err,
      ymax = mean + std_err
    )
  ) +
  geom_pointrange()
# Resample-level comparison
library(tidyposterior)
# ANOVA-like model of the resampled ROC AUC values
perf_mod <- wf_results |>
  perf_mod(metric = "roc_auc")
# Contrast between the two named models
perf_mod |>
  contrast_models(list_1 = "model_A", list_2 = "model_B")
# Cross-validated metrics from tuning; these are summarised over resamples,
# so the estimate is stored in the `mean` column
train_metrics <- collect_metrics(tune_results)
# Test-set metrics; the estimate is stored in `.estimate`
# (assumes `final_fit` comes from last_fit() - TODO confirm)
test_metrics <- final_fit |>
  collect_metrics()
# Stack both sets to check for overfitting. The CV `mean` column is renamed
# to `.estimate` first so the two sets share one value column instead of
# landing in separate, half-empty columns.
bind_rows(
  train_metrics |> rename(.estimate = mean) |> mutate(set = "CV"),
  test_metrics |> mutate(set = "Test")
)
# Default metric summary computed separately within each `subgroup`
metrics(
  group_by(predictions, subgroup),
  truth = outcome,
  estimate = .pred_class,
  .pred_positive
)
| Problem | Primary Metric | Secondary Metrics |
|---|---|---|
| Binary balanced | ROC AUC | Accuracy, F1 |
| Binary imbalanced | PR AUC, F1 | Sens, PPV |
| Multi-class | Macro F1 | Accuracy, Kappa |
| Regression | RMSE | MAE, R² |
| Regression with outliers | MAE, Huber | RMSE |
| Rare events | PR AUC | Sens, PPV |
| Medical diagnosis | Sens, Spec | NPV, PPV |