Comprehensive hyperparameter optimization using the tune package. Covers grid search, iterative search, racing methods, and Bayesian optimization.
/plugin marketplace add choxos/BiostatAgent
/plugin install choxos-r-tidy-modeling-plugins-r-tidy-modeling@choxos/BiostatAgent
This skill inherits all available tools. When active, it can use any tool Claude has access to.
Comprehensive hyperparameter optimization using the tune package. Covers grid search, iterative search, racing methods, and Bayesian optimization.
library(tidymodels)
# Random forest specification. Arguments set to tune() are placeholders whose
# values are chosen later by a tuning function; `trees` is held fixed at 1000.
rf_spec <-
  rand_forest(mtry = tune(), trees = 1000, min_n = tune()) |>
  set_engine("ranger") |>
  set_mode("classification")
# Recipe with a tunable preprocessing step.
# Predictors are centred and scaled *before* PCA: principal components are
# driven by variance, so predictors measured in large units would otherwise
# dominate the components. `num_comp = tune()` leaves the number of retained
# components to be chosen during tuning.
rec <- recipe(outcome ~ ., data = train_data) |>
  step_normalize(all_numeric_predictors()) |>
  step_pca(all_numeric_predictors(), num_comp = tune())
library(dials)
# Inspect the default parameter objects (type, range, transformation)
mtry()
min_n()
trees()
learn_rate()
penalty()

# Override the default ranges
mtry(range = c(2, 20))
min_n(range = c(5, 50))
# With a log10 transformation the range is given in log10 units,
# i.e. learning rates from 10^-3 to 10^-1
learn_rate(range = c(-3, -1), trans = log10_trans())

# mtry() has a data-dependent upper bound. finalize() derives it from the
# number of columns it receives, so pass the predictors only: including the
# outcome column would permit an mtry one larger than the predictor count.
# Assumes the outcome column is named `outcome`, as in the recipe formula.
mtry_final <- finalize(mtry(), subset(train_data, select = -outcome))
# Regular (full-factorial) grid: the same number of evenly spaced levels for
# each parameter, here 5 x 5 = 25 candidate combinations.
regular_grid <- grid_regular(
  mtry(range = c(2, 10)),
  min_n(range = c(2, 20)),
  levels = 5
)

# Regular grid with a separate level count per parameter, supplied as a
# named vector (5 values of mtry crossed with 3 values of min_n).
regular_grid <- grid_regular(
  mtry(range = c(2, 10)),
  min_n(range = c(2, 20)),
  levels = c(mtry = 5, min_n = 3)
)
# Random grid: 50 candidates drawn at random from the parameter ranges.
random_grid <- grid_random(
  mtry(range = c(2, 10)),
  min_n(range = c(2, 20)),
  size = 50
)

# Space-filling designs spread the candidates over the parameter space more
# evenly than purely random draws.
# NOTE(review): newer dials releases appear to deprecate
# grid_latin_hypercube() and grid_max_entropy() in favour of
# grid_space_filling() — confirm against the installed version.

# Latin hypercube design with 30 candidates.
lhs_grid <- grid_latin_hypercube(
  mtry(range = c(2, 10)),
  min_n(range = c(2, 20)),
  size = 30
)

# Maximum entropy design with 30 candidates.
maxent_grid <- grid_max_entropy(
  mtry(range = c(2, 10)),
  min_n(range = c(2, 20)),
  size = 30
)
# Grid search where tune builds the grid itself: an integer `grid` requests
# that many candidate combinations. Progress is printed (verbose = TRUE).
tune_results <- tune_grid(
  workflow,
  resamples = cv_folds,
  grid = 20,
  metrics = metric_set(roc_auc, accuracy),
  control = control_grid(verbose = TRUE)
)

# Grid search over a user-supplied grid of candidate values (`my_grid`).
tune_results <- tune_grid(
  workflow,
  resamples = cv_folds,
  grid = my_grid,
  metrics = metric_set(roc_auc, accuracy)
)
# Bayesian optimisation from scratch: 10 initial candidates, then up to 50
# search iterations, stopping early after 20 iterations without improvement.
tune_results <- tune_bayes(
  workflow,
  resamples = cv_folds,
  iter = 50,
  initial = 10,
  metrics = metric_set(roc_auc),
  control = control_bayes(verbose = TRUE, no_improve = 20)
)

# Two-stage search, stage 1: a small grid search provides the starting points.
initial_grid <- tune_grid(
  workflow,
  resamples = cv_folds,
  grid = 10,
  metrics = metric_set(roc_auc)
)

# Stage 2: pass the grid results as `initial` so the Bayesian search continues
# from them for up to 40 further iterations.
bayes_results <- tune_bayes(
  workflow,
  resamples = cv_folds,
  iter = 40,
  initial = initial_grid,
  metrics = metric_set(roc_auc)
)
# The acquisition function is selected with the `objective` argument of
# tune_bayes() itself; it is not an option of control_bayes(), and an
# argument may only be supplied once per call. Pick exactly one of:
#   exp_improve()          expected improvement (default)
#   prob_improve()         probability of improvement
#   conf_bound(kappa = 2)  confidence bound
bayes_results <- workflow |>
  tune_bayes(
    resamples = cv_folds,
    iter = 40,
    metrics = metric_set(roc_auc),
    objective = exp_improve()
  )
library(finetune)
# ANOVA racing: all 50 candidates are evaluated on the first 3 resamples
# (burn_in); afterwards clearly inferior candidates are dropped so that later
# resamples are spent only on the contenders.
race_results <- tune_race_anova(
  workflow,
  resamples = cv_folds,
  grid = 50,
  metrics = metric_set(roc_auc),
  control = control_race(verbose = TRUE, burn_in = 3)
)

# Visualise how candidates were eliminated over the resamples.
plot_race(race_results)

# Win/loss racing: the same racing idea, with candidates compared through
# win/loss statistics instead of an ANOVA model; default control settings.
race_results <- tune_race_win_loss(
  workflow,
  resamples = cv_folds,
  grid = 50,
  metrics = metric_set(roc_auc),
  control = control_race()
)
library(finetune)
# Simulated annealing: start from 5 initial candidates and run up to 50
# iterations of local search, with a custom cooling coefficient and progress
# printed.
sa_results <- tune_sim_anneal(
  workflow,
  resamples = cv_folds,
  iter = 50,
  initial = 5,
  metrics = metric_set(roc_auc),
  control = control_sim_anneal(verbose = TRUE, cooling_coef = 0.1)
)
# Numerically best candidate for the chosen metric
best_params <- select_best(tune_results, metric = "roc_auc")

# Simplest candidate within one standard error of the best.
# The unnamed expressions define "simplest": candidates are sorted by them
# (ascending), so here smaller mtry, then smaller min_n, is preferred.
best_params <- select_by_one_std_err(
  tune_results,
  mtry, min_n,
  metric = "roc_auc"
)

# Simplest candidate whose performance is within `limit` percent of the best.
# As above, at least one sorting expression is required; calling
# select_by_pct_loss() without any is an error.
best_params <- select_by_pct_loss(
  tune_results,
  mtry, min_n,
  metric = "roc_auc",
  limit = 2 # within 2% of best
)
# Default plot of the tuning results
autoplot(tune_results)

# Marginal view: performance against each tuning parameter
autoplot(tune_results, type = "marginals")

# NOTE(review): in tune, type = "parameters" appears to plot parameter values
# against the search iteration and to apply only to iterative-search results
# (e.g. from tune_bayes()) — confirm tune_results comes from such a search.
autoplot(tune_results, type = "parameters")

# Resampled performance estimates for every candidate, as a tibble
metrics_df <- collect_metrics(tune_results)
# Substitute the selected hyperparameter values for the tune() placeholders
final_wf <- finalize_workflow(workflow, best_params)

# Fit the finalised workflow via the initial split object `data_split`
final_fit <- last_fit(final_wf, data_split)

# Metrics produced by last_fit()
collect_metrics(final_fit)
library(doParallel)
# Register a PSOCK cluster as the parallel backend.
# Leave one core free, but never request fewer than one worker:
# detectCores() can return 1 (giving 0 workers) or NA.
n_workers <- max(1L, parallel::detectCores() - 1L, na.rm = TRUE)
cl <- makePSOCKcluster(n_workers)
registerDoParallel(cl)

# Tuning picks up the registered backend without further arguments.
# tryCatch(finally = ) guarantees the workers are shut down even when
# tuning stops with an error, so no orphaned R processes are left behind.
tune_results <- tryCatch(
  workflow |>
    tune_grid(
      resamples = cv_folds,
      grid = 100
    ),
  finally = stopCluster(cl)
)
library(future)
# Run on 4 background R sessions via the future framework
plan(multisession, workers = 4)

# Racing over 100 candidates; parallel_over = "everything" parallelises over
# both resamples and candidate parameter combinations.
race_results <- tune_race_anova(
  workflow,
  resamples = cv_folds,
  grid = 100,
  control = control_race(parallel_over = "everything")
)
# XGBoost specification with seven hyperparameters left open for tuning
xgb_spec <- boost_tree(
  trees = tune(),
  tree_depth = tune(),
  min_n = tune(),
  loss_reduction = tune(),
  sample_size = tune(),
  mtry = tune(),
  learn_rate = tune()
) |>
  set_engine("xgboost") |>
  set_mode("classification")

# Space-filling grid of 50 candidates.
# Grid column names must match the ids of the tune() placeholders:
#   * `sample_size = sample_prop(...)` samples a proportion but names the
#     column `sample_size`, matching the model argument being tuned.
#   * mtry's upper bound is finalised from the predictors only; the outcome
#     column is dropped so it is not counted. Assumes the outcome column is
#     named `outcome`, as in the recipe formula.
xgb_grid <- grid_latin_hypercube(
  trees(range = c(100, 1500)),
  tree_depth(range = c(3, 15)),
  min_n(range = c(2, 30)),
  loss_reduction(),
  sample_size = sample_prop(range = c(0.5, 1)),
  finalize(mtry(), subset(train_data, select = -outcome)),
  learn_rate(range = c(-3, -1)), # log10 scale: 10^-3 to 10^-1
  size = 50
)

# Racing discards weak candidates early, which keeps a 50-point grid
# affordable.
# NOTE(review): if `rec` itself contains tune() placeholders (e.g. a tunable
# num_comp), those parameters must also appear in the grid — confirm.
xgb_results <- workflow(rec, xgb_spec) |>
  tune_race_anova(
    resamples = cv_folds,
    grid = xgb_grid,
    metrics = metric_set(roc_auc)
  )
| Function | Key Control Options |
|---|---|
| tune_grid | save_pred, verbose, allow_par |
| tune_bayes | no_improve, uncertain (control_bayes); objective (argument of tune_bayes itself) |
| tune_race_* | burn_in, num_ties, alpha |
| tune_sim_anneal | cooling_coef, restart |
select_by_one_std_err for simpler models.
This skill should be used when the user asks about libraries, frameworks, API references, or needs code examples. Activates for setup questions, code generation involving libraries, or mentions of specific frameworks like React, Vue, Next.js, Prisma, Supabase, etc.