From r-tidy-modeling
Comprehensive hyperparameter optimization using the tune package. Covers grid search, iterative search, racing methods, and Bayesian optimization.
Install: `npx claudepluginhub choxos/biostatagent --plugin r-tidy-modeling`
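The snippets below assume a train/test split, a factor `outcome` column, and resampling folds already exist. A minimal setup sketch (the data frame `df` is a hypothetical placeholder; the names `data_split`, `train`, and `cv_folds` are reused throughout):

```r
library(tidymodels)

# Hypothetical data frame `df` with a factor column `outcome`
set.seed(123)
data_split <- initial_split(df, prop = 0.8, strata = outcome)
train <- training(data_split)
cv_folds <- vfold_cv(train, v = 5, strata = outcome)
```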
```r
library(tidymodels)

# Use tune() placeholders in the model specification
rf_spec <- rand_forest(
  mtry = tune(),
  trees = 1000,
  min_n = tune()
) |>
  set_engine("ranger") |>
  set_mode("classification")

# Recipe steps can be tuned too (normalize before PCA)
rec <- recipe(outcome ~ ., data = train) |>
  step_normalize(all_numeric_predictors()) |>
  step_pca(all_numeric_predictors(), num_comp = tune())
```
```r
library(dials)

# View parameter objects and their default ranges
mtry()
min_n()
trees()
learn_rate()
penalty()

# Customize parameter ranges
mtry(range = c(2, 20))
min_n(range = c(5, 50))
learn_rate(range = c(-3, -1))  # learn_rate() is on the log10 scale by default

# Data-dependent parameters (e.g. mtry) must be finalized against the data
mtry_final <- finalize(mtry(), train)
```
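When a workflow tunes several parameters at once, the whole parameter set can be extracted, adjusted, and finalized in one pass. A sketch, assuming the workflow object `wf` built further below:

```r
# Pull all tune() parameters from a workflow, tweak one, finalize the rest
param_set <- extract_parameter_set_dials(wf) |>
  update(mtry = mtry(range = c(2, 20))) |>
  finalize(train)
```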
```r
# Evenly spaced (regular) grid
regular_grid <- grid_regular(
  mtry(range = c(2, 10)),
  min_n(range = c(2, 20)),
  levels = 5  # 5 levels per parameter = 25 combinations
)

# Different number of levels per parameter
regular_grid <- grid_regular(
  mtry(range = c(2, 10)),
  min_n(range = c(2, 20)),
  levels = c(mtry = 5, min_n = 3)
)

# Random sampling of the parameter space
random_grid <- grid_random(
  mtry(range = c(2, 10)),
  min_n(range = c(2, 20)),
  size = 50
)

# Latin hypercube (better coverage than random)
lhs_grid <- grid_latin_hypercube(
  mtry(range = c(2, 10)),
  min_n(range = c(2, 20)),
  size = 30
)

# Maximum entropy grid
maxent_grid <- grid_max_entropy(
  mtry(range = c(2, 10)),
  min_n(range = c(2, 20)),
  size = 30
)
```
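Recent dials releases supersede `grid_max_entropy()` and `grid_latin_hypercube()` with a unified `grid_space_filling()`; a sketch, assuming dials >= 1.3.0:

```r
# Unified space-filling design (default type is "audze_eglais")
sf_grid <- grid_space_filling(
  mtry(range = c(2, 10)),
  min_n(range = c(2, 20)),
  size = 30
)
```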
```r
# Combine recipe and model into a workflow
wf <- workflow(rec, rf_spec)

# Basic grid search
tune_results <- wf |>
  tune_grid(
    resamples = cv_folds,
    grid = 20,  # auto-generates a 20-point space-filling grid
    metrics = metric_set(roc_auc, accuracy),
    control = control_grid(verbose = TRUE)
  )

# With an explicit grid
tune_results <- wf |>
  tune_grid(
    resamples = cv_folds,
    grid = regular_grid,  # any grid from above
    metrics = metric_set(roc_auc, accuracy)
  )
```
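Before committing to a selection rule, it helps to eyeball the leading candidates:

```r
# Top 5 candidates ranked by ROC AUC
show_best(tune_results, metric = "roc_auc", n = 5)
```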
```r
# Iterative Bayesian search
tune_results <- wf |>
  tune_bayes(
    resamples = cv_folds,
    iter = 50,     # maximum iterations
    initial = 10,  # number of initial grid points
    metrics = metric_set(roc_auc),
    control = control_bayes(
      verbose = TRUE,
      no_improve = 20  # stop if no improvement for 20 iterations
    )
  )

# Warm start: begin with grid search results...
initial_grid <- wf |>
  tune_grid(
    resamples = cv_folds,
    grid = 10,
    metrics = metric_set(roc_auc)
  )

# ...then continue with Bayesian optimization
bayes_results <- wf |>
  tune_bayes(
    resamples = cv_folds,
    iter = 40,
    initial = initial_grid,  # use grid results as initial points
    metrics = metric_set(roc_auc)
  )
```
```r
# Acquisition functions for tune_bayes() (pick one):
control_bayes(objective = exp_improve())          # expected improvement (default)
control_bayes(objective = prob_improve())         # probability of improvement
control_bayes(objective = conf_bound(kappa = 2))  # confidence bound
```
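For iterative search results, autoplot can also show how the metric evolved over the search; a sketch using the `bayes_results` object from above (the `type = "performance"` option is assumed to apply to iterative results):

```r
# Metric values across Bayesian optimization iterations
autoplot(bayes_results, type = "performance")
```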
```r
library(finetune)

# ANOVA racing: eliminate poor configurations early
race_results <- wf |>
  tune_race_anova(
    resamples = cv_folds,
    grid = 50,
    metrics = metric_set(roc_auc),
    control = control_race(
      verbose = TRUE,
      burn_in = 3  # evaluate all candidates on the first 3 resamples
    )
  )

# Plot how candidates are eliminated across resamples
plot_race(race_results)

# Win/loss racing: pairwise statistical comparison of configurations
race_results <- wf |>
  tune_race_win_loss(
    resamples = cv_folds,
    grid = 50,
    metrics = metric_set(roc_auc),
    control = control_race()
  )
```
```r
# Simulated annealing search (also in finetune)
sa_results <- wf |>
  tune_sim_anneal(
    resamples = cv_folds,
    iter = 50,
    initial = 5,
    metrics = metric_set(roc_auc),
    control = control_sim_anneal(
      verbose = TRUE,
      cooling_coef = 0.1
    )
  )
```
```r
# Best configuration by the primary metric
best_params <- select_best(tune_results, metric = "roc_auc")

# Best within one standard error of the optimum (more parsimonious)
best_params <- select_by_one_std_err(
  tune_results,
  metric = "roc_auc",
  mtry, min_n  # sort so that simpler models come first
)

# Best within a percent loss of the optimum
best_params <- select_by_pct_loss(
  tune_results,
  metric = "roc_auc",
  mtry, min_n,
  limit = 2  # within 2% of the best
)
```
```r
# Performance across all parameter combinations
autoplot(tune_results)

# Marginal effect of each parameter
autoplot(tune_results, type = "marginals")

# Performance vs. parameter values
autoplot(tune_results, type = "parameters")

# Collect all resampled metrics as a tibble
metrics_df <- collect_metrics(tune_results)

# Finalize the workflow with the best parameters
final_wf <- wf |>
  finalize_workflow(best_params)

# Fit on the full training set, evaluate on the test set
final_fit <- final_wf |>
  last_fit(data_split)

# Test-set metrics
collect_metrics(final_fit)
```
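The trained workflow inside the `last_fit()` result can be extracted for downstream prediction; a short sketch using the test partition:

```r
# Extract the fitted workflow and predict class probabilities on the test set
fitted_wf <- extract_workflow(final_fit)
predict(fitted_wf, new_data = testing(data_split), type = "prob")
```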
```r
library(doParallel)

# Register a PSOCK cluster as the parallel backend
cl <- makePSOCKcluster(parallel::detectCores() - 1)
registerDoParallel(cl)

# Tuning automatically runs across the registered workers
tune_results <- wf |>
  tune_grid(
    resamples = cv_folds,
    grid = 100
  )

# Stop the cluster when done
stopCluster(cl)
```

```r
library(future)

# future-based parallelism
plan(multisession, workers = 4)

# With finetune racing
race_results <- wf |>
  tune_race_anova(
    resamples = cv_folds,
    grid = 100,
    control = control_race(parallel_over = "everything")
  )
```
```r
# XGBoost with many tunable parameters
xgb_spec <- boost_tree(
  trees = tune(),
  tree_depth = tune(),
  min_n = tune(),
  loss_reduction = tune(),
  sample_size = tune(),
  mtry = tune(),
  learn_rate = tune()
) |>
  set_engine("xgboost") |>
  set_mode("classification")

# Define the parameter grid; grid column names must match the tune() ids,
# so sample_prop() is passed under the name sample_size
xgb_grid <- grid_latin_hypercube(
  trees(range = c(100, 1500)),
  tree_depth(range = c(3, 15)),
  min_n(range = c(2, 30)),
  loss_reduction(),
  sample_size = sample_prop(range = c(0.5, 1)),
  finalize(mtry(), train),
  learn_rate(range = c(-3, -1)),
  size = 50
)

# Use racing for efficiency
xgb_results <- workflow(rec, xgb_spec) |>
  tune_race_anova(
    resamples = cv_folds,
    grid = xgb_grid,
    metrics = metric_set(roc_auc)
  )
```
| Function | Key control options |
|---|---|
| `tune_grid()` | `save_pred`, `verbose`, `allow_par` |
| `tune_bayes()` | `no_improve`, `uncertain`, `objective` |
| `tune_race_*()` | `burn_in`, `num_ties`, `alpha` |
| `tune_sim_anneal()` | `cooling_coef`, `restart` |
Tip: prefer `select_by_one_std_err()` when a simpler model within one standard error of the best is acceptable.