From lang-stan
Expert guidance for Stan probabilistic programming language development, including modern syntax, cmdstanr/cmdstanpy integration, and testing patterns
npx claudepluginhub seabbs/skills --plugin lang-stanThis skill uses the workspace's default tool permissions.
Use this skill when working with Stan models for Bayesian inference and probabilistic programming, particularly when integrating with R (cmdstanr) or Python (cmdstanpy).
Searches, retrieves, and installs Agent Skills from prompts.chat registry using MCP tools like search_skills and get_skill. Activates for finding skills, browsing catalogs, or extending Claude.
Searches prompts.chat for AI prompt templates by keyword or category, retrieves by ID with variable handling, and improves prompts via AI. Use for discovering or enhancing prompts.
Checks Next.js compilation errors using a running Turbopack dev server after code edits. Fixes actionable issues before reporting complete. Replaces `next build`.
Use this skill when working with Stan models for Bayesian inference and probabilistic programming, particularly when integrating with R (cmdstanr) or Python (cmdstanpy).
Use new array syntax introduced in Stan 2.26+:
// Good - Modern syntax (Stan 2.26+)
array[10] int x; // Array of 10 integers
array[N] real y; // Array of N reals
array[N, M] real z; // 2D array
array[N] vector[K] v; // Array of vectors
// Avoid - Old syntax (deprecated)
int x[10]; // Old style
real y[N]; // Old style
real z[N, M]; // Old style
vector[K] v[N]; // Old style
Why the change?
Data types:
// Scalar types
int n;
real x;
// Vector/Matrix types
vector[N] v; // Column vector
row_vector[N] rv; // Row vector
matrix[N, M] A; // Matrix
// Arrays of vectors/matrices
array[K] vector[N] vectors;
array[K] matrix[N, M] matrices;
// Constrained types
real<lower=0> sigma; // Lower bound
real<lower=0, upper=1> theta; // Bounded
simplex[K] pi; // Simplex constraint
ordered[N] sorted_values; // Ordered constraint
project/
├── inst/stan/ # Stan models (R packages)
│ ├── model.stan # Main model file
│ ├── functions/ # Reusable Stan functions
│ │ ├── likelihood.stan
│ │ ├── priors.stan
│ │ └── utils.stan
│ └── chunks/ # Reusable code chunks (optional)
└── stan/ # Alternative location (Python projects)
Main model includes functions:
functions {
#include functions/likelihood.stan
#include functions/priors.stan
#include functions/utils.stan
}
data {
int<lower=0> N;
array[N] real y;
}
parameters {
real mu;
real<lower=0> sigma;
}
model {
// Use functions from included files
y ~ custom_likelihood(mu, sigma);
mu ~ custom_prior();
}
Benefits of modular approach:
library(cmdstanr)
# Compile Stan model
model <- cmdstan_model("path/to/model.stan")
# Prepare data (list with names matching Stan data block)
stan_data <- list(
N = 100,
y = rnorm(100)
)
# Fit model
fit <- model$sample(
data = stan_data,
chains = 4,
parallel_chains = 4,
iter_warmup = 1000,
iter_sampling = 1000
)
# Extract samples
draws <- fit$draws()
summary <- fit$summary()
# Get model path from package
model_path <- system.file("stan", "model.stan", package = "mypackage")
# Compile with cmdstanr
model <- cmdstan_model(model_path)
# Package-specific model functions
# Many packages provide wrapper functions:
my_package_model <- get_model() # Returns compiled model
# NUTS sampling (default, most robust)
fit <- model$sample(data = stan_data)
# Variational inference (faster, approximate)
fit <- model$variational(data = stan_data)
# Pathfinder (fast, approximate)
fit <- model$pathfinder(data = stan_data)
# Laplace approximation (very fast, approximate)
fit <- model$laplace(data = stan_data)
# Optimization (MAP estimate)
fit <- model$optimize(data = stan_data)
In R packages with cmdstanr:
# Expose Stan functions for testing
# Typically in a package function
expose_stan_functions <- function() {
model_path <- system.file("stan", "functions.stan", package = "mypackage")
# Note: This typically works on Linux only
cmdstanr::cmdstan_model(model_path, compile_standalone = TRUE)
}
# Then in tests
test_that("Stan function works correctly", {
# Exposed functions are now available in R
result <- stan_function_name(args)
expect_equal(result, expected)
})
Common pattern in test setup:
# tests/testthat/setup.R
if (on_linux()) {
expose_stan_functions()
}
# tests/testthat/test-stan-functions.R
test_that("likelihood function works", {
skip_if_not(on_linux()) # Stan function exposure often Linux-only
result <- stan_likelihood(data, params)
expect_gt(result, -Inf)
})
# Convert R data structures to Stan format
stan_data_list <- list(
N = nrow(data),
K = ncol(X),
y = data$outcome,
X = as.matrix(X),
# Arrays use R vectors/matrices directly
group = as.integer(data$group)
)
# For complex models, packages often provide conversion functions
stan_data <- prepare_stan_data(preprocessed_data, model_spec)
Empirical Bayes approach - priors as data:
data {
// Data
int<lower=0> N;
array[N] real y;
// Priors as data (empirical Bayes)
real prior_mu_mean;
real<lower=0> prior_mu_sd;
real<lower=0> prior_sigma_alpha;
real<lower=0> prior_sigma_beta;
}
parameters {
real mu;
real<lower=0> sigma;
}
model {
// Use data-specified priors
mu ~ normal(prior_mu_mean, prior_mu_sd);
sigma ~ gamma(prior_sigma_alpha, prior_sigma_beta);
y ~ normal(mu, sigma);
}
Benefits:
For packages with multiple distribution options:
# R side - get distribution ID
dist_id <- get_distribution_id("lognormal")
# Stan side - use distribution
stan_data <- list(
distribution = dist_id, # Integer ID
# ... other data
)
// Stan functions for distribution dispatch
real compute_lpdf(real y, int distribution, vector params) {
if (distribution == 1) { // Normal
return normal_lpdf(y | params[1], params[2]);
} else if (distribution == 2) { // Lognormal
return lognormal_lpdf(y | params[1], params[2]);
}
// ... other distributions
}
// Good - vectorized (much faster)
y ~ normal(mu, sigma);
// Avoid - loop (slower)
for (n in 1:N) {
y[n] ~ normal(mu, sigma);
}
// Use built-in matrix operations
vector[N] mu = X * beta; // Matrix-vector multiplication
// Avoid loops when vectorization possible
data {
int<lower=0> N;
int<lower=0> N_obs;
array[N_obs] int<lower=1, upper=N> obs_idx;
array[N_obs] real y_obs;
}
parameters {
array[N] real y_latent;
real mu;
real<lower=0> sigma;
}
model {
// Likelihood for observed data
y_obs ~ normal(mu, sigma);
// Prior/model for all data
y_latent ~ normal(mu, sigma);
// Constrain observed values
y_latent[obs_idx] ~ normal(y_obs, 0.001); // Small noise
}
Divergences:
# Increase adapt_delta
fit <- model$sample(
data = stan_data,
adapt_delta = 0.95 # Default 0.8
)
Slow mixing:
# Use non-centered parameterization
# Reparameterize hierarchical models
Model diagnostics:
# Check convergence
fit$diagnostic_summary()
# Check R-hat, ESS
fit$summary(c("Rhat", "ess_bulk", "ess_tail"))
# Pairs plot for problematic parameters
bayesplot::mcmc_pairs(fit$draws(), pars = c("mu", "sigma"))
Activate this skill when:
This skill provides Stan-specific development patterns. Project-specific model architecture and domain knowledge should remain in project CLAUDE.md files.