R patterns: tidyverse data pipelines with dplyr/tidyr/purrr, native pipe |>, R6 classes, tidy evaluation with rlang {{, vctrs custom types, renv dependency management, ggplot2 visualization, functional programming with purrr::map/walk/reduce. Use when writing or reviewing R code.
From clarc
npx claudepluginhub marvinrichter/clarc --plugin clarc
This skill uses the workspace's default tool permissions.
Designs and optimizes AI agent action spaces, tool definitions, observation formats, error recovery, and context for higher task completion rates.
Enables AI agents to execute x402 payments with per-task budgets, spending controls, and non-custodial wallets via MCP tools. Use when agents pay for APIs, services, or other agents.
Compares coding agents like Claude Code and Aider on custom YAML-defined codebase tasks using git worktrees, measuring pass rate, cost, time, and consistency.
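- Working with R source and report files (.R, .Rmd, .qmd)
- Using tidy evaluation ({{ }}) to write reusable functions that accept column names as arguments
- Managing dependencies with renv and a committed lockfile
- Choosing between purrr::map variants and for loops for iteration over data structures
library(dplyr)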
library(tidyr)
library(lubridate)
# Full ETL pipeline, written as named stages. The stages live inside local()
# so that only `monthly_revenue` is created in the calling environment.
monthly_revenue <- local({
  # Extract: keep completed transactions that have an amount recorded.
  completed <- dplyr::filter(
    transactions,
    !is.na(amount) & status == "completed"
  )

  # Transform: parse the date string, bucket it by calendar month and
  # apply the per-row discount to get revenue.
  enriched <- dplyr::mutate(
    completed,
    date = lubridate::ymd(date_str),
    month = lubridate::floor_date(date, "month"),
    revenue = amount * (1 - discount_rate)
  )

  # Aggregate: one row per category and month; grouping is dropped afterwards.
  per_month <- dplyr::summarise(
    dplyr::group_by(enriched, category, month),
    total = sum(revenue),
    n_orders = dplyr::n(),
    avg_order = mean(revenue),
    .groups = "drop"
  )

  # Oldest month first; within a month, highest total first.
  dplyr::arrange(per_month, month, dplyr::desc(total))
})
library(purrr)
# Typed map variants: each one guarantees the type of the returned vector.
means <- purrr::map_dbl(datasets, \(d) mean(d$value, na.rm = TRUE))
names <- purrr::map_chr(users, \(u) u$name)
flags <- purrr::map_lgl(records, \(r) !is.na(r$email))

# Error-safe mapping: safely() wraps readRDS so that every call yields a
# list with `result` and `error` instead of aborting the whole map.
safe_read <- purrr::safely(readRDS)
results <- purrr::map(file_paths, safe_read)
# Successful reads: entries without an error, reduced to their `result`.
succeeded <- purrr::keep(results, \(res) is.null(res$error))
data_ok <- purrr::map(succeeded, "result")
# Failed reads: everything that is left once the error-free entries are dropped.
data_err <- purrr::discard(results, \(res) is.null(res$error))

# walk() is for side effects only; here it emits one message per file.
purrr::walk(output_files, \(path) message("Written: ", path))

# reduce() folds the vector with `+`, starting from .init.
total <- purrr::reduce(c(1, 2, 3, 4), `+`, .init = 0) # 10

# map2_chr() iterates over two inputs in parallel.
combined <- purrr::map2_chr(
  first_names,
  last_names,
  \(first, last) paste(first, last)
)
library(R6)
library(glue)
#' Data Pipeline
#'
#' R6 class with private state: an ordered set of named transformation steps
#' that are applied one after another, plus a log with one entry per
#' completed step.
DataPipeline <- R6::R6Class(
  "DataPipeline",
  private = list(
    # Named list of step functions, in insertion order.
    steps = NULL,
    # Character vector of log lines accumulated across run() calls.
    log_msgs = NULL
  ),
  public = list(
    #' @description Create a pipeline with no steps and an empty log.
    initialize = function() {
      private$steps <- list()
      private$log_msgs <- character(0)
    },
    #' @description Register a step. Adding a step under an existing name
    #'   replaces that step in place.
    #' @param name Single, non-empty, non-missing string naming the step.
    #' @param fn Function taking the current data and returning the new data.
    #' @return The pipeline itself, invisibly, to allow method chaining.
    add_step = function(name, fn) {
      # Steps are stored and later looked up by name, so the name has to be
      # exactly one usable string: a vector, NA or "" would either fail on
      # assignment or produce a step that run() cannot retrieve.
      stopifnot(
        is.character(name),
        length(name) == 1L,
        !is.na(name),
        nzchar(name),
        is.function(fn)
      )
      private$steps[[name]] <- fn
      invisible(self) # enable method chaining
    },
    #' @description Apply every registered step, in order, to `data`.
    #' @param data Input passed to the first step.
    #' @return The value returned by the last step (`data` itself when the
    #'   pipeline has no steps).
    run = function(data) {
      result <- data
      for (step_name in names(private$steps)) {
        result <- private$steps[[step_name]](result)
        # NROW() rather than nrow(): nrow() is NULL for anything that is not
        # matrix-like, and glue() returns a zero-length string for NULL
        # input, which would silently drop the log entry for that step.
        private$log_msgs <- c(
          private$log_msgs,
          glue::glue("[{Sys.time()}] Step '{step_name}' complete: {NROW(result)} rows")
        )
      }
      result
    },
    #' @description Return the log lines recorded so far.
    #' @return A character vector, one element per completed step.
    get_log = function() private$log_msgs
  )
)
# Method chaining. Each step must be an actual function: add_step() asserts
# is.function(fn), and a purrr-style formula (~ ...) is not a function, so
# lambdas are used here instead.
pipeline <- DataPipeline$new()$
  add_step("clean", \(df) dplyr::filter(df, !is.na(value)))$
  add_step("transform", \(df) dplyr::mutate(df, value = log1p(value)))
result <- pipeline$run(raw_data)
For functions that take column names as arguments:
library(dplyr)
library(rlang)
# Embrace operator {{ }} for column names

#' Count, mean and standard deviation of one column within groups
#'
#' @param data A data frame.
#' @param group_col Unquoted column to group by.
#' @param value_col Unquoted column to summarise.
#' @return One row per group with the grouping column followed by `n`
#'   (rows in the group), `mean` and `sd` of `value_col`; missing values are
#'   ignored for `mean` and `sd`. The result is ungrouped.
group_summary <- function(data, group_col, value_col) {
  data |>
    dplyr::group_by({{ group_col }}) |>
    dplyr::summarise(
      # All three statistics are computed inside ONE unnamed data-frame
      # expression, which summarise() unpacks into columns. With separate
      # `n = ...`, `mean = ...` arguments each new column immediately masks a
      # data column of the same name, so a value column called `n` or `mean`
      # would be summarised from the freshly computed scalar instead of the
      # original data.
      data.frame(
        n = dplyr::n(),
        mean = mean({{ value_col }}, na.rm = TRUE),
        sd = stats::sd({{ value_col }}, na.rm = TRUE)
      ),
      .groups = "drop"
    )
}
# Works with any column names — no quoting needed
group_summary(mtcars, cyl, mpg)
# NOTE(review): `flights` is not defined or loaded anywhere in this snippet;
# presumably nycflights13::flights — confirm and load it before running.
group_summary(flights, carrier, arr_delay)
# .data pronoun — for string column names

#' Keep the rows where a column, given by name, exceeds a threshold
#'
#' @param data A data frame.
#' @param col_name Name of the column to test, as a string.
#' @param threshold Value the column is compared against with `>`.
#' @return `data` restricted to the rows where the column is greater than
#'   `threshold`.
filter_by_col <- function(data, col_name, threshold) {
  # `.env$threshold` forces the function argument to be used. A bare
  # `threshold` is looked up in the data first, so a column that happens to
  # be called `threshold` would silently replace the caller's value.
  data |> dplyr::filter(.data[[col_name]] > .env$threshold)
}
library(ggplot2)
library(scales)
# Publication-ready chart: monthly totals over time, one colour per category.
ggplot2::ggplot(
  data = monthly_revenue,
  mapping = ggplot2::aes(x = month, y = total, color = category)
) +
  # Geoms: lines are added before points, so the points are drawn on top.
  ggplot2::geom_line(linewidth = 1) +
  ggplot2::geom_point(size = 2) +
  # Text: titles and axis/legend labels; x = NULL removes the x-axis title.
  ggplot2::labs(
    title = "Monthly Revenue by Category",
    subtitle = "Jan–Dec 2024",
    x = NULL,
    y = "Revenue (USD)",
    color = "Category"
  ) +
  # Axes: one break per month on x, dollar-formatted labels on y.
  ggplot2::scale_x_date(date_labels = "%b %Y", date_breaks = "1 month") +
  ggplot2::scale_y_continuous(labels = scales::dollar_format()) +
  # Theme: the legend tweak must come after theme_minimal(), which would
  # otherwise overwrite it.
  ggplot2::theme_minimal() +
  ggplot2::theme(legend.position = "bottom")
#' User Service
#'
#' Looks users up through a repository object supplied at construction.
#'
#' @export
UserService <- R6::R6Class(
  classname = "UserService",
  public = list(
    #' @field repo Repository object given to `new()`; `find_by_id()`
    #'   delegates to its `find()` method.
    repo = NULL,

    #' @description Create a new UserService
    #' @param repo A UserRepository object
    initialize = function(repo) {
      self$repo <- repo
    },

    #' @description Find user by ID
    #' @param id Integer user ID: a single, strictly positive integer
    #'   (e.g. `7L`); anything else raises an error.
    #' @return Whatever the repository's `find()` returns for `id`
    #'   (documented upstream as a User object or NULL).
    find_by_id = function(id) {
      # Checks run in this order: type, then length, then sign.
      stopifnot(is.integer(id))
      stopifnot(length(id) == 1L)
      stopifnot(id > 0L)
      self$repo$find(id)
    }
  )
)
# renv workflow reference. NOTE(review): these read as individual commands
# for different moments in a project's life, not as a script to run from top
# to bottom — confirm before sourcing this file.

# Initialize renv in a project
renv::init()
# Snapshot current state
renv::snapshot()
# Restore from lockfile (CI / team members)
renv::restore()
# Update a package
renv::update("dplyr")
renv::snapshot() # always snapshot after update
Always commit renv.lock to version control. Add renv/library/ to .gitignore.
| Anti-Pattern | Problem | Better |
|---|---|---|
| `for` loop over data frame rows | Slow, hard to read | `dplyr::mutate` or `purrr::map` |
| `T`/`F` for `TRUE`/`FALSE` | Overridable as variables | Use `TRUE`/`FALSE` |
| `attach(data)` | Pollutes global environment | Use `data$col` or `with(data, ...)` |
| `setwd()` in scripts | Breaks portability | Use `here::here()` for paths |
| `1:length(x)` in loops | Fails on empty vectors | `seq_along(x)` or `seq_len(length(x))` |
| `=` for assignment | Style inconsistency | `<-` always |
| Ignoring `NA` values | Silent incorrect results | Explicitly use `na.rm = TRUE` |