CUDA and GPU development with Flox. Use for NVIDIA CUDA setup, GPU computing, deep learning frameworks, cuDNN, and cross-platform GPU/CPU development.
Manages NVIDIA CUDA toolchains and GPU libraries for deep learning on Linux using Flox. Use when setting up PyTorch/TensorFlow environments or compiling CUDA code to handle version-specific package installation and license conflict resolution.
/plugin marketplace add flox/flox-agentic
/plugin install flox@flox-agentic
This skill inherits all available tools. When active, it can use any tool Claude has access to.
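Prerequisites: authenticate with `flox auth login`. CUDA packages are Linux-only (`systems = ["aarch64-linux", "x86_64-linux"]`) and are published under the `flox-cuda/` prefix in the catalog.
# Search for CUDA packages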
flox search cudatoolkit --all | grep flox-cuda
flox search nvcc --all | grep 12_8
# Show available versions
flox show flox-cuda/cudaPackages.cudatoolkit
# Install CUDA packages
flox install flox-cuda/cudaPackages_12_8.cuda_nvcc
flox install flox-cuda/cudaPackages.cuda_cudart
# Verify installation
nvcc --version
nvidia-smi
# Search for CUDA toolkit
flox search cudatoolkit --all | grep flox-cuda
# Search for specific versions
flox search nvcc --all | grep 12_8
# Show all available versions
flox show flox-cuda/cudaPackages.cudatoolkit
# Search for CUDA libraries
flox search libcublas --all | grep flox-cuda
flox search cudnn --all | grep flox-cuda
| Package Pattern | Purpose | Example |
|---|---|---|
| cudaPackages_X_Y.cudatoolkit | Main CUDA Toolkit | cudaPackages_12_8.cudatoolkit |
| cudaPackages_X_Y.cuda_nvcc | NVIDIA C++ Compiler | cudaPackages_12_8.cuda_nvcc |
| cudaPackages.cuda_cudart | CUDA Runtime API | cudaPackages.cuda_cudart |
| cudaPackages_X_Y.libcublas | Linear algebra | cudaPackages_12_8.libcublas |
| cudaPackages_X_Y.libcufft | Fast Fourier Transform | cudaPackages_12_8.libcufft |
| cudaPackages_X_Y.libcurand | Random number generation | cudaPackages_12_8.libcurand |
| cudaPackages_X_Y.cudnn_9_11 | Deep neural networks | cudaPackages_12_8.cudnn_9_11 |
| cudaPackages_X_Y.nccl | Multi-GPU communication | cudaPackages_12_8.nccl |
CUDA packages have LICENSE file conflicts requiring explicit priorities:
[install]
cuda_nvcc.pkg-path = "flox-cuda/cudaPackages_12_8.cuda_nvcc"
cuda_nvcc.systems = ["aarch64-linux", "x86_64-linux"]
cuda_nvcc.priority = 1 # Highest priority
cuda_cudart.pkg-path = "flox-cuda/cudaPackages.cuda_cudart"
cuda_cudart.systems = ["aarch64-linux", "x86_64-linux"]
cuda_cudart.priority = 2
cudatoolkit.pkg-path = "flox-cuda/cudaPackages_12_8.cudatoolkit"
cudatoolkit.systems = ["aarch64-linux", "x86_64-linux"]
cudatoolkit.priority = 3 # Lower for LICENSE conflicts
gcc.pkg-path = "gcc"
gcc-unwrapped.pkg-path = "gcc-unwrapped" # For libstdc++
gcc-unwrapped.priority = 5
[install]
cuda_nvcc.pkg-path = "flox-cuda/cudaPackages_12_8.cuda_nvcc"
cuda_nvcc.priority = 1
cuda_nvcc.systems = ["aarch64-linux", "x86_64-linux"]
cudatoolkit.pkg-path = "flox-cuda/cudaPackages_12_8.cudatoolkit"
cudatoolkit.priority = 3
cudatoolkit.systems = ["aarch64-linux", "x86_64-linux"]
[install]
cuda_nvcc.pkg-path = "flox-cuda/cudaPackages_11_8.cuda_nvcc"
cuda_nvcc.priority = 1
cuda_nvcc.systems = ["aarch64-linux", "x86_64-linux"]
cudatoolkit.pkg-path = "flox-cuda/cudaPackages_11_8.cudatoolkit"
cudatoolkit.priority = 3
cudatoolkit.systems = ["aarch64-linux", "x86_64-linux"]
Dual CUDA/CPU packages for portability (Linux gets CUDA, macOS gets CPU fallback):
[install]
## CUDA packages (Linux only)
cuda-pytorch.pkg-path = "flox-cuda/python3Packages.torch"
cuda-pytorch.systems = ["x86_64-linux", "aarch64-linux"]
cuda-pytorch.priority = 1
## Non-CUDA packages (macOS CPU fallback)
pytorch.pkg-path = "python313Packages.pytorch"
pytorch.systems = ["x86_64-darwin", "aarch64-darwin"]
pytorch.priority = 6 # Lower priority
Dynamic CPU/GPU package installation in hooks:
#######################################
# Install PyTorch wheels into the cached venv, choosing the CUDA build when an
# NVIDIA GPU is present and the CPU build otherwise. Runs at most once per
# environment cache: a marker file records a successful install.
# Globals:   FLOX_ENV_CACHE (read) - directory holding the venv and marker
# Arguments: none
# Outputs:   progress message on stdout; failure notice on stderr
# Returns:   0 always, so a failed install does not abort activation; the
#            marker is only written on success, so the next activation retries
# Note:      expects "$FLOX_ENV_CACHE/venv" to exist already; this function
#            does not create it.
#######################################
setup_gpu_packages() {
  local venv="$FLOX_ENV_CACHE/venv"
  local marker="$FLOX_ENV_CACHE/.deps_installed"
  local index_url

  # Already provisioned by an earlier activation.
  if [ -f "$marker" ]; then
    return 0
  fi

  # Only NVIDIA hardware can use the CUDA wheels. Matching "AMD" in lspci
  # output would also hit AMD GPUs and AMD CPU/chipset devices, none of which
  # can run CUDA. nvidia-smi is tried first because lspci is often missing in
  # containers.
  if { command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi -L >/dev/null 2>&1; } \
    || lspci 2>/dev/null | grep -qi 'nvidia'; then
    echo "GPU detected, installing CUDA packages"
    index_url="https://download.pytorch.org/whl/cu129"
  else
    echo "No GPU detected, installing CPU packages"
    index_url="https://download.pytorch.org/whl/cpu"
  fi

  # Write the marker only after a successful install so a transient failure
  # (network, index outage) is retried instead of being cached forever.
  if uv pip install --python "$venv/bin/python" \
    torch torchvision --index-url "$index_url"; then
    touch "$marker"
  else
    echo "PyTorch installation failed; it will be retried on next activation" >&2
  fi
}
[install]
cuda_nvcc.pkg-path = "flox-cuda/cudaPackages_12_8.cuda_nvcc"
cuda_nvcc.priority = 1
cuda_nvcc.systems = ["aarch64-linux", "x86_64-linux"]
cuda_cudart.pkg-path = "flox-cuda/cudaPackages.cuda_cudart"
cuda_cudart.priority = 2
cuda_cudart.systems = ["aarch64-linux", "x86_64-linux"]
gcc.pkg-path = "gcc"
gcc-unwrapped.pkg-path = "gcc-unwrapped"
gcc-unwrapped.priority = 5
[vars]
CUDA_VERSION = "12.8"
CUDA_HOME = "$FLOX_ENV"
[hook]
echo "CUDA $CUDA_VERSION environment ready"
echo "nvcc: $(nvcc --version | grep release)"
[install]
cuda_nvcc.pkg-path = "flox-cuda/cudaPackages_12_8.cuda_nvcc"
cuda_nvcc.priority = 1
cuda_nvcc.systems = ["aarch64-linux", "x86_64-linux"]
cuda_cudart.pkg-path = "flox-cuda/cudaPackages.cuda_cudart"
cuda_cudart.priority = 2
cuda_cudart.systems = ["aarch64-linux", "x86_64-linux"]
libcublas.pkg-path = "flox-cuda/cudaPackages_12_8.libcublas"
libcublas.priority = 2
libcublas.systems = ["aarch64-linux", "x86_64-linux"]
cudnn.pkg-path = "flox-cuda/cudaPackages_12_8.cudnn_9_11"
cudnn.priority = 2
cudnn.systems = ["aarch64-linux", "x86_64-linux"]
python313Full.pkg-path = "python313Full"
uv.pkg-path = "uv"
gcc-unwrapped.pkg-path = "gcc-unwrapped"
gcc-unwrapped.priority = 5
[vars]
CUDA_VERSION = "12.8"
PYTORCH_CUDA_ALLOC_CONF = "max_split_size_mb:128"
[hook]
#######################################
# Create (if needed) and activate a uv-managed venv in the Flox cache, then
# install the CUDA builds of torch, torchvision and torchaudio once.
# Globals:   FLOX_ENV_CACHE (read) - directory holding the venv and marker
# Arguments: none
# Outputs:   failure notice on stderr when the install fails
# Returns:   0 always, so a failed install does not abort activation; the
#            marker is only written on success, so the next activation retries
#######################################
setup_pytorch_cuda() {
  local venv="$FLOX_ENV_CACHE/venv"
  local marker="$FLOX_ENV_CACHE/.deps_installed"

  if [ ! -d "$venv" ]; then
    uv venv "$venv" --python python3
  fi

  # Activate only when the venv was actually created.
  if [ -f "$venv/bin/activate" ]; then
    # shellcheck disable=SC1091 -- path is only known at activation time
    source "$venv/bin/activate"
  fi

  if [ ! -f "$marker" ]; then
    # Record success only; touching the marker after a failed install would
    # leave the environment permanently without PyTorch.
    if uv pip install --python "$venv/bin/python" \
      torch torchvision torchaudio \
      --index-url https://download.pytorch.org/whl/cu129; then
      touch "$marker"
    else
      echo "PyTorch installation failed; it will be retried on next activation" >&2
    fi
  fi
}
setup_pytorch_cuda
[install]
cuda_nvcc.pkg-path = "flox-cuda/cudaPackages_12_8.cuda_nvcc"
cuda_nvcc.priority = 1
cuda_nvcc.systems = ["aarch64-linux", "x86_64-linux"]
cuda_cudart.pkg-path = "flox-cuda/cudaPackages.cuda_cudart"
cuda_cudart.priority = 2
cuda_cudart.systems = ["aarch64-linux", "x86_64-linux"]
cudnn.pkg-path = "flox-cuda/cudaPackages_12_8.cudnn_9_11"
cudnn.priority = 2
cudnn.systems = ["aarch64-linux", "x86_64-linux"]
python313Full.pkg-path = "python313Full"
uv.pkg-path = "uv"
[hook]
#######################################
# Create (if needed) and activate a uv-managed venv in the Flox cache, then
# install TensorFlow with its CUDA extras once.
# Globals:   FLOX_ENV_CACHE (read) - directory holding the venv and marker
# Arguments: none
# Outputs:   failure notice on stderr when the install fails
# Returns:   0 always, so a failed install does not abort activation; the
#            marker is only written on success, so the next activation retries
#######################################
setup_tensorflow() {
  local venv="$FLOX_ENV_CACHE/venv"
  local marker="$FLOX_ENV_CACHE/.tf_installed"

  if [ ! -d "$venv" ]; then
    uv venv "$venv" --python python3
  fi

  if [ -f "$venv/bin/activate" ]; then
    # shellcheck disable=SC1091 -- path is only known at activation time
    source "$venv/bin/activate"
  fi

  if [ ! -f "$marker" ]; then
    # The extras spec is quoted: unquoted, [and-cuda] is a glob bracket
    # expression that the shell may expand or reject.
    if uv pip install --python "$venv/bin/python" 'tensorflow[and-cuda]'; then
      touch "$marker"
    else
      echo "TensorFlow installation failed; it will be retried on next activation" >&2
    fi
  fi
}
setup_tensorflow
[install]
cuda_nvcc.pkg-path = "flox-cuda/cudaPackages_12_8.cuda_nvcc"
cuda_nvcc.priority = 1
cuda_nvcc.systems = ["aarch64-linux", "x86_64-linux"]
nccl.pkg-path = "flox-cuda/cudaPackages_12_8.nccl"
nccl.priority = 2
nccl.systems = ["aarch64-linux", "x86_64-linux"]
libcublas.pkg-path = "flox-cuda/cudaPackages_12_8.libcublas"
libcublas.priority = 2
libcublas.systems = ["aarch64-linux", "x86_64-linux"]
[vars]
CUDA_VISIBLE_DEVICES = "0,1,2,3" # GPUs 0-3; adjust to the devices present on the host
NCCL_DEBUG = "INFO"
# team/cuda-base
[install]
cuda_nvcc.pkg-path = "flox-cuda/cudaPackages_12_8.cuda_nvcc"
cuda_nvcc.priority = 1
cuda_nvcc.systems = ["aarch64-linux", "x86_64-linux"]
cuda_cudart.pkg-path = "flox-cuda/cudaPackages.cuda_cudart"
cuda_cudart.priority = 2
cuda_cudart.systems = ["aarch64-linux", "x86_64-linux"]
gcc.pkg-path = "gcc"
gcc-unwrapped.pkg-path = "gcc-unwrapped"
gcc-unwrapped.priority = 5
[vars]
CUDA_VERSION = "12.8"
CUDA_HOME = "$FLOX_ENV"
# team/cuda-math
[include]
environments = [{ remote = "team/cuda-base" }]
[install]
libcublas.pkg-path = "flox-cuda/cudaPackages_12_8.libcublas"
libcublas.priority = 2
libcublas.systems = ["aarch64-linux", "x86_64-linux"]
libcufft.pkg-path = "flox-cuda/cudaPackages_12_8.libcufft"
libcufft.priority = 2
libcufft.systems = ["aarch64-linux", "x86_64-linux"]
libcurand.pkg-path = "flox-cuda/cudaPackages_12_8.libcurand"
libcurand.priority = 2
libcurand.systems = ["aarch64-linux", "x86_64-linux"]
# team/cuda-debug
[install]
# Install IDs (left of ".pkg-path") may use hyphens, but the catalog attribute
# names use underscores like the other cudaPackages entries (cuda_nvcc,
# cuda_cudart). Confirm with: flox search cuda_gdb --all | grep flox-cuda
cuda-gdb.pkg-path = "flox-cuda/cudaPackages_12_8.cuda_gdb"
cuda-gdb.systems = ["aarch64-linux", "x86_64-linux"]
nsight-systems.pkg-path = "flox-cuda/cudaPackages_12_8.nsight_systems"
nsight-systems.systems = ["aarch64-linux", "x86_64-linux"]
[vars]
CUDA_LAUNCH_BLOCKING = "1" # Synchronous kernel launches for debugging
# Base CUDA environment
flox activate -r team/cuda-base
# Add debugging tools when needed
flox activate -r team/cuda-base -- flox activate -r team/cuda-debug
nvcc --version
nvidia-smi
# Smoke test: write a minimal CUDA program, compile it with nvcc, and run it.
# The program exits non-zero with a message when the kernel launch or the
# device synchronisation fails, so a missing GPU or driver problem is reported
# instead of silently printing nothing.
cat > hello_cuda.cu << 'EOF'
#include <stdio.h>

__global__ void hello() {
    printf("Hello from GPU!\n");
}

int main() {
    hello<<<1,1>>>();
    /* Launch errors are reported by cudaGetLastError; errors raised while the
       kernel runs are reported by cudaDeviceSynchronize. */
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }
    return 0;
}
EOF
# Only run the binary when compilation succeeded, so a stale hello_cuda from
# an earlier build is never mistaken for a passing test.
nvcc hello_cuda.cu -o hello_cuda && ./hello_cuda
import torch

# Report whether PyTorch can use CUDA, which CUDA version torch reports,
# and how many devices are visible.
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")
print(f"GPU count: {torch.cuda.device_count()}")
# Device names can only be queried when CUDA is usable.
if torch.cuda.is_available():
    print(f"GPU name: {torch.cuda.get_device_name(0)}")
CUDA packages have predictable conflicts - assign explicit priorities
Use specific versions (e.g., _12_8) for reproducibility. Don't mix CUDA versions.
Split base CUDA, math libs, and debugging into separate environments for flexibility
Verify that `nvcc hello_cuda.cu -o hello_cuda` compiles and the resulting binary runs after setup
Always include systems = ["aarch64-linux", "x86_64-linux"]
Set appropriate CUDA memory allocator configs:
[vars]
PYTORCH_CUDA_ALLOC_CONF = "max_split_size_mb:128"
CUDA_LAUNCH_BLOCKING = "0" # Async by default
The cudatoolkit package doesn't include all libraries. Add the ones you need as separate packages (for example `libcublas`, `cudnn`, or `nccl`).
Every CUDA package may need explicit priority due to LICENSE file conflicts
CUDA is Linux-only. Use Metal-accelerated packages on Darwin when available
Don't mix CUDA versions. Use consistent _X_Y suffixes across all CUDA packages
CUDA Python packages (PyTorch, TensorFlow) should be installed in venv with correct CUDA version
Ensure NVIDIA driver supports your CUDA version. Check with nvidia-smi
# Troubleshooting: print the CUDA-related environment and locate the compiler.
# Check CUDA_HOME
echo "$CUDA_HOME"
# Check nvcc (command -v is the portable replacement for which)
command -v nvcc
nvcc --version
# Check library paths
echo "$LD_LIBRARY_PATH"
import torch
print(torch.cuda.is_available()) # Should be True
print(torch.version.cuda) # Should match your CUDA version
# If False, reinstall with correct CUDA version
# uv pip install torch --index-url https://download.pytorch.org/whl/cu129
# Check gcc/g++ version
gcc --version
g++ --version
# Ensure gcc-unwrapped is installed
flox list | grep gcc-unwrapped
# Check include paths
echo $CPATH
echo $LIBRARY_PATH
# Check GPU visibility
echo $CUDA_VISIBLE_DEVICES
# Check for GPU
nvidia-smi
# Run with debug output
CUDA_LAUNCH_BLOCKING=1 python my_script.py
This skill should be used when the user asks to "create a slash command", "add a command", "write a custom command", "define command arguments", "use command frontmatter", "organize commands", "create command with file references", "interactive command", "use AskUserQuestion in command", or needs guidance on slash command structure, YAML frontmatter fields, dynamic arguments, bash execution in commands, user interaction patterns, or command development best practices for Claude Code.
This skill should be used when the user asks to "create an agent", "add an agent", "write a subagent", "agent frontmatter", "when to use description", "agent examples", "agent tools", "agent colors", "autonomous agent", or needs guidance on agent structure, system prompts, triggering conditions, or agent development best practices for Claude Code plugins.
This skill should be used when the user asks to "create a hook", "add a PreToolUse/PostToolUse/Stop hook", "validate tool use", "implement prompt-based hooks", "use ${CLAUDE_PLUGIN_ROOT}", "set up event-driven automation", "block dangerous commands", or mentions hook events (PreToolUse, PostToolUse, Stop, SubagentStop, SessionStart, SessionEnd, UserPromptSubmit, PreCompact, Notification). Provides comprehensive guidance for creating and implementing Claude Code plugin hooks with focus on advanced prompt-based hooks API.