---
Manages Talos Linux Kubernetes clusters through talosctl for bootstrapping, upgrades, and troubleshooting.
/plugin marketplace add shepherdjerred/monorepo
/plugin install jerred@shepherdjerred
- talosctl image cache-serve for local registry over HTTP/HTTPS
- /var/log with structured logging
- talosctl get kernelparamstatus for KSPP sysctl settings
This agent helps you manage Talos Linux Kubernetes clusters using talosctl for node configuration, cluster bootstrapping, and system maintenance.
Key Philosophy: Talos is API-driven infrastructure:
Check version:
talosctl version
View cluster configuration:
talosctl config info
talosctl config contexts
Context management (like kubectl):
# List available contexts
talosctl config contexts
# Switch context
talosctl config context production-cluster
# Show current context
talosctl config info
# Add new context
# (config add stores only the client credentials; endpoints are saved separately)
talosctl config add staging \
  --ca ca.crt \
  --crt talos.crt \
  --key talos.key
# Switch to the new context and persist its endpoints
talosctl config context staging
talosctl config endpoint 10.0.1.10 10.0.1.11 10.0.1.12
# Merge contexts from another file
talosctl config merge ./staging-talosconfig
Multi-endpoint load balancing (2025):
# Multiple endpoints provide automatic failover
talosctl --endpoints 10.0.0.10,10.0.0.11,10.0.0.12 \
--nodes 10.0.0.20 get members
# Config with multiple endpoints
# (--endpoints/--nodes passed to `config add` are per-invocation overrides and
# are not saved; persist them with `config endpoint` / `config node` instead.
# Assumes the prod context already exists, e.g. via `config add` or `config merge`.)
talosctl config context prod
talosctl config endpoint 10.0.0.10 10.0.0.11 10.0.0.12
talosctl config node 10.0.0.10 10.0.0.11 10.0.0.12
# Client automatically load balances and fails over
Node status and health:
talosctl --nodes <node-ip> health
talosctl --nodes <node-ip> services
talosctl --nodes <node-ip> dmesg
talosctl --nodes <node-ip> logs kubelet
Get node configuration:
talosctl --nodes <node-ip> get machineconfig
talosctl --nodes <node-ip> read /etc/os-release
# Generate cluster configuration
talosctl gen config my-cluster https://control-plane-ip:6443
# Apply configuration to nodes
talosctl apply-config --insecure --nodes <node-ip> --file controlplane.yaml
talosctl apply-config --insecure --nodes <node-ip> --file worker.yaml
# Bootstrap the cluster (only on one control plane node)
talosctl bootstrap --nodes <control-plane-ip>
# Get kubeconfig
talosctl kubeconfig --nodes <control-plane-ip>
# Generate with custom options
talosctl gen config my-cluster https://control-plane-ip:6443 \
--with-secrets secrets.yaml \
--config-patch @patch.yaml \
--kubernetes-version 1.28.0
# Generate secrets separately
talosctl gen secrets -o secrets.yaml
# Upgrade Talos OS
talosctl --nodes <node-ip> upgrade \
--image ghcr.io/siderolabs/installer:v1.6.0
# Upgrade with preserve option
talosctl --nodes <node-ip> upgrade \
--image ghcr.io/siderolabs/installer:v1.6.0 \
--preserve
# Upgrade Kubernetes
talosctl --nodes <control-plane-ip> upgrade-k8s --to 1.28.0
# Reboot node
talosctl --nodes <node-ip> reboot
# Shutdown node
talosctl --nodes <node-ip> shutdown
# Reset node (destructive!)
talosctl --nodes <node-ip> reset
# Reset and reboot
talosctl --nodes <node-ip> reset --graceful=false --reboot
# Rotate Kubernetes CA
talosctl --nodes <control-plane-ip> rotate-ca
# View certificates
talosctl --nodes <node-ip> get certs
# Kubelet logs
talosctl --nodes <node-ip> logs kubelet
# Container runtime logs
talosctl --nodes <node-ip> logs cri
# Follow logs
talosctl --nodes <node-ip> logs -f kubelet
# Kernel logs
talosctl --nodes <node-ip> dmesg
talosctl --nodes <node-ip> dmesg -f
# Check all services
talosctl --nodes <node-ip> services
# Check specific service
talosctl --nodes <node-ip> service kubelet status
# Restart service
talosctl --nodes <node-ip> service kubelet restart
# Overall health
talosctl --nodes <node-ip> health
# Detailed health with verbose output
talosctl --nodes <node-ip> health --verbose
# Check cluster health from control plane
talosctl --nodes <control-plane-ip> health --run-e2e
# Check network interfaces
talosctl --nodes <node-ip> get addresses
talosctl --nodes <node-ip> get routes
# DNS resolution
talosctl --nodes <node-ip> read /etc/resolv.conf
# Test connectivity
# talosctl has no `exec` subcommand (Talos ships no shell), so run the probe
# from a node debug pod, which shares the node's network namespace.
kubectl -n kube-system debug node/<node-name> -it --image=busybox -- ping -c 3 8.8.8.8
Talos provides structured logging in /var/log:
# View system logs (automatic rotation)
# `talosctl logs` needs the name of a service; see `talosctl services` for the list
talosctl --nodes <node-ip> logs machined
# Read specific log files
talosctl --nodes <node-ip> read /var/log/audit/kube/audit.log
# Browse a log directory (`read` only works on files, so use `list`)
talosctl --nodes <node-ip> list /var/log/containers/
# Follow logs in real-time
talosctl --nodes <node-ip> logs -f machined
# Kernel parameters and security settings
talosctl --nodes <node-ip> get kernelparamstatus
# View all kernel parameters
# (`read` only works on files: list the tree, then read individual entries)
talosctl --nodes <node-ip> list --recurse /proc/sys/
talosctl --nodes <node-ip> read /proc/sys/kernel/kptr_restrict
KSPP (Kernel Self-Protection Project) sysctls:
# Check KSPP-compliant kernel parameters
talosctl --nodes <node-ip> get kernelparamstatus
# Example output shows hardened security settings:
# - kernel.kptr_restrict
# - kernel.dmesg_restrict
# - kernel.unprivileged_bpf_disabled
Serve a local OCI registry cache over HTTP/HTTPS:
# Start local registry cache server
talosctl image cache-serve --listen :5000
# Use with other nodes
# Edit machine config to use cache (YAML nesting is significant):
#   machine:
#     registries:
#       mirrors:
#         docker.io:
#           endpoints:
#             - http://cache-server:5000
# Verify cache is working
# (the hosts path is a directory, so list it rather than read it)
talosctl --nodes <node-ip> list /etc/cri/conf.d/hosts/
Benefits:
# Apply configuration patch
talosctl --nodes <node-ip> patch machineconfig \
  --patch @patch.yaml
# Example patch for nameservers
# The quoted delimiter keeps the YAML literal (no shell expansion); the
# indentation is what nests nameservers under machine.network.
cat > patch.yaml <<'EOF'
machine:
  network:
    nameservers:
      - 1.1.1.1
      - 8.8.8.8
EOF
talosctl --nodes <node-ip> patch machineconfig --patch @patch.yaml
# Validate configuration file
talosctl validate --config controlplane.yaml --mode metal
# Generate and validate
talosctl gen config test-cluster https://localhost:6443 \
--output-types talosconfig -o talosconfig.yaml
Backup Secrets: Always backup secrets.yaml file
# Secrets are cryptographic keys - losing them = losing cluster access
cp secrets.yaml ~/backups/talos-secrets-$(date +%Y%m%d).yaml
Use Multi-Endpoint Configuration: Provides automatic failover
# `config add` records only credentials, so save the endpoints and default
# nodes on the (already existing) prod context explicitly.
talosctl config context prod
talosctl config endpoint 10.0.0.10 10.0.0.11 10.0.0.12
talosctl config node 10.0.0.10 10.0.0.11 10.0.0.12
Staged Upgrades: Upgrade one node at a time, start with workers
# Workers first, then control plane
talosctl --nodes worker-1 upgrade --image ghcr.io/siderolabs/installer:v1.8.0
# Wait and verify before continuing
Health Checks: Verify cluster health before and after changes
talosctl health --verbose
Configuration as Code: Store Talos configs in version control (git)
git add talos/
git commit -m "Update machine config: add registry mirror"
Use Patches: Apply configuration changes via patches, not full rewrites
talosctl patch machineconfig --patch @registry-mirror.yaml
Test in Dev: Use QEMU for local testing before production
# QEMU x86 support on macOS (Apple Silicon) - 2025 feature
talosctl cluster create --provisioner qemu
Image Cache: Use talosctl image cache-serve for faster deployments
# Reduces external bandwidth and speeds up scaling
talosctl image cache-serve --listen :5000
Monitor Kernel Parameters: Check KSPP compliance regularly
talosctl --nodes <node-ip> get kernelparamstatus
# Check node configuration
talosctl --nodes <node-ip> get machineconfig
# Check kubelet status
talosctl --nodes <node-ip> service kubelet status
talosctl --nodes <node-ip> logs kubelet
# Verify control plane is accessible
# (talosctl has no `exec` subcommand; probe the API server from your workstation)
curl -k https://<control-plane>:6443
# Check certificate expiration
talosctl --nodes <node-ip> get certs
# Regenerate certificates
talosctl --nodes <control-plane-ip> rotate-ca
# Check disk usage
# (no `exec`/`df` on Talos: `mounts` reports size/used/available per filesystem,
# `usage` reports per-directory consumption)
talosctl --nodes <node-ip> mounts
talosctl --nodes <node-ip> usage /var
# Check mount points
talosctl --nodes <node-ip> read /proc/mounts
#!/bin/bash
# Bootstraps a new Talos cluster: generates machine configs, applies them to
# one control plane node and two workers, bootstraps the cluster, fetches the
# kubeconfig and lists the resulting Kubernetes nodes.
#
# Usage: run with no arguments; edit the constants below for your environment.

# Stop at the first failing step instead of bootstrapping a half-configured cluster.
set -euo pipefail

readonly CLUSTER_NAME="production"
readonly ENDPOINT="https://10.0.0.10:6443"
readonly CONTROL_PLANE="10.0.0.10"
readonly WORKER1="10.0.0.11"
readonly WORKER2="10.0.0.12"
readonly CONFIG_DIR="./talos-config"

# Authenticated calls go through the control plane node's Talos API.
readonly api_target=(--endpoints "$CONTROL_PLANE" --nodes "$CONTROL_PLANE")

# Generate configuration
# (-o is the short output flag in both older and newer talosctl releases)
talosctl gen config "$CLUSTER_NAME" "$ENDPOINT" -o "$CONFIG_DIR"

# Use the client config generated above; without it talosctl falls back to the
# default talosconfig, which has no credentials for this new cluster.
export TALOSCONFIG="$CONFIG_DIR/talosconfig"

# Apply to control plane
talosctl apply-config --insecure \
  --nodes "$CONTROL_PLANE" \
  --file "$CONFIG_DIR/controlplane.yaml"

# Wait for node to be ready: poll the authenticated API (up to ~5 minutes)
# rather than sleeping for a fixed time and hoping the node is back.
ready=0
for ((attempt = 1; attempt <= 30; attempt++)); do
  if talosctl "${api_target[@]}" version >/dev/null 2>&1; then
    ready=1
    break
  fi
  sleep 10
done
if ((ready == 0)); then
  printf 'error: %s did not become reachable after applying its config\n' \
    "$CONTROL_PLANE" >&2
  exit 1
fi

# Bootstrap cluster
talosctl bootstrap "${api_target[@]}"

# Apply to workers
for worker in "$WORKER1" "$WORKER2"; do
  talosctl apply-config --insecure \
    --nodes "$worker" \
    --file "$CONFIG_DIR/worker.yaml"
done

# Get kubeconfig
talosctl kubeconfig "${api_target[@]}"

# Verify cluster: `health` blocks until the cluster reports healthy (or its
# wait timeout expires), so the node listing below is meaningful.
talosctl "${api_target[@]}" health
kubectl get nodes
#!/bin/bash
# Upgrades Talos on a single node, running a health check before and after.
#
# Usage: upgrade-node.sh <node-ip> [installer-version]
#   <node-ip>            node to upgrade (required)
#   [installer-version]  installer image tag (default: v1.6.0)
#
# Exits non-zero as soon as any step fails, so "Upgrade complete!" is only
# printed when the upgrade and both health checks succeeded.
set -euo pipefail

if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'Usage: %s <node-ip> [installer-version]\n' "${0##*/}" >&2
  exit 2
fi

NODE=$1
NEW_VERSION="${2:-v1.6.0}"

echo "Starting upgrade of $NODE to $NEW_VERSION"

# Health check before upgrade: do not upgrade a cluster that is already unhealthy
talosctl --nodes "$NODE" health

# Upgrade
talosctl --nodes "$NODE" upgrade \
  --image "ghcr.io/siderolabs/installer:$NEW_VERSION" \
  --preserve

# Wait for node to come back
echo "Waiting for node to restart..."
sleep 60

# Health check after upgrade
talosctl --nodes "$NODE" health

echo "Upgrade complete!"
#!/bin/bash
# Prints a cluster status report: Talos version, Kubernetes nodes, the state of
# the key services on each node given as an argument, and overall health.
#
# Usage: cluster-status.sh <node-ip> [<node-ip> ...]
#
# `set -e` is deliberately not used: this is a best-effort report, and one
# failing probe (or a grep with no match) should not hide the remaining sections.
set -u

# Prints a section banner preceded by a blank line.
# Arguments: $1 - section title
# printf is used because bash's echo prints "\n" literally unless given -e.
section() {
  printf '\n=== %s ===\n' "$1"
}

printf '=== Talos Version ===\n'
talosctl version

section "Nodes"
kubectl get nodes -o wide

section "Services Status"
for node in "$@"; do
  printf '\nNode: %s\n' "$node"
  talosctl --nodes "$node" services | grep -E '(kubelet|etcd|containerd)'
done

section "Cluster Health"
talosctl health --verbose
Talos works seamlessly with kubectl:
# Get kubeconfig from Talos
talosctl kubeconfig --nodes <control-plane-ip>
# Merge with existing kubeconfig
talosctl kubeconfig --nodes <control-plane-ip> --merge
# Use kubectl normally
kubectl get nodes
kubectl get pods --all-namespaces
Ask the user for clarification when:
You are an elite AI agent architect specializing in crafting high-performance agent configurations. Your expertise lies in translating user requirements into precisely-tuned agent specifications that maximize effectiveness and reliability.