Master high-performance rendering for large datasets with Datashader. Use this skill when working with datasets exceeding 100M+ points, optimizing visualization performance, or implementing efficient rendering strategies with rasterization and colormapping techniques.
Renders massive datasets using Datashader for high-performance visualization.
/plugin marketplace add uw-ssec/rse-agents
/plugin install uw-ssec-holoviz-visualization-community-plugins-holoviz-visualization@uw-ssec/rse-agents
This skill inherits all available tools. When active, it can use any tool Claude has access to.
Master high-performance rendering for large datasets with Datashader. This skill covers handling 100M+ point datasets, performance tuning, and efficient visualization strategies.
Datashader is designed for rasterizing large datasets:
import datashader as ds
import datashader.transfer_functions as tf
import pandas as pd
import holoviews as hv
# Load large dataset (can handle 100M+ points)
df = pd.read_csv('large_dataset.csv')  # Millions or billions of rows
# Create datashader canvas
canvas = ds.Canvas(plot_width=800, plot_height=600)
# Aggregate points onto the canvas grid
agg = canvas.points(df, 'x', 'y')
# Convert the aggregation to an image
img = tf.shade(agg)
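To save the shaded image to disk, Datashader provides an export helper; a minimal sketch (the output filename is illustrative):
from datashader.utils import export_image
# Writes img.png to the current directory (filename is an example)
export_image(img, 'img', background='white')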
from holoviews.operation.datashader import datashade, aggregate, shade, rasterize
# Quick datashading with HoloViews
scatter = hv.Scatter(df, 'x', 'y')
shaded = datashade(scatter)
# With custom aggregation
agg = aggregate(scatter, width=800, height=600)
colored = shade(agg, cmap='viridis')
# Control rasterization
rasterized = rasterize(
    scatter,
    aggregator=ds.count(),
    pixel_ratio=2
)
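For sparse regions where isolated points shrink to single pixels, HoloViews also provides spreading operations; a minimal sketch:
from holoviews.operation.datashader import dynspread
# Grow isolated pixels up to a maximum size while leaving dense areas unchanged
spread_plot = dynspread(datashade(scatter), max_px=4, threshold=0.5)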
import datashader as ds
from colorcet import cm
# Count aggregation (heatmap)
canvas = ds.Canvas()
agg = canvas.points(df, 'x', 'y', agg=ds.count())
# Weighted aggregation
agg = canvas.points(df, 'x', 'y', agg=ds.sum('value'))
# Mean aggregation
agg = canvas.points(df, 'x', 'y', agg=ds.mean('value'))
# Custom colormapping (colorcet's cm dict holds colorcet colormaps such as 'fire')
import datashader.transfer_functions as tf
shaded = tf.shade(agg, cmap=cm['fire'])
shaded_with_spread = tf.spread(shaded, px=2)
# Combine multiple datasets
canvas = ds.Canvas(x_range=(0, 100), y_range=(0, 100))
agg1 = canvas.points(df1, 'x', 'y')
agg2 = canvas.points(df2, 'x', 'y')
# Shade separately with distinct color ramps
shaded1 = tf.shade(agg1, cmap=['lightsalmon', 'darkred'])
shaded2 = tf.shade(agg2, cmap=['lightblue', 'darkblue'])
# Composite the two images (tf.stack defaults to 'over' compositing)
composite = tf.stack(shaded1, shaded2)
from holoviews.operation.datashader import datashade
from holoviews import streams
# Interactive scatter that re-renders when the viewport changes
def create_datashaded_plot(x_range, y_range):
    scatter = hv.Scatter(df, 'x', 'y')
    return datashade(scatter, cmap='viridis', dynamic=False,
                     x_range=x_range, y_range=y_range)
# Add interaction: RangeXY supplies the current x/y ranges to the callback
range_stream = streams.RangeXY()
interactive_plot = hv.DynamicMap(
    create_datashaded_plot,
    streams=[range_stream]
)
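Displaying the interactive plot requires an active plotting backend; a minimal setup sketch (the output filename is illustrative):
hv.extension('bokeh')  # interactive zoom/pan needs the Bokeh backend
hv.save(interactive_plot, 'scatter.html')  # or display it directly in a notebook cell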
# Efficient streaming plot for time series
from holoviews.operation.datashader import rasterize
from holoviews import streams
def create_timeseries_plot(df_window):
    curve = hv.Curve(df_window, 'timestamp', 'value')
    return curve
# Rasterize for efficiency
rasterized = rasterize(
    hv.Curve(df, 'timestamp', 'value'),
    aggregator=ds.mean('value'),
    width=1000,
    height=400
)
# Use data types efficiently
df = pd.read_csv(
    'large_file.csv',
    dtype={
        'x': 'float32',
        'y': 'float32',
        'value': 'float32',
        'category': 'category'
    }
)
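Columnar formats are typically much faster to load than CSV for this workflow; a sketch assuming the data has already been converted to Parquet (file and column names are illustrative):
# Read only the columns needed for plotting (assumes a pre-converted Parquet file)
df = pd.read_parquet('large_file.parquet', columns=['x', 'y', 'value'])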
# Chunk processing for extremely large files
chunk_size = 1_000_000
# Use one canvas with fixed ranges so every chunk is binned onto the same grid
# (the ranges below are placeholders; set them to the full data extent)
canvas = ds.Canvas(plot_width=800, plot_height=600,
                   x_range=(0, 100), y_range=(0, 100))
aggregations = []
for chunk in pd.read_csv('huge.csv', chunksize=chunk_size):
    agg = canvas.points(chunk, 'x', 'y')
    aggregations.append(agg)
# Combine results (the aggregations share coordinates, so they add elementwise)
combined_agg = aggregations[0]
for agg in aggregations[1:]:
    combined_agg = combined_agg + agg
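Datashader can also aggregate Dask dataframes directly, which avoids the manual chunk loop; a minimal sketch assuming Dask is installed:
import dask.dataframe as dd
# Dask reads the CSV lazily in partitions; Canvas.points aggregates them in parallel
ddf = dd.read_csv('huge.csv')
canvas = ds.Canvas(plot_width=800, plot_height=600)
agg = canvas.points(ddf, 'x', 'y', agg=ds.count())
img = tf.shade(agg)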
# Adjust canvas resolution based on data density
import numpy as np
def auto_canvas(df, target_pixels=500000):
    aspect_ratio = (df['x'].max() - df['x'].min()) / (df['y'].max() - df['y'].min())
    pixels = int(np.sqrt(target_pixels / aspect_ratio))
    height = pixels
    width = int(pixels * aspect_ratio)
    return ds.Canvas(
        plot_width=width,
        plot_height=height,
        x_range=(df['x'].min(), df['x'].max()),
        y_range=(df['y'].min(), df['y'].max())
    )
canvas = auto_canvas(df)
agg = canvas.points(df, 'x', 'y')
# Choose appropriate aggregation for your data
canvas = ds.Canvas()
# For counting: count()
agg_count = canvas.points(df, 'x', 'y', agg=ds.count())
# For averages: mean()
agg_mean = canvas.points(df, 'x', 'y', agg=ds.mean('value'))
# For sums: sum()
agg_sum = canvas.points(df, 'x', 'y', agg=ds.sum('value'))
# For max/min: max() / min()
agg_max = canvas.points(df, 'x', 'y', agg=ds.max('value'))
# For per-category counts: count_cat()
agg_cat = canvas.points(df, 'x', 'y', agg=ds.count_cat('category'))
from colorcet import cm
import datashader.transfer_functions as tf
# Use perceptually uniform colormaps
canvas = ds.Canvas()
agg = canvas.points(df, 'x', 'y', agg=ds.count())
# Grayscale
shaded_gray = tf.shade(agg, cmap=cm['gray'])
# Perceptually uniform colorcet maps
shaded_fire = tf.shade(agg, cmap=cm['fire'])
shaded_bgy = tf.shade(agg, cmap=cm['bgy'])
# Cyclic colormap (useful for angular or periodic values)
shaded_cyclic = tf.shade(agg, cmap=cm['colorwheel'])
# Logarithmic normalization
canvas = ds.Canvas()
agg = canvas.points(df, 'x', 'y', agg=ds.sum('value'))
# Log transform for better visualization of skewed values
shaded = tf.shade(agg, how='log', cmap=cm['fire'])
# Histogram equalization (datashader's default) spreads values across the full range
shaded_eq = tf.shade(agg, how='eq_hist', cmap=cm['bmy'])
# Separate visualization of multiple datasets as color channels
canvas = ds.Canvas()
agg_red = canvas.points(df_red, 'x', 'y')
agg_green = canvas.points(df_green, 'x', 'y')
agg_blue = canvas.points(df_blue, 'x', 'y')
# Shade each channel with a single-hue ramp, then stack the images
shaded_r = tf.shade(agg_red, cmap=['white', 'red'])
shaded_g = tf.shade(agg_green, cmap=['white', 'green'])
shaded_b = tf.shade(agg_blue, cmap=['white', 'blue'])
result = tf.stack(shaded_r, shaded_g, shaded_b)
import panel as pn
import param
from holoviews.operation.datashader import datashade, spread
# Create interactive dashboard with datashader
class LargeDataViewer(param.Parameterized):
    cmap = param.Selector(default='fire', objects=list(cm.keys()))
    show_spread = param.Boolean(default=False)

    def __init__(self, data, **params):
        super().__init__(**params)
        self.data = data

    @param.depends('cmap', 'show_spread')
    def plot(self):
        scatter = hv.Scatter(self.data, 'x', 'y')
        shaded = datashade(scatter, cmap=cm[self.cmap])
        if self.show_spread:
            shaded = spread(shaded, px=2)
        return shaded

viewer = LargeDataViewer(large_df)
pn.extension()
app = pn.Column(
    pn.Param(viewer.param),
    viewer.plot
)
app.servable()
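To run the dashboard as a standalone app, serve the script with Panel from a shell (the filename app.py is illustrative):
panel serve app.py --show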
< 10k points: Use standard HoloViews/hvPlot
10k - 1M points: Use rasterize() for dense plots
1M - 100M points: Use Datashader
> 100M points: Use Datashader with chunking
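A minimal helper that applies the thresholds above to pick a rendering strategy (the function name and return labels are illustrative, not part of any library):
def choose_rendering_strategy(n_points):
    # Thresholds follow the guidance above; adjust for your hardware
    if n_points < 10_000:
        return 'standard HoloViews/hvPlot'
    elif n_points < 1_000_000:
        return 'rasterize()'
    elif n_points < 100_000_000:
        return 'Datashader'
    return 'Datashader with chunking (or Dask)'

strategy = choose_rendering_strategy(len(df))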
# General rule: 400-1000 pixels on each axis
# Too small: loses detail
# Too large: slow rendering, memory waste
canvas = ds.Canvas(plot_width=800, plot_height=600) # Good default
# When data has extreme outliers
canvas = ds.Canvas()
agg = canvas.points(df, 'x', 'y', agg=ds.mean('value'))
# Use appropriate normalization
shaded = tf.shade(agg, how='log', cmap=cm['fire'])
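Outliers can also be clipped explicitly by passing a value span to tf.shade; a sketch assuming percentile-based limits:
# Clip the colormap range to the 1st-99th percentile of the aggregated values
low, high = np.nanpercentile(agg.values, [1, 99])
shaded_clipped = tf.shade(agg, how='linear', span=[low, high], cmap=cm['fire'])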
def create_progressive_plot(df):
    # Start with an aggregated overview at modest resolution
    canvas = ds.Canvas(plot_width=800, plot_height=600)
    agg = canvas.points(df, 'x', 'y')
    return tf.shade(agg, cmap=cm['fire'])
# User can zoom to see more detail
# Datashader recalculates at the new resolution when used through HoloViews'
# datashade()/rasterize(), which re-aggregate on every zoom or pan
canvas = ds.Canvas()
# Aggregate and shade each category separately
# (single-hue ramp per category; the color list is an example)
colors = ['red', 'green', 'blue', 'orange', 'purple']
shaded_layers = []
for color, category in zip(colors, df['category'].unique()):
    subset = df[df['category'] == category]
    agg = canvas.points(subset, 'x', 'y', agg=ds.count())
    shaded_layers.append(tf.shade(agg, cmap=['white', color]))
# Overlay the per-category images
combined = tf.stack(*shaded_layers)
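Datashader can also aggregate all categories in a single pass and color them with a color key, which is usually faster than looping; a minimal sketch (the color list is illustrative and is matched to categories in order):
# One aggregation that holds a per-category count in each pixel
agg_cat = canvas.points(df, 'x', 'y', agg=ds.count_cat('category'))
# color_key may be a dict mapping category -> color, or a list applied in order
shaded_cat = tf.shade(agg_cat, color_key=['red', 'green', 'blue', 'orange', 'purple'])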
def aggregate_time_series(df, time_bucket):
    df['time_bucket'] = pd.cut(df['timestamp'], bins=time_bucket)
    aggregated = df.groupby('time_bucket').agg({
        'x': 'mean',
        'y': 'mean',
        'value': 'sum'
    })
    return aggregated
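Example usage, downsampling before handing the data to a standard plot (the bucket count is illustrative):
# Roughly 1000 rows remain, small enough for a regular (non-datashaded) plot
downsampled = aggregate_time_series(df.copy(), time_bucket=1000)
small_plot = hv.Scatter(downsampled, 'x', 'y')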
This skill should be used when the user asks to "create a slash command", "add a command", "write a custom command", "define command arguments", "use command frontmatter", "organize commands", "create command with file references", "interactive command", "use AskUserQuestion in command", or needs guidance on slash command structure, YAML frontmatter fields, dynamic arguments, bash execution in commands, user interaction patterns, or command development best practices for Claude Code.
This skill should be used when the user asks to "create an agent", "add an agent", "write a subagent", "agent frontmatter", "when to use description", "agent examples", "agent tools", "agent colors", "autonomous agent", or needs guidance on agent structure, system prompts, triggering conditions, or agent development best practices for Claude Code plugins.
This skill should be used when the user asks to "create a hook", "add a PreToolUse/PostToolUse/Stop hook", "validate tool use", "implement prompt-based hooks", "use ${CLAUDE_PLUGIN_ROOT}", "set up event-driven automation", "block dangerous commands", or mentions hook events (PreToolUse, PostToolUse, Stop, SubagentStop, SessionStart, SessionEnd, UserPromptSubmit, PreCompact, Notification). Provides comprehensive guidance for creating and implementing Claude Code plugin hooks with focus on advanced prompt-based hooks API.