From aj-geddes-useful-ai-prompts-4
Creates effective visualizations using matplotlib and seaborn for exploratory data analysis, presenting insights, and communicating findings to stakeholders.
npx claudepluginhub joshuarweaver/cascade-code-languages-misc-1 --plugin aj-geddes-useful-ai-prompts-4
This skill uses the workspace's default tool permissions.
Data visualization transforms complex data into clear, compelling visual representations that reveal patterns, trends, and insights for storytelling and decision-making.
Searches, retrieves, and installs Agent Skills from prompts.chat registry using MCP tools like search_skills and get_skill. Activates for finding skills, browsing catalogs, or extending Claude.
Searches prompts.chat for AI prompt templates by keyword or category, retrieves by ID with variable handling, and improves prompts via AI. Use for discovering or enhancing prompts.
Checks Next.js compilation errors using a running Turbopack dev server after code edits. Fixes actionable issues before reporting complete. Replaces `next build`.
Data visualization transforms complex data into clear, compelling visual representations that reveal patterns, trends, and insights for storytelling and decision-making.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.gridspec import GridSpec
# Global look-and-feel applied to every figure created below:
# seaborn's "whitegrid" theme and a 12x6 inch default figure size.
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})
# Build the synthetic data set used by every example below.
# The global NumPy seed is fixed so each run produces the same table.
np.random.seed(42)
n = 500
# Keyword arguments are evaluated left to right, so the random draws happen
# in the same column order on every run (age first, purchased last).
data = pd.DataFrame(dict(
    age=np.random.uniform(low=20, high=70, size=n),
    income=np.random.exponential(scale=50000, size=n),
    education_years=np.random.uniform(low=12, high=20, size=n),
    category=np.random.choice(['A', 'B', 'C'], size=n),
    region=np.random.choice(['North', 'South', 'East', 'West'], size=n),
    satisfaction=np.random.uniform(low=1, high=5, size=n),
    purchased=np.random.choice([0, 1], size=n),
))
# Quick sanity check of the first rows.
print(data.head())
# 1. Distribution Plots
# A 2x2 grid showing four views of a distribution: histogram, kernel density
# estimate, box plot and violin plot.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

# Histogram
axes[0, 0].hist(data['age'], bins=30, color='skyblue', edgecolor='black')
axes[0, 0].set_title('Age Distribution (Histogram)')
axes[0, 0].set_xlabel('Age')
axes[0, 0].set_ylabel('Frequency')

# KDE plot
data['income'].plot(kind='kde', ax=axes[0, 1], color='green', linewidth=2)
axes[0, 1].set_title('Income Distribution (KDE)')
axes[0, 1].set_xlabel('Income')

# Box plot
# seaborn 0.13 deprecates passing `palette` without `hue`. Mapping the x
# variable to `hue` keeps one colour per category; `dodge=False` keeps each
# box centred on its tick instead of being offset as a nested group.
sns.boxplot(data=data, x='category', y='satisfaction', hue='category',
            palette='Set2', dodge=False, ax=axes[1, 0])
axes[1, 0].set_title('Satisfaction by Category (Box Plot)')

# Violin plot (same hue/dodge treatment as the box plot)
sns.violinplot(data=data, x='category', y='age', hue='category',
               palette='Set2', dodge=False, ax=axes[1, 1])
axes[1, 1].set_title('Age by Category (Violin Plot)')

# A hue legend would only repeat the x tick labels; drop it if seaborn
# created one (whether it does depends on the seaborn version).
for ax in (axes[1, 0], axes[1, 1]):
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

plt.tight_layout()
plt.show()
# 2. Relationship Plots
# A 2x2 grid showing four ways to plot age against income.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

# Scatter plot
axes[0, 0].scatter(data['age'], data['income'], alpha=0.5, s=30)
axes[0, 0].set_title('Age vs Income (Scatter Plot)')
axes[0, 0].set_xlabel('Age')
axes[0, 0].set_ylabel('Income')

# Scatter with regression line
sns.regplot(x='age', y='income', data=data, ax=axes[0, 1], scatter_kws={'alpha': 0.5})
axes[0, 1].set_title('Age vs Income (with Regression Line)')

# Joint plot alternative
ax_hex = axes[1, 0]
hexbin = ax_hex.hexbin(data['age'], data['income'], gridsize=15, cmap='YlOrRd')
ax_hex.set_title('Age vs Income (Hex Bin)')
ax_hex.set_xlabel('Age')
ax_hex.set_ylabel('Income')
# With no `C` argument hexbin colours each cell by its point count; the
# colour bar is what lets the reader turn shading back into counts.
plt.colorbar(hexbin, ax=ax_hex, label='Count')

# Bubble plot: satisfaction drives both marker size and colour
scatter = axes[1, 1].scatter(
    data['age'], data['income'], s=data['satisfaction']*50,
    c=data['satisfaction'], cmap='viridis', alpha=0.6, edgecolors='black'
)
axes[1, 1].set_title('Age vs Income (Bubble Plot)')
axes[1, 1].set_xlabel('Age')
axes[1, 1].set_ylabel('Income')
plt.colorbar(scatter, ax=axes[1, 1], label='Satisfaction')

plt.tight_layout()
plt.show()
# 3. Comparison Plots
# A 2x2 grid of bar-chart variants built from category and region counts.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

# Bar plot
category_counts = data['category'].value_counts()
axes[0, 0].bar(category_counts.index, category_counts.values, color='skyblue', edgecolor='black')
axes[0, 0].set_title('Category Distribution (Bar Chart)')
axes[0, 0].set_ylabel('Count')

# Grouped bar plot
# crosstab gives the same category x region count table as
# groupby().size().unstack(), but reports 0 rather than NaN for a
# combination that has no rows.
grouped_data = pd.crosstab(data['category'], data['region'])
grouped_data.plot(kind='bar', ax=axes[0, 1], edgecolor='black')
axes[0, 1].set_title('Category by Region (Grouped Bar)')
axes[0, 1].set_ylabel('Count')
axes[0, 1].legend(title='Region')

# Stacked bar plot
grouped_data.plot(kind='bar', stacked=True, ax=axes[1, 0], edgecolor='black')
axes[1, 0].set_title('Category by Region (Stacked Bar)')
axes[1, 0].set_ylabel('Count')
# Give this legend the same title as the grouped chart beside it.
axes[1, 0].legend(title='Region')

# Horizontal bar plot
region_counts = data['region'].value_counts()
axes[1, 1].barh(region_counts.index, region_counts.values, color='lightcoral', edgecolor='black')
axes[1, 1].set_title('Region Distribution (Horizontal Bar)')
axes[1, 1].set_xlabel('Count')

plt.tight_layout()
plt.show()
# 4. Correlation and Heatmaps
# Left: pairwise correlations of the numeric columns.
# Right: per-category means of the same columns.
numeric_cols = data[['age', 'income', 'education_years', 'satisfaction']].corr()
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Correlation heatmap (diverging colour map centred on zero)
sns.heatmap(numeric_cols, annot=True, fmt='.2f', cmap='coolwarm', center=0,
            square=True, ax=axes[0], cbar_kws={'label': 'Correlation'})
axes[0].set_title('Correlation Matrix Heatmap')

# NOTE(review): nothing in the visible script uses these SciPy helpers; they
# are kept only in case code outside this section relies on them.
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import pdist, squareform

# Heatmap of category averages
category_avg = data.groupby('category')[['age', 'income', 'education_years', 'satisfaction']].mean()
# The variables live on very different scales (income is in the tens of
# thousands, satisfaction is between 1 and 5), so colouring raw means would
# let income saturate the colour map. Colour each variable by its min-max
# scaled mean instead, and print the raw means as the cell annotations.
value_range = (category_avg.max() - category_avg.min()).replace(0, 1)  # guard constant columns
category_scaled = (category_avg - category_avg.min()) / value_range
sns.heatmap(category_scaled.T, annot=category_avg.T, fmt='.1f', cmap='YlGnBu',
            vmin=0, vmax=1, ax=axes[1],
            cbar_kws={'label': 'Relative Average (min-max per variable)'})
axes[1].set_title('Average Values by Category')

plt.tight_layout()
plt.show()
# 5. Pair Plot
# Scatter matrix of the numeric columns with histograms on the diagonal.
pair_cols = ['age', 'income', 'education_years', 'satisfaction']
# pairplot is a figure-level function: it always creates its own figure, so
# a preceding plt.figure(figsize=...) only leaves an extra empty figure on
# screen. Size is set per facet instead: 4 facets * 2.5in = 10in tall and
# 4 * 2.5in * 1.2 = 12in wide, i.e. the 12x10 canvas that was intended.
pair_plot = sns.pairplot(data[pair_cols], diag_kind='hist', corner=False,
                         height=2.5, aspect=1.2)
# Reserve headroom so the figure title does not sit on top of the first row.
pair_plot.fig.subplots_adjust(top=0.95)
pair_plot.fig.suptitle('Pair Plot Matrix', y=1.00)
plt.show()
# 6. Multi-dimensional Visualization
# One figure with five panels on a 2x3 grid; the last panel spans the two
# right-hand cells of the bottom row.
fig = plt.figure(figsize=(14, 6))
gs = GridSpec(2, 3, figure=fig)

# Panel 1: age vs income, colour-coded by satisfaction
ax1 = fig.add_subplot(gs[0, 0])
ax1.scatter(data['age'], data['income'], c=data['satisfaction'], cmap='viridis', alpha=0.6)
ax1.set_title('Age vs Income (colored by Satisfaction)')
ax1.set_xlabel('Age')
ax1.set_ylabel('Income')

# Panel 2: age vs income, one scatter series (and legend entry) per category
ax2 = fig.add_subplot(gs[0, 1])
for cat in data['category'].unique():
    subset = data[data['category'] == cat]
    ax2.scatter(subset['age'], subset['income'], label=cat, alpha=0.6)
ax2.set_title('Age vs Income (by Category)')
ax2.set_xlabel('Age')
ax2.set_ylabel('Income')
ax2.legend()

# Panel 3: income spread per region
# seaborn 0.13 deprecates `palette` without `hue`. Mapping the x variable to
# `hue` keeps one colour per region; `dodge=False` keeps boxes centred.
ax3 = fig.add_subplot(gs[0, 2])
sns.boxplot(data=data, x='region', y='income', hue='region',
            palette='Set2', dodge=False, ax=ax3)
# A hue legend would only repeat the tick labels; remove it if one was made.
if ax3.get_legend() is not None:
    ax3.get_legend().remove()
ax3.set_title('Income Distribution by Region')

# Panel 4: mean satisfaction per category
ax4 = fig.add_subplot(gs[1, 0])
data.groupby('category')['satisfaction'].mean().plot(kind='bar', ax=ax4, color='skyblue', edgecolor='black')
ax4.set_title('Average Satisfaction by Category')
ax4.set_ylabel('Satisfaction')
ax4.set_xlabel('Category')

# Panel 5: region x category counts as grouped bars
ax5 = fig.add_subplot(gs[1, 1:])
region_category = pd.crosstab(data['region'], data['category'])
region_category.plot(kind='bar', ax=ax5, edgecolor='black')
ax5.set_title('Region vs Category Distribution')
ax5.set_ylabel('Count')
ax5.set_xlabel('Region')
ax5.legend(title='Category')

plt.tight_layout()
plt.show()
# 7. Time Series Visualization (if temporal data)
# The sample data has no timestamps, so attach one consecutive calendar day
# per row (starting 2023-01-01) and a running total of income.
dates = pd.date_range('2023-01-01', periods=len(data))
data['date'] = dates
data['cumulative_income'] = data['income'].cumsum()

fig, axes = plt.subplots(2, 1, figsize=(12, 8))

# Line plot: per-row income with a light fill underneath
axes[0].plot(data['date'], data['income'], linewidth=1, alpha=0.7, label='Income')
axes[0].fill_between(data['date'], data['income'], alpha=0.3)
axes[0].set(title='Income Over Time', ylabel='Income')
axes[0].grid(True, alpha=0.3)
axes[0].legend()

# Area plot: cumulative income drawn as a filled curve
axes[1].plot(data['date'], data['cumulative_income'], linewidth=2, color='green')
axes[1].fill_between(data['date'], data['cumulative_income'], alpha=0.3, color='green')
axes[1].set(title='Cumulative Income Over Time', ylabel='Cumulative Income', xlabel='Date')
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()
# 8. Composition Visualization
# Category shares shown two ways: a full pie and a donut (ring) chart.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

category_counts = data['category'].value_counts()
colors = ['#ff9999', '#66b3ff', '#99ff99']
# Options common to both charts, collected once so the two stay in sync.
shared_pie_kws = dict(
    labels=category_counts.index,
    autopct='%1.1f%%',
    colors=colors,
    startangle=90,
)

# Pie chart
axes[0].pie(category_counts.values, **shared_pie_kws)
axes[0].set_title('Category Distribution (Pie Chart)')

# Donut chart: the same pie, with wedges narrowed to a ring
axes[1].pie(category_counts.values,
            wedgeprops=dict(width=0.5, edgecolor='white'),
            **shared_pie_kws)
axes[1].set_title('Category Distribution (Donut Chart)')

plt.tight_layout()
plt.show()
# 9. Dashboard-style Visualization
# A 3x3 grid: a headline-metrics banner across the top row, three small
# charts in the middle row, and a wide box plot plus a score chart below.
fig = plt.figure(figsize=(16, 10))
gs = GridSpec(3, 3, figure=fig, hspace=0.3, wspace=0.3)

# Key metrics (text only, so the axes frame is hidden)
ax_metric = fig.add_subplot(gs[0, :])
ax_metric.axis('off')
metrics_text = f"""
Average Age: {data['age'].mean():.1f} | Average Income: ${data['income'].mean():.0f} |
Average Satisfaction: {data['satisfaction'].mean():.2f} | Purchase Rate: {(data['purchased'].mean()*100):.1f}%
"""
ax_metric.text(0.5, 0.5, metrics_text, ha='center', va='center', fontsize=12,
               bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.7))

# Subplots
ax1 = fig.add_subplot(gs[1, 0])
data['age'].hist(bins=20, ax=ax1, color='skyblue', edgecolor='black')
ax1.set_title('Age Distribution')

ax2 = fig.add_subplot(gs[1, 1])
# Recomputed here so the dashboard does not rely on a variable left behind
# by an earlier section of the script.
category_counts = data['category'].value_counts()
category_counts.plot(kind='bar', ax=ax2, color='lightcoral', edgecolor='black')
ax2.set_title('Category Counts')

ax3 = fig.add_subplot(gs[1, 2])
data.groupby('category')['satisfaction'].mean().plot(kind='bar', ax=ax3, color='lightgreen', edgecolor='black')
ax3.set_title('Avg Satisfaction by Category')

ax4 = fig.add_subplot(gs[2, :2])
# seaborn 0.13 deprecates `palette` without `hue`. Mapping the x variable to
# `hue` keeps one colour per region; `dodge=False` keeps boxes centred.
sns.boxplot(data=data, x='region', y='income', hue='region',
            palette='Set2', dodge=False, ax=ax4)
# A hue legend would only repeat the tick labels; remove it if one was made.
if ax4.get_legend() is not None:
    ax4.get_legend().remove()
ax4.set_title('Income by Region')

ax5 = fig.add_subplot(gs[2, 2])
# Satisfaction is continuous, so counting distinct values gives roughly one
# bar of height 1 per row. Group the values into unit-wide score bands
# (lowest edge included) and count rows per band instead.
satisfaction_bands = pd.cut(data['satisfaction'], bins=[1, 2, 3, 4, 5],
                            labels=['1-2', '2-3', '3-4', '4-5'], include_lowest=True)
satisfaction_bands.value_counts().sort_index().plot(kind='bar', ax=ax5, color='orange', edgecolor='black')
ax5.set_title('Satisfaction Scores')

plt.suptitle('Data Analytics Dashboard', fontsize=16, fontweight='bold', y=0.995)
plt.show()
print("Visualization examples completed!")