Statistical distribution fitting skill for input modeling in simulation and analysis.
Fits statistical distributions to data for simulation input modeling and analysis.
Install via: `npx claudepluginhub a5c-ai/babysitter`

This skill is limited to using the following tools:
You are distribution-fitter - a specialized skill for fitting statistical distributions to data for input modeling in simulation and analysis.
This skill enables AI-powered distribution fitting including:
from fitter import Fitter
import numpy as np
def fit_distribution(data, distributions=None):
    """Fit a set of candidate distributions to data and report the best one.

    Parameters
    ----------
    data : array-like
        Observations to model.
    distributions : list of str, optional
        scipy.stats distribution names to try. When omitted, a default set
        of eight common input-modeling distributions is used.

    Returns
    -------
    dict
        Keys: ``best_distribution`` (name), ``parameters`` (best fit's
        parameters), ``summary`` (ranking table as a dict), ``all_fits``
        (fitted parameters for every candidate).
    """
    default_candidates = ['norm', 'expon', 'gamma', 'lognorm',
                          'weibull_min', 'beta', 'uniform', 'triang']
    candidates = default_candidates if distributions is None else distributions

    fitter_obj = Fitter(data, distributions=candidates)
    fitter_obj.fit()

    # Candidates are ranked by sum-of-squared-error between the empirical
    # density and each fitted PDF.
    best = fitter_obj.get_best(method='sumsquare_error')
    best_name = next(iter(best))

    return {
        "best_distribution": best_name,
        "parameters": best,
        "summary": fitter_obj.summary().to_dict(),
        "all_fits": fitter_obj.fitted_param,
    }
from scipy import stats
import numpy as np
def goodness_of_fit_tests(data, distribution, params):
    """Run several goodness-of-fit tests for a fitted distribution.

    Parameters
    ----------
    data : array-like
        The sample the distribution was fitted to.
    distribution : str
        scipy.stats distribution name (e.g. 'norm', 'gamma').
    params : tuple
        Fitted parameters as returned by ``getattr(stats, distribution).fit(data)``.

    Returns
    -------
    dict
        Per-test results keyed by test name ('kolmogorov_smirnov',
        'chi_square' when computable, 'anderson_darling' when supported).
    """
    data = np.asarray(data)
    results = {}

    # --- Kolmogorov-Smirnov test ---
    ks_stat, ks_pvalue = stats.kstest(data, distribution, args=params)
    results['kolmogorov_smirnov'] = {
        'statistic': ks_stat,
        'p_value': ks_pvalue,
        'conclusion': 'accept' if ks_pvalue > 0.05 else 'reject'
    }

    # --- Chi-square test ---
    observed, bins = np.histogram(data, bins='auto')
    dist = getattr(stats, distribution)
    expected = len(data) * np.diff(dist.cdf(bins, *params))
    # Drop bins with low expected counts (rule of thumb: expected >= 5).
    mask = expected >= 5
    obs_masked = observed[mask].astype(float)
    exp_masked = expected[mask]
    if exp_masked.size > 0 and exp_masked.sum() > 0:
        # After dropping bins the observed and expected totals diverge;
        # stats.chisquare raises unless they agree, so rescale expected.
        exp_masked = exp_masked * (obs_masked.sum() / exp_masked.sum())
        # ddof accounts for the estimated parameters so the p-value is
        # computed with k - len(params) - 1 degrees of freedom.
        chi2_stat, chi2_pvalue = stats.chisquare(
            obs_masked, exp_masked, ddof=len(params)
        )
        results['chi_square'] = {
            'statistic': chi2_stat,
            'p_value': chi2_pvalue,
            'degrees_of_freedom': int(mask.sum()) - len(params) - 1
        }

    # --- Anderson-Darling test (only for distributions scipy supports) ---
    if distribution in ['norm', 'expon', 'gumbel', 'logistic']:
        ad_result = stats.anderson(data, dist=distribution)
        # Significance levels differ per distribution (e.g. logistic has six
        # levels starting at 25%), so take them from the result rather than
        # hardcoding the normal-distribution labels.
        results['anderson_darling'] = {
            'statistic': ad_result.statistic,
            'critical_values': dict(zip(
                [f'{level:g}%' for level in ad_result.significance_level],
                ad_result.critical_values
            ))
        }

    return results
from scipy.optimize import minimize
from scipy import stats
def mle_fit(data, distribution):
    """Fit *distribution* to *data* by maximum likelihood estimation.

    Parameters
    ----------
    data : array-like
        Sample to fit.
    distribution : str
        scipy.stats distribution name.

    Returns
    -------
    dict
        Fitted parameters, their standard errors, the log-likelihood,
        and the AIC / BIC information criteria.
    """
    from scipy.optimize import approx_fprime

    dist = getattr(stats, distribution)

    def neg_log_likelihood(theta):
        # Objective to minimize: negative log-likelihood of the sample.
        return -np.sum(dist.logpdf(data, *theta))

    # get_parameter_bounds / get_initial_params are defined elsewhere in
    # this skill; they supply per-distribution box constraints and a
    # starting point for the optimizer.
    result = minimize(
        neg_log_likelihood,
        get_initial_params(data, distribution),
        bounds=get_parameter_bounds(distribution),
        method='L-BFGS-B',
    )

    # Approximate the Hessian numerically (finite differences of the
    # numerical gradient) to derive standard errors from the inverse
    # observed-information matrix.
    n_params = len(result.x)
    epsilon = 1e-5
    hessian = np.zeros((n_params, n_params))
    for row in range(n_params):
        hessian[row] = approx_fprime(
            result.x,
            lambda theta, r=row: approx_fprime(theta, neg_log_likelihood, epsilon)[r],
            epsilon,
        )
    std_errors = np.sqrt(np.diag(np.linalg.inv(hessian)))

    k = n_params
    nll = result.fun
    return {
        "distribution": distribution,
        "parameters": result.x.tolist(),
        "standard_errors": std_errors.tolist(),
        "log_likelihood": -nll,
        "aic": 2 * k + 2 * nll,
        "bic": k * np.log(len(data)) + 2 * nll,
    }
def analyze_interarrival_times(timestamps):
    """Characterize inter-arrival times derived from raw arrival timestamps.

    Computes summary statistics on the gaps between consecutive timestamps,
    tests whether the arrivals look like a Poisson process (exponential
    inter-arrivals with coefficient of variation near 1), and fits a set of
    candidate distributions via ``fit_distribution``.

    Parameters
    ----------
    timestamps : array-like
        Ordered arrival times.

    Returns
    -------
    dict
        Keys: ``statistics``, ``poisson_process_test``, ``best_fit``,
        ``arrival_rate`` (reciprocal of the mean gap).
    """
    gaps = np.diff(np.array(timestamps))

    mean_gap = np.mean(gaps)
    std_gap = np.std(gaps)
    stats_summary = {
        "count": len(gaps),
        "mean": mean_gap,
        "std": std_gap,
        "cv": std_gap / mean_gap,
        "min": np.min(gaps),
        "max": np.max(gaps),
        "median": np.median(gaps),
    }

    # Poisson-process check: exponential fit with location pinned at zero,
    # combined with a sanity band on the coefficient of variation (an
    # exponential distribution has CV exactly 1).
    exp_params = stats.expon.fit(gaps, floc=0)
    ks_stat, ks_pvalue = stats.kstest(gaps, 'expon', args=exp_params)
    looks_poisson = ks_pvalue > 0.05 and 0.8 < stats_summary['cv'] < 1.2

    return {
        "statistics": stats_summary,
        "poisson_process_test": {
            "ks_statistic": ks_stat,
            "p_value": ks_pvalue,
            "cv_test": stats_summary['cv'],
            "is_poisson": looks_poisson,
        },
        "best_fit": fit_distribution(gaps),
        "arrival_rate": 1 / mean_gap,
    }
class EmpiricalDistribution:
    """Empirical (data-driven) distribution built from a sample.

    Stores the sorted sample and its empirical CDF, and supports CDF and
    inverse-CDF evaluation, inverse-transform random sampling, and
    serialization to plain Python containers.
    """

    def __init__(self, data):
        # Sorted copy of the sample; the ECDF assigns mass 1/n per point.
        self.data = np.sort(data)
        self.n = len(data)
        self.ecdf = np.arange(1, self.n + 1) / self.n

    def cdf(self, x):
        """Empirical CDF: fraction of sample values <= x (scalar or array)."""
        return np.searchsorted(self.data, x, side='right') / self.n

    def ppf(self, q):
        """Inverse CDF: sample value at quantile q, for scalar q in [0, 1]."""
        # floor(q * n), clipped so q == 1.0 maps to the largest observation.
        idx = int(q * self.n)
        return self.data[min(idx, self.n - 1)]

    def sample(self, size=1):
        """Draw ``size`` i.i.d. samples via inverse-transform sampling."""
        u = np.random.uniform(0, 1, size)
        # Vectorized equivalent of calling ppf() once per draw: floor each
        # uniform into an index and clip at the last observation.
        idx = np.minimum((u * self.n).astype(int), self.n - 1)
        return self.data[idx]

    def to_dict(self):
        """Serialize as plain lists for storage / JSON export."""
        return {
            "type": "empirical",
            "values": self.data.tolist(),
            "probabilities": self.ecdf.tolist(),
        }
def compare_distributions(data, candidates):
    """Fit and rank several candidate distributions on the same data.

    Parameters
    ----------
    data : array-like
        Sample to fit.
    candidates : iterable of str
        scipy.stats distribution names to try. Candidates that fail to fit
        are skipped (best-effort).

    Returns
    -------
    dict
        ``rankings`` sorted by AIC ascending, plus ``best_by_aic`` and
        ``best_by_bic`` distribution names.

    Raises
    ------
    ValueError
        If none of the candidates could be fitted.
    """
    data = np.asarray(data)
    n = len(data)
    results = []
    for dist_name in candidates:
        try:
            dist = getattr(stats, dist_name)
            params = dist.fit(data)
            ll = np.sum(dist.logpdf(data, *params))
            k = len(params)
            # Information criteria: lower is better for both.
            aic = 2 * k - 2 * ll
            bic = k * np.log(n) - 2 * ll
            ks_stat, ks_pvalue = stats.kstest(data, dist_name, args=params)
            results.append({
                "distribution": dist_name,
                "parameters": params,
                "log_likelihood": ll,
                "aic": aic,
                "bic": bic,
                "ks_statistic": ks_stat,
                "ks_pvalue": ks_pvalue
            })
        except Exception:
            # Deliberate best-effort: an unknown name or a failed fit just
            # removes that candidate from the comparison.
            continue
    # Fail loudly instead of an opaque IndexError on results[0] below.
    if not results:
        raise ValueError("No candidate distribution could be fitted to the data")
    results.sort(key=lambda r: r['aic'])
    return {
        "rankings": results,
        "best_by_aic": results[0]['distribution'],
        "best_by_bic": min(results, key=lambda r: r['bic'])['distribution']
    }
This skill integrates with the following processes:
- discrete-event-simulation-modeling.js
- queuing-system-analysis.js
- demand-forecasting-model-development.js

Example output:

{
"data_summary": {
"n": 500,
"mean": 5.2,
"std": 2.1,
"cv": 0.40
},
"best_fit": {
"distribution": "gamma",
"parameters": {"shape": 6.1, "scale": 0.85},
"goodness_of_fit": {
"ks_statistic": 0.032,
"ks_pvalue": 0.67,
"aic": 1523.4
}
},
"alternative_fits": [
{"distribution": "lognorm", "aic": 1528.1},
{"distribution": "weibull", "aic": 1531.2}
],
"recommendation": "Use gamma(6.1, 0.85) for simulation input"
}
| Library | Description | Use Case |
|---|---|---|
| scipy.stats | Statistical functions | Core fitting |
| fitter | Auto fitting | Quick analysis |
| statsmodels | Advanced stats | Detailed tests |
| R fitdistrplus | R package | Complex fitting |
Activates when the user asks about AI prompts, needs prompt templates, wants to search for prompts, or mentions prompts.chat. Use for discovering, retrieving, and improving prompts.
Search, retrieve, and install Agent Skills from the prompts.chat registry using MCP tools. Use when the user asks to find skills, browse skill catalogs, install a skill for Claude, or extend Claude's capabilities with reusable AI agent components.
This skill should be used when the user asks to "create an agent", "add an agent", "write a subagent", "agent frontmatter", "when to use description", "agent examples", "agent tools", "agent colors", "autonomous agent", or needs guidance on agent structure, system prompts, triggering conditions, or agent development best practices for Claude Code plugins.