Query gnomAD (Genome Aggregation Database) for population allele frequencies, variant constraint scores (pLI, LOEUF), and loss-of-function intolerance via GraphQL API. Essential for variant pathogenicity interpretation, rare disease genetics, and identifying loss-of-function intolerant genes.
npx claudepluginhub joshuarweaver/cascade-ai-ml-engineering --plugin delphine-l-claude-global
This skill is limited to using the following tools:
gnomAD is the largest publicly available collection of human genetic variation. gnomAD v4 contains exome sequences from 730,947 individuals and genome sequences from 76,215 individuals across diverse ancestries.
Creates isolated Git worktrees for feature branches with prioritized directory selection, gitignore safety checks, auto project setup for Node/Python/Rust/Go, and baseline verification.
Executes implementation plans in current session by dispatching fresh subagents per independent task, with two-stage reviews: spec compliance then code quality.
Dispatches parallel agents to independently tackle 2+ tasks like separate test failures or subsystems without shared state or dependencies.
gnomAD is the largest publicly available collection of human genetic variation. gnomAD v4 contains exome sequences from 730,947 individuals and genome sequences from 76,215 individuals across diverse ancestries.
Key resources:
Endpoint: POST https://gnomad.broadinstitute.org/api
Datasets: gnomad_r4 (v4 exomes, GRCh38), gnomad_r4_genomes, gnomad_r3 (GRCh38), gnomad_r2_1 (GRCh37)
import requests
def query_gnomad_gene(gene_symbol, dataset="gnomad_r4", reference_genome="GRCh38", timeout=60):
    """Fetch variants in a gene from gnomAD.

    Args:
        gene_symbol: Gene symbol to look up (e.g. "BRCA1").
        dataset: gnomAD dataset ID sent as the ``$dataset`` query variable.
        reference_genome: Reference genome ID sent as ``$reference_genome``.
        timeout: Seconds to wait on the server before ``requests`` raises a
            timeout error. Without a timeout a stalled connection would
            block the caller indefinitely.

    Returns:
        The decoded JSON response body, returned as-is. On success the
        variant list lives under ``["data"]["gene"]["variants"]``.
    """
    url = "https://gnomad.broadinstitute.org/api"
    # Query text is kept verbatim; GraphQL ignores insignificant whitespace.
    query = """
query GeneVariants($gene_symbol: String!, $dataset: DatasetId!, $reference_genome: ReferenceGenomeId!) {
gene(gene_symbol: $gene_symbol, reference_genome: $reference_genome) {
gene_id
gene_symbol
variants(dataset: $dataset) {
variant_id
pos
ref
alt
consequence
genome { af ac an ac_hom populations { id ac an af } }
exome { af ac an ac_hom }
lof
lof_flags
lof_filter
}
}
}
"""
    variables = {"gene_symbol": gene_symbol, "dataset": dataset, "reference_genome": reference_genome}
    response = requests.post(
        url,
        json={"query": query, "variables": variables},
        timeout=timeout,
    )
    return response.json()
# Filter to rare PTVs
result = query_gnomad_gene("BRCA1")
variants = result["data"]["gene"]["variants"]


def _genome_af(variant):
    """Return the variant's genome allele frequency, or 1 when unavailable.

    ``dict.get(key, default)`` only falls back to the default when the key is
    absent, so a variant carrying ``"genome": None`` (presumably one with no
    genome data, e.g. seen only in exomes; confirm against the API schema) or
    ``"af": None`` has to be handled explicitly. Treating a missing frequency
    as 1 keeps such variants out of the "rare" set, matching the default of 1
    used for an absent ``af`` key.
    """
    af = (variant.get("genome") or {}).get("af")
    return 1 if af is None else af


# Keep high-confidence LoF calls whose genome allele frequency is below 0.1%.
rare_ptvs = [
    v for v in variants
    if v.get("lof") == "HC" and _genome_af(v) < 0.001
]
def query_gnomad_variant(variant_id, dataset="gnomad_r4", timeout=60):
    """Fetch details for a variant (e.g., '17-43094692-G-A').

    Args:
        variant_id: Variant identifier sent as the ``$variantId`` variable,
            in the ``chrom-pos-ref-alt`` form shown above.
        dataset: gnomAD dataset ID sent as the ``$dataset`` query variable.
        timeout: Seconds to wait on the server before ``requests`` raises a
            timeout error. Without a timeout a stalled connection would
            block the caller indefinitely.

    Returns:
        The decoded JSON response body, returned as-is. The query asks for
        the variant's frequencies (genome and exome, with per-population
        counts), in-silico predictors and ClinVar variation ID.
    """
    url = "https://gnomad.broadinstitute.org/api"
    # Query text is kept verbatim; GraphQL ignores insignificant whitespace.
    query = """
query VariantDetails($variantId: String!, $dataset: DatasetId!) {
variant(variantId: $variantId, dataset: $dataset) {
variant_id
chrom pos ref alt consequence lof rsids
genome { af ac an ac_hom populations { id ac an af } }
exome { af ac an ac_hom populations { id ac an af } }
in_silico_predictors { id value flags }
clinvar_variation_id
}
}
"""
    variables = {"variantId": variant_id, "dataset": dataset}
    response = requests.post(
        url,
        json={"query": query, "variables": variables},
        timeout=timeout,
    )
    return response.json()
def query_gnomad_constraint(gene_symbol, reference_genome="GRCh38", timeout=60):
    """Fetch constraint scores for a gene.

    Args:
        gene_symbol: Gene symbol to look up (e.g. "BRCA1").
        reference_genome: Reference genome ID sent as ``$reference_genome``.
        timeout: Seconds to wait on the server before ``requests`` raises a
            timeout error. Without a timeout a stalled connection would
            block the caller indefinitely.

    Returns:
        The decoded JSON response body, returned as-is. The query asks for
        the ``gnomad_constraint`` object of the gene (expected/observed
        counts, observed/expected ratios with LoF bounds, z-scores and pLI).
    """
    url = "https://gnomad.broadinstitute.org/api"
    # Query text is kept verbatim; GraphQL ignores insignificant whitespace.
    query = """
query GeneConstraint($gene_symbol: String!, $reference_genome: ReferenceGenomeId!) {
gene(gene_symbol: $gene_symbol, reference_genome: $reference_genome) {
gene_id gene_symbol
gnomad_constraint {
exp_lof exp_mis exp_syn obs_lof obs_mis obs_syn
oe_lof oe_mis oe_syn oe_lof_lower oe_lof_upper
lof_z mis_z syn_z pLI
}
}
}
"""
    variables = {"gene_symbol": gene_symbol, "reference_genome": reference_genome}
    response = requests.post(
        url,
        json={"query": query, "variables": variables},
        timeout=timeout,
    )
    return response.json()
Constraint score interpretation:
| Score | Range | Meaning |
|---|---|---|
| pLI | 0-1 | Probability of LoF intolerance; >0.9 = highly intolerant |
| LOEUF | 0-inf | LoF observed/expected upper bound; <0.35 = constrained |
| oe_lof | 0-inf | Observed/expected ratio for LoF variants |
| mis_z | -inf to inf | Missense constraint z-score; >3.09 = constrained |
| syn_z | -inf to inf | Synonymous z-score (control; should be near 0) |
LOEUF is preferred over pLI (less sensitive to sample size).
import pandas as pd
def get_population_frequencies(variant_id, dataset="gnomad_r4", timeout=60):
    """Extract per-population allele frequencies.

    Args:
        variant_id: Variant identifier sent as the ``$variantId`` variable
            (e.g. '17-43094692-G-A').
        dataset: gnomAD dataset ID sent as the ``$dataset`` query variable.
        timeout: Seconds to wait on the server before ``requests`` raises a
            timeout error. Without a timeout a stalled connection would
            block the caller indefinitely.

    Returns:
        A DataFrame with one row per genome population (columns ``id``,
        ``ac``, ``an``, ``af``, ``ac_hom``), restricted to populations with
        ``an > 0`` and sorted by ``af`` in descending order. When the variant
        has no genome population data the frame is empty but keeps the same
        columns.
    """
    url = "https://gnomad.broadinstitute.org/api"
    # Query text is kept verbatim; GraphQL ignores insignificant whitespace.
    query = """
query PopFreqs($variantId: String!, $dataset: DatasetId!) {
variant(variantId: $variantId, dataset: $dataset) {
variant_id
genome { populations { id ac an af ac_hom } }
}
}
"""
    variables = {"variantId": variant_id, "dataset": dataset}
    response = requests.post(
        url,
        json={"query": query, "variables": variables},
        timeout=timeout,
    )
    genome = response.json()["data"]["variant"]["genome"]
    # `genome` may come back as null (presumably for variants without genome
    # data; confirm against the API schema), and an empty population list
    # would yield a frame without an "an" column. Both cases return an empty
    # frame with the expected columns.
    populations = (genome or {}).get("populations") or []
    if not populations:
        return pd.DataFrame(columns=["id", "ac", "an", "af", "ac_hom"])
    df = pd.DataFrame(populations)
    return df[df["an"] > 0].sort_values("af", ascending=False)
Population IDs: afr (African), ami (Amish), amr (Admixed American), asj (Ashkenazi Jewish), eas (East Asian), fin (Finnish), mid (Middle Eastern), nfe (Non-Finnish European), sas (South Asian)
- lof field: HC = high-confidence, LC = low-confidence
- Use v4 (gnomad_r4) by default; v2 only for GRCh37 compatibility
- Check ac_hom for recessive disease analysis

Adapted from K-Dense-AI/claude-scientific-skills (CC0-1.0). Original skill by Kuan-lin Huang.