Help us improve
Share bugs, ideas, or general feedback.
From superpowers
Queries gnomAD for population allele frequencies, variant constraint scores (pLI, LOEUF), and loss-of-function intolerance. Essential for variant pathogenicity interpretation and rare disease genetics.
npx claudepluginhub lunartech-x/superpowers --plugin superpowers
How this skill is triggered — by the user, by Claude, or both
Slash command
/superpowers:gnomad
The summary Claude sees in its skill listing — used to decide when to auto-load this skill
The Genome Aggregation Database (gnomAD) is the largest publicly available collection of human genetic variation, aggregated from large-scale sequencing projects. gnomAD v4 contains exome sequences from 730,947 individuals and genome sequences from 76,215 individuals across diverse ancestries. It provides population allele frequencies, variant consequence annotations, and gene-level constraint ...
Queries gnomAD v4 variant frequencies via GraphQL API, stratified by 9 ancestry groups, plus gene constraint metrics (pLI, LOEUF) and coverage.
Queries gnomAD for population allele frequencies, gene constraint scores (pLI, LOEUF), and variant annotations to interpret ENCODE regulatory variants. Useful for filtering rare variants in cCREs, GWAS overlaps, or CRISPR/MPRA results.
Searches 1000 Genomes Project (IGSR) populations and samples by superpopulation or free-text query. Use for ancestry-specific allele frequency lookups, population stratification, and cohort-aware variant analysis.
Share bugs, ideas, or general feedback.
The Genome Aggregation Database (gnomAD) is the largest publicly available collection of human genetic variation, aggregated from large-scale sequencing projects. gnomAD v4 contains exome sequences from 730,947 individuals and genome sequences from 76,215 individuals across diverse ancestries. It provides population allele frequencies, variant consequence annotations, and gene-level constraint metrics that are essential for interpreting the clinical significance of genetic variants.
Key resources:
Use gnomAD when:
gnomAD uses a GraphQL API accessible at https://gnomad.broadinstitute.org/api. Most queries fetch variants by gene or specific genomic position.
Datasets available:
- gnomad_r4 — gnomAD v4 exomes (recommended default, GRCh38)
- gnomad_r4_genomes — gnomAD v4 genomes (GRCh38)
- gnomad_r3 — gnomAD v3 genomes (GRCh38)
- gnomad_r2_1 — gnomAD v2 exomes (GRCh37)
Reference genomes:
- GRCh38 — default for v3/v4
- GRCh37 — for v2
import requests
def query_gnomad_gene(gene_symbol, dataset="gnomad_r4", reference_genome="GRCh38", timeout=30):
    """Fetch variants in a gene from gnomAD.

    Args:
        gene_symbol: Gene symbol to look up (e.g. "BRCA1").
        dataset: gnomAD dataset ID, sent as the ``$dataset`` query variable.
        reference_genome: Reference genome ID, sent as ``$reference_genome``.
        timeout: Seconds to wait for the API before giving up. ``requests``
            waits indefinitely when no timeout is given, so a stalled
            connection would otherwise hang the caller.

    Returns:
        The decoded JSON body of the GraphQL response.
    """
    url = "https://gnomad.broadinstitute.org/api"
    query = """
    query GeneVariants($gene_symbol: String!, $dataset: DatasetId!, $reference_genome: ReferenceGenomeId!) {
        gene(gene_symbol: $gene_symbol, reference_genome: $reference_genome) {
            gene_id
            gene_symbol
            variants(dataset: $dataset) {
                variant_id
                pos
                ref
                alt
                consequence
                genome {
                    af
                    ac
                    an
                    ac_hom
                    populations {
                        id
                        ac
                        an
                        af
                    }
                }
                exome {
                    af
                    ac
                    an
                    ac_hom
                }
                lof
                lof_flags
                lof_filter
            }
        }
    }
    """
    variables = {
        "gene_symbol": gene_symbol,
        "dataset": dataset,
        "reference_genome": reference_genome,
    }
    response = requests.post(
        url,
        json={"query": query, "variables": variables},
        timeout=timeout,
    )
    return response.json()
# Example: list rare protein-truncating variants (PTVs) in BRCA1.
_PTV_CONSEQUENCES = {"stop_gained", "frameshift_variant"}
_RARE_AF_THRESHOLD = 0.001


def _max_allele_frequency(variant):
    """Return the highest AF reported across genome and exome data, or None."""
    frequencies = []
    for source in ("genome", "exome"):
        # The API may return an explicit null for a data type in which the
        # variant was not observed; `variant.get(source, {})` would then give
        # None and break the chained `.get`, so fall back with `or {}`.
        af = (variant.get(source) or {}).get("af")
        if af is not None:
            frequencies.append(af)
    return max(frequencies) if frequencies else None


def _is_rare_ptv(variant):
    """Return True for a truncating variant whose AF is below the rare threshold."""
    # "HC" marks high-confidence LoF calls; "LC" calls are low-confidence and
    # are not treated as PTVs on the strength of the LoF flag alone.
    is_ptv = (
        variant.get("lof") == "HC"
        or variant.get("consequence") in _PTV_CONSEQUENCES
    )
    af = _max_allele_frequency(variant)
    # The frequency cut-off must apply to every candidate. The parentheses
    # matter: `a or b and c` parses as `a or (b and c)`.
    return is_ptv and af is not None and af < _RARE_AF_THRESHOLD


result = query_gnomad_gene("BRCA1")
gene_data = result["data"]["gene"]
variants = gene_data["variants"]
# Filter to rare PTVs
rare_ptvs = [v for v in variants if _is_rare_ptv(v)]
print(f"Found {len(rare_ptvs)} rare PTVs in {gene_data['gene_symbol']}")
import requests
def query_gnomad_variant(variant_id, dataset="gnomad_r4", timeout=30):
    """Fetch details for a specific variant (e.g., '1-55516888-G-GA').

    Args:
        variant_id: Variant identifier in ``chrom-pos-ref-alt`` form.
        dataset: gnomAD dataset ID, sent as the ``$dataset`` query variable.
        timeout: Seconds to wait for the API before giving up. Without it
            ``requests`` would block indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the GraphQL response.
    """
    url = "https://gnomad.broadinstitute.org/api"
    query = """
    query VariantDetails($variantId: String!, $dataset: DatasetId!) {
        variant(variantId: $variantId, dataset: $dataset) {
            variant_id
            chrom
            pos
            ref
            alt
            genome {
                af
                ac
                an
                ac_hom
                populations {
                    id
                    ac
                    an
                    af
                }
            }
            exome {
                af
                ac
                an
                ac_hom
                populations {
                    id
                    ac
                    an
                    af
                }
            }
            consequence
            lof
            rsids
            in_silico_predictors {
                id
                value
                flags
            }
            clinvar_variation_id
        }
    }
    """
    variables = {"variantId": variant_id, "dataset": dataset}
    response = requests.post(
        url,
        json={"query": query, "variables": variables},
        timeout=timeout,
    )
    return response.json()
# Example: query a specific variant
result = query_gnomad_variant("17-43094692-G-A")  # BRCA1 missense
variant = result["data"]["variant"]
if variant:
    # A variant observed in only one data type may carry an explicit null for
    # the other, in which case `variant.get("genome", {})` returns None rather
    # than the default. `or {}` keeps the chained lookup safe.
    genome_af = (variant.get("genome") or {}).get("af", "N/A")
    exome_af = (variant.get("exome") or {}).get("af", "N/A")
    print(f"Variant: {variant['variant_id']}")
    print(f" Consequence: {variant['consequence']}")
    print(f" Genome AF: {genome_af}")
    print(f" Exome AF: {exome_af}")
    print(f" LoF: {variant.get('lof')}")
gnomAD constraint scores assess how tolerant a gene is to variation relative to expectation:
import requests
def query_gnomad_constraint(gene_symbol, reference_genome="GRCh38", timeout=30):
    """Fetch constraint scores for a gene.

    Args:
        gene_symbol: Gene symbol to look up (e.g. "KCNQ2").
        reference_genome: Reference genome ID, sent as ``$reference_genome``.
        timeout: Seconds to wait for the API before giving up. Without it
            ``requests`` would block indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the GraphQL response.
    """
    url = "https://gnomad.broadinstitute.org/api"
    query = """
    query GeneConstraint($gene_symbol: String!, $reference_genome: ReferenceGenomeId!) {
        gene(gene_symbol: $gene_symbol, reference_genome: $reference_genome) {
            gene_id
            gene_symbol
            gnomad_constraint {
                exp_lof
                exp_mis
                exp_syn
                obs_lof
                obs_mis
                obs_syn
                oe_lof
                oe_mis
                oe_syn
                oe_lof_lower
                oe_lof_upper
                lof_z
                mis_z
                syn_z
                pLI
            }
        }
    }
    """
    variables = {"gene_symbol": gene_symbol, "reference_genome": reference_genome}
    response = requests.post(
        url,
        json={"query": query, "variables": variables},
        timeout=timeout,
    )
    return response.json()
# Example: print the headline constraint metrics for KCNQ2.
result = query_gnomad_constraint("KCNQ2")
gene = result["data"]["gene"]
# Guard against a missing gene or an absent constraint block so the example
# reports the gap instead of failing while formatting None.
# NOTE(review): assumes the API returns null for these when unavailable.
constraint = gene["gnomad_constraint"] if gene else None
if constraint:
    print(f"Gene: {gene['gene_symbol']}")
    print(f" pLI: {constraint['pLI']:.3f} (>0.9 = LoF intolerant)")
    print(f" LOEUF: {constraint['oe_lof_upper']:.3f} (<0.35 = highly constrained)")
    print(f" Obs/Exp LoF: {constraint['oe_lof']:.3f}")
    print(f" Missense Z: {constraint['mis_z']:.3f}")
else:
    print("No gnomAD constraint metrics available for KCNQ2")
Constraint score interpretation:
| Score | Range | Meaning |
|---|---|---|
| pLI | 0–1 | Probability of LoF intolerance; >0.9 = highly intolerant |
| LOEUF | 0–∞ | LoF observed/expected upper bound; <0.35 = constrained |
| oe_lof | 0–∞ | Observed/expected ratio for LoF variants |
| mis_z | −∞ to ∞ | Missense constraint z-score; >3.09 = constrained |
| syn_z | −∞ to ∞ | Synonymous z-score (control; should be near 0) |
import requests
import pandas as pd
def get_population_frequencies(variant_id, dataset="gnomad_r4", timeout=30):
    """Extract per-population allele frequencies for a variant.

    Only the genome data block is queried. Populations with no called
    alleles (``an == 0``) are dropped, ``af`` is recomputed as ``ac / an``,
    and rows are sorted by ``af`` in descending order.

    Args:
        variant_id: Variant identifier in ``chrom-pos-ref-alt`` form.
        dataset: gnomAD dataset ID, sent as the ``$dataset`` query variable.
        timeout: Seconds to wait for the API before giving up. Without it
            ``requests`` would block indefinitely on a stalled connection.

    Returns:
        A ``pandas.DataFrame`` with columns ``id``, ``ac``, ``an``, ``af`` and
        ``ac_hom``. It is empty when the variant is unknown or has no genome
        population data.

    Raises:
        RuntimeError: If the GraphQL response carries an ``errors`` entry.
    """
    url = "https://gnomad.broadinstitute.org/api"
    query = """
    query PopFreqs($variantId: String!, $dataset: DatasetId!) {
        variant(variantId: $variantId, dataset: $dataset) {
            variant_id
            genome {
                populations {
                    id
                    ac
                    an
                    af
                    ac_hom
                }
            }
        }
    }
    """
    response = requests.post(
        url,
        json={"query": query, "variables": {"variantId": variant_id, "dataset": dataset}},
        timeout=timeout,
    )
    payload = response.json()

    # Surface API-side failures explicitly rather than as an opaque
    # TypeError/KeyError from the nested lookups below.
    if payload.get("errors"):
        raise RuntimeError(f"gnomAD API error: {payload['errors']}")

    # Each level may be null (unknown variant, or variant absent from genome
    # data), so fall back to an empty container at every step.
    variant = (payload.get("data") or {}).get("variant") or {}
    populations = (variant.get("genome") or {}).get("populations") or []
    if not populations:
        # An empty frame with the expected columns keeps callers' column
        # access working; filtering a column-less frame on "an" would raise.
        return pd.DataFrame(columns=["id", "ac", "an", "af", "ac_hom"])

    df = pd.DataFrame(populations)
    df = df[df["an"] > 0].copy()
    df["af"] = df["ac"] / df["an"]
    return df.sort_values("af", ascending=False)
# Population IDs in gnomAD v4:
# afr = African/African American
# ami = Amish
# amr = Admixed American
# asj = Ashkenazi Jewish
# eas = East Asian
# fin = Finnish
# mid = Middle Eastern
# nfe = Non-Finnish European
# sas = South Asian
# remaining = Other
gnomAD also contains a structural variant dataset:
import requests
def query_gnomad_sv(gene_symbol, dataset="gnomad_sv_r4", reference_genome="GRCh38", timeout=30):
    """Query structural variants overlapping a gene.

    Args:
        gene_symbol: Gene symbol to look up.
        dataset: Structural-variant dataset ID, sent as ``$dataset``.
            NOTE(review): the gnomAD schema is understood to require this
            argument on ``structural_variants``; confirm the accepted IDs
            (e.g. ``gnomad_sv_r4``, ``gnomad_sv_r2_1``) against the live schema.
        reference_genome: Reference genome ID, sent as ``$reference_genome``.
            It should match the build of the chosen dataset.
        timeout: Seconds to wait for the API before giving up. Without it
            ``requests`` would block indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the GraphQL response.
    """
    url = "https://gnomad.broadinstitute.org/api"
    query = """
    query SVsByGene($gene_symbol: String!, $dataset: StructuralVariantDatasetId!, $reference_genome: ReferenceGenomeId!) {
        gene(gene_symbol: $gene_symbol, reference_genome: $reference_genome) {
            structural_variants(dataset: $dataset) {
                variant_id
                type
                chrom
                pos
                end
                af
                ac
                an
            }
        }
    }
    """
    variables = {
        "gene_symbol": gene_symbol,
        "dataset": dataset,
        "reference_genome": reference_genome,
    }
    response = requests.post(
        url,
        json={"query": query, "variables": variables},
        timeout=timeout,
    )
    return response.json()
Check population frequency — Is the variant rare enough to be pathogenic?
Assess functional impact — LoF variants have highest prior probability
lof field: HC = high-confidence LoF, LC = low-confidence LoF
Check lof_flags for issues like "NAGNAG_SITE", "PHYLOCSF_WEAK"
Apply ACMG criteria:
Homozygote counts (ac_hom) are relevant for recessive disease analysis