Interactive Variant Annotation

The following query retrieves variants from DeepVariant-called Platinum Genomes and interactively JOINs them with ClinVar.

To run this on your own table of variants, change the table name and call_set_name in the sample_variants sub query below.

For an ongoing investigation, you may wish to rerun this query each time a new version of ClinVar is released and loaded into BigQuery. To do so, change the ClinVar table name in the rare_pathenogenic_variants subquery.

See also similar examples for GRCh37 in https://github.com/googlegenomics/bigquery-examples/tree/master/platinumGenomes


In [1]:
%%bq query
#standardSQL
  --
  -- Return variants for sample NA12878 that are:
  --   annotated in ClinVar as 'likely pathogenic' (4), 'pathogenic' (5)
  --   or 'other' (255)
  --   and not flagged (G5) as exceeding 5% minor allele frequency in
  --   one or more populations
  --
  WITH sample_variants AS (
  -- One row per (variant, call, alternate allele) for the chosen sample,
  -- keyed by the same columns the ClinVar sub query exposes.
  SELECT
    -- Remove the 'chr' prefix from the reference name so it lines up with
    -- ClinVar. The pattern is anchored to the start of the name, and a name
    -- without the prefix is passed through unchanged rather than becoming NULL.
    REGEXP_REPLACE(v.reference_name, r'^chr', '') AS chr,
    v.start,
    v.reference_bases,
    alt,
    call.call_set_name
  FROM
    `genomics-public-data.platinum_genomes_deepvariant.single_sample_genome_calls` AS v
  CROSS JOIN
    UNNEST(v.call) AS call
  CROSS JOIN
    UNNEST(v.alternate_bases) AS alt WITH OFFSET AS alt_offset
  WHERE
    call.call_set_name = 'NA12878_ERR194147'
    -- Require that at least one genotype matches this alternate.
    -- The alternate at array offset N is compared against genotype value N+1.
    AND EXISTS (
      SELECT gt FROM UNNEST(call.genotype) AS gt WHERE gt = alt_offset + 1)
    ),
  --
  --
  rare_pathenogenic_variants AS (
  -- One row per (ClinVar record, alternate allele) that passes the clinical
  -- significance and allele frequency filters below.
  SELECT
    -- ClinVar does not use the 'chr' prefix for reference names.
    v.reference_name AS chr,
    v.start,
    v.reference_bases,
    alt,
    CLNHGVS,
    CLNALLE,
    CLNSRC,
    CLNORIGIN,
    CLNSRCID,
    CLNSIG,
    CLNDSDB,
    CLNDSDBID,
    CLNDBN,
    CLNREVSTAT,
    CLNACC
  FROM
    `bigquery-public-data.human_variant_annotation.ncbi_clinvar_hg38_20170705` AS v
  CROSS JOIN
    UNNEST(v.alternate_bases) AS alt
  WHERE
    -- Variant Clinical Significance, 0 - Uncertain significance, 1 - not provided,
    -- 2 - Benign, 3 - Likely benign, 4 - Likely pathogenic, 5 - Pathogenic,
    -- 6 - drug response, 7 - histocompatibility, 255 - other
    --
    -- Each CLNSIG element may hold several codes separated by '|'
    -- (for example '255|5|1'). The word boundaries make the pattern match
    -- whole codes only, not digits that happen to occur inside another code.
    EXISTS (
      SELECT sig FROM UNNEST(CLNSIG) AS sig
      WHERE REGEXP_CONTAINS(sig, r'\b(4|5|255)\b'))
    -- TRUE if >5% minor allele frequency in 1+ populations
    AND G5 IS NULL
)
 --
 -- Keep only the sample's alleles that have a matching ClinVar record.
 -- Columns are listed explicitly so the output does not change shape if
 -- either sub query gains a column.
 --
SELECT
  chr,
  start,
  reference_bases,
  alt,
  call_set_name,
  CLNHGVS,
  CLNALLE,
  CLNSRC,
  CLNORIGIN,
  CLNSRCID,
  CLNSIG,
  CLNDSDB,
  CLNDSDBID,
  CLNDBN,
  CLNREVSTAT,
  CLNACC
FROM
  sample_variants
INNER JOIN
  rare_pathenogenic_variants USING(chr,
    start,
    reference_bases,
    alt)
ORDER BY
  chr,
  start,
  reference_bases,
  alt


Out[1]:
chrstartreference_basesaltcall_set_nameCLNHGVSCLNALLECLNSRCCLNORIGINCLNSRCIDCLNSIGCLNDSDBCLNDSDBIDCLNDBNCLNREVSTATCLNACC
194047008CTNA12878_ERR194147['NC_000001.11:g.94047009C>T'][2]['HGMD|OMIM_Allelic_Variant|UniProtKB_(protein)'][1]['CM024629|601691.0035|P78363#VAR_008428']['255|5|1|2|3|3|3|3']['MedGen:OMIM|MedGen|MedGen|Human_Phenotype_Ontology:MedGen|MedGen|MedGen|MedGen']['C1855465:248200|CN221809|CN169374|HP:0000608:C0024437|CN239309|CN239466|CN239312']['MACULAR_DEGENERATION\\x2c_AGE-RELATED\\x2c_2\\x2c_SUSCEPTIBILITY_TO|Stargardt_disease_1|not_provided|not_specified|Macular_degeneration|Cone-Rod_Dystrophy\\x2c_Recessive|Retinitis_Pigmentosa\\x2c_Recessive|Stargardt_Disease\\x2c_Recessive']['no_criteria|no_criteria|no_assertion|mult|single|single|single|single']['RCV000008374.4|RCV000008375.4|RCV000085512.3|RCV000152706.4|RCV000294335.1|RCV000349295.1|RCV000392936.1|RCV000399411.1']
1201361939AGNA12878_ERR194147['NC_000001.11:g.201361940A>G'][1]['.'][1]['.']['255|0|0|0|0|0|0|0']['MedGen|MedGen:OMIM|MedGen:OMIM|MedGen:OMIM|Human_Phenotype_Ontology:MedGen|Human_Phenotype_Ontology:MedGen:Orphanet|MedGen|MedGen:Orphanet:SNOMED_CT']['CN169374|C1861864:115195|C2676271:612422|C1832243:601494|HP:0011664:C4021133|HP:0001639:C0007194:ORPHA217569|CN239310|C0340429:ORPHA217635:233878008']['not_specified|Familial_hypertrophic_cardiomyopathy_2|Familial_restrictive_cardiomyopathy_3|Left_ventricular_noncompaction_6|Left_ventricular_noncompaction_cardiomyopathy|Hypertrophic_cardiomyopathy|Dilated_Cardiomyopathy\\x2c_Dominant|Familial_restrictive_cardiomyopathy']['conf|single|single|single|single|single|single|single']['RCV000168973.2|RCV000230425.2|RCV000230425.2|RCV000230425.2|RCV000283636.1|RCV000323526.1|RCV000338870.1|RCV000378147.1']
1212897348TTACACNA12878_ERR194147['NC_000001.11:g.212897351_212897370dup20', 'NC_000001.11:g.212897365_212897370dupCACACA', 'NC_000001.11:g.212897367_212897370dupCACA', 'NC_000001.11:g.212897369_212897370dupCA'][4, -1, -1, -1]['.', '.', '.', '.'][1, 1, 1, 1]['.', '.', '.', '.']['0', '3', '255', '0']['MedGen:OMIM:Orphanet', 'MedGen:OMIM:Orphanet', 'MedGen:OMIM:Orphanet', 'MedGen:OMIM:Orphanet']['C1836916:609033:ORPHA88628', 'C1836916:609033:ORPHA88628', 'C1836916:609033:ORPHA88628', 'C1836916:609033:ORPHA88628']['Posterior_column_ataxia_with_retinitis_pigmentosa', 'Posterior_column_ataxia_with_retinitis_pigmentosa', 'Posterior_column_ataxia_with_retinitis_pigmentosa', 'Posterior_column_ataxia_with_retinitis_pigmentosa']['single', 'single', 'conf', 'single']['RCV000355025.1', 'RCV000297866.1', 'RCV000262602.1', 'RCV000351203.1']
1215671030CTNA12878_ERR194147['NC_000001.11:g.215671031C>T'][1]['UniProtKB_(protein)'][1]['O75445#VAR_061351']['255']['MedGen']['CN169374']['not_specified']['conf']['RCV000041750.4']
1237589773ATANA12878_ERR194147['NC_000001.11:g.237589784delT'][1]['.'][1]['.']['2|255']['MedGen:Orphanet:SNOMED_CT|MedGen']['C0878544:ORPHA167848:85898001|CN169374']['Cardiomyopathy|not_specified']['no_criteria|conf']['RCV000030420.1|RCV000036734.8']
1026088401CTNA12878_ERR194147['NC_000010.11:g.26088402C>T'][1]['.'][1]['.']['255|0']['MedGen|MedGen']['CN169374|CN239439']['not_specified|Nonsyndromic_Hearing_Loss\\x2c_Recessive']['conf|single']['RCV000039026.3|RCV000381484.1']
116392135CTNA12878_ERR194147['NC_000011.10:g.6392136C>T'][1]['.'][1]['.']['255|0']['MedGen|MedGen:SNOMED_CT']['CN169374|C0028064:58459009']['not_specified|Sphingomyelin/cholesterol_lipidosis']['conf|single']['RCV000079188.5|RCV000394529.1']
116617153CTNA12878_ERR194147['NC_000011.10:g.6617154C>A', 'NC_000011.10:g.6617154C>G', 'NC_000011.10:g.6617154C>T'][1, 2, 3]['.', 'OMIM_Allelic_Variant', '.'][1, 1, 1]['.', '607998.0004', '.']['5', '5|5|5|5|5', '5']['MedGen', 'MedGen:OMIM:Orphanet|MedGen:OMIM:Orphanet|MedGen|MeSH:MedGen|MedGen:OMIM:Orphanet:SNOMED_CT', 'MedGen']['CN221809', 'C1876161:204500:ORPHA228349|C1836474:609270:ORPHA284324|CN221809|D030342:C0950123|C0027877:214200:ORPHA216:42012007', 'CN221809']['not_provided', 'Ceroid_lipofuscinosis_neuronal_2|Childhood-onset_autosomal_recessive_slowly_progressive_spinocerebellar_ataxia|not_provided|Inborn_genetic_diseases|Neuronal_ceroid_lipofuscinosis', 'not_provided']['single', 'mult|single|mult|single|single', 'single']['RCV000391641.1', 'RCV000002763.11|RCV000074608.7|RCV000189765.4|RCV000210689.1|RCV000228119.2', 'RCV000189764.3']
1147448802CTNA12878_ERR194147['NC_000011.10:g.47448803C>T'][1]['.'][1]['.']['255']['MedGen']['CN169374']['not_specified']['conf']['RCV000246056.2']
1166510682TCNA12878_ERR194147['NC_000011.10:g.66510683T>C'][1]['.'][1]['.']['255|2']['MedGen|MedGen:OMIM:Orphanet:SNOMED_CT']['CN169374|C0752166:209900:ORPHA110:5619004']['not_specified|Bardet-Biedl_syndrome']['conf|single']['RCV000173529.2|RCV000226235.1']
12102840473TCNA12878_ERR194147['NC_000012.12:g.102840474T>C'][1]['HGMD|OMIM_Allelic_Variant|UniProtKB_(protein)'][1]['CM910294|612349.0017|P00439#VAR_001038']['5|5|5']['MedGen|MedGen|MedGen:OMIM:Orphanet:SNOMED_CT']['C0751435|CN221809|C0031485:261600:ORPHA716:154735006']['Hyperphenylalaninemia\\x2c_non-pku|not_provided|Phenylketonuria']['no_criteria|single|mult']['RCV000000624.4|RCV000078508.6|RCV000150074.4']
1423389061AGANA12878_ERR194147['NC_000014.9:g.23389063delG'][1]['.'][1]['.']['3|255|2|0|0|0']['MedGen:Orphanet:SNOMED_CT|MedGen|MedGen:OMIM|MedGen:Orphanet|MedGen|Human_Phenotype_Ontology:MedGen:Orphanet']['C0878544:ORPHA167848:85898001|CN169374|C2750467:613251|C0018817:ORPHA1478|CN239310|HP:0001639:C0007194:ORPHA217569']['Cardiomyopathy|not_specified|Familial_hypertrophic_cardiomyopathy_14|Atrial_septal_defect|Dilated_Cardiomyopathy\\x2c_Dominant|Hypertrophic_cardiomyopathy']['single|conf|single|single|single|single']['RCV000030306.1|RCV000154759.3|RCV000205051.2|RCV000299696.1|RCV000354591.1|RCV000396023.1']
1464210032CTNA12878_ERR194147['NC_000014.9:g.64210033C>T'][2]['OMIM_Allelic_Variant|UniProtKB_(protein)'][1]['608442.0001|Q8WXH0#VAR_062977']['5|255|3']['MedGen:OMIM|MedGen|MedGen:Orphanet:SNOMED_CT']['C2751805:612999|CN169374|C0410189:ORPHA261:111508004']['Emery-Dreifuss_muscular_dystrophy_5\\x2c_autosomal_dominant|not_specified|Emery-Dreifuss_muscular_dystrophy']['no_criteria|conf|single']['RCV000002414.4|RCV000173937.3|RCV000403391.1']
1565078011CTNA12878_ERR194147['NC_000015.10:g.65078012C>T'][1]['.'][1]['.']['255|3']['MedGen|MedGen']['CN169374|CN239448']['not_specified|Nemaline_Myopathy\\x2c_Dominant']['conf|single']['RCV000117307.4|RCV000304321.1']
1589645160ATNA12878_ERR194147['NC_000015.10:g.89645161A>T'][1]['.'][1]['.']['255|3']['MedGen|Gene:MedGen:OMIM:Orphanet']['CN169374|46:C0796147:200990:ORPHA36']['not_specified|Acrocallosal_syndrome\\x2c_Schinzel_type']['conf|single']['RCV000117416.4|RCV000261344.1']
162497032CGNA12878_ERR194147['NC_000016.10:g.2497033C>G'][1]['UniProtKB_(protein)'][1]['Q9ULP9#VAR_070890']['255|3|3|3']['MedGen|MedGen|MedGen:OMIM|MedGen:OMIM']['CN169374|C3809181|C3892048:616044|C3463992:308350']['not_specified|Caused_by_mutation_in_the_TBC1_domain_family\\x2c_member_24|Deafness\\x2c_autosomal_dominant_65|Epileptic_encephalopathy\\x2c_early_infantile\\x2c_1']['conf|single|single|single']['RCV000128367.6|RCV000477643.1|RCV000477643.1|RCV000477643.1']
1656514588CTNA12878_ERR194147['NC_000016.10:g.56514589C\\x3d', 'NC_000016.10:g.56514589C>T'][0, 1]['OMIM_Allelic_Variant', '.'][1, 1]['606151.0013', '.']['5', '2']['MedGen', 'MedGen']['C4016908', 'CN169374']['Bardet-biedl_syndrome_2/6\\x2c_digenic', 'not_specified']['no_criteria', 'single']['RCV000004838.4', 'RCV000301991.1']
1688805736TCNA12878_ERR194147['NC_000016.10:g.88805737T>C'][1]['UniProtKB_(protein)'][1]['Q9H211#VAR_054504']['255']['MedGen']['CN169374']['not_specified']['conf']['RCV000116652.3']
1717796722AAGGANA12878_ERR194147['NC_000017.11:g.17796729_17796731delGAG'][1]['HGMD'][1]['CD116392']['255|0']['MedGen|MedGen']['CN169374|CN221809']['not_specified|not_provided']['conf|single']['RCV000082265.6|RCV000118114.3']
182700878AGNA12878_ERR194147['NC_000018.10:g.2700879A>G'][1]['.'][1]['.']['255']['MedGen']['CN169374']['not_specified']['conf']['RCV000247697.2']
1911132530CCANA12878_ERR194147['NC_000019.10:g.11125285_11132532dup7248', 'NC_000019.10:g.11132532dupA'][-1, 1]['LDLR_@_LOVD', '.'][5, 1]['LDLR_000294', '.']['255', '3']['MedGen:OMIM:SNOMED_CT:SNOMED_CT', 'MedGen:OMIM:SNOMED_CT:SNOMED_CT']['C0020445:143890:397915002:398036000', 'C0020445:143890:397915002:398036000']['Familial_hypercholesterolemia', 'Familial_hypercholesterolemia']['conf', 'single']['RCV000237281.1', 'RCV000326993.1']
1941414124CTNA12878_ERR194147['NC_000019.10:g.41414125C>T'][1]['HGMD|UniProtKB_(protein)'][1]['CM021497|P12694#VAR_034361']['255|3']['MedGen|MedGen:OMIM:Orphanet:SNOMED_CT']['CN169374|C0024776:248600:ORPHA268184:27718001']['not_specified|Maple_syrup_urine_disease']['conf|single']['RCV000079243.6|RCV000295914.1']
1945179661CTNA12878_ERR194147['NC_000019.10:g.45179662C>T'][1]['.'][1]['.']['255|0']['MedGen|MedGen:SNOMED_CT']['CN169374|C0079504:9311003']['not_specified|Hermansky-Pudlak_syndrome']['conf|single']['RCV000150192.3|RCV000320201.1']
1957231145GGCNA12878_ERR194147['NC_000019.10:g.57231150dupC'][1]['.'][1]['.']['255']['MedGen']['CN239485']['Spermatogenic_Failure']['conf']['RCV000311416.1']
2178532038CTNA12878_ERR194147['NC_000002.12:g.178532039C>T'][1]['.'][1]['.']['255|2|2|2|3|3|3|3|3|3']['MedGen|MedGen:OMIM|MedGen:OMIM:Orphanet|MedGen|MedGen:OMIM:Orphanet|MedGen|Human_Phenotype_Ontology:MedGen:Orphanet|MedGen|MedGen:OMIM:Orphanet|MedGen:OMIM:Orphanet']['CN169374|C1858763:604145|C1837342:608807:ORPHA140922|CN230736|C2673677:611705:ORPHA289377|CN239352|HP:0001639:C0007194:ORPHA217569|CN239310|C1863599:603689:ORPHA178464|C1838244:600334:ORPHA609']['not_specified|Dilated_cardiomyopathy_1G|Limb-girdle_muscular_dystrophy\\x2c_type_2J|Cardiovascular_phenotype|Myopathy\\x2c_early-onset\\x2c_with_fatal_cardiomyopathy|Limb-Girdle_Muscular_Dystrophy\\x2c_Recessive|Hypertrophic_cardiomyopathy|Dilated_Cardiomyopathy\\x2c_Dominant|Hereditary_myopathy_with_early_respiratory_failure|Distal_myopathy_Markesbery-Griggs_type']['conf|single|single|single|single|single|single|single|single|single']['RCV000040944.8|RCV000231098.2|RCV000231098.2|RCV000244925.1|RCV000296735.1|RCV000311858.1|RCV000336579.1|RCV000351463.1|RCV000402951.1|RCV000403378.1']

(rows: 63, time: 5.0s, 10GB processed, job: job_P6NRU_M3B1MeX_TpuZdGC9QTWZwp)