ipyrad-analysis toolkit: distance

Key features:

  1. Calculate pairwise genetic distances between samples.
  2. Filter SNPs to reduce missing data.
  3. Impute missing data using population allele frequencies.

Required software


In [1]:
# Install from a terminal with conda:
#   conda install ipyrad -c bioconda
#   conda install toyplot -c eaton-lab    (optional: only used to draw the matrices)

In [2]:
import os

import ipyrad.analysis as ipa
import toyplot
from ipyrad.analysis.distance import Distance

Short tutorial

Set up input files and parameters


In [3]:
# Path to your VCF or HDF5 formatted SNPs file. "~" is expanded to the
# current user's home directory so that no user-specific absolute path is
# baked into the notebook; point this at wherever your own file lives.
data = os.path.expanduser("~/Downloads/ref_pop2.snps.hdf5")

In [4]:
# Assign every sample to a population: group name -> list of sample names.
imap = dict(
    virg=["TXWV2", "LALC2", "SCCU3", "FLSF33", "FLBA140"],
    mini=["FLSF47", "FLMO62", "FLSA185", "FLCK216"],
    gemi=["FLCK18", "FLSF54", "FLWO6", "FLAB109"],
    bran=["BJSL25", "BJSB3", "BJVL19"],
    fusi=["MXED8", "MXGT4", "TXGR3", "TXMD3"],
    sagr=["CUVN10", "CUCA4", "CUSV6", "CUMM5"],
    oleo=["CRL0030", "CRL0001", "HNDA09", "BZBB1", "MXSA3017"],
)

# Per-group coverage threshold a SNP must meet to be kept; every group gets
# the same value of 0.5 (presumably read as a proportion of the group's
# samples rather than a sample count -- confirm against the ipyrad docs).
minmap = dict.fromkeys(imap, 0.5)

Calculate distances


In [5]:
# Configure the distance tool on the SNP file. `imap` groups the samples,
# while `minmap` and `mincov` set the missing-data filters whose effect is
# reported in the printed summary; remaining missing genotypes are imputed
# with the "sample" method. `Distance` comes from the imports cell at the
# top of the notebook.
# NOTE(review): imputation by sampling looks stochastic, so distances may
# vary slightly between runs -- check whether Distance accepts a seed.
dist = Distance(
    data=data,
    imap=imap,
    minmap=minmap,
    mincov=0.5,
    impute_method="sample",
    subsample_snps=False,
)

# filter, impute, and compute the distances; prints a filtering summary
dist.run()


Samples: 29
Sites before filtering: 349914
Filtered (indels): 0
Filtered (bi-allel): 13379
Filtered (mincov): 30459
Filtered (minmap): 111825
Filtered (combined): 120177
Sites after filtering: 229737
Sites containing missing values: 219551 (95.57%)
Missing values in SNP matrix: 814369 (12.22%)
Imputation: 'sampled'; (0, 1, 2) = 77.3%, 10.7%, 12.0%

Save results


In [6]:
# write the pairwise distance matrix to "distances.csv" in the working directory
dist.dists.to_csv(path_or_buf="distances.csv")

In [7]:
# preview the first five rows of the distance matrix
dist.dists.head(n=5)


Out[7]:
BJSB3 BJSL25 BJVL19 BZBB1 CRL0001 CRL0030 CUCA4 CUMM5 CUSV6 CUVN10 ... FLWO6 HNDA09 LALC2 MXED8 MXGT4 MXSA3017 SCCU3 TXGR3 TXMD3 TXWV2
BJSB3 0.000000 0.250447 0.253472 0.592255 0.530145 0.572576 0.601853 0.597044 0.591990 0.579937 ... 0.594005 0.582000 0.568137 0.464618 0.443942 0.579789 0.603638 0.487945 0.487936 0.590440
BJSL25 0.250447 0.000000 0.235900 0.558630 0.494291 0.537193 0.566156 0.559675 0.554665 0.542768 ... 0.558769 0.548897 0.532239 0.435050 0.412694 0.547182 0.567323 0.453606 0.457105 0.554882
BJVL19 0.253472 0.235900 0.000000 0.567897 0.502775 0.547391 0.576355 0.569360 0.563000 0.554621 ... 0.564728 0.555927 0.539913 0.441844 0.417060 0.556449 0.575336 0.464118 0.465476 0.562278
BZBB1 0.592255 0.558630 0.567897 0.000000 0.280691 0.280569 0.422670 0.422962 0.426266 0.394242 ... 0.559152 0.285883 0.532918 0.525701 0.542381 0.317450 0.576455 0.552554 0.551579 0.563571
CRL0001 0.530145 0.494291 0.502775 0.280691 0.000000 0.239596 0.347859 0.322277 0.342836 0.213266 ... 0.470064 0.262217 0.451717 0.466429 0.477764 0.299538 0.492224 0.487327 0.484123 0.482726

5 rows × 29 columns

Draw the matrix


In [8]:
# Draw the distance matrix. Row labels are read from the matrix's own index
# so that each label is guaranteed to sit beside the row it names, whatever
# order the samples are stored in (previously the labels were an
# independently sorted name list that only matched if the rows happened to
# be in alphabetical order).
row_labels = list(dist.dists.index)
toyplot.matrix(
    dist.dists,
    bshow=False,
    tshow=False,
    rlocator=toyplot.locator.Explicit(
        range(len(row_labels)),
        row_labels,
    ),
);


00.0000000.2504470.2534720.5922550.5301450.5725760.6018530.5970440.5919900.5799370.5716360.5481790.5891690.5907190.5699390.5965260.5854170.5836540.5553650.5940050.5820000.5681370.4646180.4439420.5797890.6036380.4879450.4879360.590440BJSB310.2504470.0000000.2359000.5586300.4942910.5371930.5661560.5596750.5546650.5427680.5352950.5107320.5529840.5564750.5362780.5640230.5487620.5484880.5191810.5587690.5488970.5322390.4350500.4126940.5471820.5673230.4536060.4571050.554882BJSL2520.2534720.2359000.0000000.5678970.5027750.5473910.5763550.5693600.5630000.5546210.5407840.5167170.5610680.5612850.5391380.5690160.5572020.5562230.5260540.5647280.5559270.5399130.4418440.4170600.5564490.5753360.4641180.4654760.562278BJVL1930.5922550.5586300.5678970.0000000.2806910.2805690.4226700.4229620.4262660.3942420.5395000.5188720.5616120.5666170.5489100.5762200.5529930.5551480.5203300.5591520.2858830.5329180.5257010.5423810.3174500.5764550.5525540.5515790.563571BZBB140.5301450.4942910.5027750.2806910.0000000.2395960.3478590.3222770.3428360.2132660.4490180.4355980.4714170.4784950.4656630.4923890.4734980.4688190.4296300.4700640.2622170.4517170.4664290.4777640.2995380.4922240.4873270.4841230.482726CRL000150.5725760.5371930.5473910.2805690.2395960.0000000.3981940.4015160.4016510.3683560.5286390.5081770.5494280.5502640.5327480.5615550.5426470.5382370.5070320.5498420.2561230.5222580.5082160.5239560.3065290.5620690.5360960.5360170.549550CRL003060.6018530.5661560.5763550.4226700.3478590.3981940.0000000.2902930.3682300.3592670.4819600.4777940.5025310.5199340.5041770.5416110.5114020.5072630.4619720.5006380.4054420.4874700.5381110.5443530.4334220.5326790.5476220.5449750.523372CUCA470.5970440.5596750.5693600.4229620.3222770.4015160.2902930.0000000.2884300.2883910.4722490.4691360.4947530.5112150.4998980.5329090.5029270.4953230.4527050.4901950.4060900.4814160.5302720.5392990.4319550.5232110.5429860.5390160.513217CUMM580.5919900.5546650.5630000.4262660.3428360.4016510.3682300.2884300.0000000.3584230.4695110
.4601040.4895600.5016610.4915100.5245220.4964290.4933080.4487740.4875180.4094120.4744030.5301110.5339760.4319500.5147360.5360870.5325870.508268CUSV690.5799370.5427680.5546210.3942420.2132660.3683560.3592670.2883910.3584230.0000000.4627070.4549380.4883540.5013080.4907570.5172650.4933420.4888890.4469590.4870790.3745760.4678260.5154290.5239860.4012850.5129040.5254010.5224930.503824CUVN10100.5716360.5352950.5407840.5395000.4490180.5286390.4819600.4722490.4695110.4627070.0000000.3819020.3441500.4069610.4035310.4414870.4186090.3863160.3133230.3387830.5350860.3983340.5127690.5116460.5289570.4444040.4999930.4932640.433504FLAB109110.5481790.5107320.5167170.5188720.4355980.5081770.4777940.4691360.4601040.4549380.3819020.0000000.4047980.4189790.4046500.4433290.3211370.4031650.3569080.3997790.5144320.3059890.4813370.4809370.5097660.3299990.4604050.4525170.301062FLBA140120.5891690.5529840.5610680.5616120.4714170.5494280.5025310.4947530.4895600.4883540.3441500.4047980.0000000.4255300.4229790.4598560.4391020.4133290.3276620.3574830.5532980.4166760.5304150.5291790.5518270.4639570.5186150.5130690.454450FLCK18130.5907190.5564750.5612850.5666170.4784950.5502640.5199340.5112150.5016610.5013080.4069610.4189790.4255300.0000000.3754730.3892360.4531700.3832160.3904810.4298960.5587300.4350540.5373710.5321740.5512960.4754570.5276250.5189370.468579FLCK216140.5699390.5362780.5391380.5489100.4656630.5327480.5041770.4998980.4915100.4907570.4035310.4046500.4229790.3754730.0000000.3778760.4418010.3722210.3865380.4254300.5433130.4199240.5163900.5149280.5355120.4644440.5068800.5009950.456853FLMO62150.5965260.5640230.5690160.5762200.4923890.5615550.5416110.5329090.5245220.5172650.4414870.4433290.4598560.3892360.3778760.0000000.4799230.3999230.4271060.4661110.5672620.4611190.5455020.5453280.5622910.4996150.5410230.5357740.493852FLSA185160.5854170.5487620.5572020.5529930.4734980.5426470.5114020.5029270.4964290.4933420.4186090.3211370.4391020.4531700.4418010.4799230.0000000.4347540.3893580.4368690.54810
90.3244060.5154070.5150590.5439830.3497040.4949620.4895730.318730FLSF33170.5836540.5484880.5562230.5551480.4688190.5382370.5072630.4953230.4933080.4888890.3863160.4031650.4133290.3832160.3722210.3999230.4347540.0000000.3725520.4144650.5464680.4170460.5243870.5233770.5401650.4616450.5109580.5091470.455099FLSF47180.5553650.5191810.5260540.5203300.4296300.5070320.4619720.4527050.4487740.4469590.3133230.3569080.3276620.3904810.3865380.4271060.3893580.3725520.0000000.3248890.5151590.3715990.4948620.4967940.5070280.4166070.4801270.4737980.407579FLSF54190.5940050.5587690.5647280.5591520.4700640.5498420.5006380.4901950.4875180.4870790.3387830.3997790.3574830.4298960.4254300.4661110.4368690.4144650.3248890.0000000.5553050.4162720.5333970.5316120.5526670.4591210.5209700.5168260.451695FLWO6200.5820000.5488970.5559270.2858830.2622170.2561230.4054420.4060900.4094120.3745760.5350860.5144320.5532980.5587300.5433130.5672620.5481090.5464680.5151590.5553050.0000000.5275380.5189540.5330920.3089180.5687070.5456240.5440390.558321HNDA09210.5681370.5322390.5399130.5329180.4517170.5222580.4874700.4814160.4744030.4678260.3983340.3059890.4166760.4350540.4199240.4611190.3244060.4170460.3715990.4162720.5275380.0000000.4979000.4946700.5236900.3385780.4744160.4657280.299386LALC2220.4646180.4350500.4418440.5257010.4664290.5082160.5381110.5302720.5301110.5154290.5127690.4813370.5304150.5373710.5163900.5455020.5154070.5243870.4948620.5333970.5189540.4979000.0000000.3651480.5109800.5387290.3919830.3857150.523103MXED8230.4439420.4126940.4170600.5423810.4777640.5239560.5443530.5392990.5339760.5239860.5116460.4809370.5291790.5321740.5149280.5453280.5150590.5233770.4967940.5316120.5330920.4946700.3651480.0000000.5261360.5357870.3853060.3844530.522571MXGT4240.5797890.5471820.5564490.3174500.2995380.3065290.4334220.4319550.4319500.4012850.5289570.5097660.5518270.5512960.5355120.5622910.5439830.5401650.5070280.5526670.3089180.5236900.5109800.5261360.0000000.5649900.5429950.5404090.552218MXSA3017250.6036380
.5673230.5753360.5764550.4922240.5620690.5326790.5232110.5147360.5129040.4444040.3299990.4639570.4754570.4644440.4996150.3497040.4616450.4166070.4591210.5687070.3385780.5387290.5357870.5649900.0000000.5154550.5056780.322760SCCU3260.4879450.4536060.4641180.5525540.4873270.5360960.5476220.5429860.5360870.5254010.4999930.4604050.5186150.5276250.5068800.5410230.4949620.5109580.4801270.5209700.5456240.4744160.3919830.3853060.5429950.5154550.0000000.3864330.501029TXGR3270.4879360.4571050.4654760.5515790.4841230.5360170.5449750.5390160.5325870.5224930.4932640.4525170.5130690.5189370.5009950.5357740.4895730.5091470.4737980.5168260.5440390.4657280.3857150.3844530.5404090.5056780.3864330.0000000.496041TXMD3280.5904400.5548820.5622780.5635710.4827260.5495500.5233720.5132170.5082680.5038240.4335040.3010620.4544500.4685790.4568530.4938520.3187300.4550990.4075790.4516950.5583210.2993860.5231030.5225710.5522180.3227600.5010290.4960410.000000TXWV2

Draw matrix reordered to match groups in imap


In [9]:
# flatten the groups of imap into a single list of sample names, group by group
ordered_names = [name for members in dist.imap.values() for name in members]

# pick rows and columns in that order with one label-based selection
# (the transpose leaves a symmetric distance matrix unchanged)
ordered_matrix = dist.dists.T.loc[ordered_names, ordered_names]

In [10]:
# Draw the group-ordered matrix; the row labels are the same name list that
# was used to reorder it, placed at row positions 0..n-1.
toyplot.matrix(
    ordered_matrix,
    tshow=False,
    bshow=False,
    rlocator=toyplot.locator.Explicit(
        locations=range(len(ordered_names)),
        labels=ordered_names,
    ),
);


00.0000000.2993860.3227600.3187300.3010620.4550990.4568530.4938520.4685790.4544500.4075790.4516950.4335040.5548820.5904400.5622780.5231030.5225710.5010290.4960410.5038240.5233720.5082680.5132170.5495500.4827260.5583210.5635710.552218TXWV210.2993860.0000000.3385780.3244060.3059890.4170460.4199240.4611190.4350540.4166760.3715990.4162720.3983340.5322390.5681370.5399130.4979000.4946700.4744160.4657280.4678260.4874700.4744030.4814160.5222580.4517170.5275380.5329180.523690LALC220.3227600.3385780.0000000.3497040.3299990.4616450.4644440.4996150.4754570.4639570.4166070.4591210.4444040.5673230.6036380.5753360.5387290.5357870.5154550.5056780.5129040.5326790.5147360.5232110.5620690.4922240.5687070.5764550.564990SCCU330.3187300.3244060.3497040.0000000.3211370.4347540.4418010.4799230.4531700.4391020.3893580.4368690.4186090.5487620.5854170.5572020.5154070.5150590.4949620.4895730.4933420.5114020.4964290.5029270.5426470.4734980.5481090.5529930.543983FLSF3340.3010620.3059890.3299990.3211370.0000000.4031650.4046500.4433290.4189790.4047980.3569080.3997790.3819020.5107320.5481790.5167170.4813370.4809370.4604050.4525170.4549380.4777940.4601040.4691360.5081770.4355980.5144320.5188720.509766FLBA14050.4550990.4170460.4616450.4347540.4031650.0000000.3722210.3999230.3832160.4133290.3725520.4144650.3863160.5484880.5836540.5562230.5243870.5233770.5109580.5091470.4888890.5072630.4933080.4953230.5382370.4688190.5464680.5551480.540165FLSF4760.4568530.4199240.4644440.4418010.4046500.3722210.0000000.3778760.3754730.4229790.3865380.4254300.4035310.5362780.5699390.5391380.5163900.5149280.5068800.5009950.4907570.5041770.4915100.4998980.5327480.4656630.5433130.5489100.535512FLMO6270.4938520.4611190.4996150.4799230.4433290.3999230.3778760.0000000.3892360.4598560.4271060.4661110.4414870.5640230.5965260.5690160.5455020.5453280.5410230.5357740.5172650.5416110.5245220.5329090.5615550.4923890.5672620.5762200.562291FLSA18580.4685790.4350540.4754570.4531700.4189790.3832160.3754730.3892360.0000000.4255300.390481
0.4298960.4069610.5564750.5907190.5612850.5373710.5321740.5276250.5189370.5013080.5199340.5016610.5112150.5502640.4784950.5587300.5666170.551296FLCK21690.4544500.4166760.4639570.4391020.4047980.4133290.4229790.4598560.4255300.0000000.3276620.3574830.3441500.5529840.5891690.5610680.5304150.5291790.5186150.5130690.4883540.5025310.4895600.4947530.5494280.4714170.5532980.5616120.551827FLCK18100.4075790.3715990.4166070.3893580.3569080.3725520.3865380.4271060.3904810.3276620.0000000.3248890.3133230.5191810.5553650.5260540.4948620.4967940.4801270.4737980.4469590.4619720.4487740.4527050.5070320.4296300.5151590.5203300.507028FLSF54110.4516950.4162720.4591210.4368690.3997790.4144650.4254300.4661110.4298960.3574830.3248890.0000000.3387830.5587690.5940050.5647280.5333970.5316120.5209700.5168260.4870790.5006380.4875180.4901950.5498420.4700640.5553050.5591520.552667FLWO6120.4335040.3983340.4444040.4186090.3819020.3863160.4035310.4414870.4069610.3441500.3133230.3387830.0000000.5352950.5716360.5407840.5127690.5116460.4999930.4932640.4627070.4819600.4695110.4722490.5286390.4490180.5350860.5395000.528957FLAB109130.5548820.5322390.5673230.5487620.5107320.5484880.5362780.5640230.5564750.5529840.5191810.5587690.5352950.0000000.2504470.2359000.4350500.4126940.4536060.4571050.5427680.5661560.5546650.5596750.5371930.4942910.5488970.5586300.547182BJSL25140.5904400.5681370.6036380.5854170.5481790.5836540.5699390.5965260.5907190.5891690.5553650.5940050.5716360.2504470.0000000.2534720.4646180.4439420.4879450.4879360.5799370.6018530.5919900.5970440.5725760.5301450.5820000.5922550.579789BJSB3150.5622780.5399130.5753360.5572020.5167170.5562230.5391380.5690160.5612850.5610680.5260540.5647280.5407840.2359000.2534720.0000000.4418440.4170600.4641180.4654760.5546210.5763550.5630000.5693600.5473910.5027750.5559270.5678970.556449BJVL19160.5231030.4979000.5387290.5154070.4813370.5243870.5163900.5455020.5373710.5304150.4948620.5333970.5127690.4350500.4646180.4418440.0000000.3651480.3919830.3857150.5154290
.5381110.5301110.5302720.5082160.4664290.5189540.5257010.510980MXED8170.5225710.4946700.5357870.5150590.4809370.5233770.5149280.5453280.5321740.5291790.4967940.5316120.5116460.4126940.4439420.4170600.3651480.0000000.3853060.3844530.5239860.5443530.5339760.5392990.5239560.4777640.5330920.5423810.526136MXGT4180.5010290.4744160.5154550.4949620.4604050.5109580.5068800.5410230.5276250.5186150.4801270.5209700.4999930.4536060.4879450.4641180.3919830.3853060.0000000.3864330.5254010.5476220.5360870.5429860.5360960.4873270.5456240.5525540.542995TXGR3190.4960410.4657280.5056780.4895730.4525170.5091470.5009950.5357740.5189370.5130690.4737980.5168260.4932640.4571050.4879360.4654760.3857150.3844530.3864330.0000000.5224930.5449750.5325870.5390160.5360170.4841230.5440390.5515790.540409TXMD3200.5038240.4678260.5129040.4933420.4549380.4888890.4907570.5172650.5013080.4883540.4469590.4870790.4627070.5427680.5799370.5546210.5154290.5239860.5254010.5224930.0000000.3592670.3584230.2883910.3683560.2132660.3745760.3942420.401285CUVN10210.5233720.4874700.5326790.5114020.4777940.5072630.5041770.5416110.5199340.5025310.4619720.5006380.4819600.5661560.6018530.5763550.5381110.5443530.5476220.5449750.3592670.0000000.3682300.2902930.3981940.3478590.4054420.4226700.433422CUCA4220.5082680.4744030.5147360.4964290.4601040.4933080.4915100.5245220.5016610.4895600.4487740.4875180.4695110.5546650.5919900.5630000.5301110.5339760.5360870.5325870.3584230.3682300.0000000.2884300.4016510.3428360.4094120.4262660.431950CUSV6230.5132170.4814160.5232110.5029270.4691360.4953230.4998980.5329090.5112150.4947530.4527050.4901950.4722490.5596750.5970440.5693600.5302720.5392990.5429860.5390160.2883910.2902930.2884300.0000000.4015160.3222770.4060900.4229620.431955CUMM5240.5495500.5222580.5620690.5426470.5081770.5382370.5327480.5615550.5502640.5494280.5070320.5498420.5286390.5371930.5725760.5473910.5082160.5239560.5360960.5360170.3683560.3981940.4016510.4015160.0000000.2395960.2561230.2805690.306529CRL0030250.4827260.45171
70.4922240.4734980.4355980.4688190.4656630.4923890.4784950.4714170.4296300.4700640.4490180.4942910.5301450.5027750.4664290.4777640.4873270.4841230.2132660.3478590.3428360.3222770.2395960.0000000.2622170.2806910.299538CRL0001260.5583210.5275380.5687070.5481090.5144320.5464680.5433130.5672620.5587300.5532980.5151590.5553050.5350860.5488970.5820000.5559270.5189540.5330920.5456240.5440390.3745760.4054420.4094120.4060900.2561230.2622170.0000000.2858830.308918HNDA09270.5635710.5329180.5764550.5529930.5188720.5551480.5489100.5762200.5666170.5616120.5203300.5591520.5395000.5586300.5922550.5678970.5257010.5423810.5525540.5515790.3942420.4226700.4262660.4229620.2805690.2806910.2858830.0000000.317450BZBB1280.5522180.5236900.5649900.5439830.5097660.5401650.5355120.5622910.5512960.5518270.5070280.5526670.5289570.5471820.5797890.5564490.5109800.5261360.5429950.5404090.4012850.4334220.4319500.4319550.3065290.2995380.3089180.3174500.000000MXSA3017