In [1]:
import os
import pandas as pd
import subprocess
from collections import defaultdict
# Value substituted for dwgsim's -z option (its random seed) when the
# simulation command line is built, so read simulation is repeatable.
SEED = 5
In [2]:
def read_fasta(fh):
    """
    Iterate over the records of a FASTA stream.

    :param fh: iterable of text lines, e.g. an open file handle
    :return: generator of (title, seq) tuples. ``title`` is the header line
             without its leading '>' and with surrounding whitespace removed;
             ``seq`` is the record's sequence lines, each stripped, joined
             together. Nothing is yielded when the input contains no header.
    :raises ValueError: if non-blank text appears before the first header
    """
    title = None
    chunks = []
    for line in fh:
        if line.startswith(">"):
            # A new header closes the record collected so far.
            if title is not None:
                yield (title.strip(), "".join(chunks))
            title = line[1:]
            chunks = []
        elif title is not None:
            chunks.append(line.strip())
        elif line.strip():
            # Sequence data with no header cannot be attributed to a record.
            raise ValueError("FASTA data found before the first '>' header")
    # Flush the final record; skipped entirely for empty/header-less input.
    if title is not None:
        yield (title.strip(), "".join(chunks))
In [3]:
# Build the lookup from the first tab-separated column of rep82.tax to the
# second column of the same row.
head2tax = dict()
with open("../data/references/rep82/rep82.tax") as tax_inf:
    for raw_row in tax_inf:
        fields = raw_row.rstrip().split('\t')
        header, taxonomy = fields[0], fields[1]
        head2tax[header] = taxonomy
In [4]:
# Directory that holds the per-sequence FASTA files.
outfile_dir = "../data/split_rep82/"
# makedirs(exist_ok=True) creates missing parent directories too and does not
# fail if the directory already exists, unlike a check followed by `mkdir`.
os.makedirs(outfile_dir, exist_ok=True)
In [5]:
# Write each reference record to its own FASTA file and index the file paths
# by the record's taxonomy string with its last ';'-separated field removed.
tax2file = defaultdict(list)
with open('../data/references/rep82/rep82.fna') as inf:
    for title, seq in read_fasta(inf):
        outfile = outfile_dir + title + ".fna"
        # rpartition keeps everything before the final ';' ('' if there is none).
        truncated_tax = head2tax[title].rpartition(';')[0]
        tax2file[truncated_tax].append(outfile)
        if os.path.exists(outfile):
            # File is already present from an earlier run; do not rewrite it.
            continue
        with open(outfile, 'w') as outf:
            outf.write(">%s\n%s\n" % (title, seq))
In [6]:
# Load the tab-separated taxa table; its first column becomes the row index.
df_taxatable = pd.read_csv(
    "../results/HMP/taxatable.meta.10m.txt",
    sep="\t",
    index_col=0,
)
In [7]:
def dwgsim(fa_infile, fq_outfile, number, seed):
    """
    Run the external ``dwgsim`` read simulator on one reference FASTA.

    :param fa_infile: path of the input reference FASTA
    :param fq_outfile: output prefix handed to dwgsim
    :param number: number of reads to simulate; truncated to an int for -N
    :param seed: value passed to dwgsim's -z (random seed) option
    :return: None; the result of ``communicate()`` is printed
    """
    argv = [
        'dwgsim',
        '-e', '0.02',   # per-base error rate of the first read
        '-r', '0.01',   # mutation rate
        '-R', '0.5',    # fraction of mutations that are indels
        '-q', 'f',      # fixed base quality character
        '-c', '0',
        '-2', '0',      # second-read length 0, i.e. single-end output
        '-N', str(int(number)),
        '-y', '0.0',
        '-z', str(seed),
        fa_infile,
        fq_outfile,
    ]
    # Each option and its value must be separate list items: a missing comma
    # between '0.5' and '-q' would silently merge them into one argument.
    process = subprocess.Popen(argv)
    print(process.communicate())
In [13]:
metatype = ["stool", "oral", "skin"]
dict_sizes = defaultdict(lambda: defaultdict(int))
for site in metatype:
if not os.path.exists("../results/simulations"):
!mkdir ../results/simulations
if os.path.exists("../results/simulations/{}.fastq".format(site)):
!rm ../results/simulations/{site}.fastq
site_vector = df_taxatable[site]
site_vector = site_vector[site_vector > 0]
for tax, number in site_vector.iteritems():
if tax in tax2file:
file = tax2file[tax][0]
basename = ".".join(os.path.basename(file).split('.')[:-1])
outfile = "/dev/shm/testq"
number = str(int(number))
cmd = "dwgsim -e 0.02 -n 10 -r 0.01 -1 100 -c 0 -2 0 -N {number} -y 0.0 -z {seed} {file} {outfile}".format(number=number, seed=SEED, file=file, outfile=outfile)
!{cmd}
!cat /dev/shm/testq.bwa.read1.fastq >> ../results/simulations/{site}.fastq
dict_sizes[site][head2tax[basename]] += int(number)
else:
print(tax + " not found")
[dwgsim_core] NZ_CCMM01000001.1 length: 2931264
[dwgsim_core] 1 sequences, total length: 2931264
[dwgsim_core] Currently on:
[dwgsim_core] 6163
[dwgsim_core] Complete!
[dwgsim_core] NC_008618.1 length: 2089645
[dwgsim_core] 1 sequences, total length: 2089645
[dwgsim_core] Currently on:
[dwgsim_core] 11672
[dwgsim_core] Complete!
[dwgsim_core] NC_004307.2 length: 2256640
[dwgsim_core] 1 sequences, total length: 2256640
[dwgsim_core] Currently on:
[dwgsim_core] 5163
[dwgsim_core] Complete!
[dwgsim_core] NZ_AXCY01000001.1 length: 3980586
[dwgsim_core] 1 sequences, total length: 3980586
[dwgsim_core] Currently on:
[dwgsim_core] 9015
[dwgsim_core] Complete!
[dwgsim_core] NZ_KB894643.1 length: 3620316
[dwgsim_core] 1 sequences, total length: 3620316
[dwgsim_core] Currently on:
[dwgsim_core] 5296
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP012801.1 length: 7084828
[dwgsim_core] 1 sequences, total length: 7084828
[dwgsim_core] Currently on:
[dwgsim_core] 285300
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS981516.1 length: 4301007
[dwgsim_core] 1 sequences, total length: 4301007
[dwgsim_core] Currently on:
[dwgsim_core] 194857
[dwgsim_core] Complete!
[dwgsim_core] NZ_EQ973700.1 length: 4042224
[dwgsim_core] 1 sequences, total length: 4042224
[dwgsim_core] Currently on:
[dwgsim_core] 40018
[dwgsim_core] Complete!
[dwgsim_core] NZ_BAKK01000144.1 length: 4784013
[dwgsim_core] 1 sequences, total length: 4784013
[dwgsim_core] Currently on:
[dwgsim_core] 12875
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL882602.1 length: 4331676
[dwgsim_core] 1 sequences, total length: 4331676
[dwgsim_core] Currently on:
[dwgsim_core] 29897
[dwgsim_core] Complete!
[dwgsim_core] NC_006347.1 length: 5277274
[dwgsim_core] 1 sequences, total length: 5277274
[dwgsim_core] Currently on:
[dwgsim_core] 114882
[dwgsim_core] Complete!
[dwgsim_core] NZ_JH992940.1 length: 7087874
[dwgsim_core] 1 sequences, total length: 7087874
[dwgsim_core] Currently on:
[dwgsim_core] 52943
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP012938.1 length: 6472489
[dwgsim_core] 1 sequences, total length: 6472489
[dwgsim_core] Currently on:
[dwgsim_core] 1098068
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS990135.1 length: 4422104
[dwgsim_core] 1 sequences, total length: 4422104
[dwgsim_core] Currently on:
[dwgsim_core] 147568
[dwgsim_core] Complete!
[dwgsim_core] NC_015164.1 length: 4242803
[dwgsim_core] 1 sequences, total length: 4242803
[dwgsim_core] Currently on:
[dwgsim_core] 5815
[dwgsim_core] Complete!
[dwgsim_core] NZ_KB905466.1 length: 5315500
[dwgsim_core] 1 sequences, total length: 5315500
[dwgsim_core] Currently on:
[dwgsim_core] 93207
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS499677.1 length: 4009989
[dwgsim_core] 1 sequences, total length: 4009989
[dwgsim_core] Currently on:
[dwgsim_core] 618810
[dwgsim_core] Complete!
[dwgsim_core] NC_004663.1 length: 6260361
[dwgsim_core] 1 sequences, total length: 6260361
[dwgsim_core] Currently on:
[dwgsim_core] 444167
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS362249.1 length: 4719417
[dwgsim_core] 1 sequences, total length: 4719417
[dwgsim_core] Currently on:
[dwgsim_core] 832392
[dwgsim_core] Complete!
[dwgsim_core] NC_009614.1 length: 5163189
[dwgsim_core] 1 sequences, total length: 5163189
[dwgsim_core] Currently on:
[dwgsim_core] 2116377
[dwgsim_core] Complete!
[dwgsim_core] NZ_JH815203.1 length: 3443946
[dwgsim_core] 1 sequences, total length: 3443946
[dwgsim_core] Currently on:
[dwgsim_core] 197416
[dwgsim_core] Complete!
[dwgsim_core] NZ_KI440778.1 length: 3414482
[dwgsim_core] 1 sequences, total length: 3414482
[dwgsim_core] Currently on:
[dwgsim_core] 10365
[dwgsim_core] Complete!
[dwgsim_core] NZ_JTDA01000001.1 length: 4139184
[dwgsim_core] 1 sequences, total length: 4139184
[dwgsim_core] Currently on:
[dwgsim_core] 8871
[dwgsim_core] Complete!
[dwgsim_core] NZ_KI912578.1 length: 4718527
[dwgsim_core] 1 sequences, total length: 4718527
[dwgsim_core] Currently on:
[dwgsim_core] 17057
[dwgsim_core] Complete!
[dwgsim_core] NZ_JH594596.1 length: 3773091
[dwgsim_core] 1 sequences, total length: 3773091
[dwgsim_core] Currently on:
[dwgsim_core] 5578
[dwgsim_core] Complete!
[dwgsim_core] NC_015160.1 length: 4392288
[dwgsim_core] 1 sequences, total length: 4392288
[dwgsim_core] Currently on:
[dwgsim_core] 107826
[dwgsim_core] Complete!
[dwgsim_core] NZ_JRAO01000001.1 length: 1962136
[dwgsim_core] 1 sequences, total length: 1962136
[dwgsim_core] Currently on:
[dwgsim_core] 6262
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL883805.1 length: 3790672
[dwgsim_core] 1 sequences, total length: 3790672
[dwgsim_core] Currently on:
[dwgsim_core] 13473
[dwgsim_core] Complete!
[dwgsim_core] NZ_ADEG01000118.1 length: 3035131
[dwgsim_core] 1 sequences, total length: 3035131
[dwgsim_core] Currently on:
[dwgsim_core] 5333
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG703878.1 length: 3512733
[dwgsim_core] 1 sequences, total length: 3512733
[dwgsim_core] Currently on:
[dwgsim_core] 806358
[dwgsim_core] Complete!
[dwgsim_core] NZ_AUME01000001.1 length: 2801036
[dwgsim_core] 1 sequences, total length: 2801036
[dwgsim_core] Currently on:
[dwgsim_core] 4082
[dwgsim_core] Complete!
[dwgsim_core] NZ_KB904327.1 length: 2650218
[dwgsim_core] 1 sequences, total length: 2650218
[dwgsim_core] Currently on:
[dwgsim_core] 6705
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL982513.1 length: 3127990
[dwgsim_core] 1 sequences, total length: 3127990
[dwgsim_core] Currently on:
[dwgsim_core] 3549
[dwgsim_core] Complete!
[dwgsim_core] NZ_LFQU01000001.1 length: 3789170
[dwgsim_core] 1 sequences, total length: 3789170
[dwgsim_core] Currently on:
[dwgsim_core] 12882
[dwgsim_core] Complete!
[dwgsim_core] NZ_JXQI01000001.1 length: 3264141
[dwgsim_core] 1 sequences, total length: 3264141
[dwgsim_core] Currently on:
[dwgsim_core] 6969
[dwgsim_core] Complete!
[dwgsim_core] NZ_JH379330.1 length: 3098792
[dwgsim_core] 1 sequences, total length: 3098792
[dwgsim_core] Currently on:
[dwgsim_core] 16278
[dwgsim_core] Complete!
[dwgsim_core] NC_018011.1 length: 3734239
[dwgsim_core] 1 sequences, total length: 3734239
[dwgsim_core] Currently on:
[dwgsim_core] 112612
[dwgsim_core] Complete!
[dwgsim_core] NZ_CAPH01000060.1 length: 2773414
[dwgsim_core] 1 sequences, total length: 2773414
[dwgsim_core] Currently on:
[dwgsim_core] 8787
[dwgsim_core] Complete!
[dwgsim_core] NZ_JH370371.1 length: 2855479
[dwgsim_core] 1 sequences, total length: 2855479
[dwgsim_core] Currently on:
[dwgsim_core] 3323
[dwgsim_core] Complete!
[dwgsim_core] NZ_HE978577.1 length: 3163705
[dwgsim_core] 1 sequences, total length: 3163705
[dwgsim_core] Currently on:
[dwgsim_core] 74973
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS499581.1 length: 2550788
[dwgsim_core] 1 sequences, total length: 2550788
[dwgsim_core] Currently on:
[dwgsim_core] 441872
[dwgsim_core] Complete!
[dwgsim_core] NZ_HE978647.1 length: 4017659
[dwgsim_core] 1 sequences, total length: 4017659
[dwgsim_core] Currently on:
[dwgsim_core] 17610
[dwgsim_core] Complete!
[dwgsim_core] NC_021030.1 length: 3763317
[dwgsim_core] 1 sequences, total length: 3763317
[dwgsim_core] Currently on:
[dwgsim_core] 252160
[dwgsim_core] Complete!
[dwgsim_core] NZ_CAEG01000038.1 length: 3489982
[dwgsim_core] 1 sequences, total length: 3489982
[dwgsim_core] Currently on:
[dwgsim_core] 9797
[dwgsim_core] Complete!
[dwgsim_core] NC_009615.1 length: 4811379
[dwgsim_core] 1 sequences, total length: 4811379
[dwgsim_core] Currently on:
[dwgsim_core] 274249
[dwgsim_core] Complete!
[dwgsim_core] NZ_KQ033912.1 length: 6486817
[dwgsim_core] 1 sequences, total length: 6486817
[dwgsim_core] Currently on:
[dwgsim_core] 9766
[dwgsim_core] Complete!
[dwgsim_core] NZ_JH976465.1 length: 4629111
[dwgsim_core] 1 sequences, total length: 4629111
[dwgsim_core] Currently on:
[dwgsim_core] 131790
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS996928.1 length: 3028406
[dwgsim_core] 1 sequences, total length: 3028406
[dwgsim_core] Currently on:
[dwgsim_core] 6933
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP015406.2 length: 3818478
[dwgsim_core] 1 sequences, total length: 3818478
[dwgsim_core] Currently on:
[dwgsim_core] 4698
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP011307.1 length: 3376475
[dwgsim_core] 1 sequences, total length: 3376475
[dwgsim_core] Currently on:
[dwgsim_core] 3667
[dwgsim_core] Complete!
[dwgsim_core] NZ_LN866265.1 length: 3453868
[dwgsim_core] 1 sequences, total length: 3453868
[dwgsim_core] Currently on:
[dwgsim_core] 7829
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS480330.1 length: 2954806
[dwgsim_core] 1 sequences, total length: 2954806
[dwgsim_core] Currently on:
[dwgsim_core] 11148
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG730334.1 length: 3842844
[dwgsim_core] 1 sequences, total length: 3842844
[dwgsim_core] Currently on:
[dwgsim_core] 14530
[dwgsim_core] Complete!
[dwgsim_core] NZ_KI271072.1 length: 3448356
[dwgsim_core] 1 sequences, total length: 3448356
[dwgsim_core] Currently on:
[dwgsim_core] 11182
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS264288.1 length: 2871055
[dwgsim_core] 1 sequences, total length: 2871055
[dwgsim_core] Currently on:
[dwgsim_core] 12114
[dwgsim_core] Complete!
[dwgsim_core] NC_012778.1 length: 2144190
[dwgsim_core] 1 sequences, total length: 2144190
[dwgsim_core] Currently on:
[dwgsim_core] 29070
[dwgsim_core] Complete!
[dwgsim_core] NZ_ACEP01000175.1 length: 3292736
[dwgsim_core] 1 sequences, total length: 3292736
[dwgsim_core] Currently on:
[dwgsim_core] 6394
[dwgsim_core] Complete!
[dwgsim_core] NZ_LLKB01000001.1 length: 3610635
[dwgsim_core] 1 sequences, total length: 3610635
[dwgsim_core] Currently on:
[dwgsim_core] 4438
[dwgsim_core] Complete!
[dwgsim_core] NC_012781.1 length: 3449685
[dwgsim_core] 1 sequences, total length: 3449685
[dwgsim_core] Currently on:
[dwgsim_core] 163452
[dwgsim_core] Complete!
[dwgsim_core] NZ_KB290627.1 length: 2772948
[dwgsim_core] 1 sequences, total length: 2772948
[dwgsim_core] Currently on:
[dwgsim_core] 7171
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS264342.1 length: 3626038
[dwgsim_core] 1 sequences, total length: 3626038
[dwgsim_core] Currently on:
[dwgsim_core] 37205
[dwgsim_core] Complete!
[dwgsim_core] NZ_KE993269.1 length: 3575614
[dwgsim_core] 1 sequences, total length: 3575614
[dwgsim_core] Currently on:
[dwgsim_core] 11660
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS264383.1 length: 2742106
[dwgsim_core] 1 sequences, total length: 2742106
[dwgsim_core] Currently on:
[dwgsim_core] 28260
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG663534.1 length: 2496339
[dwgsim_core] 1 sequences, total length: 2496339
[dwgsim_core] Currently on:
[dwgsim_core] 6781
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG662017.1 length: 3242335
[dwgsim_core] 1 sequences, total length: 3242335
[dwgsim_core] Currently on:
[dwgsim_core] 19172
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS483542.1 length: 3103207
[dwgsim_core] 1 sequences, total length: 3103207
[dwgsim_core] Currently on:
[dwgsim_core] 20714
[dwgsim_core] Complete!
[dwgsim_core] NZ_AAXA02000016.1 length: 3186181
[dwgsim_core] 1 sequences, total length: 3186181
[dwgsim_core] Currently on:
[dwgsim_core] 16538
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS264419.1 length: 2915783
[dwgsim_core] 1 sequences, total length: 2915783
[dwgsim_core] Currently on:
[dwgsim_core] 10361
[dwgsim_core] Complete!
[dwgsim_core] NZ_KB851182.1 length: 6377378
[dwgsim_core] 1 sequences, total length: 6377378
[dwgsim_core] Currently on:
[dwgsim_core] 4508
[dwgsim_core] Complete!
[dwgsim_core] NZ_JH376420.1 length: 6647780
[dwgsim_core] 1 sequences, total length: 6647780
[dwgsim_core] Currently on:
[dwgsim_core] 3566
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL834305.1 length: 5353008
[dwgsim_core] 1 sequences, total length: 5353008
[dwgsim_core] Currently on:
[dwgsim_core] 6220
[dwgsim_core] Complete!
[dwgsim_core] NZ_CYXV01000001.1 length: 3568198
[dwgsim_core] 1 sequences, total length: 3568198
[dwgsim_core] Currently on:
[dwgsim_core] 65876
[dwgsim_core] Complete!
[dwgsim_core] NC_015977.1 length: 3592125
[dwgsim_core] 1 sequences, total length: 3592125
[dwgsim_core] Currently on:
[dwgsim_core] 27457
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG692814.1 length: 4412385
[dwgsim_core] 1 sequences, total length: 4412385
[dwgsim_core] Currently on:
[dwgsim_core] 65300
[dwgsim_core] Complete!
[dwgsim_core] NZ_ACFY01000179.1 length: 4050242
[dwgsim_core] 1 sequences, total length: 4050242
[dwgsim_core] Currently on:
[dwgsim_core] 53377
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS995353.1 length: 3996608
[dwgsim_core] 1 sequences, total length: 3996608
[dwgsim_core] Currently on:
[dwgsim_core] 10184
[dwgsim_core] Complete!
[dwgsim_core] NZ_JPJG01000001.1 length: 2731004
[dwgsim_core] 1 sequences, total length: 2731004
[dwgsim_core] Currently on:
[dwgsim_core] 42209
[dwgsim_core] Complete!
[dwgsim_core] NZ_KI271644.1 length: 3595655
[dwgsim_core] 1 sequences, total length: 3595655
[dwgsim_core] Currently on:
[dwgsim_core] 25452
[dwgsim_core] Complete!
[dwgsim_core] NZ_KI391947.1 length: 3197405
[dwgsim_core] 1 sequences, total length: 3197405
[dwgsim_core] Currently on:
[dwgsim_core] 13084
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS544194.1 length: 3719958
[dwgsim_core] 1 sequences, total length: 3719958
[dwgsim_core] Currently on:
[dwgsim_core] 15678
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG697168.2 length: 3090539
[dwgsim_core] 1 sequences, total length: 3090539
[dwgsim_core] Currently on:
[dwgsim_core] 85019
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS480351.1 length: 3270409
[dwgsim_core] 1 sequences, total length: 3270409
[dwgsim_core] Currently on:
[dwgsim_core] 5979
[dwgsim_core] Complete!
[dwgsim_core] NZ_KB907512.1 length: 2664365
[dwgsim_core] 1 sequences, total length: 2664365
[dwgsim_core] Currently on:
[dwgsim_core] 19994
[dwgsim_core] Complete!
[dwgsim_core] NZ_HF545616.1 length: 2968510
[dwgsim_core] 1 sequences, total length: 2968510
[dwgsim_core] Currently on:
[dwgsim_core] 193528
[dwgsim_core] Complete!
[dwgsim_core] NZ_KI260285.1 length: 3098737
[dwgsim_core] 1 sequences, total length: 3098737
[dwgsim_core] Currently on:
[dwgsim_core] 5944
[dwgsim_core] Complete!
[dwgsim_core] NZ_BBDW01000001.1 length: 3259889
[dwgsim_core] 1 sequences, total length: 3259889
[dwgsim_core] Currently on:
[dwgsim_core] 19674
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS990209.1 length: 2731695
[dwgsim_core] 1 sequences, total length: 2731695
[dwgsim_core] Currently on:
[dwgsim_core] 5503
[dwgsim_core] Complete!
[dwgsim_core] NZ_LMUA01000001.1 length: 3952595
[dwgsim_core] 1 sequences, total length: 3952595
[dwgsim_core] Currently on:
[dwgsim_core] 12044
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG704779.1 length: 3245571
[dwgsim_core] 1 sequences, total length: 3245571
[dwgsim_core] Currently on:
[dwgsim_core] 3949
[dwgsim_core] Complete!
[dwgsim_core] NZ_AKCB01000001.1 length: 3861369
[dwgsim_core] 1 sequences, total length: 3861369
[dwgsim_core] Currently on:
[dwgsim_core] 4197
[dwgsim_core] Complete!
[dwgsim_core] NZ_AUKY01000001.1 length: 2337626
[dwgsim_core] 1 sequences, total length: 2337626
[dwgsim_core] Currently on:
[dwgsim_core] 3511
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL830843.1 length: 2122904
[dwgsim_core] 1 sequences, total length: 2122904
[dwgsim_core] Currently on:
[dwgsim_core] 8858
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG698602.1 length: 1895960
[dwgsim_core] 1 sequences, total length: 1895960
[dwgsim_core] Currently on:
[dwgsim_core] 34416
[dwgsim_core] Complete!
[dwgsim_core] NC_013520.1 length: 2132142
[dwgsim_core] 1 sequences, total length: 2132142
[dwgsim_core] Currently on:
[dwgsim_core] 7322
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL883674.1 length: 2832616
[dwgsim_core] 1 sequences, total length: 2832616
[dwgsim_core] Currently on:
[dwgsim_core] 25230
[dwgsim_core] Complete!
[dwgsim_core] NZ_KE150480.1 length: 2988348
[dwgsim_core] 1 sequences, total length: 2988348
[dwgsim_core] Currently on:
[dwgsim_core] 53039
[dwgsim_core] Complete!
[dwgsim_core] NZ_JNJP01000001.1 length: 4632412
[dwgsim_core] 1 sequences, total length: 4632412
[dwgsim_core] Currently on:
[dwgsim_core] 6377
[dwgsim_core] Complete!
[dwgsim_core] NC_010655.1 length: 2664102
[dwgsim_core] 1 sequences, total length: 2664102
[dwgsim_core] Currently on:
[dwgsim_core] 33917
[dwgsim_core] Complete!
[dwgsim_core] NC_004703.1 length: 33038
[dwgsim_core] 1 sequences, total length: 33038
[dwgsim_core] Currently on:
[dwgsim_core] 5817
[dwgsim_core] Complete!
[dwgsim_core] NC_012782.1 length: 687209
[dwgsim_core] 1 sequences, total length: 687209
[dwgsim_core] Currently on:
[dwgsim_core] 9026
[dwgsim_core] Complete!
[dwgsim_core] NZ_KE951405.1 length: 2366735
[dwgsim_core] 1 sequences, total length: 2366735
[dwgsim_core] Currently on:
[dwgsim_core] 151286
[dwgsim_core] Complete!
[dwgsim_core] NZ_AUBL01000001.1 length: 3532293
[dwgsim_core] 1 sequences, total length: 3532293
[dwgsim_core] Currently on:
[dwgsim_core] 50050
[dwgsim_core] Complete!
[dwgsim_core] NZ_KE386871.1 length: 3420289
[dwgsim_core] 1 sequences, total length: 3420289
[dwgsim_core] Currently on:
[dwgsim_core] 69787
[dwgsim_core] Complete!
[dwgsim_core] NZ_JH470338.1 length: 2205815
[dwgsim_core] 1 sequences, total length: 2205815
[dwgsim_core] Currently on:
[dwgsim_core] 141560
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP017298.1 length: 2141493
[dwgsim_core] 1 sequences, total length: 2141493
[dwgsim_core] Currently on:
[dwgsim_core] 15637
[dwgsim_core] Complete!
[dwgsim_core] NZ_KE951832.1 length: 3330361
[dwgsim_core] 1 sequences, total length: 3330361
[dwgsim_core] Currently on:
[dwgsim_core] 61412
[dwgsim_core] Complete!
[dwgsim_core] NZ_JH711481.1 length: 3371154
[dwgsim_core] 1 sequences, total length: 3371154
[dwgsim_core] Currently on:
[dwgsim_core] 148921
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS264586.1 length: 2393968
[dwgsim_core] 1 sequences, total length: 2393968
[dwgsim_core] Currently on:
[dwgsim_core] 75417
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP014232.1 length: 3042917
[dwgsim_core] 1 sequences, total length: 3042917
[dwgsim_core] Currently on:
[dwgsim_core] 322930
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL985606.1 length: 2828282
[dwgsim_core] 1 sequences, total length: 2828282
[dwgsim_core] Currently on:
[dwgsim_core] 87777
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP007519.1 length: 2338390
[dwgsim_core] 1 sequences, total length: 2338390
[dwgsim_core] Currently on:
[dwgsim_core] 30327
[dwgsim_core] Complete!
[dwgsim_core] NC_022198.1 length: 2031902
[dwgsim_core] 1 sequences, total length: 2031902
[dwgsim_core] Currently on:
[dwgsim_core] 35733
[dwgsim_core] Complete!
[dwgsim_core] NZ_KB290817.1 length: 2809766
[dwgsim_core] 1 sequences, total length: 2809766
[dwgsim_core] Currently on:
[dwgsim_core] 146571
[dwgsim_core] Complete!
[dwgsim_core] NZ_ACSH02000008.1 length: 2856058
[dwgsim_core] 1 sequences, total length: 2856058
[dwgsim_core] Currently on:
[dwgsim_core] 1048923
[dwgsim_core] Complete!
[dwgsim_core] NZ_JVEY01000001.1 length: 2354886
[dwgsim_core] 1 sequences, total length: 2354886
[dwgsim_core] Currently on:
[dwgsim_core] 10563
[dwgsim_core] Complete!
[dwgsim_core] NZ_JPVS01000001.1 length: 2639735
[dwgsim_core] 1 sequences, total length: 2639735
[dwgsim_core] Currently on:
[dwgsim_core] 667625
[dwgsim_core] Complete!
[dwgsim_core] NC_013715.1 length: 2264603
[dwgsim_core] 1 sequences, total length: 2264603
[dwgsim_core] Currently on:
[dwgsim_core] 342533
[dwgsim_core] Complete!
[dwgsim_core] NC_018142.1 length: 3449360
[dwgsim_core] 1 sequences, total length: 3449360
[dwgsim_core] Currently on:
[dwgsim_core] 56297
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG774898.1 length: 2113629
[dwgsim_core] 1 sequences, total length: 2113629
[dwgsim_core] Currently on:
[dwgsim_core] 24488
[dwgsim_core] Complete!
[dwgsim_core] NZ_JDFF01000001.1 length: 2040410
[dwgsim_core] 1 sequences, total length: 2040410
[dwgsim_core] Currently on:
[dwgsim_core] 30799
[dwgsim_core] Complete!
[dwgsim_core] NZ_ACNN01000037.1 length: 2064868
[dwgsim_core] 1 sequences, total length: 2064868
[dwgsim_core] Currently on:
[dwgsim_core] 8683
[dwgsim_core] Complete!
[dwgsim_core] NZ_KQ960406.1 length: 2355993
[dwgsim_core] 1 sequences, total length: 2355993
[dwgsim_core] Currently on:
[dwgsim_core] 320374
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG700647.1 length: 2585769
[dwgsim_core] 1 sequences, total length: 2585769
[dwgsim_core] Currently on:
[dwgsim_core] 33400
[dwgsim_core] Complete!
[dwgsim_core] NZ_BAKF01000148.1 length: 3003640
[dwgsim_core] 1 sequences, total length: 3003640
[dwgsim_core] Currently on:
[dwgsim_core] 35086
[dwgsim_core] Complete!
[dwgsim_core] NC_015311.1 length: 2937589
[dwgsim_core] 1 sequences, total length: 2937589
[dwgsim_core] Currently on:
[dwgsim_core] 9240
[dwgsim_core] Complete!
[dwgsim_core] NZ_BAJX01000055.1 length: 2950347
[dwgsim_core] 1 sequences, total length: 2950347
[dwgsim_core] Currently on:
[dwgsim_core] 174521
[dwgsim_core] Complete!
[dwgsim_core] NC_017860.1 length: 2699447
[dwgsim_core] 1 sequences, total length: 2699447
[dwgsim_core] Currently on:
[dwgsim_core] 11151
[dwgsim_core] Complete!
[dwgsim_core] NZ_KB899210.1 length: 3508889
[dwgsim_core] 1 sequences, total length: 3508889
[dwgsim_core] Currently on:
[dwgsim_core] 24164
[dwgsim_core] Complete!
[dwgsim_core] NZ_KB908319.1 length: 3314825
[dwgsim_core] 1 sequences, total length: 3314825
[dwgsim_core] Currently on:
[dwgsim_core] 10596
[dwgsim_core] Complete!
[dwgsim_core] NC_014370.1 length: 3168292
[dwgsim_core] 1 sequences, total length: 3168292
[dwgsim_core] Currently on:
[dwgsim_core] 482085
[dwgsim_core] Complete!
[dwgsim_core] NZ_KB904327.1 length: 2650218
[dwgsim_core] 1 sequences, total length: 2650218
[dwgsim_core] Currently on:
[dwgsim_core] 167875
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL982464.1 length: 2669423
[dwgsim_core] 1 sequences, total length: 2669423
[dwgsim_core] Currently on:
[dwgsim_core] 40935
[dwgsim_core] Complete!
[dwgsim_core] NZ_BAJC01000094.1 length: 3078929
[dwgsim_core] 1 sequences, total length: 3078929
[dwgsim_core] Currently on:
[dwgsim_core] 30940
[dwgsim_core] Complete!
[dwgsim_core] NZ_BAJQ01000138.1 length: 2829128
[dwgsim_core] 1 sequences, total length: 2829128
[dwgsim_core] Currently on:
[dwgsim_core] 19209
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL982513.1 length: 3127990
[dwgsim_core] 1 sequences, total length: 3127990
[dwgsim_core] Currently on:
[dwgsim_core] 158798
[dwgsim_core] Complete!
[dwgsim_core] NZ_BAKN01000082.1 length: 2976323
[dwgsim_core] 1 sequences, total length: 2976323
[dwgsim_core] Currently on:
[dwgsim_core] 11696
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL629647.1 length: 3274971
[dwgsim_core] 1 sequences, total length: 3274971
[dwgsim_core] Currently on:
[dwgsim_core] 59544
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP016204.1 length: 3296420
[dwgsim_core] 1 sequences, total length: 3296420
[dwgsim_core] Currently on:
[dwgsim_core] 67380
[dwgsim_core] Complete!
[dwgsim_core] NC_016610.1 length: 3405521
[dwgsim_core] 1 sequences, total length: 3405521
[dwgsim_core] Currently on:
[dwgsim_core] 15983
[dwgsim_core] Complete!
[dwgsim_core] NZ_ACLQ01000037.1 length: 2667498
[dwgsim_core] 1 sequences, total length: 2667498
[dwgsim_core] Currently on:
[dwgsim_core] 178992
[dwgsim_core] Complete!
[dwgsim_core] NZ_KE150242.1 length: 2746468
[dwgsim_core] 1 sequences, total length: 2746468
[dwgsim_core] Currently on:
[dwgsim_core] 64214
[dwgsim_core] Complete!
[dwgsim_core] NC_013162.1 length: 2612925
[dwgsim_core] 1 sequences, total length: 2612925
[dwgsim_core] Currently on:
[dwgsim_core] 87297
[dwgsim_core] Complete!
[dwgsim_core] NZ_KN389983.1 length: 2538178
[dwgsim_core] 1 sequences, total length: 2538178
[dwgsim_core] Currently on:
[dwgsim_core] 99697
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL872413.1 length: 2548702
[dwgsim_core] 1 sequences, total length: 2548702
[dwgsim_core] Currently on:
[dwgsim_core] 17954
[dwgsim_core] Complete!
[dwgsim_core] NZ_JNJO01000001.1 length: 1795754
[dwgsim_core] 1 sequences, total length: 1795754
[dwgsim_core] Currently on:
[dwgsim_core] 121025
[dwgsim_core] Complete!
[dwgsim_core] NZ_KI535340.1 length: 2043469
[dwgsim_core] 1 sequences, total length: 2043469
[dwgsim_core] Currently on:
[dwgsim_core] 15167
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG694024.1 length: 1947256
[dwgsim_core] 1 sequences, total length: 1947256
[dwgsim_core] Currently on:
[dwgsim_core] 56246
[dwgsim_core] Complete!
[dwgsim_core] NC_021175.1 length: 2142100
[dwgsim_core] 1 sequences, total length: 2142100
[dwgsim_core] Currently on:
[dwgsim_core] 20947
[dwgsim_core] Complete!
[dwgsim_core] NC_009785.1 length: 2196662
[dwgsim_core] 1 sequences, total length: 2196662
[dwgsim_core] Currently on:
[dwgsim_core] 62002
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL732439.1 length: 1906074
[dwgsim_core] 1 sequences, total length: 1906074
[dwgsim_core] Currently on:
[dwgsim_core] 242340
[dwgsim_core] Complete!
[dwgsim_core] NC_022246.1 length: 1996214
[dwgsim_core] 1 sequences, total length: 1996214
[dwgsim_core] Currently on:
[dwgsim_core] 18300
[dwgsim_core] Complete!
[dwgsim_core] NC_013853.1 length: 2146611
[dwgsim_core] 1 sequences, total length: 2146611
[dwgsim_core] Currently on:
[dwgsim_core] 37823
[dwgsim_core] Complete!
[dwgsim_core] NC_004350.2 length: 2032925
[dwgsim_core] 1 sequences, total length: 2032925
[dwgsim_core] Currently on:
[dwgsim_core] 8869
[dwgsim_core] Complete!
[dwgsim_core] NZ_AORU01000001.1 length: 2184872
[dwgsim_core] 1 sequences, total length: 2184872
[dwgsim_core] Currently on:
[dwgsim_core] 84790
[dwgsim_core] Complete!
[dwgsim_core] NC_015678.1 length: 2153652
[dwgsim_core] 1 sequences, total length: 2153652
[dwgsim_core] Currently on:
[dwgsim_core] 216892
[dwgsim_core] Complete!
[dwgsim_core] NC_003098.1 length: 2038615
[dwgsim_core] 1 sequences, total length: 2038615
[dwgsim_core] Currently on:
[dwgsim_core] 15884
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP009913.1 length: 2188923
[dwgsim_core] 1 sequences, total length: 2188923
[dwgsim_core] Currently on:
[dwgsim_core] 372035
[dwgsim_core] Complete!
[dwgsim_core] NC_009009.1 length: 2388435
[dwgsim_core] 1 sequences, total length: 2388435
[dwgsim_core] Currently on:
[dwgsim_core] 162019
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP013939.1 length: 1787436
[dwgsim_core] 1 sequences, total length: 1787436
[dwgsim_core] Currently on:
[dwgsim_core] 19577
[dwgsim_core] Complete!
[dwgsim_core] NZ_KI535270.1 length: 1544705
[dwgsim_core] 1 sequences, total length: 1544705
[dwgsim_core] Currently on:
[dwgsim_core] 13089
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP012068.1 length: 1739380
[dwgsim_core] 1 sequences, total length: 1739380
[dwgsim_core] Currently on:
[dwgsim_core] 37277
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL890583.1 length: 3288790
[dwgsim_core] 1 sequences, total length: 3288790
[dwgsim_core] Currently on:
[dwgsim_core] 23012
[dwgsim_core] Complete!
[dwgsim_core] NZ_KI535366.1 length: 3479274
[dwgsim_core] 1 sequences, total length: 3479274
[dwgsim_core] Currently on:
[dwgsim_core] 8960
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL622296.1 length: 3082764
[dwgsim_core] 1 sequences, total length: 3082764
[dwgsim_core] Currently on:
[dwgsim_core] 35080
[dwgsim_core] Complete!
[dwgsim_core] NZ_JH815185.1 length: 2932181
[dwgsim_core] 1 sequences, total length: 2932181
[dwgsim_core] Currently on:
[dwgsim_core] 55363
[dwgsim_core] Complete!
[dwgsim_core] NZ_KE148312.1 length: 2470873
[dwgsim_core] 1 sequences, total length: 2470873
[dwgsim_core] Currently on:
[dwgsim_core] 25606
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG668575.1 length: 2707374
[dwgsim_core] 1 sequences, total length: 2707374
[dwgsim_core] Currently on:
[dwgsim_core] 79930
[dwgsim_core] Complete!
[dwgsim_core] NZ_JH590861.1 length: 2313672
[dwgsim_core] 1 sequences, total length: 2313672
[dwgsim_core] Currently on:
[dwgsim_core] 23545
[dwgsim_core] Complete!
[dwgsim_core] NZ_AUKY01000001.1 length: 2337626
[dwgsim_core] 1 sequences, total length: 2337626
[dwgsim_core] Currently on:
[dwgsim_core] 9274
[dwgsim_core] Complete!
[dwgsim_core] NZ_AUFU01000001.1 length: 2350939
[dwgsim_core] 1 sequences, total length: 2350939
[dwgsim_core] Currently on:
[dwgsim_core] 24887
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG749298.1 length: 2055193
[dwgsim_core] 1 sequences, total length: 2055193
[dwgsim_core] Currently on:
[dwgsim_core] 52047
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL538175.1 length: 1765898
[dwgsim_core] 1 sequences, total length: 1765898
[dwgsim_core] Currently on:
[dwgsim_core] 45169
[dwgsim_core] Complete!
[dwgsim_core] NC_013520.1 length: 2132142
[dwgsim_core] 1 sequences, total length: 2132142
[dwgsim_core] Currently on:
[dwgsim_core] 374589
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP009761.1 length: 1627009
[dwgsim_core] 1 sequences, total length: 1627009
[dwgsim_core] Currently on:
[dwgsim_core] 9993
[dwgsim_core] Complete!
[dwgsim_core] NZ_MLQO01000001.1 length: 2143987
[dwgsim_core] 1 sequences, total length: 2143987
[dwgsim_core] Currently on:
[dwgsim_core] 138473
[dwgsim_core] Complete!
[dwgsim_core] NZ_KN173677.1 length: 2546362
[dwgsim_core] 1 sequences, total length: 2546362
[dwgsim_core] Currently on:
[dwgsim_core] 522676
[dwgsim_core] Complete!
[dwgsim_core] NC_013192.1 length: 2465610
[dwgsim_core] 1 sequences, total length: 2465610
[dwgsim_core] Currently on:
[dwgsim_core] 46533
[dwgsim_core] Complete!
[dwgsim_core] NZ_KB890259.1 length: 2144796
[dwgsim_core] 1 sequences, total length: 2144796
[dwgsim_core] Currently on:
[dwgsim_core] 26208
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP012410.1 length: 2444904
[dwgsim_core] 1 sequences, total length: 2444904
[dwgsim_core] Currently on:
[dwgsim_core] 70334
[dwgsim_core] Complete!
[dwgsim_core] NZ_KB891038.1 length: 2317159
[dwgsim_core] 1 sequences, total length: 2317159
[dwgsim_core] Currently on:
[dwgsim_core] 64751
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL636062.1 length: 3151995
[dwgsim_core] 1 sequences, total length: 3151995
[dwgsim_core] Currently on:
[dwgsim_core] 94607
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP012073.1 length: 2925783
[dwgsim_core] 1 sequences, total length: 2925783
[dwgsim_core] Currently on:
[dwgsim_core] 16873
[dwgsim_core] Complete!
[dwgsim_core] NZ_EQ973327.1 length: 2171355
[dwgsim_core] 1 sequences, total length: 2171355
[dwgsim_core] Currently on:
[dwgsim_core] 17830
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG665875.1 length: 2406715
[dwgsim_core] 1 sequences, total length: 2406715
[dwgsim_core] Currently on:
[dwgsim_core] 64129
[dwgsim_core] Complete!
[dwgsim_core] NZ_ACDY02000035.1 length: 1873113
[dwgsim_core] 1 sequences, total length: 1873113
[dwgsim_core] Currently on:
[dwgsim_core] 42072
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP007726.1 length: 2256647
[dwgsim_core] 1 sequences, total length: 2256647
[dwgsim_core] Currently on:
[dwgsim_core] 84338
[dwgsim_core] Complete!
[dwgsim_core] NC_002946.2 length: 2153922
[dwgsim_core] 1 sequences, total length: 2153922
[dwgsim_core] Currently on:
[dwgsim_core] 9425
[dwgsim_core] Complete!
[dwgsim_core] NC_014752.1 length: 2220606
[dwgsim_core] 1 sequences, total length: 2220606
[dwgsim_core] Currently on:
[dwgsim_core] 12057
[dwgsim_core] Complete!
[dwgsim_core] NC_003112.2 length: 2272360
[dwgsim_core] 1 sequences, total length: 2272360
[dwgsim_core] Currently on:
[dwgsim_core] 23627
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL635793.1 length: 2169497
[dwgsim_core] 1 sequences, total length: 2169497
[dwgsim_core] Currently on:
[dwgsim_core] 541681
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP012541.1 length: 1840041
[dwgsim_core] 1 sequences, total length: 1840041
[dwgsim_core] Currently on:
[dwgsim_core] 29055
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP012196.1 length: 2281652
[dwgsim_core] 1 sequences, total length: 2281652
[dwgsim_core] Currently on:
[dwgsim_core] 12383
[dwgsim_core] Complete!
[dwgsim_core] NZ_AMZQ01000001.1 length: 2125393
[dwgsim_core] 1 sequences, total length: 2125393
[dwgsim_core] Currently on:
[dwgsim_core] 19036
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG694050.1 length: 2642744
[dwgsim_core] 1 sequences, total length: 2642744
[dwgsim_core] Currently on:
[dwgsim_core] 60864
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP012067.1 length: 2414427
[dwgsim_core] 1 sequences, total length: 2414427
[dwgsim_core] Currently on:
[dwgsim_core] 79387
[dwgsim_core] Complete!
[dwgsim_core] NC_000907.1 length: 1830138
[dwgsim_core] 1 sequences, total length: 1830138
[dwgsim_core] Currently on:
[dwgsim_core] 24531
[dwgsim_core] Complete!
[dwgsim_core] NZ_AFNK01000087.1 length: 2144403
[dwgsim_core] 1 sequences, total length: 2144403
[dwgsim_core] Currently on:
[dwgsim_core] 43863
[dwgsim_core] Complete!
[dwgsim_core] NZ_CDON01000001.1 length: 2492818
[dwgsim_core] 1 sequences, total length: 2492818
[dwgsim_core] Currently on:
[dwgsim_core] 9917
[dwgsim_core] Complete!
[dwgsim_core] NC_002967.9 length: 2843201
[dwgsim_core] 1 sequences, total length: 2843201
[dwgsim_core] Currently on:
[dwgsim_core] 9055
[dwgsim_core] Complete!
[dwgsim_core] NZ_AKIL01000158.1 length: 585376
[dwgsim_core] 1 sequences, total length: 585376
[dwgsim_core] Currently on:
[dwgsim_core] 16110
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP014232.1 length: 3042917
[dwgsim_core] 1 sequences, total length: 3042917
[dwgsim_core] Currently on:
[dwgsim_core] 888
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP012390.1 length: 1915154
[dwgsim_core] 1 sequences, total length: 1915154
[dwgsim_core] Currently on:
[dwgsim_core] 833
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG667030.1 length: 2437556
[dwgsim_core] 1 sequences, total length: 2437556
[dwgsim_core] Currently on:
[dwgsim_core] 1154
[dwgsim_core] Complete!
[dwgsim_core] NC_012590.1 length: 2790189
[dwgsim_core] 1 sequences, total length: 2790189
[dwgsim_core] Currently on:
[dwgsim_core] 9436
[dwgsim_core] Complete!
[dwgsim_core] NC_007164.1 length: 2462499
[dwgsim_core] 1 sequences, total length: 2462499
[dwgsim_core] Currently on:
[dwgsim_core] 1361
[dwgsim_core] Complete!
[dwgsim_core] NC_012704.1 length: 2446804
[dwgsim_core] 1 sequences, total length: 2446804
[dwgsim_core] Currently on:
[dwgsim_core] 19314
[dwgsim_core] Complete!
[dwgsim_core] NZ_ACSH02000008.1 length: 2856058
[dwgsim_core] 1 sequences, total length: 2856058
[dwgsim_core] Currently on:
[dwgsim_core] 1033
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP004353.1 length: 2931780
[dwgsim_core] 1 sequences, total length: 2931780
[dwgsim_core] Currently on:
[dwgsim_core] 546
[dwgsim_core] Complete!
[dwgsim_core] NZ_HF570958.1 length: 5162321
[dwgsim_core] 1 sequences, total length: 5162321
[dwgsim_core] Currently on:
[dwgsim_core] 2058
[dwgsim_core] Complete!
[dwgsim_core] NZ_JHEI01000001.1 length: 4143510
[dwgsim_core] 1 sequences, total length: 4143510
[dwgsim_core] Currently on:
[dwgsim_core] 619
[dwgsim_core] Complete!
[dwgsim_core] NZ_LDRD01000001.1 length: 2298321
[dwgsim_core] 1 sequences, total length: 2298321
[dwgsim_core] Currently on:
[dwgsim_core] 1097
[dwgsim_core] Complete!
[dwgsim_core] NC_012803.1 length: 2501097
[dwgsim_core] 1 sequences, total length: 2501097
[dwgsim_core] Currently on:
[dwgsim_core] 647
[dwgsim_core] Complete!
[dwgsim_core] NZ_JPVS01000001.1 length: 2639735
[dwgsim_core] 1 sequences, total length: 2639735
[dwgsim_core] Currently on:
[dwgsim_core] 782
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP013126.1 length: 3651382
[dwgsim_core] 1 sequences, total length: 3651382
[dwgsim_core] Currently on:
[dwgsim_core] 2587
[dwgsim_core] Complete!
[dwgsim_core] NZ_KE384018.1 length: 2938512
[dwgsim_core] 1 sequences, total length: 2938512
[dwgsim_core] Currently on:
[dwgsim_core] 2280
[dwgsim_core] Complete!
[dwgsim_core] NC_006085.1 length: 2560265
[dwgsim_core] 1 sequences, total length: 2560265
[dwgsim_core] Currently on:
[dwgsim_core] 8024590
[dwgsim_core] Complete!
[dwgsim_core] NC_021064.1 length: 2526138
[dwgsim_core] 1 sequences, total length: 2526138
[dwgsim_core] Currently on:
[dwgsim_core] 51801
[dwgsim_core] Complete!
[dwgsim_core] NZ_AOSS01000001.1 length: 2131944
[dwgsim_core] 1 sequences, total length: 2131944
[dwgsim_core] Currently on:
[dwgsim_core] 530739
[dwgsim_core] Complete!
[dwgsim_core] NZ_AFAM01000022.1 length: 2623500
[dwgsim_core] 1 sequences, total length: 2623500
[dwgsim_core] Currently on:
[dwgsim_core] 57837
[dwgsim_core] Complete!
[dwgsim_core] NZ_AFUN01000050.1 length: 2431162
[dwgsim_core] 1 sequences, total length: 2431162
[dwgsim_core] Currently on:
[dwgsim_core] 124180
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP012938.1 length: 6472489
[dwgsim_core] 1 sequences, total length: 6472489
[dwgsim_core] Currently on:
[dwgsim_core] 1545
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS990135.1 length: 4422104
[dwgsim_core] 1 sequences, total length: 4422104
[dwgsim_core] Currently on:
[dwgsim_core] 480
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS499677.1 length: 4009989
[dwgsim_core] 1 sequences, total length: 4009989
[dwgsim_core] Currently on:
[dwgsim_core] 1080
[dwgsim_core] Complete!
[dwgsim_core] NC_004663.1 length: 6260361
[dwgsim_core] 1 sequences, total length: 6260361
[dwgsim_core] Currently on:
[dwgsim_core] 532
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS362249.1 length: 4719417
[dwgsim_core] 1 sequences, total length: 4719417
[dwgsim_core] Currently on:
[dwgsim_core] 1889
[dwgsim_core] Complete!
[dwgsim_core] NC_009614.1 length: 5163189
[dwgsim_core] 1 sequences, total length: 5163189
[dwgsim_core] Currently on:
[dwgsim_core] 3143
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG703878.1 length: 3512733
[dwgsim_core] 1 sequences, total length: 3512733
[dwgsim_core] Currently on:
[dwgsim_core] 2649
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS499581.1 length: 2550788
[dwgsim_core] 1 sequences, total length: 2550788
[dwgsim_core] Currently on:
[dwgsim_core] 748
[dwgsim_core] Complete!
[dwgsim_core] NC_007795.1 length: 2821361
[dwgsim_core] 1 sequences, total length: 2821361
[dwgsim_core] Currently on:
[dwgsim_core] 708
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP007601.1 length: 2443604
[dwgsim_core] 1 sequences, total length: 2443604
[dwgsim_core] Currently on:
[dwgsim_core] 33542
[dwgsim_core] Complete!
[dwgsim_core] NC_004461.1 length: 2499279
[dwgsim_core] 1 sequences, total length: 2499279
[dwgsim_core] Currently on:
[dwgsim_core] 392417
[dwgsim_core] Complete!
[dwgsim_core] NC_007168.1 length: 2685015
[dwgsim_core] 1 sequences, total length: 2685015
[dwgsim_core] Currently on:
[dwgsim_core] 946
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL545252.1 length: 2268537
[dwgsim_core] 1 sequences, total length: 2268537
[dwgsim_core] Currently on:
[dwgsim_core] 2380
[dwgsim_core] Complete!
[dwgsim_core] NC_007350.1 length: 2516575
[dwgsim_core] 1 sequences, total length: 2516575
[dwgsim_core] Currently on:
[dwgsim_core] 544
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG694024.1 length: 1947256
[dwgsim_core] 1 sequences, total length: 1947256
[dwgsim_core] Currently on:
[dwgsim_core] 557
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG700812.1 length: 1277759
[dwgsim_core] 1 sequences, total length: 1277759
[dwgsim_core] Currently on:
[dwgsim_core] 712
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP018809.1 length: 1672949
[dwgsim_core] 1 sequences, total length: 1672949
[dwgsim_core] Currently on:
[dwgsim_core] 882
[dwgsim_core] Complete!
[dwgsim_core] NC_009785.1 length: 2196662
[dwgsim_core] 1 sequences, total length: 2196662
[dwgsim_core] Currently on:
[dwgsim_core] 489
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL732439.1 length: 1906074
[dwgsim_core] 1 sequences, total length: 1906074
[dwgsim_core] Currently on:
[dwgsim_core] 720
[dwgsim_core] Complete!
[dwgsim_core] NC_013853.1 length: 2146611
[dwgsim_core] 1 sequences, total length: 2146611
[dwgsim_core] Currently on:
[dwgsim_core] 2310
[dwgsim_core] Complete!
[dwgsim_core] NZ_AORU01000001.1 length: 2184872
[dwgsim_core] 1 sequences, total length: 2184872
[dwgsim_core] Currently on:
[dwgsim_core] 1013
[dwgsim_core] Complete!
[dwgsim_core] NC_003098.1 length: 2038615
[dwgsim_core] 1 sequences, total length: 2038615
[dwgsim_core] Currently on:
[dwgsim_core] 826
[dwgsim_core] Complete!
[dwgsim_core] NC_009009.1 length: 2388435
[dwgsim_core] 1 sequences, total length: 2388435
[dwgsim_core] Currently on:
[dwgsim_core] 1772
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL622359.1 length: 2363292
[dwgsim_core] 1 sequences, total length: 2363292
[dwgsim_core] Currently on:
[dwgsim_core] 9639
[dwgsim_core] Complete!
[dwgsim_core] NC_012781.1 length: 3449685
[dwgsim_core] 1 sequences, total length: 3449685
[dwgsim_core] Currently on:
[dwgsim_core] 1606
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS264342.1 length: 3626038
[dwgsim_core] 1 sequences, total length: 3626038
[dwgsim_core] Currently on:
[dwgsim_core] 1607
[dwgsim_core] Complete!
[dwgsim_core] NZ_AAXA02000016.1 length: 3186181
[dwgsim_core] 1 sequences, total length: 3186181
[dwgsim_core] Currently on:
[dwgsim_core] 484
[dwgsim_core] Complete!
[dwgsim_core] NZ_CYXV01000001.1 length: 3568198
[dwgsim_core] 1 sequences, total length: 3568198
[dwgsim_core] Currently on:
[dwgsim_core] 600
[dwgsim_core] Complete!
[dwgsim_core] NZ_ACFY01000179.1 length: 4050242
[dwgsim_core] 1 sequences, total length: 4050242
[dwgsim_core] Currently on:
[dwgsim_core] 1183
[dwgsim_core] Complete!
[dwgsim_core] NZ_DS480351.1 length: 3270409
[dwgsim_core] 1 sequences, total length: 3270409
[dwgsim_core] Currently on:
[dwgsim_core] 35135
[dwgsim_core] Complete!
[dwgsim_core] NZ_HF545616.1 length: 2968510
[dwgsim_core] 1 sequences, total length: 2968510
[dwgsim_core] Currently on:
[dwgsim_core] 744
[dwgsim_core] Complete!
[dwgsim_core] NZ_BBDW01000001.1 length: 3259889
[dwgsim_core] 1 sequences, total length: 3259889
[dwgsim_core] Currently on:
[dwgsim_core] 687
[dwgsim_core] Complete!
[dwgsim_core] NZ_GG698602.1 length: 1895960
[dwgsim_core] 1 sequences, total length: 1895960
[dwgsim_core] Currently on:
[dwgsim_core] 617
[dwgsim_core] Complete!
[dwgsim_core] NC_013520.1 length: 2132142
[dwgsim_core] 1 sequences, total length: 2132142
[dwgsim_core] Currently on:
[dwgsim_core] 2881
[dwgsim_core] Complete!
[dwgsim_core] NZ_HG326663.1 length: 2366224
[dwgsim_core] 1 sequences, total length: 2366224
[dwgsim_core] Currently on:
[dwgsim_core] 939
[dwgsim_core] Complete!
[dwgsim_core] NC_013171.1 length: 1883067
[dwgsim_core] 1 sequences, total length: 1883067
[dwgsim_core] Currently on:
[dwgsim_core] 802
[dwgsim_core] Complete!
[dwgsim_core] NZ_HG003688.1 length: 2266008
[dwgsim_core] 1 sequences, total length: 2266008
[dwgsim_core] Currently on:
[dwgsim_core] 3168
[dwgsim_core] Complete!
[dwgsim_core] NC_010376.1 length: 1797577
[dwgsim_core] 1 sequences, total length: 1797577
[dwgsim_core] Currently on:
[dwgsim_core] 2285
[dwgsim_core] Complete!
[dwgsim_core] NZ_HE978566.1 length: 2101936
[dwgsim_core] 1 sequences, total length: 2101936
[dwgsim_core] Currently on:
[dwgsim_core] 594
[dwgsim_core] Complete!
[dwgsim_core] NZ_BAEW01000056.1 length: 1885815
[dwgsim_core] 1 sequences, total length: 1885815
[dwgsim_core] Currently on:
[dwgsim_core] 16545
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL883082.1 length: 3369386
[dwgsim_core] 1 sequences, total length: 3369386
[dwgsim_core] Currently on:
[dwgsim_core] 886
[dwgsim_core] Complete!
[dwgsim_core] NZ_GL636062.1 length: 3151995
[dwgsim_core] 1 sequences, total length: 3151995
[dwgsim_core] Currently on:
[dwgsim_core] 515
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP012195.1 length: 1641887
[dwgsim_core] 1 sequences, total length: 1641887
[dwgsim_core] Currently on:
[dwgsim_core] 4687
[dwgsim_core] Complete!
[dwgsim_core] NC_011750.1 length: 5132068
[dwgsim_core] 1 sequences, total length: 5132068
[dwgsim_core] Currently on:
[dwgsim_core] 687
[dwgsim_core] Complete!
[dwgsim_core] NZ_AMRJ01000001.1 length: 3665796
[dwgsim_core] 1 sequences, total length: 3665796
[dwgsim_core] Currently on:
[dwgsim_core] 3649
[dwgsim_core] Complete!
[dwgsim_core] NZ_CP014234.1 length: 2434688
[dwgsim_core] 1 sequences, total length: 2434688
[dwgsim_core] Currently on:
[dwgsim_core] 873
[dwgsim_core] Complete!
[dwgsim_core] NZ_AKIL01000158.1 length: 585376
[dwgsim_core] 1 sequences, total length: 585376
[dwgsim_core] Currently on:
[dwgsim_core] 568786
[dwgsim_core] Complete!
[dwgsim_core] NC_005008.1 length: 65386
[dwgsim_core] 1 sequences, total length: 65386
[dwgsim_core] Currently on:
[dwgsim_core] 885
[dwgsim_core] Complete!
[dwgsim_core] NC_028962.1 length: 140961
[dwgsim_core] 1 sequences, total length: 140961
[dwgsim_core] Currently on:
[dwgsim_core] 8674
[dwgsim_core] Complete!
[dwgsim_core] NC_031119.1 length: 29347
[dwgsim_core] 1 sequences, total length: 29347
[dwgsim_core] Currently on:
[dwgsim_core] 520
[dwgsim_core] Complete!
[dwgsim_core] NC_031003.1 length: 29254
[dwgsim_core] 1 sequences, total length: 29254
[dwgsim_core] Currently on:
[dwgsim_core] 707
[dwgsim_core] Complete!
[dwgsim_core] NC_028967.1 length: 29605
[dwgsim_core] 1 sequences, total length: 29605
[dwgsim_core] Currently on:
[dwgsim_core] 978
[dwgsim_core] Complete!
[dwgsim_core] NC_027400.1 length: 29264
[dwgsim_core] 1 sequences, total length: 29264
[dwgsim_core] Currently on:
[dwgsim_core] 590
[dwgsim_core] Complete!
[dwgsim_core] NC_027359.1 length: 29491
[dwgsim_core] 1 sequences, total length: 29491
[dwgsim_core] Currently on:
[dwgsim_core] 1268
[dwgsim_core] Complete!
[dwgsim_core] NC_019915.1 length: 40917
[dwgsim_core] 1 sequences, total length: 40917
[dwgsim_core] Currently on:
[dwgsim_core] 1074
[dwgsim_core] Complete!
[dwgsim_core] NC_028821.1 length: 40670
[dwgsim_core] 1 sequences, total length: 40670
[dwgsim_core] Currently on:
[dwgsim_core] 2058
[dwgsim_core] Complete!
[dwgsim_core] NC_018851.1 length: 29516
[dwgsim_core] 1 sequences, total length: 29516
[dwgsim_core] Currently on:
[dwgsim_core] 1168
[dwgsim_core] Complete!
[dwgsim_core] NC_018847.1 length: 29516
[dwgsim_core] 1 sequences, total length: 29516
[dwgsim_core] Currently on:
[dwgsim_core] 922
[dwgsim_core] Complete!
[dwgsim_core] NC_027620.1 length: 29726
[dwgsim_core] 1 sequences, total length: 29726
[dwgsim_core] Currently on:
[dwgsim_core] 938
[dwgsim_core] Complete!
[dwgsim_core] NC_018842.1 length: 29348
[dwgsim_core] 1 sequences, total length: 29348
[dwgsim_core] Currently on:
[dwgsim_core] 1047
[dwgsim_core] Complete!
[dwgsim_core] NC_018840.1 length: 29612
[dwgsim_core] 1 sequences, total length: 29612
[dwgsim_core] Currently on:
[dwgsim_core] 10470
[dwgsim_core] Complete!
[dwgsim_core] NC_018852.1 length: 29506
[dwgsim_core] 1 sequences, total length: 29506
[dwgsim_core] Currently on:
[dwgsim_core] 500
[dwgsim_core] Complete!
[dwgsim_core] NC_018841.1 length: 29574
[dwgsim_core] 1 sequences, total length: 29574
[dwgsim_core] Currently on:
[dwgsim_core] 2502
[dwgsim_core] Complete!
[dwgsim_core] NC_018849.1 length: 29202
[dwgsim_core] 1 sequences, total length: 29202
[dwgsim_core] Currently on:
[dwgsim_core] 686
[dwgsim_core] Complete!
[dwgsim_core] NC_018834.1 length: 29214
[dwgsim_core] 1 sequences, total length: 29214
[dwgsim_core] Currently on:
[dwgsim_core] 684
[dwgsim_core] Complete!
[dwgsim_core] NC_015453.1 length: 29017
[dwgsim_core] 1 sequences, total length: 29017
[dwgsim_core] Currently on:
[dwgsim_core] 603
[dwgsim_core] Complete!
[dwgsim_core] NC_027336.1 length: 29503
[dwgsim_core] 1 sequences, total length: 29503
[dwgsim_core] Currently on:
[dwgsim_core] 1553
[dwgsim_core] Complete!
[dwgsim_core] NC_022338.1 length: 29514
[dwgsim_core] 1 sequences, total length: 29514
[dwgsim_core] Currently on:
[dwgsim_core] 713
[dwgsim_core] Complete!
[dwgsim_core] NC_022337.1 length: 29467
[dwgsim_core] 1 sequences, total length: 29467
[dwgsim_core] Currently on:
[dwgsim_core] 838
[dwgsim_core] Complete!
[dwgsim_core] NC_027385.1 length: 29261
[dwgsim_core] 1 sequences, total length: 29261
[dwgsim_core] Currently on:
[dwgsim_core] 557
[dwgsim_core] Complete!
[dwgsim_core] NC_027401.1 length: 29751
[dwgsim_core] 1 sequences, total length: 29751
[dwgsim_core] Currently on:
[dwgsim_core] 738
[dwgsim_core] Complete!
[dwgsim_core] NC_027367.1 length: 29003
[dwgsim_core] 1 sequences, total length: 29003
[dwgsim_core] Currently on:
[dwgsim_core] 495
[dwgsim_core] Complete!
[dwgsim_core] NC_027389.1 length: 29494
[dwgsim_core] 1 sequences, total length: 29494
[dwgsim_core] Currently on:
[dwgsim_core] 1376
[dwgsim_core] Complete!
[dwgsim_core] NC_027370.1 length: 29428
[dwgsim_core] 1 sequences, total length: 29428
[dwgsim_core] Currently on:
[dwgsim_core] 514
[dwgsim_core] Complete!
[dwgsim_core] NC_027624.1 length: 29594
[dwgsim_core] 1 sequences, total length: 29594
[dwgsim_core] Currently on:
[dwgsim_core] 761
[dwgsim_core] Complete!
[dwgsim_core] NC_027627.1 length: 29440
[dwgsim_core] 1 sequences, total length: 29440
[dwgsim_core] Currently on:
[dwgsim_core] 531
[dwgsim_core] Complete!
[dwgsim_core] NC_008722.1 length: 43420
[dwgsim_core] 1 sequences, total length: 43420
[dwgsim_core] Currently on:
[dwgsim_core] 768
[dwgsim_core] Complete!
[dwgsim_core] NC_018284.1 length: 42123
[dwgsim_core] 1 sequences, total length: 42123
[dwgsim_core] Currently on:
[dwgsim_core] 598
[dwgsim_core] Complete!
[dwgsim_core] NC_023582.1 length: 92417
[dwgsim_core] 1 sequences, total length: 92417
[dwgsim_core] Currently on:
[dwgsim_core] 2112
[dwgsim_core] Complete!
[dwgsim_core] NC_024355.1 length: 93794
[dwgsim_core] 1 sequences, total length: 93794
[dwgsim_core] Currently on:
[dwgsim_core] 6908
[dwgsim_core] Complete!
In [14]:
# Run the external combine_seqs.py script with ../results/simulations/ as both
# the input (-i) and output (-o) directory and FASTQ as the type (-t).
# NOTE(review): presumably this merges the per-site FASTQ files written by the
# simulation loop into one combined file -- confirm against the script itself.
!combine_seqs.py -i ../results/simulations/ -t FASTQ -o ../results/simulations/
../results/simulations/oral.fastq oral 10000002
../results/simulations/skin.fastq skin 10000001
../results/simulations/stool.fastq stool 10000000
In [17]:
# Turn the nested dict_sizes mapping into a table: outer keys become the
# columns, inner keys become the row index.
df_simulations = pd.DataFrame(data=dict_sizes)
# Preview the first five rows as the cell's displayed output.
df_simulations.head(n=5)
Out[17]:
oral
skin
stool
k__Bacteria;p__;c__;o__;f__;g__;s__bacterium_LF-3;t__
NaN
NaN
6163.0
k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae;g__Actinobaculum;s__Actinobaculum_sp._oral_taxon_183;t__Actinobaculum_sp._oral_taxon_183_str._F0552
151286.0
NaN
NaN
k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae;g__Actinomyces;s__Actinomyces_dentalis;t__Actinomyces_dentalis_DSM_19115
50050.0
NaN
NaN
k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae;g__Actinomyces;s__Actinomyces_gerencseriae;t__Actinomyces_gerencseriae_DSM_6844
69787.0
NaN
NaN
k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae;g__Actinomyces;s__Actinomyces_graevenitzii;t__Actinomyces_graevenitzii_C83
141560.0
NaN
NaN
In [19]:
# Cells with no value are NaN after construction; replace them with 0 and
# cast the whole table to integers.
df_simulations = df_simulations.fillna(0).astype(int)

# Write the table as tab-separated text, labelling the index column "#OTU ID".
df_simulations.to_csv(
    "../results/simulations/taxatable.strain.txt",
    sep="\t",
    index_label="#OTU ID",
)
Content source: knights-lab/analysis_SHOGUN
Similar notebooks: