In [1]:
import os

# NOTE(review): this is a relative path, so it only resolves when the kernel's
# working directory contains "opt/"; confirm whether "/opt/blastplus-parsing/"
# was intended.
os.chdir("opt/blastplus-parsing/")

# Import only the helpers this notebook calls rather than using a wildcard
# import, so it is obvious where each name comes from.
from blastplus_taxa_barh import blast_in, genus, plot_in
In [6]:
%matplotlib inline
blast_data = blast_in("/home/paul/outfmt6.all.uniq")
topfreq_desc = genus(blast_data, int(12),int(15))
plot_in(topfreq_desc)
In [7]:
# Inspect the parsed BLAST table; the notebook display is truncated to the
# first and last rows of the 883 rows x 14 columns.
blast_data
Out[7]:
0
1
2
3
4
5
6
7
8
9
10
11
12
13
0
TR10144|c0_g1_i1
gi|544770924|ref|WP_021196208.1|
100.00
20
0
0
60
1
64
83
terpene utilization protein AtuA [Ralstonia sp...
1235457
Ralstonia sp. AU12-08
1.000000e-03
1
TR10186|c0_g1_i1
gi|490789651|ref|WP_004651801.1|
100.00
19
0
0
3
59
391
409
hypothetical protein [Acinetobacter sp. ANC 3994]
1217715
Acinetobacter sp. ANC 3994
2.000000e-05
2
TR10270|c0_g1_i1
gi|224103219|ref|XP_002312970.1|
95.00
20
1
0
60
1
505
524
predicted protein [Populus trichocarpa]
3694
Populus trichocarpa
4.000000e-06
3
TR10309|c0_g1_i1
gi|255576756|ref|XP_002529265.1|
95.00
20
1
0
1
60
57
76
60S ribosomal protein L13, putative [Ricinus c...
3988
Ricinus communis
7.000000e-06
4
TR10330|c0_g1_i1
gi|554633737|ref|WP_023168797.1|
100.00
19
0
0
58
2
205
223
bifunctional aconitate hydratase 2/2-methyliso...
28901
Salmonella enterica
1.000000e-04
5
TR1034|c0_g1_i1
gi|224102199|ref|XP_002312586.1|
90.00
20
2
0
60
1
115
134
predicted protein [Populus trichocarpa]
3694
Populus trichocarpa
6.000000e-04
6
TR10409|c0_g1_i1
gi|225431469|ref|XP_002274337.1|
100.00
19
0
0
2
58
962
980
PREDICTED: callose synthase 5-like [Vitis vini...
29760
Vitis vinifera
4.000000e-04
7
TR10427|c0_g1_i1
gi|544520098|ref|XP_005593331.1|
89.47
19
2
0
58
2
64
82
PREDICTED: ferritin heavy polypeptide-like 17-...
9541
Macaca fascicularis
5.000000e-05
8
TR1045|c0_g1_i1
gi|518547591|ref|WP_019717798.1|
100.00
19
0
0
3
59
381
399
type VI secretion protein [Ralstonia solanacea...
305
Ralstonia solanacearum
4.000000e-04
9
TR10483|c0_g1_i1
gi|565399811|ref|XP_006365436.1|
100.00
19
0
0
59
3
43
61
PREDICTED: 40S ribosomal protein S15a-1 [Solan...
4113
Solanum tuberosum
1.000000e-06
10
TR10506|c0_g1_i1
gi|385336819|ref|YP_005890766.1|
100.00
20
0
0
1
60
69
88
50S ribosomal protein L1, partial [Neisseria g...
485;940296
Neisseria gonorrhoeae;Neisseria gonorrhoeae TC...
9.000000e-05
11
TR1052|c0_g1_i1
gi|449533903|ref|XP_004173910.1|
94.74
19
1
0
58
2
130
148
PREDICTED: 60S ribosomal protein L6-like, part...
3659
Cucumis sativus
8.000000e-05
12
TR10567|c0_g1_i1
gi|224056687|ref|XP_002298973.1|
89.47
19
2
0
3
59
224
242
predicted protein [Populus trichocarpa]
3694
Populus trichocarpa
2.000000e-05
13
TR1081|c0_g1_i1
gi|255539224|ref|XP_002510677.1|
90.00
20
2
0
60
1
165
184
catalase, putative [Ricinus communis]
3988
Ricinus communis
1.000000e-05
14
TR10821|c0_g1_i1
gi|486141519|ref|WP_001515708.1|
100.00
19
0
0
2
58
46
64
hypothetical protein, partial [Escherichia coli]
562
Escherichia coli
5.000000e-05
15
TR1098|c0_g1_i1
gi|407940733|ref|YP_006856374.1|
100.00
19
0
0
58
2
558
576
hypothetical protein C380_20210 [Acidovorax sp...
358220
Acidovorax sp. KKS102
3.000000e-04
16
TR11060|c0_g1_i1
gi|356565778|ref|XP_003551114.1|
100.00
19
0
0
3
59
202
220
PREDICTED: thiol protease aleurain-like [Glyci...
3847
Glycine max
1.000000e-05
17
TR1123|c0_g1_i1
gi|255577702|ref|XP_002529727.1|
85.71
21
2
1
1
60
1058
1078
transferase, transferring glycosyl groups, put...
3988
Ricinus communis
8.000000e-04
18
TR11449|c0_g1_i1
gi|490832535|ref|WP_004694620.1|
100.00
19
0
0
2
58
15
33
hypothetical protein, partial [Acinetobacter j...
40214
Acinetobacter johnsonii
2.000000e-05
19
TR11510|c0_g1_i1
gi|308813361|ref|XP_003083987.1|
100.00
20
0
0
1
60
412
431
putative hsp70 (ISS) [Ostreococcus tauri]
70448
Ostreococcus tauri
3.000000e-05
20
TR11698|c0_g1_i1
gi|544767987|ref|WP_021193304.1|
100.00
20
0
0
60
1
188
207
hypothetical protein [Ralstonia sp. AU12-08]
1235457
Ralstonia sp. AU12-08
6.000000e-06
21
TR11747|c0_g1_i1
gi|449433954|ref|XP_004134761.1|
100.00
19
0
0
58
2
52
70
PREDICTED: ATP synthase gamma chain, chloropla...
3659
Cucumis sativus
7.000000e-04
22
TR11802|c0_g1_i1
gi|545239476|ref|WP_021538441.1|
95.00
20
1
0
60
1
177
196
monofunctional biosynthetic peptidoglycan tran...
562
Escherichia coli
9.000000e-07
23
TR11809|c0_g1_i1
gi|489155933|ref|WP_003065614.1|
100.00
20
0
0
1
60
230
249
acyl-CoA dehydrogenase [Comamonas testosteroni]
285
Comamonas testosteroni
8.000000e-05
24
TR11907|c0_g1_i1
gi|224139156|ref|XP_002322994.1|
100.00
19
0
0
2
58
332
350
predicted protein [Populus trichocarpa]
3694
Populus trichocarpa
2.000000e-04
25
TR12160|c0_g1_i1
gi|460383900|ref|XP_004237654.1|
100.00
19
0
0
59
3
51
69
PREDICTED: 60S ribosomal protein L12-like [Sol...
4081
Solanum lycopersicum
4.000000e-04
26
TR12160|c0_g2_i1
gi|460383900|ref|XP_004237654.1|
100.00
19
0
0
59
3
51
69
PREDICTED: 60S ribosomal protein L12-like [Sol...
4081
Solanum lycopersicum
4.000000e-04
27
TR12171|c0_g1_i1
gi|357463095|ref|XP_003601829.1|
85.00
20
3
0
60
1
40
59
B12D protein [Medicago truncatula]
3880
Medicago truncatula
6.000000e-04
28
TR12382|c0_g1_i1
gi|510917996|ref|WP_016238978.1|
100.00
19
0
0
58
2
174
192
hypothetical protein [Escherichia coli]
562
Escherichia coli
6.000000e-04
29
TR12586|c0_g1_i1
gi|159462534|ref|XP_001689497.1|
100.00
19
0
0
2
58
102
120
sugar nucleotide epimerase [Chlamydomonas rein...
3055
Chlamydomonas reinhardtii
6.000000e-06
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
853
TR7622|c0_g1_i1
gi|115466364|ref|NP_001056781.1|
93.75
16
1
0
49
2
97
112
Os06g0144200 [Oryza sativa Japonica Group]
39947
Oryza sativa Japonica Group
9.000000e-05
854
TR7785|c0_g1_i1
gi|224106976|ref|XP_002314330.1|
100.00
19
0
0
2
58
52
70
predicted protein [Populus trichocarpa]
3694
Populus trichocarpa
3.000000e-04
855
TR7812|c0_g1_i1
gi|224166509|ref|XP_002338946.1|
94.74
19
1
0
59
3
68
86
tubulin, beta chain [Populus trichocarpa]
3694
Populus trichocarpa
6.000000e-06
856
TR7880|c0_g1_i1
gi|302811169|ref|XP_002987274.1|
100.00
19
0
0
58
2
146
164
hypothetical protein SELMODRAFT_271906 [Selagi...
88036
Selaginella moellendorffii
2.000000e-04
857
TR7909|c0_g1_i1
gi|565343801|ref|XP_006339015.1|
100.00
20
0
0
1
60
302
321
PREDICTED: ribulose bisphosphate carboxylase/o...
4113
Solanum tuberosum
6.000000e-08
858
TR8011|c0_g1_i1
gi|519090388|ref|WP_020246263.1|
100.00
19
0
0
59
3
41
59
NUDIX hydrolase, partial [Escherichia coli]
562
Escherichia coli
1.000000e-05
859
TR8060|c0_g1_i1
gi|485949398|ref|WP_001496796.1|
100.00
20
0
0
1
60
26
45
integrase core domain protein, partial [Escher...
562
Escherichia coli
9.000000e-08
860
TR8140|c0_g1_i1
gi|187926278|ref|YP_001892623.1|
100.00
20
0
0
1
60
509
528
oxidoreductase [Ralstonia pickettii 12J]
329;402626;428406
Ralstonia pickettii;Ralstonia pickettii 12J;Ra...
2.000000e-06
861
TR8427|c0_g1_i1
gi|544768741|ref|WP_021194054.1|
100.00
20
0
0
60
1
21
40
hypothetical protein [Ralstonia sp. AU12-08]
1235457
Ralstonia sp. AU12-08
3.000000e-04
862
TR8497|c0_g1_i1
gi|485787508|ref|WP_001409753.1|
100.00
20
0
0
1
60
40
59
transposase [Escherichia coli]
562
Escherichia coli
4.000000e-07
863
TR8556|c0_g1_i1
gi|18422603|ref|NP_568650.1|
94.74
19
1
0
58
2
95
113
pollen_Ole_e_I-domain containing protein [Arab...
3702
Arabidopsis thaliana
3.000000e-05
864
TR857|c0_g1_i1
gi|297807495|ref|XP_002871631.1|
94.74
19
1
0
59
3
363
381
hypothetical protein ARALYDRAFT_488325 [Arabid...
81972
Arabidopsis lyrata subsp. lyrata
1.000000e-04
865
TR8659|c0_g1_i1
gi|544768409|ref|WP_021193725.1|
100.00
19
0
0
3
59
296
314
NADPH:quinone oxidoreductase [Ralstonia sp. AU...
1235457
Ralstonia sp. AU12-08
4.000000e-04
866
TR8738|c0_g1_i1
gi|242086805|ref|XP_002439235.1|
90.00
20
2
0
60
1
156
175
hypothetical protein SORBIDRAFT_09g002840 [Sor...
4558
Sorghum bicolor
1.000000e-05
867
TR8738|c0_g2_i1
gi|242086805|ref|XP_002439235.1|
90.00
20
2
0
60
1
156
175
hypothetical protein SORBIDRAFT_09g002840 [Sor...
4558
Sorghum bicolor
1.000000e-05
868
TR8799|c0_g1_i1
gi|224114511|ref|XP_002332348.1|
100.00
20
0
0
60
1
79
98
predicted protein [Populus trichocarpa]
3694
Populus trichocarpa
1.000000e-07
869
TR8824|c0_g1_i1
gi|224078686|ref|XP_002305603.1|
100.00
19
0
0
58
2
162
180
predicted protein [Populus trichocarpa]
3694
Populus trichocarpa
5.000000e-04
870
TR8942|c0_g1_i1
gi|445964789|ref|WP_000042644.1|
100.00
19
0
0
58
2
76
94
hypothetical protein, partial [Salmonella ente...
28901
Salmonella enterica
6.000000e-07
871
TR9142|c0_g1_i1
gi|544772510|ref|WP_021197774.1|
100.00
20
0
0
60
1
102
121
hypothetical protein, partial [Ralstonia sp. A...
1235457
Ralstonia sp. AU12-08
1.000000e-05
872
TR9181|c0_g1_i1
gi|255570327|ref|XP_002526123.1|
84.21
19
3
0
59
3
30
48
aldo/keto reductase, putative [Ricinus communis]
3988
Ricinus communis
5.000000e-04
873
TR9236|c0_g1_i1
gi|294658086|ref|XP_460403.2|
100.00
20
0
0
60
1
110
129
DEHA2F00968p [Debaryomyces hansenii CBS767]
284592
Debaryomyces hansenii CBS767
5.000000e-05
874
TR9466|c0_g1_i1
gi|255570203|ref|XP_002526062.1|
100.00
20
0
0
1
60
421
440
calcium-dependent protein kinase, putative [Ri...
3988
Ricinus communis
3.000000e-05
875
TR9541|c0_g1_i1
gi|487603737|ref|WP_001732123.1|
100.00
19
0
0
3
59
29
47
penicillin-binding protein 2, partial [Salmone...
28901
Salmonella enterica
2.000000e-05
876
TR9566|c0_g1_i1
gi|496576614|ref|WP_009277732.1|
100.00
20
0
0
60
1
5
24
transposase [Ralstonia sp. 5_7_47FAA]
658664
Ralstonia sp. 5_7_47FAA
1.000000e-06
877
TR9572|c0_g1_i1
gi|460374217|ref|XP_004232908.1|
85.00
20
3
0
60
1
87
106
PREDICTED: auxin-repressed 12.5 kDa protein-li...
4081
Solanum lycopersicum
3.000000e-04
878
TR9614|c0_g1_i1
gi|565436038|ref|XP_006281481.1|
100.00
19
0
0
2
58
156
174
hypothetical protein CARUB_v10027565mg [Capsel...
81985
Capsella rubella
3.000000e-05
879
TR9750|c0_g1_i1
gi|194288896|ref|YP_002004803.1|
100.00
19
0
0
59
3
27
45
cyanophycin synthetase [Cupriavidus taiwanensi...
164546;977880
Cupriavidus taiwanensis;Cupriavidus taiwanensi...
1.000000e-05
880
TR9755|c0_g1_i1
gi|359475712|ref|XP_003631735.1|
100.00
20
0
0
1
60
247
266
PREDICTED: uncharacterized protein LOC10023311...
29760
Vitis vinifera
8.000000e-06
881
TR9944|c0_g1_i1
gi|495004122|ref|WP_007730136.1|
100.00
19
0
0
3
59
185
203
UPF0246 protein YaaA [Cronobacter dublinensis]
413497
Cronobacter dublinensis
7.000000e-05
882
TR9963|c0_g1_i1
gi|356498184|ref|XP_003517933.1|
90.00
20
2
0
1
60
220
239
PREDICTED: hypersensitive-induced response pro...
3847
Glycine max
2.000000e-04
883 rows × 14 columns
In [8]:
# Show the per-genus hit counts returned by genus(), in descending order.
topfreq_desc
Out[8]:
Escherichia 96
Ralstonia 79
Solanum 75
Populus 62
Arabidopsis 61
Glycine 37
Vitis 34
Cucumis 31
Ricinus 29
Medicago 29
Salmonella 25
Oryza 23
Cicer 16
Capsella 16
Fragaria 15
dtype: int64
In [9]:
# Plot the genus counts held in topfreq_desc.
# NOTE(review): plot_in presumably draws a horizontal bar chart (the module is
# named blastplus_taxa_barh) — confirm against its definition.
plot_in(topfreq_desc)
In [ ]:
Content source: Perugolate/blastplus-parsing
Similar notebooks: