In [1]:
import os

# Directory that holds blastplus_taxa_barh.py, given relative to the directory
# the kernel was started in.
_PARSER_DIR = "opt/blastplus-parsing/"

# os.chdir() with a relative path is not idempotent: once the kernel is inside
# the directory, running this cell a second time would look for
# "opt/blastplus-parsing/" *inside* it and raise FileNotFoundError.
# Only change directory when the kernel is not already there.
if not os.path.normpath(os.getcwd()).endswith(os.sep + os.path.normpath(_PARSER_DIR)):
    os.chdir(_PARSER_DIR)

# The cells below use blast_in, genus and plot_in from this module.
# NOTE(review): the star import is kept so the notebook namespace is unchanged;
# consider importing those names explicitly once it is confirmed nothing else
# from the module is needed.
from blastplus_taxa_barh import *

In [6]:
%matplotlib inline
blast_data = blast_in("/home/paul/outfmt6.all.uniq")
topfreq_desc = genus(blast_data, int(12),int(15))
plot_in(topfreq_desc)



In [7]:
# Display the table returned by blast_in(); the notebook rendering elides the
# middle rows and reports the full size underneath.
blast_data


Out[7]:
0 1 2 3 4 5 6 7 8 9 10 11 12 13
0 TR10144|c0_g1_i1 gi|544770924|ref|WP_021196208.1| 100.00 20 0 0 60 1 64 83 terpene utilization protein AtuA [Ralstonia sp... 1235457 Ralstonia sp. AU12-08 1.000000e-03
1 TR10186|c0_g1_i1 gi|490789651|ref|WP_004651801.1| 100.00 19 0 0 3 59 391 409 hypothetical protein [Acinetobacter sp. ANC 3994] 1217715 Acinetobacter sp. ANC 3994 2.000000e-05
2 TR10270|c0_g1_i1 gi|224103219|ref|XP_002312970.1| 95.00 20 1 0 60 1 505 524 predicted protein [Populus trichocarpa] 3694 Populus trichocarpa 4.000000e-06
3 TR10309|c0_g1_i1 gi|255576756|ref|XP_002529265.1| 95.00 20 1 0 1 60 57 76 60S ribosomal protein L13, putative [Ricinus c... 3988 Ricinus communis 7.000000e-06
4 TR10330|c0_g1_i1 gi|554633737|ref|WP_023168797.1| 100.00 19 0 0 58 2 205 223 bifunctional aconitate hydratase 2/2-methyliso... 28901 Salmonella enterica 1.000000e-04
5 TR1034|c0_g1_i1 gi|224102199|ref|XP_002312586.1| 90.00 20 2 0 60 1 115 134 predicted protein [Populus trichocarpa] 3694 Populus trichocarpa 6.000000e-04
6 TR10409|c0_g1_i1 gi|225431469|ref|XP_002274337.1| 100.00 19 0 0 2 58 962 980 PREDICTED: callose synthase 5-like [Vitis vini... 29760 Vitis vinifera 4.000000e-04
7 TR10427|c0_g1_i1 gi|544520098|ref|XP_005593331.1| 89.47 19 2 0 58 2 64 82 PREDICTED: ferritin heavy polypeptide-like 17-... 9541 Macaca fascicularis 5.000000e-05
8 TR1045|c0_g1_i1 gi|518547591|ref|WP_019717798.1| 100.00 19 0 0 3 59 381 399 type VI secretion protein [Ralstonia solanacea... 305 Ralstonia solanacearum 4.000000e-04
9 TR10483|c0_g1_i1 gi|565399811|ref|XP_006365436.1| 100.00 19 0 0 59 3 43 61 PREDICTED: 40S ribosomal protein S15a-1 [Solan... 4113 Solanum tuberosum 1.000000e-06
10 TR10506|c0_g1_i1 gi|385336819|ref|YP_005890766.1| 100.00 20 0 0 1 60 69 88 50S ribosomal protein L1, partial [Neisseria g... 485;940296 Neisseria gonorrhoeae;Neisseria gonorrhoeae TC... 9.000000e-05
11 TR1052|c0_g1_i1 gi|449533903|ref|XP_004173910.1| 94.74 19 1 0 58 2 130 148 PREDICTED: 60S ribosomal protein L6-like, part... 3659 Cucumis sativus 8.000000e-05
12 TR10567|c0_g1_i1 gi|224056687|ref|XP_002298973.1| 89.47 19 2 0 3 59 224 242 predicted protein [Populus trichocarpa] 3694 Populus trichocarpa 2.000000e-05
13 TR1081|c0_g1_i1 gi|255539224|ref|XP_002510677.1| 90.00 20 2 0 60 1 165 184 catalase, putative [Ricinus communis] 3988 Ricinus communis 1.000000e-05
14 TR10821|c0_g1_i1 gi|486141519|ref|WP_001515708.1| 100.00 19 0 0 2 58 46 64 hypothetical protein, partial [Escherichia coli] 562 Escherichia coli 5.000000e-05
15 TR1098|c0_g1_i1 gi|407940733|ref|YP_006856374.1| 100.00 19 0 0 58 2 558 576 hypothetical protein C380_20210 [Acidovorax sp... 358220 Acidovorax sp. KKS102 3.000000e-04
16 TR11060|c0_g1_i1 gi|356565778|ref|XP_003551114.1| 100.00 19 0 0 3 59 202 220 PREDICTED: thiol protease aleurain-like [Glyci... 3847 Glycine max 1.000000e-05
17 TR1123|c0_g1_i1 gi|255577702|ref|XP_002529727.1| 85.71 21 2 1 1 60 1058 1078 transferase, transferring glycosyl groups, put... 3988 Ricinus communis 8.000000e-04
18 TR11449|c0_g1_i1 gi|490832535|ref|WP_004694620.1| 100.00 19 0 0 2 58 15 33 hypothetical protein, partial [Acinetobacter j... 40214 Acinetobacter johnsonii 2.000000e-05
19 TR11510|c0_g1_i1 gi|308813361|ref|XP_003083987.1| 100.00 20 0 0 1 60 412 431 putative hsp70 (ISS) [Ostreococcus tauri] 70448 Ostreococcus tauri 3.000000e-05
20 TR11698|c0_g1_i1 gi|544767987|ref|WP_021193304.1| 100.00 20 0 0 60 1 188 207 hypothetical protein [Ralstonia sp. AU12-08] 1235457 Ralstonia sp. AU12-08 6.000000e-06
21 TR11747|c0_g1_i1 gi|449433954|ref|XP_004134761.1| 100.00 19 0 0 58 2 52 70 PREDICTED: ATP synthase gamma chain, chloropla... 3659 Cucumis sativus 7.000000e-04
22 TR11802|c0_g1_i1 gi|545239476|ref|WP_021538441.1| 95.00 20 1 0 60 1 177 196 monofunctional biosynthetic peptidoglycan tran... 562 Escherichia coli 9.000000e-07
23 TR11809|c0_g1_i1 gi|489155933|ref|WP_003065614.1| 100.00 20 0 0 1 60 230 249 acyl-CoA dehydrogenase [Comamonas testosteroni] 285 Comamonas testosteroni 8.000000e-05
24 TR11907|c0_g1_i1 gi|224139156|ref|XP_002322994.1| 100.00 19 0 0 2 58 332 350 predicted protein [Populus trichocarpa] 3694 Populus trichocarpa 2.000000e-04
25 TR12160|c0_g1_i1 gi|460383900|ref|XP_004237654.1| 100.00 19 0 0 59 3 51 69 PREDICTED: 60S ribosomal protein L12-like [Sol... 4081 Solanum lycopersicum 4.000000e-04
26 TR12160|c0_g2_i1 gi|460383900|ref|XP_004237654.1| 100.00 19 0 0 59 3 51 69 PREDICTED: 60S ribosomal protein L12-like [Sol... 4081 Solanum lycopersicum 4.000000e-04
27 TR12171|c0_g1_i1 gi|357463095|ref|XP_003601829.1| 85.00 20 3 0 60 1 40 59 B12D protein [Medicago truncatula] 3880 Medicago truncatula 6.000000e-04
28 TR12382|c0_g1_i1 gi|510917996|ref|WP_016238978.1| 100.00 19 0 0 58 2 174 192 hypothetical protein [Escherichia coli] 562 Escherichia coli 6.000000e-04
29 TR12586|c0_g1_i1 gi|159462534|ref|XP_001689497.1| 100.00 19 0 0 2 58 102 120 sugar nucleotide epimerase [Chlamydomonas rein... 3055 Chlamydomonas reinhardtii 6.000000e-06
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
853 TR7622|c0_g1_i1 gi|115466364|ref|NP_001056781.1| 93.75 16 1 0 49 2 97 112 Os06g0144200 [Oryza sativa Japonica Group] 39947 Oryza sativa Japonica Group 9.000000e-05
854 TR7785|c0_g1_i1 gi|224106976|ref|XP_002314330.1| 100.00 19 0 0 2 58 52 70 predicted protein [Populus trichocarpa] 3694 Populus trichocarpa 3.000000e-04
855 TR7812|c0_g1_i1 gi|224166509|ref|XP_002338946.1| 94.74 19 1 0 59 3 68 86 tubulin, beta chain [Populus trichocarpa] 3694 Populus trichocarpa 6.000000e-06
856 TR7880|c0_g1_i1 gi|302811169|ref|XP_002987274.1| 100.00 19 0 0 58 2 146 164 hypothetical protein SELMODRAFT_271906 [Selagi... 88036 Selaginella moellendorffii 2.000000e-04
857 TR7909|c0_g1_i1 gi|565343801|ref|XP_006339015.1| 100.00 20 0 0 1 60 302 321 PREDICTED: ribulose bisphosphate carboxylase/o... 4113 Solanum tuberosum 6.000000e-08
858 TR8011|c0_g1_i1 gi|519090388|ref|WP_020246263.1| 100.00 19 0 0 59 3 41 59 NUDIX hydrolase, partial [Escherichia coli] 562 Escherichia coli 1.000000e-05
859 TR8060|c0_g1_i1 gi|485949398|ref|WP_001496796.1| 100.00 20 0 0 1 60 26 45 integrase core domain protein, partial [Escher... 562 Escherichia coli 9.000000e-08
860 TR8140|c0_g1_i1 gi|187926278|ref|YP_001892623.1| 100.00 20 0 0 1 60 509 528 oxidoreductase [Ralstonia pickettii 12J] 329;402626;428406 Ralstonia pickettii;Ralstonia pickettii 12J;Ra... 2.000000e-06
861 TR8427|c0_g1_i1 gi|544768741|ref|WP_021194054.1| 100.00 20 0 0 60 1 21 40 hypothetical protein [Ralstonia sp. AU12-08] 1235457 Ralstonia sp. AU12-08 3.000000e-04
862 TR8497|c0_g1_i1 gi|485787508|ref|WP_001409753.1| 100.00 20 0 0 1 60 40 59 transposase [Escherichia coli] 562 Escherichia coli 4.000000e-07
863 TR8556|c0_g1_i1 gi|18422603|ref|NP_568650.1| 94.74 19 1 0 58 2 95 113 pollen_Ole_e_I-domain containing protein [Arab... 3702 Arabidopsis thaliana 3.000000e-05
864 TR857|c0_g1_i1 gi|297807495|ref|XP_002871631.1| 94.74 19 1 0 59 3 363 381 hypothetical protein ARALYDRAFT_488325 [Arabid... 81972 Arabidopsis lyrata subsp. lyrata 1.000000e-04
865 TR8659|c0_g1_i1 gi|544768409|ref|WP_021193725.1| 100.00 19 0 0 3 59 296 314 NADPH:quinone oxidoreductase [Ralstonia sp. AU... 1235457 Ralstonia sp. AU12-08 4.000000e-04
866 TR8738|c0_g1_i1 gi|242086805|ref|XP_002439235.1| 90.00 20 2 0 60 1 156 175 hypothetical protein SORBIDRAFT_09g002840 [Sor... 4558 Sorghum bicolor 1.000000e-05
867 TR8738|c0_g2_i1 gi|242086805|ref|XP_002439235.1| 90.00 20 2 0 60 1 156 175 hypothetical protein SORBIDRAFT_09g002840 [Sor... 4558 Sorghum bicolor 1.000000e-05
868 TR8799|c0_g1_i1 gi|224114511|ref|XP_002332348.1| 100.00 20 0 0 60 1 79 98 predicted protein [Populus trichocarpa] 3694 Populus trichocarpa 1.000000e-07
869 TR8824|c0_g1_i1 gi|224078686|ref|XP_002305603.1| 100.00 19 0 0 58 2 162 180 predicted protein [Populus trichocarpa] 3694 Populus trichocarpa 5.000000e-04
870 TR8942|c0_g1_i1 gi|445964789|ref|WP_000042644.1| 100.00 19 0 0 58 2 76 94 hypothetical protein, partial [Salmonella ente... 28901 Salmonella enterica 6.000000e-07
871 TR9142|c0_g1_i1 gi|544772510|ref|WP_021197774.1| 100.00 20 0 0 60 1 102 121 hypothetical protein, partial [Ralstonia sp. A... 1235457 Ralstonia sp. AU12-08 1.000000e-05
872 TR9181|c0_g1_i1 gi|255570327|ref|XP_002526123.1| 84.21 19 3 0 59 3 30 48 aldo/keto reductase, putative [Ricinus communis] 3988 Ricinus communis 5.000000e-04
873 TR9236|c0_g1_i1 gi|294658086|ref|XP_460403.2| 100.00 20 0 0 60 1 110 129 DEHA2F00968p [Debaryomyces hansenii CBS767] 284592 Debaryomyces hansenii CBS767 5.000000e-05
874 TR9466|c0_g1_i1 gi|255570203|ref|XP_002526062.1| 100.00 20 0 0 1 60 421 440 calcium-dependent protein kinase, putative [Ri... 3988 Ricinus communis 3.000000e-05
875 TR9541|c0_g1_i1 gi|487603737|ref|WP_001732123.1| 100.00 19 0 0 3 59 29 47 penicillin-binding protein 2, partial [Salmone... 28901 Salmonella enterica 2.000000e-05
876 TR9566|c0_g1_i1 gi|496576614|ref|WP_009277732.1| 100.00 20 0 0 60 1 5 24 transposase [Ralstonia sp. 5_7_47FAA] 658664 Ralstonia sp. 5_7_47FAA 1.000000e-06
877 TR9572|c0_g1_i1 gi|460374217|ref|XP_004232908.1| 85.00 20 3 0 60 1 87 106 PREDICTED: auxin-repressed 12.5 kDa protein-li... 4081 Solanum lycopersicum 3.000000e-04
878 TR9614|c0_g1_i1 gi|565436038|ref|XP_006281481.1| 100.00 19 0 0 2 58 156 174 hypothetical protein CARUB_v10027565mg [Capsel... 81985 Capsella rubella 3.000000e-05
879 TR9750|c0_g1_i1 gi|194288896|ref|YP_002004803.1| 100.00 19 0 0 59 3 27 45 cyanophycin synthetase [Cupriavidus taiwanensi... 164546;977880 Cupriavidus taiwanensis;Cupriavidus taiwanensi... 1.000000e-05
880 TR9755|c0_g1_i1 gi|359475712|ref|XP_003631735.1| 100.00 20 0 0 1 60 247 266 PREDICTED: uncharacterized protein LOC10023311... 29760 Vitis vinifera 8.000000e-06
881 TR9944|c0_g1_i1 gi|495004122|ref|WP_007730136.1| 100.00 19 0 0 3 59 185 203 UPF0246 protein YaaA [Cronobacter dublinensis] 413497 Cronobacter dublinensis 7.000000e-05
882 TR9963|c0_g1_i1 gi|356498184|ref|XP_003517933.1| 90.00 20 2 0 1 60 220 239 PREDICTED: hypersensitive-induced response pro... 3847 Glycine max 2.000000e-04

883 rows × 14 columns


In [8]:
# Display the genus -> count result returned by genus() in the earlier cell.
topfreq_desc


Out[8]:
Escherichia    96
Ralstonia      79
Solanum        75
Populus        62
Arabidopsis    61
Glycine        37
Vitis          34
Cucumis        31
Ricinus        29
Medicago       29
Salmonella     25
Oryza          23
Cicer          16
Capsella       16
Fragaria       15
dtype: int64

In [9]:
# Plot topfreq_desc again (same call as the last line of the In [6] cell).
plot_in(topfreq_desc)



In [ ]: