In [1]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import cPickle as cpk
%matplotlib inline
In [2]:
#Load PhytoKEGG Annotations
AllPhytoKO_ann=pd.read_table('AllPhytoKegg_annotated.tab', header=False, delimiter='\t')
In [3]:
AllPhytoKO_ann
Out[3]:
kID
Ampcof8
K01762
Ampcof9
K01455
Ampcof22
K15106
Ampcof34
K09838
Ampcof42
K08675
Ampcof48
K03885
Ampcof56
K06639
Ampcof60
K09506
Ampcof64
K09291
Ampcof72
K07769
Ampcof80
K03350
Ampcof89
K12194
Ampcof96
K03004
Ampcof97
K14713
Ampcof133
K16833
Ampcof154
K07652
Ampcof180
K03267
Ampcof188
K02938
Ampcof196
K11292
Ampcof205
K09540
Ampcof222
K01556
Ampcof234
K17871
Ampcof240
K05034
Ampcof249
K03324
Ampcof257
K00045
Ampcof264
K15181
Ampcof268
K17778
Ampcof269
K01874
Ampcof270
K05656
Ampcof271
K03152
...
...
Scyaps32057
K09013
Scyaps32061
K04043
Scyaps32063
K08592
Scyaps32064
K04488
Scyaps32065
K02437
Scyaps32070
K02209
Scyaps32086
K02880
Scyaps32095
K09490
Scyaps32106
K09502
Scyaps32107
K03231
Scyaps32132
K12606
Scyaps32191
K01201
Scyaps32194
K14566
Scyaps32200
K01079
Scyaps32206
K05869
Scyaps32210
K02641
Scyaps32214
K03942
Scyaps32215
K07874
Scyaps32216
K09420
Scyaps32238
K01854
Scyaps32260
K02893
Scyaps32261
K13337
Scyaps32272
K13094
Scyaps32273
K00480
Scyaps32274
K01581
Scyaps32288
K11251
Scyaps32296
K07300
Scyaps32299
K08008
Scyaps32305
K15450
Scyaps32306
K13348
561156 rows × 1 columns
In [4]:
InsituCounts=pd.read_table('Data/AllInsitu.tab', index_col='gID')
In [6]:
#normalize to the library size
InsituTPM=InsituCounts.copy()
InsituTPM[['S1', 'S2', 'S3', 'S4', 'S5']]=(InsituCounts[['S1', 'S2', 'S3', 'S4', 'S5']]/InsituCounts[['S1', 'S2', 'S3', 'S4', 'S5']].sum())*10**6
#drop any all zero reads for simplicity
InsituCounts=InsituCounts[(InsituCounts.T != 0).any()]
InsituTPM=InsituTPM[(InsituTPM.T != 0).any()]
#Add annotation information
InsituCounts=InsituCounts.join(AllPhytoKO_ann)
InsituTPM=InsituTPM.join(AllPhytoKO_ann)
InsituCounts=InsituCounts.dropna()
InsituTPM=InsituTPM.dropna()
In [7]:
#load in the species/group information
Group_Species=pd.read_table('GrpSpecies',delimiter=' ').T.drop(['MMETSP',
'MMETSP.1']).T.drop_duplicates().set_index('SName')
In [19]:
InsituTPM.groupby(['kID','sgID']).sum().reset_index()
Out[19]:
kID
sgID
S1
S2
S3
S4
S5
0
K00001
Aletam
0.000000
0.000000
0.011808
0.079876
0.000000
1
K00001
Ampcof
0.011243
0.044565
0.011808
0.000000
0.000000
2
K00001
Ampmas
0.011243
0.014855
0.035423
0.053250
0.000000
3
K00001
Astgla
0.000000
0.000000
0.011808
0.000000
0.000000
4
K00001
Attsep
0.112434
0.133696
0.047231
0.026625
0.178521
5
K00001
Aulsub
0.000000
0.000000
0.000000
0.000000
0.022315
6
K00001
Chacf.
0.067460
0.029710
0.023615
0.053250
0.022315
7
K00001
Chadeb
0.078704
0.059420
0.460501
0.931881
0.781030
8
K00001
Chadic
0.000000
0.000000
0.000000
0.000000
0.000000
9
K00001
Chaneo
0.000000
0.000000
0.106270
0.266252
0.178521
10
K00001
Chasp
0.011243
0.029710
0.153500
0.159751
0.133891
11
K00001
Chreri
0.000000
0.000000
0.023615
0.000000
0.022315
12
K00001
Corpen
0.000000
0.014855
0.011808
0.000000
0.000000
13
K00001
Crycoh
0.134920
0.103985
0.129885
0.079876
0.044630
14
K00001
Cycmen
0.753306
0.222826
0.106270
0.266252
0.044630
15
K00001
Cylclo
0.022487
0.029710
0.011808
0.000000
0.000000
16
K00001
Dacfra
0.056217
0.000000
0.094462
0.053250
0.022315
17
K00001
Detcon
0.000000
0.044565
0.011808
0.000000
0.044630
18
K00001
Ditbri
0.056217
0.000000
0.047231
0.026625
0.022315
19
K00001
Durbal
0.000000
0.014855
0.000000
0.053250
0.000000
20
K00001
Entsp
0.000000
0.000000
0.000000
0.000000
0.000000
21
K00001
Eucant
0.247354
0.297101
0.614002
1.198133
1.495115
22
K00001
Extspi
0.044973
0.014855
0.023615
0.000000
0.089261
23
K00001
Glefol
0.033730
0.000000
0.000000
0.000000
0.000000
24
K00001
Graoce
0.000000
0.000000
0.011808
0.000000
0.000000
25
K00001
Heltam
0.000000
0.000000
0.011808
0.000000
0.000000
26
K00001
Hetrot
0.000000
0.000000
0.000000
0.000000
0.022315
27
K00001
Karbre
0.011243
0.000000
0.035423
0.053250
0.156206
28
K00001
Karmic
0.022487
0.029710
0.011808
0.026625
0.000000
29
K00001
Kryfol
0.022487
0.014855
0.059039
0.026625
0.044630
...
...
...
...
...
...
...
...
272444
K18277
Pelbei
0.460978
0.326811
0.590386
0.186376
0.870291
272445
K18277
Peraci
21.857112
42.366628
17.369167
24.308786
28.027832
272446
K18277
Phaant
0.134920
0.133696
0.165308
0.106501
0.156206
272447
K18277
Phasp
0.000000
0.000000
0.011808
0.000000
0.000000
272448
K18277
Proala
0.000000
0.044565
0.035423
0.000000
0.000000
272449
K18277
Proine
0.000000
0.029710
0.035423
0.053250
0.044630
272450
K18277
Proret
0.089947
0.252536
0.177116
0.133126
0.178521
272451
K18277
Pseare
0.112434
0.029710
0.047231
0.106501
0.000000
272452
K18277
Pseaus
0.022487
0.029710
0.023615
0.079876
0.022315
272453
K18277
Psedel
0.056217
0.000000
0.011808
0.053250
0.022315
272454
K18277
Psefra
0.044973
0.014855
0.000000
0.133126
0.089261
272455
K18277
Psepun
0.044973
0.014855
0.094462
0.000000
0.000000
272456
K18277
Pyrbah
0.202381
0.222826
0.259770
0.213001
0.178521
272457
K18277
Scrhan
0.146164
0.356521
0.318809
0.266252
0.178521
272458
K18277
Scrtro
0.022487
0.089130
0.047231
0.053250
0.022315
272459
K18277
Skedoh
0.089947
7.620645
0.047231
0.026625
0.111576
272460
K18277
Skemar
0.674602
43.644163
0.153500
0.106501
1.004182
272461
K18277
Skemen
0.089947
0.133696
0.000000
0.026625
0.022315
272462
K18277
Stacon
0.011243
0.029710
0.118077
0.213001
0.044630
272463
K18277
Stetur
0.168651
0.059420
0.177116
0.213001
0.245467
272464
K18277
Struni
0.213624
0.014855
0.000000
0.000000
0.000000
272465
K18277
Synrec
0.000000
0.029710
0.047231
0.000000
0.044630
272466
K18277
Thaant
0.528438
0.118840
1.464158
0.612379
1.829843
272467
K18277
Thafra
0.011243
0.029710
0.118077
0.079876
0.089261
272468
K18277
Thamin
0.101190
0.014855
0.011808
0.079876
0.044630
272469
K18277
Thanit
0.101190
0.059420
0.059039
0.079876
0.022315
272470
K18277
Thapun
0.000000
0.014855
0.000000
0.000000
0.000000
272471
K18277
Tharot
0.000000
0.044565
0.082654
0.079876
0.022315
272472
K18277
Thasp
0.033730
0.014855
0.047231
0.079876
0.000000
272473
K18277
Tridub
0.056217
0.059420
0.212539
0.186376
0.401673
272474 rows × 7 columns
In [ ]:
gs=Group_Species.index
for g in gs:
print g
In [63]:
Group_Species
Out[63]:
Grp
SName
Ochsp
Ochrophyta
Prypar
Haptophyta
Graoce
Bacillariophyta
Corhys
Bacillariophyta
Rhomar
Rhodophyta
Skecos
Bacillariophyta
Nitsp
Bacillariophyta
Odoaur
Bacillariophyta
Cylclo
Bacillariophyta
Urosp
Ciliophora
Dinsp
Ochrophyta
Karbre
Dinophyta
Dolten
Chlorophyta
Neppyr
Chlorophyta
Crypar
Cryptophyta
Eutgym
Euglenozoa
Lotoce
Cercozoa
Lotglo
Cercozoa
Lotamo
Cercozoa
Hemand
Cryptophyta
Bignat
Chlorarachniophyta
Guithe
Cryptophyta
Polpar
Chlorophyta
Pyrpar
Chlorophyta
D1
Unknown
Alemon
Dinophyta
Ptedan
Ochrophyta
ParImp
Ochrophyta
Acasp
Sarcomastigophora
GonPac
Cryptophyta
...
...
Hemruf
Cryptophyta
Biglon
Chlorarachniophyta
Gepoce
Haptophyta
Eupcra
Ciliophora
Ammsp
Foraminifera
Batpra
Chlorophyta
Psehei
Bacillariophyta
Chabre
Bacillariophyta
Aleand
Dinophyta
Eucant
Bacillariophyta
Ptesp
Chlorophyta
Gonspi
Pyrrophycophyta
Polgla
Alveolata
Hetarc
Alveolata
Craaus
Ochrophyta
Entsp
Bacillariophyta
Pyrsp
Chlorophyta
CCMP2111
Unknown
Chadic
Bacillariophyta
Attsep
Bacillariophyta
Madery
Rhodophyta
Vitbra
Alveolata
Synpus
Unknown
RCC701
Unknown
RCC1871
Unknown
Branut
Alveolata
Phacor
Haptophyta
CCMP2135
Unknown
Mansp
Chlorophyta
CCMP1999
Unknown
283 rows × 1 columns
In [ ]:
Content source: KujawinskiLaboratory/NB_Distribution
Similar notebooks: