In [4]:
from om import base, settings
from om.components import *
from om.data import *
from om.util import *
from scipy.spatial.distance import pdist, squareform
from scipy.cluster.hierarchy import linkage, dendrogram
import pandas as pd
import numpy as np
import math,cobra
ome = base.Session()
ged = GeneExpressionData
dged = DifferentialGeneExpressionData
cpge = ChIPPeakGeneExpression
In [5]:
ome.query(ChIPPeakAnalysis).all()
Out[5]:
[ChIP Peak Analysis (#331, ChIPExo-Crp_wt_glucose_NH4Cl_O2_anti-crp_default_peaks): Environment: C:glucose, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#332, ChIPExo-RpoD_wt_glucose_cytidine_O2_anti-rpod_default_peaks): Environment: C:glucose, N:cytidine, e:O2 ,
ChIP Peak Analysis (#333, ChIPExo-Nac_Nac8myc_glucose_glutamine_O2_anti-myc_default_peaks): Environment: C:glucose, N:glutamine, e:O2 ,
ChIP Peak Analysis (#334, ChIPExo-Crp_wt_fructose_NH4Cl_O2_anti-crp_default_peaks): Environment: C:fructose, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#335, ChIPExo-Nac_Nac8myc_glucose_NH4Cl_O2_anti-myc_default_peaks): Environment: C:glucose, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#336, ChIPExo-Crp_wt_glycerol_NH4Cl_O2_anti-crp_default_peaks): Environment: C:glycerol, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#337, ChIPExo-NtrC_NtrC8myc_glucose_cytosine_O2_anti-myc_default_peaks): Environment: C:glucose, N:cytosine, e:O2 ,
ChIP Peak Analysis (#338, ChIPchip-Cra_Cra8myc_glucose_NH4Cl_O2_anti-myc_default_peaks): Environment: C:glucose, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#339, ChIPchip-Lrp_Lrp8myc_glucose_NH4Cl_O2_anti-myc_default_peaks): Environment: C:glucose, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#340, ChIPExo-ArcA_ArcA8myc_glucose_NH4Cl_anaerobic_anti-myc_default_peaks): Environment: C:glucose, N:NH4Cl, e:anaerobic ,
ChIP Peak Analysis (#341, ChIPExo-RpoD_wt_fructose_NH4Cl_O2_anti-rpod_default_peaks): Environment: C:fructose, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#342, ChIPchip-Crp_Crp8myc_fructose_NH4Cl_O2_anti-myc_default_peaks): Environment: C:fructose, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#343, ChIPExo-RpoD_wt_glucose_NH4Cl_O2_anti-rpod_default_peaks): Environment: C:glucose, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#344, ChIPExo-RpoD_wt_glycerol_NH4Cl_O2_anti-rpod_default_peaks): Environment: C:glycerol, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#345, ChIPExo-Crp_delAr2_glycerol_NH4Cl_O2_anti-crp_default_peaks): Environment: C:glycerol, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#346, ChIPExo-Crp_Ar3_glycerol_NH4Cl_O2_anti-crp_default_peaks): Environment: C:glycerol, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#347, ChIPchip-ArcA_ArcA8myc_glucose_NH4Cl_NO3_anti-myc_default_peaks): Environment: C:glucose, N:NH4Cl, e:NO3 ,
ChIP Peak Analysis (#348, ChIPExo-RpoD_wt_glucose_glutamine_O2_anti-rpod_default_peaks): Environment: C:glucose, N:glutamine, e:O2 ,
ChIP Peak Analysis (#349, ChIPExo-Nac_Nac8myc_glucose_cytidine_O2_anti-myc_default_peaks): Environment: C:glucose, N:cytidine, e:O2 ,
ChIP Peak Analysis (#350, ChIPExo-RpoN_wt_glucose_NH4Cl_O2_anti-rpon_default_peaks): Environment: C:glucose, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#351, ChIPExo-RpoN_wt_glucose_glutamine_O2_anti-rpon_default_peaks): Environment: C:glucose, N:glutamine, e:O2 ,
ChIP Peak Analysis (#352, ChIPExo-Crp_delta-crp_glycerol_NH4Cl_O2_anti-crp_default_peaks): Environment: C:glycerol, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#353, ChIPExo-Crp_delAr1_glycerol_NH4Cl_O2_anti-crp_default_peaks): Environment: C:glycerol, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#354, ChIPExo-RpoN_wt_glucose_cytidine_O2_anti-rpon_default_peaks): Environment: C:glucose, N:cytidine, e:O2 ,
ChIP Peak Analysis (#355, ChIPExo-Fur_Fur8myc_glucose_NH4Cl_O2_anti-myc_FeCl2_default_peaks): Environment: C:glucose, N:NH4Cl, e:O2 FeCl2,
ChIP Peak Analysis (#356, ChIPExo-Fnr_Fnr8myc_glucose_NH4Cl_anaerobic_anti-myc_default_peaks): Environment: C:glucose, N:NH4Cl, e:anaerobic ,
ChIP Peak Analysis (#357, ChIPchip-Fnr_Fnr8myc_glucose_NH4Cl_NO3_anti-myc_default_peaks): Environment: C:glucose, N:NH4Cl, e:NO3 ,
ChIP Peak Analysis (#358, ChIPchip-Fnr_Fnr8myc_glucose_NH4Cl_anaerobic_anti-myc_default_peaks): Environment: C:glucose, N:NH4Cl, e:anaerobic ,
ChIP Peak Analysis (#359, ChIPExo-NtrC_NtrC8myc_glucose_glutamine_O2_anti-myc_default_peaks): Environment: C:glucose, N:glutamine, e:O2 ,
ChIP Peak Analysis (#360, ChIPchip-Crp_Crp8myc_acetate_NH4Cl_O2_anti-myc_default_peaks): Environment: C:acetate, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#361, ChIPExo-RpoA_wt_glucose_NH4Cl_O2_anti-rpoA_default_peaks): Environment: C:glucose, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#362, ChIPchip-ArgR_ArgR8myc_glucose_NH4Cl_O2_anti-myc_default_peaks): Environment: C:glucose, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#363, ChIPExo-Nac_Nac8myc_glucose_cytosine_O2_anti-myc_default_peaks): Environment: C:glucose, N:cytosine, e:O2 ,
ChIP Peak Analysis (#364, ChIPExo-Fur_Fur8myc_glucose_NH4Cl_O2_anti-myc_dpd_default_peaks): Environment: C:glucose, N:NH4Cl, e:O2 dpd,
ChIP Peak Analysis (#365, ChIPExo-NtrC_NtrC8myc_glucose_NH4Cl_O2_anti-myc_default_peaks): Environment: C:glucose, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#366, ChIPExo-RpoS_wt_glucose_NH4Cl_O2_anti-rpos_default_peaks): Environment: C:glucose, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#367, ChIPExo-Crp_Crp8myc_fructose_NH4Cl_O2_anti-crp_default_peaks): Environment: C:fructose, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#368, ChIPExo-RpoD_wt_glucose_cytosine_O2_anti-rpod_default_peaks): Environment: C:glucose, N:cytosine, e:O2 ,
ChIP Peak Analysis (#369, ChIPExo-Crp_wt_glycerol_NH4Cl_O2_anti-crp_rif_default_peaks): Environment: C:glycerol, N:NH4Cl, e:O2 rif,
ChIP Peak Analysis (#370, ChIPExo-Crp_Crp8myc_glycerol_NH4Cl_O2_anti-crp_default_peaks): Environment: C:glycerol, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#371, ChIPExo-NtrC_NtrC8myc_glucose_cytidine_O2_anti-myc_default_peaks): Environment: C:glucose, N:cytidine, e:O2 ,
ChIP Peak Analysis (#372, ChIPExo-RpoN_wt_glucose_NH4Cl_O2_anti-rpon_nitrogenstudy_default_peaks): Environment: C:glucose, N:NH4Cl, e:O2 nitrogenstudy,
ChIP Peak Analysis (#373, ChIPchip-Cra_Cra8myc_acetate_NH4Cl_O2_anti-myc_default_peaks): Environment: C:acetate, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#374, ChIPchip-PurR_PurR8myc_glucose_NH4Cl_O2_anti-myc_default_peaks): Environment: C:glucose, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#375, ChIPchip-Cra_Cra8myc_glucose_NH4Cl_anaerobic_anti-myc_default_peaks): Environment: C:glucose, N:NH4Cl, e:anaerobic ,
ChIP Peak Analysis (#376, ChIPExo-Crp_Crp8myc_glucose_NH4Cl_O2_anti-myc_default_peaks): Environment: C:glucose, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#377, ChIPExo-RpoN_wt_glucose_cytosine_O2_anti-rpon_default_peaks): Environment: C:glucose, N:cytosine, e:O2 ,
ChIP Peak Analysis (#378, ChIPExo-Crp_delAr1delAr2_glycerol_NH4Cl_O2_anti-crp_default_peaks): Environment: C:glycerol, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#379, ChIPchip-ArcA_ArcA8myc_glucose_NH4Cl_anaerobic_anti-myc_default_peaks): Environment: C:glucose, N:NH4Cl, e:anaerobic ,
ChIP Peak Analysis (#380, ChIPExo-RpoD_wt_glucose_NH4Cl_O2_anti-rpod_nitrogenstudy_default_peaks): Environment: C:glucose, N:NH4Cl, e:O2 nitrogenstudy,
ChIP Peak Analysis (#381, ChIPExo-Crp_Crp8myc_glycerol_NH4Cl_O2_anti-myc_default_peaks): Environment: C:glycerol, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#447, ChIPExo-Crp_delAr1_glycerol_NH4Cl_O2_anti-crp_gps-curated-HL28Aug14_peaks): Environment: C:glycerol, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#448, ChIPExo-Crp_delAr1delAr2_glycerol_NH4Cl_O2_anti-crp_gps-curated-HL28Aug14_peaks): Environment: C:glycerol, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#449, ChIPExo-Crp_delAr2_glycerol_NH4Cl_O2_anti-crp_gps-curated-HL28Aug14_peaks): Environment: C:glycerol, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#450, ChIPExo-Crp_wt_fructose_NH4Cl_O2_anti-crp_gps-curated-HL28Aug14_peaks): Environment: C:fructose, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#451, ChIPExo-Crp_wt_glucose_NH4Cl_O2_anti-crp_gps-curated-HL28Aug14_peaks): Environment: C:glucose, N:NH4Cl, e:O2 ,
ChIP Peak Analysis (#452, ChIPExo-Crp_wt_glycerol_NH4Cl_O2_anti-crp_gps-curated-HL28Aug14_peaks): Environment: C:glycerol, N:NH4Cl, e:O2 ]
In [2]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['is_train'] = np.random.uniform(0, 1, len(df)) <= .75
df['species'] = pd.Categorical(iris.target, iris.target_names)
df.head()
train, test = df[df['is_train']==True], df[df['is_train']==False]
features = df.columns[:4]
clf = RandomForestClassifier(n_jobs=2)
y, _ = pd.factorize(train['species'])
clf.fit(train[features], y)
preds = iris.target_names[clf.predict(test[features])]
pd.crosstab(test['species'], preds, rownames=['actual'], colnames=['preds'])
Out[2]:
preds
setosa
versicolor
virginica
actual
setosa
15
0
0
versicolor
0
9
1
virginica
0
3
10
In [6]:
clf.feature_importances_
Out[6]:
array([ 0.17111367, 0.03150372, 0.46515458, 0.33222803])
In [7]:
df
Out[7]:
sepal length (cm)
sepal width (cm)
petal length (cm)
petal width (cm)
is_train
species
0
5.1
3.5
1.4
0.2
True
setosa
1
4.9
3.0
1.4
0.2
False
setosa
2
4.7
3.2
1.3
0.2
True
setosa
3
4.6
3.1
1.5
0.2
True
setosa
4
5.0
3.6
1.4
0.2
False
setosa
5
5.4
3.9
1.7
0.4
True
setosa
6
4.6
3.4
1.4
0.3
True
setosa
7
5.0
3.4
1.5
0.2
True
setosa
8
4.4
2.9
1.4
0.2
True
setosa
9
4.9
3.1
1.5
0.1
True
setosa
10
5.4
3.7
1.5
0.2
True
setosa
11
4.8
3.4
1.6
0.2
True
setosa
12
4.8
3.0
1.4
0.1
True
setosa
13
4.3
3.0
1.1
0.1
True
setosa
14
5.8
4.0
1.2
0.2
False
setosa
15
5.7
4.4
1.5
0.4
True
setosa
16
5.4
3.9
1.3
0.4
False
setosa
17
5.1
3.5
1.4
0.3
False
setosa
18
5.7
3.8
1.7
0.3
True
setosa
19
5.1
3.8
1.5
0.3
True
setosa
20
5.4
3.4
1.7
0.2
True
setosa
21
5.1
3.7
1.5
0.4
True
setosa
22
4.6
3.6
1.0
0.2
False
setosa
23
5.1
3.3
1.7
0.5
True
setosa
24
4.8
3.4
1.9
0.2
False
setosa
25
5.0
3.0
1.6
0.2
True
setosa
26
5.0
3.4
1.6
0.4
True
setosa
27
5.2
3.5
1.5
0.2
True
setosa
28
5.2
3.4
1.4
0.2
False
setosa
29
4.7
3.2
1.6
0.2
True
setosa
...
...
...
...
...
...
...
120
6.9
3.2
5.7
2.3
True
virginica
121
5.6
2.8
4.9
2.0
False
virginica
122
7.7
2.8
6.7
2.0
True
virginica
123
6.3
2.7
4.9
1.8
False
virginica
124
6.7
3.3
5.7
2.1
True
virginica
125
7.2
3.2
6.0
1.8
False
virginica
126
6.2
2.8
4.8
1.8
True
virginica
127
6.1
3.0
4.9
1.8
True
virginica
128
6.4
2.8
5.6
2.1
True
virginica
129
7.2
3.0
5.8
1.6
True
virginica
130
7.4
2.8
6.1
1.9
True
virginica
131
7.9
3.8
6.4
2.0
True
virginica
132
6.4
2.8
5.6
2.2
True
virginica
133
6.3
2.8
5.1
1.5
True
virginica
134
6.1
2.6
5.6
1.4
True
virginica
135
7.7
3.0
6.1
2.3
True
virginica
136
6.3
3.4
5.6
2.4
True
virginica
137
6.4
3.1
5.5
1.8
False
virginica
138
6.0
3.0
4.8
1.8
False
virginica
139
6.9
3.1
5.4
2.1
True
virginica
140
6.7
3.1
5.6
2.4
True
virginica
141
6.9
3.1
5.1
2.3
True
virginica
142
5.8
2.7
5.1
1.9
True
virginica
143
6.8
3.2
5.9
2.3
False
virginica
144
6.7
3.3
5.7
2.5
True
virginica
145
6.7
3.0
5.2
2.3
True
virginica
146
6.3
2.5
5.0
1.9
True
virginica
147
6.5
3.0
5.2
2.0
True
virginica
148
6.2
3.4
5.4
2.3
True
virginica
149
5.9
3.0
5.1
1.8
True
virginica
150 rows × 6 columns
In [ ]:
Content source: steve-federowicz/om
Similar notebooks: