In [1]:
import numpy as np
In [2]:
import os
In [3]:
import pandas as pd
In [4]:
from sklearn.cluster import bicluster
2015 December 4-6
Loading and exploring UTSW RNAi dataset...
In [5]:
# Relative location of the siRNA screen table, assembled component by
# component so the OS-specific separator is used.
dfFile = os.path.join(
    '..',                    # parent of the current working directory
    'data',
    'siRNA_dataframe.csv',
)
In [6]:
# Read the screen table from disk; the first CSV column is used as the row
# index rather than as a data column.
RNAiDf = pd.read_csv(filepath_or_buffer=dfFile, index_col=0)
In [9]:
# Display the last five rows (the default for tail) as a quick look at the data.
# NOTE(review): this cell's saved execution count is out of sequence with the
# cells that follow, so its stored output may not reflect the freshly loaded
# frame -- re-run the notebook top to bottom to confirm.
RNAiDf.tail(n=5)
Out[9]:
Impute NaN values by replacing each one with the median of its column. The column is used because it contains more data points, which gives a more reliable statistic, and because filling with the median amounts to assuming that the gene deletion in question has little effect on the cell line.
In [7]:
# Per-column median that ignores NaNs: axis=0 reduces over the rows, giving
# one replacement value per column, in the same order as RNAiDf.columns.
rplVals = np.nanmedian(RNAiDf.values, axis=0)
In [8]:
# Fill every column's missing entries with that column's median; rplVals[i]
# is the value for the i-th column of RNAiDf.
#
# The filled column is assigned back to the frame on purpose. Calling
# `RNAiDf[col].replace(..., inplace=True)` acts on the intermediate Series
# returned by `RNAiDf[col]`, which pandas does not guarantee to be a view of
# the frame: pandas 2.x warns about the pattern and, with Copy-on-Write
# enabled, the frame is never updated, so NaNs would silently remain.
for i, col in enumerate(RNAiDf.columns):
    RNAiDf[col] = RNAiDf[col].fillna(rplVals[i])
In [9]:
# Spectral co-clustering estimator with the library defaults, plus a fixed
# random_state so that re-running the notebook reproduces the same clusters
# (the estimator's initialisation is otherwise randomised).
# NOTE(review): the public `sklearn.cluster.bicluster` submodule used by this
# notebook's import was removed in scikit-learn 0.24; on newer versions the
# class must be imported from `sklearn.cluster` directly -- confirm the
# installed version.
modelSC = bicluster.SpectralCoclustering(random_state=0)
In [10]:
# Fit the co-clustering model on the frame's underlying value array; the
# row and column labels are not passed to the estimator.
modelSC.fit(RNAiDf.values)
In [11]:
# Spectral biclustering estimator with the library defaults, plus a fixed
# random_state so that re-running the notebook reproduces the same clusters
# (the estimator's initialisation is otherwise randomised).
# NOTE(review): the public `sklearn.cluster.bicluster` submodule used by this
# notebook's import was removed in scikit-learn 0.24; on newer versions the
# class must be imported from `sklearn.cluster` directly -- confirm the
# installed version.
modelSB = bicluster.SpectralBiclustering(random_state=0)
In [12]:
# Fit the biclustering model on the frame's underlying value array; the
# row and column labels are not passed to the estimator.
modelSB.fit(RNAiDf.values)
In [ ]: