In [12]:
import numpy
import scipy
import scipy.cluster.hierarchy as sch
import pandas as pd
import matplotlib.pylab as plt
import seaborn as sns

%matplotlib inline

In [13]:
def fileToDict(file_name):
    """Read a comma-separated text file into a dict of column lists.

    The first line is taken as the header. Every following non-blank line is
    split on "," and each field is appended to the list belonging to the
    header entry at the same position. Fields that ``numpy.double`` can parse
    are stored as ``numpy.double``; all other fields are kept as strings.

    Splitting is a plain ``str.split(",")``, so quoted fields that contain
    commas are not supported.

    Parameters
    ----------
    file_name : str
        Path of the file to read.

    Returns
    -------
    dic : dict
        Maps each header name to the list of values found in that column.
    header : list of str
        Header names in file order.

    Raises
    ------
    IndexError
        If a data line has more fields than the header line.
    """
    with open(file_name) as in_file:
        header = in_file.readline().strip().split(",")
        dic = {name: [] for name in header}

        for line in in_file:
            line = line.strip()
            if not line:
                # A blank line would otherwise add "" to the first column
                # only, leaving the columns with unequal lengths.
                continue
            for idx, val in enumerate(line.split(",")):
                try:
                    val = numpy.double(val)
                except ValueError:
                    # Not numeric (e.g. a gene name): keep the raw text.
                    pass
                dic[header[idx]].append(val)

    return dic, header

In [11]:
# Parse the expression table into per-column lists plus the header order.
fileDic, header = fileToDict("/home/steen176/advbioinf/cluster/20161122_MayAppleExpressionData.csv")
# Label the rows with the "Gene Name" column. header[1:] leaves out the first
# header entry (presumably "Gene Name" itself -- confirm against the file).
df = pd.DataFrame(data=fileDic, columns=header[1:], index=fileDic["Gene Name"])



In [17]:
# Time points as the clustered items: transpose first so that each former
# column becomes a row for pdist.
# NOTE(review): this rebinds `df` to its transpose, so running the cell a
# second time flips the frame back, and later cells see whichever orientation
# was produced last.
df = df.T
distances = sch.distance.pdist(df, "euclidean")
clustering = sch.linkage(distances, "complete")
tree = sch.dendrogram(clustering)



In [16]:
#Genes as cluster
# Rows must be genes for this cell. The time-point cell rebinds `df` to its
# transpose, so pick the orientation whose row labels are the gene names
# instead of relying on which cells happened to run before this one.
gene_names = fileDic["Gene Name"]
if list(df.index) == list(gene_names):
    genes_df = df
elif list(df.columns) == list(gene_names):
    genes_df = df.transpose()
else:
    raise ValueError("Neither axis of df is labelled with the gene names")

distances = sch.distance.pdist(genes_df, metric="correlation")
clustering = sch.linkage(distances, method="complete")
# NOTE(review): correlation distances should lie in [0, 2], so a
# color_threshold of 4 appears to colour the whole tree as a single cluster --
# confirm this is intended.
tree = sch.dendrogram(clustering, leaf_font_size=2,
                      color_threshold=4, labels=gene_names)



In [33]:
# Heatmap of the transposed frame with complete-linkage / euclidean
# clustering; row_cluster=False leaves the row order as it is in the data.
sns.clustermap(
    df.T,
    metric="euclidean",
    method="complete",
    row_cluster=False,
)


Out[33]:
<seaborn.matrix.ClusterGrid at 0x7f3141e6ca50>