In [409]:
import pandas as pd
import palettable as pal
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42
%matplotlib inline
import cPickle as cpk
In [410]:
# Load the pickled 'unconnected' counts and preview the first two rows.
# The file is opened in a with-block so the handle is closed as soon as the
# load finishes (the bare open() call previously left it open).
# NOTE(review): unpickling executes arbitrary code from the file -- only load
# pickles produced by a trusted step of this analysis.
with open('gatherCounts_unconnected_norm2mean.2016.03.31.pickle', 'rb') as pickle_file:
    unconnectedCounts = cpk.load(pickle_file)
unconnectedCounts.head(2)
Out[410]:
In [411]:
# Load the pickled 'connected' counts and preview the first two rows.
# The file is opened in a with-block so the handle is closed as soon as the
# load finishes (the bare open() call previously left it open).
# NOTE(review): unpickling executes arbitrary code from the file -- only load
# pickles produced by a trusted step of this analysis.
with open('gatherCounts_norm2mean.2016.03.31.pickle', 'rb') as pickle_file:
    connectedCounts = cpk.load(pickle_file)
connectedCounts.head(2)
Out[411]:
In [412]:
# Well, reading in was easy; now the question is how to plot these.
# First attempt is way, way too busy:
In [413]:
# Split the column labels into two interleaved sets by taking every second
# label: cpdCols starts at position 2, geneCols at position 3
# (presumably compound columns and gene columns respectively -- confirm).
# NOTE(review): colLabel is not defined in any cell visible above this one;
# it presumably comes from a deleted cell or leftover kernel state. Define it
# explicitly before this cell so the notebook survives Restart & Run All.
cpdCols = colLabel[2::2]
geneCols = colLabel[3::2]
In [414]:
# Select the geneCols columns (all rows) from the connected counts and draw
# them as a horizontal bar chart -- presumably the "too busy" first attempt.
tData = connectedCounts.loc[:,geneCols]
tData.plot(kind = 'barh')
Out[414]:
In [415]:
# Show the previously saved combined K-means figure inline.
# Image is imported from IPython.display, the public display API, rather than
# from the internal IPython.core.display module.
from IPython.display import Image
Image(filename='CombinedKOandCO_Kmeans_allGenesAndCpds.png')
Out[415]:
In [416]:
# Shortcut: the figures for the connected and unconnected K-means groups were
# matched up by hand, and the titles in the figure were used to build a table
# in Excel. Read that hand-made table in here (row 0 of the CSV is the header).
cf = pd.read_csv('SummarizeConnected_and_Unconnected.csv',header=0)
In [417]:
# Display the table that was just read from the CSV.
cf
Out[417]:
In [418]:
# The row at position 12 is empty -- inspect it
cf.iloc[12]
Out[418]:
In [419]:
# Delete the empty row (index label 12).
# errors='ignore' keeps this cell re-runnable: once the row has been removed,
# running the cell again is a no-op instead of raising because label 12 no
# longer exists in the index.
cf = cf.drop([12], errors='ignore')
In [420]:
#make the labels to use on the figure
makeNclusters = 6
bar_labels = []
for item in range(makeNclusters):
bar_labels.append('Km' + str(item))
In [421]:
# Display the generated labels as a quick check.
bar_labels
Out[421]:
In [422]:
# Split the table by its 'type' column into the gene rows and the compound
# rows, and index each subset by the 'Kmeans' column.
justGenes = cf[cf['type'] == 'gene'].set_index('Kmeans')
justCpds = cf[cf['type'] == 'cpd'].set_index('Kmeans')
In [423]:
# Grouped bar chart comparing unconnected vs. connected counts per K-means
# group. Gene counts are drawn upward; compound counts are negated so they are
# drawn downward from zero. The figure is saved to connectedVunconnected.png.

# One x slot per row of justGenes, and the width of each individual bar.
# NOTE(review): justCpds is plotted at the same positions, so it is assumed to
# have the same number of rows as justGenes -- confirm.
pos = list(range(len(justGenes)))
wid = 0.25
colors = pal.colorbrewer.qualitative.Set1_4.hex_colors

fig, ax = plt.subplots(figsize=(10, 5))

# Left edges of the 'unconnected' bars. The 'connected' bars start at pos, so
# each unconnected/connected pair straddles its tick.
left_of_tick = [p - wid for p in pos]

# align='edge' is spelled out because the default bar alignment differs between
# matplotlib versions; with it, the x values are always the bars' left edges.
# Each series carries its own label so the legend entries are tied to the right
# bars rather than matched up implicitly by drawing order.
ax.bar(left_of_tick, justGenes['unconnected'], width=wid, color=colors[0],
       align='edge', label='unconnected gene')
ax.bar(pos, justGenes['connected'], width=wid, color=colors[1],
       align='edge', label='connected gene')
# Now bring in the compounds -- made those negative numbers.
ax.bar(left_of_tick, -justCpds['unconnected'], width=wid, color=colors[2],
       align='edge', label='unconnected cpd')
ax.bar(pos, -justCpds['connected'], width=wid, color=colors[3],
       align='edge', label='connected cpd')

# Axis label and title
ax.set_ylabel('# of genes/compounds')
ax.set_title('neg #s are compounds')

# One tick per K-means group, labelled with bar_labels
ax.set_xticks(pos)
ax.set_xticklabels(bar_labels)

# Leave half a bar-pair of padding on either side
ax.set_xlim(min(pos) - wid * 2, max(pos) + wid * 2)

# Legend built from the per-series labels above
ax.legend(loc='upper left')

# Straight black line at zero, spanning the full width of the axes
ax.axhline(0, color='#000000')

# Save through the figure handle rather than pyplot's "current figure".
# Author's note: calling plt.show() before saving gave a blank file --
# presumably because the shown figure is no longer the current figure
# afterwards; saving via fig avoids depending on that state.
fig.savefig('connectedVunconnected.png')
In [ ]: