In [1]:
# Execute the shared startup script in this kernel before any other cell.
# NOTE(review): later cells use pd, plt, fg, os and PROJ without importing them
# in this notebook; presumably this script defines them -- confirm.
%run '../ipython_startup.py'
In [2]:
import cPickle as pickle
In [3]:
# Read the line-bias flag table from the 100 genome simulation output.
# The 'fusion_id' column becomes the row index.
fname = '../../pipeline_output/100_genome_simulation/fb551_100_genome_flag_line_bias.csv'
df = pd.read_csv(
    fname,
    index_col='fusion_id',
)

# Preview the first three rows.
df.head(3)
Out[3]:
In [4]:
# Heatmap of the flag table: rows of df run down the y-axis ("Exonic Regions")
# and columns of df run along the x-axis ("lines").
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111)

# aspect='auto' lets the image fill the axes instead of forcing square cells.
heat = ax.imshow(df, aspect='auto', cmap=plt.cm.Spectral)

ax.set_xlabel('lines')
ax.set_ylabel('Exonic Regions')
ax.set_title(u'Exonic regions that showed bias in 100 genome simulation.\nBias towards Tester (=1) and Bias towards Line (=-1)')

# Colour scale for the flag values shown in the image.
fig.colorbar(heat, ax=ax)
fig.tight_layout()

# Save the figure next to the other 100 genome simulation outputs.
fig.savefig('../../pipeline_output/100_genome_simulation/heatmap_genome_ambiguity_fb551_100_genome_simulation.png')
In [5]:
# Summarize exonic region bias across lines.
# numLineBias is the per-row sum of absolute flag values. skipna=False makes a
# missing value in a row propagate to NaN, as a plain Python sum() would.
numLineBias = df.abs().sum(axis=1, skipna=False)

# Number of non-missing values per exonic region.
# NOTE(review): numLine is not used by the thresholds below, which are the
# hard-coded values 50 and 100 -- confirm this is intended.
numLine = df.count(axis=1)

# Flag definitions, in the order the columns are added:
#   no bias       -> summed |flag| is 0
#   any bias      -> summed |flag| is above 0
#   bias in half  -> summed |flag| is at least 50
#   bias in all   -> summed |flag| is exactly 100
flagDefinitions = [
    ('flag_exons_no_bias_simulated_lines', (numLineBias == 0)),
    ('flag_exons_w_bias_simulated_lines', (numLineBias > 0)),
    ('flag_exons_bias_in_half_simulated_lines', (numLineBias >= 50)),
    ('flag_exons_bias_in_all_simulated_lines', (numLineBias == 100)),
]

# Build the flags table on the same index as df and add one column per flag.
flags = fg.FlagsDataFrame(index=df.index)
for flagName, flagMask in flagDefinitions:
    flags.addColumn(flagName, flagMask)

# Report the per-flag counts, then the total number of exonic regions.
flagCounts = flags.sum()
print(flagCounts)
numExonicRegions = flags.shape[0]
print('Number of exonic regions: {}'.format(numExonicRegions))
In [7]:
# Export the flag table to an Excel file for publication.
# reset_index() turns the index into a regular column, and the sheet is then
# written without a separate index column.
flagsI = flags.reset_index()
excelPath = '../../pipeline_output/100_genome_simulation/flag_exonic_region_w_and_wo_bias_100_genome_simulation.xls'
flagsI.to_excel(excelPath, index=None)
In [8]:
# Export list of fusion ids to drop (currently 807 in all lines).
# The list holds the fusion_id of every row whose
# 'flag_exons_bias_in_all_simulated_lines' flag equals 1.
dropList = flagsI.loc[flagsI['flag_exons_bias_in_all_simulated_lines'] == 1, 'fusion_id'].tolist()

# Write the pickle inside a `with` block so the file is flushed and closed
# deterministically, rather than whenever the handle is garbage collected.
dropListPath = os.path.join(PROJ, 'pipeline_output/100_genome_simulation/exonic_region_drop_list.pkl')
with open(dropListPath, 'wb') as dropListFile:
    pickle.dump(dropList, dropListFile)
In [ ]: