In [ ]:
import common
In [41]:
common.plot_color_legend()
In [44]:
common.plot_dropout_interactive()
In [39]:
In [2]:
import macosko2015
import seaborn as sns
In [3]:
expression, cell_metadata, gene_metadata = macosko2015.load_big_clusters()
In [4]:
cluster_ids_unique = cell_metadata['cluster_id'].unique()
cluster_ids_unique
Out[4]:
In [6]:
import pandas as pd
%matplotlib inline
In [13]:
# Names keyed by cluster number.
cluster_n_to_name = {
    24: 'Rods',
    25: 'Cones',
    26: 'Bipolar cells (group1)',
    27: 'Bipolar cells (group2)',
    33: 'Bipolar cells (group3)',
    34: 'Muller glia',
}

# The same names keyed by the zero-padded string id (24 -> 'cluster_24').
cluster_id_to_name = {
    'cluster_{}'.format(str(number).zfill(2)): label
    for number, label in cluster_n_to_name.items()
}

# One 'Set2' palette colour per entry of `cluster_ids_unique`, paired in order.
colors = sns.color_palette(palette='Set2', n_colors=len(cluster_ids_unique))
id_to_color = {
    cluster_id: color
    for cluster_id, color in zip(cluster_ids_unique, colors)
}
id_to_color
Out[13]:
In [17]:
# Per-cell colour: look up the 'cluster_id' of every row of `expression` in
# `cell_metadata` and translate it through `id_to_color`.
# The original `pd.Series[... for ...]` subscripted the class with a bare
# generator expression, which is a SyntaxError; the list has to be passed to
# the pd.Series constructor instead.
cell_cluster_ids = cell_metadata.loc[expression.index, 'cluster_id']
# Keep the cell index on the result so the colours stay aligned with the rows
# of `expression` (seaborn matches Series row/col colours to the data by label).
color_labels = pd.Series([id_to_color[i] for i in cell_cluster_ids],
                         index=cell_cluster_ids.index)

# Cluster name -> colour for every id in `cluster_ids_unique`, as a Series.
cluster_names_to_color = pd.Series({
    cluster_id_to_name[i]: id_to_color[i]
    for i in cluster_ids_unique
})
cluster_names_to_color
Out[17]:
In [20]:
corr = expression.corr()
corr.head()
Out[20]:
In [21]:
common.clustermap(expression.T.corr(), row_colors=color_labels, col_colors=color_labels)
# plt.show()
Out[21]:
In [19]:
# Build a name -> colour lookup from a 'Set2' palette. The palette has one
# colour per entry of `cluster_ids_unique`; zip() pairs them in order with
# `unique_cluster_names` and stops at the shorter of the two.
# NOTE(review): `unique_cluster_names` is not assigned in any cell visible
# here -- confirm where it comes from, otherwise this cell raises NameError on
# a fresh kernel.
colors = sns.color_palette(palette='Set2', n_colors=len(cluster_ids_unique))
name_to_color = dict(zip(unique_cluster_names, colors))
name_to_color
Out[19]:
In [20]:
# One colour per row of `expression`, looked up from that row's 'cluster_id'
# value in `cell_metadata`.
# NOTE(review): the lookup table is called `name_to_color` while the keys fed
# into it come from the 'cluster_id' column -- confirm that column really holds
# the same values the table was built from, or this raises KeyError.
color_labels = [name_to_color[name] for name in cell_metadata.loc[
expression.index, 'cluster_id']]
In [11]:
expression.head()
Out[11]:
In [9]:
cell_metadata.head()
Out[9]:
In [ ]:
import os
import common
# Assign notebook and folder names
notebook_name = '04_dropout_widget'
figure_folder = os.path.join(common.FIGURE_FOLDER, notebook_name)
data_folder = os.path.join(common.DATA_FOLDER, notebook_name)

# Make the folders.
# os.makedirs(..., exist_ok=True) is the pure-Python equivalent of `mkdir -p`:
# it creates missing parents and is a no-op when the directory already exists.
# Unlike the `! mkdir -p $var` shell escape it does not depend on a POSIX
# shell and is not broken by spaces or shell metacharacters in the path.
os.makedirs(figure_folder, exist_ok=True)
os.makedirs(data_folder, exist_ok=True)
In [3]:
common.expression.multiply?
In [2]:
In [ ]:
# common.   <- dangling attribute access (presumably a tab-completion leftover).
# A bare `common.` is a SyntaxError, so it is kept only as a comment to let the
# notebook run top to bottom.
In [14]:
%load_ext autoreload
%autoreload 2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# %matplotlib inline
In [3]:
import macosko2015
expression, cell_metadata, gene_metadata = macosko2015.load_big_clusters()
expression.head()
Out[3]:
In [5]:
cell_metadata.head()
Out[5]:
In [ ]:
# NOTE(review): pd.read_csv() requires a file path or buffer as its first
# argument; as written this call raises TypeError. The intended CSV path is not
# recoverable from this notebook -- fill it in or remove the cell.
expression = pd.read_csv()
In [3]:
expression, cell_metadata, gene_metadata = macosko2015.load_big_clusters()
print(expression.shape)
expression.head()
Out[3]:
In [4]:
print(cell_metadata.shape)
cell_metadata.head()
Out[4]:
In [5]:
print(gene_metadata.shape)
gene_metadata.head()
Out[5]:
In [7]:
# %matplotlib notebook
In [8]:
import itertools
In [9]:
# cluster_name_to_ids = {'Horizontal cells': 1, 'Retinal ganglion cells': 2,
# 'Amacrine cells': range(3, 24), "Rods": 24,
# 'Cones': 25, 'Bipolar cells': range(26, 34),
# 'Muller glia': 34, 'Astrocytes': 35,
# 'Fibroblasts': 36, 'Vascular endothelium': 37,
# 'Pericytes': 38, 'Microglia': 39}
# [zip(itertools.repeat(name), i)
# for name, i in cluster_name_to_ids.items()]
In [10]:
# input_folder = os.path.join(common.DATA_FOLDER, '002_robust_pca')
# csv = os.path.join(input_folder, 'lowrank.csv')
# lowrank = pd.read_csv(csv, index_col=0)
# print(lowrank.shape)
# lowrank.head()
In [11]:
cluster_ids = np.unique(ds.cell_metadata.sel(cell_feature='cluster_id'))
cluster_ids
In [ ]:
# Names keyed by cluster number.
cluster_n_to_name = {
    24: 'Rods',
    25: 'Cones',
    26: 'Bipolar cells (group1)',
    27: 'Bipolar cells (group2)',
    33: 'Bipolar cells (group3)',
    34: 'Muller glia',
}

# The same names keyed by the zero-padded string id (24 -> 'cluster_24').
cluster_id_to_name = {
    'cluster_{:02d}'.format(number): label
    for number, label in cluster_n_to_name.items()
}
cluster_id_to_name
In [ ]:
import matplotlib as mpl
In [ ]:
ds.expression.indexes['cell']
In [ ]:
# One 'Set2' palette colour per entry of `cluster_ids`, paired in order and
# stored as a hex string.
colors = sns.color_palette(palette='Set2', n_colors=len(cluster_ids))
id_to_color = {
    cluster_id: mpl.colors.rgb2hex(rgb)
    for cluster_id, rgb in zip(cluster_ids, colors)
}
id_to_color
In [ ]:
ds.cell_metadata['cell_feature']
In [ ]:
# Cluster name -> colour, for every id in `cluster_ids`.
cluster_names_to_color = {
    cluster_id_to_name[cluster_id]: id_to_color[cluster_id]
    for cluster_id in cluster_ids
}
cluster_names_to_color
In [ ]:
# Boolean mask with the same shape as `ds.expression`: True wherever an
# independent standard-normal draw exceeds -1, i.e. for roughly 84% of entries
# (P(Z > -1) ~= 0.841).
# NOTE(review): no random seed is set in the visible cells, so the mask differs
# on every run.
dropmask = np.random.randn(*ds.expression.shape) > -1
dropmask.shape
In [ ]:
# Apply the mask by element-wise multiplication: entries where `dropmask` is
# False are multiplied by 0, the rest are left unchanged.
# The frame is built from the bare array, so it carries default integer
# row/column labels rather than the labels of `ds.expression`.
dropped = pd.DataFrame(ds.expression.values * dropmask)
print(dropped.shape)
dropped.head()
# dropped
In [ ]:
common.clustermap(dropped.T.corr(method='spearman'), col_colors=color_labels)
In [ ]:
plt.show()
In [ ]:
from ipywidgets import interact
from ipywidgets import IntRangeSlider
# table1_t = table1.T
# lowrank_t = lowrank.T
from ipywidgets import IntSlider
expression = ds.expression.to_pandas()
print(expression.shape)
expression.head()
In [ ]:
cluster_ids_in_data = ds.cell_metadata.sel(cell_feature='cluster_id').values
# cluster_ids_in_data
In [ ]:
color_labels = [id_to_color[i] for i in cluster_ids_in_data]
color_labels[:4]
In [12]:
def plot_dropout(percent_gene_dropout=50,
                 correlation='pearson', linkage_method='ward',
                 distance_metric='euclidean'):
    """Randomly zero out entries of ``expression`` and draw a clustermap.

    Reads the notebook-level ``expression`` and ``color_labels`` variables.

    Parameters
    ----------
    percent_gene_dropout : number
        Percentage (0-100) used as the dropout threshold. Each entry is kept
        only if an independent uniform [0, 1) draw is strictly greater than
        ``percent_gene_dropout / 100``, so at 100 every entry is zeroed.
    correlation : str
        Passed as ``method`` to ``DataFrame.corr``.
    linkage_method : str
        Passed as ``method`` to ``common.clustermap``.
    distance_metric : str
        Passed as ``metric`` to ``common.clustermap``.

    The threshold, the mask shape and the head of the masked data are printed
    before plotting. The figure title lists all four settings.
    """
    title = '{}%, {}, {}, {}'.format(
        percent_gene_dropout, correlation, linkage_method, distance_metric)

    dropout_fraction = percent_gene_dropout / 100.
    print('threshold', dropout_fraction)

    # True = keep the value, False = drop it (multiplied by 0 below).
    keep = np.random.uniform(size=expression.shape) > dropout_fraction
    print(keep.shape)

    masked = expression * keep
    print(masked.head())

    # NOTE(review): this correlates the *columns* of `expression`, whereas the
    # other clustermap cells in this notebook call `.T.corr()` before passing
    # `color_labels` -- confirm the orientation matches the colour labels.
    correlations = masked.corr(method=correlation)
    grid = common.clustermap(
        correlations,
        col_colors=color_labels,
        row_colors=color_labels,
        metric=distance_metric,
        method=linkage_method,
        figsize=(4, 4),
    )
    grid.fig.suptitle(title)
    plt.show()


def plot_dropout_interactive():
    """Expose ``plot_dropout`` through ipywidgets controls.

    The dropout percentage gets a 0-100 slider in steps of 10 (starting at 0);
    the correlation, linkage and distance options are offered as lists of choices.
    """
    controls = dict(
        percent_gene_dropout=IntSlider(value=0, min=0, max=100, step=10),
        correlation=['pearson', 'spearman'],
        linkage_method=['ward', 'average', 'single', "complete"],
        distance_metric=['euclidean', "cityblock"],
    )
    interact(plot_dropout, **controls)


plot_dropout_interactive()
In [ ]:
warnings.onceregistry
In [ ]:
common.clustermap(table1.T.corr(method='spearman'), col_colors=color_labels)
In [ ]:
import sys
sys.path.extend(['/Users/olgabot/code/robust-pca/', '/Users/olgabot/code/rpcaADMM/'])
import r_pca
import rpcaADMM
In [ ]:
%%time
# Fit r_pca.R_pca on the raw array behind `dropped`.
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# `.values` yields the same NumPy array and works on every pandas version.
rpca_alm = r_pca.R_pca(dropped.values, lmbda=0.1)
rpca_alm.fit()
In [ ]:
rpca_alm.lmbda
In [ ]:
sns.heatmap(dropped)
In [ ]:
sns.heatmap(rpca_alm.L)
In [ ]:
sns.heatmap(rpca_alm.S)
In [ ]:
L = pd.DataFrame(rpca_alm.L, index=dropped.index, columns=dropped.columns)
print(L.shape)
L.head()
In [ ]:
common.clustermap(L.T.corr(method='spearman'), col_colors=color_labels)
In [ ]: