notebook.community

Edit and run



In [16]:

    
import os
import common

# Assign notebook and folder names
notebook_name = '06_exploring_with_josh'
figure_folder = os.path.join(common.FIGURE_FOLDER, notebook_name)
data_folder = os.path.join(common.DATA_FOLDER, notebook_name)

# Make the folders
! mkdir -p $figure_folder
! mkdir -p $data_folder



In [17]:

    
%pdb









    



Automatic pdb calling has been turned OFF



In [39]:

    
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline









    



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload



In [24]:

    
input_folder = os.path.join(common.DATA_FOLDER, '001_downsample_macosko_data')

csv = os.path.join(input_folder, 'expression_table1_subset.csv')

table1 = pd.read_csv(csv, index_col=0)
print(table1.shape)
table1.head()









    



(300, 259)






    Out[24]:







  
    
      
      RHO
      GNAT1
      SLC24A1
      PDE6B
      PDC
      CNGA1
      RP1
      SAG
      NR2E3
      NRL
      ...
      SLC6A6
      MAP1B
      TMA7
      STX3
      SYT1
      CRX
      SNAP25
      MPP4
      NEUROD1
      A930011O12RIK
    
    
      barcode
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      r1_TTCCTGCTAGGC
      14
      3
      1
      3
      12
      0
      1
      7
      2
      2
      ...
      1
      1
      2
      0
      0
      0
      0
      1
      0
      0
    
    
      r1_TGGAGATACTCT
      23
      8
      6
      4
      13
      9
      2
      19
      1
      1
      ...
      3
      0
      2
      1
      0
      1
      0
      2
      0
      1
    
    
      r1_CGTCTACATCCG
      14
      4
      7
      1
      6
      3
      0
      13
      2
      2
      ...
      0
      1
      0
      3
      0
      1
      0
      2
      0
      0
    
    
      r1_CAAGCTTGGCGC
      62
      18
      10
      20
      29
      2
      8
      31
      9
      2
      ...
      0
      5
      7
      3
      2
      6
      2
      3
      7
      11
    
    
      r1_ACTCACATAGAG
      10
      1
      0
      1
      5
      2
      1
      7
      3
      1
      ...
      1
      1
      2
      3
      1
      2
      1
      0
      3
      0
    
  

5 rows × 259 columns

Assign colors basd on clusters



In [ ]:

    
cluster_name_to_id = {'Horizontal cells': [1], 'Retinal Ganglion cells': [2], 
                      'Amacrine cells': np.arange(3, 24),
                      'Rods', [24], 'Cones': [25], 
                      'Bipolar cells': np.arange(26, 34),
                      ''}



In [5]:

    
cluster_identities = pd.read_table('macosko2015/retina_clusteridentities.txt', header=None,
                                   names=['barcode', 'cluster_id'], index_col=0, squeeze=True)
print(cluster_identities.shape)
cluster_identities.head()









    



(44808,)






    Out[5]:





barcode
r1_GGCCGCAGTCCG     2
r1_CTTGTGCGGGAA     2
r1_GCGCAACTGCTC     2
r1_GATTGGGAGGCA     2
r1_GTGCCGCCTCTC    25
Name: cluster_id, dtype: int64



In [6]:

    
cluster_identities_table1 = cluster_identities.loc[table1.index]
cluster_identities_table1.head()









    Out[6]:





barcode
r1_TTCCTGCTAGGC    24
r1_TGGAGATACTCT    24
r1_CGTCTACATCCG    24
r1_CAAGCTTGGCGC    24
r1_ACTCACATAGAG    24
Name: cluster_id, dtype: int64



In [15]:

    
cluster_ids = cluster_identities_table1.unique()
cluster_ids









    Out[15]:





array([24, 25, 26, 27, 33, 34])



In [32]:

    
cluster_names = cluster_identities_table1.map(common.cluster_id_to_name)
cluster_names.head()









    Out[32]:





barcode
r1_TTCCTGCTAGGC    Rods
r1_TGGAGATACTCT    Rods
r1_CGTCTACATCCG    Rods
r1_CAAGCTTGGCGC    Rods
r1_ACTCACATAGAG    Rods
Name: cluster_id, dtype: object



In [7]:

    
colors = sns.color_palette(palette='Set2', n_colors=len(cluster_ids))
id_to_color = dict(zip(cluster_ids, colors))

color_labels = [id_to_color[i] for i in cluster_identities_table1]
color_labels[:4]









    Out[7]:





[(0.40000000000000002, 0.76078431372549016, 0.6470588235294118),
 (0.40000000000000002, 0.76078431372549016, 0.6470588235294118),
 (0.40000000000000002, 0.76078431372549016, 0.6470588235294118),
 (0.40000000000000002, 0.76078431372549016, 0.6470588235294118)]

Spot check some genes



In [25]:

    
genes_of_interest = ['RHO', 'PAX6', 'GNAT1', 'SLC24A1']



In [56]:

    
subset = table1[genes_of_interest]
subset.head()









    Out[56]:







  
    
      
      RHO
      PAX6
      GNAT1
      SLC24A1
    
    
      barcode
      
      
      
      
    
  
  
    
      r1_TTCCTGCTAGGC
      14
      0
      3
      1
    
    
      r1_TGGAGATACTCT
      23
      1
      8
      6
    
    
      r1_CGTCTACATCCG
      14
      1
      4
      7
    
    
      r1_CAAGCTTGGCGC
      62
      0
      18
      10
    
    
      r1_ACTCACATAGAG
      10
      0
      1
      0



In [52]:

    
# subset_log = np.log(subset+1)
# subset_log.head()









    Out[52]:







  
    
      
      RHO
      PAX6
      GNAT1
      SLC24A1
    
    
      barcode
      
      
      
      
    
  
  
    
      r1_TTCCTGCTAGGC
      2.708050
      0.000000
      1.386294
      0.693147
    
    
      r1_TGGAGATACTCT
      3.178054
      0.693147
      2.197225
      1.945910
    
    
      r1_CGTCTACATCCG
      2.708050
      0.693147
      1.609438
      2.079442
    
    
      r1_CAAGCTTGGCGC
      4.143135
      0.000000
      2.944439
      2.397895
    
    
      r1_ACTCACATAGAG
      2.397895
      0.000000
      0.693147
      0.000000



In [57]:

    
subset_names = subset.join(cluster_names)
subset_names.head()









    Out[57]:







  
    
      
      RHO
      PAX6
      GNAT1
      SLC24A1
      cluster_id
    
    
      barcode
      
      
      
      
      
    
  
  
    
      r1_TTCCTGCTAGGC
      14
      0
      3
      1
      Rods
    
    
      r1_TGGAGATACTCT
      23
      1
      8
      6
      Rods
    
    
      r1_CGTCTACATCCG
      14
      1
      4
      7
      Rods
    
    
      r1_CAAGCTTGGCGC
      62
      0
      18
      10
      Rods
    
    
      r1_ACTCACATAGAG
      10
      0
      1
      0
      Rods



In [58]:

    
sns.pairplot(subset_names, hue='cluster_id')









    Out[58]:





<seaborn.axisgrid.PairGrid at 0x1320d9ef0>



In [46]:

    
np









    Out[46]:





<module 'numpy' from '/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/numpy/__init__.py'>



In [47]:

    
sns.pairplot(subset.apply(np.log), hue='cluster_id')









    



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-47-0ab0021a6378> in <module>()
----> 1 sns.pairplot(subset.apply(np.log), hue='cluster_id')

~/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/pandas/core/frame.py in apply(self, func, axis, broadcast, raw, reduce, args, **kwds)
   4243         if isinstance(f, np.ufunc):
   4244             with np.errstate(all='ignore'):
-> 4245                 results = f(self.values)
   4246             return self._constructor(data=results, index=self.index,
   4247                                      columns=self.columns, copy=False)

AttributeError: 'int' object has no attribute 'log'

Plot the original, dropout'd data



In [8]:

    
sns.set(style='whitegrid')



In [9]:

    
mask = table1 == 0

fig, ax = plt.subplots()
sns.heatmap(table1, mask=mask, xticklabels=[], yticklabels=[])
ax.set(xlabel='Genes', ylabel='Cells')









    Out[9]:





[<matplotlib.text.Text at 0x10c690e80>, <matplotlib.text.Text at 0x10c8c2ba8>]

Maybe this is small enough for a clustered heatmap



In [11]:

    
clustergrid = sns.clustermap(table1, mask=mask, xticklabels=[], yticklabels=[], 
                             row_colors=color_labels)









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)

Add Robust PCA implementations to path



In [62]:

    
import sys

sys.path.extend(['/Users/olgabot/code/robust-pca/', '/Users/olgabot/code/rpcaADMM/'])

import r_pca
import rpcaADMM



In [63]:

    
r_pca.R_pca??



In [64]:

    
%%time
rpca_alm = r_pca.R_pca(table1.as_matrix())
rpca_alm.fit()









    



iteration: 1, error: 56422.70929985199
iteration: 100, error: 0.6880109089868683
iteration: 200, error: 0.14702804887281945
iteration: 251, error: 0.08979496489742976
CPU times: user 15.4 s, sys: 127 ms, total: 15.6 s
Wall time: 3.92 s



In [71]:

    
sns.distplot(s[s > 0.1], kde=False)









    Out[71]:





<matplotlib.axes._subplots.AxesSubplot at 0x116624e10>



In [72]:

    
diff = rpca_alm.L - table1



In [73]:

    
datasets = {'Original': table1, 'Low-Rank':rpca_alm.L, 'Sparse': rpca_alm.S, 
            'Difference: Original - Low-Rank': diff}

common.heatmaps(datasets)



In [74]:

    
L = pd.DataFrame(rpca_alm.L, index=table1.index, columns=table1.columns)
L.head()









    Out[74]:







  
    
      
      RHO
      GNAT1
      SLC24A1
      PDE6B
      PDC
      CNGA1
      RP1
      SAG
      NR2E3
      NRL
      ...
      SLC6A6
      MAP1B
      TMA7
      STX3
      SYT1
      CRX
      SNAP25
      MPP4
      NEUROD1
      A930011O12RIK
    
    
      barcode
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      r1_TTCCTGCTAGGC
      7.272295
      3.048168
      1.562439
      2.198954
      4.326513
      1.385031
      1.989380
      6.541110
      1.689744
      1.345555
      ...
      0.313114
      0.767164
      0.864139
      0.388544
      0.150222
      0.752207
      0.272070
      0.832861
      1.258661
      0.856469
    
    
      r1_TGGAGATACTCT
      13.174154
      5.480442
      2.635686
      3.713715
      7.521109
      2.310180
      3.257731
      11.804131
      2.741139
      2.363235
      ...
      0.815807
      1.031292
      1.597556
      0.999699
      0.884529
      1.033574
      0.724387
      1.236772
      2.351665
      1.286273
    
    
      r1_CGTCTACATCCG
      8.820288
      3.785870
      1.735920
      2.396476
      5.171789
      1.561243
      2.092837
      7.836292
      1.876383
      1.537487
      ...
      0.382253
      0.972003
      0.828423
      0.656586
      0.391978
      0.916053
      0.429003
      0.958758
      1.373776
      0.703660
    
    
      r1_CAAGCTTGGCGC
      25.982230
      11.021147
      5.608453
      7.951609
      15.774301
      4.953723
      7.427482
      24.229816
      6.077871
      4.769225
      ...
      1.644721
      3.139924
      3.271503
      2.906614
      1.999058
      2.751189
      2.020806
      3.190714
      5.678790
      3.812966
    
    
      r1_ACTCACATAGAG
      7.614892
      3.096781
      1.282597
      1.838656
      4.131712
      1.165996
      1.602792
      6.662531
      1.363853
      1.237918
      ...
      0.687691
      0.804546
      1.079846
      0.874430
      0.684653
      0.541885
      0.754388
      0.499074
      1.349995
      0.391946
    
  

5 rows × 259 columns



In [75]:

    
L_subset = L[genes_of_interest]
L_names = L_subset.join(cluster_names)

sns.pairplot(L_names, hue='cluster_id')









    Out[75]:





<seaborn.axisgrid.PairGrid at 0x117764860>



In [76]:

    
sns.distplot(table1.values.flat)









    Out[76]:





<matplotlib.axes._subplots.AxesSubplot at 0x11648ff28>



In [78]:

    
sns.distplot(L.values.flat)









    Out[78]:





<matplotlib.axes._subplots.AxesSubplot at 0x117523780>



In [79]:

    
diff = table1 - L
diff_tidy = diff.unstack().reset_index()
diff_tidy['dataset'] = 'Difference'

table1_tidy = table1.unstack().reset_index()
table1_tidy['dataset'] = 'Original'
L_tidy = L.unstack().reset_index()
L_tidy['dataset'] = 'Low-Rank'

tidy = pd.concat([table1_tidy, L_tidy, diff_tidy])
tidy = tidy.rename(columns={0: 'molecules'})
tidy.head()

sns.violinplot(x='dataset', y='molecules', data=tidy)









    Out[79]:





<matplotlib.axes._subplots.AxesSubplot at 0x11596ab38>



In [80]:

    
sns.boxplot(x='dataset', y='molecules', data=tidy)









    Out[80]:





<matplotlib.axes._subplots.AxesSubplot at 0x115c07278>



In [81]:

    
S = pd.DataFrame(rpca_alm.S, index=table1.index, columns=table1.columns)
S.head()









    Out[81]:







  
    
      
      RHO
      GNAT1
      SLC24A1
      PDE6B
      PDC
      CNGA1
      RP1
      SAG
      NR2E3
      NRL
      ...
      SLC6A6
      MAP1B
      TMA7
      STX3
      SYT1
      CRX
      SNAP25
      MPP4
      NEUROD1
      A930011O12RIK
    
    
      barcode
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      r1_TTCCTGCTAGGC
      6.727705
      -0.048168
      -0.562439
      0.801046
      7.673487
      -1.385031
      -0.989380
      0.458890
      0.310256
      0.654445
      ...
      0.686886
      0.232836
      1.135861
      -0.388544
      -0.150222
      -0.752207
      -0.272070
      0.167139
      -1.258661
      -0.856469
    
    
      r1_TGGAGATACTCT
      9.825846
      2.519558
      3.364314
      0.286285
      5.478891
      6.689820
      -1.257731
      7.195869
      -1.741139
      -1.363235
      ...
      2.184193
      -1.031292
      0.402444
      -0.000000
      -0.884529
      -0.033574
      -0.724387
      0.763228
      -2.351665
      -0.286273
    
    
      r1_CGTCTACATCCG
      5.179712
      0.214130
      5.264080
      -1.396476
      0.828211
      1.438757
      -2.092837
      5.163708
      0.123617
      0.462513
      ...
      -0.382253
      0.027997
      -0.828423
      2.343414
      -0.391978
      0.083947
      -0.429003
      1.041242
      -1.373776
      -0.703660
    
    
      r1_CAAGCTTGGCGC
      36.017770
      6.978853
      4.391547
      12.048391
      13.225699
      -2.953723
      0.572518
      6.770184
      2.922129
      -2.769225
      ...
      -1.644721
      1.860076
      3.728497
      0.093386
      -0.000000
      3.248811
      -0.020806
      -0.190714
      1.321210
      7.187034
    
    
      r1_ACTCACATAGAG
      2.385108
      -2.096781
      -1.282597
      -0.838656
      0.868288
      0.834004
      -0.602792
      0.337469
      1.636147
      -0.237918
      ...
      0.312309
      0.195454
      0.920154
      2.125570
      0.315347
      1.458115
      0.245612
      -0.499074
      1.650005
      -0.391946
    
  

5 rows × 259 columns



In [88]:

    
sns.boxplot(table1[genes_of_interest])









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/seaborn/categorical.py:2171: UserWarning: The boxplot API has been changed. Attempting to adjust your arguments for the new API (which might not work). Please update your code. See the version 0.6 release notes for more info.
  warnings.warn(msg, UserWarning)






    Out[88]:





<matplotlib.axes._subplots.AxesSubplot at 0x11abaf0b8>



In [86]:

    
sns.boxplot(L[genes_of_interest])









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/seaborn/categorical.py:2171: UserWarning: The boxplot API has been changed. Attempting to adjust your arguments for the new API (which might not work). Please update your code. See the version 0.6 release notes for more info.
  warnings.warn(msg, UserWarning)






    Out[86]:





<matplotlib.axes._subplots.AxesSubplot at 0x11a63b4a8>



In [87]:

    
sns.boxplot(S[genes_of_interest])









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/seaborn/categorical.py:2171: UserWarning: The boxplot API has been changed. Attempting to adjust your arguments for the new API (which might not work). Please update your code. See the version 0.6 release notes for more info.
  warnings.warn(msg, UserWarning)






    Out[87]:





<matplotlib.axes._subplots.AxesSubplot at 0x11aa49470>



In [21]:

    
diff.head()









    Out[21]:







  
    
      
      RHO
      GNAT1
      SLC24A1
      PDE6B
      PDC
      CNGA1
      RP1
      SAG
      NR2E3
      NRL
      ...
      SLC6A6
      MAP1B
      TMA7
      STX3
      SYT1
      CRX
      SNAP25
      MPP4
      NEUROD1
      A930011O12RIK
    
    
      barcode
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      r1_TTCCTGCTAGGC
      -6.727705
      0.048168
      0.562439
      -0.801046
      -7.673487
      1.385031
      0.989380
      -0.458890
      -0.310256
      -0.654445
      ...
      -0.686886
      -0.232836
      -1.135861
      0.388544
      0.150222
      0.752207
      0.272070
      -0.167139
      1.258661
      0.856469
    
    
      r1_TGGAGATACTCT
      -9.825846
      -2.519558
      -3.364314
      -0.286285
      -5.478891
      -6.689820
      1.257731
      -7.195869
      1.741139
      1.363235
      ...
      -2.184193
      1.031292
      -0.402444
      -0.000301
      0.884529
      0.033574
      0.724387
      -0.763228
      2.351665
      0.286273
    
    
      r1_CGTCTACATCCG
      -5.179712
      -0.214130
      -5.264080
      1.396476
      -0.828211
      -1.438757
      2.092837
      -5.163708
      -0.123617
      -0.462513
      ...
      0.382253
      -0.027997
      0.828423
      -2.343414
      0.391978
      -0.083947
      0.429003
      -1.041242
      1.373776
      0.703660
    
    
      r1_CAAGCTTGGCGC
      -36.017770
      -6.978853
      -4.391547
      -12.048391
      -13.225699
      2.953723
      -0.572518
      -6.770184
      -2.922129
      2.769225
      ...
      1.644721
      -1.860076
      -3.728497
      -0.093386
      -0.000942
      -3.248811
      0.020806
      0.190714
      -1.321210
      -7.187034
    
    
      r1_ACTCACATAGAG
      -2.385108
      2.096781
      1.282597
      0.838656
      -0.868288
      -0.834004
      0.602792
      -0.337469
      -1.636147
      0.237918
      ...
      -0.312309
      -0.195454
      -0.920154
      -2.125570
      -0.315347
      -1.458115
      -0.245612
      0.499074
      -1.650005
      0.391946
    
  

5 rows × 259 columns



In [22]:

    
gr0 = rpca_alm.L > 0
diff_gr0 = table1 - gr0

datasets = {'Original': table1, 'Low-Rank':rpca_alm.L, 'Sparse': rpca_alm.S, 
            'Difference: Original - Low-Rank': diff_gr0}

common.heatmaps(datasets)



In [23]:

    
clustergrid = sns.clustermap(L, xticklabels=[], yticklabels=[], 
                             row_colors=color_labels)









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)



In [24]:

    
g_original = sns.clustermap(table1.T.corr(method='spearman'), xticklabels=[], yticklabels=[], 
                             col_colors=color_labels)









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)



In [88]:

    
import fastcluster



In [89]:

    
fastcluster.pdist?



In [95]:

    
table1_clustergrid = common.clustermap(table1.T.corr(method='spearman'), col_colors=color_labels)
table1_clustergrid.savefig(os.path.join(figure_folder, 'expression_table1_clustermap.pdf'))









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)



In [39]:

    
S.head()









    Out[39]:







  
    
      
      RHO
      GNAT1
      SLC24A1
      PDE6B
      PDC
      CNGA1
      RP1
      SAG
      NR2E3
      NRL
      ...
      SLC6A6
      MAP1B
      TMA7
      STX3
      SYT1
      CRX
      SNAP25
      MPP4
      NEUROD1
      A930011O12RIK
    
    
      barcode
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      r1_TTCCTGCTAGGC
      6.727705
      -0.048168
      -0.562439
      0.801046
      7.673487
      -1.385031
      -0.989380
      0.458890
      0.310256
      0.654445
      ...
      0.686886
      0.232836
      1.135861
      -0.388544
      -0.150222
      -0.752207
      -0.272070
      0.167139
      -1.258661
      -0.856469
    
    
      r1_TGGAGATACTCT
      9.825846
      2.519558
      3.364314
      0.286285
      5.478891
      6.689820
      -1.257731
      7.195869
      -1.741139
      -1.363235
      ...
      2.184193
      -1.031292
      0.402444
      -0.000000
      -0.884529
      -0.033574
      -0.724387
      0.763228
      -2.351665
      -0.286273
    
    
      r1_CGTCTACATCCG
      5.179712
      0.214130
      5.264080
      -1.396476
      0.828211
      1.438757
      -2.092837
      5.163708
      0.123617
      0.462513
      ...
      -0.382253
      0.027997
      -0.828423
      2.343414
      -0.391978
      0.083947
      -0.429003
      1.041242
      -1.373776
      -0.703660
    
    
      r1_CAAGCTTGGCGC
      36.017770
      6.978853
      4.391547
      12.048391
      13.225699
      -2.953723
      0.572518
      6.770184
      2.922129
      -2.769225
      ...
      -1.644721
      1.860076
      3.728497
      0.093386
      -0.000000
      3.248811
      -0.020806
      -0.190714
      1.321210
      7.187034
    
    
      r1_ACTCACATAGAG
      2.385108
      -2.096781
      -1.282597
      -0.838656
      0.868288
      0.834004
      -0.602792
      0.337469
      1.636147
      -0.237918
      ...
      0.312309
      0.195454
      0.920154
      2.125570
      0.315347
      1.458115
      0.245612
      -0.499074
      1.650005
      -0.391946
    
  

5 rows × 259 columns



In [49]:

    
sns.distplot(S.values.flat)









    Out[49]:





<matplotlib.axes._subplots.AxesSubplot at 0x1248a71d0>



In [52]:

    
np.median(S.values)









    Out[52]:





0.0



In [56]:

    
high_in_sparse = (S > 10).any()
print(high_in_sparse.sum())
S.loc[:, high_in_sparse]









    



88






    Out[56]:







  
    
      
      RHO
      GNAT1
      SLC24A1
      PDE6B
      PDC
      CNGA1
      RP1
      SAG
      NR2E3
      NRL
      ...
      TTYH1
      PAX6
      MGARP
      HSP90AA1
      SLC6A6
      MAP1B
      TMA7
      SYT1
      SNAP25
      A930011O12RIK
    
    
      barcode
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      r1_TTCCTGCTAGGC
      6.727705
      -0.048168
      -0.562439
      0.801046
      7.673487
      -1.385031
      -0.989380
      0.458890
      0.310256
      0.654445
      ...
      -0.000000
      -0.000000
      -0.302098
      -0.081445
      0.686886
      0.232836
      1.135861
      -0.150222
      -0.272070
      -0.856469
    
    
      r1_TGGAGATACTCT
      9.825846
      2.519558
      3.364314
      0.286285
      5.478891
      6.689820
      -1.257731
      7.195869
      -1.741139
      -1.363235
      ...
      1.331571
      0.998086
      -1.093359
      -1.256187
      2.184193
      -1.031292
      0.402444
      -0.884529
      -0.724387
      -0.286273
    
    
      r1_CGTCTACATCCG
      5.179712
      0.214130
      5.264080
      -1.396476
      0.828211
      1.438757
      -2.092837
      5.163708
      0.123617
      0.462513
      ...
      -0.341183
      0.999751
      1.902183
      3.171176
      -0.382253
      0.027997
      -0.828423
      -0.391978
      -0.429003
      -0.703660
    
    
      r1_CAAGCTTGGCGC
      36.017770
      6.978853
      4.391547
      12.048391
      13.225699
      -2.953723
      0.572518
      6.770184
      2.922129
      -2.769225
      ...
      0.814278
      0.000000
      -1.179403
      -0.254800
      -1.644721
      1.860076
      3.728497
      -0.000000
      -0.020806
      7.187034
    
    
      r1_ACTCACATAGAG
      2.385108
      -2.096781
      -1.282597
      -0.838656
      0.868288
      0.834004
      -0.602792
      0.337469
      1.636147
      -0.237918
      ...
      -0.082806
      -0.000000
      -0.509653
      -0.000000
      0.312309
      0.195454
      0.920154
      0.315347
      0.245612
      -0.391946
    
    
      r1_TAACGGACACGC
      21.380107
      1.663735
      -0.021046
      -0.222488
      12.299926
      4.287691
      -0.672176
      0.000000
      -2.237709
      9.267142
      ...
      4.740199
      0.000000
      0.000000
      -0.000000
      0.952529
      -1.441017
      -0.406451
      1.892411
      -1.088772
      0.354429
    
    
      r1_CGCATGGGATAC
      13.959024
      3.970865
      -1.348141
      -0.892688
      2.954439
      -0.215551
      1.402263
      -0.000000
      -0.442402
      5.729592
      ...
      2.569047
      0.000000
      -0.719147
      1.672199
      0.435023
      1.349910
      -0.755655
      -0.504931
      0.403079
      -0.428073
    
    
      r1_TAACGACGCTTG
      3.180794
      -0.447518
      0.109158
      -0.257555
      -2.279500
      0.203602
      3.582963
      2.171476
      -0.986389
      0.327993
      ...
      -0.133089
      -0.000000
      -0.236654
      0.158758
      -0.000000
      0.434394
      -0.410368
      -0.045431
      0.000000
      2.149970
    
    
      r1_TCGGCAGCCTCT
      0.000000
      6.456903
      -1.145033
      1.899016
      0.735434
      1.092624
      0.135634
      9.117565
      0.692410
      2.080789
      ...
      -0.266130
      0.000000
      0.000000
      -0.887233
      -0.409884
      -1.027818
      -0.134590
      -0.516927
      -0.389400
      -1.188493
    
    
      r1_TAGGATGCAAAC
      0.973777
      -2.880712
      2.373638
      0.301869
      -3.082936
      -2.280995
      5.395161
      0.074149
      8.163804
      0.806921
      ...
      -0.570322
      -0.000000
      -1.104568
      -2.087004
      -0.265266
      0.564875
      1.717807
      -0.707985
      -0.499107
      5.146910
    
    
      r1_CGGTTACAGTAG
      12.505315
      1.261856
      2.428380
      -1.565133
      -1.709170
      -1.225406
      3.767564
      3.748447
      0.259539
      -1.144302
      ...
      -0.334235
      0.000000
      -0.000000
      -0.807533
      2.114339
      0.758462
      1.419423
      1.893210
      0.000000
      -0.454211
    
    
      r1_AATCGGATACGT
      11.933689
      -0.000000
      2.991893
      1.107978
      -1.669472
      1.209005
      5.214369
      0.000000
      -0.186109
      1.227780
      ...
      -0.452205
      0.000000
      -1.065484
      1.154736
      0.482219
      0.946174
      -0.982452
      1.195778
      2.396707
      1.631832
    
    
      r1_AGTGGGCTTGAG
      7.947213
      4.715814
      -1.638802
      -1.356734
      0.401653
      -0.493621
      0.793865
      -0.000000
      5.179635
      0.523270
      ...
      -0.212292
      0.000000
      0.039120
      1.593622
      0.396195
      -0.750549
      -0.893586
      1.234860
      0.221824
      1.878540
    
    
      r1_TTCACCTACCGC
      10.781769
      1.930491
      0.537446
      -2.004599
      0.901076
      -0.239981
      -1.714882
      -0.079131
      2.512117
      -0.276403
      ...
      0.165085
      -0.000000
      0.017907
      0.238652
      -0.090347
      -0.579233
      1.459530
      -0.516233
      -0.232060
      -0.625630
    
    
      r1_TTATGTCGTCCT
      2.270350
      3.943689
      2.119016
      1.400406
      3.516772
      1.362922
      -2.202215
      1.637913
      -1.913613
      1.275680
      ...
      -0.533194
      0.000000
      -0.000000
      2.163319
      -0.309056
      3.075198
      0.000000
      0.209408
      -0.560234
      -0.595371
    
    
      r1_ATCAGCGCAGTC
      0.000000
      -0.565269
      0.021651
      0.567229
      -2.430373
      0.126985
      4.493267
      2.215334
      0.862813
      -0.714553
      ...
      -0.000000
      0.000000
      0.537775
      0.056646
      -0.073409
      0.411675
      0.550113
      -0.144370
      0.780021
      7.097770
    
    
      r1_CTTTATGGTGAC
      7.572463
      -0.903545
      -0.000000
      2.227700
      5.103486
      3.413487
      1.485007
      10.665482
      0.584033
      -0.507655
      ...
      1.319728
      0.000000
      0.206072
      -1.192753
      2.364657
      -1.385998
      -0.295737
      0.000000
      1.001619
      1.978113
    
    
      r1_GAATCGGGAACA
      0.130061
      -2.574404
      0.207521
      -0.441166
      -2.970246
      -0.523037
      3.736323
      3.206105
      -0.825140
      -0.521023
      ...
      1.157655
      -0.000000
      -0.024933
      0.125958
      -0.152853
      -0.815796
      1.130103
      0.207391
      -0.320568
      -0.858338
    
    
      r1_GAAGTGATCACC
      7.198676
      1.907994
      -0.363277
      -0.764196
      8.988425
      0.000000
      -1.520747
      9.571634
      -0.658129
      -1.910792
      ...
      0.000000
      0.000000
      -1.730510
      3.680007
      -0.000000
      0.058526
      0.000000
      -0.000000
      -0.994082
      -1.859172
    
    
      r1_AGTGGGCGGCCG
      8.848392
      0.895194
      -0.163581
      3.340794
      0.000000
      0.958213
      -0.538988
      -1.387186
      5.711675
      0.000000
      ...
      -0.115035
      0.000000
      0.269487
      1.013680
      -0.113011
      -0.443464
      -0.403637
      0.567080
      1.632395
      5.046891
    
    
      r1_ACTGATGATTAA
      -0.166009
      0.258877
      0.679583
      0.564598
      1.074236
      -0.259644
      -0.234408
      -0.000000
      -0.281426
      0.673068
      ...
      0.000000
      -0.000000
      0.771423
      -0.207019
      0.917991
      1.837832
      -0.114600
      -0.030735
      -0.216464
      -0.000000
    
    
      r1_CGCCCGTCTGTA
      11.020429
      0.000000
      0.081976
      -2.722703
      4.451928
      0.291661
      0.406819
      2.286490
      0.938283
      0.300258
      ...
      -0.292419
      0.000000
      0.727774
      0.068704
      -0.214730
      -0.000000
      -0.923698
      1.250572
      -0.505095
      -0.963568
    
    
      r1_GGGCTTGGGAAG
      -0.000000
      2.448623
      -0.000000
      0.493179
      -0.228614
      -0.881305
      -1.331266
      0.641512
      -0.062537
      1.000179
      ...
      -0.278898
      -0.000000
      3.872065
      1.717896
      -0.459817
      -0.628542
      0.258535
      4.167146
      1.243851
      0.345875
    
    
      r1_TTAATGACTACA
      -0.000000
      0.866470
      -1.088180
      3.399241
      0.000000
      1.020583
      6.455940
      -0.432606
      0.780824
      -0.900402
      ...
      -0.071543
      0.000000
      2.491098
      2.013177
      -0.132724
      -0.505365
      0.455063
      -0.155937
      0.761623
      3.373602
    
    
      r1_CGGCTGTCTGCT
      19.308350
      5.889286
      0.084709
      4.388549
      -0.419955
      -0.645631
      -2.231481
      3.700417
      -0.926757
      -1.735463
      ...
      0.000000
      -0.000000
      1.313213
      0.745053
      -0.448072
      -0.966678
      0.017107
      0.784523
      0.099009
      -0.836805
    
    
      r1_ATGATTATGGTT
      4.632167
      3.751768
      -0.967775
      -0.363413
      4.001080
      0.158328
      0.758410
      -2.679809
      -0.982320
      -0.870134
      ...
      -0.059819
      -0.000000
      -0.253736
      1.133169
      -0.000000
      -0.453014
      2.358229
      -0.074023
      -0.050509
      -0.206752
    
    
      r1_TTTACTTCAAGG
      -1.347545
      0.221071
      0.604238
      0.056137
      0.059580
      -0.206978
      9.143060
      0.000000
      -1.473954
      -0.173521
      ...
      -0.016236
      -0.000000
      -0.419281
      -0.088003
      -0.000000
      0.197948
      0.362459
      -0.000000
      -0.000000
      0.143271
    
    
      r1_ATGGCTCGCAAA
      6.387332
      -0.915061
      1.178258
      0.446398
      2.697413
      0.388423
      -2.239412
      2.655880
      -1.887992
      -1.644699
      ...
      -0.250518
      -0.000000
      0.000000
      2.312154
      -0.363505
      -0.821810
      -0.000000
      -0.645316
      0.432266
      -0.786629
    
    
      r1_CGATGGCTGGAC
      17.101099
      3.885594
      -0.803805
      -0.557101
      3.556439
      -0.605906
      -1.171681
      2.418840
      1.088428
      -0.678596
      ...
      -0.257935
      0.000000
      -0.000000
      -1.433706
      2.095816
      0.051474
      -1.320981
      0.017615
      0.947546
      -0.662259
    
    
      r1_GCGTGCTACTAC
      2.224974
      0.063991
      -1.916026
      -0.721554
      -1.373457
      0.313343
      -0.444382
      4.467587
      -2.033359
      1.282551
      ...
      0.485015
      0.000000
      -0.935773
      0.270485
      0.478233
      -0.788953
      -0.000000
      0.252681
      -0.563404
      0.925160
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      r1_CGAAACTATCGC
      2.195243
      0.011369
      0.000000
      -0.000000
      -0.107809
      0.000000
      -0.000000
      -0.534394
      -0.000000
      -0.000000
      ...
      -1.361469
      1.173269
      0.000000
      -0.291303
      0.000000
      0.775780
      -0.000000
      1.852239
      -0.000000
      0.000000
    
    
      r1_CCCCTCTCTGGC
      0.000000
      -0.000000
      0.000000
      -0.000000
      -0.114969
      -0.000000
      0.932384
      0.363043
      -0.009413
      -0.000000
      ...
      5.316246
      0.353796
      0.000000
      -0.077829
      -0.000000
      -0.146776
      0.000000
      0.000000
      0.015977
      -0.000000
    
    
      r1_ATCAATATTCTC
      -0.000000
      -0.000000
      0.000000
      0.000000
      0.817163
      0.000000
      -0.000000
      0.000000
      -0.000000
      0.000000
      ...
      1.092017
      0.044050
      4.603456
      -0.867112
      0.023728
      -0.000000
      -0.000000
      1.872482
      -0.000000
      -0.019444
    
    
      r1_TCTCTGTGACGC
      1.528452
      -0.012077
      0.011134
      1.023459
      0.917759
      1.983044
      -0.000000
      0.586865
      -0.000000
      0.000000
      ...
      2.050997
      -0.538215
      0.736698
      0.470899
      0.000000
      -0.000000
      0.014210
      0.000000
      0.000000
      -0.069790
    
    
      r1_GGCGGACTGCGT
      1.628706
      -0.096092
      -0.000000
      -0.016540
      3.745887
      -0.029418
      -0.050897
      0.000000
      -0.017742
      0.935019
      ...
      -2.207297
      -0.633798
      2.238783
      -0.073710
      1.017559
      -0.080742
      -0.000000
      0.037455
      0.000000
      0.000000
    
    
      r1_GAAGAGTATCTT
      -0.165410
      0.000000
      -0.000000
      0.000000
      0.855833
      0.994807
      -0.000000
      -0.000000
      0.013863
      0.000000
      ...
      -0.000000
      0.480201
      4.156182
      2.273838
      0.000000
      -0.000000
      0.000000
      1.541622
      -0.000000
      0.000000
    
    
      r1_GTTACACGAGTC
      1.943480
      -0.000000
      0.957764
      -0.000000
      -0.047855
      -0.000000
      0.000000
      -0.000000
      -0.000000
      0.000000
      ...
      5.790545
      0.630836
      -0.187083
      1.666287
      -0.000000
      -0.000000
      0.000000
      0.000000
      -0.000000
      0.000000
    
    
      r1_CATTGGTCTCAC
      3.070188
      -0.174132
      -1.029044
      -0.515659
      0.703719
      0.066136
      -1.424556
      0.614585
      0.795668
      -0.903366
      ...
      5.600525
      -0.425672
      10.884224
      -2.120829
      0.643494
      2.271292
      -0.588402
      -0.182806
      -0.312266
      0.047062
    
    
      r1_TATACTAAGTTT
      2.770301
      -0.445298
      0.776546
      -0.158671
      0.332642
      0.835105
      -0.002061
      1.015680
      -0.066518
      0.703998
      ...
      2.984812
      1.252434
      -0.264520
      -0.000000
      -0.000000
      -0.000000
      -0.105667
      3.254264
      0.579125
      0.817419
    
    
      r1_AGATCATCGTCC
      -0.000000
      -0.048309
      0.000000
      -0.000000
      0.866396
      -0.000000
      -0.000000
      0.000000
      0.999137
      0.000000
      ...
      -1.932864
      0.409695
      1.500891
      3.290392
      0.000000
      -0.000000
      -0.000000
      0.000000
      0.000000
      0.000000
    
    
      r1_CGGATTTACACT
      1.784631
      -0.000000
      0.000000
      -0.000000
      -0.045397
      -0.000000
      0.938590
      -0.002960
      0.007074
      0.000000
      ...
      0.000000
      2.048196
      4.595213
      -0.458753
      0.000000
      0.885203
      -0.000000
      -0.000000
      0.000000
      -0.000000
    
    
      r1_TATAGGAACAAA
      0.438695
      0.032053
      0.000000
      0.000000
      0.000000
      0.000000
      -0.002251
      0.562090
      0.000000
      -0.000000
      ...
      3.084729
      0.793422
      0.798523
      -0.000000
      0.974947
      0.893590
      0.000000
      0.883523
      -0.000000
      -0.000000
    
    
      r1_ACCATGTTGGGA
      1.041601
      0.219584
      -0.108511
      -0.156109
      0.000000
      -0.084239
      -0.019090
      -1.420963
      -0.089953
      -0.222936
      ...
      9.790146
      -0.121851
      7.362139
      -0.428579
      -0.000000
      -0.102834
      -0.036901
      -0.136796
      -0.000000
      0.000000
    
    
      r1_TCAAAGATAGGG
      -0.000000
      0.974528
      0.000000
      1.011693
      0.945346
      -0.000000
      -0.000000
      -0.000000
      0.000000
      -0.000000
      ...
      -0.000000
      0.622028
      1.443698
      -0.204755
      0.000000
      -0.008213
      0.000000
      0.000000
      -0.000000
      -0.000000
    
    
      r1_TTTATATTTGGG
      0.659228
      -0.000000
      0.007339
      -0.000000
      -0.045322
      -0.000000
      0.918379
      0.659567
      0.986917
      0.000000
      ...
      2.566947
      -0.151579
      -0.455806
      0.884180
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      -0.000000
    
    
      r1_CACACCGCGTAG
      1.186377
      0.307160
      0.821878
      -0.101919
      -0.506890
      -0.041569
      0.955889
      -0.016220
      -0.000000
      -0.149948
      ...
      -0.673384
      0.136749
      7.032807
      1.076661
      -0.075586
      -0.022994
      -0.000000
      1.865617
      -0.325853
      0.000000
    
    
      r1_GCTCGGTTAGTT
      -0.000000
      -0.235687
      -0.026272
      -0.074866
      3.449860
      -0.061053
      -0.000000
      -0.000000
      -0.039245
      0.872183
      ...
      10.688130
      0.627932
      -0.508217
      1.689609
      1.875097
      -0.000000
      0.927727
      -0.000000
      -0.000000
      -0.000000
    
    
      r1_TAGAGGCCTATA
      -0.000000
      -0.000000
      0.000000
      0.015248
      -0.031060
      0.000000
      0.000000
      -0.000000
      0.010458
      0.000000
      ...
      5.591531
      0.283414
      4.287004
      5.259208
      0.000000
      -0.000000
      0.018663
      -0.000000
      -0.000000
      -0.009140
    
    
      r1_TATAAAAAATTT
      -0.000000
      -0.007146
      0.000000
      -0.000000
      0.000000
      -0.000000
      -0.000000
      0.000000
      -0.000000
      0.000000
      ...
      -0.670504
      0.829290
      0.232661
      0.875470
      0.000000
      0.000000
      0.015976
      0.000000
      0.000000
      0.000000
    
    
      r1_TCTAATATTCGC
      -0.331777
      0.000000
      1.008258
      0.019039
      -0.000000
      0.000000
      2.973341
      0.683886
      -0.000000
      -0.000000
      ...
      3.736617
      -0.362063
      -2.351407
      -0.000000
      -0.000000
      0.000000
      -0.000000
      -0.000000
      0.000000
      0.943395
    
    
      r1_AGGGTGGGTACA
      -0.000000
      0.000000
      -0.020779
      0.997030
      -0.000000
      0.000000
      0.000000
      0.969479
      0.000000
      0.000000
      ...
      0.599839
      1.621370
      -0.534722
      -0.000000
      -0.000000
      -0.000000
      -0.021030
      0.070620
      0.040233
      -0.000000
    
    
      r1_AATGCTGCAAGA
      -0.000000
      0.007975
      -0.000000
      0.006630
      -0.008046
      0.000000
      0.000000
      -0.000000
      0.000000
      0.000000
      ...
      2.606610
      -0.746857
      -1.296138
      0.232468
      -0.000000
      0.000000
      0.000000
      -0.000000
      -0.000000
      0.000000
    
    
      r1_GTCGGGCCTTTC
      -0.213827
      -0.000000
      -0.009420
      0.000000
      -0.104125
      0.000000
      -0.000000
      0.559870
      0.012508
      0.000000
      ...
      -2.811222
      0.000000
      16.412101
      0.950820
      -0.000000
      0.000000
      1.889713
      -0.000000
      -0.000000
      -0.000000
    
    
      r1_GGGTCAGCGGCG
      0.920600
      1.260236
      -0.105873
      -0.157246
      -0.929874
      -0.083545
      -0.074611
      -0.552968
      0.880902
      0.779157
      ...
      4.037112
      -1.204179
      -4.443643
      -0.000000
      -0.000000
      -0.000000
      1.804412
      -0.000000
      -0.000000
      0.000000
    
    
      r1_CTGGACCTGCCC
      0.000000
      -0.191608
      -0.026919
      -0.016774
      -0.234621
      -0.024769
      -0.000000
      -0.661173
      -0.000000
      0.927448
      ...
      3.241622
      -0.367363
      6.982276
      -0.138741
      -0.000000
      -0.000000
      0.000000
      -0.006284
      -0.000000
      0.000000
    
    
      r1_AAGATATTGCTG
      -0.185538
      0.960408
      1.035136
      0.000000
      0.801181
      0.000000
      0.000000
      0.712134
      -0.000000
      -0.000000
      ...
      4.298957
      1.678050
      4.851130
      1.807929
      0.016362
      -0.030886
      0.000000
      0.000000
      -0.028761
      -0.057232
    
    
      r1_GAGACCTCATGG
      0.000000
      -0.713697
      -0.271694
      0.508802
      0.860829
      -0.239491
      0.488049
      -0.363523
      1.646615
      -0.267568
      ...
      3.518335
      -0.540237
      1.018306
      0.000000
      -0.000000
      0.666421
      -0.003371
      -0.136384
      0.778941
      1.797944
    
    
      r1_CGGAGCGCGACA
      1.052581
      -0.099141
      -0.000000
      -0.000000
      -0.136871
      0.984826
      -0.030601
      0.970290
      -0.000000
      -0.000000
      ...
      -1.453753
      -0.163724
      2.973280
      0.522468
      -0.000000
      0.000000
      -0.000000
      -0.000000
      0.000000
      -0.007794
    
    
      r1_AAGGACAGATCC
      0.000000
      1.739662
      2.596491
      -0.543682
      -0.524763
      -0.284546
      0.579656
      -1.319067
      -0.308557
      1.550640
      ...
      8.426755
      -0.829454
      3.722234
      0.369819
      0.000000
      -0.061672
      -0.120301
      -0.437313
      -0.014598
      -0.000000
    
    
      r1_ATATGCACCCTA
      0.864573
      -0.000000
      -0.000000
      -0.000000
      0.000000
      0.000000
      0.000000
      -0.000000
      -0.000000
      -0.000000
      ...
      -1.270910
      -0.296835
      4.645703
      -0.000000
      -0.000000
      -0.003225
      0.000000
      0.000000
      -0.000000
      0.000000
    
  

300 rows × 88 columns



In [47]:

    
data = S[S > 0]
data = data.fillna(0)

g_rpca = sns.clustermap(data, xticklabels=[], yticklabels=[], row_colors=color_labels)









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)



In [38]:

    
data = S.T.corr(method='spearman')
g_rpca = sns.clustermap(data, xticklabels=[], yticklabels=[],
                             col_colors=color_labels, row_colors=color_labels)









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)



In [44]:

    
data = S.corr(method='spearman')
g_rpca = sns.clustermap(data, xticklabels=[], yticklabels=[])









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)



In [96]:

    
data = L.T.corr(method='spearman')
g_rpca = common.clustermap(data, col_colors=color_labels)
g_rpca.savefig(os.path.join(figure_folder, 'low_rank_clustermap.pdf'))









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)



In [99]:

    
U, s, V = np.linalg.svd(L)
plt.plot(s[:10], 'o-')









    Out[99]:





[<matplotlib.lines.Line2D at 0x13e2cbcf8>]



In [100]:

    
U, s, V = np.linalg.svd(table1)
plt.plot(s[:10], 'o-')









    Out[100]:





[<matplotlib.lines.Line2D at 0x132a45cf8>]

So this seemed to have flipped some of the cells into different types, and made the within-cluster distances smaller



In [45]:

    
reconstructed = L + S

data = reconstructed.T.corr(method='spearman')
g_rpca = sns.clustermap(data, xticklabels=[], yticklabels=[],
                             col_colors=color_labels, row_colors=color_labels)









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)



In [81]:

    
csv = os.path.join(data_folder, 'sparse.csv')

S.to_csv(csv)



In [83]:

    
data_folder









    Out[83]:





'data/002_robust_pca'



In [82]:

    
csv = os.path.join(data_folder, 'lowrank.csv')

L.to_csv(csv)

Try ICA before and after



In [79]:

    
L.shape









    Out[79]:





(300, 259)



In [ ]:

    
from sklearn.decomposition import ICA

ica = ICA(n_components=)

ADMM implementation



In [36]:

    
reduced = rpcaADMM.rpcaADMM(table1)
# print(reduced.shape)
# reduced.head()









    



iter	    r norm	   eps pri	    s norm	  eps dual	 objective
   1	  386.9989	    5.6518	  560.3478	    5.5259	  46003.39
  10	  126.6648	    7.4377	   57.8995	    7.7838	 274951.69
  20	   19.5261	    7.9579	   26.0279	    7.5066	 297640.42
  30	   10.9950	    8.0745	   10.9821	    7.3901	 300438.68



In [62]:

    
rpcaADMM.rpcaADMM()



In [38]:

    
reduced.keys()









    Out[38]:





dict_keys(['objval', 'r_norm', 's_norm', 'eps_pri', 'eps_dual', 'addm_toc', 'admm_iter', 'X1_admm', 'X2_admm', 'X3_admm'])



In [76]:

    
ncols = 4
nrows = 1

axsize = 3

width = ncols * axsize
height = nrows * axsize

fig, axes = plt.subplots(ncols=ncols, figsize=(width, height))
axes_iter = axes.flat

x_keys = [key for key in reduced if key.startswith('X')]

ax = next(axes_iter)
data = table1
mask = data == 0
sns.heatmap(table1, mask=mask, ax=ax, xticklabels=[], yticklabels=[])
ax.set(title='Original')

for ax, key in zip(axes_iter, x_keys):
    data = reduced[key]
    mask = data == 0
    
    vmin = data.min().min()
    vmax = data.max().max()
    center = 0
    sns.heatmap(reduced[key], mask=mask, ax=ax, xticklabels=[], yticklabels=[])
    ax.set(title=key)



In [79]:

    
ncols = 4
nrows = 1

axsize = 3

width = ncols * axsize * 1.25
height = nrows * axsize

fig, axes = plt.subplots(ncols=ncols, figsize=(width, height))
axes_iter = axes.flat

x_keys = [key for key in reduced if key.startswith('X')]

ax = next(axes_iter)
common.heatmap(table1, ax=ax)
ax.set(title='Original')

for ax, key in zip(axes_iter, x_keys):
    common.heatmap(reduced[key], ax=ax)
    ax.set(title=key)



In [55]:

    
U, s, V = np.linalg.svd(reduced['X3_admm'])



In [61]:

    
reduced['X2_admm'][reduced['X2_admm'].nonzero()]









    Out[61]:





array([  0.06196856,   6.6911438 ,  13.40747184,   0.27285075,
        48.22056903,   2.24481196,   2.75026673,   0.05984601,   6.10384102])



In [80]:

    
ax









    



[autoreload of common failed: Traceback (most recent call last):
  File "/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 246, in check
    superreload(m, reload, self.old_objects)
  File "/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 369, in superreload
    module = reload(module)
  File "/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/imp.py", line 314, in reload
    return importlib.reload(module)
  File "/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/importlib/__init__.py", line 166, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 608, in _exec
  File "<frozen importlib._bootstrap_external>", line 674, in exec_module
  File "<frozen importlib._bootstrap_external>", line 781, in get_code
  File "<frozen importlib._bootstrap_external>", line 741, in source_to_code
  File "<frozen importlib._bootstrap>", line 205, in _call_with_frames_removed
  File "/Users/olgabot/code/cshl-singlecell-2017/notebooks/02_tissue_subpopulations/common.py", line 34
    for ax, (key, data) in zip(key, datas.items()):
                                                  ^
SyntaxError: unexpected EOF while parsing
]






    Out[80]:





<matplotlib.axes._subplots.AxesSubplot at 0x124f07048>



In [81]:

    
sns.heatmap??



In [ ]:

	RHO	GNAT1	SLC24A1	PDE6B	PDC	CNGA1	RP1	SAG	NR2E3	NRL	...	SLC6A6	MAP1B	TMA7	STX3	SYT1	CRX	SNAP25	MPP4	NEUROD1	A930011O12RIK
barcode
r1_TTCCTGCTAGGC	14	3	1	3	12	0	1	7	2	2	...	1	1	2	0	0	0	0	1	0	0
r1_TGGAGATACTCT	23	8	6	4	13	9	2	19	1	1	...	3	0	2	1	0	1	0	2	0	1
r1_CGTCTACATCCG	14	4	7	1	6	3	0	13	2	2	...	0	1	0	3	0	1	0	2	0	0
r1_CAAGCTTGGCGC	62	18	10	20	29	2	8	31	9	2	...	0	5	7	3	2	6	2	3	7	11
r1_ACTCACATAGAG	10	1	0	1	5	2	1	7	3	1	...	1	1	2	3	1	2	1	0	3	0

	RHO	PAX6	GNAT1	SLC24A1
barcode
r1_TTCCTGCTAGGC	2.708050	0.000000	1.386294	0.693147
r1_TGGAGATACTCT	3.178054	0.693147	2.197225	1.945910
r1_CGTCTACATCCG	2.708050	0.693147	1.609438	2.079442
r1_CAAGCTTGGCGC	4.143135	0.000000	2.944439	2.397895
r1_ACTCACATAGAG	2.397895	0.000000	0.693147	0.000000

	RHO	PAX6	GNAT1	SLC24A1	cluster_id
barcode
r1_TTCCTGCTAGGC	14	0	3	1	Rods
r1_TGGAGATACTCT	23	1	8	6	Rods
r1_CGTCTACATCCG	14	1	4	7	Rods
r1_CAAGCTTGGCGC	62	0	18	10	Rods
r1_ACTCACATAGAG	10	0	1	0	Rods

	RHO	GNAT1	SLC24A1	PDE6B	PDC	CNGA1	RP1	SAG	NR2E3	NRL	...	SLC6A6	MAP1B	TMA7	STX3	SYT1	CRX	SNAP25	MPP4	NEUROD1	A930011O12RIK
barcode
r1_TTCCTGCTAGGC	7.272295	3.048168	1.562439	2.198954	4.326513	1.385031	1.989380	6.541110	1.689744	1.345555	...	0.313114	0.767164	0.864139	0.388544	0.150222	0.752207	0.272070	0.832861	1.258661	0.856469
r1_TGGAGATACTCT	13.174154	5.480442	2.635686	3.713715	7.521109	2.310180	3.257731	11.804131	2.741139	2.363235	...	0.815807	1.031292	1.597556	0.999699	0.884529	1.033574	0.724387	1.236772	2.351665	1.286273
r1_CGTCTACATCCG	8.820288	3.785870	1.735920	2.396476	5.171789	1.561243	2.092837	7.836292	1.876383	1.537487	...	0.382253	0.972003	0.828423	0.656586	0.391978	0.916053	0.429003	0.958758	1.373776	0.703660
r1_CAAGCTTGGCGC	25.982230	11.021147	5.608453	7.951609	15.774301	4.953723	7.427482	24.229816	6.077871	4.769225	...	1.644721	3.139924	3.271503	2.906614	1.999058	2.751189	2.020806	3.190714	5.678790	3.812966
r1_ACTCACATAGAG	7.614892	3.096781	1.282597	1.838656	4.131712	1.165996	1.602792	6.662531	1.363853	1.237918	...	0.687691	0.804546	1.079846	0.874430	0.684653	0.541885	0.754388	0.499074	1.349995	0.391946

	RHO	GNAT1	SLC24A1	PDE6B	PDC	CNGA1	RP1	SAG	NR2E3	NRL	...	SLC6A6	MAP1B	TMA7	STX3	SYT1	CRX	SNAP25	MPP4	NEUROD1	A930011O12RIK
barcode
r1_TTCCTGCTAGGC	6.727705	-0.048168	-0.562439	0.801046	7.673487	-1.385031	-0.989380	0.458890	0.310256	0.654445	...	0.686886	0.232836	1.135861	-0.388544	-0.150222	-0.752207	-0.272070	0.167139	-1.258661	-0.856469
r1_TGGAGATACTCT	9.825846	2.519558	3.364314	0.286285	5.478891	6.689820	-1.257731	7.195869	-1.741139	-1.363235	...	2.184193	-1.031292	0.402444	-0.000000	-0.884529	-0.033574	-0.724387	0.763228	-2.351665	-0.286273
r1_CGTCTACATCCG	5.179712	0.214130	5.264080	-1.396476	0.828211	1.438757	-2.092837	5.163708	0.123617	0.462513	...	-0.382253	0.027997	-0.828423	2.343414	-0.391978	0.083947	-0.429003	1.041242	-1.373776	-0.703660
r1_CAAGCTTGGCGC	36.017770	6.978853	4.391547	12.048391	13.225699	-2.953723	0.572518	6.770184	2.922129	-2.769225	...	-1.644721	1.860076	3.728497	0.093386	-0.000000	3.248811	-0.020806	-0.190714	1.321210	7.187034
r1_ACTCACATAGAG	2.385108	-2.096781	-1.282597	-0.838656	0.868288	0.834004	-0.602792	0.337469	1.636147	-0.237918	...	0.312309	0.195454	0.920154	2.125570	0.315347	1.458115	0.245612	-0.499074	1.650005	-0.391946

	RHO	GNAT1	SLC24A1	PDE6B	PDC	CNGA1	RP1	SAG	NR2E3	NRL	...	SLC6A6	MAP1B	TMA7	STX3	SYT1	CRX	SNAP25	MPP4	NEUROD1	A930011O12RIK
barcode
r1_TTCCTGCTAGGC	-6.727705	0.048168	0.562439	-0.801046	-7.673487	1.385031	0.989380	-0.458890	-0.310256	-0.654445	...	-0.686886	-0.232836	-1.135861	0.388544	0.150222	0.752207	0.272070	-0.167139	1.258661	0.856469
r1_TGGAGATACTCT	-9.825846	-2.519558	-3.364314	-0.286285	-5.478891	-6.689820	1.257731	-7.195869	1.741139	1.363235	...	-2.184193	1.031292	-0.402444	-0.000301	0.884529	0.033574	0.724387	-0.763228	2.351665	0.286273
r1_CGTCTACATCCG	-5.179712	-0.214130	-5.264080	1.396476	-0.828211	-1.438757	2.092837	-5.163708	-0.123617	-0.462513	...	0.382253	-0.027997	0.828423	-2.343414	0.391978	-0.083947	0.429003	-1.041242	1.373776	0.703660
r1_CAAGCTTGGCGC	-36.017770	-6.978853	-4.391547	-12.048391	-13.225699	2.953723	-0.572518	-6.770184	-2.922129	2.769225	...	1.644721	-1.860076	-3.728497	-0.093386	-0.000942	-3.248811	0.020806	0.190714	-1.321210	-7.187034
r1_ACTCACATAGAG	-2.385108	2.096781	1.282597	0.838656	-0.868288	-0.834004	0.602792	-0.337469	-1.636147	0.237918	...	-0.312309	-0.195454	-0.920154	-2.125570	-0.315347	-1.458115	-0.245612	0.499074	-1.650005	0.391946