notebook.community

Edit and run



In [2]:

    
import os
import common

# Assign notebook and folder names
notebook_name = '02_robust_pca'
figure_folder = os.path.join(common.FIGURE_FOLDER, notebook_name)
data_folder = os.path.join(common.DATA_FOLDER, notebook_name)

# Make the folders
! mkdir -p $figure_folder
! mkdir -p $data_folder



In [3]:

    
%pdb









    



Automatic pdb calling has been turned ON



In [4]:

    
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline



In [5]:

    
input_folder = os.path.join(common.DATA_FOLDER, '001_downsample_macosko_data')

csv = os.path.join(input_folder, 'expression_table1_subset.csv')

table1 = pd.read_csv(csv, index_col=0)
print(table1.shape)
table1.head()









    



(300, 259)






    Out[5]:







  
    
      
      RHO
      GNAT1
      SLC24A1
      PDE6B
      PDC
      CNGA1
      RP1
      SAG
      NR2E3
      NRL
      ...
      SLC6A6
      MAP1B
      TMA7
      STX3
      SYT1
      CRX
      SNAP25
      MPP4
      NEUROD1
      A930011O12RIK
    
    
      barcode
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      r1_TTCCTGCTAGGC
      14
      3
      1
      3
      12
      0
      1
      7
      2
      2
      ...
      1
      1
      2
      0
      0
      0
      0
      1
      0
      0
    
    
      r1_TGGAGATACTCT
      23
      8
      6
      4
      13
      9
      2
      19
      1
      1
      ...
      3
      0
      2
      1
      0
      1
      0
      2
      0
      1
    
    
      r1_CGTCTACATCCG
      14
      4
      7
      1
      6
      3
      0
      13
      2
      2
      ...
      0
      1
      0
      3
      0
      1
      0
      2
      0
      0
    
    
      r1_CAAGCTTGGCGC
      62
      18
      10
      20
      29
      2
      8
      31
      9
      2
      ...
      0
      5
      7
      3
      2
      6
      2
      3
      7
      11
    
    
      r1_ACTCACATAGAG
      10
      1
      0
      1
      5
      2
      1
      7
      3
      1
      ...
      1
      1
      2
      3
      1
      2
      1
      0
      3
      0
    
  

5 rows × 259 columns

Assign colors basd on clusters



In [6]:

    
cluster_identities = pd.read_table('macosko2015/retina_clusteridentities.txt', header=None,
                                   names=['barcode', 'cluster_id'], index_col=0, squeeze=True)
print(cluster_identities.shape)
cluster_identities.head()









    



(44808,)






    Out[6]:





barcode
r1_GGCCGCAGTCCG     2
r1_CTTGTGCGGGAA     2
r1_GCGCAACTGCTC     2
r1_GATTGGGAGGCA     2
r1_GTGCCGCCTCTC    25
Name: cluster_id, dtype: int64



In [7]:

    
cluster_identities_table1 = cluster_identities.loc[table1.index]
cluster_identities_table1.head()









    Out[7]:





barcode
r1_TTCCTGCTAGGC    24
r1_TGGAGATACTCT    24
r1_CGTCTACATCCG    24
r1_CAAGCTTGGCGC    24
r1_ACTCACATAGAG    24
Name: cluster_id, dtype: int64



In [8]:

    
cluster_ids = cluster_identities_table1.unique()
colors = sns.color_palette(palette='Set2', n_colors=len(cluster_ids))
id_to_color = dict(zip(cluster_ids, colors))

color_labels = [id_to_color[i] for i in cluster_identities_table1]
color_labels[:4]









    Out[8]:





[(0.40000000000000002, 0.76078431372549016, 0.6470588235294118),
 (0.40000000000000002, 0.76078431372549016, 0.6470588235294118),
 (0.40000000000000002, 0.76078431372549016, 0.6470588235294118),
 (0.40000000000000002, 0.76078431372549016, 0.6470588235294118)]

Plot the original, dropout'd data



In [9]:

    
sns.set(style='whitegrid')



In [10]:

    
mask = table1 == 0

fig, ax = plt.subplots()
sns.heatmap(table1, mask=mask, xticklabels=[], yticklabels=[])
ax.set(xlabel='Genes', ylabel='Cells')









    Out[10]:





[<matplotlib.text.Text at 0x11a9df860>, <matplotlib.text.Text at 0x11a9cb5c0>]

Maybe this is small enough for a clustered heatmap



In [11]:

    
clustergrid = sns.clustermap(table1, mask=mask, xticklabels=[], yticklabels=[], 
                             row_colors=color_labels)









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)

Add Robust PCA implementations to path



In [12]:

    
import sys

sys.path.extend(['/Users/olgabot/code/robust-pca/', '/Users/olgabot/code/rpcaADMM/'])

import r_pca
import rpcaADMM



In [104]:

    
r_pca.R_pca??



In [13]:

    
%%time
rpca_alm = r_pca.R_pca(table1.as_matrix())
rpca_alm.fit()









    



iteration: 1, error: 56422.70929985199
iteration: 100, error: 0.6880109089868683
iteration: 200, error: 0.14702804887281945
iteration: 251, error: 0.08979496489742976
CPU times: user 15.1 s, sys: 145 ms, total: 15.3 s
Wall time: 3.9 s



In [14]:

    
rpca_alm.lmbda









    Out[14]:





0.057735026918962568



In [15]:

    
U, s, V = np.linalg.svd(rpca_alm.L)



In [16]:

    
U









    Out[16]:





array([[-0.04272573, -0.02831748, -0.00597595, ...,  0.00333803,
         0.00177218,  0.01580328],
       [-0.07864946, -0.04817703, -0.01001383, ..., -0.0114292 ,
         0.00301747,  0.00501156],
       [-0.05291033, -0.03174095, -0.00638981, ..., -0.00100707,
        -0.00933078,  0.01188982],
       ..., 
       [-0.01026694,  0.02322763, -0.03528178, ...,  0.03940001,
         0.00723089, -0.05019267],
       [-0.04240066,  0.0962479 , -0.13841497, ..., -0.00552324,
         0.00943978, -0.00371454],
       [-0.00583065,  0.02502047, -0.03610277, ..., -0.0189998 ,
        -0.03432167,  0.05701413]])



In [17]:

    
sns.distplot(s[s > 0.1], kde=False)









    Out[17]:





<matplotlib.axes._subplots.AxesSubplot at 0x11f209b70>



In [59]:

    
diff = rpca_alm.L - table1



In [60]:

    
datasets = {'Original': table1, 'Low-Rank':rpca_alm.L, 'Sparse': rpca_alm.S, 
            'Difference: Original - Low-Rank': diff}

common.heatmaps(datasets)



In [61]:

    
L = pd.DataFrame(rpca_alm.L, index=table1.index, columns=table1.columns)
L.head()









    Out[61]:







  
    
      
      RHO
      GNAT1
      SLC24A1
      PDE6B
      PDC
      CNGA1
      RP1
      SAG
      NR2E3
      NRL
      ...
      SLC6A6
      MAP1B
      TMA7
      STX3
      SYT1
      CRX
      SNAP25
      MPP4
      NEUROD1
      A930011O12RIK
    
    
      barcode
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      r1_TTCCTGCTAGGC
      7.272295
      3.048168
      1.562439
      2.198954
      4.326513
      1.385031
      1.989380
      6.541110
      1.689744
      1.345555
      ...
      0.313114
      0.767164
      0.864139
      0.388544
      0.150222
      0.752207
      0.272070
      0.832861
      1.258661
      0.856469
    
    
      r1_TGGAGATACTCT
      13.174154
      5.480442
      2.635686
      3.713715
      7.521109
      2.310180
      3.257731
      11.804131
      2.741139
      2.363235
      ...
      0.815807
      1.031292
      1.597556
      0.999699
      0.884529
      1.033574
      0.724387
      1.236772
      2.351665
      1.286273
    
    
      r1_CGTCTACATCCG
      8.820288
      3.785870
      1.735920
      2.396476
      5.171789
      1.561243
      2.092837
      7.836292
      1.876383
      1.537487
      ...
      0.382253
      0.972003
      0.828423
      0.656586
      0.391978
      0.916053
      0.429003
      0.958758
      1.373776
      0.703660
    
    
      r1_CAAGCTTGGCGC
      25.982230
      11.021147
      5.608453
      7.951609
      15.774301
      4.953723
      7.427482
      24.229816
      6.077871
      4.769225
      ...
      1.644721
      3.139924
      3.271503
      2.906614
      1.999058
      2.751189
      2.020806
      3.190714
      5.678790
      3.812966
    
    
      r1_ACTCACATAGAG
      7.614892
      3.096781
      1.282597
      1.838656
      4.131712
      1.165996
      1.602792
      6.662531
      1.363853
      1.237918
      ...
      0.687691
      0.804546
      1.079846
      0.874430
      0.684653
      0.541885
      0.754388
      0.499074
      1.349995
      0.391946
    
  

5 rows × 259 columns



In [63]:

    
sns.distplot(table1.values.flat)









    Out[63]:





<matplotlib.axes._subplots.AxesSubplot at 0x132164ac8>



In [62]:

    
sns.distplot(L.values.flat)









    Out[62]:





<matplotlib.axes._subplots.AxesSubplot at 0x12180e0f0>



In [101]:

    
diff = table1 - L
diff_tidy = diff.unstack().reset_index()
diff_tidy['dataset'] = 'Difference'

table1_tidy = table1.unstack().reset_index()
table1_tidy['dataset'] = 'Original'
L_tidy = L.unstack().reset_index()
L_tidy['dataset'] = 'Low-Rank'

tidy = pd.concat([table1_tidy, L_tidy, diff_tidy])
tidy = tidy.rename(columns={0: 'molecules'})
tidy.head()

sns.violinplot(x='dataset', y='molecules', data=tidy)









    Out[101]:







  
    
      
      level_0
      barcode
      molecules
      dataset
    
  
  
    
      0
      RHO
      r1_TTCCTGCTAGGC
      14.0
      Original
    
    
      1
      RHO
      r1_TGGAGATACTCT
      23.0
      Original
    
    
      2
      RHO
      r1_CGTCTACATCCG
      14.0
      Original
    
    
      3
      RHO
      r1_CAAGCTTGGCGC
      62.0
      Original
    
    
      4
      RHO
      r1_ACTCACATAGAG
      10.0
      Original



In [103]:

    
sns.boxplot(x='dataset', y='molecules', data=tidy)









    Out[103]:





<matplotlib.axes._subplots.AxesSubplot at 0x11f08da58>



In [37]:

    
S = pd.DataFrame(rpca_alm.S, index=table1.index, columns=table1.columns)
S.head()









    Out[37]:







  
    
      
      RHO
      GNAT1
      SLC24A1
      PDE6B
      PDC
      CNGA1
      RP1
      SAG
      NR2E3
      NRL
      ...
      SLC6A6
      MAP1B
      TMA7
      STX3
      SYT1
      CRX
      SNAP25
      MPP4
      NEUROD1
      A930011O12RIK
    
    
      barcode
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      r1_TTCCTGCTAGGC
      6.727705
      -0.048168
      -0.562439
      0.801046
      7.673487
      -1.385031
      -0.989380
      0.458890
      0.310256
      0.654445
      ...
      0.686886
      0.232836
      1.135861
      -0.388544
      -0.150222
      -0.752207
      -0.272070
      0.167139
      -1.258661
      -0.856469
    
    
      r1_TGGAGATACTCT
      9.825846
      2.519558
      3.364314
      0.286285
      5.478891
      6.689820
      -1.257731
      7.195869
      -1.741139
      -1.363235
      ...
      2.184193
      -1.031292
      0.402444
      -0.000000
      -0.884529
      -0.033574
      -0.724387
      0.763228
      -2.351665
      -0.286273
    
    
      r1_CGTCTACATCCG
      5.179712
      0.214130
      5.264080
      -1.396476
      0.828211
      1.438757
      -2.092837
      5.163708
      0.123617
      0.462513
      ...
      -0.382253
      0.027997
      -0.828423
      2.343414
      -0.391978
      0.083947
      -0.429003
      1.041242
      -1.373776
      -0.703660
    
    
      r1_CAAGCTTGGCGC
      36.017770
      6.978853
      4.391547
      12.048391
      13.225699
      -2.953723
      0.572518
      6.770184
      2.922129
      -2.769225
      ...
      -1.644721
      1.860076
      3.728497
      0.093386
      -0.000000
      3.248811
      -0.020806
      -0.190714
      1.321210
      7.187034
    
    
      r1_ACTCACATAGAG
      2.385108
      -2.096781
      -1.282597
      -0.838656
      0.868288
      0.834004
      -0.602792
      0.337469
      1.636147
      -0.237918
      ...
      0.312309
      0.195454
      0.920154
      2.125570
      0.315347
      1.458115
      0.245612
      -0.499074
      1.650005
      -0.391946
    
  

5 rows × 259 columns



In [21]:

    
diff.head()









    Out[21]:







  
    
      
      RHO
      GNAT1
      SLC24A1
      PDE6B
      PDC
      CNGA1
      RP1
      SAG
      NR2E3
      NRL
      ...
      SLC6A6
      MAP1B
      TMA7
      STX3
      SYT1
      CRX
      SNAP25
      MPP4
      NEUROD1
      A930011O12RIK
    
    
      barcode
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      r1_TTCCTGCTAGGC
      -6.727705
      0.048168
      0.562439
      -0.801046
      -7.673487
      1.385031
      0.989380
      -0.458890
      -0.310256
      -0.654445
      ...
      -0.686886
      -0.232836
      -1.135861
      0.388544
      0.150222
      0.752207
      0.272070
      -0.167139
      1.258661
      0.856469
    
    
      r1_TGGAGATACTCT
      -9.825846
      -2.519558
      -3.364314
      -0.286285
      -5.478891
      -6.689820
      1.257731
      -7.195869
      1.741139
      1.363235
      ...
      -2.184193
      1.031292
      -0.402444
      -0.000301
      0.884529
      0.033574
      0.724387
      -0.763228
      2.351665
      0.286273
    
    
      r1_CGTCTACATCCG
      -5.179712
      -0.214130
      -5.264080
      1.396476
      -0.828211
      -1.438757
      2.092837
      -5.163708
      -0.123617
      -0.462513
      ...
      0.382253
      -0.027997
      0.828423
      -2.343414
      0.391978
      -0.083947
      0.429003
      -1.041242
      1.373776
      0.703660
    
    
      r1_CAAGCTTGGCGC
      -36.017770
      -6.978853
      -4.391547
      -12.048391
      -13.225699
      2.953723
      -0.572518
      -6.770184
      -2.922129
      2.769225
      ...
      1.644721
      -1.860076
      -3.728497
      -0.093386
      -0.000942
      -3.248811
      0.020806
      0.190714
      -1.321210
      -7.187034
    
    
      r1_ACTCACATAGAG
      -2.385108
      2.096781
      1.282597
      0.838656
      -0.868288
      -0.834004
      0.602792
      -0.337469
      -1.636147
      0.237918
      ...
      -0.312309
      -0.195454
      -0.920154
      -2.125570
      -0.315347
      -1.458115
      -0.245612
      0.499074
      -1.650005
      0.391946
    
  

5 rows × 259 columns



In [22]:

    
gr0 = rpca_alm.L > 0
diff_gr0 = table1 - gr0

datasets = {'Original': table1, 'Low-Rank':rpca_alm.L, 'Sparse': rpca_alm.S, 
            'Difference: Original - Low-Rank': diff_gr0}

common.heatmaps(datasets)



In [23]:

    
clustergrid = sns.clustermap(L, xticklabels=[], yticklabels=[], 
                             row_colors=color_labels)









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)



In [24]:

    
g_original = sns.clustermap(table1.T.corr(method='spearman'), xticklabels=[], yticklabels=[], 
                             col_colors=color_labels)









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)



In [88]:

    
import fastcluster



In [89]:

    
fastcluster.pdist?



In [95]:

    
table1_clustergrid = common.clustermap(table1.T.corr(method='spearman'), col_colors=color_labels)
table1_clustergrid.savefig(os.path.join(figure_folder, 'expression_table1_clustermap.pdf'))









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)



In [39]:

    
S.head()









    Out[39]:







  
    
      
      RHO
      GNAT1
      SLC24A1
      PDE6B
      PDC
      CNGA1
      RP1
      SAG
      NR2E3
      NRL
      ...
      SLC6A6
      MAP1B
      TMA7
      STX3
      SYT1
      CRX
      SNAP25
      MPP4
      NEUROD1
      A930011O12RIK
    
    
      barcode
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      r1_TTCCTGCTAGGC
      6.727705
      -0.048168
      -0.562439
      0.801046
      7.673487
      -1.385031
      -0.989380
      0.458890
      0.310256
      0.654445
      ...
      0.686886
      0.232836
      1.135861
      -0.388544
      -0.150222
      -0.752207
      -0.272070
      0.167139
      -1.258661
      -0.856469
    
    
      r1_TGGAGATACTCT
      9.825846
      2.519558
      3.364314
      0.286285
      5.478891
      6.689820
      -1.257731
      7.195869
      -1.741139
      -1.363235
      ...
      2.184193
      -1.031292
      0.402444
      -0.000000
      -0.884529
      -0.033574
      -0.724387
      0.763228
      -2.351665
      -0.286273
    
    
      r1_CGTCTACATCCG
      5.179712
      0.214130
      5.264080
      -1.396476
      0.828211
      1.438757
      -2.092837
      5.163708
      0.123617
      0.462513
      ...
      -0.382253
      0.027997
      -0.828423
      2.343414
      -0.391978
      0.083947
      -0.429003
      1.041242
      -1.373776
      -0.703660
    
    
      r1_CAAGCTTGGCGC
      36.017770
      6.978853
      4.391547
      12.048391
      13.225699
      -2.953723
      0.572518
      6.770184
      2.922129
      -2.769225
      ...
      -1.644721
      1.860076
      3.728497
      0.093386
      -0.000000
      3.248811
      -0.020806
      -0.190714
      1.321210
      7.187034
    
    
      r1_ACTCACATAGAG
      2.385108
      -2.096781
      -1.282597
      -0.838656
      0.868288
      0.834004
      -0.602792
      0.337469
      1.636147
      -0.237918
      ...
      0.312309
      0.195454
      0.920154
      2.125570
      0.315347
      1.458115
      0.245612
      -0.499074
      1.650005
      -0.391946
    
  

5 rows × 259 columns



In [49]:

    
sns.distplot(S.values.flat)









    Out[49]:





<matplotlib.axes._subplots.AxesSubplot at 0x1248a71d0>



In [52]:

    
np.median(S.values)









    Out[52]:





0.0



In [56]:

    
high_in_sparse = (S > 10).any()
print(high_in_sparse.sum())
S.loc[:, high_in_sparse]









    



88






    Out[56]:







  
    
      
      RHO
      GNAT1
      SLC24A1
      PDE6B
      PDC
      CNGA1
      RP1
      SAG
      NR2E3
      NRL
      ...
      TTYH1
      PAX6
      MGARP
      HSP90AA1
      SLC6A6
      MAP1B
      TMA7
      SYT1
      SNAP25
      A930011O12RIK
    
    
      barcode
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      r1_TTCCTGCTAGGC
      6.727705
      -0.048168
      -0.562439
      0.801046
      7.673487
      -1.385031
      -0.989380
      0.458890
      0.310256
      0.654445
      ...
      -0.000000
      -0.000000
      -0.302098
      -0.081445
      0.686886
      0.232836
      1.135861
      -0.150222
      -0.272070
      -0.856469
    
    
      r1_TGGAGATACTCT
      9.825846
      2.519558
      3.364314
      0.286285
      5.478891
      6.689820
      -1.257731
      7.195869
      -1.741139
      -1.363235
      ...
      1.331571
      0.998086
      -1.093359
      -1.256187
      2.184193
      -1.031292
      0.402444
      -0.884529
      -0.724387
      -0.286273
    
    
      r1_CGTCTACATCCG
      5.179712
      0.214130
      5.264080
      -1.396476
      0.828211
      1.438757
      -2.092837
      5.163708
      0.123617
      0.462513
      ...
      -0.341183
      0.999751
      1.902183
      3.171176
      -0.382253
      0.027997
      -0.828423
      -0.391978
      -0.429003
      -0.703660
    
    
      r1_CAAGCTTGGCGC
      36.017770
      6.978853
      4.391547
      12.048391
      13.225699
      -2.953723
      0.572518
      6.770184
      2.922129
      -2.769225
      ...
      0.814278
      0.000000
      -1.179403
      -0.254800
      -1.644721
      1.860076
      3.728497
      -0.000000
      -0.020806
      7.187034
    
    
      r1_ACTCACATAGAG
      2.385108
      -2.096781
      -1.282597
      -0.838656
      0.868288
      0.834004
      -0.602792
      0.337469
      1.636147
      -0.237918
      ...
      -0.082806
      -0.000000
      -0.509653
      -0.000000
      0.312309
      0.195454
      0.920154
      0.315347
      0.245612
      -0.391946
    
    
      r1_TAACGGACACGC
      21.380107
      1.663735
      -0.021046
      -0.222488
      12.299926
      4.287691
      -0.672176
      0.000000
      -2.237709
      9.267142
      ...
      4.740199
      0.000000
      0.000000
      -0.000000
      0.952529
      -1.441017
      -0.406451
      1.892411
      -1.088772
      0.354429
    
    
      r1_CGCATGGGATAC
      13.959024
      3.970865
      -1.348141
      -0.892688
      2.954439
      -0.215551
      1.402263
      -0.000000
      -0.442402
      5.729592
      ...
      2.569047
      0.000000
      -0.719147
      1.672199
      0.435023
      1.349910
      -0.755655
      -0.504931
      0.403079
      -0.428073
    
    
      r1_TAACGACGCTTG
      3.180794
      -0.447518
      0.109158
      -0.257555
      -2.279500
      0.203602
      3.582963
      2.171476
      -0.986389
      0.327993
      ...
      -0.133089
      -0.000000
      -0.236654
      0.158758
      -0.000000
      0.434394
      -0.410368
      -0.045431
      0.000000
      2.149970
    
    
      r1_TCGGCAGCCTCT
      0.000000
      6.456903
      -1.145033
      1.899016
      0.735434
      1.092624
      0.135634
      9.117565
      0.692410
      2.080789
      ...
      -0.266130
      0.000000
      0.000000
      -0.887233
      -0.409884
      -1.027818
      -0.134590
      -0.516927
      -0.389400
      -1.188493
    
    
      r1_TAGGATGCAAAC
      0.973777
      -2.880712
      2.373638
      0.301869
      -3.082936
      -2.280995
      5.395161
      0.074149
      8.163804
      0.806921
      ...
      -0.570322
      -0.000000
      -1.104568
      -2.087004
      -0.265266
      0.564875
      1.717807
      -0.707985
      -0.499107
      5.146910
    
    
      r1_CGGTTACAGTAG
      12.505315
      1.261856
      2.428380
      -1.565133
      -1.709170
      -1.225406
      3.767564
      3.748447
      0.259539
      -1.144302
      ...
      -0.334235
      0.000000
      -0.000000
      -0.807533
      2.114339
      0.758462
      1.419423
      1.893210
      0.000000
      -0.454211
    
    
      r1_AATCGGATACGT
      11.933689
      -0.000000
      2.991893
      1.107978
      -1.669472
      1.209005
      5.214369
      0.000000
      -0.186109
      1.227780
      ...
      -0.452205
      0.000000
      -1.065484
      1.154736
      0.482219
      0.946174
      -0.982452
      1.195778
      2.396707
      1.631832
    
    
      r1_AGTGGGCTTGAG
      7.947213
      4.715814
      -1.638802
      -1.356734
      0.401653
      -0.493621
      0.793865
      -0.000000
      5.179635
      0.523270
      ...
      -0.212292
      0.000000
      0.039120
      1.593622
      0.396195
      -0.750549
      -0.893586
      1.234860
      0.221824
      1.878540
    
    
      r1_TTCACCTACCGC
      10.781769
      1.930491
      0.537446
      -2.004599
      0.901076
      -0.239981
      -1.714882
      -0.079131
      2.512117
      -0.276403
      ...
      0.165085
      -0.000000
      0.017907
      0.238652
      -0.090347
      -0.579233
      1.459530
      -0.516233
      -0.232060
      -0.625630
    
    
      r1_TTATGTCGTCCT
      2.270350
      3.943689
      2.119016
      1.400406
      3.516772
      1.362922
      -2.202215
      1.637913
      -1.913613
      1.275680
      ...
      -0.533194
      0.000000
      -0.000000
      2.163319
      -0.309056
      3.075198
      0.000000
      0.209408
      -0.560234
      -0.595371
    
    
      r1_ATCAGCGCAGTC
      0.000000
      -0.565269
      0.021651
      0.567229
      -2.430373
      0.126985
      4.493267
      2.215334
      0.862813
      -0.714553
      ...
      -0.000000
      0.000000
      0.537775
      0.056646
      -0.073409
      0.411675
      0.550113
      -0.144370
      0.780021
      7.097770
    
    
      r1_CTTTATGGTGAC
      7.572463
      -0.903545
      -0.000000
      2.227700
      5.103486
      3.413487
      1.485007
      10.665482
      0.584033
      -0.507655
      ...
      1.319728
      0.000000
      0.206072
      -1.192753
      2.364657
      -1.385998
      -0.295737
      0.000000
      1.001619
      1.978113
    
    
      r1_GAATCGGGAACA
      0.130061
      -2.574404
      0.207521
      -0.441166
      -2.970246
      -0.523037
      3.736323
      3.206105
      -0.825140
      -0.521023
      ...
      1.157655
      -0.000000
      -0.024933
      0.125958
      -0.152853
      -0.815796
      1.130103
      0.207391
      -0.320568
      -0.858338
    
    
      r1_GAAGTGATCACC
      7.198676
      1.907994
      -0.363277
      -0.764196
      8.988425
      0.000000
      -1.520747
      9.571634
      -0.658129
      -1.910792
      ...
      0.000000
      0.000000
      -1.730510
      3.680007
      -0.000000
      0.058526
      0.000000
      -0.000000
      -0.994082
      -1.859172
    
    
      r1_AGTGGGCGGCCG
      8.848392
      0.895194
      -0.163581
      3.340794
      0.000000
      0.958213
      -0.538988
      -1.387186
      5.711675
      0.000000
      ...
      -0.115035
      0.000000
      0.269487
      1.013680
      -0.113011
      -0.443464
      -0.403637
      0.567080
      1.632395
      5.046891
    
    
      r1_ACTGATGATTAA
      -0.166009
      0.258877
      0.679583
      0.564598
      1.074236
      -0.259644
      -0.234408
      -0.000000
      -0.281426
      0.673068
      ...
      0.000000
      -0.000000
      0.771423
      -0.207019
      0.917991
      1.837832
      -0.114600
      -0.030735
      -0.216464
      -0.000000
    
    
      r1_CGCCCGTCTGTA
      11.020429
      0.000000
      0.081976
      -2.722703
      4.451928
      0.291661
      0.406819
      2.286490
      0.938283
      0.300258
      ...
      -0.292419
      0.000000
      0.727774
      0.068704
      -0.214730
      -0.000000
      -0.923698
      1.250572
      -0.505095
      -0.963568
    
    
      r1_GGGCTTGGGAAG
      -0.000000
      2.448623
      -0.000000
      0.493179
      -0.228614
      -0.881305
      -1.331266
      0.641512
      -0.062537
      1.000179
      ...
      -0.278898
      -0.000000
      3.872065
      1.717896
      -0.459817
      -0.628542
      0.258535
      4.167146
      1.243851
      0.345875
    
    
      r1_TTAATGACTACA
      -0.000000
      0.866470
      -1.088180
      3.399241
      0.000000
      1.020583
      6.455940
      -0.432606
      0.780824
      -0.900402
      ...
      -0.071543
      0.000000
      2.491098
      2.013177
      -0.132724
      -0.505365
      0.455063
      -0.155937
      0.761623
      3.373602
    
    
      r1_CGGCTGTCTGCT
      19.308350
      5.889286
      0.084709
      4.388549
      -0.419955
      -0.645631
      -2.231481
      3.700417
      -0.926757
      -1.735463
      ...
      0.000000
      -0.000000
      1.313213
      0.745053
      -0.448072
      -0.966678
      0.017107
      0.784523
      0.099009
      -0.836805
    
    
      r1_ATGATTATGGTT
      4.632167
      3.751768
      -0.967775
      -0.363413
      4.001080
      0.158328
      0.758410
      -2.679809
      -0.982320
      -0.870134
      ...
      -0.059819
      -0.000000
      -0.253736
      1.133169
      -0.000000
      -0.453014
      2.358229
      -0.074023
      -0.050509
      -0.206752
    
    
      r1_TTTACTTCAAGG
      -1.347545
      0.221071
      0.604238
      0.056137
      0.059580
      -0.206978
      9.143060
      0.000000
      -1.473954
      -0.173521
      ...
      -0.016236
      -0.000000
      -0.419281
      -0.088003
      -0.000000
      0.197948
      0.362459
      -0.000000
      -0.000000
      0.143271
    
    
      r1_ATGGCTCGCAAA
      6.387332
      -0.915061
      1.178258
      0.446398
      2.697413
      0.388423
      -2.239412
      2.655880
      -1.887992
      -1.644699
      ...
      -0.250518
      -0.000000
      0.000000
      2.312154
      -0.363505
      -0.821810
      -0.000000
      -0.645316
      0.432266
      -0.786629
    
    
      r1_CGATGGCTGGAC
      17.101099
      3.885594
      -0.803805
      -0.557101
      3.556439
      -0.605906
      -1.171681
      2.418840
      1.088428
      -0.678596
      ...
      -0.257935
      0.000000
      -0.000000
      -1.433706
      2.095816
      0.051474
      -1.320981
      0.017615
      0.947546
      -0.662259
    
    
      r1_GCGTGCTACTAC
      2.224974
      0.063991
      -1.916026
      -0.721554
      -1.373457
      0.313343
      -0.444382
      4.467587
      -2.033359
      1.282551
      ...
      0.485015
      0.000000
      -0.935773
      0.270485
      0.478233
      -0.788953
      -0.000000
      0.252681
      -0.563404
      0.925160
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      r1_CGAAACTATCGC
      2.195243
      0.011369
      0.000000
      -0.000000
      -0.107809
      0.000000
      -0.000000
      -0.534394
      -0.000000
      -0.000000
      ...
      -1.361469
      1.173269
      0.000000
      -0.291303
      0.000000
      0.775780
      -0.000000
      1.852239
      -0.000000
      0.000000
    
    
      r1_CCCCTCTCTGGC
      0.000000
      -0.000000
      0.000000
      -0.000000
      -0.114969
      -0.000000
      0.932384
      0.363043
      -0.009413
      -0.000000
      ...
      5.316246
      0.353796
      0.000000
      -0.077829
      -0.000000
      -0.146776
      0.000000
      0.000000
      0.015977
      -0.000000
    
    
      r1_ATCAATATTCTC
      -0.000000
      -0.000000
      0.000000
      0.000000
      0.817163
      0.000000
      -0.000000
      0.000000
      -0.000000
      0.000000
      ...
      1.092017
      0.044050
      4.603456
      -0.867112
      0.023728
      -0.000000
      -0.000000
      1.872482
      -0.000000
      -0.019444
    
    
      r1_TCTCTGTGACGC
      1.528452
      -0.012077
      0.011134
      1.023459
      0.917759
      1.983044
      -0.000000
      0.586865
      -0.000000
      0.000000
      ...
      2.050997
      -0.538215
      0.736698
      0.470899
      0.000000
      -0.000000
      0.014210
      0.000000
      0.000000
      -0.069790
    
    
      r1_GGCGGACTGCGT
      1.628706
      -0.096092
      -0.000000
      -0.016540
      3.745887
      -0.029418
      -0.050897
      0.000000
      -0.017742
      0.935019
      ...
      -2.207297
      -0.633798
      2.238783
      -0.073710
      1.017559
      -0.080742
      -0.000000
      0.037455
      0.000000
      0.000000
    
    
      r1_GAAGAGTATCTT
      -0.165410
      0.000000
      -0.000000
      0.000000
      0.855833
      0.994807
      -0.000000
      -0.000000
      0.013863
      0.000000
      ...
      -0.000000
      0.480201
      4.156182
      2.273838
      0.000000
      -0.000000
      0.000000
      1.541622
      -0.000000
      0.000000
    
    
      r1_GTTACACGAGTC
      1.943480
      -0.000000
      0.957764
      -0.000000
      -0.047855
      -0.000000
      0.000000
      -0.000000
      -0.000000
      0.000000
      ...
      5.790545
      0.630836
      -0.187083
      1.666287
      -0.000000
      -0.000000
      0.000000
      0.000000
      -0.000000
      0.000000
    
    
      r1_CATTGGTCTCAC
      3.070188
      -0.174132
      -1.029044
      -0.515659
      0.703719
      0.066136
      -1.424556
      0.614585
      0.795668
      -0.903366
      ...
      5.600525
      -0.425672
      10.884224
      -2.120829
      0.643494
      2.271292
      -0.588402
      -0.182806
      -0.312266
      0.047062
    
    
      r1_TATACTAAGTTT
      2.770301
      -0.445298
      0.776546
      -0.158671
      0.332642
      0.835105
      -0.002061
      1.015680
      -0.066518
      0.703998
      ...
      2.984812
      1.252434
      -0.264520
      -0.000000
      -0.000000
      -0.000000
      -0.105667
      3.254264
      0.579125
      0.817419
    
    
      r1_AGATCATCGTCC
      -0.000000
      -0.048309
      0.000000
      -0.000000
      0.866396
      -0.000000
      -0.000000
      0.000000
      0.999137
      0.000000
      ...
      -1.932864
      0.409695
      1.500891
      3.290392
      0.000000
      -0.000000
      -0.000000
      0.000000
      0.000000
      0.000000
    
    
      r1_CGGATTTACACT
      1.784631
      -0.000000
      0.000000
      -0.000000
      -0.045397
      -0.000000
      0.938590
      -0.002960
      0.007074
      0.000000
      ...
      0.000000
      2.048196
      4.595213
      -0.458753
      0.000000
      0.885203
      -0.000000
      -0.000000
      0.000000
      -0.000000
    
    
      r1_TATAGGAACAAA
      0.438695
      0.032053
      0.000000
      0.000000
      0.000000
      0.000000
      -0.002251
      0.562090
      0.000000
      -0.000000
      ...
      3.084729
      0.793422
      0.798523
      -0.000000
      0.974947
      0.893590
      0.000000
      0.883523
      -0.000000
      -0.000000
    
    
      r1_ACCATGTTGGGA
      1.041601
      0.219584
      -0.108511
      -0.156109
      0.000000
      -0.084239
      -0.019090
      -1.420963
      -0.089953
      -0.222936
      ...
      9.790146
      -0.121851
      7.362139
      -0.428579
      -0.000000
      -0.102834
      -0.036901
      -0.136796
      -0.000000
      0.000000
    
    
      r1_TCAAAGATAGGG
      -0.000000
      0.974528
      0.000000
      1.011693
      0.945346
      -0.000000
      -0.000000
      -0.000000
      0.000000
      -0.000000
      ...
      -0.000000
      0.622028
      1.443698
      -0.204755
      0.000000
      -0.008213
      0.000000
      0.000000
      -0.000000
      -0.000000
    
    
      r1_TTTATATTTGGG
      0.659228
      -0.000000
      0.007339
      -0.000000
      -0.045322
      -0.000000
      0.918379
      0.659567
      0.986917
      0.000000
      ...
      2.566947
      -0.151579
      -0.455806
      0.884180
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      -0.000000
    
    
      r1_CACACCGCGTAG
      1.186377
      0.307160
      0.821878
      -0.101919
      -0.506890
      -0.041569
      0.955889
      -0.016220
      -0.000000
      -0.149948
      ...
      -0.673384
      0.136749
      7.032807
      1.076661
      -0.075586
      -0.022994
      -0.000000
      1.865617
      -0.325853
      0.000000
    
    
      r1_GCTCGGTTAGTT
      -0.000000
      -0.235687
      -0.026272
      -0.074866
      3.449860
      -0.061053
      -0.000000
      -0.000000
      -0.039245
      0.872183
      ...
      10.688130
      0.627932
      -0.508217
      1.689609
      1.875097
      -0.000000
      0.927727
      -0.000000
      -0.000000
      -0.000000
    
    
      r1_TAGAGGCCTATA
      -0.000000
      -0.000000
      0.000000
      0.015248
      -0.031060
      0.000000
      0.000000
      -0.000000
      0.010458
      0.000000
      ...
      5.591531
      0.283414
      4.287004
      5.259208
      0.000000
      -0.000000
      0.018663
      -0.000000
      -0.000000
      -0.009140
    
    
      r1_TATAAAAAATTT
      -0.000000
      -0.007146
      0.000000
      -0.000000
      0.000000
      -0.000000
      -0.000000
      0.000000
      -0.000000
      0.000000
      ...
      -0.670504
      0.829290
      0.232661
      0.875470
      0.000000
      0.000000
      0.015976
      0.000000
      0.000000
      0.000000
    
    
      r1_TCTAATATTCGC
      -0.331777
      0.000000
      1.008258
      0.019039
      -0.000000
      0.000000
      2.973341
      0.683886
      -0.000000
      -0.000000
      ...
      3.736617
      -0.362063
      -2.351407
      -0.000000
      -0.000000
      0.000000
      -0.000000
      -0.000000
      0.000000
      0.943395
    
    
      r1_AGGGTGGGTACA
      -0.000000
      0.000000
      -0.020779
      0.997030
      -0.000000
      0.000000
      0.000000
      0.969479
      0.000000
      0.000000
      ...
      0.599839
      1.621370
      -0.534722
      -0.000000
      -0.000000
      -0.000000
      -0.021030
      0.070620
      0.040233
      -0.000000
    
    
      r1_AATGCTGCAAGA
      -0.000000
      0.007975
      -0.000000
      0.006630
      -0.008046
      0.000000
      0.000000
      -0.000000
      0.000000
      0.000000
      ...
      2.606610
      -0.746857
      -1.296138
      0.232468
      -0.000000
      0.000000
      0.000000
      -0.000000
      -0.000000
      0.000000
    
    
      r1_GTCGGGCCTTTC
      -0.213827
      -0.000000
      -0.009420
      0.000000
      -0.104125
      0.000000
      -0.000000
      0.559870
      0.012508
      0.000000
      ...
      -2.811222
      0.000000
      16.412101
      0.950820
      -0.000000
      0.000000
      1.889713
      -0.000000
      -0.000000
      -0.000000
    
    
      r1_GGGTCAGCGGCG
      0.920600
      1.260236
      -0.105873
      -0.157246
      -0.929874
      -0.083545
      -0.074611
      -0.552968
      0.880902
      0.779157
      ...
      4.037112
      -1.204179
      -4.443643
      -0.000000
      -0.000000
      -0.000000
      1.804412
      -0.000000
      -0.000000
      0.000000
    
    
      r1_CTGGACCTGCCC
      0.000000
      -0.191608
      -0.026919
      -0.016774
      -0.234621
      -0.024769
      -0.000000
      -0.661173
      -0.000000
      0.927448
      ...
      3.241622
      -0.367363
      6.982276
      -0.138741
      -0.000000
      -0.000000
      0.000000
      -0.006284
      -0.000000
      0.000000
    
    
      r1_AAGATATTGCTG
      -0.185538
      0.960408
      1.035136
      0.000000
      0.801181
      0.000000
      0.000000
      0.712134
      -0.000000
      -0.000000
      ...
      4.298957
      1.678050
      4.851130
      1.807929
      0.016362
      -0.030886
      0.000000
      0.000000
      -0.028761
      -0.057232
    
    
      r1_GAGACCTCATGG
      0.000000
      -0.713697
      -0.271694
      0.508802
      0.860829
      -0.239491
      0.488049
      -0.363523
      1.646615
      -0.267568
      ...
      3.518335
      -0.540237
      1.018306
      0.000000
      -0.000000
      0.666421
      -0.003371
      -0.136384
      0.778941
      1.797944
    
    
      r1_CGGAGCGCGACA
      1.052581
      -0.099141
      -0.000000
      -0.000000
      -0.136871
      0.984826
      -0.030601
      0.970290
      -0.000000
      -0.000000
      ...
      -1.453753
      -0.163724
      2.973280
      0.522468
      -0.000000
      0.000000
      -0.000000
      -0.000000
      0.000000
      -0.007794
    
    
      r1_AAGGACAGATCC
      0.000000
      1.739662
      2.596491
      -0.543682
      -0.524763
      -0.284546
      0.579656
      -1.319067
      -0.308557
      1.550640
      ...
      8.426755
      -0.829454
      3.722234
      0.369819
      0.000000
      -0.061672
      -0.120301
      -0.437313
      -0.014598
      -0.000000
    
    
      r1_ATATGCACCCTA
      0.864573
      -0.000000
      -0.000000
      -0.000000
      0.000000
      0.000000
      0.000000
      -0.000000
      -0.000000
      -0.000000
      ...
      -1.270910
      -0.296835
      4.645703
      -0.000000
      -0.000000
      -0.003225
      0.000000
      0.000000
      -0.000000
      0.000000
    
  

300 rows × 88 columns



In [47]:

    
data = S[S > 0]
data = data.fillna(0)

g_rpca = sns.clustermap(data, xticklabels=[], yticklabels=[], row_colors=color_labels)









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)



In [38]:

    
data = S.T.corr(method='spearman')
g_rpca = sns.clustermap(data, xticklabels=[], yticklabels=[],
                             col_colors=color_labels, row_colors=color_labels)









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)



In [44]:

    
data = S.corr(method='spearman')
g_rpca = sns.clustermap(data, xticklabels=[], yticklabels=[])









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)



In [96]:

    
data = L.T.corr(method='spearman')
g_rpca = common.clustermap(data, col_colors=color_labels)
g_rpca.savefig(os.path.join(figure_folder, 'low_rank_clustermap.pdf'))









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)



In [99]:

    
U, s, V = np.linalg.svd(L)
plt.plot(s[:10], 'o-')









    Out[99]:





[<matplotlib.lines.Line2D at 0x13e2cbcf8>]



In [100]:

    
U, s, V = np.linalg.svd(table1)
plt.plot(s[:10], 'o-')









    Out[100]:





[<matplotlib.lines.Line2D at 0x132a45cf8>]

So this seemed to have flipped some of the cells into different types, and made the within-cluster distances smaller



In [45]:

    
reconstructed = L + S

data = reconstructed.T.corr(method='spearman')
g_rpca = sns.clustermap(data, xticklabels=[], yticklabels=[],
                             col_colors=color_labels, row_colors=color_labels)









    



/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The axisbg attribute was deprecated in version 2.0. Use facecolor instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)



In [81]:

    
csv = os.path.join(data_folder, 'sparse.csv')

S.to_csv(csv)



In [83]:

    
data_folder









    Out[83]:





'data/002_robust_pca'



In [82]:

    
csv = os.path.join(data_folder, 'lowrank.csv')

L.to_csv(csv)

Try ICA before and after



In [79]:

    
L.shape









    Out[79]:





(300, 259)



In [ ]:

    
from sklearn.decomposition import ICA

ica = ICA(n_components=)

ADMM implementation



In [36]:

    
reduced = rpcaADMM.rpcaADMM(table1)
# print(reduced.shape)
# reduced.head()









    



iter	    r norm	   eps pri	    s norm	  eps dual	 objective
   1	  386.9989	    5.6518	  560.3478	    5.5259	  46003.39
  10	  126.6648	    7.4377	   57.8995	    7.7838	 274951.69
  20	   19.5261	    7.9579	   26.0279	    7.5066	 297640.42
  30	   10.9950	    8.0745	   10.9821	    7.3901	 300438.68



In [62]:

    
rpcaADMM.rpcaADMM()



In [38]:

    
reduced.keys()









    Out[38]:





dict_keys(['objval', 'r_norm', 's_norm', 'eps_pri', 'eps_dual', 'addm_toc', 'admm_iter', 'X1_admm', 'X2_admm', 'X3_admm'])



In [76]:

    
ncols = 4
nrows = 1

axsize = 3

width = ncols * axsize
height = nrows * axsize

fig, axes = plt.subplots(ncols=ncols, figsize=(width, height))
axes_iter = axes.flat

x_keys = [key for key in reduced if key.startswith('X')]

ax = next(axes_iter)
data = table1
mask = data == 0
sns.heatmap(table1, mask=mask, ax=ax, xticklabels=[], yticklabels=[])
ax.set(title='Original')

for ax, key in zip(axes_iter, x_keys):
    data = reduced[key]
    mask = data == 0
    
    vmin = data.min().min()
    vmax = data.max().max()
    center = 0
    sns.heatmap(reduced[key], mask=mask, ax=ax, xticklabels=[], yticklabels=[])
    ax.set(title=key)



In [79]:

    
ncols = 4
nrows = 1

axsize = 3

width = ncols * axsize * 1.25
height = nrows * axsize

fig, axes = plt.subplots(ncols=ncols, figsize=(width, height))
axes_iter = axes.flat

x_keys = [key for key in reduced if key.startswith('X')]

ax = next(axes_iter)
common.heatmap(table1, ax=ax)
ax.set(title='Original')

for ax, key in zip(axes_iter, x_keys):
    common.heatmap(reduced[key], ax=ax)
    ax.set(title=key)



In [55]:

    
U, s, V = np.linalg.svd(reduced['X3_admm'])



In [61]:

    
reduced['X2_admm'][reduced['X2_admm'].nonzero()]









    Out[61]:





array([  0.06196856,   6.6911438 ,  13.40747184,   0.27285075,
        48.22056903,   2.24481196,   2.75026673,   0.05984601,   6.10384102])



In [80]:

    
ax









    



[autoreload of common failed: Traceback (most recent call last):
  File "/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 246, in check
    superreload(m, reload, self.old_objects)
  File "/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 369, in superreload
    module = reload(module)
  File "/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/imp.py", line 314, in reload
    return importlib.reload(module)
  File "/Users/olgabot/anaconda3/envs/cshl-sca-2017/lib/python3.6/importlib/__init__.py", line 166, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 608, in _exec
  File "<frozen importlib._bootstrap_external>", line 674, in exec_module
  File "<frozen importlib._bootstrap_external>", line 781, in get_code
  File "<frozen importlib._bootstrap_external>", line 741, in source_to_code
  File "<frozen importlib._bootstrap>", line 205, in _call_with_frames_removed
  File "/Users/olgabot/code/cshl-singlecell-2017/notebooks/02_tissue_subpopulations/common.py", line 34
    for ax, (key, data) in zip(key, datas.items()):
                                                  ^
SyntaxError: unexpected EOF while parsing
]






    Out[80]:





<matplotlib.axes._subplots.AxesSubplot at 0x124f07048>



In [81]:

    
sns.heatmap??



In [ ]:

	RHO	GNAT1	SLC24A1	PDE6B	PDC	CNGA1	RP1	SAG	NR2E3	NRL	...	SLC6A6	MAP1B	TMA7	STX3	SYT1	CRX	SNAP25	MPP4	NEUROD1	A930011O12RIK
barcode
r1_TTCCTGCTAGGC	14	3	1	3	12	0	1	7	2	2	...	1	1	2	0	0	0	0	1	0	0
r1_TGGAGATACTCT	23	8	6	4	13	9	2	19	1	1	...	3	0	2	1	0	1	0	2	0	1
r1_CGTCTACATCCG	14	4	7	1	6	3	0	13	2	2	...	0	1	0	3	0	1	0	2	0	0
r1_CAAGCTTGGCGC	62	18	10	20	29	2	8	31	9	2	...	0	5	7	3	2	6	2	3	7	11
r1_ACTCACATAGAG	10	1	0	1	5	2	1	7	3	1	...	1	1	2	3	1	2	1	0	3	0

	RHO	GNAT1	SLC24A1	PDE6B	PDC	CNGA1	RP1	SAG	NR2E3	NRL	...	SLC6A6	MAP1B	TMA7	STX3	SYT1	CRX	SNAP25	MPP4	NEUROD1	A930011O12RIK
barcode
r1_TTCCTGCTAGGC	7.272295	3.048168	1.562439	2.198954	4.326513	1.385031	1.989380	6.541110	1.689744	1.345555	...	0.313114	0.767164	0.864139	0.388544	0.150222	0.752207	0.272070	0.832861	1.258661	0.856469
r1_TGGAGATACTCT	13.174154	5.480442	2.635686	3.713715	7.521109	2.310180	3.257731	11.804131	2.741139	2.363235	...	0.815807	1.031292	1.597556	0.999699	0.884529	1.033574	0.724387	1.236772	2.351665	1.286273
r1_CGTCTACATCCG	8.820288	3.785870	1.735920	2.396476	5.171789	1.561243	2.092837	7.836292	1.876383	1.537487	...	0.382253	0.972003	0.828423	0.656586	0.391978	0.916053	0.429003	0.958758	1.373776	0.703660
r1_CAAGCTTGGCGC	25.982230	11.021147	5.608453	7.951609	15.774301	4.953723	7.427482	24.229816	6.077871	4.769225	...	1.644721	3.139924	3.271503	2.906614	1.999058	2.751189	2.020806	3.190714	5.678790	3.812966
r1_ACTCACATAGAG	7.614892	3.096781	1.282597	1.838656	4.131712	1.165996	1.602792	6.662531	1.363853	1.237918	...	0.687691	0.804546	1.079846	0.874430	0.684653	0.541885	0.754388	0.499074	1.349995	0.391946

	level_0	barcode	molecules	dataset
0	RHO	r1_TTCCTGCTAGGC	14.0	Original
1	RHO	r1_TGGAGATACTCT	23.0	Original
2	RHO	r1_CGTCTACATCCG	14.0	Original
3	RHO	r1_CAAGCTTGGCGC	62.0	Original
4	RHO	r1_ACTCACATAGAG	10.0	Original

	RHO	GNAT1	SLC24A1	PDE6B	PDC	CNGA1	RP1	SAG	NR2E3	NRL	...	SLC6A6	MAP1B	TMA7	STX3	SYT1	CRX	SNAP25	MPP4	NEUROD1	A930011O12RIK
barcode
r1_TTCCTGCTAGGC	6.727705	-0.048168	-0.562439	0.801046	7.673487	-1.385031	-0.989380	0.458890	0.310256	0.654445	...	0.686886	0.232836	1.135861	-0.388544	-0.150222	-0.752207	-0.272070	0.167139	-1.258661	-0.856469
r1_TGGAGATACTCT	9.825846	2.519558	3.364314	0.286285	5.478891	6.689820	-1.257731	7.195869	-1.741139	-1.363235	...	2.184193	-1.031292	0.402444	-0.000000	-0.884529	-0.033574	-0.724387	0.763228	-2.351665	-0.286273
r1_CGTCTACATCCG	5.179712	0.214130	5.264080	-1.396476	0.828211	1.438757	-2.092837	5.163708	0.123617	0.462513	...	-0.382253	0.027997	-0.828423	2.343414	-0.391978	0.083947	-0.429003	1.041242	-1.373776	-0.703660
r1_CAAGCTTGGCGC	36.017770	6.978853	4.391547	12.048391	13.225699	-2.953723	0.572518	6.770184	2.922129	-2.769225	...	-1.644721	1.860076	3.728497	0.093386	-0.000000	3.248811	-0.020806	-0.190714	1.321210	7.187034
r1_ACTCACATAGAG	2.385108	-2.096781	-1.282597	-0.838656	0.868288	0.834004	-0.602792	0.337469	1.636147	-0.237918	...	0.312309	0.195454	0.920154	2.125570	0.315347	1.458115	0.245612	-0.499074	1.650005	-0.391946

	RHO	GNAT1	SLC24A1	PDE6B	PDC	CNGA1	RP1	SAG	NR2E3	NRL	...	SLC6A6	MAP1B	TMA7	STX3	SYT1	CRX	SNAP25	MPP4	NEUROD1	A930011O12RIK
barcode
r1_TTCCTGCTAGGC	-6.727705	0.048168	0.562439	-0.801046	-7.673487	1.385031	0.989380	-0.458890	-0.310256	-0.654445	...	-0.686886	-0.232836	-1.135861	0.388544	0.150222	0.752207	0.272070	-0.167139	1.258661	0.856469
r1_TGGAGATACTCT	-9.825846	-2.519558	-3.364314	-0.286285	-5.478891	-6.689820	1.257731	-7.195869	1.741139	1.363235	...	-2.184193	1.031292	-0.402444	-0.000301	0.884529	0.033574	0.724387	-0.763228	2.351665	0.286273
r1_CGTCTACATCCG	-5.179712	-0.214130	-5.264080	1.396476	-0.828211	-1.438757	2.092837	-5.163708	-0.123617	-0.462513	...	0.382253	-0.027997	0.828423	-2.343414	0.391978	-0.083947	0.429003	-1.041242	1.373776	0.703660
r1_CAAGCTTGGCGC	-36.017770	-6.978853	-4.391547	-12.048391	-13.225699	2.953723	-0.572518	-6.770184	-2.922129	2.769225	...	1.644721	-1.860076	-3.728497	-0.093386	-0.000942	-3.248811	0.020806	0.190714	-1.321210	-7.187034
r1_ACTCACATAGAG	-2.385108	2.096781	1.282597	0.838656	-0.868288	-0.834004	0.602792	-0.337469	-1.636147	0.237918	...	-0.312309	-0.195454	-0.920154	-2.125570	-0.315347	-1.458115	-0.245612	0.499074	-1.650005	0.391946