notebook.community

Edit and run



In [1]:

    
from collections import defaultdict
from random import random
import pysam
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import glob
import os
import sklearn
from sklearn import cluster
import cPickle as pickle









    



/home/avi/miniconda2/lib/python2.7/site-packages/IPython/html.py:14: ShimWarning: The `IPython.html` package has been deprecated since IPython 4.0. You should import from `notebook` instead. `IPython.html.widgets` has moved to `ipywidgets`.
  "`IPython.html.widgets` has moved to `ipywidgets`.", ShimWarning)



In [2]:

    
# Read the three count matrices from CSV, using the first column of each
# file as the row index.
kal, utl, alv = (
    pd.read_csv(matrix_path, index_col=[0])
    for matrix_path in ('./kal-1k.mat', './utl-1k.mat', './fast-eq-1k.mat')
)



In [3]:

    
# Read the header-less barcodes.tsv file and take its first column; every
# "-1" substring is removed from each entry.
barcode_table = pd.read_table(
    "../../featureDump/tenx_counts_new/outs/filtered_gene_bc_matrices/hg19/barcodes.tsv",
    header=None,
)
tr_results = [barcode.replace("-1", "") for barcode in barcode_table[0].values]



In [ ]:



In [ ]:



In [4]:

    
# Load the comma-separated 10x count table and use its "Unnamed: 0" column as
# the row index.
tenx = pd.read_table("../../featureDump/tenx_counts_new/outs/human_counts.tsv", sep=',')
tenx = tenx.set_index("Unnamed: 0")
# Remove every "hg19_" from the row labels and every "-1" from the column
# labels (the same "-1" removal that is applied to the barcode list).
tenx.index = [gene_id.replace("hg19_", "") for gene_id in tenx.index]
tenx.columns = [barcode.replace("-1", "") for barcode in tenx.columns]



In [5]:

    
# (rows, columns) of each matrix as loaded, in the order: tenx, utl, kal, alv.
tuple(frame.shape for frame in (tenx, utl, kal, alv))









    Out[5]:





((58278, 501), (44021, 1017), (51937, 1017), (53284, 1017))



In [6]:

    
def get_corr_list(df):
    """Flatten the column-wise Spearman correlation matrix of ``df`` into a list.

    The matrix returned by ``df.corr(method="spearman")`` is read row by row.
    Every entry that compares equal to 0 or to 1 is left out; all remaining
    entries (NaN included, since NaN compares unequal to both) are returned as
    a plain list of floats in row-major order.

    NOTE(review): the exact ``!= 1`` comparison presumably exists to discard
    the diagonal self-correlations, but it also discards any off-diagonal
    value that is exactly 1 -- confirm this is intended.
    """
    flat = df.corr(method="spearman").values.ravel()
    keep = (flat != 0) & (flat != 1)
    return flat[keep].tolist()



In [7]:

    
# first_cluster = tr_results[tr_results['tr'] == 1].index
# second_cluster = tr_results[tr_results['tr'] == 0].index



In [8]:

    
# Entries of tr_results that are also column labels of utl, kept in the order
# they appear in tr_results.
na = []
for barcode in tr_results:
    if barcode in utl.columns:
        na.append(barcode)



In [ ]:



In [9]:

    
# For each matrix: keep only the columns listed in `na`, replace missing
# values with 0, then keep only the rows holding at least one non-zero value.
usub = utl[na].fillna(0)
usub = usub[(usub != 0).any(axis=1)]

ksub = kal[na].fillna(0)
ksub = ksub[(ksub != 0).any(axis=1)]

asub = alv[na].fillna(0)
asub = asub[(asub != 0).any(axis=1)]

tsub = tenx[na].fillna(0)
tsub = tsub[(tsub != 0).any(axis=1)]



In [10]:

    
# (rows, columns) of each subset after removing all-zero rows, in the order:
# tsub, usub, ksub, asub.
tuple(frame.shape for frame in (tsub, usub, ksub, asub))









    Out[10]:





((21903, 501), (35063, 501), (41150, 501), (42424, 501))



In [ ]:



In [13]:

    
# Remove every row whose index label contains 'ENSMUSG' (the Ensembl prefix
# for mouse gene IDs).
#
# tsub is filtered together with the other subsets: its index is what usub,
# ksub and asub are aligned to further down, so leaving it unfiltered could
# re-introduce labels that were just removed from those three frames.
# tenx is still filtered as before, so code that reads tenx sees the same rows.
def _drop_mouse_rows(counts):
    """Return ``counts`` without the rows whose index label contains 'ENSMUSG'."""
    mouse_rows = [gene_id for gene_id in counts.index if 'ENSMUSG' in gene_id]
    return counts.drop(mouse_rows)


# Rebinding (instead of inplace=True) keeps the cell safe to re-run and avoids
# mutating a frame that an earlier cell already displayed.
usub = _drop_mouse_rows(usub)
ksub = _drop_mouse_rows(ksub)
asub = _drop_mouse_rows(asub)
tsub = _drop_mouse_rows(tsub)
tenx = _drop_mouse_rows(tenx)



In [14]:

    
# (rows, columns) after the 'ENSMUSG' rows were dropped, in the order:
# tsub, usub, ksub, asub.
tuple(frame.shape for frame in (tsub, usub, ksub, asub))









    Out[14]:





((21903, 501), (24470, 501), (28596, 501), (28944, 501))



In [ ]:



In [17]:

    
# Select the rows of usub, ksub and asub by tsub's index labels, so all three
# follow tsub's row order.
shared_rows = tsub.index
usub, ksub, asub = (frame.loc[shared_rows] for frame in (usub, ksub, asub))



In [18]:

    
# (rows, columns) after aligning to tsub's index, in the order:
# tsub, usub, ksub, asub.
tuple(frame.shape for frame in (tsub, usub, ksub, asub))









    Out[18]:





((21903, 501), (21903, 501), (21903, 501), (21903, 501))



In [19]:

    
# Grand total of every matrix (column sums added together), in the order:
# tsub, usub, ksub, asub.
tuple(sum(frame.sum()) for frame in (tsub, usub, ksub, asub))









    Out[19]:





(17618591, 16255142.0, 18260462.0, 18056122.0)



In [ ]:



In [ ]:



In [20]:

    
# Flattened Spearman correlations between the barcode columns, one list per
# quantification method.
#
# The 10x list is built from tsub, not from the full tenx matrix: tsub is the
# barcode-restricted, zero-row-filtered frame whose index usub, ksub and asub
# were aligned to, so all four distributions are computed over the same rows
# and columns and can be compared directly.
utl_list = get_corr_list(usub)
kal_list = get_corr_list(ksub)
alv_list = get_corr_list(asub)
tenx_list = get_corr_list(tsub)



In [ ]:



In [21]:

    
#human
sns.kdeplot(np.array(utl_list), label = "Umi-tools")
sns.kdeplot(np.array(alv_list), label = "Alevin")
sns.kdeplot(np.array(tenx_list), label = "10x")
sns.kdeplot(np.array(kal_list), label = "kal")
plt.legend()









    Out[21]:





<matplotlib.legend.Legend at 0x7f5a668c5b90>



In [19]:

    
#mouse
sns.kdeplot(np.array(utl_list), label = "Umi-tools")
sns.kdeplot(np.array(alv_list), label = "Alevin")
sns.kdeplot(np.array(tenx_list), label = "10x")
sns.kdeplot(np.array(kal_list), label = "kal")
plt.legend()









    Out[19]:





<matplotlib.legend.Legend at 0x7f73b67d8bd0>



In [ ]:



In [28]:

    
# Density curves for the Alevin and kal correlation lists only, drawn in that
# order.
for corr_values, method_label in ((alv_list, "Alevin"), (kal_list, "kal")):
    sns.kdeplot(np.array(corr_values), label=method_label)
plt.legend()









    Out[28]:





<matplotlib.legend.Legend at 0x7f73b6dcf790>



In [ ]: