In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [10]:
def get_left_boundary(lsfreq):
    """Find the first point whose line from the origin has no later point below it.

    For each index ``x1`` (in increasing order) with value ``y1``, the line
    through ``(0, 0)`` and ``(x1, y1)`` is evaluated at every later index.
    The first ``x1`` for which that line never rises strictly above a later
    value of ``lsfreq`` is returned.

    Index 0 and every entry equal to ``lsfreq[0]`` are skipped. A progress
    message is written to stdout every 500 indices, using a carriage return
    so that successive messages overwrite each other.

    Parameters
    ----------
    lsfreq : sequence of numbers
        One-dimensional sequence of values; it is converted to a float array.

    Returns
    -------
    tuple or None
        ``(x1, y1)`` for the first qualifying index, or ``None`` when no
        index qualifies (for example empty input, or every value equal to
        the first one).
    """
    # Function-scope import: keeps this cell self-contained and lets the
    # progress output work on both Python 2 and Python 3.
    import sys

    values = np.asarray(lsfreq, dtype=float)
    total = len(values)
    positions = np.arange(total)

    for x1 in range(total):
        y1 = values[x1]
        if x1 % 500 == 0:
            # `//` keeps the whole-number percentage regardless of the
            # interpreter's division semantics.
            sys.stdout.write(
                "\r Parsed " + str(x1 * 100 // total) + "%(" + str(x1)
                + ") of Cellular Barcodes"
            )
            sys.stdout.flush()

        # Index 0 has no defined slope (division by zero); entries equal to
        # the first value are skipped as well.
        if x1 == 0 or y1 == values[0]:
            continue

        slope = y1 / x1
        tail = slice(x1 + 1, None)
        # The line y = slope * x must not exceed any later value. The check
        # is element-wise, so it matches a per-index comparison exactly.
        if not np.any(positions[tail] * slope > values[tail]):
            return x1, y1

    # No index satisfied the condition.
    return None

In [ ]:


In [13]:
# Read each run's alevin frequency.txt as a headerless table, so columns are
# addressed by integer position. Order of the paths matches the target names.
freq100, freq1k, freq12k, freq4k = (
    pd.read_table(path, header=None)
    for path in (
        './alv_out_100/alevin/frequency.txt',
        './alv_out/alevin/frequency.txt',
        './alv_out_12k/alevin/frequency.txt',
        './alv_out_4k/alevin/frequency.txt',
    )
)

In [ ]:


In [14]:
# 4k PBMC

In [26]:
# Log of the running total of the column labelled 1, accumulated from the
# last row of the table towards the first.
lsfreq = np.log(np.cumsum(np.array(freq4k[1].tolist()[::-1])))
x1, y1 = get_left_boundary(lsfreq)

# Everything from the boundary index to the end of the curve is counted.
# The parenthesised single-argument form prints identically on Python 2 and 3.
print("\nDetected " + str(len(lsfreq) - x1) + " barcodes")

plt.plot(lsfreq)
plt.plot([x1], [y1], 'r^')  # red triangle marks the detected boundary
plt.title("4k PBMC")
plt.xlabel("Index (rows of frequency.txt, reversed)")
plt.ylabel("log(cumulative frequency)")
plt.show()


 Parsed 95%(95000) of Cellular Barcodes                                                                                                                                                                                     
Detected 4893 barcodes

In [ ]:


In [ ]:


In [20]:
#1k cells

In [27]:
# Log of the running total of the column labelled 1, accumulated from the
# last row of the table towards the first.
lsfreq = np.log(np.cumsum(np.array(freq1k[1].tolist()[::-1])))
x1, y1 = get_left_boundary(lsfreq)

# Everything from the boundary index to the end of the curve is counted.
# The parenthesised single-argument form prints identically on Python 2 and 3.
print("\nDetected " + str(len(lsfreq) - x1) + " barcodes")

plt.plot(lsfreq)
plt.plot([x1], [y1], 'r^')  # red triangle marks the detected boundary
plt.title("1k cells")
plt.xlabel("Index (rows of frequency.txt, reversed)")
plt.ylabel("log(cumulative frequency)")
plt.show()


 Parsed 98%(98500) of Cellular Barcodes                                                                                                                                                                                       
Detected 1152 barcodes

In [ ]:


In [ ]:


In [22]:
#100 cells

In [28]:
# Log of the running total of the column labelled 1, accumulated from the
# last row of the table towards the first.
lsfreq = np.log(np.cumsum(np.array(freq100[1].tolist()[::-1])))
x1, y1 = get_left_boundary(lsfreq)

# Everything from the boundary index to the end of the curve is counted.
# The parenthesised single-argument form prints identically on Python 2 and 3.
print("\nDetected " + str(len(lsfreq) - x1) + " barcodes")

plt.plot(lsfreq)
plt.plot([x1], [y1], 'r^')  # red triangle marks the detected boundary
plt.title("100 cells")
plt.xlabel("Index (rows of frequency.txt, reversed)")
plt.ylabel("log(cumulative frequency)")
plt.show()


 Parsed 99%(99000) of Cellular Barcodes                                                                                                                                                                                         
Detected 651 barcodes

In [ ]:


In [ ]:


In [ ]:
#12k cells

In [29]:
# Log of the running total of the column labelled 1, accumulated from the
# last row of the table towards the first.
lsfreq = np.log(np.cumsum(np.array(freq12k[1].tolist()[::-1])))
x1, y1 = get_left_boundary(lsfreq)

# Everything from the boundary index to the end of the curve is counted.
# The parenthesised single-argument form prints identically on Python 2 and 3.
print("\nDetected " + str(len(lsfreq) - x1) + " barcodes")

plt.plot(lsfreq)
plt.plot([x1], [y1], 'r^')  # red triangle marks the detected boundary
plt.title("12k cells")
plt.xlabel("Index (rows of frequency.txt, reversed)")
plt.ylabel("log(cumulative frequency)")
plt.show()


 Parsed 86%(86500) of Cellular Barcodes                                                                                                                                                                          
Detected 13014 barcodes

In [ ]:


In [ ]: