In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
%matplotlib inline
import matplotlib.pyplot as plt
from collections import Counter
from collections import defaultdict
import pysam
from pyfasta import Fasta
from intervaltree import IntervalTree

In [2]:
# Index the annotation as gtf[chromosome][feature] -> [(start, end, feature_id), ...]
# for 'gene', 'transcript' and 'exon' records. start/end are kept as the raw
# strings from columns 4 and 5 of the file.
gtf = defaultdict(lambda: defaultdict(list))
with open('../data/mohu.gtf') as f:
    for record in f:
        if record[0] == '#':
            # header / comment line
            continue
        toks = record.strip().split('\t')
        chro, feature, start, end = toks[0], toks[2], toks[3], toks[4]
        if feature not in ['gene', 'transcript', 'exon']:
            continue
        # The id is what is left of every "<feature>_id ..." attribute after
        # removing the key itself and the double quotes.
        id_key = feature + '_id'
        id_parts = []
        for attribute in toks[-1].split(';'):
            if id_key in attribute:
                id_parts.append(attribute.replace(id_key, '').replace('"', '').strip())
        gtf[chro][feature].append((start, end, ''.join(id_parts)))

In [3]:
# One interval tree of exons per chromosome: exonDict[chromosome][position]
# returns the exon intervals covering that position, each carrying its exon id
# as `.data`. Coordinates stay in the GTF's own numbering.
exonDict = {}
for chrom in gtf:
    exon_tree = IntervalTree()
    for exon_start, exon_end, exon_id in gtf[chrom]['exon']:
        # upper bound is end + 1 so that the exon's last base is still covered
        exon_tree[int(exon_start):int(exon_end) + 1] = exon_id
    exonDict[chrom] = exon_tree

In [ ]:


In [ ]:


In [ ]:
# Transcriptome alignments (txp BAM): per-read alignment tally and gene assignment

In [4]:
count = defaultdict(int)
with pysam.AlignmentFile('../data/txp_nameSorted_subsample.bam', 'rb') as geFile:
    count_align = 0
    for alignment in geFile:
        count_align += 1
        if (count_align%1000 == 0):
            print "\r processed #alignments:" + str(count_align),
        try:
            count[alignment.qname] += 1
        except:
            print 'NH tag needed'
            exit(0)


 processed #alignments:6492000                                                                                                                                              

In [5]:
# Histogram of records-per-read: {number of records: number of reads}.
Counter(count.values())


Out[5]:
(Counter({1: 6492611}),)

In [6]:
# Lookup built from the headerless TSV: value in column 1 -> value in column 0.
# It is indexed below with BAM reference names to obtain the gene they belong to.
geneMap = pd.read_table('../data/geneMap.tsv', header=None).set_index(1)[0].to_dict()

In [7]:
# gene id -> number of reads credited to that gene from the transcriptome BAM;
# filled by the same-gene tally further down.
tx_gene_counts = defaultdict(int)

In [8]:
txpList = defaultdict(list)
with pysam.AlignmentFile('../data/txp_nameSorted_subsample.bam', 'rb') as geFile:
    count_align = 0
    for alignment in geFile:
        count_align += 1
        if (count_align%1000 == 0):
            print "\r processed #alignments:" + str(count_align),
        try:
            nh = count[alignment.qname]
            txpList[nh].append(alignment.reference_name)
        except:
            print 'NH tag needed'
            exit(0)


 processed #alignments:6492000                                                                                                                                                                                

In [9]:
sameGeneCount = defaultdict(int)
if 0 in txpList.keys():
    del txpList[0]
for key, v in txpList.items():
    for x in range(0,len(v), key):
        t = set([])
        for y in range(key):
            t.add(geneMap[v[x+y]])
        if len(t) == 1:
            tx_gene_counts[list(t)[0]] += 1
            sameGeneCount[key] += 1
recoverable = 0
for k,v in sameGeneCount.items():
    recoverable += v
    print str(k) + ":" + str((float(k) * v) / len(txpList[k]))
print recoverable


1:1.0
6492611

In [10]:
# Number of distinct genes credited with at least one read from the transcriptome BAM.
len(tx_gene_counts)


Out[10]:
47107

In [11]:
totGene = 0
for chro in gtf:
    for l in gtf[chro]:
        if l == "gene":
            totGene += len(gtf[chro][l])
print totGene


108964

In [ ]:


In [ ]:


In [ ]:
# Genome alignments (gene BAM): per-read alignment tally and exon-based gene assignment

In [12]:
count = defaultdict(int)
with pysam.AlignmentFile('../data/gene_nameSorted_subsample.bam', 'rb') as geFile:
    count_align = 0
    for alignment in geFile:
        count_align += 1
        if (count_align%1000 == 0):
            print "\r processed #alignments:" + str(count_align),
        try:
            count[alignment.qname] += 1
        except:
            print 'NH tag needed'
            exit(0)


 processed #alignments:7196000                                                                                                                                                                       

In [13]:
# Histogram of records-per-read for the genome BAM: {number of records: number of reads}.
Counter(count.values())


Out[13]:
Counter({1: 7196939})

In [14]:
# gene id -> number of reads credited to that gene from the genome BAM.
# The exon-lookup cell below creates this dict again before filling it.
ge_gene_counts = defaultdict(int)

In [15]:
# Lookup built from the headerless TSV: value in column 0 -> value in column 1.
# It is indexed below with exon ids (the interval payloads) to obtain a gene id.
exonMap = pd.read_table('../data/exonMap.tsv', header=None).set_index(0)[1].to_dict()

In [16]:
geList = defaultdict(list)
with pysam.AlignmentFile('../data/gene_nameSorted_subsample.bam', 'rb') as geFile:
    count_align = 0
    for alignment in geFile:
        count_align += 1
        if (count_align%1000 == 0):
            print "\r processed #alignments:" + str(count_align),
        try:
            nh = count[alignment.qname]
            geList[nh].append((alignment.reference_name, alignment.pos))
        except:
            print 'NH tag needed'
            exit(0)


 processed #alignments:7196000                                                                                                                                                                                                                        

In [18]:
errors = []
sameGeneCount = defaultdict(int)
ge_gene_counts = defaultdict(int)
count_align = 0
if 0 in geList.keys():
    del geList[0]
for key, v in geList.items():
    for x in range(0,len(v), key):
        count_align += 1
        if (count_align%10000 == 0):
            print "\r processed #alignments:" + str(count_align),
        t = set([])
        for y in range(key):
            intv = []
            name, pos = v[x+y]
            try:
                intv = exonDict[name][pos]
            except:
                errors.append(name)
            if len(intv) > 0:
                t.add(exonMap[list(intv)[0].data])
        if len(t) == 1:
            ge_gene_counts[list(t)[0]] += 1
            sameGeneCount[key] += 1
recoverable = 0
for k,v in sameGeneCount.items():
    recoverable += v
    print str(k) + ":" + str((float(k) * v) / len(geList[k]))
print recoverable, len(ge_gene_counts)


 processed #alignments:7190000                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             1:0.812603247019
5848256 40778

In [20]:
x = Counter(errors)
sumv = 0
for k, v in x.items():
    sumv += v
print sumv


1235

In [ ]:


In [ ]:


In [30]:
#Analysis

In [21]:
# Turn the two {gene id: read count} tallies into one-column frames indexed by
# gene id: 'gene' for the genome BAM, 'txp' for the transcriptome BAM.
geDf = pd.DataFrame(list(ge_gene_counts.items())).set_index(0).rename(columns={1: 'gene'})
txDf = pd.DataFrame(list(tx_gene_counts.items())).set_index(0).rename(columns={1: 'txp'})

In [22]:
# Put the two counts side by side, aligned on gene id; a gene present in only
# one of the frames gets NaN in the other column.
combineCounts = pd.concat([geDf, txDf], axis=1)

In [23]:
# Excess of the genome-based count over the transcriptome-based count (NaN when either is missing).
combineCounts['diff'] = combineCounts['gene'] - combineCounts['txp']

In [24]:
# Largest excess first; dropna() keeps only genes that have both counts.
diff = combineCounts.sort_values(by='diff', ascending=False).dropna()

In [25]:
# Divide every column row-wise by the genome-based count, so 'diff' becomes
# (gene - txp) / gene, then sort by that fraction.
fc = diff.div(diff['gene'], axis='index').sort_values(by='diff', ascending=False)

In [26]:
# Genes of interest: more than 95% of the genome-based count is absent from the transcriptome-based count.
goi = fc[fc['diff']>0.95].index

In [27]:
hGoi = []
mGoi = []
for g in goi:
    if 'MUSG' in g:
        mGoi.append(g)
    else:
        hGoi.append(g)
print len(mGoi), len(hGoi), len(goi)


65 157 222

In [42]:
# Map gene id -> set of gene names, taken from the 'gene' records of the GTF.
# The attribute column is read positionally: gene_id is expected first and
# gene_name third. A gene line that does not follow that layout raises
# ValueError instead of being silently mis-parsed.
geneId2Name = defaultdict(set)
with open('../data/mohu.gtf') as f:
    for line in f:
        if line[0] != '#':
            toks = line.strip().split('\t')
            feature = toks[2]
            if feature == 'gene':
                details = toks[-1].split(";")
                # either attribute being out of place invalidates the line
                if len(details) < 3 or 'gene_id' not in details[0] or 'gene_name' not in details[2]:
                    raise ValueError('unexpected attribute layout on gene line: ' + line.strip())
                gId = details[0].replace('gene_id "', "").rstrip('"').strip()
                gName = details[2].replace('gene_name "', "").rstrip('"').strip()
                geneId2Name[gId].add(gName)

In [29]:
for x in hGoi:
    print list(geneId2Name[x])[0]


CTB-63M22.1
MIR4426
AP000350.10
MORF4L1P1
RP4-706A16.3
AC016700.6
RPL21P28
AC010733.5
LINC02067
HNRNPCP2
AC097523.1
RP1-159A19.3
HNRNPA1P8
RP11-296A18.5
RP11-183G22.1
RPL10P6
AC022210.2
RP4-539M6.19
RP11-90H3.1
RP11-331N16.1
RPS26P15
EEF1A1P11
AC105399.2
RPL12P14
ISY1-RAB43
MIR7111
PPIAP11
RP11-366M4.17
AC079922.2
PPIAP16
RP11-371A22.1
PPIAP29
RP11-364L4.1
DNAJC25-GNG10
PDIA3P1
RPS29P16
RPL12P16
RP11-164H5.1
RP11-278C7.1
RPL17P36
MROH7-TTC4
HIST2H4A
EIF2S2P4
MTCO2P12
RP5-827C21.2
MRPL55
HNRNPA1P12
AC012671.2
RP11-118H4.1
CBWD7
RPS23P8
POTEF
TRIM39-RPP21
RP11-730G20.2
RP11-543B16.1
RPL36AP26
EIF4A1P4
MIR1282
RP3-417G15.1
RP11-79P5.3
KB-1683C8.1
SNHG11
RP11-407N17.3
ABI1
PPIAL4A
ATP6V1C2
RP11-393N4.2
RPL4P2
SH3D19
SDR39U1
RP11-79E3.1
HMGN1P4
SEM1P1
RPL23AP18
RP11-410L14.1
NPIPB3
TECRP1
RP11-91I11.1
RPS29P8
RP1-72A23.3
NAP1L1P1
LINC01623
SLC9B1
CYP51A1P1
RP11-864I4.1
RP11-120B7.1
RPL10P9
RPLP0P6
HSPA1A
SMARCE1P6
RP11-203F10.6
GAS5-AS1
RP13-15M17.1
HMGB1P26
NPIPA3
RP11-512F24.1
RPL12P2
ST13P2
RPL10AP6
RPL36AP13
RPL26P19
MT-TL2
DEXI
RP11-29H23.5
GNAS
RP4-800G7.1
RPS7P3
MTND6P22
RPS21P4
MAGED4B
CTA-242H14.1
RP11-489M13.1
RPS23P1
RP11-545M17.2
RP11-428L21.2
RP3-340B19.2
UBA52P5
DNLZ
AC079250.1
RPS24P8
HNRNPA1P39
POC1B-GALNT4
NAP1L1P3
BIN1
GDF5OS
FTH1P2
RP11-270C12.3
EIF4G1
RP5-1056L3.3
HNRNPA1P35
ACAA1
BTF3P9
FAM192BP
RP6-218J18.2
HMGN2P19
GCOM2
YWHAZP4
RP3-342P20.2
ANAPC11
CUTA
SRXN1
ELOBP2
YWHAZP2
GMPR2
RP11-257P3.3
U2AF1L5
RPL39P18
RACK1
HMGB1P4
TPT1P6
TVP23C-CDRT4
RP13-270P17.1
LOXL3
AC005062.2
ESRRA
CARM1
RPL23AP43

In [30]:
for x in mGoi:
    print list(geneId2Name[x])[0]


Gm15772
Gm10335
Rpl27-ps1
Gm24276
Bdnf
Gm37348
Gm14165
Gm15459
Gm11575
Gm8337
Gm13182
Rps12-ps9
Ctnnd1
Rpl21-ps11
Gm15745
Gm4735
Gm8618
Gm9843
mt-Tv
Pcdhga1
Gm7285
Gm10224
Gm12013
Gm11930
Gm11675
Gm6450
Fam177a
Gm15744
Gm15460
Gm6430
Gm29257
Gm12372
Plec
Gm7380
Gm3851
Gse1
Gm21811
Gm8129
Gm10705
Gm42666
Gm11407
Ndufab1-ps
Gm37108
Mir692-2
Gm12468
Srrd
Rpl31-ps14
RP23-37D14.5
Ap3s1-ps2
Gm14274
RP23-468J15.3
Gm43518
Rps12-ps10
Gm7730
Gm12983
Gm8885
Gm45456
Ube2e1
Gm5850
Rpl15-ps3
Gm14279
Gm37009
Rps18-ps1
Rps12-ps5
Gm17994

In [31]:
# One name per annotated gene, split by id: ids containing 'MUSG' go to
# mGenes, all others to hGenes.
hGenes = [list(names)[0] for gid, names in geneId2Name.items() if 'MUSG' not in gid]
mGenes = [list(names)[0] for gid, names in geneId2Name.items() if 'MUSG' in gid]

In [32]:
# Sizes of the two name lists and their sum.
len(hGenes), len(mGenes), len(hGenes) + len(mGenes)


Out[32]:
(58278, 50686, 108964)

In [33]:
# Save both name lists to the working directory, one name per line.
for out_path, gene_names in (("hGenes.txt", hGenes), ("mGenes.txt", mGenes)):
    with open(out_path, 'w') as f:
        f.writelines(gene_name + '\n' for gene_name in gene_names)

In [ ]:


In [161]:
# Load the HTSeq counts for the genome BAM and compare them with the transcriptome-based counts

In [45]:
# Headerless TSV: column 0 becomes the index (presumably the gene id -- verify
# against the file) and column 1 is renamed 'gene'.
htseq = pd.read_table('../data/gene_subsample.htseq', header=None).set_index(0).rename(columns={1:'gene'})

In [55]:
# Column total of the table just loaded.
htseq.sum()


Out[55]:
gene    4271139
dtype: int64

In [46]:
# Rebinds combineCounts: table counts next to the transcriptome-based counts,
# missing values set to 0 and every cell increased by 1 (pseudocount).
combineCounts = pd.concat([htseq, txDf], axis=1).fillna(0) + 1

In [47]:
# Ratio of the pseudocounted 'gene' column to the pseudocounted 'txp' column.
combineCounts['fc'] = combineCounts['gene'] / combineCounts['txp']

In [48]:
# Rebinds fc: natural log of every column, rows ordered by decreasing ratio.
fc = np.log(combineCounts.sort_values(by='fc', ascending=False))

In [49]:
# Genes of interest: natural-log ratio above 0.5.
goi = fc[fc['fc']>0.5].index

In [50]:
hGoi = []
mGoi = []
for g in goi:
    if 'MUSG' in g:
        mGoi.append(g)
    else:
        hGoi.append(g)
print len(mGoi), len(hGoi), len(goi)


896 1773 2669

In [51]:
for x in hGoi:
    print list(geneId2Name[x])[0]


RP11-466H18.1
RPL21P16
ANKRD36BP1
RPL39P3
RP11-40C6.2
RPL23AP42
LRRC75A
RP11-778D9.4
AC105399.2
RPS19P3
MRPL55
RP4-717I23.2
MORF4L1P1
TCEA1P2
SDR39U1
RP5-1052M9.1
RPS23P8
RPS24P7
ABI1
RP11-84E17.1
SUB1P3
GNAS
RP1-159A19.3
SNHG11
RP11-345K9.3
RPS21P4
AC012671.2
RP11-543E8.2
LRRC37A13P
ACAA1
TMED1
ANAPC11
DEXI
EIF4G1
RACK1
RP11-393N4.2
CUTA
RP11-79E3.1
MPG
RPL12P47
PSMB3P2
RP5-837I24.5
RPL21P28
SAMD11
MINOS1P2
GMPR2
ALDOA
AC144530.1
QARS
CTD-2206G10.2
CTB-63M22.1
RP11-715I4.1
RP11-365F18.1
RP3-417G15.1
RPS23P1
CTA-242H14.1
RP5-1056L3.3
BIN1
AC007318.5
APTX
GUK1
UBA52P5
AC026271.5
BRF2
RPL39P18
EEF1D
RPL10AP6
RP11-196G18.24
IGSF10
COX6CP1
RNU6-1064P
CTA-351J1.1
RP11-262M14.2
AC011753.6
RP3-370M22.8
FTH1
CARM1
RP11-332E4.1
APBB1
RPS5P2
CTC-340A15.1
RPS20P1
RP1-180M12.1
UBE2V1P2
AC109638.1
EIF3IP1
KAT5
AC005262.4
RPL18P11
KBTBD4
RP11-389O22.4
RP6-218J18.2
CTNND1
PDIA3P1
TRMT112P3
YWHAZP3
RP11-6K23.1
RP11-51L5.2
MLST8
AC008155.1
RP3-348I23.3
KREMEN2
TXNP7
RP11-428L21.2
AC010095.6
FTH1P4
ETF1P1
PLCG1-AS1
DMKN
RP1-72A23.3
RPL18AP15
RP11-371A22.1
COX20P1
RP11-545M17.2
ATP6AP2
NTMT1
PAX6
STUB1
RP1-266L20.4
RP11-34C15.2
AC024082.4
RPS4XP3
CTC-329H14.1
WDR73
RPL23AP76
RP11-390F4.2
RPL23AP72
ZMAT5
AC079150.2
RPS19P1
RPL37AP1
RP11-730G20.2
ATG4D
HNRNPCP2
RPS2
RP11-832N8.1
RP11-270C12.3
AC004453.8
CTB-96E2.7
AP001628.6
RP11-15B17.1
RP11-462B18.1
ALDH7A1P1
H2AFZP6
RLIMP2
GTF2F2P1
C1DP1
HNRNPABP1
RP11-383G6.4
RP11-575L7.8
RP11-552O4.2
RP11-90H3.1
RPL39P
RP11-713P14.1
RAB34
RPS29P9
PIH1D1
UBL5P2
HSPE1P5
RP11-69L16.5
HADH
RP11-597K23.1
NAP1L1P3
TXNP1
RP11-507E23.1
LDHAP2
EEF1DP2
HMGN2P5
NAP1L1P1
RPL35AP21
RP11-57B24.2
ELOBP2
RPS12P2
PSMA3P
AC013399.3
HNRNPCP3
RP11-336N8.4
RP11-560I19.2
ATP5JP1
RP11-396C23.3
RNU6-6P
H2AFZP4
RP11-455B2.9
PPP1R14BP3
RAP1AP
RP11-278L15.6
MAGOH2P
YWHABP2
EIF3KP3
RPS29P16
RPL41P5
AC007193.9
PTMAP4
HINT1P1
DBIP1
RPS8P4
RPS29P5
LLNLR-245B6.1
RPL21P1
RP11-278C7.1
MYL6P5
UBE2D3P1
TMEM205
RPL30P4
RPL21P119
LA16c-306E5.1
RPS3AP35
RPS29P8
RP11-281O15.7
CDK5RAP3
PFDN5
RP11-159F24.2
EPB41L2
TPT1P4
DDT
RP1-181J22.1
AC091633.2
RP11-74E22.5
CTD-2620I22.2
RPS29P15
C16orf58
RP1-125I3.4
RP11-12K11.1
AC104843.4
COX6CP8
TUFMP1
RP11-183C12.1
RP11-192C21.3
RP11-72J9.1
RP5-935K16.2
CTB-58E17.10
RP11-288C17.1
CIZ1
ILF2P1
SNRPGP3
RPL12P50
AC007016.3
RP11-484D4.3
RP11-408I18.9
ELOCP32
ANAPC10P1
AC008427.2
RPL37P25
RP11-364M6.1
CALM1P2
RNPS1P1
RP11-274J15.2
FAM212B-AS1
AC114755.3
AC006033.22
RP11-661A12.8
ASAH1
RPS15AP17
RP11-793I11.1
RPF2P1
AC010894.4
RP3-522D1.2
AC005229.7
TNNT1
RPSAP74
PARD3
SNX32
RPLP1P13
RP11-51L5.1
RP11-15E18.5
CMB9-55A18.1
UBE3A
GAS5
FOXD1
DDX10P2
NDUFV2P1
SYBU
RP4-604A21.1
AC012065.5
NAP1L4P3
TP53
YY1AP1
RP11-372E1.1
RPL23AP2
SEC31A
AC092664.1
AL590762.11
RP11-158L12.2
RP11-923I11.3
RNU4-89P
RP11-72B4.2
EIF4A2P4
RP11-18B3.3
AC006028.11
DCAF13P3
CTD-2410N18.1
RP4-758J24.4
AC091133.1
PAQR6
UBE2CP1
RP5-926E3.1
RP11-575C1.1
MRPL42P6
RP11-417B4.3
SUMO2P1
RRM1-AS1
RP11-57H14.2
TECRP1
RP11-73M18.7
CTD-2299I21.1
CAP1P2
RP1-15D7.1
RP11-323I15.2
RP11-384B12.2
RPL9P30
CHORDC2P
RP3-430N8.11
RP11-101K23.1
EEF1A1P2
DDX18P4
MRPL48P1
RP11-1072C15.7
HSPE1P27
RP5-1174J21.1
RP5-933K21.2
DCTN1
ARHGAP42P2
RP11-109P14.10
RP11-734J24.1
RP11-109G23.1
RP11-359I18.1
RPS29P6
RP11-596C23.6
MINOS1P3
RP11-760D2.10
RP1-52D1.1
HMGN1P15
LINC01687
RP11-402P6.6
NDUFB4P1
PEBP4
RBBP4P5
RP13-270P17.3
KPNA2P2
RP11-8P13.1
RPL39P26
CTD-2369P2.5
AC004692.5
SNRPGP17
IMMTP1
HMGB3P8
DLEU2_2
RP11-181C21.4
RNF212B
RP11-513D5.2
SNORD63
ANAPC15
RPL38P3
NHP2P1
MUTYH
RPL26P6
RPS8P3
CAMK2B
NID2
AC104389.32
RP11-401E9.3
MBNL1
RP4-738P11.4
EIF3KP1
WDR6
TBCAP1
RP11-386I14.3
ENY2
RBM39
GPS1
RPL30P7
RPLP1P6
RPS2P24
ST7
SHISA5
AC005262.2
RPS17P13
HNRNPUP1
SNORD14A
CSNK1A1L
RP11-415H23.4
PARP6
RP11-553E24.1
RP11-313M3.1
RPL7AP38
CTD-2311A18.1
FAM204BP
RPL12P40
PQBP1
LRFN4
CTD-2108O9.4
RP11-115D7.3
GAPDHP68
BCS1L
RPL36AP43
PSMA2P3
AC098614.1
RPL7AP10
NDUFS7
RPS11P5
RP3-486I3.4
AC013404.1
EEF1A1P42
EEF1B2P7
RPS18P13
RP1-179N16.3
AC097523.1
RP11-420H19.3
CALM1P1
RPS29P22
EWSR1
RP11-459A10.1
RP11-63K6.4
C1QBPP2
FOXM1
C1orf112
PLPP1
TXNRD1
AC073063.10
RP11-478C6.4
EIF2S2P4
ATP6V1C2
RP11-297L17.6
FTH1P3
NOC2L
CIRBP
RP5-890O15.3
OS9
U91328.21
RP11-775D22.3
AC004381.8
GDI2P1
AP002954.4
GAPDHP37
RP11-813P10.2
CTC-506B8.1
RP5-866L20.2
DAZAP2P1
UBE2CP3
RP11-613G13.1
RPL36P4
RPS4XP15
CKBP1
UQCRHP4
SRSF9P1
CTD-2200P10.3
RP3-406P24.3
RP11-451O18.1
AC093106.6
CTB-167B5.1
RPL19P14
RPS12P15
RPL23AP83
AC012501.3
NDUFB4P10
RP11-212P7.1
NPM1P43
MCTS2P
XRCC6P5
RP11-373E16.1
RP11-335F8.2
AC005229.5
RPS3AP39
AK6P2
PIN1P1
RPL7P2
AP001062.9
RPL21P9
HMGB1P16
AC002117.1
IPO7P1
SNORA67
AC009302.2
AL157871.2
AC097359.2
RP11-356I18.1
UBE2D3P2
AC016700.6
AC079809.2
RAC1P3
RP11-548H3.1
PTGES3P4
MRPS18CP6
AGER
RP11-692C23.1
RP11-478H16.1
RPS29P7
AC079741.2
RP11-64K12.8
ZNF133
F2
RP11-635N19.2
RN7SL419P
HSBP1P2
RPL35AP31
RPL15P18
CIR1P1
RPS27P27
TUBB4AP1
RFC3P1
AC093110.3
RP11-973N13.5
YWHAZP8
RP11-321L2.1
CTC-463N11.4
RP11-416N13.1
RNU6-885P
RNU7-111P
RNU6-1016P
RP11-473O4.1
RP11-39H3.1
RP11-562A8.4
RP4-566D2.1
RP11-229P13.23
RP11-32P22.1
RP11-382A20.1
RANP5
TPT1P12
FTLP5
CTC-360J11.4
CTD-2269F5.1
RP11-732A19.9
FNDC1-IT1
CHMP1B-AS1
RPL23AP17
GAPDHP70
AC025918.2
FUNDC2P1
PPIAP22
CTD-3006G17.2
AP000569.9
CBX3P3
COX6CP2
PTP4A1P2
RP11-109P14.9
RP11-599J14.2
RP11-291L19.1
CTB-175E5.7
BTF3P13
MTCO3P23
RP11-297K8.2
TMPRSS9
RP11-792A8.3
KB-1073A2.1
MIR218-1
MTCO3P22
RP11-252I14.1
RP11-118M9.3
RP11-533K11.1
RPL35AP3
RP11-226P1.1
CTC-498J12.3
RP11-136C24.2
GCC2-AS1
RP11-1113L8.6
CYP51A1P1
APOOP4
HSPA8P7
AC074121.4
MRPS21P1
SNORD99
NHP2P2
RP11-471B22.2
HNRNPA1P29
RP11-393K12.4
HNRNPA1P57
RNU6-890P
FNTAP2
CTD-2158P22.4
NEK2P4
SLC25A3P2
RP11-1103G16.1
SCARNA18B
RP11-432J24.3
RP11-9M16.3
GAPDHP25
NR1H3
RP11-159H10.4
RP11-237P21.1
RP11-317B3.2
DYNLT3P2
AC023449.2
RP11-274M17.1
RP11-213N20.1
RP11-321C24.4
RP11-629B11.4
RNA5SP383
ILF2P2
TMED10P1
PSMA6P4
SLC26A10
MRPL30P1
OR2W6P
COX6CP3
MRPS21P2
ARMC10P1
CTB-187L3.1
ATP5HP1
MYL8P
RPL9P16
RP11-167P22.4
GNG5P5
ESRRG
RP11-56G10.2
VSTM2A
KNOP1P4
CHCHD3P1
RP11-341G5.2
RAD23BP1
ACTG1P1
SLC9B1
RP11-223P11.3
RP5-854E16.2
RP11-506H20.1
RPL12P39
AC104306.1
MAP2K4P1
COLEC11
RP11-154D3.1
RPSAP44
RP11-245G13.1
HNRNPH3P1
NSRP1P1
RP11-494K3.2
CSNK1A1P1
RP11-462L8.1
RP11-807C20.2
CHMP4BP1
AC007969.5
SAP30BP
SHMT2
EPN2
ESRRA
RPL4P1
CTB-47B8.1
PCBP2
RP1-199J3.5
PMS1
RPL36AP29
TMSB10P1
RPS4XP8
ACTG1P12
DLG1
SDHA
NPRL2
CAST
REPS1
ACADVL
RPL21P4
HSD17B4
CCDC189
ATXN2L
NRG4
RP11-444B24.2
SRP72P1
RP11-777B9.5
RP11-26H16.1
RPS23P7
ZFAND6
RP1-40G4P.1
ELP4
RPL8
TFIP11
RP11-428G5.1
RP11-62H7.3
LSM3P3
EEF1DP1
C9orf43
RP11-365O16.1
TPT1P6
ALDH7A1
ZSCAN32
SELENBP1
CTC-338M12.4
CMC2
TCF4
RAD17
HSP90AB6P
CTD-2102P23.1
RPL30P14
A1BG-AS1
SLC4A7
AURKA
RP11-3L10.3
TPT1P9
RP11-201O14.2
PSME2P6
RPS29P3
AC139085.2
HNRNPA1P59
CALM2P4
COX5AP2
UBBP3
RP11-98I9.4
XRCC6P2
RP11-337A23.5
RPL21P44
TTC3-AS1
RP5-1053E7.3
ME2P1
RPS3P6
HSPA8P1
RP11-19G24.2
RP11-195F19.9
RP11-742D12.1
RP11-79L9.2
RPL4P6
COX6B1P1
LAGE3P1
SNORA33
RP11-686D22.6
RP11-410L14.1
DNAJC8P1
RP11-767N6.2
AC073415.2
RP4-548D19.3
RP11-770J1.3
AC007559.1
RPL30P12
GS1-21A4.2
ZBTB8OSP2
RPL9P7
EIF1P4
CHADL
HNRNPKP1
XRCC6P1
NAP1L1
RP11-422P24.9
CTD-2666L21.2
HMBS
BCAP29
SRRM1P3
RP11-114H7.1
SLC37A4
NDRG2
RNH1
PABPC1P3
CCNE2
SLC25A3
RP11-4M23.4
ZNF821
RP11-137N23.1
RPL26P37
SPATA1
STAU2
DMTF1
ITGB1BP1
MEF2C
CD63
FGFR1
RP11-93K22.1
ACTG2
RP11-69L16.3
RP11-128A17.1
RPL39P6
EMX1
SPAG17
YWHABP1
COX7A2P2
RP11-57C19.6
AC004383.3
RP11-501C14.5
CPNE1
RPL18P13
PBRM1
SLC9A6
AMZ2
PITPNA-AS1
PPIAP2
BCL2L12
FDFT1
LRRC75A-AS1
MAZ
BRD8
RPL35AP35
RP11-30K9.7
MTMR3
RP11-14K2.1
RP11-699A7.1
RP11-644F5.15
RP11-377K22.2
XRCC3
RP11-290L1.4
ECHDC1
UBA52
CALD1
PGAP2
RP11-216N21.1
SOD1P3
GGT7
SNRPD2P1
RPL13AP
GPSM2
RP11-464D20.2
SEPT7P7
RPS15AP40
SNORD38A
KB-1205A7.1
NOL8
AC007383.6
RPL17P36
RP11-829H16.2
RP1-223E5.4
RPL15P20
PSMA4
RP11-415I12.1
CTD-2666L21.3
RP5-864K19.4
AC019129.1
RPS15AP1
RP11-259P15.1
B3GAT2
RPL7AP3
DTNB
LRIG1
RP11-289K10.1
RP11-91G21.1
HYI
TTC8
TSR3
NDUFB2
VTI1B
CD46
CHEK2
PDSS1P1
AC005104.3
RP11-211A18.1
RP11-68I18.10
ACTG1P23
HERC2P10
RP11-110I1.5
RPL23AP85
CTA-85E5.7
CTD-2537L20.1
AC069257.6
RP1-152L7.8
RPL29P12
SNORD46
RP11-100N3.2
PGGT1BP1
HIST1H4D
OR7E33P
SLC25A30-AS1
MYL6P1
SEC14L1P1
ENPP7P4
PAICSP7
RP11-516C1.1
RP11-87N24.1
NCAPD2P1
RP11-290D2.5
KRT18P58
RPL35P4
Y_RNA
MIR6797
RNU6-238P
HSPA8P4
HNRNPCL3
RPL23AP30
RP11-411B10.8
ANKRD30A
RP3-400B16.4
KCTD11
AP006216.10
RPL7AP23
MIR3939
CTB-95D12.1
ACTR3P2
AC114755.2
RP11-15P13.1
PSAT1P4
EIF4A1P6
RP11-968A15.8
CACNA1C
GAPDHP65
RPL27P12
XRCC6P3
RP11-203M5.2
RP11-452D12.1
RP11-452G18.1
RP11-579D7.2
TAB3-AS2
SERF1AP1
RP3-430N8.10
MIR5585
MTHFD2P5
ATP6V1E1P1
RP11-170L3.4
RP11-297A16.2
RNU6ATAC16P
ZNF807
RP11-252I13.1
SNORD100
RP11-651P23.2
RP4-799P18.5
TPT1P13
HSPD1P3
NDUFAF4P2
HMGN1P17
RP11-689J19.1
INTS4P1
PCF11-AS1
PMM2P1
RP11-762I7.4
RP11-457K10.2
RP11-2B6.3
RN7SL473P
AP006621.8
CTD-2547G23.2
RP11-956E11.1
RP11-240E2.2
MIR222
LINC01918
RBM22P3
FTOP1
RP11-679C8.2
MIR320B2
RP1-128M12.3
PDHA2
DYNLT3P1
RP11-831H9.3
RP4-784A16.3
CYCSP28
CTD-2666L21.1
Metazoa_SRP
STMN1P1
HLA-DOB
AC004461.4
RP11-296P7.4
MIR659
FTLP1
CTD-2046J7.1
RPL7AP51
CTD-3137H5.4
MIR570
VDAC3P1
TPMTP3
CTD-2517M22.17
RP11-485O10.2
U82670.9
RPL12P43
AC116614.1
RP11-317O24.3
RPSAP50
RNU7-19P
ISCA2P1
MTNR1A
RPS26P45
RP11-215D10.1
GAPDHP61
RP3-415N12.1
RP11-234P3.4
AC027612.1
RPS11P7
MIR4782
LINC01220
ATXN2-AS
Y_RNA
RP5-1170D6.1
KCNQ1OT1_3
RPL36P19
RP1-177A13.1
PDLIM1P1
RP11-458F8.1
RP11-493L12.6
RP11-77I22.4
RP4-616B8.4
RP11-46B11.2
MYL1
RPS2P36
HMGN1P36
ALG3P1
THAP12P4
DDX3P3
IGLVIVOR22-1
AC004549.6
ABHD16B
RAD51AP1P1
RP11-388P9.3
Y_RNA
AC093166.1
RP11-463M14.1
RP5-1077B9.5
RPL21P133
RPL31P35
RN7SL149P
PCNAP4
RP11-147K6.1
RP1-151F17.1
AL049758.2
Metazoa_SRP
HMGB1P51
RP3-467N11.2
RP11-171I2.5
PPP1R7
RP11-60E8.2
AC093690.1
TAF9BP1
TMA16P2
RP11-83J16.3
HNRNPA1P28
HNRNPH1P2
LARS2-AS1
RPS27AP8
H2AFZP5
Y_RNA
CHCHD2P5
RP11-133N21.10
RP4-777G9.1
WDR45BP1
AC083949.1
PCDH12
RN7SL192P
RNU6-925P
LLNLR-307A6.1
ZBTB8OSP1
RP11-562A8.5
CYB5AP4
RAD17P1
MCRIP2P1
RP11-81A1.3
PHBP18
CTC-512J14.5
AL162151.3
TUBB8P10
RP11-293K19.1
RNU6-1199P
CTBP2P8
KRASP1
PRMT1P1
AC139143.2
RAD1P1
RARRES2P1
RP3-419C19.2
CBX1P4
EFNA2
RPL32P23
ATP5HP3
RPS26P24
FTLP4
RP11-554D14.2
CRYGC
TGM4
RP4-613B23.1
RP11-493P1.2
AP001619.3
CTD-3110H11.1
RPS27AP7
RP1-101K10.4
CAMK2N2
RP11-420L9.2
GXYLT1P2
CACTIN-AS1
AC064850.4
RP1-172N19.3
COX5BP6
FABP5P12
RP11-73E6.2
ANP32BP2
PLAC8L1
RP11-46H11.2
AF038458.4
RP11-44D5.2
RP11-488I4.2
RP11-692D12.1
RP11-413H22.2
FAM192BP
SNX6P1
PANCR
RP11-213G2.5
RPL35AP22
RP3-508I15.14
CTD-2182N23.1
SNORA7
RPS3AP3
RP11-24N18.1
CTD-2062O1.1
HMGB2P1
MTCO1P24
RPL17P49
CYCSP43
CTB-161J7.2
RHBDL3
RP11-452L6.7
RP11-98D18.16
CTD-2623H2.7
RPS12P28
DNM1P51
RPL23AP34
GTF2IP14
HSPE1P16
RP11-439I14.1
DNAJC19P1
RP11-384C12.1
RP11-403E24.1
RP11-429G19.3
RPL34P17
RP11-454F8.4
REP15
RPS12P27
RPS26P3
RP11-158N24.1
MORF4L1P5
RPS3AP9
RPS26P6
RPS5P7
ADAMTS20
RPS29P30
MAFK
YWHAQP6
TAGAP
RP11-21A7A.3
PTCHD3P2
CTD-2278B20.1
RP1-73A14.1
RP1-7G5.5
TUBBP6
RP11-627K11.3
RP11-405F3.2
RPL23AP27
CTD-3057O21.1
LARP1BP1
AC005730.2
AP000320.6
ZNF560
CTB-134F13.1
RP11-233G1.4
RP11-466C23.5
AC005943.6
METTL8P1
TPT1P10
ATP5LP6
SNRPGP13
ELOCP29
ST5
CRABP1
EIF4A1P13
FTLP14
RP11-242O24.3
RP11-444J21.2
RP11-165N19.2
RPL36P2
EZH2P1
SYT2
CYCSP41
RP11-27G24.1
RPS8P6
AC104651.2
SLC47A2
TIMM9P1
RP11-264B14.1
AKR1B1P8
KRT18P43
RP11-815J4.7
RP11-210H10__A.1
AC004941.5
RCN3
RP11-618K16.4
RP11-269F19.2
SNORA26
CDK5P1
CTC-232P5.4
CTC-507E2.2
RP11-707G14.1
RP11-745A24.1
RP11-773H22.4
ATP1B1P1
RPL7AP69
RNU6-37P
RPS4XP18
ADPGK-AS1
RP5-996D20.3
RP5-928E24.2
RP11-98E6.1
RNU6-822P
TNNC2
CTB-174O21.2
RP11-1035H13.2
CTD-2515C13.1
LSM3P5
CTD-2012K14.4
RP11-292K15.2
VDAC2P3
AC007237.2
Y_RNA
RP11-241J12.3
NDUFB10P1
RP11-261C10.4
TAT-AS1
AC009500.2
ANO7L1
AC079987.2
RPS26P56
RP11-153F5.2
RP5-890E16.4
NMD3P1
H2AFZP1
MORF4L1P4
RP11-5P22.1
GAPDHP28
VDAC1P7
AKR1B1P2
CCT5P2
PGAM2
RPL23AP36
SNORA3
NDUFA5P3
PDCL3P1
L34079.4
TDGF1P7
HM13-AS1
DUTP6
RP11-298C2.1
RPL23AP91
SELENOP
CTD-2538A21.1
EIF4BP9
RP11-328C8.2
AC010468.3
RP11-184J23.2
RP5-968P14.2
AC006942.4
RP11-539G18.1
AKR1B1P7
AC005546.2
FBXW11P1
HNRNPA1P9
RPL10P11
RP11-272G22.2
CTAGE10P
RN7SKP261
PPP1R14BP5
ARF1P2
CCT5P1
TTC4P1
NDRG4
RPL7AP52
RP11-326L17.1
RPL7P31
AC093166.4
CTD-2240J17.2
RP11-443F16.1
HMGB3P20
RPL9P15
ZFPM1
MIR16-2
MIR148A
RP11-525E9.1
AP000936.5
CXorf58
RP11-15A1.8
ARMC8P1
SDAD1P2
RP11-337C18.4
RP11-541H12.1
CTNNA1P1
RP11-382N13.4
RP11-755F10.1
NLRP3P1
RP11-567I13.1
RP11-783K16.13
MIR320D1
RP11-762B21.5
RP11-466F5.4
RP11-122G18.9
RP11-274M17.2
ELOCP21
RP11-752G15.10
RP1-140J1.4
BRCC3P1
RP4-725G10.4
RPL7P39
EI24P1
RP11-134F2.7
S100A1
RP3-395C13.1
AC114755.4
AC125238.3
SMG1P2
AC018865.5
CBWD7
CTC-487M23.7
FTLP15
RN7SL239P
RP11-21C4.4
RP11-527N22.2
RP11-91J19.3
RP11-286E11.1
RP11-220H4.6
CTD-2154B17.4
ARHGAP42P1
ADM5
RCC2P3
CTD-2031P19.4
RP5-1132H15.2
RPS29P4
RPL30P10
RPL35AP15
PKMP2
Z83851.4
U6
RP11-845C23.3
SAR1AP4
CTD-2544N14.3
RNU1-103P
SUMO2P18
LINC01399
RP5-1029F21.2
MTND1P15
ARL5AP1
SUB1P2
UBE2Q2P10
NDUFB4P11
MT1DP
AC079135.1
RNU6-395P
RP5-1041C10.3
RPS3P7
RP11-688G15.3
RPS21P1
RP11-25P11.2
GOLGA2P1
RP11-22N19.2
FNDC8
RP4-580N22.2
Y_RNA
CNN3P1
RNU7-140P
PAPOLB
CTD-2014B16.1
NADK2-AS1
RP3-414A15.10
RP11-328J14.2
RP11-316E14.2
RP11-400K9.2
MTCO1P49
CTD-2302E22.6
RP11-332H17.1
AC007969.4
FTH1P15
RP11-520H14.1
DUTP8
GLYCTK-AS1
RP4-580O19.2
KIAA1217
RP11-350N15.6
FAF2P1
COX6B1P4
GNL3LP1
RNA5SP473
AC092646.2
AF146191.4
FDPSP2
CTD-2302E22.1
CTD-2325A15.5
RP5-849L7.1
PTPRD
RP1-261G23.7
RP11-401F2.4
TPT1P14
RP11-309L24.2
RP11-207F8.1
RP11-671E7.1
HSBP1P1
AC008781.7
CTD-2515O10.5
RPS7P6
AC006042.8
GMFBP1
MTND5P35
LL22NC03-32F9.1
Y_RNA
PIGPP3
AC074290.1
RP11-97N10.1
RFKP1
KRT18P65
RP3-402G11.28
CTD-2315A10.1
RP11-452J6.2
RP11-363H12.1
TLE1P1
PDCL3P6
AC067945.3
RP11-1275H24.2
RPS3AP1
CTD-2298J14.1
LINC01277
GEMIN8P4
RP1-317E23.3
RNU6-1282P
NENFP1
HMGB1P31
NDUFAB1P1
RP11-558F24.2
RP11-90H3.2
PRELID3BP10
LINC00570
RP3-378P9.1
HNRNPA1P27
QRSL1P1
CTD-2017F17.2
RP11-1000B6.8
RPL7P55
POTEKP
RPL23AP26
RNU6-786P
RP11-421N10.1
RP4-620F22.3
ST13P20
HCG16
NDUFB4P4
RP11-164N20.2
AL133244.1
RPL22P11
COX4I1P1
RP11-574K11.5
RP11-697N18.3
KLK14
RNU6-8
LRRC6
RP11-487E1.2
RP4-680D5.8
PABPC1P11
AC007881.4
CTD-2129N1.1
RAB11AP1
HNRNPMP1
RP11-797D24.4
RPS26P4
AC022201.5
RP11-307L3.2
RP11-477G18.2
RNU7-179P
EI24P2
MTND2P5
RP11-423F24.3
EZR-AS1
snoU2-30
SERPINH1P1
ATG3P1
HIGD1AP12
UBE2V2P3
AC098592.6
C11orf91
RP11-1G11.2
RP11-365H22.2
RP11-1017G21.3
ARHGAP9
RP11-432N13.2
CTD-2110K23.1
PFN1P10
RPL12P29
TPT1P2
RP1-59D14.10
RP1-261G23.4
CTB-20D2.1
RP1-122O8.7
TMEM256P1
RP11-498C9.16
RPS29P19
RP11-249L21.4
LRP1
EEF1A1P33
RP11-378A13.2
AC007390.4
CTD-2377D24.2
RP11-91I11.1
LYPLAL1-AS1
RPL35AP
SNCAIP
RP11-183G22.1
EEF1A1P37
RP11-336N8.1
RPS2P32
HCST
RP11-249C24.12
HIST1H3A
DNAJC19P7
HMGB1P36
AIMP1P1
GAPDHP31
RP11-51I5.1
PIGPP2
RP11-346C16.1
TBCAP2
SNORD4A
RP11-297K7.1
UBE2V1P9
RNA5SP340
RP11-417N10.5
HNRNPCP7
RP1-190J20.2
BX842568.2
RP11-524F11.2
RP11-296I10.3
PSMC1P7
NRADDP
RP4-744I24.4
RP11-3K24.3
SMC4P1
HNRNPA1P63
RP11-30P6.6
MRPS5P3
RP11-857B24.3
RP11-697N18.2
RP11-688D15.1
HOTAIR_2
RP11-368P15.1
ATP5G1P4
ANXA2P3
EIF3LP1
RNU6-670P
TIMM9P3
ENO4
RPSAP71
CTD-2336O2.2
AC006026.13
ERHP1
EIF3KP2
CYCSP32
RP4-535B20.2
DHFRP5
RP11-71H17.1
HMGN3P1
C6orf47-AS1
RNU6-857P
KCTD14
RPS15AP25
RANP8
GOLGA6L3
SNRPFP2
IPO8P1
MZT1P1
RP11-56B16.2
RP11-109N23.5
Y_RNA
RPL21P32
RP11-895M11.2
Y_RNA
AC005042.2
RPL35AP12
AC015933.2
RPL37P10
COPS8P2
RP11-348M3.2
RP11-982M15.2
RP11-131L23.1
Y_RNA
RNU6-353P
ATOH8
BMP8A
C8orf59P2
PAICSP2
RP11-50D9.4
CTD-2026K11.4
IRF7
HCG25
SEM1P1
DLEU2_4
RP11-560J1.2
RP11-115A15.2
KRTCAP3
AC006369.2
RP11-392O18.2
RP11-709P2.1
RP11-17A1.3
MRPS16P2
RP11-592P9.3
RPL18AP14
SPSB3
RP5-968D22.1
RP3-509L4.3
EI24P3
RP13-140E4.1
BRIX1P1
RP11-191L9.5
KRT18P8
NACA
SHKBP1
RPL11P5
GOSR2
AC004967.7
JSRP1
ERC1
IFFO1
RPL7P52
RP11-841O20.2
YWHAEP5
RAB23
CTD-2515H24.1
KCNQ2
FDXR
COL18A1
RTN4
MFF
FTLP2
TMEM179B
ZFPL1
RP11-152C15.1
LUC7L
HNRNPCP4
IPO9-AS1
RPL7P54
RP11-2B6.2
RP11-404O13.4
CD59
REXO2
RPS17P5
RPS4XP2
NME7
IFRD2
RP11-568J23.1
RPSAP13
RP3-461F17.1
ZNF219
RP11-399D15.1
UBE2L5P
KIF9
DNM1L
RPS15
RPS24
TTN-AS1
NAGK
SQSTM1
RP11-501C14.7
RELL2
RP11-611O2.5
CENPT
RPL23AP57
RP11-366M4.17
AC087163.2
RP11-94D20.1
SERBP1P3
RP11-33A14.3
NSD2
RPL10AP5
CTD-2349P21.1
UBXN1
CTC-421K24.1
RP4-570O12.2
RP11-483K5.2
ZEB2
ZMYND8
ABCG1
RPS24P6
MSL3
AC012354.8
ENDOG
EEF1A1P16
RANBP3
ALDH3A2
TPM1
CTD-3193K9.4
HMGB1P11
RPL23AP89
RP11-397P13.7
CUEDC1
NACAP2
AC006960.5
GAPDHP21
RP11-59H1.1
RPL32P26
FAM13A-AS1
EEF1A1P30
RP11-732A19.1
LSM3P2
RP4-631H13.4
RPL23AP55
SCMH1
RP11-449H3.2
CCT6P4
RP11-126F18.2
PRMT5
DBI
ADGRL2
RP11-112J1.1
EMC6
AC016712.2
RPL37P15
PTTG3P
RP4-816N1.6
GSTP1P1
RP11-700J17.2
RP1-50A13.1
ACTG1P14
RP5-1050D4.5
RPS7P13
RP11-567G24.1
RPS4XP4
IRF3
FADD
PDE9A
NEPRO
AGPAT1
HNRNPC
SSBP1
RP1-89D4.1
RPS19P7
ATF2
C7orf49
CERS2
PYURF
ILF3
HSF1
SRSF5
HSPE1P21
RP3-339A18.3
TUBAP
RP11-56D16.8
RP11-959F10.4
MIPEPP3
RPL39P39
FUT8-AS1
RP11-561B11.1
MCFD2P1
VPS26AP1
TRAF4
HAS3
ATP5G1P8
RPL9P31
SLC25A5P8
SETP9
RP11-632L2.2
FDPSP1
RP11-1072C15.1
SLC25A5P7
RP11-889L3.4
SMARCE1P1
RP11-119H12.1
ATP5HP2
RP11-613M10.6
AC007386.3
RP11-66N24.3
CTD-2122P11.1
RP3-391O22.1
MT1XP1
AC007566.10
RPL23AP92
FERP1
SF3A3P1
RP11-1029M24.1
ACTBP12
HMGN1P37
Y_RNA
RPL10P18
ANP32C
STX8P1
DYNLL1P7
PRC1-AS1
HNRNPCP6
RP3-426I6.2
TFAMP1
H3F3BP2
RPL36AP33
AC096649.1
RPL12P26
CTC-512J14.1
RP11-140L24.3
SNORD104
AHNAK2
UBA52P3
ELOBP1
AC009517.2
SNRPGP19
RPL10P19
CUX1
RPS3AP47
AIMP2
JMJD8
RPL34P31
PIGH
SUN2

In [52]:
# Display the log-transformed table.
fc


Out[52]:
gene txp fc
ENSG00000244398.1 7.509335 0.000000 7.509335
ENSG00000220842.6 6.091310 0.000000 6.091310
ENSMUSG00000109509.1 5.402677 0.000000 5.402677
ENSMUSG00000072940.5 5.384495 0.000000 5.384495
ENSG00000214262.4 5.342334 0.000000 5.342334
ENSG00000235174.1 5.187386 0.000000 5.187386
ENSG00000219928.2 5.181784 0.000000 5.181784
ENSG00000234851.4 5.117994 0.000000 5.117994
ENSMUSG00000020140.15 5.056246 0.000000 5.056246
ENSMUSG00000048482.14 5.627621 0.693147 4.934474
ENSG00000181350.11 4.905275 0.000000 4.905275
ENSG00000228205.1 4.890349 0.000000 4.890349
ENSMUSG00000068397.4 4.882802 0.000000 4.882802
ENSMUSG00000066443.5 4.736198 0.000000 4.736198
ENSG00000237077.1 6.603944 1.945910 4.658034
ENSMUSG00000069862.6 6.634633 2.079442 4.555192
ENSMUSG00000102349.1 4.430817 0.000000 4.430817
ENSG00000240463.1 7.069023 2.708050 4.360973
ENSMUSG00000106258.1 4.356709 0.000000 4.356709
ENSG00000162910.18 6.746412 2.639057 4.107355
ENSG00000229567.1 3.912023 0.000000 3.912023
ENSG00000218283.2 4.574711 0.693147 3.881564
ENSG00000230409.3 3.806662 0.000000 3.806662
ENSG00000100445.17 5.318120 1.609438 3.708682
ENSG00000226532.1 5.572154 1.945910 3.626244
ENSMUSG00000043770.10 3.583519 0.000000 3.583519
ENSG00000230629.2 5.777652 2.197225 3.580428
ENSMUSG00000080811.1 4.663439 1.098612 3.564827
ENSMUSG00000027637.3 5.739793 2.197225 3.542568
ENSG00000227361.1 3.496508 0.000000 3.496508
... ... ... ...
ENSG00000169100.13 0.000000 7.130099 -7.130099
ENSMUSG00000036192.15 0.693147 7.936660 -7.243513
ENSMUSG00000105008.1 2.397895 9.684149 -7.286254
ENSMUSG00000100862.1 0.693147 7.989221 -7.296074
ENSMUSG00000100595.1 0.000000 7.331715 -7.331715
ENSMUSG00000037754.13 0.000000 7.427144 -7.427144
ENSMUSG00000057036.7 0.000000 7.430707 -7.430707
ENSG00000237039.1 0.000000 7.443078 -7.443078
ENSMUSG00000020738.16 0.000000 7.466228 -7.466228
ENSG00000227077.3 0.000000 7.498316 -7.498316
ENSMUSG00000108443.1 0.000000 7.504942 -7.504942
ENSMUSG00000048758.14 0.000000 7.513709 -7.513709
ENSMUSG00000045132.3 0.000000 7.519150 -7.519150
ENSG00000163359.15 1.945910 9.472243 -7.526333
ENSMUSG00000102070.1 0.000000 7.637716 -7.637716
ENSMUSG00000091228.1 0.000000 7.639642 -7.639642
ENSMUSG00000076138.1 0.000000 7.731053 -7.731053
ENSMUSG00000074887.4 0.000000 7.807510 -7.807510
ENSMUSG00000101249.1 0.000000 7.849324 -7.849324
ENSG00000011052.21 0.000000 7.872455 -7.872455
ENSMUSG00000064354.1 0.000000 8.026170 -8.026170
ENSG00000130032.15 1.098612 9.267665 -8.169053
ENSMUSG00000064357.1 0.000000 8.254009 -8.254009
ENSMUSG00000066315.9 0.000000 8.279951 -8.279951
ENSMUSG00000108249.1 0.000000 8.393442 -8.393442
ENSG00000067066.16 0.693147 9.169727 -8.476580
ENSG00000254772.9 0.000000 8.570355 -8.570355
ENSMUSG00000064358.1 0.693147 9.303284 -8.610137
ENSMUSG00000057359.7 0.000000 8.610319 -8.610319
ENSMUSG00000101111.1 0.000000 9.107975 -9.107975

108964 rows × 3 columns


In [53]:
# Log ratio per gene, in the table's sorted order.
fig, ax = plt.subplots()
ax.plot(fc['fc'].values)
ax.set_xlabel('fc sorted gene id')
ax.set_ylabel('log fold change (pc=1)')


Out[53]:
<matplotlib.text.Text at 0x221b61c50>

In [54]:
# Distribution of the log ratios (default binning).
plt.hist(fc['fc'].values)


Out[54]:
(array([  2.20000000e+01,   9.30000000e+01,   4.88000000e+02,
          2.53700000e+03,   1.03090000e+04,   9.44230000e+04,
          9.86000000e+02,   8.70000000e+01,   1.70000000e+01,
          2.00000000e+00]),
 array([-9.10797537, -7.4462443 , -5.78451324, -4.12278218, -2.46105111,
        -0.79932005,  0.86241101,  2.52414208,  4.18587314,  5.8476042 ,
         7.50933527]),
 <a list of 10 Patch objects>)

In [ ]:


In [ ]:


In [ ]:
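# Subsample a BAM: keep only the best alignment of every read and write the result to <input name>_subsample.bam (the *_subsample.bam files read by the cells above)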

In [14]:
def dumpBestAlignment(wfile, aligmns):
    """Write the best alignment of one read to `wfile`.

    "Best" is the alignment whose CIGAR has the most bases under operation
    code 0; on a tie the earliest alignment in `aligmns` wins.

    wfile:   object with a write(alignment) method.
    aligmns: list of alignments exposing `cigartuples`, an iterable of
             (operation, length) pairs or None. An empty list writes nothing.
    """
    if not aligmns:
        return

    def matched_bases(aln):
        # cigartuples may be None (no CIGAR on the record); score that as 0
        return sum(length for op, length in (aln.cigartuples or ()) if op == 0)

    # max() returns the first of several equally good candidates
    wfile.write(max(aligmns, key=matched_bases))

def subsampleSam(name):
    nh = 0
    aligmns = []
    with pysam.AlignmentFile(name, 'rb') as geFile, pysam.AlignmentFile(name[:-4]+'_subsample.bam', 'wb', template=geFile) as wFile:
        count_align = 0
        for alignment in geFile:
            count_align += 1
            if (count_align%1000 == 0):
                print "\r processed #alignments:" + str(count_align),
            if nh == 0:
                dumpBestAlignment(wFile, aligmns)
                nh = alignment.get_tag('NH') - 1
                curReadName = alignment.qname
                aligmns= [alignment]
            else:
                if curReadName != alignment.qname:
                    print "error"
                    break
                nh -= 1
                aligmns.append(alignment)

# Run the subsampling on the genome BAM; the output lands next to the input as
# ../data/gene_nameSorted_subsample.bam. The transcriptome run is kept commented out.
# subsampleSam('../data/txp_nameSorted.bam')
subsampleSam('../data/gene_nameSorted.bam')


 processed #alignments:11547000                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              

In [ ]: