Load and untidying 10x matrix data



In [64]:

    
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Numerical python library (pronounced "num-pie")
import numpy as np
# Dataframes in Python
import pandas as pd
# Statistical plotting library we'll use
import seaborn as sns
# Get basename of input file
import os
# Statistic model package for Generalized Linear Models
import statsmodels.api as sm
# Stats package for chi-square distribution
from scipy.stats import chi2

# This is necessary to show the plotted figures inside the notebook -- "inline" with the notebook cells
%matplotlib inline
## Add more when seeing new modules in other notebooks!



In [65]:

    
# Assign variables for input file names
inDir = "../AGGallSampWGFP/"
inMtx = inDir+"outs/filtered_gene_bc_matrices_mex/cellRanger_danRer10wReporter/matrix.mtx"
inGenes = inDir+"outs/filtered_gene_bc_matrices_mex/cellRanger_danRer10wReporter/genes.tsv"
inBarcodes = inDir+"outs/filtered_gene_bc_matrices_mex/cellRanger_danRer10wReporter/barcodes.tsv"



In [66]:

    
# Load long format data and convert to wide format
## Read in mtx. The raw data from 10X genomics should work.
mtx = pd.read_table(inMtx,
                    sep=" ", header=None, low_memory=False, skiprows=3)
## Set column names for the input mtx
colName = ["GeneID", "CellID", "GeneCounts"]#,"Barcode","SampleID","GFP","ClusterID"]
mtx.columns = colName
## Convert this into a gene-by-cell matrix. Unwinding long format to wider format
widemtx = mtx.pivot(index='CellID', columns='GeneID', values='GeneCounts')
#widemtx
# Replace NaN with zero
widemtxWzero = widemtx.fillna(value=0)
print(widemtxWzero.shape)
widemtxWzero.head()









    



(18773, 24122)






    Out[66]:







  
    
      GeneID
      1
      2
      4
      5
      6
      7
      8
      9
      10
      11
      ...
      28956
      28957
      28958
      28960
      28961
      28962
      28963
      28964
      28965
      28980
    
    
      CellID
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      4.0
      0.0
    
    
      2
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      3
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      2.0
      0.0
    
    
      4
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      5
      0.0
      0.0
      0.0
      0.0
      3.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      2.0
      1.0
    
  

5 rows × 24122 columns



In [ ]:



In [67]:

    
# Read in the gene list
genes = pd.read_table(inGenes, header=None)
genes.columns = ["EnsemblID","GeneShortName"]
genes.index = list(range(1,28982))
print(genes.head())
# Make a dictionary to store the gene number and GeneShortName assignments
dictGenNumName = {}
for i in range(1,28982):
    dictGenNumName[i] = [genes["EnsemblID"][i],genes["GeneShortName"][i]]
print(dictGenNumName[1])
# Convert GeneID (numbers) into gene names
GeneNames = []
noConvNum = 0
for GeneID in widemtxWzero.columns:
    try:
        GeneName = dictGenNumName[GeneID][0]
    except KeyError:
        GeneName = GeneID
        noConvNum += 1
    GeneNames.append(GeneName)
print(noConvNum,"of GeneIDs not converted.")
print("length of GeneNames: ",len(GeneNames))
# Reassign all lables in the cell by gene matrix (geneID -> geneName)
widemtxWzero.columns = GeneNames
print(widemtxWzero.head())









    



            EnsemblID       GeneShortName
1  ENSDARG00000104632                RERG
2  ENSDARG00000100660  ENSDARG00000100660
3  ENSDARG00000098417                syn3
4  ENSDARG00000100422               ptpro
5  ENSDARG00000102128                eps8
['ENSDARG00000104632', 'RERG']
0 of GeneIDs not converted.
length of GeneNames:  24122
        ENSDARG00000104632  ENSDARG00000100660  ENSDARG00000100422  \
CellID                                                               
1                      0.0                 0.0                 0.0   
2                      0.0                 0.0                 0.0   
3                      0.0                 0.0                 0.0   
4                      0.0                 0.0                 0.0   
5                      0.0                 0.0                 0.0   

        ENSDARG00000102128  ENSDARG00000103095  ENSDARG00000102226  \
CellID                                                               
1                      0.0                 0.0                 0.0   
2                      0.0                 0.0                 0.0   
3                      0.0                 0.0                 0.0   
4                      0.0                 0.0                 0.0   
5                      0.0                 3.0                 0.0   

        ENSDARG00000104049  ENSDARG00000102474  ENSDARG00000100143  \
CellID                                                               
1                      1.0                 0.0                 0.0   
2                      0.0                 0.0                 0.0   
3                      0.0                 0.0                 0.0   
4                      0.0                 0.0                 0.0   
5                      0.0                 0.0                 0.0   

        ENSDARG00000104839  ...   ENSDARG00000097721  ENSDARG00000051761  \
CellID                      ...                                            
1                      0.0  ...                  0.0                 0.0   
2                      0.0  ...                  0.0                 0.0   
3                      0.0  ...                  0.0                 0.0   
4                      0.0  ...                  0.0                 0.0   
5                      0.0  ...                  0.0                 0.0   

        ENSDARG00000093480  ENSDARG00000089892  ENSDARG00000051756  \
CellID                                                               
1                      0.0                 0.0                 0.0   
2                      0.0                 0.0                 0.0   
3                      0.0                 0.0                 0.0   
4                      0.0                 0.0                 0.0   
5                      0.0                 0.0                 0.0   

        ENSDARG00000102643  ENSDARG00000103220  ENSDARG00000102863  \
CellID                                                               
1                      1.0                 0.0                 0.0   
2                      0.0                 0.0                 0.0   
3                      1.0                 0.0                 0.0   
4                      0.0                 0.0                 0.0   
5                      0.0                 0.0                 0.0   

        ENSDARG00000101560  REP1  
CellID                            
1                      4.0   0.0  
2                      0.0   0.0  
3                      2.0   0.0  
4                      0.0   0.0  
5                      2.0   1.0  

[5 rows x 24122 columns]



In [ ]:



In [73]:

    
## Add cell barcodes
## Read in cell barcode file
metadata = pd.read_table(inBarcodes, header=None, names=["Barcodes"])
metadata.index = widemtxWzero.index
## Extract sample information from barcodes -> save into a new column
metadata["SampleID"] = [barcode.split("-")[1] for barcode in metadata["Barcodes"]]
## Map sampleID number to Stage/Type/Rep metadata
dict_sampIDtoMeta = {
    "1": ["0dpa","1"],
    "2": ["0dpa","2"],
    "3": ["1dpa","1"],
    "4": ["1dpa","2"],
    "5": ["2dpa","1"],
    "6": ["2dpa","2"],
    "7": ["4dpa","1"],
    "8": ["4dpa","2"],
                    }
metadata["Stage"] = [ dict_sampIDtoMeta[sampID][0] for sampID in metadata["SampleID"]]
metadata["Replicate"] = [ dict_sampIDtoMeta[sampID][1] for sampID in metadata["SampleID"]]
## Add single cell label to dataframe (this is for all samples)
metadata["Type"] = ["SC"] * len(metadata.index)
metadata["GFP"] = widemtxWzero["REP1"] > 0 
## how does our metadata look so far?
print("Current shape of metadata: ", metadata.shape)
print("And it looks like this: \n", metadata.head())
print("There are %d stages, %d types in the dataset." % (len(np.unique(metadata["Stage"])), len(np.unique(metadata["Type"]))))









    



Current shape of metadata:  (18773, 6)
And it looks like this: 
                   Barcodes SampleID Stage Replicate Type    GFP
CellID                                                         
1       AAACCTGAGAGTCGGT-1        1  0dpa         1   SC  False
2       AAACCTGCAAATCCGT-1        1  0dpa         1   SC  False
3       AAACCTGGTATAATGG-1        1  0dpa         1   SC  False
4       AAACCTGTCAGCTTAG-1        1  0dpa         1   SC  False
5       AAACCTGTCATATCGG-1        1  0dpa         1   SC   True
There are 4 stages, 1 types in the dataset.



In [ ]:

Prior filtering distributions



In [10]:

    
gCountInCell = (widemtxWzero > 0).sum(axis=1)



In [11]:

    
sns.distplot(gCountInCell)









    Out[11]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f50b1205048>



In [12]:

    
sns.distplot(widemtxWzero.sum(axis=1))









    Out[12]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f50ae436390>



In [12]:

    
min(widemtxWzero.sum(axis=1))









    Out[12]:





3270.0

Filter out low genes/ low cells!



In [70]:

    
# From the distribution above, consider rm cells out of 1000-3000 gene captured range
lowMask = (widemtxWzero > 0).sum(axis=1) >= 1000
highMask = (widemtxWzero > 0).sum(axis=1) <= 3000
mask = [all(tup) for tup in zip(lowMask, highMask)]
# There are  falling in this category before gene removal
len(mask)









    Out[70]:





18773



In [71]:

    
# Gene filter
## Calculate number of cells with a specific gene expression. Mask -> filter out genes with number of cells below 10
lowCellExpMask = (widemtxWzero > 0).sum(axis=0) > 10
np.sum(lowCellExpMask)









    Out[71]:





18830



In [72]:

    
## Remove lowly captured genes
expGfilt = widemtxWzero.T[lowCellExpMask]
## Remove cells with low expression profile
expG_CfT = expGfilt.T[mask]
print("Shape of DEG before filtering is: ", widemtxWzero.shape)
print("Shape of DEG after filtering is: ", expG_CfT.shape)
expG_CfT.head()









    



Shape of DEG before filtering is:  (18773, 24122)
Shape of DEG after filtering is:  (16172, 18830)






    Out[72]:







  
    
      
      ENSDARG00000100660
      ENSDARG00000100422
      ENSDARG00000102128
      ENSDARG00000103095
      ENSDARG00000102226
      ENSDARG00000104049
      ENSDARG00000102474
      ENSDARG00000100143
      ENSDARG00000104373
      ENSDARG00000098311
      ...
      ENSDARG00000078600
      ENSDARG00000097721
      ENSDARG00000051761
      ENSDARG00000093480
      ENSDARG00000089892
      ENSDARG00000051756
      ENSDARG00000102643
      ENSDARG00000103220
      ENSDARG00000101560
      REP1
    
    
      CellID
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      4.0
      0.0
    
    
      2
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      3
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      2.0
      0.0
    
    
      4
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      5
      0.0
      0.0
      0.0
      3.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      2.0
      1.0
    
  

5 rows × 18830 columns



In [ ]:

    
# Change cellID into "cell#"?
# cellIDlist = ["Cell"+str(i) for i in expG_CfT.index]
# expG_Cfilt = expG_CfT
# expG_Cfilt.index = cellIDlist
# expG_Cfilt.head()

Take in bulk-seq feature counts



In [79]:

    
# Read in feature count
FCfile = "/scratch/yhou/Fin_Reg/07_SingleCellRNA/public_data/09282016_HJ_GFPFinReg/FCs/092817_FinRegGFPwgeneID.fc"
bulkFC = pd.read_table(FCfile, index_col = 0)
print(bulkFC.shape)
bulkFC.head()









    



(28981, 8)






    Out[79]:







  
    
      
      01_4dpaGFPpo_rep5Aligned.sortedByCoord.out.bam
      02_4dpaGFPne_rep5Aligned.sortedByCoord.out.bam
      03_0dpaGFPpo_rep5Aligned.sortedByCoord.out.bam
      04_0dpaGFPne_rep5Aligned.sortedByCoord.out.bam
      05_4dpaGFPpo_rep6Aligned.sortedByCoord.out.bam
      06_4dpaGFPne_rep6Aligned.sortedByCoord.out.bam
      07_0dpaGFPpo_rep6Aligned.sortedByCoord.out.bam
      08_0dpaGFPne_rep6Aligned.sortedByCoord.out.bam
    
    
      Geneid
      
      
      
      
      
      
      
      
    
  
  
    
      ENSDARG00000104632
      896
      107
      189
      80
      897
      109
      431
      106
    
    
      ENSDARG00000100660
      663
      824
      809
      519
      482
      698
      795
      677
    
    
      ENSDARG00000098417
      4
      2
      0
      0
      3
      4
      1
      0
    
    
      ENSDARG00000100422
      1484
      1484
      102
      58
      1321
      1147
      182
      106
    
    
      ENSDARG00000102128
      892
      1929
      1143
      2554
      893
      2009
      1289
      2902



In [80]:

    
# Remove "Aligned.sortedByCoord.out.bam" in sample names
shortSampNames = [name.split("A")[0] for name in bulkFC.columns]
bulkFC.columns = shortSampNames
bulkFC.head()









    Out[80]:







  
    
      
      01_4dpaGFPpo_rep5
      02_4dpaGFPne_rep5
      03_0dpaGFPpo_rep5
      04_0dpaGFPne_rep5
      05_4dpaGFPpo_rep6
      06_4dpaGFPne_rep6
      07_0dpaGFPpo_rep6
      08_0dpaGFPne_rep6
    
    
      Geneid
      
      
      
      
      
      
      
      
    
  
  
    
      ENSDARG00000104632
      896
      107
      189
      80
      897
      109
      431
      106
    
    
      ENSDARG00000100660
      663
      824
      809
      519
      482
      698
      795
      677
    
    
      ENSDARG00000098417
      4
      2
      0
      0
      3
      4
      1
      0
    
    
      ENSDARG00000100422
      1484
      1484
      102
      58
      1321
      1147
      182
      106
    
    
      ENSDARG00000102128
      892
      1929
      1143
      2554
      893
      2009
      1289
      2902



In [21]:

    
bulkFC.sum(axis=0)









    Out[21]:





01_4dpaGFPpo_rep5    47061460
02_4dpaGFPne_rep5    47383189
03_0dpaGFPpo_rep5    27009827
04_0dpaGFPne_rep5    35852577
05_4dpaGFPpo_rep6    35610717
06_4dpaGFPne_rep6    43817832
07_0dpaGFPpo_rep6    26472854
08_0dpaGFPne_rep6    42742471
dtype: int64



In [76]:

    
np.average(expG_CfT.sum(axis=1))









    Out[76]:





8391.5390180558989



In [77]:

    
sns.distplot(expG_CfT.sum(axis=1))









    Out[77]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f50ad6e0da0>



In [ ]:

    
# Combine the two datasets -> with/without normalization
## Transform bulk feature count (so both df will be cell-by-gene)
# combDF = expG_Cfilt.T.join(bulkFC,how = 'left')
# print(combDF.shape)
# combDF.head()

Normalize bulk-seq data by total counts captured



In [81]:

    
# Normalize HJ's bulk by total read counts closer to single cell sets (round to ~20000)
## Append one row with total count to each column, then 
## Create the sum array
colSumFC = pd.DataFrame(bulkFC.sum(axis=0),columns=['Sum'])
colSumFC
## Append array to bulkFC dataframe
bulkFCwSum = bulkFC.append(colSumFC.T)
## Divide all values in each cell/sample by sum.this will normalize bulk data to 10,000 UMIs per sample)
bulkFCnorm = bulkFCwSum.divide(bulkFCwSum.iloc[28981] / 10000)



In [83]:

    
## Combine thie normalized set back with the singe cell set
## Transform bulk feature count (so both df will be cell-by-gene)
ncombDF = expG_CfT.T.join(bulkFCnorm,how = 'left')
print(ncombDF.shape)
ncombDF.head()









    



(18830, 16180)






    Out[83]:







  
    
      
      1
      2
      3
      4
      5
      6
      7
      9
      10
      11
      ...
      18770
      18773
      01_4dpaGFPpo_rep5
      02_4dpaGFPne_rep5
      03_0dpaGFPpo_rep5
      04_0dpaGFPne_rep5
      05_4dpaGFPpo_rep6
      06_4dpaGFPne_rep6
      07_0dpaGFPpo_rep6
      08_0dpaGFPne_rep6
    
  
  
    
      ENSDARG00000100660
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.140880
      0.173901
      0.299521
      0.144759
      0.135353
      0.159296
      0.300308
      0.158390
    
    
      ENSDARG00000100422
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.315332
      0.313191
      0.037764
      0.016177
      0.370956
      0.261766
      0.068750
      0.024800
    
    
      ENSDARG00000102128
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.189539
      0.407106
      0.423179
      0.712362
      0.250767
      0.458489
      0.486914
      0.678950
    
    
      ENSDARG00000103095
      0.0
      0.0
      0.0
      0.0
      3.0
      0.0
      0.0
      0.0
      0.0
      2.0
      ...
      0.0
      0.0
      1.152110
      0.925012
      2.966698
      1.124884
      1.113429
      0.844633
      2.642707
      0.957362
    
    
      ENSDARG00000102226
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.029323
      0.034189
      0.009626
      0.014504
      0.036225
      0.039253
      0.012088
      0.011230
    
  

5 rows × 16180 columns



In [84]:

    
## log transform and add GFP metadata
loggedexpwB = np.log2(ncombDF.T + 1)
loggedexpwB.tail()









    Out[84]:







  
    
      
      ENSDARG00000100660
      ENSDARG00000100422
      ENSDARG00000102128
      ENSDARG00000103095
      ENSDARG00000102226
      ENSDARG00000104049
      ENSDARG00000102474
      ENSDARG00000100143
      ENSDARG00000104373
      ENSDARG00000098311
      ...
      ENSDARG00000078600
      ENSDARG00000097721
      ENSDARG00000051761
      ENSDARG00000093480
      ENSDARG00000089892
      ENSDARG00000051756
      ENSDARG00000102643
      ENSDARG00000103220
      ENSDARG00000101560
      REP1
    
  
  
    
      04_0dpaGFPne_rep5
      0.195044
      0.023152
      0.775987
      1.087384
      0.020774
      0.260332
      1.041203
      0.570504
      0.143882
      0.228744
      ...
      0.055651
      0.056425
      0.050220
      0.144974
      0.050220
      0.777866
      1.691768
      0.444289
      1.741714
      0.018392
    
    
      05_4dpaGFPpo_rep6
      0.183140
      0.455182
      0.322813
      1.079586
      0.051337
      0.205447
      0.570157
      0.759146
      0.182069
      0.335391
      ...
      0.333463
      0.178494
      0.142244
      0.312737
      0.020515
      0.866590
      1.358315
      0.471928
      1.912934
      3.027165
    
    
      06_4dpaGFPne_rep6
      0.213249
      0.335444
      0.544475
      0.883334
      0.055548
      0.167663
      0.690760
      0.822824
      0.163847
      0.291733
      ...
      0.264856
      0.162377
      0.129045
      0.694427
      0.108119
      0.722833
      1.054790
      0.271963
      1.949547
      0.019621
    
    
      07_0dpaGFPpo_rep6
      0.378853
      0.095924
      0.572321
      1.865011
      0.017335
      0.279984
      0.840460
      0.798777
      0.111645
      0.448398
      ...
      0.077966
      0.060304
      0.057164
      0.183948
      0.023782
      1.032524
      2.639556
      1.569517
      1.686388
      2.015198
    
    
      08_0dpaGFPne_rep6
      0.212122
      0.035342
      0.747559
      0.968910
      0.016111
      0.216776
      0.809536
      0.405195
      0.125974
      0.190988
      ...
      0.051391
      0.048130
      0.037974
      0.116353
      0.101329
      0.682170
      1.588278
      0.400855
      1.647333
      0.019112
    
  

5 rows × 18830 columns

Make metadata for bulk-seq data, combine with sc metadata



In [101]:

    
## Extract bulk sample names
bulkMeta = pd.DataFrame(data = ["NaN1","NaN2","NaN3","NaN4","NaN5","NaN6","NaN7","NaN8"],
                        index = bulkFC.columns,columns=["Barcodes"])
## Add all required columns as in single cell metadata
bulkMeta["SampleID"] = ["9"] * 8
bulkMeta["Stage"] = ["4dpa"]*2+["0dpa"]*2+["4dpa"]*2+["0dpa"]*2
bulkMeta["Replicate"] = ["5"] * 4 + ["6"] * 4
bulkMeta["Type"] = ["Bulk"] * 8
bulkMeta["GFP"] = ["po" in str(index) for index in bulkMeta.index ]



In [ ]:

    
combMeta = metadata.append(bulkMeta)
combMeta



In [105]:

    
## Save wide mtx and the combined metadata to file
#loggedexpwB.to_csv("AGGallSamp_wBulkNorm_logged_widemtx.csv",compression='gzip')
combMeta.to_csv("AGGallSamp_wBulk_metadata.csv")



In [ ]:



In [26]:

    
# Save combined, log transformed matrix to file
#loggedexpwB.to_csv("samp1_wBulk_filtered_log2_cellgene_widemtx")



In [84]:

    
## Save data to file
#ncombDF.to_csv("../samp1wGFP/MtxSubsets/samp1_GFPpnbulk_comb_normed_filtered_widemtx.csv")

GeneID	1	2	4	5	6	7	8	9	10	11	...	28956	28957	28958	28960	28961	28962	28963	28964	28965	28980
CellID
1	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0	4.0	0.0
2	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
3	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0	2.0	0.0
4	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
5	0.0	0.0	0.0	0.0	3.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	2.0	1.0

	01_4dpaGFPpo_rep5Aligned.sortedByCoord.out.bam	02_4dpaGFPne_rep5Aligned.sortedByCoord.out.bam	03_0dpaGFPpo_rep5Aligned.sortedByCoord.out.bam	04_0dpaGFPne_rep5Aligned.sortedByCoord.out.bam	05_4dpaGFPpo_rep6Aligned.sortedByCoord.out.bam	06_4dpaGFPne_rep6Aligned.sortedByCoord.out.bam	07_0dpaGFPpo_rep6Aligned.sortedByCoord.out.bam	08_0dpaGFPne_rep6Aligned.sortedByCoord.out.bam
Geneid
ENSDARG00000104632	896	107	189	80	897	109	431	106
ENSDARG00000100660	663	824	809	519	482	698	795	677
ENSDARG00000098417	4	2	0	0	3	4	1	0
ENSDARG00000100422	1484	1484	102	58	1321	1147	182	106
ENSDARG00000102128	892	1929	1143	2554	893	2009	1289	2902

	5	11	...	01_4dpaGFPpo_rep5	02_4dpaGFPne_rep5	03_0dpaGFPpo_rep5	04_0dpaGFPne_rep5	05_4dpaGFPpo_rep6	06_4dpaGFPne_rep6	07_0dpaGFPpo_rep6	08_0dpaGFPne_rep6
ENSDARG00000100660	0.0	0.0	...	0.140880	0.173901	0.299521	0.144759	0.135353	0.159296	0.300308	0.158390
ENSDARG00000100422	0.0	0.0	...	0.315332	0.313191	0.037764	0.016177	0.370956	0.261766	0.068750	0.024800
ENSDARG00000102128	0.0	0.0	...	0.189539	0.407106	0.423179	0.712362	0.250767	0.458489	0.486914	0.678950
ENSDARG00000103095	3.0	2.0	...	1.152110	0.925012	2.966698	1.124884	1.113429	0.844633	2.642707	0.957362
ENSDARG00000102226	0.0	0.0	...	0.029323	0.034189	0.009626	0.014504	0.036225	0.039253	0.012088	0.011230

	ENSDARG00000100660	ENSDARG00000100422	ENSDARG00000102128	ENSDARG00000103095	ENSDARG00000102226	ENSDARG00000104049	ENSDARG00000102474	ENSDARG00000100143	ENSDARG00000104373	ENSDARG00000098311	...	ENSDARG00000078600	ENSDARG00000097721	ENSDARG00000051761	ENSDARG00000093480	ENSDARG00000089892	ENSDARG00000051756	ENSDARG00000102643	ENSDARG00000103220	ENSDARG00000101560	REP1
04_0dpaGFPne_rep5	0.195044	0.023152	0.775987	1.087384	0.020774	0.260332	1.041203	0.570504	0.143882	0.228744	...	0.055651	0.056425	0.050220	0.144974	0.050220	0.777866	1.691768	0.444289	1.741714	0.018392
05_4dpaGFPpo_rep6	0.183140	0.455182	0.322813	1.079586	0.051337	0.205447	0.570157	0.759146	0.182069	0.335391	...	0.333463	0.178494	0.142244	0.312737	0.020515	0.866590	1.358315	0.471928	1.912934	3.027165
06_4dpaGFPne_rep6	0.213249	0.335444	0.544475	0.883334	0.055548	0.167663	0.690760	0.822824	0.163847	0.291733	...	0.264856	0.162377	0.129045	0.694427	0.108119	0.722833	1.054790	0.271963	1.949547	0.019621
07_0dpaGFPpo_rep6	0.378853	0.095924	0.572321	1.865011	0.017335	0.279984	0.840460	0.798777	0.111645	0.448398	...	0.077966	0.060304	0.057164	0.183948	0.023782	1.032524	2.639556	1.569517	1.686388	2.015198
08_0dpaGFPne_rep6	0.212122	0.035342	0.747559	0.968910	0.016111	0.216776	0.809536	0.405195	0.125974	0.190988	...	0.051391	0.048130	0.037974	0.116353	0.101329	0.682170	1.588278	0.400855	1.647333	0.019112