In [3]:
import pandas as pd
import numpy as np
import os
# Path of the coverage_count.bed file that this notebook loads.
# The default is the original absolute location; set the
# CRISPRML_COVERAGE_FILE environment variable to use a copy stored elsewhere
# without editing this cell. expanduser() resolves a leading "~" in such an
# override (the default contains none, so it passes through unchanged).
coverage_file = os.path.expanduser(
    os.environ.get(
        "CRISPRML_COVERAGE_FILE",
        "/Users/zamparol/projects/crisprML/data/annotations/coverage_count.bed",
    )
)
In [6]:
# Read the tab-delimited table at `coverage_file` (the file is parsed without a
# header row) and attach the seven column labels in one chained expression.
# set_axis raises if the file does not have exactly seven columns.
coverages = pd.read_csv(coverage_file, sep="\t", header=None).set_axis(
    ["chr", "start", "end", "transcript", "gene", "strand", "coverage"],
    axis="columns",
)
In [7]:
# Preview the first five rows to sanity-check the load and the column labels.
coverages.head(n=5)
Out[7]:
In [15]:
# Number of transcripts per gene? (Still to do: split into bins & plot.)
# For every gene, summarise the `coverage` column with its median and its count
# of non-null rows, labelling the two results directly via named aggregation.
# NOTE(review): the labels read `coverage` as "exons per transcript" and each
# row as one transcript -- confirm against how coverage_count.bed was produced.
avg_tx_per_gene = (
    coverages
    .groupby("gene")["coverage"]
    .agg(**{"median exons per tx": "median", "number of tx": "count"})
)
In [16]:
# Display the per-gene summary table (one row per gene) as this cell's output.
avg_tx_per_gene
Out[16]:
In [ ]: