In [1]:
import sys
# Make the local AnoThER-Seq checkout importable so `import tools` resolves.
moduleLoc = "/Users/Lykke-AndersenLab/PycharmProjects/AnoThER-Seq"
if moduleLoc not in sys.path:
    sys.path.append(moduleLoc)
else:
    # Already registered (e.g. the cell was re-run in the same kernel).
    print('woo')
import matplotlib.pyplot as plt
import tools
import pandas as pd
import seaborn as sns
import numpy as np
# Enable inline plotting
%matplotlib inline
In [2]:
#read tail files into a pd dataframe
# `imp` is deprecated and was removed in Python 3.12; importlib.reload is the
# supported way to pick up edits made to the local `tools` module.
import importlib
importlib.reload(tools)

# Folder holding the .tails files and the file name of each sample.
folderLoc = "/Users/Lykke-AndersenLab/Dropbox/NoctData/"
controlName = "siLuc.tails"
KDname = "siNoct.tails"
WTname = "NoctWT.tails"
EAname = "NoctEA.tails"

# One table per sample; the prints report progress after each file is read.
control = tools.pdTailMaker(folderLoc+controlName)
print('Control In!')
knockdown = tools.pdTailMaker(folderLoc+KDname)
print('Knockdown In!')
WTaddback = tools.pdTailMaker(folderLoc+WTname)
print('WT In!')
EAaddback = tools.pdTailMaker(folderLoc+EAname)
print('EA In!')
In [3]:
# Keep only rows whose '3Loc' + 'TailLength' sum lies within
# [minTail, maxTail] (inclusive) and tag every kept row with its sample name.
minTail = -5
maxTail = 5


def _filter_by_tail_end(tails, source, low, high):
    """Return rows of `tails` with low <= '3Loc' + 'TailLength' <= high.

    A 'Source' column holding `source` is added with `.assign`, which builds
    a new frame instead of writing into a boolean-indexed slice of the
    unfiltered data (the original pattern triggers SettingWithCopyWarning).
    """
    tail_end = tails['3Loc'] + tails['TailLength']
    return tails[tail_end.between(low, high)].assign(Source=source)


filtControl = _filter_by_tail_end(control, 'siLuc', minTail, maxTail)
filtKD = _filter_by_tail_end(knockdown, 'siNoct', minTail, maxTail)
filtWT = _filter_by_tail_end(WTaddback, 'WT Addback', minTail, maxTail)
filtEA = _filter_by_tail_end(EAaddback, 'EA Addback', minTail, maxTail)

# Stack the four filtered samples into one frame with a fresh 0..n-1 index.
combo = pd.concat([filtControl,filtKD,filtWT,filtEA],ignore_index=True)

# Report how many rows each sample had before and after filtering.
print ("CONTROL\nInitial: {}\nFiltered: {}\n".format(len(control),len(filtControl)))
print ("KNOCKDOWN\nInitial: {}\nFiltered: {}\n".format(len(knockdown),len(filtKD)))
print ("WT\nInitial: {}\nFiltered: {}\n".format(len(WTaddback),len(filtWT)))
print ("EA\nInitial: {}\nFiltered: {}\n".format(len(EAaddback),len(filtEA)))
combo.head()
Out[3]:
In [4]:
# Per-sample table of 'Type' counts plus each category's share of the total.
def _type_table(tails, source, drop_last=False):
    """Build the 'Type' count table for one sample.

    The counts column is explicitly named 'Type' so the table has the same
    layout on every pandas version: pandas >= 2.0 names the value_counts()
    result 'count', which made the original `df['Type']` lookup raise
    KeyError. With `drop_last=True` the final (least frequent) category is
    discarded before the shares are computed.
    """
    counts = tails['Type'].value_counts()
    if drop_last:
        counts = counts.iloc[:-1]
    table = counts.rename('Type').to_frame()
    table['Source'] = source
    return table.assign(Percentage=table['Type']/table['Type'].sum())


df1 = _type_table(filtControl, 'Control')
df2 = _type_table(filtKD, 'Knockdown')
df3 = _type_table(filtWT, 'WT Addback')
# NOTE(review): only the EA table drops its last category; the reason is not
# recorded in this notebook - confirm this is intentional.
df4 = _type_table(filtEA, 'EA Addback', drop_last=True)
#typeCombo = pd.concat([df1,df2,df3,df4])
#sns.barplot(y='Percentage', data=typeCombo)
In [5]:
#Find a better way to represent this data
# One pie chart per sample showing how its filtered rows split across 'Type'.
# Each chart is drawn on its own explicit axes and titled with the sample
# name; the original four pies were untitled and could not be told apart.
for sample_name, sample_tails in (
    ('siLuc', filtControl),
    ('siNoct', filtKD),
    ('WT Addback', filtWT),
    ('EA Addback', filtEA),
):
    fig, ax = plt.subplots()
    sample_tails['Type'].value_counts().plot(kind='pie', ax=ax)
    ax.set_title(sample_name)
Out[5]:
In [6]:
# Read NoctVsWT_candidates.csv from folderLoc and preview its first 10 rows.
candidates_path = folderLoc + "NoctVsWT_candidates.csv"
df = pd.read_csv(candidates_path, index_col=False)
df.head(10)
Out[6]:
In [7]:
# Filtered control rows whose 'Gene' value contains 'Mt_tRNA'.
is_mt_trna = filtControl['Gene'].str.contains('Mt_tRNA')
df = filtControl[is_mt_trna]
In [8]:
# Cumulative distribution of '3Loc' + 'TailLength' for rows whose 'Gene'
# contains 'Mt_tRNA', drawn as one step curve per sample.
plt.figure(figsize=(20,20))
for sample_tails, colour, sample_name in (
    (filtControl, 'blue', 'siLuc'),
    (filtKD, 'green', 'siNoct'),
    (filtWT, 'red', 'WT Addback'),
    (filtEA, 'yellow', 'EA Addback'),
):
    df = sample_tails[sample_tails['Gene'].str.contains('Mt_tRNA')]
    # `normed` was removed from np.histogram; `density` is its replacement.
    values, base = np.histogram(df['3Loc']+df['TailLength'], bins=20, density=True)
    # A density is "per unit of x", so each bin is weighted by its width to
    # make the running sum a fraction of rows that ends at 1 for every sample.
    cumulative = np.cumsum(values * np.diff(base))
    plt.step(base[:-1], cumulative, c=colour, label=sample_name)
# Legend maps each colour back to its sample.
plt.legend()
#Cumulative plot of mitochondrial tRNA lengths
Out[8]:
In [9]:
# Cumulative distribution of '3Loc' + 'TailLength' for rows whose 'Gene'
# contains 'gtRNA', drawn as one step curve per sample.
plt.figure(figsize=(20,20))
for sample_tails, colour, sample_name in (
    (filtControl, 'blue', 'siLuc'),
    (filtKD, 'green', 'siNoct'),
    (filtWT, 'red', 'WT Addback'),
    (filtEA, 'yellow', 'EA Addback'),
):
    df = sample_tails[sample_tails['Gene'].str.contains('gtRNA')]
    # `normed` was removed from np.histogram; `density` is its replacement.
    values, base = np.histogram(df['3Loc']+df['TailLength'], bins=20, density=True)
    # A density is "per unit of x", so each bin is weighted by its width to
    # make the running sum a fraction of rows that ends at 1 for every sample.
    cumulative = np.cumsum(values * np.diff(base))
    plt.step(base[:-1], cumulative, c=colour, label=sample_name)
# Legend maps each colour back to its sample.
plt.legend()
#cumulative plots of tRNA
Out[9]:
In [10]:
# Rows of each filtered sample whose 'Gene' value contains 'gtRNA', in the
# order control, knockdown, WT addback, EA addback.
df1, df2, df3, df4 = (
    sample[sample['Gene'].str.contains('gtRNA')]
    for sample in (filtControl, filtKD, filtWT, filtEA)
)
In [97]:
#TPM Calculations
def _gene_tpm(reads, total_reads, source):
    """Count rows per 'Gene' in `reads` and scale to counts per million.

    Returns a frame indexed by gene with columns 'Count', 'Source' and 'TPM',
    where TPM = Count / (total_reads / 1,000,000). The counts are named
    'Count' directly on the Series because pandas >= 2.0 no longer names the
    value_counts() result after the source column, which made the original
    rename(columns={'Gene': 'Count'}) a silent no-op.
    """
    table = reads['Gene'].value_counts().rename('Count').to_frame()
    table['Source'] = source
    table['TPM'] = table['Count']/(total_reads/1000000)
    return table


# Rows whose 'Gene' contains 'tRNA', normalised by the size of the whole
# filtered sample they came from.
df1 = filtControl[filtControl['Gene'].str.contains('tRNA')]
TPM1 = _gene_tpm(df1, len(filtControl), 'Control')
df2 = filtKD[filtKD['Gene'].str.contains('tRNA')]
TPM2 = _gene_tpm(df2, len(filtKD), 'siNoct')
df3 = filtWT[filtWT['Gene'].str.contains('tRNA')]
TPM3 = _gene_tpm(df3, len(filtWT), 'WT Addback')
df4 = filtEA[filtEA['Gene'].str.contains('tRNA')]
TPM4 = _gene_tpm(df4, len(filtEA), 'EA Addback')

dude = pd.concat([TPM1,TPM2,TPM3,TPM4])
# The first five index labels come from the control table (value_counts sorts
# by descending count). Selecting them by label on the concatenated frame,
# whose index repeats genes, returns the matching rows from every sample.
ind = dude.index[:5]
# `.ix` was removed in pandas 1.0; `.loc` is the label-based equivalent.
dude=dude.loc[ind]
dude
Out[97]:
In [98]:
# Grouped bar chart of TPM per gene, with one bar colour per 'Source'.
plt.figure(figsize=(20,20))
# seaborn >= 0.12 rejects positional x/y, so the gene index is moved into a
# 'Gene' column and every variable is passed by keyword from one tidy frame.
tpm_by_gene = dude.rename_axis('Gene').reset_index()
sns.barplot(data=tpm_by_gene, x='Gene', y='TPM', hue='Source')
plt.xticks(rotation=-90)
Out[98]:
In [46]:
# Leftover interactive help lookup (IPython `?` shows the docstring of
# sns.barplot); it produces no data and is not part of the analysis.
sns.barplot?
In [ ]: