In [314]:
import numpy as np
import pandas as pd
from tabulate import tabulate
from collections import defaultdict
from scipy.stats import fisher_exact, mannwhitneyu, wilcoxon, ranksums, chi2_contingency, chisquare
# import matplotlibtail
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
%matplotlib inline
All of the CPM values are log2 transformed. Be careful when doing calculations (logs are different). Usually the best thing is to unlog before calculation means etc.
These tables are completely unfilterted and contain also unexpressed genes. They need to be filtered
In [315]:
# Read in the RPKM tables
rpkm_gonad = pd.read_csv("/data/ucbtzim/ohnolog-dc/differential-expression/2015-10-27-MF-GONAD-RPKM-NORM-log2-edgeR.csv")
rpkm_spleen = pd.read_csv("/data/ucbtzim/ohnolog-dc/differential-expression/2015-10-27-MF-SPLEEN-RPKM-NORM-log2-edgeR.csv")
rpkm_heart = pd.read_csv("/data/ucbtzim/ohnolog-dc/differential-expression/2015-10-27-MF-HEART-RPKM-NORM-log2-edgeR.csv")
rpkm_liver = pd.read_csv("/data/ucbtzim/ohnolog-dc/differential-expression/2015-10-27-MF-LIVER-RPKM-NORM-log2-edgeR.csv")
# Read in the detag EdegR results
de_gonad = pd.read_csv("/data/ucbtzim/ohnolog-dc/differential-expression/2015-10-27-MF-GONAD-detags-edgeR.csv")
de_spleen = pd.read_csv("/data/ucbtzim/ohnolog-dc/differential-expression/2015-10-27-MF-SPLEEN-detags-edgeR.csv")
de_heart = pd.read_csv("/data/ucbtzim/ohnolog-dc/differential-expression/2015-10-27-MF-HEART-detags-edgeR.csv")
de_liver = pd.read_csv("/data/ucbtzim/ohnolog-dc/differential-expression/2015-10-27-MF-LIVER-detags-edgeR.csv")
The edegR logFC value is calculated internally using some additional processing.(see below)
Also "note that the first group listed in the pair is the baseline for the comparison—so if exactTest the pair is c("A","B") then the comparison is B - A, so genes with positive log-fold change are up-regulated in group B compared with group A (and vice versa for genes with negative log-fold change)." Source EdgeR: Manual
In our case this means that the Male expression is substracted from the Female expression (F-M): Positiv logFC is upregulation in females, negate logFC is upregulation in Males.
On the Z that means the close we get to 0.0 logFC the more DC.
Add some positional information. We need to know where genes are located
In [396]:
def read_gid_to_chr(infile):
gid_to_chr = {}
with open(infile, "rb") as handle:
for line in handle:
if line.startswith("Ensembl"):
continue
else:
line = line.rstrip().split()
gid_to_chr[line[0]] = line[1]
return gid_to_chr
gid_to_chr = read_gid_to_chr("/data/ucbtzim/ohnolog-dc/dc-comparison/Gallus_gallus.75.gid.chr.proteincoding.tsv")
Two functions to work with log2 expression
In [317]:
def anti_log2(i):
return np.power(2, i)
def log2(i):
return np.log2(i)
print type(np.power)
print type(np.log2)
In [318]:
# Print the first 5 lines of the dataframe
rpkm_gonad.head()
Out[318]:
In [319]:
# The differential expression results from edgeR
de_gonad.head()
Out[319]:
First we combine the RPKM (!) and differential dataframes to hold all the data in one place.
In [320]:
comb_gonad = pd.concat([rpkm_gonad, de_gonad], axis=1)
comb_heart = pd.concat([rpkm_heart, de_heart], axis=1)
comb_liver = pd.concat([rpkm_liver, de_liver], axis=1)
comb_spleen = pd.concat([rpkm_spleen, de_spleen], axis=1)
print comb_gonad.shape
print comb_heart.shape
# Liver has one female sample less
print comb_liver.shape
print comb_spleen.shape
print comb_gonad.dtypes
print comb_heart.dtypes
# Liver has one female sample less
print comb_liver.dtypes
print comb_spleen.dtypes
Now we filter out all scaffolds and the W chromosome. Only keep autosomes 1-28 and the Z chromosome
In [321]:
# No expression on chromosome 32, excluded
chicken_chromosomes = [str(i) for i in range(1, 29)] + ["Z"]
print "Valid chromosomes: ", chicken_chromosomes
comb_gonad = comb_gonad[comb_gonad.index.map(lambda x: gid_to_chr[x] in chicken_chromosomes)]
comb_heart = comb_heart[comb_heart.index.map(lambda x: gid_to_chr[x] in chicken_chromosomes)]
comb_liver = comb_liver[comb_liver.index.map(lambda x: gid_to_chr[x] in chicken_chromosomes)]
comb_spleen = comb_spleen[comb_spleen.index.map(lambda x: gid_to_chr[x] in chicken_chromosomes)]
print comb_gonad.shape
print comb_heart.shape
print comb_liver.shape
print comb_spleen.shape
In [ ]:
In [378]:
def filter_cpm(expr, mincpm):
'''Filter out lowly expressed genes based on a fixed minimum CPM threshold'''
filtered = expr[expr["logCPM"] > mincpm]
return filtered
# Beware average CPM is log2 transformed.
# For avgCPM of 2 use 1 | log2(2) = 1
comb_gonad_min2 = filter_cpm(comb_gonad, 1)
comb_heart_min2 = filter_cpm(comb_heart, 1)
comb_liver_min2 = filter_cpm(comb_liver, 1)
comb_spleen_min2 = filter_cpm(comb_spleen, 1)
# Drop ENSGALG00000018479. Looks like a W gene
# Does not affect the Z vs A comparison at all
# comb_gonad_min2 = comb_gonad_min2.drop("ENSGALG00000018479")
# comb_heart_min2 = comb_heart_min2.drop("ENSGALG00000018479")
# comb_liver_min2 = comb_liver_min2.drop("ENSGALG00000018479")
# comb_spleen_min2 = comb_spleen_min2.drop("ENSGALG00000018479")
print comb_gonad_min2.shape
print comb_heart_min2.shape
print comb_liver_min2.shape
print comb_spleen_min2.shape
print comb_gonad_min2.dtypes
print comb_heart_min2.dtypes
print comb_liver_min2.dtypes
print comb_spleen_min2.dtypes
In [395]:
Now have four different data frames with all the data required. Both dataframes contain males and females and autosomal + Z expression. Now we split the dataframes into one autosomal and one Z dataframe
In [379]:
def split_dataframe(data, genes):
'''Returns two dataframes. The first return value contains all rows that did match the genes.
The second dataframe contains only genes that did NOT match.
'''
match = data[data.index.isin(genes)]
nomatch = data[~data.index.isin(genes)]
return match, nomatch
# Get a list of Z linked gene IDs
Z_genes = [g for g, pos in gid_to_chr.items() if pos == "Z"]
print "Number of genes (protein coding) on the Z chromosome:", len(Z_genes)
print ""
# Filter the expression so it only contains Z expression
Z_comb_gonad_min2, AUTO_comb_gonad_min2 = split_dataframe(comb_gonad_min2, Z_genes)
Z_comb_heart_min2, AUTO_comb_heart_min2 = split_dataframe(comb_heart_min2, Z_genes)
Z_comb_liver_min2, AUTO_comb_liver_min2 = split_dataframe(comb_liver_min2, Z_genes)
Z_comb_spleen_min2, AUTO_comb_spleen_min2 = split_dataframe(comb_spleen_min2, Z_genes)
print tabulate([["Tissue", "Z", "Autosomes"],
["Gonad", Z_comb_gonad_min2.shape[0], AUTO_comb_gonad_min2.shape[0]],
["Heart", Z_comb_heart_min2.shape[0], AUTO_comb_heart_min2.shape[0]],
["Liver", Z_comb_liver_min2.shape[0], AUTO_comb_liver_min2.shape[0]],
["Spleen", Z_comb_spleen_min2.shape[0], AUTO_comb_spleen_min2.shape[0]]],
headers="firstrow")
print ""
print "Z overlap liver heart", len(set(Z_comb_liver_min2.index).intersection(Z_comb_heart_min2.index))
print "Z overlap liver spleen", len(set(Z_comb_liver_min2.index).intersection(Z_comb_spleen_min2.index))
print "Z overlap liver gonad", len(set(Z_comb_liver_min2.index).intersection(Z_comb_gonad_min2.index))
print
print "Z overlap heart spleen", len(set(Z_comb_heart_min2.index).intersection(Z_comb_spleen_min2.index))
print "Z overlap heart gonad", len(set(Z_comb_heart_min2.index).intersection(Z_comb_gonad_min2.index))
print
print "Z overlap gonad spleen", len(set(Z_comb_gonad_min2.index).intersection(Z_comb_spleen_min2.index))
In [323]:
In [383]:
def get_significant_de(detags, significance, foldchange):
'''Split a dataframe into sex-biased and unbiased expression.
A gene is biased when a) The FDR corrected p-value from exactTest() is above significance
b) the foldchange is larger/smaller than given
'''
biased = detags[(detags["FDR"] < significance)]
biased = biased[(biased["logFC"] < -foldchange) | (biased["logFC"] > foldchange)]
#Everything that is not in biased is unbiased
unbiased = detags[~detags.index.isin(biased.index)]
return biased, unbiased
# Filter with a significance threshold of 0.05
AUTO_comb_gonad_min2_biased, AUTO_comb_gonad_min2_unbiased = get_significant_de(AUTO_comb_gonad_min2, 0.05, 1)
AUTO_comb_heart_min2_biased, AUTO_comb_heart_min2_unbiased = get_significant_de(AUTO_comb_heart_min2, 0.05, 1)
AUTO_comb_liver_min2_biased, AUTO_comb_liver_min2_unbiased = get_significant_de(AUTO_comb_liver_min2, 0.05, 1)
AUTO_comb_spleen_min2_biased, AUTO_comb_spleen_min2_unbiased = get_significant_de(AUTO_comb_spleen_min2, 0.05, 1)
def percentage(part, whole):
return 100 * float(part)/float(whole)
#Number of genes expressed and sex-biased on a per tissue basis
print tabulate([["Tissue", "Sex-biased","%", "Unbiased", "%", "Total"],
["Gonad", AUTO_comb_gonad_min2_biased.shape[0], percentage(AUTO_comb_gonad_min2_biased.shape[0],AUTO_comb_gonad_min2.shape[0]), AUTO_comb_gonad_min2_unbiased.shape[0], percentage(AUTO_comb_gonad_min2_unbiased.shape[0],AUTO_comb_gonad_min2.shape[0]), AUTO_comb_gonad_min2.shape[0]],
["Spleen", AUTO_comb_spleen_min2_biased.shape[0], percentage(AUTO_comb_spleen_min2_biased.shape[0],AUTO_comb_spleen_min2.shape[0]), AUTO_comb_spleen_min2_unbiased.shape[0],percentage(AUTO_comb_spleen_min2_unbiased.shape[0],AUTO_comb_spleen_min2.shape[0]), AUTO_comb_spleen_min2.shape[0]],
["Heart", AUTO_comb_heart_min2_biased.shape[0], percentage(AUTO_comb_heart_min2_biased.shape[0],AUTO_comb_heart_min2.shape[0]), AUTO_comb_heart_min2_unbiased.shape[0],percentage(AUTO_comb_heart_min2_unbiased.shape[0],AUTO_comb_heart_min2.shape[0]), AUTO_comb_heart_min2.shape[0]],
["Liver", AUTO_comb_liver_min2_biased.shape[0], percentage(AUTO_comb_liver_min2_biased.shape[0],AUTO_comb_liver_min2.shape[0]), AUTO_comb_liver_min2_unbiased.shape[0], percentage(AUTO_comb_liver_min2_unbiased.shape[0],AUTO_comb_liver_min2.shape[0]), AUTO_comb_liver_min2.shape[0]]],
headers="firstrow")
def filter_foldchange(detags):
'''Filter by foldchange, here hardcoded for our dosage compensation criterion'''
DC = detags[(detags["logFC"] >= -0.5) & (detags["logFC"] <= 0.5)]
biased = detags[(detags["logFC"] < -0.5) | (detags["logFC"] > 0.5)]
return biased, DC
Z_de_gonad_min2_biased, Z_de_gonad_min2_dc = filter_foldchange(Z_comb_gonad_min2)
Z_de_spleen_min2_biased, Z_de_spleen_min2_dc = filter_foldchange(Z_comb_spleen_min2)
Z_de_heart_min2_biased, Z_de_heart_min2_dc = filter_foldchange(Z_comb_heart_min2)
Z_de_liver_min2_biased, Z_de_liver_min2_dc = filter_foldchange(Z_comb_liver_min2)
print tabulate([["Tissue", "Biased","%", "Compensated", "%", "Total"],
["Gonad", Z_de_gonad_min2_biased.shape[0], percentage(Z_de_gonad_min2_biased.shape[0],Z_comb_gonad_min2.shape[0]), Z_de_gonad_min2_dc.shape[0], percentage(Z_de_gonad_min2_dc.shape[0],Z_comb_gonad_min2.shape[0]), Z_comb_gonad_min2.shape[0]],
["Spleen", Z_de_spleen_min2_biased.shape[0], percentage(Z_de_spleen_min2_biased.shape[0],Z_comb_spleen_min2.shape[0]), Z_de_spleen_min2_dc.shape[0],percentage(Z_de_spleen_min2_dc.shape[0],Z_comb_spleen_min2.shape[0]), Z_comb_spleen_min2.shape[0]],
["Heart", Z_de_heart_min2_biased.shape[0], percentage(Z_de_heart_min2_biased.shape[0],Z_comb_heart_min2.shape[0]), Z_de_heart_min2_dc.shape[0],percentage(Z_de_heart_min2_dc.shape[0],Z_comb_heart_min2.shape[0]), Z_comb_heart_min2.shape[0]],
["Liver", Z_de_liver_min2_biased.shape[0], percentage(Z_de_liver_min2_biased.shape[0],Z_comb_liver_min2.shape[0]), Z_de_liver_min2_dc.shape[0], percentage(Z_de_liver_min2_dc.shape[0],Z_comb_liver_min2.shape[0]), Z_comb_liver_min2.shape[0]]],
headers="firstrow")
Slice the dataframes into male and female expression. Double check that the iloc slices are correct. NOTE: Maybe better to use sample ids?
In [325]:
male_Z_comb_gonad_min2 = Z_comb_gonad_min2.iloc[:,:4]
female_Z_comb_gonad_min2 = Z_comb_gonad_min2.iloc[:,4:8]
male_Z_comb_heart_min2 = Z_comb_heart_min2.iloc[:,:4]
female_Z_comb_heart_min2 = Z_comb_heart_min2.iloc[:,4:8]
#Liver has one female sample less
male_Z_comb_liver_min2 = Z_comb_liver_min2.iloc[:,:4]
female_Z_comb_liver_min2 = Z_comb_liver_min2.iloc[:,4:7]
male_Z_comb_spleen_min2 = Z_comb_spleen_min2.iloc[:,:4]
female_Z_comb_spleen_min2 = Z_comb_spleen_min2.iloc[:,4:8]
male_AUTO_comb_gonad_min2 = AUTO_comb_gonad_min2.iloc[:,:4]
female_AUTO_comb_gonad_min2 = AUTO_comb_gonad_min2.iloc[:,4:8]
male_AUTO_comb_heart_min2 = AUTO_comb_heart_min2.iloc[:,:4]
female_AUTO_comb_heart_min2 = AUTO_comb_heart_min2.iloc[:,4:8]
#Liver has one female sample less
male_AUTO_comb_liver_min2 = AUTO_comb_liver_min2.iloc[:,:4]
female_AUTO_comb_liver_min2 = AUTO_comb_liver_min2.iloc[:,4:7]
male_AUTO_comb_spleen_min2 = AUTO_comb_spleen_min2.iloc[:,:4]
female_AUTO_comb_spleen_min2 = AUTO_comb_spleen_min2.iloc[:,4:8]
print male_Z_comb_gonad_min2.shape
print female_Z_comb_gonad_min2.shape
# female_Z_comb_liver_min2.head()
# # male_Z_comb_liver_min2.head()
In [326]:
print "MEANS"
print male_Z_comb_gonad_min2.applymap(anti_log2).mean()
print female_Z_comb_gonad_min2.applymap(anti_log2).mean()
print male_Z_comb_heart_min2.applymap(anti_log2).mean()
print female_Z_comb_heart_min2.applymap(anti_log2).mean()
print male_Z_comb_liver_min2.applymap(anti_log2).mean()
print female_Z_comb_liver_min2.applymap(anti_log2).mean()
print male_Z_comb_spleen_min2.applymap(anti_log2).mean()
print female_Z_comb_spleen_min2.applymap(anti_log2).mean()
print "MEDIANS"
print male_Z_comb_gonad_min2.applymap(anti_log2).median()
print female_Z_comb_gonad_min2.applymap(anti_log2).median()
print male_Z_comb_heart_min2.applymap(anti_log2).median()
print female_Z_comb_heart_min2.applymap(anti_log2).median()
print male_Z_comb_liver_min2.applymap(anti_log2).median()
print female_Z_comb_liver_min2.applymap(anti_log2).median()
print male_Z_comb_spleen_min2.applymap(anti_log2).median()
print female_Z_comb_spleen_min2.applymap(anti_log2).median()
Now get the average expression for male and female autosomal expression across samples
In [327]:
def logged_data_mean(df):
'''Correctly take the mean of logged data. First unlog, then calculate mean, then log again'''
logRPKMmean = pd.Series(df.applymap(anti_log2).mean(axis=1).map(log2), name="logRPKMmean")
return logRPKMmean
male_Z_comb_gonad_min2 = pd.concat([male_Z_comb_gonad_min2, logged_data_mean(male_Z_comb_gonad_min2)], axis=1)
female_Z_comb_gonad_min2 = pd.concat([female_Z_comb_gonad_min2, logged_data_mean(female_Z_comb_gonad_min2)], axis=1)
male_Z_comb_heart_min2 = pd.concat([male_Z_comb_heart_min2, logged_data_mean(male_Z_comb_heart_min2)], axis=1)
female_Z_comb_heart_min2 = pd.concat([female_Z_comb_heart_min2, logged_data_mean(female_Z_comb_heart_min2)], axis=1)
male_Z_comb_liver_min2 = pd.concat([male_Z_comb_liver_min2, logged_data_mean(male_Z_comb_liver_min2)], axis=1)
female_Z_comb_liver_min2 = pd.concat([female_Z_comb_liver_min2, logged_data_mean(female_Z_comb_liver_min2)], axis=1)
male_Z_comb_spleen_min2 = pd.concat([male_Z_comb_spleen_min2, logged_data_mean(male_Z_comb_spleen_min2)], axis=1)
female_Z_comb_spleen_min2 = pd.concat([female_Z_comb_spleen_min2, logged_data_mean(female_Z_comb_spleen_min2)], axis=1)
male_AUTO_comb_gonad_min2 = pd.concat([male_AUTO_comb_gonad_min2, logged_data_mean(male_AUTO_comb_gonad_min2)], axis=1)
female_AUTO_comb_gonad_min2 = pd.concat([female_AUTO_comb_gonad_min2, logged_data_mean(female_AUTO_comb_gonad_min2)], axis=1)
male_AUTO_comb_heart_min2 = pd.concat([male_AUTO_comb_heart_min2, logged_data_mean(male_AUTO_comb_heart_min2)], axis=1)
female_AUTO_comb_heart_min2 = pd.concat([female_AUTO_comb_heart_min2, logged_data_mean(female_AUTO_comb_heart_min2)], axis=1)
male_AUTO_comb_liver_min2 = pd.concat([male_AUTO_comb_liver_min2, logged_data_mean(male_AUTO_comb_liver_min2)], axis=1)
female_AUTO_comb_liver_min2 = pd.concat([female_AUTO_comb_liver_min2, logged_data_mean(female_AUTO_comb_liver_min2)], axis=1)
male_AUTO_comb_spleen_min2 = pd.concat([male_AUTO_comb_spleen_min2, logged_data_mean(male_AUTO_comb_spleen_min2)], axis=1)
female_AUTO_comb_spleen_min2 = pd.concat([female_AUTO_comb_spleen_min2, logged_data_mean(female_AUTO_comb_spleen_min2)], axis=1)
print "Gonad male"
print '%.2f' % round(np.median(male_AUTO_comb_gonad_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.median(male_Z_comb_gonad_min2["logRPKMmean"]), 2 )
print "means of logged data"
print '%.2f' % round(np.mean(male_AUTO_comb_gonad_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.mean(male_Z_comb_gonad_min2["logRPKMmean"]), 2 )
print ranksums(male_AUTO_comb_gonad_min2["logRPKMmean"], male_Z_comb_gonad_min2["logRPKMmean"])
print '%.2f' % round(ranksums(male_AUTO_comb_gonad_min2["logRPKMmean"], male_Z_comb_gonad_min2["logRPKMmean"])[0], 2 )
#Calculate two sided p-value
print mannwhitneyu(male_AUTO_comb_gonad_min2["logRPKMmean"], male_Z_comb_gonad_min2["logRPKMmean"])[1]*2
print "---"
print "Gonad female"
print '%.2f' % round(np.median(female_AUTO_comb_gonad_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.median(female_Z_comb_gonad_min2["logRPKMmean"]), 2)
print "means of logged data"
print '%.2f' % round(np.mean(female_AUTO_comb_gonad_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.mean(female_Z_comb_gonad_min2["logRPKMmean"]), 2)
print ranksums(female_AUTO_comb_gonad_min2["logRPKMmean"], female_Z_comb_gonad_min2["logRPKMmean"])
print '%.2f' % round(ranksums(female_AUTO_comb_gonad_min2["logRPKMmean"], female_Z_comb_gonad_min2["logRPKMmean"])[0], 2 )
#Calculate two sided p-value
print mannwhitneyu(female_AUTO_comb_gonad_min2["logRPKMmean"], female_Z_comb_gonad_min2["logRPKMmean"])[1]*2
print "=============="
print "Heart male"
print "medians"
print '%.2f' % round(np.median(male_AUTO_comb_heart_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.median(male_Z_comb_heart_min2["logRPKMmean"]), 2)
print "means of logged data"
print '%.2f' % round(np.mean(male_AUTO_comb_heart_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.mean(male_Z_comb_heart_min2["logRPKMmean"]), 2)
print ranksums(male_AUTO_comb_heart_min2["logRPKMmean"], male_Z_comb_heart_min2["logRPKMmean"])
print '%.2f' % round(ranksums(male_AUTO_comb_heart_min2["logRPKMmean"], male_Z_comb_heart_min2["logRPKMmean"])[0], 2 )
#Calculate two sided p-value
print mannwhitneyu(male_AUTO_comb_heart_min2["logRPKMmean"], male_Z_comb_heart_min2["logRPKMmean"])[1]*2
print "---"
print "Heart female"
print "medians"
print '%.2f' % round(np.median(female_AUTO_comb_heart_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.median(female_Z_comb_heart_min2["logRPKMmean"]), 2)
print "means of logged data"
print '%.2f' % round(np.mean(female_AUTO_comb_heart_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.mean(female_Z_comb_heart_min2["logRPKMmean"]), 2)
print ranksums(female_AUTO_comb_heart_min2["logRPKMmean"], female_Z_comb_heart_min2["logRPKMmean"])
print '%.2f' % round(ranksums(female_AUTO_comb_heart_min2["logRPKMmean"], female_Z_comb_heart_min2["logRPKMmean"])[0], 2 )
#Calculate two sided p-value
print mannwhitneyu(female_AUTO_comb_heart_min2["logRPKMmean"], female_Z_comb_heart_min2["logRPKMmean"])[1]*2
print "=============="
print "Liver male"
print "medians"
print '%.2f' % round(np.median(male_AUTO_comb_liver_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.median(male_Z_comb_liver_min2["logRPKMmean"]), 2)
print "means of logged data"
print '%.2f' % round(np.mean(male_AUTO_comb_liver_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.mean(male_Z_comb_liver_min2["logRPKMmean"]), 2)
print ranksums(male_AUTO_comb_liver_min2["logRPKMmean"], male_Z_comb_liver_min2["logRPKMmean"])
print '%.2f' % round(ranksums(male_AUTO_comb_liver_min2["logRPKMmean"], male_Z_comb_liver_min2["logRPKMmean"])[0], 2 )
#Calculate two sided p-value
print mannwhitneyu(male_AUTO_comb_liver_min2["logRPKMmean"], male_Z_comb_liver_min2["logRPKMmean"])[1]*2
print "---"
print "Liver female"
print "medians"
print '%.2f' % round(np.median(female_AUTO_comb_liver_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.median(female_Z_comb_liver_min2["logRPKMmean"]), 2)
print "means of logged data"
print '%.2f' % round(np.mean(female_AUTO_comb_liver_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.mean(female_Z_comb_liver_min2["logRPKMmean"]), 2)
print ranksums(female_AUTO_comb_liver_min2["logRPKMmean"], female_Z_comb_liver_min2["logRPKMmean"])
print '%.2f' % round(ranksums(female_AUTO_comb_liver_min2["logRPKMmean"], female_Z_comb_liver_min2["logRPKMmean"])[0], 2 )
#Calculate two sided p-value
print mannwhitneyu(female_AUTO_comb_liver_min2["logRPKMmean"], female_Z_comb_liver_min2["logRPKMmean"])[1]*2
print "=============="
print "Spleen male"
print "medians"
print '%.2f' % round(np.median(male_AUTO_comb_spleen_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.median(male_Z_comb_spleen_min2["logRPKMmean"]), 2)
print "means of logged data"
print '%.2f' % round(np.mean(male_AUTO_comb_spleen_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.mean(male_Z_comb_spleen_min2["logRPKMmean"]), 2)
print ranksums(male_AUTO_comb_spleen_min2["logRPKMmean"], male_Z_comb_spleen_min2["logRPKMmean"])
print '%.2f' % round(ranksums(male_AUTO_comb_spleen_min2["logRPKMmean"], male_Z_comb_spleen_min2["logRPKMmean"])[0], 2 )
#Calculate two sided p-value
print mannwhitneyu(male_AUTO_comb_spleen_min2["logRPKMmean"], male_Z_comb_spleen_min2["logRPKMmean"])[1]*2
print "---"
print "Spleen female"
print "medians"
print '%.2f' % round(np.median(female_AUTO_comb_spleen_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.median(female_Z_comb_spleen_min2["logRPKMmean"]), 2)
print "means of logged data"
print '%.2f' % round(np.mean(female_AUTO_comb_spleen_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.mean(female_Z_comb_spleen_min2["logRPKMmean"]), 2)
print ranksums(female_AUTO_comb_spleen_min2["logRPKMmean"], female_Z_comb_spleen_min2["logRPKMmean"])
print '%.2f' % round(ranksums(female_AUTO_comb_spleen_min2["logRPKMmean"], female_Z_comb_spleen_min2["logRPKMmean"])[0], 2 )
#Calculate two sided p-value
print mannwhitneyu(female_AUTO_comb_spleen_min2["logRPKMmean"], female_Z_comb_spleen_min2["logRPKMmean"])[1]*2
print "=============="
In [328]:
print ranksums(female_Z_comb_gonad_min2["logRPKMmean"], male_Z_comb_gonad_min2["logRPKMmean"])
print ranksums(female_Z_comb_heart_min2["logRPKMmean"], male_Z_comb_heart_min2["logRPKMmean"])
print ranksums(female_Z_comb_liver_min2["logRPKMmean"], male_Z_comb_liver_min2["logRPKMmean"])
print ranksums(female_Z_comb_spleen_min2["logRPKMmean"], male_Z_comb_spleen_min2["logRPKMmean"])
NOTE: Sometimes similar gene expression across samples (once or twice) or sex-limited expression Causes ties and should thus maybe consider using mannwhitneyu test? http://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.mannwhitneyu.html#scipy.stats.mannwhitneyu NOTE 2015-09-30: Both tests provide almost identical results
In [329]:
# from collections import defaultdict
# D = defaultdict(list)
# for i,item in enumerate(male_Z_comb_liver_min2["logCPMmean"]):
# D[item].append(i)
# D = {k:v for k,v in D.items() if len(v)>1}
# print D
# # male_AUTO_comb_heart_min2["logCPMmean"][7871]
# print male_Z_comb_liver_min2.iloc[[406]]
# print male_Z_comb_liver_min2.iloc[[441]]
In [330]:
plot_ZvsA_data = {"Gonad":{},"Spleen":{},"Heart":{},"Liver":{}}
plot_ZvsA_data["Gonad"]["Male-Auto"] = male_AUTO_comb_gonad_min2["logRPKMmean"]
plot_ZvsA_data["Gonad"]["Female-Auto"] = female_AUTO_comb_gonad_min2["logRPKMmean"]
plot_ZvsA_data["Gonad"]["Male-Z"] = male_Z_comb_gonad_min2["logRPKMmean"]
plot_ZvsA_data["Gonad"]["Female-Z"] = female_Z_comb_gonad_min2["logRPKMmean"]
plot_ZvsA_data["Heart"]["Male-Auto"] = male_AUTO_comb_heart_min2["logRPKMmean"]
plot_ZvsA_data["Heart"]["Female-Auto"] = female_AUTO_comb_heart_min2["logRPKMmean"]
plot_ZvsA_data["Heart"]["Male-Z"] = male_Z_comb_heart_min2["logRPKMmean"]
plot_ZvsA_data["Heart"]["Female-Z"] = female_Z_comb_heart_min2["logRPKMmean"]
plot_ZvsA_data["Liver"]["Male-Auto"] = male_AUTO_comb_liver_min2["logRPKMmean"]
plot_ZvsA_data["Liver"]["Female-Auto"] = female_AUTO_comb_liver_min2["logRPKMmean"]
plot_ZvsA_data["Liver"]["Male-Z"] = male_Z_comb_liver_min2["logRPKMmean"]
plot_ZvsA_data["Liver"]["Female-Z"] = female_Z_comb_liver_min2["logRPKMmean"]
plot_ZvsA_data["Spleen"]["Male-Auto"] = male_AUTO_comb_spleen_min2["logRPKMmean"]
plot_ZvsA_data["Spleen"]["Female-Auto"] = female_AUTO_comb_spleen_min2["logRPKMmean"]
plot_ZvsA_data["Spleen"]["Male-Z"] = male_Z_comb_spleen_min2["logRPKMmean"]
plot_ZvsA_data["Spleen"]["Female-Z"] = female_Z_comb_spleen_min2["logRPKMmean"]
In [331]:
def color_boxes(bp):
red = False
for box in bp['boxes']:
# change outline color
box.set( color='black', linewidth=1)
if red == True:
box.set( facecolor = '#E9E9E9' )
red = False
else:
box.set( facecolor = '#2C3E50' )
red = True
for median in bp['medians']:
median.set(color='black', linewidth=1)
for whisker in bp['whiskers']:
whisker.set(color='black', linewidth=1, linestyle="-")
## change color and linewidth of the caps
for cap in bp['caps']:
cap.set(color='black', linewidth=1)
font = {'family' : 'Source Sans Pro',
'weight' : 'normal',
'size' : 11}
import matplotlib
matplotlib.rc('font', **font)
fig = plt.figure(figsize=(6, 6))
fig.subplots_adjust(wspace=0.1)
plotpos = 221
for name in ['Spleen', 'Heart', 'Liver', 'Gonad']:
ax = fig.add_subplot(plotpos)
plotpos+=1
bp = ax.boxplot([plot_ZvsA_data[name][item] for item in ['Male-Auto', 'Male-Z', 'Female-Auto', 'Female-Z']], sym="", widths=0.7, patch_artist=True, notch=True)
lengths = [len(plot_ZvsA_data[name][item]) for item in ['Male-Auto', 'Male-Z', 'Female-Auto', 'Female-Z']]
ax.set_xticklabels(['(%i)\nAA' %lengths[0] , '(%i)\nZZ' %lengths[1], '(%i)\nAA' %lengths[2], '(%i)\nZW' %lengths[3]])
ax.set_yticks(range(-2, 14, 2))
ax.set_ylim(bottom=-3)
color_boxes(bp)
ax.grid(False)
ax.annotate('Female', xy=(1, 0), xycoords='axes fraction',
xytext=(-20, -30), textcoords='offset points',
ha='right', va='top')
ax.annotate('Male', xy=(1, 0), xycoords='axes fraction',
xytext=(-95, -30), textcoords='offset points',
ha='right', va='top')
ax.patch.set_facecolor('None')
# Remove the ticks on the right and top side
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
l = Line2D([1,2],[10.7,10.7],color="black")
ax.add_line(l)
l = Line2D([1,1],[10.7,10.4],color="black")
ax.add_line(l)
l = Line2D([2,2],[10.7,10.4],color="black")
ax.add_line(l)
l = Line2D([3,4],[10.7,10.7],color="black")
ax.add_line(l)
l = Line2D([3,3],[10.7,10.4],color="black")
ax.add_line(l)
l = Line2D([4,4],[10.7,10.4],color="black")
ax.add_line(l)
plt.axvline(x=2.5, color='black', ls='-')
if name == "Gonad":
plt.title("(d) Gonad")
ax.text(1.35, 10.8, "ns", fontsize=11)
ax.text(3.35, 10.5, "***", fontsize=11)
elif name == "Heart":
plt.title("(b) Heart")
ax.text(1.35, 10.5, "***", fontsize=11)
ax.text(3.35, 10.5, "***", fontsize=11)
elif name == "Liver":
plt.title("(c) Liver")
ax.set_ylabel(r'$log_2$'+"(RPKM)")
ax.text(1.35, 10.5, "***", fontsize=11)
ax.text(3.35, 10.5, "***", fontsize=11)
elif name == "Spleen":
plt.title("(a) Spleen")
ax.set_ylabel(r'$log_2$'+"(RPKM)")
ax.text(1.35, 10.5, "***", fontsize=11)
ax.text(3.35, 10.5, "***", fontsize=11)
fig.tight_layout(pad=3)
plt.savefig("2015-10-27-ZvsA-expression.png", dpi=300)
plt.savefig("2015-10-27-ZvsA-expression.pdf", dpi=300)
In [332]:
plot_ZvsA_logFC = {"Gonad":{},"Spleen":{},"Heart":{},"Liver":{}}
plot_ZvsA_logFC["Gonad"]["Auto"] = AUTO_comb_gonad_min2["logFC"]
plot_ZvsA_logFC["Gonad"]["Z"] = Z_comb_gonad_min2["logFC"]
plot_ZvsA_logFC["Heart"]["Auto"] = AUTO_comb_heart_min2["logFC"]
plot_ZvsA_logFC["Heart"]["Z"] = Z_comb_heart_min2["logFC"]
plot_ZvsA_logFC["Liver"]["Auto"] = AUTO_comb_liver_min2["logFC"]
plot_ZvsA_logFC["Liver"]["Z"] = Z_comb_liver_min2["logFC"]
plot_ZvsA_logFC["Spleen"]["Auto"] = AUTO_comb_spleen_min2["logFC"]
plot_ZvsA_logFC["Spleen"]["Z"] = Z_comb_spleen_min2["logFC"]
In [333]:
def color_boxes(bp):
red = False
for box in bp['boxes']:
# change outline color
box.set( color='black', linewidth=1)
# box.set( color='black', linewidth=1)
if red == True:
box.set( facecolor = '#E9E9E9' )
red = False
else:
box.set( facecolor = '#2C3E50' )
red = True
for median in bp['medians']:
median.set(color='black', linewidth=1)
for whisker in bp['whiskers']:
whisker.set(color='black', linewidth=1, linestyle="-")
## change color and linewidth of the caps
for cap in bp['caps']:
cap.set(color='black', linewidth=1)
font = {'family' : 'Source Sans Pro',
'weight' : 'normal',
'size' : 11}
matplotlib.rc('font', **font)
fig, axes = plt.subplots(ncols=4, sharey=True, figsize=(6, 4))
fig.subplots_adjust(wspace=0.1)
for ax, name in zip(axes, ['Spleen', 'Heart', 'Liver', "Gonad"]):
bp = ax.boxplot([plot_ZvsA_logFC[name][item] for item in ['Auto', 'Z']], sym="", widths=0.7, patch_artist=True, notch=True)
lengths = [len(plot_ZvsA_logFC[name][item]) for item in ['Auto', 'Z']]
ax.set(xticklabels=['Auto\n(%i)' %lengths[0], 'Z\n(%i)' %lengths[1]])
color_boxes(bp)
ax.grid(False)
ax.patch.set_facecolor('None')
# Remove the ticks on the right and top side
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.axhline(y=0, c="black",linewidth=1,zorder=0, linestyle="--")
if name == "Gonad":
ax.set_title("(d) Gonad")
elif name == "Spleen":
ax.set_title("(a) Spleen")
ax.set_ylabel('logFC')
elif name == "Liver":
ax.set_title("(c) Liver")
elif name == "Heart":
ax.set_title("(b) Heart")
else:
continue
fig.tight_layout()
In [334]:
print "Valid chromosomes: ", chicken_chromosomes
comb_gonad = comb_gonad[comb_gonad.index.map(lambda x: gid_to_chr[x] in chicken_chromosomes)]
comb_heart = comb_heart[comb_heart.index.map(lambda x: gid_to_chr[x] in chicken_chromosomes)]
comb_liver = comb_liver[comb_liver.index.map(lambda x: gid_to_chr[x] in chicken_chromosomes)]
comb_spleen = comb_spleen[comb_spleen.index.map(lambda x: gid_to_chr[x] in chicken_chromosomes)]
print comb_gonad.shape
print comb_heart.shape
print comb_liver.shape
print comb_spleen.shape
In [335]:
# For no CPM filtering just copy the dataframe
comb_gonad_unfiltered = comb_gonad.copy()
comb_heart_unfiltered = comb_heart.copy()
comb_liver_unfiltered = comb_liver.copy()
comb_spleen_unfiltered = comb_spleen.copy()
# Filter the expression so it only contains Z expression
Z_comb_gonad_unfiltered, AUTO_comb_gonad_unfiltered = split_dataframe(comb_gonad_unfiltered, Z_genes)
Z_comb_heart_unfiltered, AUTO_comb_heart_unfiltered = split_dataframe(comb_heart_unfiltered, Z_genes)
Z_comb_liver_unfiltered, AUTO_comb_liver_unfiltered = split_dataframe(comb_liver_unfiltered, Z_genes)
Z_comb_spleen_unfiltered, AUTO_comb_spleen_unfiltered = split_dataframe(comb_spleen_unfiltered, Z_genes)
male_Z_comb_gonad_unfiltered = Z_comb_gonad_unfiltered.iloc[:,:4]
female_Z_comb_gonad_unfiltered = Z_comb_gonad_unfiltered.iloc[:,4:8]
male_Z_comb_heart_unfiltered = Z_comb_heart_unfiltered.iloc[:,:4]
female_Z_comb_heart_unfiltered = Z_comb_heart_unfiltered.iloc[:,4:8]
#Liver has one female sample less
male_Z_comb_liver_unfiltered = Z_comb_liver_unfiltered.iloc[:,:4]
female_Z_comb_liver_unfiltered = Z_comb_liver_unfiltered.iloc[:,4:7]
male_Z_comb_spleen_unfiltered = Z_comb_spleen_unfiltered.iloc[:,:4]
female_Z_comb_spleen_unfiltered = Z_comb_spleen_unfiltered.iloc[:,4:8]
male_AUTO_comb_gonad_unfiltered = AUTO_comb_gonad_unfiltered.iloc[:,:4]
female_AUTO_comb_gonad_unfiltered = AUTO_comb_gonad_unfiltered.iloc[:,4:8]
male_AUTO_comb_heart_unfiltered = AUTO_comb_heart_unfiltered.iloc[:,:4]
female_AUTO_comb_heart_unfiltered = AUTO_comb_heart_unfiltered.iloc[:,4:8]
#Liver has one female sample less
male_AUTO_comb_liver_unfiltered = AUTO_comb_liver_unfiltered.iloc[:,:4]
female_AUTO_comb_liver_unfiltered = AUTO_comb_liver_unfiltered.iloc[:,4:7]
male_AUTO_comb_spleen_unfiltered = AUTO_comb_spleen_unfiltered.iloc[:,:4]
female_AUTO_comb_spleen_unfiltered = AUTO_comb_spleen_unfiltered.iloc[:,4:8]
male_Z_comb_gonad_unfiltered_ratio = male_Z_comb_gonad_unfiltered.applymap(anti_log2).median() / male_AUTO_comb_gonad_unfiltered.applymap(anti_log2).median()
female_Z_comb_gonad_unfiltered_ratio = female_Z_comb_gonad_unfiltered.applymap(anti_log2).median() / female_AUTO_comb_gonad_unfiltered.applymap(anti_log2).median()
male_Z_comb_spleen_unfiltered_ratio = male_Z_comb_spleen_unfiltered.applymap(anti_log2).median() / male_AUTO_comb_spleen_unfiltered.applymap(anti_log2).median()
female_Z_comb_spleen_unfiltered_ratio = female_Z_comb_spleen_unfiltered.applymap(anti_log2).median() / female_AUTO_comb_spleen_unfiltered.applymap(anti_log2).median()
male_Z_comb_heart_unfiltered_ratio = male_Z_comb_heart_unfiltered.applymap(anti_log2).median() / male_AUTO_comb_heart_unfiltered.applymap(anti_log2).median()
female_Z_comb_heart_unfiltered_ratio = female_Z_comb_heart_unfiltered.applymap(anti_log2).median() / female_AUTO_comb_heart_unfiltered.applymap(anti_log2).median()
male_Z_comb_liver_unfiltered_ratio = male_Z_comb_liver_unfiltered.applymap(anti_log2).median() / male_AUTO_comb_liver_unfiltered.applymap(anti_log2).median()
female_Z_comb_liver_unfiltered_ratio = female_Z_comb_liver_unfiltered.applymap(anti_log2).median() / female_AUTO_comb_liver_unfiltered.applymap(anti_log2).median()
In [336]:
female_Z_comb_liver_unfiltered.head()
Out[336]:
In [337]:
# Beware average CPM is log2 transformed.
# For avgCPM of 1 use 0 as filter
comb_gonad_min1 = filter_cpm(comb_gonad, 0)
comb_heart_min1 = filter_cpm(comb_heart, 0)
comb_liver_min1 = filter_cpm(comb_liver, 0)
comb_spleen_min1 = filter_cpm(comb_spleen, 0)
# Filter the expression so it only contains Z expression
Z_comb_gonad_min1, AUTO_comb_gonad_min1 = split_dataframe(comb_gonad_min1, Z_genes)
Z_comb_heart_min1, AUTO_comb_heart_min1 = split_dataframe(comb_heart_min1, Z_genes)
Z_comb_liver_min1, AUTO_comb_liver_min1 = split_dataframe(comb_liver_min1, Z_genes)
Z_comb_spleen_min1, AUTO_comb_spleen_min1 = split_dataframe(comb_spleen_min1, Z_genes)
male_Z_comb_gonad_min1 = Z_comb_gonad_min1.iloc[:,:4]
female_Z_comb_gonad_min1 = Z_comb_gonad_min1.iloc[:,4:8]
male_Z_comb_heart_min1 = Z_comb_heart_min1.iloc[:,:4]
female_Z_comb_heart_min1 = Z_comb_heart_min1.iloc[:,4:8]
#Liver has one female sample less
male_Z_comb_liver_min1 = Z_comb_liver_min1.iloc[:,:4]
female_Z_comb_liver_min1 = Z_comb_liver_min1.iloc[:,4:7]
male_Z_comb_spleen_min1 = Z_comb_spleen_min1.iloc[:,:4]
female_Z_comb_spleen_min1 = Z_comb_spleen_min1.iloc[:,4:8]
male_AUTO_comb_gonad_min1 = AUTO_comb_gonad_min1.iloc[:,:4]
female_AUTO_comb_gonad_min1 = AUTO_comb_gonad_min1.iloc[:,4:8]
male_AUTO_comb_heart_min1 = AUTO_comb_heart_min1.iloc[:,:4]
female_AUTO_comb_heart_min1 = AUTO_comb_heart_min1.iloc[:,4:8]
#Liver has one female sample less
male_AUTO_comb_liver_min1 = AUTO_comb_liver_min1.iloc[:,:4]
female_AUTO_comb_liver_min1 = AUTO_comb_liver_min1.iloc[:,4:7]
male_AUTO_comb_spleen_min1 = AUTO_comb_spleen_min1.iloc[:,:4]
female_AUTO_comb_spleen_min1 = AUTO_comb_spleen_min1.iloc[:,4:8]
male_Z_comb_gonad_min1_ratio = male_Z_comb_gonad_min1.applymap(anti_log2).median() / male_AUTO_comb_gonad_min1.applymap(anti_log2).median()
female_Z_comb_gonad_min1_ratio = female_Z_comb_gonad_min1.applymap(anti_log2).median() / female_AUTO_comb_gonad_min1.applymap(anti_log2).median()
male_Z_comb_spleen_min1_ratio = male_Z_comb_spleen_min1.applymap(anti_log2).median() / male_AUTO_comb_spleen_min1.applymap(anti_log2).median()
female_Z_comb_spleen_min1_ratio = female_Z_comb_spleen_min1.applymap(anti_log2).median() / female_AUTO_comb_spleen_min1.applymap(anti_log2).median()
male_Z_comb_heart_min1_ratio = male_Z_comb_heart_min1.applymap(anti_log2).median() / male_AUTO_comb_heart_min1.applymap(anti_log2).median()
female_Z_comb_heart_min1_ratio = female_Z_comb_heart_min1.applymap(anti_log2).median() / female_AUTO_comb_heart_min1.applymap(anti_log2).median()
male_Z_comb_liver_min1_ratio = male_Z_comb_liver_min1.applymap(anti_log2).median() / male_AUTO_comb_liver_min1.applymap(anti_log2).median()
female_Z_comb_liver_min1_ratio = female_Z_comb_liver_min1.applymap(anti_log2).median() / female_AUTO_comb_liver_min1.applymap(anti_log2).median()
In [338]:
# Beware average CPM is log2 transformed.
# For avgCPM of 2 use 1 | log2(2) = 1
comb_gonad_min2 = filter_cpm(comb_gonad, 1)
comb_heart_min2 = filter_cpm(comb_heart, 1)
comb_liver_min2 = filter_cpm(comb_liver, 1)
comb_spleen_min2 = filter_cpm(comb_spleen, 1)
# Filter the expression so it only contains Z expression
Z_comb_gonad_min2, AUTO_comb_gonad_min2 = split_dataframe(comb_gonad_min2, Z_genes)
Z_comb_heart_min2, AUTO_comb_heart_min2 = split_dataframe(comb_heart_min2, Z_genes)
Z_comb_liver_min2, AUTO_comb_liver_min2 = split_dataframe(comb_liver_min2, Z_genes)
Z_comb_spleen_min2, AUTO_comb_spleen_min2 = split_dataframe(comb_spleen_min2, Z_genes)
male_Z_comb_gonad_min2 = Z_comb_gonad_min2.iloc[:,:4]
female_Z_comb_gonad_min2 = Z_comb_gonad_min2.iloc[:,4:8]
male_Z_comb_heart_min2 = Z_comb_heart_min2.iloc[:,:4]
female_Z_comb_heart_min2 = Z_comb_heart_min2.iloc[:,4:8]
#Liver has one female sample less
male_Z_comb_liver_min2 = Z_comb_liver_min2.iloc[:,:4]
female_Z_comb_liver_min2 = Z_comb_liver_min2.iloc[:,4:7]
male_Z_comb_spleen_min2 = Z_comb_spleen_min2.iloc[:,:4]
female_Z_comb_spleen_min2 = Z_comb_spleen_min2.iloc[:,4:8]
male_AUTO_comb_gonad_min2 = AUTO_comb_gonad_min2.iloc[:,:4]
female_AUTO_comb_gonad_min2 = AUTO_comb_gonad_min2.iloc[:,4:8]
male_AUTO_comb_heart_min2 = AUTO_comb_heart_min2.iloc[:,:4]
female_AUTO_comb_heart_min2 = AUTO_comb_heart_min2.iloc[:,4:8]
#Liver has one female sample less
male_AUTO_comb_liver_min2 = AUTO_comb_liver_min2.iloc[:,:4]
female_AUTO_comb_liver_min2 = AUTO_comb_liver_min2.iloc[:,4:7]
male_AUTO_comb_spleen_min2 = AUTO_comb_spleen_min2.iloc[:,:4]
female_AUTO_comb_spleen_min2 = AUTO_comb_spleen_min2.iloc[:,4:8]
male_Z_comb_gonad_min2_ratio = male_Z_comb_gonad_min2.applymap(anti_log2).median() / male_AUTO_comb_gonad_min2.applymap(anti_log2).median()
female_Z_comb_gonad_min2_ratio = female_Z_comb_gonad_min2.applymap(anti_log2).median() / female_AUTO_comb_gonad_min2.applymap(anti_log2).median()
male_Z_comb_spleen_min2_ratio = male_Z_comb_spleen_min2.applymap(anti_log2).median() / male_AUTO_comb_spleen_min2.applymap(anti_log2).median()
female_Z_comb_spleen_min2_ratio = female_Z_comb_spleen_min2.applymap(anti_log2).median() / female_AUTO_comb_spleen_min2.applymap(anti_log2).median()
male_Z_comb_heart_min2_ratio = male_Z_comb_heart_min2.applymap(anti_log2).median() / male_AUTO_comb_heart_min2.applymap(anti_log2).median()
female_Z_comb_heart_min2_ratio = female_Z_comb_heart_min2.applymap(anti_log2).median() / female_AUTO_comb_heart_min2.applymap(anti_log2).median()
male_Z_comb_liver_min2_ratio = male_Z_comb_liver_min2.applymap(anti_log2).median() / male_AUTO_comb_liver_min2.applymap(anti_log2).median()
female_Z_comb_liver_min2_ratio = female_Z_comb_liver_min2.applymap(anti_log2).median() / female_AUTO_comb_liver_min2.applymap(anti_log2).median()
In [339]:
# Beware average CPM is log2 transformed.
# For avgCPM of 3 use log2(3)
comb_gonad_min3 = filter_cpm(comb_gonad, log2(3))
comb_heart_min3 = filter_cpm(comb_heart, log2(3))
comb_liver_min3 = filter_cpm(comb_liver, log2(3))
comb_spleen_min3 = filter_cpm(comb_spleen, log2(3))
# Filter the expression so it only contains Z expression
Z_comb_gonad_min3, AUTO_comb_gonad_min3 = split_dataframe(comb_gonad_min3, Z_genes)
Z_comb_heart_min3, AUTO_comb_heart_min3 = split_dataframe(comb_heart_min3, Z_genes)
Z_comb_liver_min3, AUTO_comb_liver_min3 = split_dataframe(comb_liver_min3, Z_genes)
Z_comb_spleen_min3, AUTO_comb_spleen_min3 = split_dataframe(comb_spleen_min3, Z_genes)
male_Z_comb_gonad_min3 = Z_comb_gonad_min3.iloc[:,:4]
female_Z_comb_gonad_min3 = Z_comb_gonad_min3.iloc[:,4:8]
male_Z_comb_heart_min3 = Z_comb_heart_min3.iloc[:,:4]
female_Z_comb_heart_min3 = Z_comb_heart_min3.iloc[:,4:8]
#Liver has one female sample less
male_Z_comb_liver_min3 = Z_comb_liver_min3.iloc[:,:4]
female_Z_comb_liver_min3 = Z_comb_liver_min3.iloc[:,4:7]
male_Z_comb_spleen_min3 = Z_comb_spleen_min3.iloc[:,:4]
female_Z_comb_spleen_min3 = Z_comb_spleen_min3.iloc[:,4:8]
male_AUTO_comb_gonad_min3 = AUTO_comb_gonad_min3.iloc[:,:4]
female_AUTO_comb_gonad_min3 = AUTO_comb_gonad_min3.iloc[:,4:8]
male_AUTO_comb_heart_min3 = AUTO_comb_heart_min3.iloc[:,:4]
female_AUTO_comb_heart_min3 = AUTO_comb_heart_min3.iloc[:,4:8]
#Liver has one female sample less
male_AUTO_comb_liver_min3 = AUTO_comb_liver_min3.iloc[:,:4]
female_AUTO_comb_liver_min3 = AUTO_comb_liver_min3.iloc[:,4:7]
male_AUTO_comb_spleen_min3 = AUTO_comb_spleen_min3.iloc[:,:4]
female_AUTO_comb_spleen_min3 = AUTO_comb_spleen_min3.iloc[:,4:8]
male_Z_comb_gonad_min3_ratio = male_Z_comb_gonad_min3.applymap(anti_log2).median() / male_AUTO_comb_gonad_min3.applymap(anti_log2).median()
female_Z_comb_gonad_min3_ratio = female_Z_comb_gonad_min3.applymap(anti_log2).median() / female_AUTO_comb_gonad_min3.applymap(anti_log2).median()
male_Z_comb_spleen_min3_ratio = male_Z_comb_spleen_min3.applymap(anti_log2).median() / male_AUTO_comb_spleen_min3.applymap(anti_log2).median()
female_Z_comb_spleen_min3_ratio = female_Z_comb_spleen_min3.applymap(anti_log2).median() / female_AUTO_comb_spleen_min3.applymap(anti_log2).median()
male_Z_comb_heart_min3_ratio = male_Z_comb_heart_min3.applymap(anti_log2).median() / male_AUTO_comb_heart_min3.applymap(anti_log2).median()
female_Z_comb_heart_min3_ratio = female_Z_comb_heart_min3.applymap(anti_log2).median() / female_AUTO_comb_heart_min3.applymap(anti_log2).median()
male_Z_comb_liver_min3_ratio = male_Z_comb_liver_min3.applymap(anti_log2).median() / male_AUTO_comb_liver_min3.applymap(anti_log2).median()
female_Z_comb_liver_min3_ratio = female_Z_comb_liver_min3.applymap(anti_log2).median() / female_AUTO_comb_liver_min3.applymap(anti_log2).median()
In [341]:
# Beware average CPM is log2 transformed.
# For avgCPM of 4 use log2(4)
comb_gonad_min4 = filter_cpm(comb_gonad, log2(4))
comb_heart_min4 = filter_cpm(comb_heart, log2(4))
comb_liver_min4 = filter_cpm(comb_liver, log2(4))
comb_spleen_min4 = filter_cpm(comb_spleen, log2(4))
# Filter the expression so it only contains Z expression
Z_comb_gonad_min4, AUTO_comb_gonad_min4 = split_dataframe(comb_gonad_min4, Z_genes)
Z_comb_heart_min4, AUTO_comb_heart_min4 = split_dataframe(comb_heart_min4, Z_genes)
Z_comb_liver_min4, AUTO_comb_liver_min4 = split_dataframe(comb_liver_min4, Z_genes)
Z_comb_spleen_min4, AUTO_comb_spleen_min4 = split_dataframe(comb_spleen_min4, Z_genes)
male_Z_comb_gonad_min4 = Z_comb_gonad_min4.iloc[:,:4]
female_Z_comb_gonad_min4 = Z_comb_gonad_min4.iloc[:,4:8]
male_Z_comb_heart_min4 = Z_comb_heart_min4.iloc[:,:4]
female_Z_comb_heart_min4 = Z_comb_heart_min4.iloc[:,4:8]
#Liver has one female sample less
male_Z_comb_liver_min4 = Z_comb_liver_min4.iloc[:,:4]
female_Z_comb_liver_min4 = Z_comb_liver_min4.iloc[:,4:7]
male_Z_comb_spleen_min4 = Z_comb_spleen_min4.iloc[:,:4]
female_Z_comb_spleen_min4 = Z_comb_spleen_min4.iloc[:,4:8]
male_AUTO_comb_gonad_min4 = AUTO_comb_gonad_min4.iloc[:,:4]
female_AUTO_comb_gonad_min4 = AUTO_comb_gonad_min4.iloc[:,4:8]
male_AUTO_comb_heart_min4 = AUTO_comb_heart_min4.iloc[:,:4]
female_AUTO_comb_heart_min4 = AUTO_comb_heart_min4.iloc[:,4:8]
#Liver has one female sample less
male_AUTO_comb_liver_min4 = AUTO_comb_liver_min4.iloc[:,:4]
female_AUTO_comb_liver_min4 = AUTO_comb_liver_min4.iloc[:,4:7]
male_AUTO_comb_spleen_min4 = AUTO_comb_spleen_min4.iloc[:,:4]
female_AUTO_comb_spleen_min4 = AUTO_comb_spleen_min4.iloc[:,4:8]
male_Z_comb_gonad_min4_ratio = male_Z_comb_gonad_min4.applymap(anti_log2).median() / male_AUTO_comb_gonad_min4.applymap(anti_log2).median()
female_Z_comb_gonad_min4_ratio = female_Z_comb_gonad_min4.applymap(anti_log2).median() / female_AUTO_comb_gonad_min4.applymap(anti_log2).median()
male_Z_comb_spleen_min4_ratio = male_Z_comb_spleen_min4.applymap(anti_log2).median() / male_AUTO_comb_spleen_min4.applymap(anti_log2).median()
female_Z_comb_spleen_min4_ratio = female_Z_comb_spleen_min4.applymap(anti_log2).median() / female_AUTO_comb_spleen_min4.applymap(anti_log2).median()
male_Z_comb_heart_min4_ratio = male_Z_comb_heart_min4.applymap(anti_log2).median() / male_AUTO_comb_heart_min4.applymap(anti_log2).median()
female_Z_comb_heart_min4_ratio = female_Z_comb_heart_min4.applymap(anti_log2).median() / female_AUTO_comb_heart_min4.applymap(anti_log2).median()
male_Z_comb_liver_min4_ratio = male_Z_comb_liver_min4.applymap(anti_log2).median() / male_AUTO_comb_liver_min4.applymap(anti_log2).median()
female_Z_comb_liver_min4_ratio = female_Z_comb_liver_min4.applymap(anti_log2).median() / female_AUTO_comb_liver_min4.applymap(anti_log2).median()
In [342]:
# Beware average CPM is log2 transformed.
# For avgCPM of 5 use log2(5)
comb_gonad_min5 = filter_cpm(comb_gonad, log2(5))
comb_heart_min5 = filter_cpm(comb_heart, log2(5))
comb_liver_min5 = filter_cpm(comb_liver, log2(5))
comb_spleen_min5 = filter_cpm(comb_spleen, log2(5))
# Filter the expression so it only contains Z expression
Z_comb_gonad_min5, AUTO_comb_gonad_min5 = split_dataframe(comb_gonad_min5, Z_genes)
Z_comb_heart_min5, AUTO_comb_heart_min5 = split_dataframe(comb_heart_min5, Z_genes)
Z_comb_liver_min5, AUTO_comb_liver_min5 = split_dataframe(comb_liver_min5, Z_genes)
Z_comb_spleen_min5, AUTO_comb_spleen_min5 = split_dataframe(comb_spleen_min5, Z_genes)
male_Z_comb_gonad_min5 = Z_comb_gonad_min5.iloc[:,:4]
female_Z_comb_gonad_min5 = Z_comb_gonad_min5.iloc[:,4:8]
male_Z_comb_heart_min5 = Z_comb_heart_min5.iloc[:,:4]
female_Z_comb_heart_min5 = Z_comb_heart_min5.iloc[:,4:8]
#Liver has one female sample less
male_Z_comb_liver_min5 = Z_comb_liver_min5.iloc[:,:4]
female_Z_comb_liver_min5 = Z_comb_liver_min5.iloc[:,4:7]
male_Z_comb_spleen_min5 = Z_comb_spleen_min5.iloc[:,:4]
female_Z_comb_spleen_min5 = Z_comb_spleen_min5.iloc[:,4:8]
male_AUTO_comb_gonad_min5 = AUTO_comb_gonad_min5.iloc[:,:4]
female_AUTO_comb_gonad_min5 = AUTO_comb_gonad_min5.iloc[:,4:8]
male_AUTO_comb_heart_min5 = AUTO_comb_heart_min5.iloc[:,:4]
female_AUTO_comb_heart_min5 = AUTO_comb_heart_min5.iloc[:,4:8]
#Liver has one female sample less
male_AUTO_comb_liver_min5 = AUTO_comb_liver_min5.iloc[:,:4]
female_AUTO_comb_liver_min5 = AUTO_comb_liver_min5.iloc[:,4:7]
male_AUTO_comb_spleen_min5 = AUTO_comb_spleen_min5.iloc[:,:4]
female_AUTO_comb_spleen_min5 = AUTO_comb_spleen_min5.iloc[:,4:8]
male_Z_comb_gonad_min5_ratio = male_Z_comb_gonad_min5.applymap(anti_log2).median() / male_AUTO_comb_gonad_min5.applymap(anti_log2).median()
female_Z_comb_gonad_min5_ratio = female_Z_comb_gonad_min5.applymap(anti_log2).median() / female_AUTO_comb_gonad_min5.applymap(anti_log2).median()
male_Z_comb_spleen_min5_ratio = male_Z_comb_spleen_min5.applymap(anti_log2).median() / male_AUTO_comb_spleen_min5.applymap(anti_log2).median()
female_Z_comb_spleen_min5_ratio = female_Z_comb_spleen_min5.applymap(anti_log2).median() / female_AUTO_comb_spleen_min5.applymap(anti_log2).median()
male_Z_comb_heart_min5_ratio = male_Z_comb_heart_min5.applymap(anti_log2).median() / male_AUTO_comb_heart_min5.applymap(anti_log2).median()
female_Z_comb_heart_min5_ratio = female_Z_comb_heart_min5.applymap(anti_log2).median() / female_AUTO_comb_heart_min5.applymap(anti_log2).median()
male_Z_comb_liver_min5_ratio = male_Z_comb_liver_min5.applymap(anti_log2).median() / male_AUTO_comb_liver_min5.applymap(anti_log2).median()
female_Z_comb_liver_min5_ratio = female_Z_comb_liver_min5.applymap(anti_log2).median() / female_AUTO_comb_liver_min5.applymap(anti_log2).median()
Now we combine all the ratios from unfiltered to min5 in a list with alternating male/female ratios. This is also the order these will be plotted in.
In [343]:
gonad_ratios = [male_Z_comb_gonad_unfiltered_ratio,
female_Z_comb_gonad_unfiltered_ratio,
male_Z_comb_gonad_min1_ratio,
female_Z_comb_gonad_min1_ratio,
male_Z_comb_gonad_min2_ratio,
female_Z_comb_gonad_min2_ratio,
male_Z_comb_gonad_min3_ratio,
female_Z_comb_gonad_min3_ratio,
male_Z_comb_gonad_min4_ratio,
female_Z_comb_gonad_min4_ratio,
male_Z_comb_gonad_min5_ratio,
female_Z_comb_gonad_min5_ratio]
heart_ratios = [male_Z_comb_heart_unfiltered_ratio,
female_Z_comb_heart_unfiltered_ratio,
male_Z_comb_heart_min1_ratio,
female_Z_comb_heart_min1_ratio,
male_Z_comb_heart_min2_ratio,
female_Z_comb_heart_min2_ratio,
male_Z_comb_heart_min3_ratio,
female_Z_comb_heart_min3_ratio,
male_Z_comb_heart_min4_ratio,
female_Z_comb_heart_min4_ratio,
male_Z_comb_heart_min5_ratio,
female_Z_comb_heart_min5_ratio]
liver_ratios = [male_Z_comb_liver_unfiltered_ratio,
female_Z_comb_liver_unfiltered_ratio,
male_Z_comb_liver_min1_ratio,
female_Z_comb_liver_min1_ratio,
male_Z_comb_liver_min2_ratio,
female_Z_comb_liver_min2_ratio,
male_Z_comb_liver_min3_ratio,
female_Z_comb_liver_min3_ratio,
male_Z_comb_liver_min4_ratio,
female_Z_comb_liver_min4_ratio,
male_Z_comb_liver_min5_ratio,
female_Z_comb_liver_min5_ratio]
spleen_ratios = [male_Z_comb_spleen_unfiltered_ratio,
female_Z_comb_spleen_unfiltered_ratio,
male_Z_comb_spleen_min1_ratio,
female_Z_comb_spleen_min1_ratio,
male_Z_comb_spleen_min2_ratio,
female_Z_comb_spleen_min2_ratio,
male_Z_comb_spleen_min3_ratio,
female_Z_comb_spleen_min3_ratio,
male_Z_comb_spleen_min4_ratio,
female_Z_comb_spleen_min4_ratio,
male_Z_comb_spleen_min5_ratio,
female_Z_comb_spleen_min5_ratio]
In [344]:
def color_boxes(bp):
red = False
for box in bp['boxes']:
# change outline color
box.set( color='black', linewidth=1)
if red == True:
box.set( facecolor = '#e74c3c' )
red = False
else:
box.set( facecolor = '#3498DB' )
red = True
for median in bp['medians']:
median.set(color='black', linewidth=1)
## change the style of fliers and their fill
for flier in bp['fliers']:
flier.set(marker='o', color='black', alpha=0.5, markersize=4)
## change color and linewidth of the whiskers
for whisker in bp['whiskers']:
whisker.set(color='black', linewidth=1)
## change color and linewidth of the caps
for cap in bp['caps']:
cap.set(color='black', linewidth=1)
# Create a figure instance
fig = plt.figure(1, figsize=(6, 6))
plt.rc("font", size=12)
ax1 = fig.add_subplot(221)
bp = ax1.boxplot(spleen_ratios,
notch=False, # box instead of notch shape
sym='ro', # red squares for outliers
positions = [1, 2, 4, 5, 7, 8, 10, 11, 13,14, 16,17],
widths = 0.9,
patch_artist=True)
plt.axhline(y=1, color='black', ls='--')
color_boxes(bp)
plt.ylim(0.4,1.1)
plt.title('(a) Spleen')
plt.ylabel('Z:A expression ratio')
plt.xlabel('Expression levels (CPM)')
ax2 = fig.add_subplot(222, sharex=ax1, sharey=ax1)
bp = ax2.boxplot(heart_ratios,
notch=False, # box instead of notch shape
sym='ro', # red squares for outliers
positions = [1, 2, 4, 5, 7, 8, 10, 11, 13,14, 16,17],
widths = 0.9,
patch_artist=True)
# Remove the ticks on the right and top side
ax2.yaxis.set_ticks_position('left')
ax2.xaxis.set_ticks_position('bottom')
plt.axhline(y=1, color='black', ls='--')
color_boxes(bp)
plt.ylim(0.4,1.1)
plt.title('(b) Heart')
plt.ylabel('Z:A expression ratio')
plt.xlabel('Expression levels (CPM)')
ax3 = fig.add_subplot(223, sharex=ax1)
bp = ax3.boxplot(liver_ratios,
notch=False, # box instead of notch shape
sym='ro', # red squares for outliers
positions = [1, 2, 4, 5, 7, 8, 10, 11, 13,14, 16,17],
widths = 0.9,
patch_artist=True)
# Remove the ticks on the right and top side
ax3.yaxis.set_ticks_position('left')
ax3.xaxis.set_ticks_position('bottom')
plt.axhline(y=1, color='black', ls='--')
color_boxes(bp)
plt.ylim(0.4,1.1)
plt.title('(c) Liver')
plt.ylabel('Z:A expression ratio')
plt.xlabel('Expression levels (CPM)')
ax4 = fig.add_subplot(224, sharex=ax1)
bp = ax4.boxplot(gonad_ratios,
notch=False, # box instead of notch shape
sym='ro', # red squares for outliers
positions = [1, 2, 4, 5, 7, 8, 10, 11, 13,14, 16,17],
widths = 0.9,
patch_artist=True)
# Remove the ticks on the right and top side
ax4.yaxis.set_ticks_position('left')
ax4.xaxis.set_ticks_position('bottom')
plt.axhline(y=1, color='black', ls='--')
color_boxes(bp)
plt.ylim(0.4,1.1)
ax1.set_xticklabels(['>0', '>1', '>2', '>3', '>4', '>5'])
ax1.set_xticks([1.5, 4.5, 7.5, 10.5, 13.5, 16.5])
plt.title('(d) Gonad')
plt.ylabel('Z:A expression ratio')
plt.xlabel('Expression levels (CPM)')
fig.tight_layout(pad=2)
plt.savefig("2015-10-27-ZtoA-ratio.png", dpi=300)
plt.savefig("2015-10-27-ZtoA-ratio.pdf", dpi=300)
In [345]:
def get_chromosomal_expr(df, gid_to_chr):
'''Make a dictionary of chromosomes containing the average expression for genes across samples'''
chromosomes = defaultdict(list)
for row_index, row in df.iterrows():
chromosomes[gid_to_chr[row_index]].append(log2(row.map(anti_log2).mean()))
return chromosomes
# These are already filtered down to only contain valid chromosomes (autosomes + Z)
comb_gonad_min2 = filter_cpm(comb_gonad, 1)
comb_heart_min2 = filter_cpm(comb_heart, 1)
comb_liver_min2 = filter_cpm(comb_liver, 1)
comb_spleen_min2 = filter_cpm(comb_spleen, 1)
male_comb_gonad_min2 = comb_gonad_min2.iloc[:,:4]
female_comb_gonad_min2= comb_gonad_min2.iloc[:,4:8]
male_comb_heart_min2 = comb_heart_min2.iloc[:,:4]
female_comb_heart_min2= comb_heart_min2.iloc[:,4:8]
#Liver has one female sample less
male_comb_liver_min2 = comb_liver_min2.iloc[:,:4]
female_comb_liver_min2 = comb_liver_min2.iloc[:,4:7]
male_comb_spleen_min2 = comb_spleen_min2.iloc[:,:4]
female_comb_spleen_min2 = comb_spleen_min2.iloc[:,4:8]
M_GONAD_MIN2_CHR = get_chromosomal_expr(male_comb_gonad_min2, gid_to_chr)
F_GONAD_MIN2_CHR = get_chromosomal_expr(female_comb_gonad_min2, gid_to_chr)
M_HEART_MIN2_CHR = get_chromosomal_expr(male_comb_heart_min2, gid_to_chr)
F_HEART_MIN2_CHR = get_chromosomal_expr(female_comb_heart_min2, gid_to_chr)
M_LIVER_MIN2_CHR = get_chromosomal_expr(male_comb_liver_min2, gid_to_chr)
F_LIVER_MIN2_CHR = get_chromosomal_expr(female_comb_liver_min2, gid_to_chr)
M_SPLEEN_MIN2_CHR = get_chromosomal_expr(male_comb_spleen_min2, gid_to_chr)
F_SPLEEN_MIN2_CHR = get_chromosomal_expr(female_comb_spleen_min2, gid_to_chr)
In [346]:
print chicken_chromosomes
In [347]:
from itertools import chain
# Create a figure instance
fig = plt.figure(1, figsize=(6, 10))
plt.rc("font", size=12)
ax1 = fig.add_subplot(411)
def color_boxes(bp):
red = False
for box in bp['boxes']:
# change outline color
box.set( color='black', linewidth=1)
if red == True:
box.set( facecolor = '#e74c3c' )
red = False
else:
box.set( facecolor = '#3498DB' )
red = True
for median in bp['medians']:
median.set(color='black', linewidth=1)
## change the style of fliers and their fill
for flier in bp['fliers']:
flier.set(marker='o', color='black', alpha=0.5, markersize=4)
## change color and linewidth of the whiskers
for whisker in bp['whiskers']:
whisker.set(color='black', linewidth=1)
## change color and linewidth of the caps
for cap in bp['caps']:
cap.set(color='black', linewidth=1)
M_SPLEEN_EXPR = [M_SPLEEN_MIN2_CHR[c] for c in chicken_chromosomes]
F_SPLEEN_EXPR = [F_SPLEEN_MIN2_CHR[c] for c in chicken_chromosomes]
# From http://stackoverflow.com/questions/3471999/how-do-i-merge-two-lists-into-a-single-list
# Takes a list a = [a,b,c] and b = [a,b,c] and combines it into [a,a,b,b,c,c]
COMB_SPLEEN_EXPR = list(chain(*zip(M_SPLEEN_EXPR,F_SPLEEN_EXPR)))
bp = ax1.boxplot(COMB_SPLEEN_EXPR,
notch=False, # box instead of notch shape
sym='', # red squares for outliers
widths = 1,
patch_artist=True)
color_boxes(bp)
# ax1.set_xticklabels(chicken_chromosomes)
# # ax1.axhline(y=comb_spleen_min2_median,c="black",linewidth=2, linestyle="--")
# ax1.set_xticks(list(np.arange(1.5, len(chicken_chromosomes)*2, 2)))
ax1.set_xticklabels(["1", "5", "10", "15", "20", "25", "Z"])
# 57.5 is the position of the Z chromosome
ax1.set_xticks([1.5, 9.5, 19.5, 29.5, 39.5, 49.5, 57.5])
# Only show ticks on the left and bottom spines
ax1.yaxis.set_ticks_position('left')
ax1.xaxis.set_ticks_position('bottom')
ax1.set_yticks(range(-2, 14, 2))
ax1.set_ylim(bottom=-2)
plt.title('(a) Spleen')
plt.ylabel(r'$log_2$'+"(RPKM)")
ax2 = fig.add_subplot(412)
M_HEART_EXPR = [M_HEART_MIN2_CHR[c] for c in chicken_chromosomes]
F_HEART_EXPR = [F_HEART_MIN2_CHR[c] for c in chicken_chromosomes]
#From http://stackoverflow.com/questions/3471999/how-do-i-merge-two-lists-into-a-single-list
COMB_HEART_EXPR = list(chain(*zip(M_HEART_EXPR,F_HEART_EXPR)))
bp = ax2.boxplot(COMB_HEART_EXPR,
notch=False, # box instead of notch shape
sym='', # red squares for outliers
widths = 1,
patch_artist=True)
color_boxes(bp)
# ax2.set_xticklabels(chicken_chromosomes)
# ax2.axhline(y=comb_heart_min2_median,c="black",linewidth=2, linestyle="--")
# ax2.set_xticks(list(np.arange(1.5, len(chicken_chromosomes)*2, 2)))
ax2.set_xticklabels(["1", "5", "10", "15", "20", "25", "Z"])
# ax4.axhline(y=comb_gonad_min2_median,c="white",linewidth=1, linestyle="-")
# 57.5 is the position of the Z chromosome
ax2.set_xticks([1.5, 9.5, 19.5, 29.5, 39.5, 49.5, 57.5])
ax2.yaxis.set_ticks_position('left')
ax2.xaxis.set_ticks_position('bottom')
ax2.set_yticks(range(-2, 14, 2))
ax2.set_ylim(bottom=-2)
plt.title('(b) Heart')
plt.ylabel(r'$log_2$'+"(RPKM)")
ax3 = fig.add_subplot(413)
M_LIVER_EXPR = [M_LIVER_MIN2_CHR[c] for c in chicken_chromosomes]
F_LIVER_EXPR = [F_LIVER_MIN2_CHR[c] for c in chicken_chromosomes]
#From http://stackoverflow.com/questions/3471999/how-do-i-merge-two-lists-into-a-single-list
COMB_LIVER_EXPR = list(chain(*zip(M_LIVER_EXPR,F_LIVER_EXPR)))
bp = ax3.boxplot(COMB_LIVER_EXPR,
notch=False, # box instead of notch shape
sym='', # red squares for outliers
widths = 1,
patch_artist=True)
color_boxes(bp)
# ax3.set_xticklabels(chicken_chromosomes)
# ax3.axhline(y=comb_liver_min2_median,c="black",linewidth=2, linestyle="--")
# ax3.set_xticks(list(np.arange(1.5, len(chicken_chromosomes)*2, 2)))
ax3.set_xticklabels(["1", "5", "10", "15", "20", "25", "Z"])
# 57.5 is the position of the Z chromosome
ax3.set_xticks([1.5, 9.5, 19.5, 29.5, 39.5, 49.5, 57.5])
ax3.yaxis.set_ticks_position('left')
ax3.xaxis.set_ticks_position('bottom')
ax3.set_yticks(range(-2, 14, 2))
ax3.set_ylim(bottom=-2)
plt.title('(c) Liver')
plt.ylabel(r'$log_2$'+"(RPKM)")
# plt.xlabel('Chromosomes')
ax4 = fig.add_subplot(414)
M_GONAD_EXPR = [M_GONAD_MIN2_CHR[c] for c in chicken_chromosomes]
F_GONAD_EXPR = [F_GONAD_MIN2_CHR[c] for c in chicken_chromosomes]
#From http://stackoverflow.com/questions/3471999/how-do-i-merge-two-lists-into-a-single-list
COMB_GONAD_EXPR = list(chain(*zip(M_GONAD_EXPR,F_GONAD_EXPR)))
bp = ax4.boxplot(COMB_GONAD_EXPR,
notch=False, # box instead of notch shape
sym='', # red squares for outliers
widths = 1,
patch_artist=True)
color_boxes(bp)
ax4.set_xticklabels(["1", "5", "10", "15", "20", "25", "Z"])
# ax4.axhline(y=comb_gonad_min2_median,c="white",linewidth=1, linestyle="-")
# 57.5 is the position of the Z chromosome
ax4.set_xticks([1.5, 9.5, 19.5, 29.5, 39.5, 49.5, 57.5])
ax4.yaxis.set_ticks_position('left')
ax4.xaxis.set_ticks_position('bottom')
ax4.set_yticks(range(-2, 14, 2))
ax4.set_ylim(bottom=-2)
plt.title('(d) Gonad')
plt.ylabel(r'$log_2$'+"(RPKM)")
plt.xlabel('Chromosomes')
# fig.savefig('2015-06-04-RPKM2-chromosome-expr-all-tissues.png', bbox_inches='tight')
fig.tight_layout(pad=2)
plt.savefig("2015-10-27-chromosomal-expression.png", dpi=300)
plt.savefig("2015-10-27-chromosomal-expression.pdf", dpi=300)
All the medians look correct High overlap in expression, some genes not in other tissues
In [348]:
male_Z_comb_gonad_min2 = Z_comb_gonad_min2.iloc[:,:4]
female_Z_comb_gonad_min2 = Z_comb_gonad_min2.iloc[:,4:8]
male_Z_comb_heart_min2 = Z_comb_heart_min2.iloc[:,:4]
female_Z_comb_heart_min2 = Z_comb_heart_min2.iloc[:,4:8]
#Liver has one female sample less
male_Z_comb_liver_min2 = Z_comb_liver_min2.iloc[:,:4]
female_Z_comb_liver_min2 = Z_comb_liver_min2.iloc[:,4:7]
male_Z_comb_spleen_min2 = Z_comb_spleen_min2.iloc[:,:4]
female_Z_comb_spleen_min2 = Z_comb_spleen_min2.iloc[:,4:8]
In [349]:
def logged_data_mean(df):
logCPMmean = pd.Series(df.applymap(anti_log2).mean(axis=1).map(log2), name="logRPKMmean")
return logCPMmean
male_Z_comb_gonad_min2 = pd.concat([male_Z_comb_gonad_min2, logged_data_mean(male_Z_comb_gonad_min2)], axis=1)
female_Z_comb_gonad_min2 = pd.concat([female_Z_comb_gonad_min2, logged_data_mean(female_Z_comb_gonad_min2)], axis=1)
male_Z_comb_heart_min2 = pd.concat([male_Z_comb_heart_min2, logged_data_mean(male_Z_comb_heart_min2)], axis=1)
female_Z_comb_heart_min2 = pd.concat([female_Z_comb_heart_min2, logged_data_mean(female_Z_comb_heart_min2)], axis=1)
male_Z_comb_liver_min2 = pd.concat([male_Z_comb_liver_min2, logged_data_mean(male_Z_comb_liver_min2)], axis=1)
female_Z_comb_liver_min2 = pd.concat([female_Z_comb_liver_min2, logged_data_mean(female_Z_comb_liver_min2)], axis=1)
male_Z_comb_spleen_min2 = pd.concat([male_Z_comb_spleen_min2, logged_data_mean(male_Z_comb_spleen_min2)], axis=1)
female_Z_comb_spleen_min2 = pd.concat([female_Z_comb_spleen_min2, logged_data_mean(female_Z_comb_spleen_min2)], axis=1)
In [350]:
quartiles_male_Z_comb_gonad_min2 = pd.qcut(male_Z_comb_gonad_min2["logRPKMmean"], [0, .25, .5, .75, 1.], labels=["1", "2", "3", "4"])
quartiles_male_Z_comb_gonad_min2.name = "quartiles"
male_Z_comb_gonad_min2 = pd.concat([male_Z_comb_gonad_min2, quartiles_male_Z_comb_gonad_min2], axis=1)
female_Z_comb_gonad_min2 = pd.concat([female_Z_comb_gonad_min2, quartiles_male_Z_comb_gonad_min2], axis=1)
quartiles_male_Z_comb_heart_min2 = pd.qcut(male_Z_comb_heart_min2["logRPKMmean"], [0, .25, .5, .75, 1.], labels=["1", "2", "3", "4"])
quartiles_male_Z_comb_heart_min2.name = "quartiles"
male_Z_comb_heart_min2 = pd.concat([male_Z_comb_heart_min2, quartiles_male_Z_comb_heart_min2], axis=1)
female_Z_comb_heart_min2 = pd.concat([female_Z_comb_heart_min2, quartiles_male_Z_comb_heart_min2], axis=1)
quartiles_male_Z_comb_liver_min2 = pd.qcut(male_Z_comb_liver_min2["logRPKMmean"], [0, .25, .5, .75, 1.], labels=["1", "2", "3", "4"])
quartiles_male_Z_comb_liver_min2.name = "quartiles"
male_Z_comb_liver_min2 = pd.concat([male_Z_comb_liver_min2, quartiles_male_Z_comb_liver_min2], axis=1)
female_Z_comb_liver_min2 = pd.concat([female_Z_comb_liver_min2, quartiles_male_Z_comb_liver_min2], axis=1)
quartiles_male_Z_comb_spleen_min2 = pd.qcut(male_Z_comb_spleen_min2["logRPKMmean"], [0, .25, .5, .75, 1.], labels=["1", "2", "3", "4"])
quartiles_male_Z_comb_spleen_min2.name = "quartiles"
male_Z_comb_spleen_min2 = pd.concat([male_Z_comb_spleen_min2, quartiles_male_Z_comb_spleen_min2], axis=1)
female_Z_comb_spleen_min2 = pd.concat([female_Z_comb_spleen_min2, quartiles_male_Z_comb_spleen_min2], axis=1)
In [351]:
male_gonad_grp = male_Z_comb_gonad_min2.groupby('quartiles')
female_gonad_grp = female_Z_comb_gonad_min2.groupby('quartiles')
male_heart_grp = male_Z_comb_heart_min2.groupby('quartiles')
female_heart_grp = female_Z_comb_heart_min2.groupby('quartiles')
male_liver_grp = male_Z_comb_liver_min2.groupby('quartiles')
female_liver_grp = female_Z_comb_liver_min2.groupby('quartiles')
male_spleen_grp = male_Z_comb_spleen_min2.groupby('quartiles')
female_spleen_grp = female_Z_comb_spleen_min2.groupby('quartiles')
In [352]:
gonad_quartiles = [male_gonad_grp.get_group("1")["logRPKMmean"],
female_gonad_grp.get_group("1")["logRPKMmean"],
male_gonad_grp.get_group("2")["logRPKMmean"],
female_gonad_grp.get_group("2")["logRPKMmean"],
male_gonad_grp.get_group("3")["logRPKMmean"],
female_gonad_grp.get_group("3")["logRPKMmean"],
male_gonad_grp.get_group("4")["logRPKMmean"],
female_gonad_grp.get_group("4")["logRPKMmean"]]
heart_quartiles = [male_heart_grp.get_group("1")["logRPKMmean"],
female_heart_grp.get_group("1")["logRPKMmean"],
male_heart_grp.get_group("2")["logRPKMmean"],
female_heart_grp.get_group("2")["logRPKMmean"],
male_heart_grp.get_group("3")["logRPKMmean"],
female_heart_grp.get_group("3")["logRPKMmean"],
male_heart_grp.get_group("4")["logRPKMmean"],
female_heart_grp.get_group("4")["logRPKMmean"]]
liver_quartiles = [male_liver_grp.get_group("1")["logRPKMmean"],
female_liver_grp.get_group("1")["logRPKMmean"],
male_liver_grp.get_group("2")["logRPKMmean"],
female_liver_grp.get_group("2")["logRPKMmean"],
male_liver_grp.get_group("3")["logRPKMmean"],
female_liver_grp.get_group("3")["logRPKMmean"],
male_liver_grp.get_group("4")["logRPKMmean"],
female_liver_grp.get_group("4")["logRPKMmean"]]
spleen_quartiles = [male_spleen_grp.get_group("1")["logRPKMmean"],
female_spleen_grp.get_group("1")["logRPKMmean"],
male_spleen_grp.get_group("2")["logRPKMmean"],
female_spleen_grp.get_group("2")["logRPKMmean"],
male_spleen_grp.get_group("3")["logRPKMmean"],
female_spleen_grp.get_group("3")["logRPKMmean"],
male_spleen_grp.get_group("4")["logRPKMmean"],
female_spleen_grp.get_group("4")["logRPKMmean"]]
print [len(i) for i in spleen_quartiles]
print [len(i) for i in heart_quartiles]
print [len(i) for i in liver_quartiles]
print [len(i) for i in gonad_quartiles]
In [353]:
print "Gonad quartiles"
print ranksums(male_gonad_grp.get_group("1")["logRPKMmean"], female_gonad_grp.get_group("1")["logRPKMmean"])
print ranksums(male_gonad_grp.get_group("2")["logRPKMmean"], female_gonad_grp.get_group("2")["logRPKMmean"])
print ranksums(male_gonad_grp.get_group("3")["logRPKMmean"], female_gonad_grp.get_group("3")["logRPKMmean"])
print ranksums(male_gonad_grp.get_group("4")["logRPKMmean"], female_gonad_grp.get_group("4")["logRPKMmean"])
print "-----"
print mannwhitneyu(male_gonad_grp.get_group("1")["logRPKMmean"], female_gonad_grp.get_group("1")["logRPKMmean"])[1]*2
print mannwhitneyu(male_gonad_grp.get_group("2")["logRPKMmean"], female_gonad_grp.get_group("2")["logRPKMmean"])[1]*2
print mannwhitneyu(male_gonad_grp.get_group("3")["logRPKMmean"], female_gonad_grp.get_group("3")["logRPKMmean"])[1]*2
print mannwhitneyu(male_gonad_grp.get_group("4")["logRPKMmean"], female_gonad_grp.get_group("4")["logRPKMmean"])[1]*2
print "Heart quartiles"
print ranksums(male_heart_grp.get_group("1")["logRPKMmean"], female_heart_grp.get_group("1")["logRPKMmean"])
print ranksums(male_heart_grp.get_group("2")["logRPKMmean"], female_heart_grp.get_group("2")["logRPKMmean"])
print ranksums(male_heart_grp.get_group("3")["logRPKMmean"], female_heart_grp.get_group("3")["logRPKMmean"])
print ranksums(male_heart_grp.get_group("4")["logRPKMmean"], female_heart_grp.get_group("4")["logRPKMmean"])
print "-----"
print mannwhitneyu(male_heart_grp.get_group("1")["logRPKMmean"], female_heart_grp.get_group("1")["logRPKMmean"])[1]*2
print mannwhitneyu(male_heart_grp.get_group("2")["logRPKMmean"], female_heart_grp.get_group("2")["logRPKMmean"])[1]*2
print mannwhitneyu(male_heart_grp.get_group("3")["logRPKMmean"], female_heart_grp.get_group("3")["logRPKMmean"])[1]*2
print mannwhitneyu(male_heart_grp.get_group("4")["logRPKMmean"], female_heart_grp.get_group("4")["logRPKMmean"])[1]*2
print "Liver quartiles"
print ranksums(male_liver_grp.get_group("1")["logRPKMmean"], female_liver_grp.get_group("1")["logRPKMmean"])
print ranksums(male_liver_grp.get_group("2")["logRPKMmean"], female_liver_grp.get_group("2")["logRPKMmean"])
print ranksums(male_liver_grp.get_group("3")["logRPKMmean"], female_liver_grp.get_group("3")["logRPKMmean"])
print ranksums(male_liver_grp.get_group("4")["logRPKMmean"], female_liver_grp.get_group("4")["logRPKMmean"])
print "-----"
print mannwhitneyu(male_liver_grp.get_group("1")["logRPKMmean"], female_liver_grp.get_group("1")["logRPKMmean"])[1]*2
print mannwhitneyu(male_liver_grp.get_group("2")["logRPKMmean"], female_liver_grp.get_group("2")["logRPKMmean"])[1]*2
print mannwhitneyu(male_liver_grp.get_group("3")["logRPKMmean"], female_liver_grp.get_group("3")["logRPKMmean"])[1]*2
print mannwhitneyu(male_liver_grp.get_group("4")["logRPKMmean"], female_liver_grp.get_group("4")["logRPKMmean"])[1]*2
print "Spleen quartiles"
print ranksums(male_spleen_grp.get_group("1")["logRPKMmean"], female_spleen_grp.get_group("1")["logRPKMmean"])
print ranksums(male_spleen_grp.get_group("2")["logRPKMmean"], female_spleen_grp.get_group("2")["logRPKMmean"])
print ranksums(male_spleen_grp.get_group("3")["logRPKMmean"], female_spleen_grp.get_group("3")["logRPKMmean"])
print ranksums(male_spleen_grp.get_group("4")["logRPKMmean"], female_spleen_grp.get_group("4")["logRPKMmean"])
print "-----"
print mannwhitneyu(male_spleen_grp.get_group("1")["logRPKMmean"], female_spleen_grp.get_group("1")["logRPKMmean"])[1]*2
print mannwhitneyu(male_spleen_grp.get_group("2")["logRPKMmean"], female_spleen_grp.get_group("2")["logRPKMmean"])[1]*2
print mannwhitneyu(male_spleen_grp.get_group("3")["logRPKMmean"], female_spleen_grp.get_group("3")["logRPKMmean"])[1]*2
print mannwhitneyu(male_spleen_grp.get_group("4")["logRPKMmean"], female_spleen_grp.get_group("4")["logRPKMmean"])[1]*2
In [354]:
def color_boxes(bp):
red = False
for box in bp['boxes']:
# change outline color
box.set( color='black', linewidth=1)
# box.set( color='black', linewidth=1)
if red == True:
box.set( facecolor = '#e74c3c' )
red = False
else:
box.set( facecolor = '#3498DB' )
red = True
for median in bp['medians']:
median.set(color='black', linewidth=1)
## change the style of fliers and their fill
for flier in bp['fliers']:
flier.set(marker='o', color='black', alpha=0.5, markersize=4)
## change color and linewidth of the whiskers
for whisker in bp['whiskers']:
whisker.set(color='black', linewidth=1, linestyle="-")
## change color and linewidth of the caps
for cap in bp['caps']:
cap.set(color='black', linewidth=1)
# Create a figure instance
fig = plt.figure(1, figsize=(6, 6))
plt.rc("font", size=12)
ax1 = fig.add_subplot(221)
bp = ax1.boxplot(spleen_quartiles,
notch=True,
sym='', # no outliers
positions = [1, 2, 4, 5, 7, 8, 10, 11],
widths = 0.9,
patch_artist=True)
ax1.set_xticklabels(['1\n(133)', '2\n(132)', '3\n(132)', '4\n(132)'])
ax1.set_xticks([1.5, 4.5, 7.5, 10.5])
ax1.text(1.35, 4, "*", fontsize=11)
ax1.text(4.35, 5, "***", fontsize=11)
ax1.text(7.35, 6, "***", fontsize=11)
ax1.text(10.35, 9, "***", fontsize=11)
# Remove the ticks on the right and top side
ax1.yaxis.set_ticks_position('left')
ax1.xaxis.set_ticks_position('bottom')
color_boxes(bp)
plt.ylim(-1,11)
plt.title('(a) Spleen')
plt.ylabel(r'$log_2$'+"(RPKM)")
ax2 = fig.add_subplot(222)
bp = ax2.boxplot(heart_quartiles,
notch=True, #
sym='', # no outliers
positions = [1, 2, 4, 5, 7, 8, 10, 11],
widths = 0.9,
patch_artist=True)
ax2.set_xticklabels(['1\n(124)', '2\n(123)', '3\n(123)', '4\n(123)'])
ax2.set_xticks([1.5, 4.5, 7.5, 10.5])
ax2.text(1.35, 4, "*", fontsize=11)
ax2.text(4.35, 5, "***", fontsize=11)
ax2.text(7.35, 6, "***", fontsize=11)
ax2.text(10.35, 9, "*", fontsize=11)
# Remove the ticks on the right and top side
ax2.yaxis.set_ticks_position('left')
ax2.xaxis.set_ticks_position('bottom')
color_boxes(bp)
plt.ylim(-1,11)
plt.title('(b) Heart')
ax3 = fig.add_subplot(223)
bp = ax3.boxplot(liver_quartiles,
notch=True,
sym='', # no outliers
positions = [1, 2, 4, 5, 7, 8, 10, 11],
widths = 0.9,
patch_artist=True)
ax3.set_xticklabels(['1\n(112)', '2\n(111)', '3\n(111)', '4\n(111)'])
ax3.set_xticks([1.5, 4.5, 7.5, 10.5])
ax3.text(1.35, 3, "*", fontsize=11)
ax3.text(4.35, 4, "***", fontsize=11)
ax3.text(7.35, 5, "***", fontsize=11)
ax3.text(10.35, 10, "*", fontsize=11)
# Remove the ticks on the right and top side
ax3.yaxis.set_ticks_position('left')
ax3.xaxis.set_ticks_position('bottom')
color_boxes(bp)
plt.ylim(-1,11)
plt.title('(c) Liver')
plt.ylabel(r'$log_2$'+"(RPKM)")
plt.xlabel('Quartiles')
ax4 = fig.add_subplot(224)
bp = ax4.boxplot(gonad_quartiles,
notch=True, # box instead of notch shape
sym='', # red squares for outliers
positions = [1, 2, 4, 5, 7, 8, 10, 11],
widths = 0.9,
patch_artist=True)
ax4.set_xticklabels(['1\n(151)', '2\n(151)', '3\n(151)', '4\n(151)'])
ax4.set_xticks([1.5, 4.5, 7.5, 10.5])
ax4.text(1.35, 6, "ns", fontsize=11)
ax4.text(4.35, 6, "***", fontsize=11)
ax4.text(7.35, 7.5, "***", fontsize=11)
ax4.text(10.35, 9.5, "***", fontsize=11)
# Remove the ticks on the right and top side
ax4.yaxis.set_ticks_position('left')
ax4.xaxis.set_ticks_position('bottom')
color_boxes(bp)
plt.ylim(-1,11)
plt.title('(d) Gonad')
plt.xlabel('Quartiles')
fig.tight_layout(pad=2)
plt.savefig("2015-10-27-Z-quartiles.png", dpi=300)
plt.savefig("2015-10-27-Z-quartiles.pdf", dpi=300)