General analyses and plots

  • Includes comparisons of Z to A expression
  • Comparison of DC expression in males and females

In [314]:
import numpy as np
import pandas as pd
from tabulate import tabulate
from collections import defaultdict

from scipy.stats import fisher_exact, mannwhitneyu, wilcoxon, ranksums, chi2_contingency, chisquare
# import matplotlibtail 
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D 
%matplotlib inline

All of the CPM values are log2 transformed. Be careful when doing calculations (logs are different). Usually the best thing is to unlog before calculation means etc.

These tables are completely unfilterted and contain also unexpressed genes. They need to be filtered


In [315]:
# Read in the RPKM tables
rpkm_gonad = pd.read_csv("/data/ucbtzim/ohnolog-dc/differential-expression/2015-10-27-MF-GONAD-RPKM-NORM-log2-edgeR.csv")
rpkm_spleen = pd.read_csv("/data/ucbtzim/ohnolog-dc/differential-expression/2015-10-27-MF-SPLEEN-RPKM-NORM-log2-edgeR.csv")
rpkm_heart = pd.read_csv("/data/ucbtzim/ohnolog-dc/differential-expression/2015-10-27-MF-HEART-RPKM-NORM-log2-edgeR.csv")
rpkm_liver = pd.read_csv("/data/ucbtzim/ohnolog-dc/differential-expression/2015-10-27-MF-LIVER-RPKM-NORM-log2-edgeR.csv")

# Read in the detag EdegR results
de_gonad = pd.read_csv("/data/ucbtzim/ohnolog-dc/differential-expression/2015-10-27-MF-GONAD-detags-edgeR.csv")
de_spleen = pd.read_csv("/data/ucbtzim/ohnolog-dc/differential-expression/2015-10-27-MF-SPLEEN-detags-edgeR.csv")
de_heart = pd.read_csv("/data/ucbtzim/ohnolog-dc/differential-expression/2015-10-27-MF-HEART-detags-edgeR.csv")
de_liver = pd.read_csv("/data/ucbtzim/ohnolog-dc/differential-expression/2015-10-27-MF-LIVER-detags-edgeR.csv")

A note on the logFC

The edegR logFC value is calculated internally using some additional processing.(see below)

Also "note that the first group listed in the pair is the baseline for the comparison—so if exactTest the pair is c("A","B") then the comparison is B - A, so genes with positive log-fold change are up-regulated in group B compared with group A (and vice versa for genes with negative log-fold change)." Source EdgeR: Manual

In our case this means that the Male expression is substracted from the Female expression (F-M): Positiv logFC is upregulation in females, negate logFC is upregulation in Males.

On the Z that means the close we get to 0.0 logFC the more DC.

Add some positional information. We need to know where genes are located


In [396]:
def read_gid_to_chr(infile):
    gid_to_chr = {}
    with open(infile, "rb") as handle:
        for line in handle:
            if line.startswith("Ensembl"):
                continue
            else:
                line = line.rstrip().split()
                gid_to_chr[line[0]] = line[1]
    return gid_to_chr

gid_to_chr = read_gid_to_chr("/data/ucbtzim/ohnolog-dc/dc-comparison/Gallus_gallus.75.gid.chr.proteincoding.tsv")

Two functions to work with log2 expression


In [317]:
def anti_log2(i):
    return np.power(2, i)

def log2(i):
    return np.log2(i)

print type(np.power)
print type(np.log2)


<type 'numpy.ufunc'>
<type 'numpy.ufunc'>

In [318]:
# Print the first 5 lines of the dataframe
rpkm_gonad.head()


Out[318]:
WTCHG_23425_05_GONAD_M_1_sequence WTCHG_23425_04_GONAD_M_1_sequence WTCHG_23425_12_GONAD_M_1_sequence WTCHG_23425_06_GONAD_M_1_sequence WTCHG_23702_04_GONAD_F_1_sequence WTCHG_23702_05_GONAD_F_1_sequence WTCHG_23702_12_GONAD_F_1_sequence WTCHG_23702_06_GONAD_F_1_sequence
ENSGALG00000000003 2.220145 2.480927 2.182589 2.065128 1.511206 1.005258 1.209041 1.443923
ENSGALG00000000004 3.032695 3.244164 2.891725 2.751503 2.446796 2.558455 2.407342 2.614145
ENSGALG00000000011 3.493343 3.531948 3.828556 3.722546 3.984308 3.480030 3.767212 3.670157
ENSGALG00000000013 3.407879 3.364665 3.446097 3.305962 3.119093 3.022892 3.193507 3.207058
ENSGALG00000000014 -0.004977 0.173711 -0.512172 -0.301640 0.007394 0.174725 0.358401 0.407864

In [319]:
# The differential expression results from edgeR
de_gonad.head()


Out[319]:
logFC logCPM PValue FDR
ENSGALG00000000386 8.108556 6.594766 0.000000e+00 0.000000e+00
ENSGALG00000001756 8.684406 7.036651 9.761462e-316 7.589536e-312
ENSGALG00000013312 9.834955 5.384945 9.475218e-253 4.911321e-249
ENSGALG00000022674 12.677968 5.112837 3.101470e-219 1.205696e-215
ENSGALG00000022679 12.374174 4.810069 2.111286e-215 6.566100e-212

First we combine the RPKM (!) and differential dataframes to hold all the data in one place.


In [320]:
comb_gonad = pd.concat([rpkm_gonad, de_gonad], axis=1)
comb_heart = pd.concat([rpkm_heart, de_heart], axis=1)
comb_liver = pd.concat([rpkm_liver, de_liver], axis=1)
comb_spleen = pd.concat([rpkm_spleen, de_spleen], axis=1)

print comb_gonad.shape
print comb_heart.shape
# Liver has one female sample less
print comb_liver.shape
print comb_spleen.shape

print comb_gonad.dtypes
print comb_heart.dtypes
# Liver has one female sample less
print comb_liver.dtypes
print comb_spleen.dtypes


(15550, 12)
(15550, 12)
(15550, 11)
(15550, 12)
WTCHG_23425_05_GONAD_M_1_sequence    float64
WTCHG_23425_04_GONAD_M_1_sequence    float64
WTCHG_23425_12_GONAD_M_1_sequence    float64
WTCHG_23425_06_GONAD_M_1_sequence    float64
WTCHG_23702_04_GONAD_F_1_sequence    float64
WTCHG_23702_05_GONAD_F_1_sequence    float64
WTCHG_23702_12_GONAD_F_1_sequence    float64
WTCHG_23702_06_GONAD_F_1_sequence    float64
logFC                                float64
logCPM                               float64
PValue                               float64
FDR                                  float64
dtype: object
WTCHG_35309_155_HEART_M_1_sequence    float64
WTCHG_35309_191_HEART_M_1_sequence    float64
WTCHG_35309_132_HEART_M_1_sequence    float64
WTCHG_35309_168_HEART_M_1_sequence    float64
WTCHG_35308_142_HEART_F_1_sequence    float64
WTCHG_35308_119_HEART_F_1_sequence    float64
WTCHG_35308_178_HEART_F_1_sequence    float64
WTCHG_35308_106_HEART_F_1_sequence    float64
logFC                                 float64
logCPM                                float64
PValue                                float64
FDR                                   float64
dtype: object
WTCHG_35309_180_LIVER_M_1_sequence    float64
WTCHG_35309_167_LIVER_M_1_sequence    float64
WTCHG_35309_108_LIVER_M_1_sequence    float64
WTCHG_35309_144_LIVER_M_1_sequence    float64
WTCHG_35308_131_LIVER_F_1_sequence    float64
WTCHG_35308_154_LIVER_F_1_sequence    float64
WTCHG_35308_190_LIVER_F_1_sequence    float64
logFC                                 float64
logCPM                                float64
PValue                                float64
FDR                                   float64
dtype: object
WTCHG_35309_179_SPLEEN_M_1_sequence    float64
WTCHG_35309_156_SPLEEN_M_1_sequence    float64
WTCHG_35309_120_SPLEEN_M_1_sequence    float64
WTCHG_35309_192_SPLEEN_M_1_sequence    float64
WTCHG_35308_166_SPLEEN_F_1_sequence    float64
WTCHG_35308_130_SPLEEN_F_1_sequence    float64
WTCHG_35308_107_SPLEEN_F_1_sequence    float64
WTCHG_35308_143_SPLEEN_F_1_sequence    float64
logFC                                  float64
logCPM                                 float64
PValue                                 float64
FDR                                    float64
dtype: object

Now we filter out all scaffolds and the W chromosome. Only keep autosomes 1-28 and the Z chromosome


In [321]:
# No expression on chromosome 32, excluded
chicken_chromosomes = [str(i) for i in range(1, 29)] + ["Z"]

print "Valid chromosomes: ", chicken_chromosomes

comb_gonad = comb_gonad[comb_gonad.index.map(lambda x: gid_to_chr[x] in chicken_chromosomes)]
comb_heart = comb_heart[comb_heart.index.map(lambda x: gid_to_chr[x] in chicken_chromosomes)]
comb_liver = comb_liver[comb_liver.index.map(lambda x: gid_to_chr[x] in chicken_chromosomes)]
comb_spleen = comb_spleen[comb_spleen.index.map(lambda x: gid_to_chr[x] in chicken_chromosomes)]

print comb_gonad.shape
print comb_heart.shape
print comb_liver.shape
print comb_spleen.shape


Valid chromosomes:  ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', 'Z']
(14496, 12)
(14496, 12)
(14496, 11)
(14496, 12)

In [ ]:


In [378]:
def filter_cpm(expr, mincpm):
    '''Filter out lowly expressed genes based on a fixed minimum CPM threshold'''
    filtered = expr[expr["logCPM"] > mincpm]
    return filtered

# Beware average CPM is log2 transformed.
# For avgCPM of 2 use 1 | log2(2) = 1
comb_gonad_min2 = filter_cpm(comb_gonad, 1)
comb_heart_min2 = filter_cpm(comb_heart, 1)
comb_liver_min2 = filter_cpm(comb_liver, 1)
comb_spleen_min2 = filter_cpm(comb_spleen, 1)


# Drop ENSGALG00000018479. Looks like a W gene
# Does not affect the Z vs A comparison at all
# comb_gonad_min2 = comb_gonad_min2.drop("ENSGALG00000018479")
# comb_heart_min2 = comb_heart_min2.drop("ENSGALG00000018479")
# comb_liver_min2 = comb_liver_min2.drop("ENSGALG00000018479")
# comb_spleen_min2 = comb_spleen_min2.drop("ENSGALG00000018479")

print comb_gonad_min2.shape
print comb_heart_min2.shape
print comb_liver_min2.shape
print comb_spleen_min2.shape


print comb_gonad_min2.dtypes
print comb_heart_min2.dtypes
print comb_liver_min2.dtypes
print comb_spleen_min2.dtypes


(11944, 12)
(9971, 12)
(9383, 11)
(10847, 12)
WTCHG_23425_05_GONAD_M_1_sequence    float64
WTCHG_23425_04_GONAD_M_1_sequence    float64
WTCHG_23425_12_GONAD_M_1_sequence    float64
WTCHG_23425_06_GONAD_M_1_sequence    float64
WTCHG_23702_04_GONAD_F_1_sequence    float64
WTCHG_23702_05_GONAD_F_1_sequence    float64
WTCHG_23702_12_GONAD_F_1_sequence    float64
WTCHG_23702_06_GONAD_F_1_sequence    float64
logFC                                float64
logCPM                               float64
PValue                               float64
FDR                                  float64
dtype: object
WTCHG_35309_155_HEART_M_1_sequence    float64
WTCHG_35309_191_HEART_M_1_sequence    float64
WTCHG_35309_132_HEART_M_1_sequence    float64
WTCHG_35309_168_HEART_M_1_sequence    float64
WTCHG_35308_142_HEART_F_1_sequence    float64
WTCHG_35308_119_HEART_F_1_sequence    float64
WTCHG_35308_178_HEART_F_1_sequence    float64
WTCHG_35308_106_HEART_F_1_sequence    float64
logFC                                 float64
logCPM                                float64
PValue                                float64
FDR                                   float64
dtype: object
WTCHG_35309_180_LIVER_M_1_sequence    float64
WTCHG_35309_167_LIVER_M_1_sequence    float64
WTCHG_35309_108_LIVER_M_1_sequence    float64
WTCHG_35309_144_LIVER_M_1_sequence    float64
WTCHG_35308_131_LIVER_F_1_sequence    float64
WTCHG_35308_154_LIVER_F_1_sequence    float64
WTCHG_35308_190_LIVER_F_1_sequence    float64
logFC                                 float64
logCPM                                float64
PValue                                float64
FDR                                   float64
dtype: object
WTCHG_35309_179_SPLEEN_M_1_sequence    float64
WTCHG_35309_156_SPLEEN_M_1_sequence    float64
WTCHG_35309_120_SPLEEN_M_1_sequence    float64
WTCHG_35309_192_SPLEEN_M_1_sequence    float64
WTCHG_35308_166_SPLEEN_F_1_sequence    float64
WTCHG_35308_130_SPLEEN_F_1_sequence    float64
WTCHG_35308_107_SPLEEN_F_1_sequence    float64
WTCHG_35308_143_SPLEEN_F_1_sequence    float64
logFC                                  float64
logCPM                                 float64
PValue                                 float64
FDR                                    float64
dtype: object

In [395]:



764
Number of genes (protein coding) on the Z chromosome: 764

Now have four different data frames with all the data required. Both dataframes contain males and females and autosomal + Z expression. Now we split the dataframes into one autosomal and one Z dataframe


In [379]:
def split_dataframe(data, genes):
    '''Returns two dataframes. The first return value contains all rows that did match the genes.
       The second dataframe contains only genes that did NOT match.
    '''
    match = data[data.index.isin(genes)]
    nomatch = data[~data.index.isin(genes)]
    
    return match, nomatch

# Get a list of Z linked gene IDs
Z_genes = [g for g, pos in gid_to_chr.items() if pos == "Z"]
print "Number of genes (protein coding) on the Z chromosome:", len(Z_genes)
print ""

# Filter the expression so it only contains Z expression
Z_comb_gonad_min2, AUTO_comb_gonad_min2 = split_dataframe(comb_gonad_min2, Z_genes)
Z_comb_heart_min2, AUTO_comb_heart_min2 = split_dataframe(comb_heart_min2, Z_genes)
Z_comb_liver_min2, AUTO_comb_liver_min2 = split_dataframe(comb_liver_min2, Z_genes)
Z_comb_spleen_min2, AUTO_comb_spleen_min2 = split_dataframe(comb_spleen_min2, Z_genes)


print tabulate([["Tissue", "Z", "Autosomes"],
                ["Gonad", Z_comb_gonad_min2.shape[0], AUTO_comb_gonad_min2.shape[0]],
                ["Heart", Z_comb_heart_min2.shape[0], AUTO_comb_heart_min2.shape[0]],
                ["Liver", Z_comb_liver_min2.shape[0], AUTO_comb_liver_min2.shape[0]],
                ["Spleen", Z_comb_spleen_min2.shape[0], AUTO_comb_spleen_min2.shape[0]]],
                headers="firstrow")

print ""
print "Z overlap liver heart", len(set(Z_comb_liver_min2.index).intersection(Z_comb_heart_min2.index))
print "Z overlap liver spleen", len(set(Z_comb_liver_min2.index).intersection(Z_comb_spleen_min2.index))
print "Z overlap liver gonad", len(set(Z_comb_liver_min2.index).intersection(Z_comb_gonad_min2.index))
print
print "Z overlap heart spleen", len(set(Z_comb_heart_min2.index).intersection(Z_comb_spleen_min2.index))
print "Z overlap heart gonad", len(set(Z_comb_heart_min2.index).intersection(Z_comb_gonad_min2.index))
print
print "Z overlap gonad spleen", len(set(Z_comb_gonad_min2.index).intersection(Z_comb_spleen_min2.index))


Number of genes (protein coding) on the Z chromosome: 764

Tissue      Z    Autosomes
--------  ---  -----------
Gonad     604        11340
Heart     493         9478
Liver     445         8938
Spleen    529        10318

Z overlap liver heart 422
Z overlap liver spleen 428
Z overlap liver gonad 439

Z overlap heart spleen 477
Z overlap heart gonad 488

Z overlap gonad spleen 516

In [323]:


In [383]:
def get_significant_de(detags, significance, foldchange):
    '''Split a dataframe into sex-biased and unbiased expression.
       A gene is biased when a) The FDR corrected p-value from exactTest() is above significance
       b) the foldchange is larger/smaller than given
    '''
    biased = detags[(detags["FDR"] < significance)]
    biased = biased[(biased["logFC"] < -foldchange) | (biased["logFC"] > foldchange)]
    #Everything that is not in biased is unbiased
    unbiased = detags[~detags.index.isin(biased.index)]

    return biased, unbiased

# Filter with a significance threshold of 0.05
AUTO_comb_gonad_min2_biased, AUTO_comb_gonad_min2_unbiased = get_significant_de(AUTO_comb_gonad_min2, 0.05, 1)
AUTO_comb_heart_min2_biased, AUTO_comb_heart_min2_unbiased = get_significant_de(AUTO_comb_heart_min2, 0.05, 1)
AUTO_comb_liver_min2_biased, AUTO_comb_liver_min2_unbiased = get_significant_de(AUTO_comb_liver_min2, 0.05, 1)
AUTO_comb_spleen_min2_biased, AUTO_comb_spleen_min2_unbiased = get_significant_de(AUTO_comb_spleen_min2, 0.05, 1)


def percentage(part, whole):
  return 100 * float(part)/float(whole)

#Number of genes expressed and sex-biased on a per tissue basis
print tabulate([["Tissue", "Sex-biased","%", "Unbiased", "%", "Total"],
                ["Gonad", AUTO_comb_gonad_min2_biased.shape[0], percentage(AUTO_comb_gonad_min2_biased.shape[0],AUTO_comb_gonad_min2.shape[0]), AUTO_comb_gonad_min2_unbiased.shape[0], percentage(AUTO_comb_gonad_min2_unbiased.shape[0],AUTO_comb_gonad_min2.shape[0]), AUTO_comb_gonad_min2.shape[0]],
                ["Spleen", AUTO_comb_spleen_min2_biased.shape[0], percentage(AUTO_comb_spleen_min2_biased.shape[0],AUTO_comb_spleen_min2.shape[0]), AUTO_comb_spleen_min2_unbiased.shape[0],percentage(AUTO_comb_spleen_min2_unbiased.shape[0],AUTO_comb_spleen_min2.shape[0]), AUTO_comb_spleen_min2.shape[0]],
                ["Heart", AUTO_comb_heart_min2_biased.shape[0], percentage(AUTO_comb_heart_min2_biased.shape[0],AUTO_comb_heart_min2.shape[0]), AUTO_comb_heart_min2_unbiased.shape[0],percentage(AUTO_comb_heart_min2_unbiased.shape[0],AUTO_comb_heart_min2.shape[0]), AUTO_comb_heart_min2.shape[0]],
                ["Liver", AUTO_comb_liver_min2_biased.shape[0], percentage(AUTO_comb_liver_min2_biased.shape[0],AUTO_comb_liver_min2.shape[0]), AUTO_comb_liver_min2_unbiased.shape[0], percentage(AUTO_comb_liver_min2_unbiased.shape[0],AUTO_comb_liver_min2.shape[0]), AUTO_comb_liver_min2.shape[0]]],
                    headers="firstrow")

def filter_foldchange(detags):
    '''Filter by foldchange, here hardcoded for our dosage compensation criterion'''
    DC = detags[(detags["logFC"] >= -0.5) & (detags["logFC"] <= 0.5)]
    biased = detags[(detags["logFC"] < -0.5) | (detags["logFC"] > 0.5)]
    return biased, DC

Z_de_gonad_min2_biased, Z_de_gonad_min2_dc = filter_foldchange(Z_comb_gonad_min2)
Z_de_spleen_min2_biased, Z_de_spleen_min2_dc = filter_foldchange(Z_comb_spleen_min2)
Z_de_heart_min2_biased, Z_de_heart_min2_dc = filter_foldchange(Z_comb_heart_min2)
Z_de_liver_min2_biased, Z_de_liver_min2_dc = filter_foldchange(Z_comb_liver_min2)


print tabulate([["Tissue", "Biased","%", "Compensated", "%", "Total"],
                ["Gonad", Z_de_gonad_min2_biased.shape[0], percentage(Z_de_gonad_min2_biased.shape[0],Z_comb_gonad_min2.shape[0]), Z_de_gonad_min2_dc.shape[0], percentage(Z_de_gonad_min2_dc.shape[0],Z_comb_gonad_min2.shape[0]), Z_comb_gonad_min2.shape[0]],
                ["Spleen", Z_de_spleen_min2_biased.shape[0], percentage(Z_de_spleen_min2_biased.shape[0],Z_comb_spleen_min2.shape[0]), Z_de_spleen_min2_dc.shape[0],percentage(Z_de_spleen_min2_dc.shape[0],Z_comb_spleen_min2.shape[0]), Z_comb_spleen_min2.shape[0]],
                ["Heart", Z_de_heart_min2_biased.shape[0], percentage(Z_de_heart_min2_biased.shape[0],Z_comb_heart_min2.shape[0]), Z_de_heart_min2_dc.shape[0],percentage(Z_de_heart_min2_dc.shape[0],Z_comb_heart_min2.shape[0]), Z_comb_heart_min2.shape[0]],
                ["Liver", Z_de_liver_min2_biased.shape[0], percentage(Z_de_liver_min2_biased.shape[0],Z_comb_liver_min2.shape[0]), Z_de_liver_min2_dc.shape[0], percentage(Z_de_liver_min2_dc.shape[0],Z_comb_liver_min2.shape[0]), Z_comb_liver_min2.shape[0]]],
                    headers="firstrow")


Tissue      Sex-biased          %    Unbiased        %    Total
--------  ------------  ---------  ----------  -------  -------
Gonad             1646  14.515           9694  85.485     11340
Spleen              79   0.765652       10239  99.2343    10318
Heart               20   0.211015        9458  99.789      9478
Liver               38   0.425151        8900  99.5748     8938
Tissue      Biased        %    Compensated        %    Total
--------  --------  -------  -------------  -------  -------
Gonad          415  68.7086            189  31.2914      604
Spleen         223  42.155             306  57.845       529
Heart          230  46.6531            263  53.3469      493
Liver          193  43.3708            252  56.6292      445

Slice the dataframes into male and female expression. Double check that the iloc slices are correct. NOTE: Maybe better to use sample ids?


In [325]:
male_Z_comb_gonad_min2 = Z_comb_gonad_min2.iloc[:,:4]
female_Z_comb_gonad_min2 = Z_comb_gonad_min2.iloc[:,4:8]

male_Z_comb_heart_min2 = Z_comb_heart_min2.iloc[:,:4]
female_Z_comb_heart_min2 = Z_comb_heart_min2.iloc[:,4:8]

#Liver has one female sample less
male_Z_comb_liver_min2 = Z_comb_liver_min2.iloc[:,:4]
female_Z_comb_liver_min2 = Z_comb_liver_min2.iloc[:,4:7]

male_Z_comb_spleen_min2 = Z_comb_spleen_min2.iloc[:,:4]
female_Z_comb_spleen_min2 = Z_comb_spleen_min2.iloc[:,4:8]



male_AUTO_comb_gonad_min2 = AUTO_comb_gonad_min2.iloc[:,:4]
female_AUTO_comb_gonad_min2 = AUTO_comb_gonad_min2.iloc[:,4:8]

male_AUTO_comb_heart_min2 = AUTO_comb_heart_min2.iloc[:,:4]
female_AUTO_comb_heart_min2 = AUTO_comb_heart_min2.iloc[:,4:8]

#Liver has one female sample less
male_AUTO_comb_liver_min2 = AUTO_comb_liver_min2.iloc[:,:4]
female_AUTO_comb_liver_min2 = AUTO_comb_liver_min2.iloc[:,4:7]

male_AUTO_comb_spleen_min2 = AUTO_comb_spleen_min2.iloc[:,:4]
female_AUTO_comb_spleen_min2 = AUTO_comb_spleen_min2.iloc[:,4:8]


print male_Z_comb_gonad_min2.shape
print female_Z_comb_gonad_min2.shape
# female_Z_comb_liver_min2.head()
# # male_Z_comb_liver_min2.head()


(604, 4)
(604, 4)

Have a look at the mean/median expression for the samples


In [326]:
print "MEANS"
print male_Z_comb_gonad_min2.applymap(anti_log2).mean()
print female_Z_comb_gonad_min2.applymap(anti_log2).mean()

print male_Z_comb_heart_min2.applymap(anti_log2).mean()
print female_Z_comb_heart_min2.applymap(anti_log2).mean()

print male_Z_comb_liver_min2.applymap(anti_log2).mean()
print female_Z_comb_liver_min2.applymap(anti_log2).mean()

print male_Z_comb_spleen_min2.applymap(anti_log2).mean()
print female_Z_comb_spleen_min2.applymap(anti_log2).mean()

print "MEDIANS"
print male_Z_comb_gonad_min2.applymap(anti_log2).median()
print female_Z_comb_gonad_min2.applymap(anti_log2).median()

print male_Z_comb_heart_min2.applymap(anti_log2).median()
print female_Z_comb_heart_min2.applymap(anti_log2).median()

print male_Z_comb_liver_min2.applymap(anti_log2).median()
print female_Z_comb_liver_min2.applymap(anti_log2).median()

print male_Z_comb_spleen_min2.applymap(anti_log2).median()
print female_Z_comb_spleen_min2.applymap(anti_log2).median()


MEANS
WTCHG_23425_05_GONAD_M_1_sequence    44.831603
WTCHG_23425_04_GONAD_M_1_sequence    41.646038
WTCHG_23425_12_GONAD_M_1_sequence    43.232253
WTCHG_23425_06_GONAD_M_1_sequence    43.344136
dtype: float64
WTCHG_23702_04_GONAD_F_1_sequence    27.125928
WTCHG_23702_05_GONAD_F_1_sequence    30.208077
WTCHG_23702_12_GONAD_F_1_sequence    28.281746
WTCHG_23702_06_GONAD_F_1_sequence    28.195377
dtype: float64
WTCHG_35309_155_HEART_M_1_sequence    39.003504
WTCHG_35309_191_HEART_M_1_sequence    36.417734
WTCHG_35309_132_HEART_M_1_sequence    39.873647
WTCHG_35309_168_HEART_M_1_sequence    41.351439
dtype: float64
WTCHG_35308_142_HEART_F_1_sequence    25.909568
WTCHG_35308_119_HEART_F_1_sequence    27.202560
WTCHG_35308_178_HEART_F_1_sequence    25.429428
WTCHG_35308_106_HEART_F_1_sequence    27.796906
dtype: float64
WTCHG_35309_180_LIVER_M_1_sequence    44.756728
WTCHG_35309_167_LIVER_M_1_sequence    44.850567
WTCHG_35309_108_LIVER_M_1_sequence    39.464573
WTCHG_35309_144_LIVER_M_1_sequence    41.688938
dtype: float64
WTCHG_35308_131_LIVER_F_1_sequence    37.225575
WTCHG_35308_154_LIVER_F_1_sequence    36.250626
WTCHG_35308_190_LIVER_F_1_sequence    33.656190
dtype: float64
WTCHG_35309_179_SPLEEN_M_1_sequence    50.790771
WTCHG_35309_156_SPLEEN_M_1_sequence    52.771802
WTCHG_35309_120_SPLEEN_M_1_sequence    47.435010
WTCHG_35309_192_SPLEEN_M_1_sequence    47.780041
dtype: float64
WTCHG_35308_166_SPLEEN_F_1_sequence    32.953366
WTCHG_35308_130_SPLEEN_F_1_sequence    35.357854
WTCHG_35308_107_SPLEEN_F_1_sequence    36.578680
WTCHG_35308_143_SPLEEN_F_1_sequence    33.880203
dtype: float64
MEDIANS
WTCHG_23425_05_GONAD_M_1_sequence    13.557610
WTCHG_23425_04_GONAD_M_1_sequence    13.791430
WTCHG_23425_12_GONAD_M_1_sequence    13.724764
WTCHG_23425_06_GONAD_M_1_sequence    13.461857
dtype: float64
WTCHG_23702_04_GONAD_F_1_sequence    8.799332
WTCHG_23702_05_GONAD_F_1_sequence    9.475938
WTCHG_23702_12_GONAD_F_1_sequence    8.934469
WTCHG_23702_06_GONAD_F_1_sequence    9.119472
dtype: float64
WTCHG_35309_155_HEART_M_1_sequence    5.562249
WTCHG_35309_191_HEART_M_1_sequence    5.803518
WTCHG_35309_132_HEART_M_1_sequence    5.645283
WTCHG_35309_168_HEART_M_1_sequence    5.451703
dtype: float64
WTCHG_35308_142_HEART_F_1_sequence    4.027838
WTCHG_35308_119_HEART_F_1_sequence    4.275367
WTCHG_35308_178_HEART_F_1_sequence    4.304247
WTCHG_35308_106_HEART_F_1_sequence    3.998643
dtype: float64
WTCHG_35309_180_LIVER_M_1_sequence    3.886357
WTCHG_35309_167_LIVER_M_1_sequence    3.984445
WTCHG_35309_108_LIVER_M_1_sequence    3.960369
WTCHG_35309_144_LIVER_M_1_sequence    4.138517
dtype: float64
WTCHG_35308_131_LIVER_F_1_sequence    2.930101
WTCHG_35308_154_LIVER_F_1_sequence    2.997359
WTCHG_35308_190_LIVER_F_1_sequence    3.027183
dtype: float64
WTCHG_35309_179_SPLEEN_M_1_sequence     9.331693
WTCHG_35309_156_SPLEEN_M_1_sequence     9.404695
WTCHG_35309_120_SPLEEN_M_1_sequence    10.163577
WTCHG_35309_192_SPLEEN_M_1_sequence     9.746239
dtype: float64
WTCHG_35308_166_SPLEEN_F_1_sequence    6.963901
WTCHG_35308_130_SPLEEN_F_1_sequence    7.482654
WTCHG_35308_107_SPLEEN_F_1_sequence    6.845427
WTCHG_35308_143_SPLEEN_F_1_sequence    7.103084
dtype: float64

Now get the average expression for male and female autosomal expression across samples


In [327]:
def logged_data_mean(df):
    '''Correctly take the mean of logged data. First unlog, then calculate mean, then log again'''
    logRPKMmean = pd.Series(df.applymap(anti_log2).mean(axis=1).map(log2), name="logRPKMmean")
    return logRPKMmean

male_Z_comb_gonad_min2 = pd.concat([male_Z_comb_gonad_min2, logged_data_mean(male_Z_comb_gonad_min2)], axis=1)
female_Z_comb_gonad_min2 = pd.concat([female_Z_comb_gonad_min2, logged_data_mean(female_Z_comb_gonad_min2)], axis=1)

male_Z_comb_heart_min2 = pd.concat([male_Z_comb_heart_min2, logged_data_mean(male_Z_comb_heart_min2)], axis=1)
female_Z_comb_heart_min2 = pd.concat([female_Z_comb_heart_min2, logged_data_mean(female_Z_comb_heart_min2)], axis=1)

male_Z_comb_liver_min2 = pd.concat([male_Z_comb_liver_min2, logged_data_mean(male_Z_comb_liver_min2)], axis=1)
female_Z_comb_liver_min2 = pd.concat([female_Z_comb_liver_min2, logged_data_mean(female_Z_comb_liver_min2)], axis=1)

male_Z_comb_spleen_min2 = pd.concat([male_Z_comb_spleen_min2, logged_data_mean(male_Z_comb_spleen_min2)], axis=1)
female_Z_comb_spleen_min2 = pd.concat([female_Z_comb_spleen_min2, logged_data_mean(female_Z_comb_spleen_min2)], axis=1)


male_AUTO_comb_gonad_min2 = pd.concat([male_AUTO_comb_gonad_min2, logged_data_mean(male_AUTO_comb_gonad_min2)], axis=1)
female_AUTO_comb_gonad_min2 = pd.concat([female_AUTO_comb_gonad_min2, logged_data_mean(female_AUTO_comb_gonad_min2)], axis=1)

male_AUTO_comb_heart_min2 = pd.concat([male_AUTO_comb_heart_min2, logged_data_mean(male_AUTO_comb_heart_min2)], axis=1)
female_AUTO_comb_heart_min2 = pd.concat([female_AUTO_comb_heart_min2, logged_data_mean(female_AUTO_comb_heart_min2)], axis=1)

male_AUTO_comb_liver_min2 = pd.concat([male_AUTO_comb_liver_min2, logged_data_mean(male_AUTO_comb_liver_min2)], axis=1)
female_AUTO_comb_liver_min2 = pd.concat([female_AUTO_comb_liver_min2, logged_data_mean(female_AUTO_comb_liver_min2)], axis=1)

male_AUTO_comb_spleen_min2 = pd.concat([male_AUTO_comb_spleen_min2, logged_data_mean(male_AUTO_comb_spleen_min2)], axis=1)
female_AUTO_comb_spleen_min2 = pd.concat([female_AUTO_comb_spleen_min2, logged_data_mean(female_AUTO_comb_spleen_min2)], axis=1)


print "Gonad male"
print '%.2f' % round(np.median(male_AUTO_comb_gonad_min2["logRPKMmean"]), 2) 
print '%.2f' % round(np.median(male_Z_comb_gonad_min2["logRPKMmean"]), 2 )
print "means of logged data"
print '%.2f' % round(np.mean(male_AUTO_comb_gonad_min2["logRPKMmean"]), 2) 
print '%.2f' % round(np.mean(male_Z_comb_gonad_min2["logRPKMmean"]), 2 )
print ranksums(male_AUTO_comb_gonad_min2["logRPKMmean"], male_Z_comb_gonad_min2["logRPKMmean"])
print '%.2f' % round(ranksums(male_AUTO_comb_gonad_min2["logRPKMmean"], male_Z_comb_gonad_min2["logRPKMmean"])[0], 2 )
#Calculate two sided p-value
print mannwhitneyu(male_AUTO_comb_gonad_min2["logRPKMmean"], male_Z_comb_gonad_min2["logRPKMmean"])[1]*2
print "---"
print "Gonad female"
print '%.2f' % round(np.median(female_AUTO_comb_gonad_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.median(female_Z_comb_gonad_min2["logRPKMmean"]), 2)
print "means of logged data"
print '%.2f' % round(np.mean(female_AUTO_comb_gonad_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.mean(female_Z_comb_gonad_min2["logRPKMmean"]), 2)
print ranksums(female_AUTO_comb_gonad_min2["logRPKMmean"], female_Z_comb_gonad_min2["logRPKMmean"])
print '%.2f' % round(ranksums(female_AUTO_comb_gonad_min2["logRPKMmean"], female_Z_comb_gonad_min2["logRPKMmean"])[0], 2 )
#Calculate two sided p-value
print mannwhitneyu(female_AUTO_comb_gonad_min2["logRPKMmean"], female_Z_comb_gonad_min2["logRPKMmean"])[1]*2
print "=============="
print "Heart male"
print "medians"
print '%.2f' % round(np.median(male_AUTO_comb_heart_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.median(male_Z_comb_heart_min2["logRPKMmean"]), 2)
print "means of logged data"
print '%.2f' % round(np.mean(male_AUTO_comb_heart_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.mean(male_Z_comb_heart_min2["logRPKMmean"]), 2)
print ranksums(male_AUTO_comb_heart_min2["logRPKMmean"], male_Z_comb_heart_min2["logRPKMmean"])
print '%.2f' % round(ranksums(male_AUTO_comb_heart_min2["logRPKMmean"], male_Z_comb_heart_min2["logRPKMmean"])[0], 2 )
#Calculate two sided p-value
print mannwhitneyu(male_AUTO_comb_heart_min2["logRPKMmean"], male_Z_comb_heart_min2["logRPKMmean"])[1]*2
print "---"
print "Heart female"
print "medians"
print '%.2f' % round(np.median(female_AUTO_comb_heart_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.median(female_Z_comb_heart_min2["logRPKMmean"]), 2)
print "means of logged data"
print '%.2f' % round(np.mean(female_AUTO_comb_heart_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.mean(female_Z_comb_heart_min2["logRPKMmean"]), 2)
print ranksums(female_AUTO_comb_heart_min2["logRPKMmean"], female_Z_comb_heart_min2["logRPKMmean"])
print '%.2f' % round(ranksums(female_AUTO_comb_heart_min2["logRPKMmean"], female_Z_comb_heart_min2["logRPKMmean"])[0], 2 )
#Calculate two sided p-value
print mannwhitneyu(female_AUTO_comb_heart_min2["logRPKMmean"], female_Z_comb_heart_min2["logRPKMmean"])[1]*2
print "=============="
print "Liver male"
print "medians"
print '%.2f' % round(np.median(male_AUTO_comb_liver_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.median(male_Z_comb_liver_min2["logRPKMmean"]), 2)
print "means of logged data"
print '%.2f' % round(np.mean(male_AUTO_comb_liver_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.mean(male_Z_comb_liver_min2["logRPKMmean"]), 2)
print ranksums(male_AUTO_comb_liver_min2["logRPKMmean"], male_Z_comb_liver_min2["logRPKMmean"])
print '%.2f' % round(ranksums(male_AUTO_comb_liver_min2["logRPKMmean"], male_Z_comb_liver_min2["logRPKMmean"])[0], 2 )
#Calculate two sided p-value
print mannwhitneyu(male_AUTO_comb_liver_min2["logRPKMmean"], male_Z_comb_liver_min2["logRPKMmean"])[1]*2
print "---"
print "Liver female"
print "medians"
print '%.2f' % round(np.median(female_AUTO_comb_liver_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.median(female_Z_comb_liver_min2["logRPKMmean"]), 2)
print "means of logged data"
print '%.2f' % round(np.mean(female_AUTO_comb_liver_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.mean(female_Z_comb_liver_min2["logRPKMmean"]), 2)
print ranksums(female_AUTO_comb_liver_min2["logRPKMmean"], female_Z_comb_liver_min2["logRPKMmean"])
print '%.2f' % round(ranksums(female_AUTO_comb_liver_min2["logRPKMmean"], female_Z_comb_liver_min2["logRPKMmean"])[0], 2 )
#Calculate two sided p-value
print mannwhitneyu(female_AUTO_comb_liver_min2["logRPKMmean"], female_Z_comb_liver_min2["logRPKMmean"])[1]*2
print "=============="
print "Spleen male"
print "medians"
print '%.2f' % round(np.median(male_AUTO_comb_spleen_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.median(male_Z_comb_spleen_min2["logRPKMmean"]), 2)
print "means of logged data"
print '%.2f' % round(np.mean(male_AUTO_comb_spleen_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.mean(male_Z_comb_spleen_min2["logRPKMmean"]), 2)
print ranksums(male_AUTO_comb_spleen_min2["logRPKMmean"], male_Z_comb_spleen_min2["logRPKMmean"])
print '%.2f' % round(ranksums(male_AUTO_comb_spleen_min2["logRPKMmean"], male_Z_comb_spleen_min2["logRPKMmean"])[0], 2 )
#Calculate two sided p-value
print mannwhitneyu(male_AUTO_comb_spleen_min2["logRPKMmean"], male_Z_comb_spleen_min2["logRPKMmean"])[1]*2
print "---"
print "Spleen female"
print "medians"
print '%.2f' % round(np.median(female_AUTO_comb_spleen_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.median(female_Z_comb_spleen_min2["logRPKMmean"]), 2)
print "means of logged data"
print '%.2f' % round(np.mean(female_AUTO_comb_spleen_min2["logRPKMmean"]), 2)
print '%.2f' % round(np.mean(female_Z_comb_spleen_min2["logRPKMmean"]), 2)
print ranksums(female_AUTO_comb_spleen_min2["logRPKMmean"], female_Z_comb_spleen_min2["logRPKMmean"])
print '%.2f' % round(ranksums(female_AUTO_comb_spleen_min2["logRPKMmean"], female_Z_comb_spleen_min2["logRPKMmean"])[0], 2 )
#Calculate two sided p-value
print mannwhitneyu(female_AUTO_comb_spleen_min2["logRPKMmean"], female_Z_comb_spleen_min2["logRPKMmean"])[1]*2
print "=============="


Gonad male
3.85
3.74
means of logged data
3.65
3.72
(0.26905335761404309, 0.78788862253329928)
0.27
0.787893282295
---
Gonad female
3.89
3.18
means of logged data
3.82
3.19
(9.1959061143638419, 3.7184725071369637e-20)
9.20
3.71868196938e-20
==============
Heart male
medians
3.10
2.52
means of logged data
3.22
2.69
(6.69498308614821, 2.1569657883770082e-11)
6.69
2.15708414495e-11
---
Heart female
medians
3.10
2.05
means of logged data
3.23
2.31
(11.21846329752759, 3.3097156940157299e-29)
11.22
3.31001596687e-29
==============
Liver male
medians
2.50
2.02
means of logged data
2.73
2.36
(5.0231687494934194, 5.0825849677502516e-07)
5.02
5.08282232822e-07
---
Liver female
medians
2.53
1.61
means of logged data
2.74
2.00
(8.8832109981796137, 6.4957589163621832e-19)
8.88
6.49628266492e-19
==============
Spleen male
medians
3.72
3.29
means of logged data
3.73
3.30
(5.5014589854337235, 3.7666124087150526e-08)
5.50
3.76676450524e-08
---
Spleen female
medians
3.75
2.80
means of logged data
3.77
2.90
(11.192015720103473, 4.4617355984026459e-29)
11.19
4.46209385164e-29
==============

Comparison between male and female Z


In [328]:
print ranksums(female_Z_comb_gonad_min2["logRPKMmean"], male_Z_comb_gonad_min2["logRPKMmean"])
print ranksums(female_Z_comb_heart_min2["logRPKMmean"], male_Z_comb_heart_min2["logRPKMmean"])
print ranksums(female_Z_comb_liver_min2["logRPKMmean"], male_Z_comb_liver_min2["logRPKMmean"])
print ranksums(female_Z_comb_spleen_min2["logRPKMmean"], male_Z_comb_spleen_min2["logRPKMmean"])


(-5.6185393716807184, 1.9257851499510782e-08)
(-3.6104050043952887, 0.00030571925835241989)
(-2.8354446657353178, 0.0045761941754540198)
(-3.9425465190421387, 8.0621006359863942e-05)

Test for ties

NOTE: Sometimes similar gene expression across samples (once or twice) or sex-limited expression Causes ties and should thus maybe consider using mannwhitneyu test? http://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.mannwhitneyu.html#scipy.stats.mannwhitneyu NOTE 2015-09-30: Both tests provide almost identical results


In [329]:
# from collections import defaultdict
# D = defaultdict(list)
# for i,item in enumerate(male_Z_comb_liver_min2["logCPMmean"]):
#     D[item].append(i)
# D = {k:v for k,v in D.items() if len(v)>1}

# print D
# # male_AUTO_comb_heart_min2["logCPMmean"][7871]
# print male_Z_comb_liver_min2.iloc[[406]]
# print male_Z_comb_liver_min2.iloc[[441]]

Plot Z vs A data


In [330]:
plot_ZvsA_data = {"Gonad":{},"Spleen":{},"Heart":{},"Liver":{}}

plot_ZvsA_data["Gonad"]["Male-Auto"] = male_AUTO_comb_gonad_min2["logRPKMmean"]
plot_ZvsA_data["Gonad"]["Female-Auto"] = female_AUTO_comb_gonad_min2["logRPKMmean"]
plot_ZvsA_data["Gonad"]["Male-Z"] = male_Z_comb_gonad_min2["logRPKMmean"]
plot_ZvsA_data["Gonad"]["Female-Z"] = female_Z_comb_gonad_min2["logRPKMmean"]

plot_ZvsA_data["Heart"]["Male-Auto"] = male_AUTO_comb_heart_min2["logRPKMmean"]
plot_ZvsA_data["Heart"]["Female-Auto"] = female_AUTO_comb_heart_min2["logRPKMmean"]
plot_ZvsA_data["Heart"]["Male-Z"] = male_Z_comb_heart_min2["logRPKMmean"]
plot_ZvsA_data["Heart"]["Female-Z"] = female_Z_comb_heart_min2["logRPKMmean"]

plot_ZvsA_data["Liver"]["Male-Auto"] = male_AUTO_comb_liver_min2["logRPKMmean"]
plot_ZvsA_data["Liver"]["Female-Auto"] = female_AUTO_comb_liver_min2["logRPKMmean"]
plot_ZvsA_data["Liver"]["Male-Z"] = male_Z_comb_liver_min2["logRPKMmean"]
plot_ZvsA_data["Liver"]["Female-Z"] = female_Z_comb_liver_min2["logRPKMmean"]

plot_ZvsA_data["Spleen"]["Male-Auto"] = male_AUTO_comb_spleen_min2["logRPKMmean"]
plot_ZvsA_data["Spleen"]["Female-Auto"] = female_AUTO_comb_spleen_min2["logRPKMmean"]
plot_ZvsA_data["Spleen"]["Male-Z"] = male_Z_comb_spleen_min2["logRPKMmean"]
plot_ZvsA_data["Spleen"]["Female-Z"] = female_Z_comb_spleen_min2["logRPKMmean"]

In [331]:
def color_boxes(bp):
    red = False
    for box in bp['boxes']:
        # change outline color
        box.set( color='black', linewidth=1)

        if red == True:
            box.set( facecolor = '#E9E9E9' )
            red = False
        else:
            box.set( facecolor = '#2C3E50' )
            red = True

    for median in bp['medians']:
        median.set(color='black', linewidth=1)

    for whisker in bp['whiskers']:
        whisker.set(color='black', linewidth=1, linestyle="-")

    ## change color and linewidth of the caps
    for cap in bp['caps']:
        cap.set(color='black', linewidth=1)

        
font = {'family' : 'Source Sans Pro',
'weight' : 'normal',
'size'   : 11}

import matplotlib
matplotlib.rc('font', **font)


fig = plt.figure(figsize=(6, 6))
fig.subplots_adjust(wspace=0.1)

plotpos = 221
for name in ['Spleen', 'Heart', 'Liver', 'Gonad']:
    ax = fig.add_subplot(plotpos)
    
    plotpos+=1
    bp = ax.boxplot([plot_ZvsA_data[name][item] for item in ['Male-Auto', 'Male-Z', 'Female-Auto', 'Female-Z']], sym="", widths=0.7, patch_artist=True, notch=True)
    lengths = [len(plot_ZvsA_data[name][item]) for item in ['Male-Auto', 'Male-Z', 'Female-Auto', 'Female-Z']]

    ax.set_xticklabels(['(%i)\nAA' %lengths[0] , '(%i)\nZZ' %lengths[1], '(%i)\nAA' %lengths[2], '(%i)\nZW' %lengths[3]])
    ax.set_yticks(range(-2, 14, 2))
    ax.set_ylim(bottom=-3)
    color_boxes(bp)
    ax.grid(False)
    ax.annotate('Female', xy=(1, 0), xycoords='axes fraction',
                xytext=(-20, -30), textcoords='offset points',
                ha='right', va='top')
    ax.annotate('Male', xy=(1, 0), xycoords='axes fraction',
            xytext=(-95, -30), textcoords='offset points',
            ha='right', va='top')
    ax.patch.set_facecolor('None')
        # Remove the ticks on the right and top side
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    l = Line2D([1,2],[10.7,10.7],color="black")                                    
    ax.add_line(l)
    l = Line2D([1,1],[10.7,10.4],color="black")                                    
    ax.add_line(l)
    l = Line2D([2,2],[10.7,10.4],color="black")                                    
    ax.add_line(l)
    
        
    l = Line2D([3,4],[10.7,10.7],color="black")                                    
    ax.add_line(l)
    l = Line2D([3,3],[10.7,10.4],color="black")                                    
    ax.add_line(l)
    l = Line2D([4,4],[10.7,10.4],color="black")                                    
    ax.add_line(l)
    
    plt.axvline(x=2.5, color='black', ls='-')
    
    if name == "Gonad":
        plt.title("(d) Gonad")
        ax.text(1.35, 10.8, "ns", fontsize=11)
        ax.text(3.35, 10.5, "***", fontsize=11)
    elif name == "Heart":
        plt.title("(b) Heart")
        ax.text(1.35, 10.5, "***", fontsize=11)
        ax.text(3.35, 10.5, "***", fontsize=11)
    elif name == "Liver":
        plt.title("(c) Liver")
        ax.set_ylabel(r'$log_2$'+"(RPKM)")
        ax.text(1.35, 10.5, "***", fontsize=11)
        ax.text(3.35, 10.5, "***", fontsize=11)
    elif name == "Spleen":
        plt.title("(a) Spleen")
        ax.set_ylabel(r'$log_2$'+"(RPKM)")
        ax.text(1.35, 10.5, "***", fontsize=11)
        ax.text(3.35, 10.5, "***", fontsize=11)
        
fig.tight_layout(pad=3)

plt.savefig("2015-10-27-ZvsA-expression.png", dpi=300)
plt.savefig("2015-10-27-ZvsA-expression.pdf", dpi=300)


# Check the numbers Tissue Z Autosomes -------- --- ----------- Gonad 604 11340 Heart 493 9478 Liver 445 8938 Spleen 529 10318

Plot Z vs A logFC


In [332]:
plot_ZvsA_logFC = {"Gonad":{},"Spleen":{},"Heart":{},"Liver":{}}

plot_ZvsA_logFC["Gonad"]["Auto"] = AUTO_comb_gonad_min2["logFC"]
plot_ZvsA_logFC["Gonad"]["Z"] = Z_comb_gonad_min2["logFC"]

plot_ZvsA_logFC["Heart"]["Auto"] = AUTO_comb_heart_min2["logFC"]
plot_ZvsA_logFC["Heart"]["Z"] = Z_comb_heart_min2["logFC"]

plot_ZvsA_logFC["Liver"]["Auto"] = AUTO_comb_liver_min2["logFC"]
plot_ZvsA_logFC["Liver"]["Z"] = Z_comb_liver_min2["logFC"]

plot_ZvsA_logFC["Spleen"]["Auto"] = AUTO_comb_spleen_min2["logFC"]
plot_ZvsA_logFC["Spleen"]["Z"] = Z_comb_spleen_min2["logFC"]

In [333]:
def color_boxes(bp):
    red = False
    for box in bp['boxes']:
        # change outline color
        box.set( color='black', linewidth=1)
#         box.set( color='black', linewidth=1)

        if red == True:
            box.set( facecolor = '#E9E9E9' )
            red = False
        else:
            box.set( facecolor = '#2C3E50' )
            red = True

    for median in bp['medians']:
        median.set(color='black', linewidth=1)

    for whisker in bp['whiskers']:
        whisker.set(color='black', linewidth=1, linestyle="-")

    ## change color and linewidth of the caps
    for cap in bp['caps']:
        cap.set(color='black', linewidth=1)

        
font = {'family' : 'Source Sans Pro',
'weight' : 'normal',
'size'   : 11}

matplotlib.rc('font', **font)
        
fig, axes = plt.subplots(ncols=4, sharey=True, figsize=(6, 4))
fig.subplots_adjust(wspace=0.1)

for ax, name in zip(axes, ['Spleen', 'Heart', 'Liver', "Gonad"]):
    bp = ax.boxplot([plot_ZvsA_logFC[name][item] for item in ['Auto', 'Z']], sym="", widths=0.7, patch_artist=True, notch=True)
    lengths = [len(plot_ZvsA_logFC[name][item]) for item in ['Auto', 'Z']]
    ax.set(xticklabels=['Auto\n(%i)' %lengths[0], 'Z\n(%i)' %lengths[1]])
    color_boxes(bp)
    ax.grid(False)
    
    ax.patch.set_facecolor('None')
    # Remove the ticks on the right and top side
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    
    ax.axhline(y=0, c="black",linewidth=1,zorder=0, linestyle="--")
    if name == "Gonad":
        ax.set_title("(d) Gonad")
    elif name == "Spleen":
        ax.set_title("(a) Spleen")
        ax.set_ylabel('logFC')
    elif name == "Liver":
        ax.set_title("(c) Liver")
    elif name == "Heart":
        ax.set_title("(b) Heart")
    else:
        continue
fig.tight_layout()


Calculate Z:A ratios for the RPKM data

First we need to get the combined RPKM data from autosomes and the Z chromosome. This data is not filtered yet! (This has been done already above, just repeating to make sure the data has not changed)


In [334]:
print "Valid chromosomes: ", chicken_chromosomes

comb_gonad = comb_gonad[comb_gonad.index.map(lambda x: gid_to_chr[x] in chicken_chromosomes)]
comb_heart = comb_heart[comb_heart.index.map(lambda x: gid_to_chr[x] in chicken_chromosomes)]
comb_liver = comb_liver[comb_liver.index.map(lambda x: gid_to_chr[x] in chicken_chromosomes)]
comb_spleen = comb_spleen[comb_spleen.index.map(lambda x: gid_to_chr[x] in chicken_chromosomes)]

print comb_gonad.shape
print comb_heart.shape
print comb_liver.shape
print comb_spleen.shape


Valid chromosomes:  ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', 'Z']
(14496, 12)
(14496, 12)
(14496, 11)
(14496, 12)

Filter with a CPM threshold:

This is boring and repetitive, but I think this is very explicit

CPM unfiltered


In [335]:
# For no CPM filtering just copy the dataframe
comb_gonad_unfiltered = comb_gonad.copy()
comb_heart_unfiltered = comb_heart.copy()
comb_liver_unfiltered = comb_liver.copy()
comb_spleen_unfiltered = comb_spleen.copy()


# Filter the expression so it only contains Z expression
Z_comb_gonad_unfiltered, AUTO_comb_gonad_unfiltered = split_dataframe(comb_gonad_unfiltered, Z_genes)
Z_comb_heart_unfiltered, AUTO_comb_heart_unfiltered = split_dataframe(comb_heart_unfiltered, Z_genes)
Z_comb_liver_unfiltered, AUTO_comb_liver_unfiltered = split_dataframe(comb_liver_unfiltered, Z_genes)
Z_comb_spleen_unfiltered, AUTO_comb_spleen_unfiltered = split_dataframe(comb_spleen_unfiltered, Z_genes)


male_Z_comb_gonad_unfiltered = Z_comb_gonad_unfiltered.iloc[:,:4]
female_Z_comb_gonad_unfiltered = Z_comb_gonad_unfiltered.iloc[:,4:8]
male_Z_comb_heart_unfiltered = Z_comb_heart_unfiltered.iloc[:,:4]
female_Z_comb_heart_unfiltered = Z_comb_heart_unfiltered.iloc[:,4:8]
#Liver has one female sample less
male_Z_comb_liver_unfiltered = Z_comb_liver_unfiltered.iloc[:,:4]
female_Z_comb_liver_unfiltered = Z_comb_liver_unfiltered.iloc[:,4:7]
male_Z_comb_spleen_unfiltered = Z_comb_spleen_unfiltered.iloc[:,:4]
female_Z_comb_spleen_unfiltered = Z_comb_spleen_unfiltered.iloc[:,4:8]

male_AUTO_comb_gonad_unfiltered = AUTO_comb_gonad_unfiltered.iloc[:,:4]
female_AUTO_comb_gonad_unfiltered = AUTO_comb_gonad_unfiltered.iloc[:,4:8]
male_AUTO_comb_heart_unfiltered = AUTO_comb_heart_unfiltered.iloc[:,:4]
female_AUTO_comb_heart_unfiltered = AUTO_comb_heart_unfiltered.iloc[:,4:8]
#Liver has one female sample less
male_AUTO_comb_liver_unfiltered = AUTO_comb_liver_unfiltered.iloc[:,:4]
female_AUTO_comb_liver_unfiltered = AUTO_comb_liver_unfiltered.iloc[:,4:7]
male_AUTO_comb_spleen_unfiltered = AUTO_comb_spleen_unfiltered.iloc[:,:4]
female_AUTO_comb_spleen_unfiltered = AUTO_comb_spleen_unfiltered.iloc[:,4:8]


male_Z_comb_gonad_unfiltered_ratio = male_Z_comb_gonad_unfiltered.applymap(anti_log2).median() / male_AUTO_comb_gonad_unfiltered.applymap(anti_log2).median()
female_Z_comb_gonad_unfiltered_ratio = female_Z_comb_gonad_unfiltered.applymap(anti_log2).median() / female_AUTO_comb_gonad_unfiltered.applymap(anti_log2).median()

male_Z_comb_spleen_unfiltered_ratio = male_Z_comb_spleen_unfiltered.applymap(anti_log2).median() / male_AUTO_comb_spleen_unfiltered.applymap(anti_log2).median()
female_Z_comb_spleen_unfiltered_ratio =  female_Z_comb_spleen_unfiltered.applymap(anti_log2).median() / female_AUTO_comb_spleen_unfiltered.applymap(anti_log2).median()

male_Z_comb_heart_unfiltered_ratio = male_Z_comb_heart_unfiltered.applymap(anti_log2).median() / male_AUTO_comb_heart_unfiltered.applymap(anti_log2).median()
female_Z_comb_heart_unfiltered_ratio =  female_Z_comb_heart_unfiltered.applymap(anti_log2).median() / female_AUTO_comb_heart_unfiltered.applymap(anti_log2).median()

male_Z_comb_liver_unfiltered_ratio = male_Z_comb_liver_unfiltered.applymap(anti_log2).median() / male_AUTO_comb_liver_unfiltered.applymap(anti_log2).median()
female_Z_comb_liver_unfiltered_ratio =  female_Z_comb_liver_unfiltered.applymap(anti_log2).median() / female_AUTO_comb_liver_unfiltered.applymap(anti_log2).median()

In [336]:
female_Z_comb_liver_unfiltered.head()


Out[336]:
WTCHG_35308_131_LIVER_F_1_sequence WTCHG_35308_154_LIVER_F_1_sequence WTCHG_35308_190_LIVER_F_1_sequence
ENSGALG00000000004 -0.121779 0.030143 0.207727
ENSGALG00000000151 -0.893818 -1.329639 -1.613569
ENSGALG00000000161 3.024010 3.299994 3.305441
ENSGALG00000000184 -7.812517 -7.812517 -7.812517
ENSGALG00000000189 0.576336 0.644310 1.028262

CPM minimum 1


In [337]:
# Beware average CPM is log2 transformed.
# For avgCPM of 1 use 0 as filter
comb_gonad_min1 = filter_cpm(comb_gonad, 0)
comb_heart_min1 = filter_cpm(comb_heart, 0)
comb_liver_min1 = filter_cpm(comb_liver, 0)
comb_spleen_min1 = filter_cpm(comb_spleen, 0)


# Filter the expression so it only contains Z expression
Z_comb_gonad_min1, AUTO_comb_gonad_min1 = split_dataframe(comb_gonad_min1, Z_genes)
Z_comb_heart_min1, AUTO_comb_heart_min1 = split_dataframe(comb_heart_min1, Z_genes)
Z_comb_liver_min1, AUTO_comb_liver_min1 = split_dataframe(comb_liver_min1, Z_genes)
Z_comb_spleen_min1, AUTO_comb_spleen_min1 = split_dataframe(comb_spleen_min1, Z_genes)


male_Z_comb_gonad_min1 = Z_comb_gonad_min1.iloc[:,:4]
female_Z_comb_gonad_min1 = Z_comb_gonad_min1.iloc[:,4:8]
male_Z_comb_heart_min1 = Z_comb_heart_min1.iloc[:,:4]
female_Z_comb_heart_min1 = Z_comb_heart_min1.iloc[:,4:8]
#Liver has one female sample less
male_Z_comb_liver_min1 = Z_comb_liver_min1.iloc[:,:4]
female_Z_comb_liver_min1 = Z_comb_liver_min1.iloc[:,4:7]
male_Z_comb_spleen_min1 = Z_comb_spleen_min1.iloc[:,:4]
female_Z_comb_spleen_min1 = Z_comb_spleen_min1.iloc[:,4:8]

male_AUTO_comb_gonad_min1 = AUTO_comb_gonad_min1.iloc[:,:4]
female_AUTO_comb_gonad_min1 = AUTO_comb_gonad_min1.iloc[:,4:8]
male_AUTO_comb_heart_min1 = AUTO_comb_heart_min1.iloc[:,:4]
female_AUTO_comb_heart_min1 = AUTO_comb_heart_min1.iloc[:,4:8]
#Liver has one female sample less
male_AUTO_comb_liver_min1 = AUTO_comb_liver_min1.iloc[:,:4]
female_AUTO_comb_liver_min1 = AUTO_comb_liver_min1.iloc[:,4:7]
male_AUTO_comb_spleen_min1 = AUTO_comb_spleen_min1.iloc[:,:4]
female_AUTO_comb_spleen_min1 = AUTO_comb_spleen_min1.iloc[:,4:8]

male_Z_comb_gonad_min1_ratio = male_Z_comb_gonad_min1.applymap(anti_log2).median() / male_AUTO_comb_gonad_min1.applymap(anti_log2).median()
female_Z_comb_gonad_min1_ratio = female_Z_comb_gonad_min1.applymap(anti_log2).median() / female_AUTO_comb_gonad_min1.applymap(anti_log2).median()

male_Z_comb_spleen_min1_ratio = male_Z_comb_spleen_min1.applymap(anti_log2).median() / male_AUTO_comb_spleen_min1.applymap(anti_log2).median()
female_Z_comb_spleen_min1_ratio =  female_Z_comb_spleen_min1.applymap(anti_log2).median() / female_AUTO_comb_spleen_min1.applymap(anti_log2).median()

male_Z_comb_heart_min1_ratio = male_Z_comb_heart_min1.applymap(anti_log2).median() / male_AUTO_comb_heart_min1.applymap(anti_log2).median()
female_Z_comb_heart_min1_ratio =  female_Z_comb_heart_min1.applymap(anti_log2).median() / female_AUTO_comb_heart_min1.applymap(anti_log2).median()

male_Z_comb_liver_min1_ratio = male_Z_comb_liver_min1.applymap(anti_log2).median() / male_AUTO_comb_liver_min1.applymap(anti_log2).median()
female_Z_comb_liver_min1_ratio =  female_Z_comb_liver_min1.applymap(anti_log2).median() / female_AUTO_comb_liver_min1.applymap(anti_log2).median()

CPM minimum 2


In [338]:
# Beware average CPM is log2 transformed.
# For avgCPM of 2 use 1 | log2(2) = 1
comb_gonad_min2 = filter_cpm(comb_gonad, 1)
comb_heart_min2 = filter_cpm(comb_heart, 1)
comb_liver_min2 = filter_cpm(comb_liver, 1)
comb_spleen_min2 = filter_cpm(comb_spleen, 1)

# Filter the expression so it only contains Z expression
Z_comb_gonad_min2, AUTO_comb_gonad_min2 = split_dataframe(comb_gonad_min2, Z_genes)
Z_comb_heart_min2, AUTO_comb_heart_min2 = split_dataframe(comb_heart_min2, Z_genes)
Z_comb_liver_min2, AUTO_comb_liver_min2 = split_dataframe(comb_liver_min2, Z_genes)
Z_comb_spleen_min2, AUTO_comb_spleen_min2 = split_dataframe(comb_spleen_min2, Z_genes)


male_Z_comb_gonad_min2 = Z_comb_gonad_min2.iloc[:,:4]
female_Z_comb_gonad_min2 = Z_comb_gonad_min2.iloc[:,4:8]
male_Z_comb_heart_min2 = Z_comb_heart_min2.iloc[:,:4]
female_Z_comb_heart_min2 = Z_comb_heart_min2.iloc[:,4:8]
#Liver has one female sample less
male_Z_comb_liver_min2 = Z_comb_liver_min2.iloc[:,:4]
female_Z_comb_liver_min2 = Z_comb_liver_min2.iloc[:,4:7]
male_Z_comb_spleen_min2 = Z_comb_spleen_min2.iloc[:,:4]
female_Z_comb_spleen_min2 = Z_comb_spleen_min2.iloc[:,4:8]

male_AUTO_comb_gonad_min2 = AUTO_comb_gonad_min2.iloc[:,:4]
female_AUTO_comb_gonad_min2 = AUTO_comb_gonad_min2.iloc[:,4:8]
male_AUTO_comb_heart_min2 = AUTO_comb_heart_min2.iloc[:,:4]
female_AUTO_comb_heart_min2 = AUTO_comb_heart_min2.iloc[:,4:8]
#Liver has one female sample less
male_AUTO_comb_liver_min2 = AUTO_comb_liver_min2.iloc[:,:4]
female_AUTO_comb_liver_min2 = AUTO_comb_liver_min2.iloc[:,4:7]
male_AUTO_comb_spleen_min2 = AUTO_comb_spleen_min2.iloc[:,:4]
female_AUTO_comb_spleen_min2 = AUTO_comb_spleen_min2.iloc[:,4:8]

male_Z_comb_gonad_min2_ratio = male_Z_comb_gonad_min2.applymap(anti_log2).median() / male_AUTO_comb_gonad_min2.applymap(anti_log2).median()
female_Z_comb_gonad_min2_ratio = female_Z_comb_gonad_min2.applymap(anti_log2).median() / female_AUTO_comb_gonad_min2.applymap(anti_log2).median()

male_Z_comb_spleen_min2_ratio = male_Z_comb_spleen_min2.applymap(anti_log2).median() / male_AUTO_comb_spleen_min2.applymap(anti_log2).median()
female_Z_comb_spleen_min2_ratio =  female_Z_comb_spleen_min2.applymap(anti_log2).median() / female_AUTO_comb_spleen_min2.applymap(anti_log2).median()

male_Z_comb_heart_min2_ratio = male_Z_comb_heart_min2.applymap(anti_log2).median() / male_AUTO_comb_heart_min2.applymap(anti_log2).median()
female_Z_comb_heart_min2_ratio =  female_Z_comb_heart_min2.applymap(anti_log2).median() / female_AUTO_comb_heart_min2.applymap(anti_log2).median()

male_Z_comb_liver_min2_ratio = male_Z_comb_liver_min2.applymap(anti_log2).median() / male_AUTO_comb_liver_min2.applymap(anti_log2).median()
female_Z_comb_liver_min2_ratio =  female_Z_comb_liver_min2.applymap(anti_log2).median() / female_AUTO_comb_liver_min2.applymap(anti_log2).median()

CPM minimum 3


In [339]:
# Beware average CPM is log2 transformed.
# For avgCPM of 3 use log2(3)
comb_gonad_min3 = filter_cpm(comb_gonad, log2(3))
comb_heart_min3 = filter_cpm(comb_heart, log2(3))
comb_liver_min3 = filter_cpm(comb_liver, log2(3))
comb_spleen_min3 = filter_cpm(comb_spleen, log2(3))

# Filter the expression so it only contains Z expression
Z_comb_gonad_min3, AUTO_comb_gonad_min3 = split_dataframe(comb_gonad_min3, Z_genes)
Z_comb_heart_min3, AUTO_comb_heart_min3 = split_dataframe(comb_heart_min3, Z_genes)
Z_comb_liver_min3, AUTO_comb_liver_min3 = split_dataframe(comb_liver_min3, Z_genes)
Z_comb_spleen_min3, AUTO_comb_spleen_min3 = split_dataframe(comb_spleen_min3, Z_genes)


male_Z_comb_gonad_min3 = Z_comb_gonad_min3.iloc[:,:4]
female_Z_comb_gonad_min3 = Z_comb_gonad_min3.iloc[:,4:8]
male_Z_comb_heart_min3 = Z_comb_heart_min3.iloc[:,:4]
female_Z_comb_heart_min3 = Z_comb_heart_min3.iloc[:,4:8]
#Liver has one female sample less
male_Z_comb_liver_min3 = Z_comb_liver_min3.iloc[:,:4]
female_Z_comb_liver_min3 = Z_comb_liver_min3.iloc[:,4:7]
male_Z_comb_spleen_min3 = Z_comb_spleen_min3.iloc[:,:4]
female_Z_comb_spleen_min3 = Z_comb_spleen_min3.iloc[:,4:8]

male_AUTO_comb_gonad_min3 = AUTO_comb_gonad_min3.iloc[:,:4]
female_AUTO_comb_gonad_min3 = AUTO_comb_gonad_min3.iloc[:,4:8]
male_AUTO_comb_heart_min3 = AUTO_comb_heart_min3.iloc[:,:4]
female_AUTO_comb_heart_min3 = AUTO_comb_heart_min3.iloc[:,4:8]
#Liver has one female sample less
male_AUTO_comb_liver_min3 = AUTO_comb_liver_min3.iloc[:,:4]
female_AUTO_comb_liver_min3 = AUTO_comb_liver_min3.iloc[:,4:7]
male_AUTO_comb_spleen_min3 = AUTO_comb_spleen_min3.iloc[:,:4]
female_AUTO_comb_spleen_min3 = AUTO_comb_spleen_min3.iloc[:,4:8]

male_Z_comb_gonad_min3_ratio = male_Z_comb_gonad_min3.applymap(anti_log2).median() / male_AUTO_comb_gonad_min3.applymap(anti_log2).median()
female_Z_comb_gonad_min3_ratio = female_Z_comb_gonad_min3.applymap(anti_log2).median() / female_AUTO_comb_gonad_min3.applymap(anti_log2).median()

male_Z_comb_spleen_min3_ratio = male_Z_comb_spleen_min3.applymap(anti_log2).median() / male_AUTO_comb_spleen_min3.applymap(anti_log2).median()
female_Z_comb_spleen_min3_ratio =  female_Z_comb_spleen_min3.applymap(anti_log2).median() / female_AUTO_comb_spleen_min3.applymap(anti_log2).median()

male_Z_comb_heart_min3_ratio = male_Z_comb_heart_min3.applymap(anti_log2).median() / male_AUTO_comb_heart_min3.applymap(anti_log2).median()
female_Z_comb_heart_min3_ratio =  female_Z_comb_heart_min3.applymap(anti_log2).median() / female_AUTO_comb_heart_min3.applymap(anti_log2).median()

male_Z_comb_liver_min3_ratio = male_Z_comb_liver_min3.applymap(anti_log2).median() / male_AUTO_comb_liver_min3.applymap(anti_log2).median()
female_Z_comb_liver_min3_ratio =  female_Z_comb_liver_min3.applymap(anti_log2).median() / female_AUTO_comb_liver_min3.applymap(anti_log2).median()

CPM minimum 4


In [341]:
# Beware average CPM is log2 transformed.
# For avgCPM of 4 use log2(4)
comb_gonad_min4 = filter_cpm(comb_gonad, log2(4))
comb_heart_min4 = filter_cpm(comb_heart, log2(4))
comb_liver_min4 = filter_cpm(comb_liver, log2(4))
comb_spleen_min4 = filter_cpm(comb_spleen, log2(4))

# Filter the expression so it only contains Z expression
Z_comb_gonad_min4, AUTO_comb_gonad_min4 = split_dataframe(comb_gonad_min4, Z_genes)
Z_comb_heart_min4, AUTO_comb_heart_min4 = split_dataframe(comb_heart_min4, Z_genes)
Z_comb_liver_min4, AUTO_comb_liver_min4 = split_dataframe(comb_liver_min4, Z_genes)
Z_comb_spleen_min4, AUTO_comb_spleen_min4 = split_dataframe(comb_spleen_min4, Z_genes)


male_Z_comb_gonad_min4 = Z_comb_gonad_min4.iloc[:,:4]
female_Z_comb_gonad_min4 = Z_comb_gonad_min4.iloc[:,4:8]
male_Z_comb_heart_min4 = Z_comb_heart_min4.iloc[:,:4]
female_Z_comb_heart_min4 = Z_comb_heart_min4.iloc[:,4:8]
#Liver has one female sample less
male_Z_comb_liver_min4 = Z_comb_liver_min4.iloc[:,:4]
female_Z_comb_liver_min4 = Z_comb_liver_min4.iloc[:,4:7]
male_Z_comb_spleen_min4 = Z_comb_spleen_min4.iloc[:,:4]
female_Z_comb_spleen_min4 = Z_comb_spleen_min4.iloc[:,4:8]

male_AUTO_comb_gonad_min4 = AUTO_comb_gonad_min4.iloc[:,:4]
female_AUTO_comb_gonad_min4 = AUTO_comb_gonad_min4.iloc[:,4:8]
male_AUTO_comb_heart_min4 = AUTO_comb_heart_min4.iloc[:,:4]
female_AUTO_comb_heart_min4 = AUTO_comb_heart_min4.iloc[:,4:8]
#Liver has one female sample less
male_AUTO_comb_liver_min4 = AUTO_comb_liver_min4.iloc[:,:4]
female_AUTO_comb_liver_min4 = AUTO_comb_liver_min4.iloc[:,4:7]
male_AUTO_comb_spleen_min4 = AUTO_comb_spleen_min4.iloc[:,:4]
female_AUTO_comb_spleen_min4 = AUTO_comb_spleen_min4.iloc[:,4:8]


male_Z_comb_gonad_min4_ratio = male_Z_comb_gonad_min4.applymap(anti_log2).median() / male_AUTO_comb_gonad_min4.applymap(anti_log2).median()
female_Z_comb_gonad_min4_ratio = female_Z_comb_gonad_min4.applymap(anti_log2).median() / female_AUTO_comb_gonad_min4.applymap(anti_log2).median()

male_Z_comb_spleen_min4_ratio = male_Z_comb_spleen_min4.applymap(anti_log2).median() / male_AUTO_comb_spleen_min4.applymap(anti_log2).median()
female_Z_comb_spleen_min4_ratio =  female_Z_comb_spleen_min4.applymap(anti_log2).median() / female_AUTO_comb_spleen_min4.applymap(anti_log2).median()

male_Z_comb_heart_min4_ratio = male_Z_comb_heart_min4.applymap(anti_log2).median() / male_AUTO_comb_heart_min4.applymap(anti_log2).median()
female_Z_comb_heart_min4_ratio =  female_Z_comb_heart_min4.applymap(anti_log2).median() / female_AUTO_comb_heart_min4.applymap(anti_log2).median()

male_Z_comb_liver_min4_ratio = male_Z_comb_liver_min4.applymap(anti_log2).median() / male_AUTO_comb_liver_min4.applymap(anti_log2).median()
female_Z_comb_liver_min4_ratio =  female_Z_comb_liver_min4.applymap(anti_log2).median() / female_AUTO_comb_liver_min4.applymap(anti_log2).median()

CPM minimum 5


In [342]:
# Beware average CPM is log2 transformed.
# For avgCPM of 5 use log2(5)
comb_gonad_min5 = filter_cpm(comb_gonad, log2(5))
comb_heart_min5 = filter_cpm(comb_heart, log2(5))
comb_liver_min5 = filter_cpm(comb_liver, log2(5))
comb_spleen_min5 = filter_cpm(comb_spleen, log2(5))

# Filter the expression so it only contains Z expression
Z_comb_gonad_min5, AUTO_comb_gonad_min5 = split_dataframe(comb_gonad_min5, Z_genes)
Z_comb_heart_min5, AUTO_comb_heart_min5 = split_dataframe(comb_heart_min5, Z_genes)
Z_comb_liver_min5, AUTO_comb_liver_min5 = split_dataframe(comb_liver_min5, Z_genes)
Z_comb_spleen_min5, AUTO_comb_spleen_min5 = split_dataframe(comb_spleen_min5, Z_genes)


male_Z_comb_gonad_min5 = Z_comb_gonad_min5.iloc[:,:4]
female_Z_comb_gonad_min5 = Z_comb_gonad_min5.iloc[:,4:8]
male_Z_comb_heart_min5 = Z_comb_heart_min5.iloc[:,:4]
female_Z_comb_heart_min5 = Z_comb_heart_min5.iloc[:,4:8]
#Liver has one female sample less
male_Z_comb_liver_min5 = Z_comb_liver_min5.iloc[:,:4]
female_Z_comb_liver_min5 = Z_comb_liver_min5.iloc[:,4:7]
male_Z_comb_spleen_min5 = Z_comb_spleen_min5.iloc[:,:4]
female_Z_comb_spleen_min5 = Z_comb_spleen_min5.iloc[:,4:8]

male_AUTO_comb_gonad_min5 = AUTO_comb_gonad_min5.iloc[:,:4]
female_AUTO_comb_gonad_min5 = AUTO_comb_gonad_min5.iloc[:,4:8]
male_AUTO_comb_heart_min5 = AUTO_comb_heart_min5.iloc[:,:4]
female_AUTO_comb_heart_min5 = AUTO_comb_heart_min5.iloc[:,4:8]
#Liver has one female sample less
male_AUTO_comb_liver_min5 = AUTO_comb_liver_min5.iloc[:,:4]
female_AUTO_comb_liver_min5 = AUTO_comb_liver_min5.iloc[:,4:7]
male_AUTO_comb_spleen_min5 = AUTO_comb_spleen_min5.iloc[:,:4]
female_AUTO_comb_spleen_min5 = AUTO_comb_spleen_min5.iloc[:,4:8]

male_Z_comb_gonad_min5_ratio = male_Z_comb_gonad_min5.applymap(anti_log2).median() / male_AUTO_comb_gonad_min5.applymap(anti_log2).median()
female_Z_comb_gonad_min5_ratio = female_Z_comb_gonad_min5.applymap(anti_log2).median() / female_AUTO_comb_gonad_min5.applymap(anti_log2).median()

male_Z_comb_spleen_min5_ratio = male_Z_comb_spleen_min5.applymap(anti_log2).median() / male_AUTO_comb_spleen_min5.applymap(anti_log2).median()
female_Z_comb_spleen_min5_ratio =  female_Z_comb_spleen_min5.applymap(anti_log2).median() / female_AUTO_comb_spleen_min5.applymap(anti_log2).median()

male_Z_comb_heart_min5_ratio = male_Z_comb_heart_min5.applymap(anti_log2).median() / male_AUTO_comb_heart_min5.applymap(anti_log2).median()
female_Z_comb_heart_min5_ratio =  female_Z_comb_heart_min5.applymap(anti_log2).median() / female_AUTO_comb_heart_min5.applymap(anti_log2).median()

male_Z_comb_liver_min5_ratio = male_Z_comb_liver_min5.applymap(anti_log2).median() / male_AUTO_comb_liver_min5.applymap(anti_log2).median()
female_Z_comb_liver_min5_ratio =  female_Z_comb_liver_min5.applymap(anti_log2).median() / female_AUTO_comb_liver_min5.applymap(anti_log2).median()

Now we combine all the ratios from unfiltered to min5 in a list with alternating male/female ratios. This is also the order these will be plotted in.


In [343]:
gonad_ratios = [male_Z_comb_gonad_unfiltered_ratio,
                 female_Z_comb_gonad_unfiltered_ratio,
                 male_Z_comb_gonad_min1_ratio,
                 female_Z_comb_gonad_min1_ratio,
                 male_Z_comb_gonad_min2_ratio,
                 female_Z_comb_gonad_min2_ratio,
                 male_Z_comb_gonad_min3_ratio,
                 female_Z_comb_gonad_min3_ratio,
                 male_Z_comb_gonad_min4_ratio,
                 female_Z_comb_gonad_min4_ratio,
                 male_Z_comb_gonad_min5_ratio,
                 female_Z_comb_gonad_min5_ratio]

heart_ratios = [male_Z_comb_heart_unfiltered_ratio,
                 female_Z_comb_heart_unfiltered_ratio,
                 male_Z_comb_heart_min1_ratio,
                 female_Z_comb_heart_min1_ratio,
                 male_Z_comb_heart_min2_ratio,
                 female_Z_comb_heart_min2_ratio,
                 male_Z_comb_heart_min3_ratio,
                 female_Z_comb_heart_min3_ratio,
                 male_Z_comb_heart_min4_ratio,
                 female_Z_comb_heart_min4_ratio,
                 male_Z_comb_heart_min5_ratio,
                 female_Z_comb_heart_min5_ratio]

liver_ratios = [male_Z_comb_liver_unfiltered_ratio,
                 female_Z_comb_liver_unfiltered_ratio,
                 male_Z_comb_liver_min1_ratio,
                 female_Z_comb_liver_min1_ratio,
                 male_Z_comb_liver_min2_ratio,
                 female_Z_comb_liver_min2_ratio,
                 male_Z_comb_liver_min3_ratio,
                 female_Z_comb_liver_min3_ratio,
                 male_Z_comb_liver_min4_ratio,
                 female_Z_comb_liver_min4_ratio,
                 male_Z_comb_liver_min5_ratio,
                 female_Z_comb_liver_min5_ratio]

spleen_ratios = [male_Z_comb_spleen_unfiltered_ratio,
                 female_Z_comb_spleen_unfiltered_ratio,
                 male_Z_comb_spleen_min1_ratio,
                 female_Z_comb_spleen_min1_ratio,
                 male_Z_comb_spleen_min2_ratio,
                 female_Z_comb_spleen_min2_ratio,
                 male_Z_comb_spleen_min3_ratio,
                 female_Z_comb_spleen_min3_ratio,
                 male_Z_comb_spleen_min4_ratio,
                 female_Z_comb_spleen_min4_ratio,
                 male_Z_comb_spleen_min5_ratio,
                 female_Z_comb_spleen_min5_ratio]

In [344]:
def color_boxes(bp):
    red = False
    for box in bp['boxes']:
        # change outline color
        box.set( color='black', linewidth=1)

        if red == True:
            box.set( facecolor = '#e74c3c' )
            red = False
        else:
            box.set( facecolor = '#3498DB' )
            red = True
    for median in bp['medians']:
        median.set(color='black', linewidth=1)

    ## change the style of fliers and their fill
    for flier in bp['fliers']:
        flier.set(marker='o', color='black', alpha=0.5, markersize=4)
        
        ## change color and linewidth of the whiskers
    for whisker in bp['whiskers']:
        whisker.set(color='black', linewidth=1)

    ## change color and linewidth of the caps
    for cap in bp['caps']:
        cap.set(color='black', linewidth=1)

# Create a figure instance
fig = plt.figure(1, figsize=(6, 6))
plt.rc("font", size=12)
ax1 = fig.add_subplot(221)
bp = ax1.boxplot(spleen_ratios, 
            notch=False, # box instead of notch shape 
            sym='ro',    # red squares for outliers
            positions = [1, 2, 4, 5, 7, 8, 10, 11, 13,14, 16,17], 
            widths = 0.9,
             patch_artist=True)

plt.axhline(y=1, color='black', ls='--')
color_boxes(bp)
plt.ylim(0.4,1.1)
plt.title('(a) Spleen')
plt.ylabel('Z:A expression ratio')
plt.xlabel('Expression levels (CPM)')

ax2 = fig.add_subplot(222, sharex=ax1, sharey=ax1)
bp = ax2.boxplot(heart_ratios, 
            notch=False, # box instead of notch shape 
            sym='ro',    # red squares for outliers
            positions = [1, 2, 4, 5, 7, 8, 10, 11, 13,14, 16,17], 
            widths = 0.9,
             patch_artist=True)
# Remove the ticks on the right and top side
ax2.yaxis.set_ticks_position('left')
ax2.xaxis.set_ticks_position('bottom')
plt.axhline(y=1, color='black', ls='--')
color_boxes(bp)
plt.ylim(0.4,1.1)
plt.title('(b) Heart')
plt.ylabel('Z:A expression ratio')
plt.xlabel('Expression levels (CPM)')


ax3 = fig.add_subplot(223, sharex=ax1)
bp = ax3.boxplot(liver_ratios, 
            notch=False, # box instead of notch shape 
            sym='ro',    # red squares for outliers
            positions = [1, 2, 4, 5, 7, 8, 10, 11, 13,14, 16,17], 
            widths = 0.9,
             patch_artist=True)
# Remove the ticks on the right and top side
ax3.yaxis.set_ticks_position('left')
ax3.xaxis.set_ticks_position('bottom')
plt.axhline(y=1, color='black', ls='--')
color_boxes(bp)
plt.ylim(0.4,1.1)
plt.title('(c) Liver')
plt.ylabel('Z:A expression ratio')
plt.xlabel('Expression levels (CPM)')

ax4 = fig.add_subplot(224, sharex=ax1)
bp = ax4.boxplot(gonad_ratios, 
            notch=False, # box instead of notch shape 
            sym='ro',    # red squares for outliers
            positions = [1, 2, 4, 5, 7, 8, 10, 11, 13,14, 16,17], 
            widths = 0.9,
             patch_artist=True)

# Remove the ticks on the right and top side
ax4.yaxis.set_ticks_position('left')
ax4.xaxis.set_ticks_position('bottom')

plt.axhline(y=1, color='black', ls='--')
color_boxes(bp)
plt.ylim(0.4,1.1)
ax1.set_xticklabels(['>0', '>1', '>2', '>3', '>4', '>5'])
ax1.set_xticks([1.5, 4.5, 7.5, 10.5, 13.5, 16.5])
plt.title('(d) Gonad')
plt.ylabel('Z:A expression ratio')
plt.xlabel('Expression levels (CPM)')

fig.tight_layout(pad=2)
plt.savefig("2015-10-27-ZtoA-ratio.png", dpi=300)
plt.savefig("2015-10-27-ZtoA-ratio.pdf", dpi=300)


Calculate per chromosome expression


In [345]:
def get_chromosomal_expr(df, gid_to_chr):
    '''Make a dictionary of chromosomes containing the average expression for genes across samples'''
    chromosomes = defaultdict(list)  
    
    for row_index, row in df.iterrows():
        chromosomes[gid_to_chr[row_index]].append(log2(row.map(anti_log2).mean()))

    return chromosomes

# These are already filtered down to only contain valid chromosomes (autosomes + Z)
comb_gonad_min2 = filter_cpm(comb_gonad, 1)
comb_heart_min2 = filter_cpm(comb_heart, 1)
comb_liver_min2 = filter_cpm(comb_liver, 1)
comb_spleen_min2 = filter_cpm(comb_spleen, 1)


male_comb_gonad_min2 = comb_gonad_min2.iloc[:,:4]
female_comb_gonad_min2= comb_gonad_min2.iloc[:,4:8]

male_comb_heart_min2 = comb_heart_min2.iloc[:,:4]
female_comb_heart_min2= comb_heart_min2.iloc[:,4:8]

#Liver has one female sample less
male_comb_liver_min2 = comb_liver_min2.iloc[:,:4]
female_comb_liver_min2 = comb_liver_min2.iloc[:,4:7]

male_comb_spleen_min2 = comb_spleen_min2.iloc[:,:4]
female_comb_spleen_min2 = comb_spleen_min2.iloc[:,4:8]


M_GONAD_MIN2_CHR = get_chromosomal_expr(male_comb_gonad_min2, gid_to_chr)
F_GONAD_MIN2_CHR = get_chromosomal_expr(female_comb_gonad_min2, gid_to_chr)

M_HEART_MIN2_CHR = get_chromosomal_expr(male_comb_heart_min2, gid_to_chr)
F_HEART_MIN2_CHR = get_chromosomal_expr(female_comb_heart_min2, gid_to_chr)

M_LIVER_MIN2_CHR = get_chromosomal_expr(male_comb_liver_min2, gid_to_chr)
F_LIVER_MIN2_CHR = get_chromosomal_expr(female_comb_liver_min2, gid_to_chr)

M_SPLEEN_MIN2_CHR = get_chromosomal_expr(male_comb_spleen_min2, gid_to_chr)
F_SPLEEN_MIN2_CHR = get_chromosomal_expr(female_comb_spleen_min2, gid_to_chr)

In [346]:
print chicken_chromosomes


['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', 'Z']

In [347]:
from itertools import chain

# Create a figure instance
fig = plt.figure(1, figsize=(6, 10))
plt.rc("font", size=12)
ax1 = fig.add_subplot(411)


def color_boxes(bp):
    red = False
    for box in bp['boxes']:
        # change outline color
        box.set( color='black', linewidth=1)

        if red == True:
            box.set( facecolor = '#e74c3c' )
            red = False
        else:
            box.set( facecolor = '#3498DB' )
            red = True
    for median in bp['medians']:
        median.set(color='black', linewidth=1)

    ## change the style of fliers and their fill
    for flier in bp['fliers']:
        flier.set(marker='o', color='black', alpha=0.5, markersize=4)
        
        ## change color and linewidth of the whiskers
    for whisker in bp['whiskers']:
        whisker.set(color='black', linewidth=1)

    ## change color and linewidth of the caps
    for cap in bp['caps']:
        cap.set(color='black', linewidth=1)



M_SPLEEN_EXPR = [M_SPLEEN_MIN2_CHR[c] for c in chicken_chromosomes]
F_SPLEEN_EXPR = [F_SPLEEN_MIN2_CHR[c] for c in chicken_chromosomes]


# From http://stackoverflow.com/questions/3471999/how-do-i-merge-two-lists-into-a-single-list
# Takes a list a = [a,b,c] and b = [a,b,c] and combines it into [a,a,b,b,c,c]
COMB_SPLEEN_EXPR = list(chain(*zip(M_SPLEEN_EXPR,F_SPLEEN_EXPR)))

bp = ax1.boxplot(COMB_SPLEEN_EXPR,
            notch=False, # box instead of notch shape 
            sym='',    # red squares for outliers
            widths = 1,
             patch_artist=True)
color_boxes(bp)
# ax1.set_xticklabels(chicken_chromosomes)
# # ax1.axhline(y=comb_spleen_min2_median,c="black",linewidth=2, linestyle="--")
# ax1.set_xticks(list(np.arange(1.5, len(chicken_chromosomes)*2, 2)))

ax1.set_xticklabels(["1", "5", "10", "15", "20", "25", "Z"])
# 57.5 is the position of the Z chromosome
ax1.set_xticks([1.5, 9.5, 19.5, 29.5, 39.5, 49.5, 57.5])
# Only show ticks on the left and bottom spines
ax1.yaxis.set_ticks_position('left')
ax1.xaxis.set_ticks_position('bottom')
ax1.set_yticks(range(-2, 14, 2))
ax1.set_ylim(bottom=-2)
plt.title('(a) Spleen')
plt.ylabel(r'$log_2$'+"(RPKM)")


ax2 = fig.add_subplot(412)


M_HEART_EXPR = [M_HEART_MIN2_CHR[c] for c in chicken_chromosomes]
F_HEART_EXPR = [F_HEART_MIN2_CHR[c] for c in chicken_chromosomes]

#From http://stackoverflow.com/questions/3471999/how-do-i-merge-two-lists-into-a-single-list
COMB_HEART_EXPR = list(chain(*zip(M_HEART_EXPR,F_HEART_EXPR)))

bp = ax2.boxplot(COMB_HEART_EXPR,
            notch=False, # box instead of notch shape 
            sym='',    # red squares for outliers
            widths = 1,
             patch_artist=True)
color_boxes(bp)
# ax2.set_xticklabels(chicken_chromosomes)
# ax2.axhline(y=comb_heart_min2_median,c="black",linewidth=2, linestyle="--")
# ax2.set_xticks(list(np.arange(1.5, len(chicken_chromosomes)*2, 2)))
ax2.set_xticklabels(["1", "5", "10", "15", "20", "25", "Z"])
# ax4.axhline(y=comb_gonad_min2_median,c="white",linewidth=1, linestyle="-")

# 57.5 is the position of the Z chromosome
ax2.set_xticks([1.5, 9.5, 19.5, 29.5, 39.5, 49.5, 57.5])
ax2.yaxis.set_ticks_position('left')
ax2.xaxis.set_ticks_position('bottom')
ax2.set_yticks(range(-2, 14, 2))
ax2.set_ylim(bottom=-2)
plt.title('(b) Heart')
plt.ylabel(r'$log_2$'+"(RPKM)")

ax3 = fig.add_subplot(413)


M_LIVER_EXPR = [M_LIVER_MIN2_CHR[c] for c in chicken_chromosomes]
F_LIVER_EXPR = [F_LIVER_MIN2_CHR[c] for c in chicken_chromosomes]

#From http://stackoverflow.com/questions/3471999/how-do-i-merge-two-lists-into-a-single-list
COMB_LIVER_EXPR = list(chain(*zip(M_LIVER_EXPR,F_LIVER_EXPR)))

bp = ax3.boxplot(COMB_LIVER_EXPR,
            notch=False, # box instead of notch shape 
            sym='',    # red squares for outliers
            widths = 1,
             patch_artist=True)
color_boxes(bp)
# ax3.set_xticklabels(chicken_chromosomes)
# ax3.axhline(y=comb_liver_min2_median,c="black",linewidth=2, linestyle="--")
# ax3.set_xticks(list(np.arange(1.5, len(chicken_chromosomes)*2, 2)))

ax3.set_xticklabels(["1", "5", "10", "15", "20", "25", "Z"])
# 57.5 is the position of the Z chromosome
ax3.set_xticks([1.5, 9.5, 19.5, 29.5, 39.5, 49.5, 57.5])
ax3.yaxis.set_ticks_position('left')
ax3.xaxis.set_ticks_position('bottom')
ax3.set_yticks(range(-2, 14, 2))
ax3.set_ylim(bottom=-2)
plt.title('(c) Liver')
plt.ylabel(r'$log_2$'+"(RPKM)")
# plt.xlabel('Chromosomes')

ax4 = fig.add_subplot(414)


M_GONAD_EXPR = [M_GONAD_MIN2_CHR[c] for c in chicken_chromosomes]
F_GONAD_EXPR = [F_GONAD_MIN2_CHR[c] for c in chicken_chromosomes]

#From http://stackoverflow.com/questions/3471999/how-do-i-merge-two-lists-into-a-single-list
COMB_GONAD_EXPR = list(chain(*zip(M_GONAD_EXPR,F_GONAD_EXPR)))

bp = ax4.boxplot(COMB_GONAD_EXPR,
            notch=False, # box instead of notch shape 
            sym='',    # red squares for outliers
            widths = 1,
             patch_artist=True)
color_boxes(bp)
ax4.set_xticklabels(["1", "5", "10", "15", "20", "25", "Z"])
# ax4.axhline(y=comb_gonad_min2_median,c="white",linewidth=1, linestyle="-")

# 57.5 is the position of the Z chromosome
ax4.set_xticks([1.5, 9.5, 19.5, 29.5, 39.5, 49.5, 57.5])
ax4.yaxis.set_ticks_position('left')
ax4.xaxis.set_ticks_position('bottom')
ax4.set_yticks(range(-2, 14, 2))
ax4.set_ylim(bottom=-2)
plt.title('(d) Gonad')
plt.ylabel(r'$log_2$'+"(RPKM)")
plt.xlabel('Chromosomes')

# fig.savefig('2015-06-04-RPKM2-chromosome-expr-all-tissues.png', bbox_inches='tight')
fig.tight_layout(pad=2)
plt.savefig("2015-10-27-chromosomal-expression.png", dpi=300)
plt.savefig("2015-10-27-chromosomal-expression.pdf", dpi=300)


All the medians look correct High overlap in expression, some genes not in other tissues

Z expression by quartiles


In [348]:
male_Z_comb_gonad_min2 = Z_comb_gonad_min2.iloc[:,:4]
female_Z_comb_gonad_min2 = Z_comb_gonad_min2.iloc[:,4:8]

male_Z_comb_heart_min2 = Z_comb_heart_min2.iloc[:,:4]
female_Z_comb_heart_min2 = Z_comb_heart_min2.iloc[:,4:8]

#Liver has one female sample less
male_Z_comb_liver_min2 = Z_comb_liver_min2.iloc[:,:4]
female_Z_comb_liver_min2 = Z_comb_liver_min2.iloc[:,4:7]

male_Z_comb_spleen_min2 = Z_comb_spleen_min2.iloc[:,:4]
female_Z_comb_spleen_min2 = Z_comb_spleen_min2.iloc[:,4:8]

In [349]:
def logged_data_mean(df):
    logCPMmean = pd.Series(df.applymap(anti_log2).mean(axis=1).map(log2), name="logRPKMmean")
    return logCPMmean

male_Z_comb_gonad_min2 = pd.concat([male_Z_comb_gonad_min2, logged_data_mean(male_Z_comb_gonad_min2)], axis=1)
female_Z_comb_gonad_min2 = pd.concat([female_Z_comb_gonad_min2, logged_data_mean(female_Z_comb_gonad_min2)], axis=1)

male_Z_comb_heart_min2 = pd.concat([male_Z_comb_heart_min2, logged_data_mean(male_Z_comb_heart_min2)], axis=1)
female_Z_comb_heart_min2 = pd.concat([female_Z_comb_heart_min2, logged_data_mean(female_Z_comb_heart_min2)], axis=1)

male_Z_comb_liver_min2 = pd.concat([male_Z_comb_liver_min2, logged_data_mean(male_Z_comb_liver_min2)], axis=1)
female_Z_comb_liver_min2 = pd.concat([female_Z_comb_liver_min2, logged_data_mean(female_Z_comb_liver_min2)], axis=1)

male_Z_comb_spleen_min2 = pd.concat([male_Z_comb_spleen_min2, logged_data_mean(male_Z_comb_spleen_min2)], axis=1)
female_Z_comb_spleen_min2 = pd.concat([female_Z_comb_spleen_min2, logged_data_mean(female_Z_comb_spleen_min2)], axis=1)

Use pd.qcut to produce quartiles


In [350]:
quartiles_male_Z_comb_gonad_min2 = pd.qcut(male_Z_comb_gonad_min2["logRPKMmean"], [0, .25, .5, .75, 1.], labels=["1", "2", "3", "4"])
quartiles_male_Z_comb_gonad_min2.name = "quartiles"
male_Z_comb_gonad_min2 = pd.concat([male_Z_comb_gonad_min2, quartiles_male_Z_comb_gonad_min2], axis=1)
female_Z_comb_gonad_min2 = pd.concat([female_Z_comb_gonad_min2, quartiles_male_Z_comb_gonad_min2], axis=1)

quartiles_male_Z_comb_heart_min2 = pd.qcut(male_Z_comb_heart_min2["logRPKMmean"], [0, .25, .5, .75, 1.], labels=["1", "2", "3", "4"])
quartiles_male_Z_comb_heart_min2.name = "quartiles"
male_Z_comb_heart_min2 = pd.concat([male_Z_comb_heart_min2, quartiles_male_Z_comb_heart_min2], axis=1)
female_Z_comb_heart_min2 = pd.concat([female_Z_comb_heart_min2, quartiles_male_Z_comb_heart_min2], axis=1)

quartiles_male_Z_comb_liver_min2 = pd.qcut(male_Z_comb_liver_min2["logRPKMmean"], [0, .25, .5, .75, 1.], labels=["1", "2", "3", "4"])
quartiles_male_Z_comb_liver_min2.name = "quartiles"
male_Z_comb_liver_min2 = pd.concat([male_Z_comb_liver_min2, quartiles_male_Z_comb_liver_min2], axis=1)
female_Z_comb_liver_min2 = pd.concat([female_Z_comb_liver_min2, quartiles_male_Z_comb_liver_min2], axis=1)

quartiles_male_Z_comb_spleen_min2 = pd.qcut(male_Z_comb_spleen_min2["logRPKMmean"], [0, .25, .5, .75, 1.], labels=["1", "2", "3", "4"])
quartiles_male_Z_comb_spleen_min2.name = "quartiles"
male_Z_comb_spleen_min2 = pd.concat([male_Z_comb_spleen_min2, quartiles_male_Z_comb_spleen_min2], axis=1)
female_Z_comb_spleen_min2 = pd.concat([female_Z_comb_spleen_min2, quartiles_male_Z_comb_spleen_min2], axis=1)

In [351]:
male_gonad_grp = male_Z_comb_gonad_min2.groupby('quartiles')
female_gonad_grp = female_Z_comb_gonad_min2.groupby('quartiles')

male_heart_grp = male_Z_comb_heart_min2.groupby('quartiles')
female_heart_grp = female_Z_comb_heart_min2.groupby('quartiles')

male_liver_grp = male_Z_comb_liver_min2.groupby('quartiles')
female_liver_grp = female_Z_comb_liver_min2.groupby('quartiles')

male_spleen_grp = male_Z_comb_spleen_min2.groupby('quartiles')
female_spleen_grp = female_Z_comb_spleen_min2.groupby('quartiles')

In [352]:
gonad_quartiles = [male_gonad_grp.get_group("1")["logRPKMmean"],
                female_gonad_grp.get_group("1")["logRPKMmean"],
                male_gonad_grp.get_group("2")["logRPKMmean"],
                female_gonad_grp.get_group("2")["logRPKMmean"],
                male_gonad_grp.get_group("3")["logRPKMmean"],
                female_gonad_grp.get_group("3")["logRPKMmean"],
                male_gonad_grp.get_group("4")["logRPKMmean"],
                female_gonad_grp.get_group("4")["logRPKMmean"]]

heart_quartiles = [male_heart_grp.get_group("1")["logRPKMmean"],
                female_heart_grp.get_group("1")["logRPKMmean"],
                male_heart_grp.get_group("2")["logRPKMmean"],
                female_heart_grp.get_group("2")["logRPKMmean"],
                male_heart_grp.get_group("3")["logRPKMmean"],
                female_heart_grp.get_group("3")["logRPKMmean"],
                male_heart_grp.get_group("4")["logRPKMmean"],
                female_heart_grp.get_group("4")["logRPKMmean"]]

liver_quartiles = [male_liver_grp.get_group("1")["logRPKMmean"],
                female_liver_grp.get_group("1")["logRPKMmean"],
                male_liver_grp.get_group("2")["logRPKMmean"],
                female_liver_grp.get_group("2")["logRPKMmean"],
                male_liver_grp.get_group("3")["logRPKMmean"],
                female_liver_grp.get_group("3")["logRPKMmean"],
                male_liver_grp.get_group("4")["logRPKMmean"],
                female_liver_grp.get_group("4")["logRPKMmean"]]

spleen_quartiles = [male_spleen_grp.get_group("1")["logRPKMmean"],
                female_spleen_grp.get_group("1")["logRPKMmean"],
                male_spleen_grp.get_group("2")["logRPKMmean"],
                female_spleen_grp.get_group("2")["logRPKMmean"],
                male_spleen_grp.get_group("3")["logRPKMmean"],
                female_spleen_grp.get_group("3")["logRPKMmean"],
                male_spleen_grp.get_group("4")["logRPKMmean"],
                female_spleen_grp.get_group("4")["logRPKMmean"]]

print [len(i) for i in spleen_quartiles]
print [len(i) for i in heart_quartiles]
print [len(i) for i in liver_quartiles]
print [len(i) for i in gonad_quartiles]


[133, 133, 132, 132, 132, 132, 132, 132]
[124, 124, 123, 123, 123, 123, 123, 123]
[112, 112, 111, 111, 111, 111, 111, 111]
[151, 151, 151, 151, 151, 151, 151, 151]

In [353]:
print "Gonad quartiles"
print ranksums(male_gonad_grp.get_group("1")["logRPKMmean"], female_gonad_grp.get_group("1")["logRPKMmean"])
print ranksums(male_gonad_grp.get_group("2")["logRPKMmean"], female_gonad_grp.get_group("2")["logRPKMmean"])
print ranksums(male_gonad_grp.get_group("3")["logRPKMmean"], female_gonad_grp.get_group("3")["logRPKMmean"])
print ranksums(male_gonad_grp.get_group("4")["logRPKMmean"], female_gonad_grp.get_group("4")["logRPKMmean"])
print "-----"
print mannwhitneyu(male_gonad_grp.get_group("1")["logRPKMmean"], female_gonad_grp.get_group("1")["logRPKMmean"])[1]*2
print mannwhitneyu(male_gonad_grp.get_group("2")["logRPKMmean"], female_gonad_grp.get_group("2")["logRPKMmean"])[1]*2
print mannwhitneyu(male_gonad_grp.get_group("3")["logRPKMmean"], female_gonad_grp.get_group("3")["logRPKMmean"])[1]*2
print mannwhitneyu(male_gonad_grp.get_group("4")["logRPKMmean"], female_gonad_grp.get_group("4")["logRPKMmean"])[1]*2

print "Heart quartiles"
print ranksums(male_heart_grp.get_group("1")["logRPKMmean"], female_heart_grp.get_group("1")["logRPKMmean"])
print ranksums(male_heart_grp.get_group("2")["logRPKMmean"], female_heart_grp.get_group("2")["logRPKMmean"])
print ranksums(male_heart_grp.get_group("3")["logRPKMmean"], female_heart_grp.get_group("3")["logRPKMmean"])
print ranksums(male_heart_grp.get_group("4")["logRPKMmean"], female_heart_grp.get_group("4")["logRPKMmean"])
print "-----"
print mannwhitneyu(male_heart_grp.get_group("1")["logRPKMmean"], female_heart_grp.get_group("1")["logRPKMmean"])[1]*2
print mannwhitneyu(male_heart_grp.get_group("2")["logRPKMmean"], female_heart_grp.get_group("2")["logRPKMmean"])[1]*2
print mannwhitneyu(male_heart_grp.get_group("3")["logRPKMmean"], female_heart_grp.get_group("3")["logRPKMmean"])[1]*2
print mannwhitneyu(male_heart_grp.get_group("4")["logRPKMmean"], female_heart_grp.get_group("4")["logRPKMmean"])[1]*2

print "Liver quartiles"
print ranksums(male_liver_grp.get_group("1")["logRPKMmean"], female_liver_grp.get_group("1")["logRPKMmean"])
print ranksums(male_liver_grp.get_group("2")["logRPKMmean"], female_liver_grp.get_group("2")["logRPKMmean"])
print ranksums(male_liver_grp.get_group("3")["logRPKMmean"], female_liver_grp.get_group("3")["logRPKMmean"])
print ranksums(male_liver_grp.get_group("4")["logRPKMmean"], female_liver_grp.get_group("4")["logRPKMmean"])
print "-----"
print mannwhitneyu(male_liver_grp.get_group("1")["logRPKMmean"], female_liver_grp.get_group("1")["logRPKMmean"])[1]*2
print mannwhitneyu(male_liver_grp.get_group("2")["logRPKMmean"], female_liver_grp.get_group("2")["logRPKMmean"])[1]*2
print mannwhitneyu(male_liver_grp.get_group("3")["logRPKMmean"], female_liver_grp.get_group("3")["logRPKMmean"])[1]*2
print mannwhitneyu(male_liver_grp.get_group("4")["logRPKMmean"], female_liver_grp.get_group("4")["logRPKMmean"])[1]*2

print "Spleen quartiles"
print ranksums(male_spleen_grp.get_group("1")["logRPKMmean"], female_spleen_grp.get_group("1")["logRPKMmean"])
print ranksums(male_spleen_grp.get_group("2")["logRPKMmean"], female_spleen_grp.get_group("2")["logRPKMmean"])
print ranksums(male_spleen_grp.get_group("3")["logRPKMmean"], female_spleen_grp.get_group("3")["logRPKMmean"])
print ranksums(male_spleen_grp.get_group("4")["logRPKMmean"], female_spleen_grp.get_group("4")["logRPKMmean"])
print "-----"
print mannwhitneyu(male_spleen_grp.get_group("1")["logRPKMmean"], female_spleen_grp.get_group("1")["logRPKMmean"])[1]*2
print mannwhitneyu(male_spleen_grp.get_group("2")["logRPKMmean"], female_spleen_grp.get_group("2")["logRPKMmean"])[1]*2
print mannwhitneyu(male_spleen_grp.get_group("3")["logRPKMmean"], female_spleen_grp.get_group("3")["logRPKMmean"])[1]*2
print mannwhitneyu(male_spleen_grp.get_group("4")["logRPKMmean"], female_spleen_grp.get_group("4")["logRPKMmean"])[1]*2


Gonad quartiles
(1.0009678754496514, 0.3168423394893195)
(8.3444741322046969, 7.1531460766000107e-17)
(10.758921989773837, 5.3795767916352687e-27)
(5.7942794791499566, 6.8615114876267109e-09)
-----
0.317161036395
7.19313876655e-17
5.41817736188e-27
6.88850136748e-09
Heart quartiles
(2.6290310658820992, 0.0085628538126141622)
(7.4753112974223068, 7.7021308038608343e-14)
(7.7763666247546688, 7.4637195995068213e-15)
(3.0472891912421507, 0.002309154333055681)
-----
0.00858516925364
7.75478753195e-14
7.5167420116e-15
0.00231604747143
Liver quartiles
(2.8764415197125999, 0.0040218671239935291)
(6.3980772066820037, 1.5734572776539001e-10)
(6.230889169270748, 4.6379515023175781e-10)
(2.5569320471583969, 0.010559984388705682)
-----
0.00403502474768
1.58425843595e-10
4.66899135609e-10
0.0105917471486
Spleen quartiles
(2.4874085590224433, 0.012867752842213703)
(6.6176975755715564, 3.6483643298173662e-11)
(8.9085010481384703, 5.1727146638287874e-19)
(4.6751091276875796, 2.9379734954453239e-06)
-----
0.0128966124367
3.66830471024e-11
5.21045075428e-19
2.94953493971e-06

In [354]:
def color_boxes(bp):
    red = False
    for box in bp['boxes']:
        # change outline color
        box.set( color='black', linewidth=1)
#         box.set( color='black', linewidth=1)

        if red == True:
            box.set( facecolor = '#e74c3c' )
            red = False
        else:
            box.set( facecolor = '#3498DB' )
            red = True
    for median in bp['medians']:
        median.set(color='black', linewidth=1)

    ## change the style of fliers and their fill
    for flier in bp['fliers']:
        flier.set(marker='o', color='black', alpha=0.5, markersize=4)
        
        ## change color and linewidth of the whiskers
    for whisker in bp['whiskers']:
        whisker.set(color='black', linewidth=1, linestyle="-")

    ## change color and linewidth of the caps
    for cap in bp['caps']:
        cap.set(color='black', linewidth=1)
        
# Create a figure instance
fig = plt.figure(1, figsize=(6, 6))
plt.rc("font", size=12)
ax1 = fig.add_subplot(221)
bp = ax1.boxplot(spleen_quartiles, 
            notch=True, 
            sym='',    # no outliers
            positions = [1, 2, 4, 5, 7, 8, 10, 11], 
            widths = 0.9,
             patch_artist=True)
ax1.set_xticklabels(['1\n(133)', '2\n(132)', '3\n(132)', '4\n(132)'])
ax1.set_xticks([1.5, 4.5, 7.5, 10.5])
ax1.text(1.35, 4, "*", fontsize=11)
ax1.text(4.35, 5, "***", fontsize=11)
ax1.text(7.35, 6, "***", fontsize=11)
ax1.text(10.35, 9, "***", fontsize=11)
# Remove the ticks on the right and top side
ax1.yaxis.set_ticks_position('left')
ax1.xaxis.set_ticks_position('bottom')

color_boxes(bp)
plt.ylim(-1,11)
plt.title('(a) Spleen')
plt.ylabel(r'$log_2$'+"(RPKM)")

ax2 = fig.add_subplot(222)
bp = ax2.boxplot(heart_quartiles, 
            notch=True, # 
            sym='',    # no outliers
            positions = [1, 2, 4, 5, 7, 8, 10, 11], 
            widths = 0.9,
             patch_artist=True)
ax2.set_xticklabels(['1\n(124)', '2\n(123)', '3\n(123)', '4\n(123)'])
ax2.set_xticks([1.5, 4.5, 7.5, 10.5])
ax2.text(1.35, 4, "*", fontsize=11)
ax2.text(4.35, 5, "***", fontsize=11)
ax2.text(7.35, 6, "***", fontsize=11)
ax2.text(10.35, 9, "*", fontsize=11)
# Remove the ticks on the right and top side
ax2.yaxis.set_ticks_position('left')
ax2.xaxis.set_ticks_position('bottom')


color_boxes(bp)
plt.ylim(-1,11)
plt.title('(b) Heart')

ax3 = fig.add_subplot(223)
bp = ax3.boxplot(liver_quartiles, 
            notch=True, 
            sym='',    # no outliers
            positions = [1, 2, 4, 5, 7, 8, 10, 11], 
            widths = 0.9,
             patch_artist=True)
ax3.set_xticklabels(['1\n(112)', '2\n(111)', '3\n(111)', '4\n(111)'])
ax3.set_xticks([1.5, 4.5, 7.5, 10.5])
ax3.text(1.35, 3, "*", fontsize=11)
ax3.text(4.35, 4, "***", fontsize=11)
ax3.text(7.35, 5, "***", fontsize=11)
ax3.text(10.35, 10, "*", fontsize=11)
# Remove the ticks on the right and top side
ax3.yaxis.set_ticks_position('left')
ax3.xaxis.set_ticks_position('bottom')


color_boxes(bp)
plt.ylim(-1,11)
plt.title('(c) Liver')
plt.ylabel(r'$log_2$'+"(RPKM)")
plt.xlabel('Quartiles')

ax4 = fig.add_subplot(224)
bp = ax4.boxplot(gonad_quartiles, 
            notch=True, # box instead of notch shape 
            sym='',    # red squares for outliers
            positions = [1, 2, 4, 5, 7, 8, 10, 11], 
            widths = 0.9,
             patch_artist=True)
ax4.set_xticklabels(['1\n(151)', '2\n(151)', '3\n(151)', '4\n(151)'])
ax4.set_xticks([1.5, 4.5, 7.5, 10.5])
ax4.text(1.35, 6, "ns", fontsize=11)
ax4.text(4.35, 6, "***", fontsize=11)
ax4.text(7.35, 7.5, "***", fontsize=11)
ax4.text(10.35, 9.5, "***", fontsize=11)
# Remove the ticks on the right and top side
ax4.yaxis.set_ticks_position('left')
ax4.xaxis.set_ticks_position('bottom')

color_boxes(bp)
plt.ylim(-1,11)
plt.title('(d) Gonad')
plt.xlabel('Quartiles')
fig.tight_layout(pad=2)
plt.savefig("2015-10-27-Z-quartiles.png", dpi=300)
plt.savefig("2015-10-27-Z-quartiles.pdf", dpi=300)