In [1]:
%matplotlib inline
In [2]:
import glob
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
pd.set_option('display.max_columns', 50)  # show up to 50 columns when a DataFrame is displayed
import os
# Work from the data directory so the relative CSV path used below resolves.
# Set the RRBS_DATA_DIR environment variable to run this notebook on another machine;
# the original author's local path is kept as the default.
os.chdir(os.environ.get('RRBS_DATA_DIR', '/Users/evanbiederstedt/Downloads/RRBS_data_files'))
In [3]:
# Load the per-file summary table. Later cells read its "type", "bio", "protocol", "methylation",
# "methylation_unweighted", "PDR_total" and "PDR_unweighted" columns.
# The path is relative, so it depends on the working directory chosen in the setup cell.
stats = pd.read_csv("RRBS_anno_statistics_full_446files_filter50K.csv")
In [4]:
# (rows, columns) of the loaded table.
stats.shape
Out[4]:
In [5]:
# Split the table into the two sample types compared throughout the notebook.
normal = stats.loc[stats["type"] == "normal"]
CLL = stats.loc[stats["type"] == "CLL"]
In [6]:
# Number of rows labelled "normal".
len(normal)
Out[6]:
In [7]:
# Number of rows labelled "CLL".
len(CLL)
Out[7]:
In [8]:
# Strip plot of weighted methylation by sample type.
sns.set_style("whitegrid")
ax = sns.stripplot(data=stats, x="type", y="methylation")
# Label through the Axes object: the `sns.plt` alias is not available in current seaborn releases.
ax.set_title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
ax.set_ylabel("Percent Methylation, weighted")
Out[8]:
In [9]:
# Same strip plot with horizontal jitter so overlapping points become visible.
sns.set_style("whitegrid")
ax = sns.stripplot(data=stats, x="type", y="methylation", jitter=True)
# Label through the Axes object: the `sns.plt` alias is not available in current seaborn releases.
ax.set_title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
ax.set_ylabel("Percent Methylation, weighted")
Out[9]:
In [10]:
# Violin plot of weighted methylation by sample type.
ax = sns.violinplot(data=stats, x="type", y="methylation")
ax.set_title("Violin plot: Methylation per cell, normal vs CLL")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
ax.set_ylabel("Percent Methylation, weighted")
print("violin plot features a kernel density estimation of the underlying distribution")
In [11]:
# Box-and-whisker plot of weighted methylation by sample type.
ax = sns.boxplot(data=stats, x="type", y="methylation", linewidth=1.5)
ax.set_title("Box Whisker plot: Methylation per cell, normal vs CLL")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
ax.set_ylabel("Percent Methylation, weighted")
print("Box whisker plot")
In [12]:
# Box-and-whisker plot of weighted methylation by sample type.
ax = sns.boxplot(data=stats, x="type", y="methylation", linewidth=1.5)
# The figure is a box plot, so the title says so (it previously read "Violin plot").
ax.set_title("Box Whisker plot: Methylation per cell, normal vs CLL")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
ax.set_ylabel("Percent Methylation, weighted")
print("Box whisker plot")
# Optional zoom, left disabled:
#plt.ylim(0.35, 0.7)
In [13]:
# Horizontal box plot: methylation runs along the x axis, sample type along the y axis.
ax = sns.boxplot(data=stats, y="type", x="methylation", linewidth=1.5)
ax.set_title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
# The x axis carries the measurement, so it gets the measurement label; the sample counts
# describe the categories and therefore go with the y-axis label.
ax.set_xlabel("Percent Methylation, weighted")
ax.set_ylabel("Cell Type: Normal cells vs. CLL samples\n"
              "Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
Out[13]:
In [14]:
# Box plot with the individual samples overlaid as jittered points.
ax = sns.boxplot(data=stats, x="type", y="methylation", linewidth=1.5)
sns.stripplot(data=stats, x="type", y="methylation", color=".25", linewidth=1.5, jitter=True, ax=ax)
ax.set_title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
Out[14]:
In [15]:
# `combined` is a second name for the same DataFrame object as `stats` (no copy is made).
combined = stats
In [16]:
# Horizontal box + strip plot: methylation on the x axis, sample type on the y axis.
ax = sns.boxplot(data=combined, y="type", x="methylation", linewidth=1.5)
sns.stripplot(data=combined, y="type", x="methylation", color=".25", linewidth=1.5, jitter=True, ax=ax)
ax.set_title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
# The labels follow the horizontal layout (they were swapped before).
ax.set_xlabel("Percent Methylation, weighted")
ax.set_ylabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
Out[16]:
In [17]:
# Box plot with a swarm overlay (points are spread so that they do not overlap).
ax = sns.boxplot(data=combined, x="type", y="methylation", linewidth=1.5)
sns.swarmplot(data=combined, x="type", y="methylation", color=".25", linewidth=1.5, ax=ax)
ax.set_title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
ax.set_ylabel("Percent Methylation, weighted")
Out[17]:
In [18]:
# Horizontal box + swarm plot: methylation on the x axis, sample type on the y axis.
ax = sns.boxplot(data=combined, y="type", x="methylation", linewidth=1.5)
sns.swarmplot(data=combined, y="type", x="methylation", color=".25", linewidth=1.5, ax=ax)
ax.set_title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
# The labels follow the horizontal layout (they were swapped before).
ax.set_xlabel("Percent Methylation, weighted")
ax.set_ylabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
Out[18]:
In [19]:
# Horizontal box + swarm plot (same figure as the previous cell).
ax = sns.boxplot(data=combined, y="type", x="methylation", linewidth=1.5)
sns.swarmplot(data=combined, y="type", x="methylation", color=".25", linewidth=1.5, ax=ax)
ax.set_title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
# The labels follow the horizontal layout (they were swapped before).
ax.set_xlabel("Percent Methylation, weighted")
ax.set_ylabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
Out[19]:
In [20]:
# `combined2` is another name for the same DataFrame object as `stats` (no copy is made).
combined2 = stats
In [21]:
# Strip plot of weighted methylation by sample type, coloured by the "bio" column.
sns.set_style("whitegrid")
ax = sns.stripplot(data=combined2, x="type", y="methylation", hue="bio")
ax.set_title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
ax.set_ylabel("Percent Methylation, weighted")
# Anchor the legend just outside the right edge of the axes so it does not cover points.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)
Out[21]:
In [22]:
# Jittered strip plot coloured by "bio" with an explicit colour per level.
sns.set(style="whitegrid", palette="muted")
# One colour per "bio" level; the keys must match the values present in that column.
bio_palette = dict(normal_B='b', CD19CD27m='g', CD19CD27p='r', CLL='m', CD19p='c')
ax = sns.stripplot(data=combined2, x="type", y="methylation", hue="bio",
                   jitter=True, palette=bio_palette)
ax.set_title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
ax.set_ylabel("Percent Methylation, weighted")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)
Out[22]:
In [23]:
# Jittered strip plot of weighted methylation by sample type, coloured by "bio".
sns.set_style("whitegrid")
ax = sns.stripplot(data=combined2, x="type", y="methylation", hue="bio", jitter=True)
ax.set_title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
ax.set_ylabel("Percent Methylation, weighted")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)
Out[23]:
In [24]:
# Box plot by sample type with the samples overlaid and coloured by "bio".
# The style is set first because it only applies to axes created afterwards.
sns.set_style("whitegrid")
ax = sns.boxplot(data=combined2, x="type", y="methylation", linewidth=1.5)
sns.stripplot(data=combined2, x="type", y="methylation", hue="bio", jitter=True, linewidth=1.5, ax=ax)
ax.set_ylim(0.15, 0.8)
ax.set_title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
ax.set_ylabel("Percent Methylation, weighted")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)
Out[24]:
In [25]:
# pandas box plot grouped by sample type, with a seaborn strip plot drawn on the same axes.
# matplotlib puts boxes at positions 1..N by default while seaborn puts categories at 0..N-1,
# so both layers are pinned to the same positions and the same (sorted) category order.
sns.set_style("whitegrid")
type_order = sorted(combined2["type"].dropna().unique())
combined2.boxplot(column='methylation', by='type', fontsize=13,
                  positions=list(range(len(type_order))))
ax = sns.stripplot(data=combined2, x="type", y="methylation", hue="bio", order=type_order,
                   jitter=True, linewidth=1.5)
ax.set_title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
ax.set_ylabel("Percent Methylation, weighted")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)
Out[25]:
In [26]:
# pandas' own box plot of "methylation" grouped by "type", without any seaborn overlay.
combined2.boxplot(column = 'methylation', by='type', fontsize=13)
# Sets the title of the current axes.
plt.title('corrected')
Out[26]:
In [27]:
# Violin plot by sample type with the samples overlaid and coloured by "bio".
# The style is set first because it only applies to axes created afterwards.
sns.set_style("whitegrid")
ax = sns.violinplot(data=combined2, x="type", y="methylation", palette="Set3")
sns.stripplot(data=combined2, x="type", y="methylation", hue="bio", jitter=True, linewidth=1.5, ax=ax)
ax.set_title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
ax.set_ylabel("Percent Methylation, weighted")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)
Out[27]:
In [28]:
# Jittered strip plot of weighted methylation by sample type, coloured by the "protocol" column.
sns.set_style("whitegrid")
ax = sns.stripplot(data=combined2, x="type", y="methylation", hue="protocol", jitter=True)
ax.set_title("Technical/protocol sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
ax.set_ylabel("Percent Methylation, weighted")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)
Out[28]:
In [29]:
# Same protocol-coloured strip plot, with an outline drawn around each point (linewidth=1.0).
sns.set_style("whitegrid")
ax = sns.stripplot(data=combined2, x="type", y="methylation", hue="protocol", jitter=True, linewidth=1.0)
ax.set_title("Technical/protocol sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
ax.set_ylabel("Percent Methylation, weighted")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)
Out[29]:
In [30]:
# Violin plot by sample type with the samples overlaid and coloured by "protocol".
# The style is set first because it only applies to axes created afterwards.
sns.set_style("whitegrid")
ax = sns.violinplot(data=combined2, x="type", y="methylation", palette="Set1")
sns.stripplot(data=combined2, x="type", y="methylation", hue="protocol", jitter=True, linewidth=1.0, ax=ax)
ax.set_title("Technical/protocol sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
ax.set_ylabel("Percent Methylation, weighted")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)
Out[30]:
In [31]:
# Box plot of weighted methylation by sample type with jittered samples on top.
ax = sns.boxplot(data=combined, x="type", y="methylation", linewidth=1.5)
sns.stripplot(data=combined, x="type", y="methylation", color=".25", linewidth=1.5, jitter=True, ax=ax)
ax.set_title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
Out[31]:
In [32]:
from scipy.stats import ttest_ind

# Two-sample t-test on weighted methylation: CLL rows versus normal rows.
cat1 = combined.loc[combined['type'] == 'CLL']
cat2 = combined.loc[combined['type'] == 'normal']
t, p = ttest_ind(cat1['methylation'], cat2['methylation'])
print("t-statistic is " + str(t))
print("p-value is " + str(p))
In [33]:
import scipy.stats

# Mann-Whitney U test on weighted methylation, CLL (cat1) versus normal (cat2).
# The two-sided p-value is requested explicitly. Doubling the returned value is only valid when
# SciPy returns a one-sided p-value, and it over-reports by 2x on releases whose default is
# already two-sided.
# `z` holds the U statistic; the name is kept so existing references still work.
z, p = scipy.stats.mannwhitneyu(cat1['methylation'], cat2['methylation'], alternative='two-sided')
p_value = p
print("Mann-Whitney U statistic is " + str(z))
print("p-value is " + str(p_value))
In [34]:
# Box plot of weighted methylation per "bio" level, with boxes coloured by sample type.
sns.boxplot(
    data=combined2,
    x="bio",
    y="methylation",
    hue="type",
    palette="PRGn",
)
# Offset the remaining spines from the plot area and trim them to the tick range.
sns.despine(trim=True, offset=10)
In [35]:
# Box plot of weighted methylation for each "bio" level.
ax = sns.boxplot(data=combined2, x="bio", y="methylation")
# The title and labels are applied after plotting so seaborn's default axis labels cannot replace them.
ax.set_title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
sns.despine(offset=10, trim=True)
In [36]:
# Box plot of weighted methylation for each "bio" level with jittered samples on top.
ax = sns.boxplot(data=combined2, x="bio", y="methylation")
sns.stripplot(data=combined2, x="bio", y="methylation", color=".25", linewidth=0.5, jitter=True, ax=ax)
# The title and labels are applied after plotting so seaborn's default axis labels cannot replace them.
ax.set_title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
sns.despine(offset=10, trim=True)
In [37]:
# Violin plot of weighted methylation for each "bio" level.
ax = sns.violinplot(data=combined2, x="bio", y="methylation")
# The title and labels are applied after plotting so seaborn's default axis labels cannot replace them.
ax.set_title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
sns.despine(offset=10, trim=True)
In [38]:
# Jittered samples and a violin plot of weighted methylation for each "bio" level, on one axes.
ax = sns.stripplot(data=combined2, x="bio", y="methylation", color=".25", linewidth=0.5, jitter=True)
sns.violinplot(data=combined2, x="bio", y="methylation", ax=ax)
# The title and labels are applied after plotting so seaborn's default axis labels cannot replace them.
ax.set_title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
sns.despine(offset=10, trim=True)
In [39]:
# `final_real2` is another name for the same DataFrame object as `stats` (no copy is made).
final_real2 = stats
In [40]:
# Row subsets for the two sample types, with their sizes printed.
norm = final_real2.loc[final_real2['type'] == 'normal']
print(norm.shape[0])
cll = final_real2.loc[final_real2['type'] == 'CLL']
print(cll.shape[0])
In [41]:
# Box plot of weighted methylation by sample type with jittered samples on top.
ax = sns.boxplot(data=final_real2, x="type", y="methylation", linewidth=1.5)
sns.stripplot(data=final_real2, x="type", y="methylation", color=".25", linewidth=1.5, jitter=True, ax=ax)
ax.set_title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
Out[41]:
In [42]:
# Mean, then standard deviation, of weighted methylation; normal is printed before CLL each time.
for _stat in ("mean", "std"):
    for _group in (norm, cll):
        print(getattr(_group['methylation'], _stat)())
In [43]:
import scipy.stats

# Student's t-test on weighted methylation, normal versus CLL. It is a parametric test that
# assumes the data are normally distributed.
t, p = scipy.stats.ttest_ind(norm['methylation'], cll['methylation'])
print("t-statistic is " + str(t))
print("p-value is " + str(p))

# Non-parametric alternative: the Mann-Whitney U test (also called the Wilcoxon rank-sum test,
# or "ranksum" in MATLAB). The two-sided p-value is requested explicitly. Doubling the returned
# value is only valid when SciPy returns a one-sided p-value, and it over-reports by 2x on
# releases whose default is already two-sided.
# `z` holds the U statistic; the name is kept so existing references still work.
z, p = scipy.stats.mannwhitneyu(norm['methylation'], cll['methylation'], alternative='two-sided')
p_value = p
print("Mann-Whitney U statistic is " + str(z))
print("p-value is " + str(p_value))
In [44]:
# Box + strip plot of weighted methylation by sample type, with a significance bracket on top.
ax = sns.boxplot(data=final_real2, x="type", y="methylation", linewidth=1.5)
sns.stripplot(data=final_real2, x="type", y="methylation", color=".25", linewidth=1.5, jitter=True, ax=ax)
ax.set_title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
x1, x2 = 0, 1  # x positions of the two categories being compared (the first category sits at 0)
# The bracket starts 0.025 above the largest value; h is the height of its end ticks.
y, h, col = final_real2["methylation"].max() + .025, .01, 'k'
ax.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
# NOTE(review): the p-value is hard-coded; confirm it still matches the test output for this column.
ax.text((x1+x2)*.5, y+h, r'p = $8.38 \times 10^{-19}$', ha='center', va='bottom', color=col)
Out[44]:
In [45]:
# Box + strip plot of the "PDR_total" column by sample type.
ax = sns.boxplot(data=final_real2, x="type", y="PDR_total", linewidth=1.5)
sns.stripplot(data=final_real2, x="type", y="PDR_total", color=".25", linewidth=1.5, jitter=True, ax=ax)
ax.set_title("PDR per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
ax.set_ylabel("PDR")
Out[45]:
In [46]:
# Mean, then standard deviation, of PDR_total; normal is printed before CLL each time.
for _stat in ("mean", "std"):
    for _group in (norm, cll):
        print(getattr(_group['PDR_total'], _stat)())
In [47]:
import scipy.stats

# Student's t-test on PDR_total, normal versus CLL. It is a parametric test that assumes the
# data are normally distributed.
t, p = scipy.stats.ttest_ind(norm['PDR_total'], cll['PDR_total'])
print("t-statistic is " + str(t))
print("p-value is " + str(p))

# Non-parametric alternative: the Mann-Whitney U test (also called the Wilcoxon rank-sum test,
# or "ranksum" in MATLAB). The two-sided p-value is requested explicitly. Doubling the returned
# value is only valid when SciPy returns a one-sided p-value, and it over-reports by 2x on
# releases whose default is already two-sided.
# `z` holds the U statistic; the name is kept so existing references still work.
z, p = scipy.stats.mannwhitneyu(norm['PDR_total'], cll['PDR_total'], alternative='two-sided')
p_value = p
print("Mann-Whitney U statistic is " + str(z))
print("p-value is " + str(p_value))
In [48]:
# Box + strip plot of PDR_total by sample type, with a significance bracket on top.
ax = sns.boxplot(data=final_real2, x="type", y="PDR_total", linewidth=1.5)
sns.stripplot(data=final_real2, x="type", y="PDR_total", color=".25", linewidth=1.5, jitter=True, ax=ax)
ax.set_title("PDR per cell, normal vs CLL, all chromosomes, weighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
x1, x2 = 0, 1  # x positions of the two categories being compared (the first category sits at 0)
# The bracket starts 0.025 above the largest value; h is the height of its end ticks.
y, h, col = final_real2["PDR_total"].max() + .025, .01, 'k'
ax.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
# NOTE(review): the p-value is hard-coded; confirm it still matches the test output for this column.
ax.text((x1+x2)*.5, y+h, r'p = $2.31 \times 10^{-29}$', ha='center', va='bottom', color=col)
ax.set_ylabel("PDR")
Out[48]:
In [49]:
# Box + strip plot of the "methylation_unweighted" column by sample type.
ax = sns.boxplot(data=final_real2, x="type", y="methylation_unweighted", linewidth=1.5)
sns.stripplot(data=final_real2, x="type", y="methylation_unweighted", color=".25", linewidth=1.5, jitter=True, ax=ax)
# The plotted column is the unweighted one, so the title says "unweighted" (it previously read "weighted").
ax.set_title("Methylation per cell, normal vs CLL, all chromosomes, unweighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
Out[49]:
In [50]:
# Mean, then standard deviation, of unweighted methylation; normal is printed before CLL each time.
for _stat in ("mean", "std"):
    for _group in (norm, cll):
        print(getattr(_group['methylation_unweighted'], _stat)())
In [51]:
import scipy.stats

# Student's t-test on unweighted methylation, normal versus CLL. It is a parametric test that
# assumes the data are normally distributed.
t, p = scipy.stats.ttest_ind(norm['methylation_unweighted'], cll['methylation_unweighted'])
print("t-statistic is " + str(t))
print("p-value is " + str(p))

# Non-parametric alternative: the Mann-Whitney U test (also called the Wilcoxon rank-sum test,
# or "ranksum" in MATLAB). The two-sided p-value is requested explicitly. Doubling the returned
# value is only valid when SciPy returns a one-sided p-value, and it over-reports by 2x on
# releases whose default is already two-sided.
# `z` holds the U statistic; the name is kept so existing references still work.
z, p = scipy.stats.mannwhitneyu(norm['methylation_unweighted'], cll['methylation_unweighted'],
                                alternative='two-sided')
p_value = p
print("Mann-Whitney U statistic is " + str(z))
print("p-value is " + str(p_value))
In [52]:
# Box + strip plot of unweighted methylation by sample type, with a significance bracket on top.
ax = sns.boxplot(data=final_real2, x="type", y="methylation_unweighted", linewidth=1.5)
sns.stripplot(data=final_real2, x="type", y="methylation_unweighted", color=".25", linewidth=1.5, jitter=True, ax=ax)
ax.set_title("Methylation per cell, normal vs CLL, all chromosomes, unweighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
x1, x2 = 0, 1  # x positions of the two categories being compared (the first category sits at 0)
# The bracket starts 0.025 above the largest value; h is the height of its end ticks.
y, h, col = final_real2["methylation_unweighted"].max() + .025, .01, 'k'
ax.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
# NOTE(review): the p-value is hard-coded; confirm it still matches the test output for this column.
ax.text((x1+x2)*.5, y+h, r'p = $6.48 \times 10^{-9}$', ha='center', va='bottom', color=col)
ax.set_ylabel("Methylation")
Out[52]:
In [53]:
# Box + strip plot of the "PDR_unweighted" column by sample type.
ax = sns.boxplot(data=final_real2, x="type", y="PDR_unweighted", linewidth=1.5)
sns.stripplot(data=final_real2, x="type", y="PDR_unweighted", color=".25", linewidth=1.5, jitter=True, ax=ax)
ax.set_title("PDR per cell, normal vs CLL, all chromosomes, unweighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
ax.set_ylabel("PDR")
Out[53]:
In [54]:
# Mean, then standard deviation, of PDR_unweighted; normal is printed before CLL each time.
for _stat in ("mean", "std"):
    for _group in (norm, cll):
        print(getattr(_group['PDR_unweighted'], _stat)())
In [55]:
import scipy.stats

# Student's t-test on PDR_unweighted, normal versus CLL. It is a parametric test that assumes
# the data are normally distributed.
t, p = scipy.stats.ttest_ind(norm['PDR_unweighted'], cll['PDR_unweighted'])
print("t-statistic is " + str(t))
print("p-value is " + str(p))

# Non-parametric alternative: the Mann-Whitney U test (also called the Wilcoxon rank-sum test,
# or "ranksum" in MATLAB). The two-sided p-value is requested explicitly. Doubling the returned
# value is only valid when SciPy returns a one-sided p-value, and it over-reports by 2x on
# releases whose default is already two-sided.
# `z` holds the U statistic; the name is kept so existing references still work.
z, p = scipy.stats.mannwhitneyu(norm['PDR_unweighted'], cll['PDR_unweighted'], alternative='two-sided')
p_value = p
print("Mann-Whitney U statistic is " + str(z))
print("p-value is " + str(p_value))
In [56]:
# Box + strip plot of PDR_unweighted by sample type, with a significance bracket on top.
ax = sns.boxplot(data=final_real2, x="type", y="PDR_unweighted", linewidth=1.5)
sns.stripplot(data=final_real2, x="type", y="PDR_unweighted", color=".25", linewidth=1.5, jitter=True, ax=ax)
ax.set_title("PDR per cell, normal vs CLL, all chromosomes, unweighted")
ax.set_xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
x1, x2 = 0, 1  # x positions of the two categories being compared (the first category sits at 0)
# The bracket starts 0.025 above the largest value; h is the height of its end ticks.
y, h, col = final_real2["PDR_unweighted"].max() + .025, .01, 'k'
ax.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
# NOTE(review): the p-value is hard-coded; confirm it still matches the test output for this column.
ax.text((x1+x2)*.5, y+h, r'p = $4.11 \times 10^{-27}$', ha='center', va='bottom', color=col)
ax.set_ylabel("PDR")
Out[56]:
In [57]:
# Rows whose "bio" value is CD19CD27p / CD19CD27m, with the size of each subset printed.
CD27p = final_real2.loc[final_real2['bio'] == 'CD19CD27p']
print(CD27p.shape[0])
CD27m = final_real2.loc[final_real2['bio'] == 'CD19CD27m']
print(CD27m.shape[0])
In [58]:
# Stack the CD19CD27p rows on top of the CD19CD27m rows; the original index labels are kept here.
CD27cells = pd.concat([CD27p, CD27m])
In [59]:
# Replace the index inherited from the source table with a fresh 0..n-1 index (old labels dropped).
CD27cells = CD27cells.reset_index(drop=True)
In [60]:
# Box + strip plot of weighted methylation for the two CD27 groups.
ax = sns.boxplot(data=CD27cells, x="bio", y="methylation", linewidth=1.5, palette='Set2')
sns.stripplot(data=CD27cells, x="bio", y="methylation", color=".25", linewidth=1.5, jitter=True, ax=ax)
ax.set_title("Methylation per cell, CD27p vs. CD27m, all chromosomes, weighted")
ax.set_xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
ax.set_ylabel("Methylation")
Out[60]:
In [61]:
# Mean, then standard deviation, of weighted methylation; CD27p is printed before CD27m each time.
for _stat in ("mean", "std"):
    for _group in (CD27p, CD27m):
        print(getattr(_group['methylation'], _stat)())
In [62]:
import scipy.stats

# Student's t-test on weighted methylation, CD27p versus CD27m. It is a parametric test that
# assumes the data are normally distributed.
t, p = scipy.stats.ttest_ind(CD27p['methylation'], CD27m['methylation'])
print("t-statistic is " + str(t))
print("p-value is " + str(p))

# Non-parametric alternative: the Mann-Whitney U test (also called the Wilcoxon rank-sum test,
# or "ranksum" in MATLAB). The two-sided p-value is requested explicitly. Doubling the returned
# value is only valid when SciPy returns a one-sided p-value, and it over-reports by 2x on
# releases whose default is already two-sided.
# `z` holds the U statistic; the name is kept so existing references still work.
z, p = scipy.stats.mannwhitneyu(CD27p['methylation'], CD27m['methylation'], alternative='two-sided')
p_value = p
print("Mann-Whitney U statistic is " + str(z))
print("p-value is " + str(p_value))
In [63]:
# Jittered strip plot of weighted methylation for the two CD27 groups, coloured by "protocol".
ax = sns.stripplot(data=CD27cells, x="bio", y="methylation", hue="protocol", linewidth=1.5, jitter=True)
ax.set_title("Methylation per cell, CD27p vs. CD27m, all chromosomes, weighted")
ax.set_xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
ax.set_ylabel("Methylation")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)
Out[63]:
In [64]:
# Split each CD27 group into four subsets by exact "protocol" label.
# Note the trailing underscore in the "cell1_22_" labels.
CD27p122 = CD27p.loc[CD27p['protocol'] == 'NormalBCD19pCD27pcell1_22_']
CD27p2344 = CD27p.loc[CD27p['protocol'] == 'NormalBCD19pCD27pcell23_44']
CD27p4566 = CD27p.loc[CD27p['protocol'] == 'NormalBCD19pCD27pcell45_66']
CD27p6788 = CD27p.loc[CD27p['protocol'] == 'NormalBCD19pCD27pcell67_88']

CD27m122 = CD27m.loc[CD27m['protocol'] == 'NormalBCD19pCD27mcell1_22_']
CD27m2344 = CD27m.loc[CD27m['protocol'] == 'NormalBCD19pCD27mcell23_44']
CD27m4566 = CD27m.loc[CD27m['protocol'] == 'NormalBCD19pCD27mcell45_66']
CD27m6788 = CD27m.loc[CD27m['protocol'] == 'NormalBCD19pCD27mcell67_88']
In [65]:
# Mean weighted methylation for each protocol subset: the CD27p subsets, a separator, then the CD27m subsets.
_cd27p_batches = [("CD27p122", CD27p122), ("CD27p2344", CD27p2344),
                  ("CD27p4566", CD27p4566), ("CD27p6788", CD27p6788)]
_cd27m_batches = [("CD27m122", CD27m122), ("CD27m2344", CD27m2344),
                  ("CD27m4566", CD27m4566), ("CD27m6788", CD27m6788)]
print("MEAN")
for _label, _batch in _cd27p_batches:
    print(_label)
    print(_batch['methylation'].mean())
print("***")
for _label, _batch in _cd27m_batches:
    print(_label)
    print(_batch['methylation'].mean())
In [66]:
# Number of rows in each protocol subset: the CD27p subsets, a separator, then the CD27m subsets.
_cd27p_batches = [("CD27p122", CD27p122), ("CD27p2344", CD27p2344),
                  ("CD27p4566", CD27p4566), ("CD27p6788", CD27p6788)]
_cd27m_batches = [("CD27m122", CD27m122), ("CD27m2344", CD27m2344),
                  ("CD27m4566", CD27m4566), ("CD27m6788", CD27m6788)]
print("total")
for _label, _batch in _cd27p_batches:
    print(_label)
    print(len(_batch['methylation']))
print("***")
for _label, _batch in _cd27m_batches:
    print(_label)
    print(len(_batch['methylation']))
In [67]:
# Standard deviation of weighted methylation for each protocol subset (CD27p subsets, separator, CD27m subsets).
_cd27p_batches = [("CD27p122", CD27p122), ("CD27p2344", CD27p2344),
                  ("CD27p4566", CD27p4566), ("CD27p6788", CD27p6788)]
_cd27m_batches = [("CD27m122", CD27m122), ("CD27m2344", CD27m2344),
                  ("CD27m4566", CD27m4566), ("CD27m6788", CD27m6788)]
print("STD")
for _label, _batch in _cd27p_batches:
    print(_label)
    print(_batch['methylation'].std())
print("***")
for _label, _batch in _cd27m_batches:
    print(_label)
    print(_batch['methylation'].std())
In [68]:
# Box + strip plot of weighted methylation per protocol subset, for both CD27 groups.
ax = sns.boxplot(data=CD27cells, x="protocol", y="methylation", linewidth=1.5, palette='Set2')
sns.stripplot(data=CD27cells, x="protocol", y="methylation", color=".25", linewidth=1.5, jitter=True, ax=ax)
# The tick labels are hidden; the x-axis label below spells out the subset names by hand instead.
# NOTE(review): this assumes the categories are drawn in the listed order - confirm against the data.
ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.set_title("Methylation per cell, CD27p vs. CD27m, all chromosomes, weighted")
ax.set_xlabel("CD27p122 CD27p2344 CD27p4566 CD27p6788 CD27m122 CD27m2344 CD27m4566 CD27m6788")
ax.set_ylabel("Methylation")
Out[68]:
In [69]:
# Box + strip plot of weighted methylation per protocol subset, CD27p rows only.
ax = sns.boxplot(data=CD27p, x="protocol", y="methylation", linewidth=1.5, palette='Set2')
sns.stripplot(data=CD27p, x="protocol", y="methylation", color=".25", linewidth=1.5, jitter=True, ax=ax)
# The tick labels are hidden; the x-axis label below spells out the subset names by hand instead.
# NOTE(review): this assumes the categories are drawn in the listed order - confirm against the data.
ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.set_title("Methylation per cell, CD27p, all chromosomes, weighted")
ax.set_xlabel("CD27p122 CD27p2344 CD27p4566 CD27p6788")
ax.set_ylabel("Methylation")
Out[69]:
In [70]:
# Box + strip plot of weighted methylation per protocol subset, CD27m rows only.
ax = sns.boxplot(data=CD27m, x="protocol", y="methylation", linewidth=1.5, palette='Set1')
sns.stripplot(data=CD27m, x="protocol", y="methylation", color=".25", linewidth=1.5, jitter=True, ax=ax)
# The tick labels are hidden; the x-axis label below spells out the subset names by hand instead.
# NOTE(review): this assumes the categories are drawn in the listed order - confirm against the data.
ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.set_title("Methylation per cell, CD27m, all chromosomes, weighted")
ax.set_xlabel("CD27m122 CD27m2344 CD27m4566 CD27m6788")
ax.set_ylabel("Methylation")
Out[70]:
In [71]:
# Mean unweighted methylation for each protocol subset (CD27p subsets, separator, CD27m subsets).
_cd27p_batches = [("CD27p122", CD27p122), ("CD27p2344", CD27p2344),
                  ("CD27p4566", CD27p4566), ("CD27p6788", CD27p6788)]
_cd27m_batches = [("CD27m122", CD27m122), ("CD27m2344", CD27m2344),
                  ("CD27m4566", CD27m4566), ("CD27m6788", CD27m6788)]
print("MEAN: methylation unweighted")
for _label, _batch in _cd27p_batches:
    print(_label)
    print(_batch['methylation_unweighted'].mean())
print("***")
for _label, _batch in _cd27m_batches:
    print(_label)
    print(_batch['methylation_unweighted'].mean())
In [72]:
# Standard deviation of unweighted methylation for each protocol subset (CD27p subsets, separator, CD27m subsets).
_cd27p_batches = [("CD27p122", CD27p122), ("CD27p2344", CD27p2344),
                  ("CD27p4566", CD27p4566), ("CD27p6788", CD27p6788)]
_cd27m_batches = [("CD27m122", CD27m122), ("CD27m2344", CD27m2344),
                  ("CD27m4566", CD27m4566), ("CD27m6788", CD27m6788)]
print("STD: methylation unweighted")
for _label, _batch in _cd27p_batches:
    print(_label)
    print(_batch['methylation_unweighted'].std())
print("***")
for _label, _batch in _cd27m_batches:
    print(_label)
    print(_batch['methylation_unweighted'].std())
In [73]:
# Jittered strip plot of unweighted methylation for the two CD27 groups, coloured by "protocol".
ax = sns.stripplot(data=CD27cells, x="bio", y="methylation_unweighted", hue="protocol", linewidth=1.5, jitter=True)
ax.set_title("Methylation per cell, CD27p vs. CD27m, all chromosomes, unweighted")
ax.set_xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
ax.set_ylabel("Methylation")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)
Out[73]:
In [74]:
# Box + strip plot of unweighted methylation per protocol subset, for both CD27 groups.
ax = sns.boxplot(data=CD27cells, x="protocol", y="methylation_unweighted", linewidth=1.5, palette='Set2')
sns.stripplot(data=CD27cells, x="protocol", y="methylation_unweighted", color=".25", linewidth=1.5, jitter=True, ax=ax)
# The tick labels are hidden; the x-axis label below spells out the subset names by hand instead.
# NOTE(review): this assumes the categories are drawn in the listed order - confirm against the data.
ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.set_title("Methylation per cell, CD27p vs. CD27m, all chromosomes, unweighted")
ax.set_xlabel("CD27p122 CD27p2344 CD27p4566 CD27p6788 CD27m122 CD27m2344 CD27m4566 CD27m6788")
ax.set_ylabel("Methylation")
Out[74]:
In [75]:
# Box + strip plot of unweighted methylation per protocol subset, CD27p rows only.
ax = sns.boxplot(data=CD27p, x="protocol", y="methylation_unweighted", linewidth=1.5, palette='Set2')
sns.stripplot(data=CD27p, x="protocol", y="methylation_unweighted", color=".25", linewidth=1.5, jitter=True, ax=ax)
# The tick labels are hidden; the x-axis label below spells out the subset names by hand instead.
# NOTE(review): this assumes the categories are drawn in the listed order - confirm against the data.
ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.set_title("Methylation per cell, CD27p, all chromosomes, unweighted")
ax.set_xlabel("CD27p122 CD27p2344 CD27p4566 CD27p6788")
ax.set_ylabel("Methylation")
Out[75]:
In [76]:
# Box + strip plot of unweighted methylation per protocol subset, CD27m rows only.
ax = sns.boxplot(data=CD27m, x="protocol", y="methylation_unweighted", linewidth=1.5, palette='Set1')
sns.stripplot(data=CD27m, x="protocol", y="methylation_unweighted", color=".25", linewidth=1.5, jitter=True, ax=ax)
# The tick labels are hidden; the x-axis label below spells out the subset names by hand instead.
# NOTE(review): this assumes the categories are drawn in the listed order - confirm against the data.
ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.set_title("Methylation per cell, CD27m, all chromosomes, unweighted")
ax.set_xlabel("CD27m122 CD27m2344 CD27m4566 CD27m6788")
ax.set_ylabel("Methylation")
Out[76]:
In [77]:
# Mean and then standard deviation of PDR_total for each protocol subset.
# Each section lists the CD27p subsets, a separator, then the CD27m subsets.
_cd27p_batches = [("CD27p122", CD27p122), ("CD27p2344", CD27p2344),
                  ("CD27p4566", CD27p4566), ("CD27p6788", CD27p6788)]
_cd27m_batches = [("CD27m122", CD27m122), ("CD27m2344", CD27m2344),
                  ("CD27m4566", CD27m4566), ("CD27m6788", CD27m6788)]
for _stat, _header in (("mean", "MEAN: PDR, weighted"), ("std", "STD: PDR, weighted")):
    if _stat == "std":
        # Three-line divider between the mean section and the std section.
        print("***")
        print("*** *** *** *** *** ***")
        print("***")
    print(_header)
    for _label, _batch in _cd27p_batches:
        print(_label)
        print(getattr(_batch['PDR_total'], _stat)())
    print("***")
    for _label, _batch in _cd27m_batches:
        print(_label)
        print(getattr(_batch['PDR_total'], _stat)())
In [78]:
# Jittered strip plot of PDR_total for the two CD27 groups, coloured by "protocol".
ax = sns.stripplot(data=CD27cells, x="bio", y="PDR_total", hue="protocol", linewidth=1.5, jitter=True)
ax.set_title("PDR total per cell, CD27p vs. CD27m, all chromosomes, weighted")
ax.set_xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
ax.set_ylabel("PDR total, weighted")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)
Out[78]:
In [79]:
# Box + strip plot of PDR_total per protocol subset, for both CD27 groups.
ax = sns.boxplot(data=CD27cells, x="protocol", y="PDR_total", linewidth=1.5, palette='Set2')
sns.stripplot(data=CD27cells, x="protocol", y="PDR_total", color=".25", linewidth=1.5, jitter=True, ax=ax)
# The tick labels are hidden; the x-axis label below spells out the subset names by hand instead.
# NOTE(review): this assumes the categories are drawn in the listed order - confirm against the data.
ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.set_title("PDR total per cell, CD27p vs. CD27m, all chromosomes, weighted")
ax.set_xlabel("CD27p122 CD27p2344 CD27p4566 CD27p6788 CD27m122 CD27m2344 CD27m4566 CD27m6788")
ax.set_ylabel("PDR total, weighted")
Out[79]:
In [80]:
# Box + strip plot of PDR_total per protocol subset, CD27p rows only.
ax = sns.boxplot(data=CD27p, x="protocol", y="PDR_total", linewidth=1.5, palette='Set2')
sns.stripplot(data=CD27p, x="protocol", y="PDR_total", color=".25", linewidth=1.5, jitter=True, ax=ax)
# The tick labels are hidden; the x-axis label below spells out the subset names by hand instead.
# NOTE(review): this assumes the categories are drawn in the listed order - confirm against the data.
ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.set_title("PDR total per cell, CD27p, all chromosomes, weighted")
ax.set_xlabel("CD27p122 CD27p2344 CD27p4566 CD27p6788")
ax.set_ylabel("PDR total, weighted")
Out[80]:
In [81]:
# Box + strip plot of PDR_total per protocol subset, CD27m rows only.
ax = sns.boxplot(data=CD27m, x="protocol", y="PDR_total", linewidth=1.5, palette='Set1')
sns.stripplot(data=CD27m, x="protocol", y="PDR_total", color=".25", linewidth=1.5, jitter=True, ax=ax)
# The tick labels are hidden; the x-axis label below spells out the subset names by hand instead.
# NOTE(review): this assumes the categories are drawn in the listed order - confirm against the data.
ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.set_title("PDR total per cell, CD27m, all chromosomes, weighted")
ax.set_xlabel("CD27m122 CD27m2344 CD27m4566 CD27m6788")
ax.set_ylabel("PDR total, weighted")
Out[81]:
In [82]:
# Mean and then standard deviation of PDR_unweighted for each protocol subset.
# Each section lists the CD27p subsets, a separator, then the CD27m subsets.
_cd27p_batches = [("CD27p122", CD27p122), ("CD27p2344", CD27p2344),
                  ("CD27p4566", CD27p4566), ("CD27p6788", CD27p6788)]
_cd27m_batches = [("CD27m122", CD27m122), ("CD27m2344", CD27m2344),
                  ("CD27m4566", CD27m4566), ("CD27m6788", CD27m6788)]
for _stat, _header in (("mean", "MEAN: PDR, unweighted"), ("std", "STD: PDR, unweighted")):
    if _stat == "std":
        # Three-line divider between the mean section and the std section.
        print("***")
        print("*** *** *** *** *** ***")
        print("***")
    print(_header)
    for _label, _batch in _cd27p_batches:
        print(_label)
        print(getattr(_batch['PDR_unweighted'], _stat)())
    print("***")
    for _label, _batch in _cd27m_batches:
        print(_label)
        print(getattr(_batch['PDR_unweighted'], _stat)())
In [83]:
# Jittered strip plot of PDR_unweighted for the two CD27 groups, coloured by "protocol".
ax = sns.stripplot(data=CD27cells, x="bio", y="PDR_unweighted", hue="protocol", linewidth=1.5, jitter=True)
ax.set_title("PDR per cell, CD27p vs. CD27m, all chromosomes, unweighted")
ax.set_xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
# The plotted column is PDR_unweighted, so the label matches the other unweighted PDR figures
# (it previously read "PDR total, unweighted").
ax.set_ylabel("PDR, unweighted")
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)
Out[83]:
In [84]:
# Box + strip plot of PDR_unweighted per protocol subset, for both CD27 groups.
ax = sns.boxplot(data=CD27cells, x="protocol", y="PDR_unweighted", linewidth=1.5, palette='Set2')
sns.stripplot(data=CD27cells, x="protocol", y="PDR_unweighted", color=".25", linewidth=1.5, jitter=True, ax=ax)
# The tick labels are hidden; the x-axis label below spells out the subset names by hand instead.
# NOTE(review): this assumes the categories are drawn in the listed order - confirm against the data.
ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.set_title("PDR per cell, CD27p vs. CD27m, all chromosomes, unweighted")
ax.set_xlabel("CD27p122 CD27p2344 CD27p4566 CD27p6788 CD27m122 CD27m2344 CD27m4566 CD27m6788")
ax.set_ylabel("PDR, unweighted")
Out[84]:
In [85]:
# Box + strip plot of PDR_unweighted per protocol subset, CD27p rows only.
ax = sns.boxplot(data=CD27p, x="protocol", y="PDR_unweighted", linewidth=1.5, palette='Set2')
sns.stripplot(data=CD27p, x="protocol", y="PDR_unweighted", color=".25", linewidth=1.5, jitter=True, ax=ax)
# The tick labels are hidden; the x-axis label below spells out the subset names by hand instead.
# NOTE(review): this assumes the categories are drawn in the listed order - confirm against the data.
ax.xaxis.set_major_formatter(plt.NullFormatter())
ax.set_title("PDR per cell, CD27p, all chromosomes, unweighted")
ax.set_xlabel("CD27p122 CD27p2344 CD27p4566 CD27p6788")
ax.set_ylabel("PDR, unweighted")
Out[85]:
In [86]:
# Box + strip plot of PDR_unweighted per protocol subset, CD27m rows only.
ax = sns.boxplot(data=CD27m, x="protocol", y="PDR_unweighted", linewidth=1.5, palette='Set1')
sns.stripplot(data=CD27m, x="protocol", y="PDR_unweighted", color=".25", linewidth=1.5, jitter=True, ax=ax)
# The tick labels are hidden; the x-axis label below spells out the subset names by hand instead.
# NOTE(review): this assumes the categories are drawn in the listed order - confirm against the data.
ax.xaxis.set_major_formatter(plt.NullFormatter())
# The title matches the CD27p counterpart; the plotted column is PDR_unweighted, not PDR_total.
ax.set_title("PDR per cell, CD27m, all chromosomes, unweighted")
ax.set_xlabel("CD27m122 CD27m2344 CD27m4566 CD27m6788")
ax.set_ylabel("PDR, unweighted")
Out[86]:
In [87]:
# Same CD27m PDR_unweighted figure, zoomed to the 0.16-0.19 range on the y axis.
ax = sns.boxplot(data=CD27m, x="protocol", y="PDR_unweighted", linewidth=1.5, palette='Set1')
sns.stripplot(data=CD27m, x="protocol", y="PDR_unweighted", color=".25", linewidth=1.5, jitter=True, ax=ax)
# The tick labels are hidden; the x-axis label below spells out the subset names by hand instead.
# NOTE(review): this assumes the categories are drawn in the listed order - confirm against the data.
ax.xaxis.set_major_formatter(plt.NullFormatter())
# The title matches the CD27p counterpart; the plotted column is PDR_unweighted, not PDR_total.
ax.set_title("PDR per cell, CD27m, all chromosomes, unweighted")
ax.set_xlabel("CD27m122 CD27m2344 CD27m4566 CD27m6788")
ax.set_ylabel("PDR, unweighted")
ax.set_ylim(0.16, 0.19)
Out[87]:
In [88]:
# Box + strip plot of weighted methylation for the two CD27 groups, with a significance bracket.
ax = sns.boxplot(data=CD27cells, x="bio", y="methylation", linewidth=1.5, palette='Set2')
sns.stripplot(data=CD27cells, x="bio", y="methylation", color=".25", linewidth=1.5, jitter=True, ax=ax)
ax.set_title("Methylation per cell, CD27p vs. CD27m, all chromosomes, weighted")
ax.set_xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
x1, x2 = 0, 1  # x positions of the two categories being compared (the first category sits at 0)
# The bracket starts 0.025 above the largest value; h is the height of its end ticks.
y, h, col = CD27cells["methylation"].max() + .025, .01, 'k'
ax.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
# NOTE(review): the p-value is hard-coded; confirm it still matches the test output for this column.
ax.text((x1+x2)*.5, y+h, r'p = $0.0177$', ha='center', va='bottom', color=col)
ax.set_ylabel("Methylation")
# NOTE(review): fixed y range - check that the bracket and its label still fall inside it.
ax.set_ylim(0.40, 0.63)
Out[88]:
In [89]:
# Mean, then standard deviation, of weighted methylation; CD27p is printed before CD27m each time.
for _stat in ("mean", "std"):
    for _group in (CD27p, CD27m):
        print(getattr(_group['methylation'], _stat)())
In [90]:
import scipy.stats

# Student's t-test on weighted methylation, CD27p versus CD27m. It is a parametric test that
# assumes the data are normally distributed.
t, p = scipy.stats.ttest_ind(CD27p['methylation'], CD27m['methylation'])
print("t-statistic is " + str(t))
print("p-value is " + str(p))

# Non-parametric alternative: the Mann-Whitney U test (also called the Wilcoxon rank-sum test,
# or "ranksum" in MATLAB). The two-sided p-value is requested explicitly. Doubling the returned
# value is only valid when SciPy returns a one-sided p-value, and it over-reports by 2x on
# releases whose default is already two-sided.
# `z` holds the U statistic; the name is kept so existing references still work.
z, p = scipy.stats.mannwhitneyu(CD27p['methylation'], CD27m['methylation'], alternative='two-sided')
p_value = p
print("Mann-Whitney U statistic is " + str(z))
print("p-value is " + str(p_value))
In [91]:
# Box plot of PDR_total per "bio" group with the individual cells overlaid
# as jittered points.
ax = sns.boxplot(x=CD27cells["bio"], y=CD27cells["PDR_total"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27cells["bio"], y=CD27cells["PDR_total"], color=".25", linewidth=1.5, jitter=True)
# `sns.plt` is not exposed by current seaborn releases; call pyplot directly.
plt.title("PDR per cell, CD27p vs. CD27m, all chromosomes, weighted")
plt.xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
plt.ylabel("PDR")
Out[91]:
In [92]:
# Summary statistics of PDR_total: CD27p first, then CD27m.
pdr_total_by_group = [CD27p['PDR_total'], CD27m['PDR_total']]
for values in pdr_total_by_group:
    print(values.mean())  # mean
for values in pdr_total_by_group:
    print(values.std())  # standard deviation
In [93]:
# Compare PDR_total between CD27p and CD27m cells with two tests.
t, p = ttest_ind(CD27p.PDR_total, CD27m.PDR_total)
print(str("t-statistic is ") + str(t))
print(str("p-value is ") + str(p))
#
# The classic Student's t-test is a parametric test that assumes the data are normally distributed.
# A good non-parametric alternative to the t-test is the Mann-Whitney U-test (called ranksum in MATLAB,
# also known as the Wilcoxon rank-sum or Mann-Whitney-Wilcoxon test).
#
import scipy.stats
# Ask for the two-sided p-value explicitly instead of doubling the result:
# doubling was only valid for the legacy one-sided default, and with a
# two-sided default it overstates the p-value (it can even exceed 1).
# Which of the two U values is returned depends on the SciPy version.
u_stat, p_value = scipy.stats.mannwhitneyu(
    CD27p.PDR_total, CD27m.PDR_total, alternative="two-sided"
)
print(str("Mann-Whitney U statistic is ") + str(u_stat))
print(str("p-value is ") + str(p_value))
In [94]:
# Box plot of PDR_total per "bio" group with the individual cells overlaid,
# plus a significance bracket drawn between the two boxes.
ax = sns.boxplot(x=CD27cells["bio"], y=CD27cells["PDR_total"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27cells["bio"], y=CD27cells["PDR_total"], color=".25", linewidth=1.5, jitter=True)
# `sns.plt` is not exposed by current seaborn releases; call pyplot directly.
plt.title("PDR per cell, CD27p vs. CD27m, all chromosomes, weighted")
plt.xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
x1, x2 = 0, 1  # x positions of the two category boxes (first category is 0, see plt.xticks())
# Bracket geometry: y is the bracket base (just above the largest value), h its height.
y, h, col = CD27cells["PDR_total"].max() + .025, .01, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
# NOTE(review): the p-value is hard-coded text, presumably copied from a
# significance-test cell; update it by hand if the data change.
plt.text((x1+x2)*.5, y+h, r'p = $5.21 \times 10^{-25}$', ha='center', va='bottom', color=col)
plt.ylabel("PDR")
Out[94]:
In [95]:
# Box plot of unweighted methylation per "bio" group with the individual
# cells overlaid as jittered points.
ax = sns.boxplot(x=CD27cells["bio"], y=CD27cells["methylation_unweighted"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27cells["bio"], y=CD27cells["methylation_unweighted"], color=".25", linewidth=1.5, jitter=True)
# `sns.plt` is not exposed by current seaborn releases; call pyplot directly.
plt.title("Methylation per cell, CD27p vs. CD27m, all chromosomes, unweighted")
plt.xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
plt.ylabel("Methylation")
Out[95]:
In [96]:
# Summary statistics of unweighted methylation: CD27p first, then CD27m.
methylation_unweighted_by_group = [
    CD27p['methylation_unweighted'],
    CD27m['methylation_unweighted'],
]
for values in methylation_unweighted_by_group:
    print(values.mean())  # mean
for values in methylation_unweighted_by_group:
    print(values.std())  # standard deviation
In [97]:
# Compare unweighted methylation between CD27p and CD27m cells with two tests.
t, p = ttest_ind(CD27p.methylation_unweighted, CD27m.methylation_unweighted)
print(str("t-statistic is ") + str(t))
print(str("p-value is ") + str(p))
#
# The classic Student's t-test is a parametric test that assumes the data are normally distributed.
# A good non-parametric alternative to the t-test is the Mann-Whitney U-test (called ranksum in MATLAB,
# also known as the Wilcoxon rank-sum or Mann-Whitney-Wilcoxon test).
#
import scipy.stats
# Ask for the two-sided p-value explicitly instead of doubling the result:
# doubling was only valid for the legacy one-sided default, and with a
# two-sided default it overstates the p-value (it can even exceed 1).
# Which of the two U values is returned depends on the SciPy version.
u_stat, p_value = scipy.stats.mannwhitneyu(
    CD27p.methylation_unweighted, CD27m.methylation_unweighted, alternative="two-sided"
)
print(str("Mann-Whitney U statistic is ") + str(u_stat))
print(str("p-value is ") + str(p_value))
In [98]:
# Box plot of unweighted methylation per "bio" group with the individual
# cells overlaid, plus a significance bracket drawn between the two boxes.
ax = sns.boxplot(x=CD27cells["bio"], y=CD27cells["methylation_unweighted"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27cells["bio"], y=CD27cells["methylation_unweighted"], color=".25", linewidth=1.5, jitter=True)
# `sns.plt` is not exposed by current seaborn releases; call pyplot directly.
plt.title("Methylation per cell, CD27p vs. CD27m, all chromosomes, unweighted")
plt.xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
x1, x2 = 0, 1  # x positions of the two category boxes (first category is 0, see plt.xticks())
# Bracket geometry: y is the bracket base (just above the largest value), h its height.
y, h, col = CD27cells["methylation_unweighted"].max() + .025, .01, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
# NOTE(review): the p-value is hard-coded text, presumably copied from a
# significance-test cell; update it by hand if the data change.
plt.text((x1+x2)*.5, y+h, r'p = $1.62 \times 10^{-14}$', ha='center', va='bottom', color=col)
plt.ylabel("Methylation")
Out[98]:
In [99]:
# Box plot of unweighted PDR per "bio" group with the individual cells
# overlaid as jittered points.
ax = sns.boxplot(x=CD27cells["bio"], y=CD27cells["PDR_unweighted"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27cells["bio"], y=CD27cells["PDR_unweighted"], color=".25", linewidth=1.5, jitter=True)
# `sns.plt` is not exposed by current seaborn releases; call pyplot directly.
plt.title("PDR per cell, CD27p vs. CD27m, all chromosomes, unweighted")
plt.xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
plt.ylabel("PDR")
Out[99]:
In [100]:
# Summary statistics of unweighted PDR: CD27p first, then CD27m.
pdr_unweighted_by_group = [CD27p['PDR_unweighted'], CD27m['PDR_unweighted']]
for values in pdr_unweighted_by_group:
    print(values.mean())  # mean
for values in pdr_unweighted_by_group:
    print(values.std())  # standard deviation
In [101]:
# Compare unweighted PDR between CD27p and CD27m cells with two tests.
t, p = ttest_ind(CD27p.PDR_unweighted, CD27m.PDR_unweighted)
print(str("t-statistic is ") + str(t))
print(str("p-value is ") + str(p))
#
# The classic Student's t-test is a parametric test that assumes the data are normally distributed.
# A good non-parametric alternative to the t-test is the Mann-Whitney U-test (called ranksum in MATLAB,
# also known as the Wilcoxon rank-sum or Mann-Whitney-Wilcoxon test).
#
import scipy.stats
# Ask for the two-sided p-value explicitly instead of doubling the result:
# doubling was only valid for the legacy one-sided default, and with a
# two-sided default it overstates the p-value (it can even exceed 1).
# Which of the two U values is returned depends on the SciPy version.
u_stat, p_value = scipy.stats.mannwhitneyu(
    CD27p.PDR_unweighted, CD27m.PDR_unweighted, alternative="two-sided"
)
print(str("Mann-Whitney U statistic is ") + str(u_stat))
print(str("p-value is ") + str(p_value))
In [102]:
# Box plot of unweighted PDR per "bio" group with the individual cells
# overlaid, plus a significance bracket drawn between the two boxes.
ax = sns.boxplot(x=CD27cells["bio"], y=CD27cells["PDR_unweighted"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27cells["bio"], y=CD27cells["PDR_unweighted"], color=".25", linewidth=1.5, jitter=True)
# `sns.plt` is not exposed by current seaborn releases; call pyplot directly.
plt.title("PDR per cell, CD27p vs. CD27m, all chromosomes, unweighted")
plt.xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
x1, x2 = 0, 1  # x positions of the two category boxes (first category is 0, see plt.xticks())
# Bracket geometry: y is the bracket base (just above the largest value), h its height.
y, h, col = CD27cells["PDR_unweighted"].max() + .025, .01, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
# NOTE(review): the p-value is hard-coded text, presumably copied from a
# significance-test cell; update it by hand if the data change.
plt.text((x1+x2)*.5, y+h, r'p = $5.00 \times 10^{-25}$', ha='center', va='bottom', color=col)
plt.ylabel("PDR")
Out[102]:
In [103]:
# Strip plot of weighted methylation by "type", coloured by "bio" group.
# seaborn is already imported as `sns` in the notebook's import cell.
sns.set(style="whitegrid", palette="muted")
muted = ["#4878CF", "#6ACC65", "#D65F5F", "#B47CC7", "#C4AD66", "#77BEDB"]
# Fixed colour per "bio" value so the groups keep the same colour across figures.
newPal = dict(normal_B = muted[5], CD19CD27m = muted[2],CD19CD27p = muted[1], CD19p = muted[3], CLL=muted[0])
ax = sns.stripplot(x=final_real2["type"], y=final_real2["methylation"], hue=final_real2.bio, jitter=True, palette=newPal)
# `sns.plt` is not exposed by current seaborn releases; call pyplot directly.
plt.title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
# Anchor the legend's upper-left corner just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.ylabel("Methylation")
Out[103]:
In [104]:
# Strip plot of PDR_total by "type", coloured by "bio" group.
# seaborn is already imported as `sns` in the notebook's import cell.
sns.set(style="whitegrid", palette="muted")
muted = ["#4878CF", "#6ACC65", "#D65F5F", "#B47CC7", "#C4AD66", "#77BEDB"]
# Fixed colour per "bio" value so the groups keep the same colour across figures.
newPal = dict(normal_B = muted[5], CD19CD27m = muted[2],CD19CD27p = muted[1], CD19p = muted[3], CLL=muted[0])
ax = sns.stripplot(x=final_real2["type"], y=final_real2["PDR_total"], hue=final_real2.bio, jitter=True, palette=newPal)
# `sns.plt` is not exposed by current seaborn releases; call pyplot directly.
plt.title("Biological sorting: PDR per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
# Anchor the legend's upper-left corner just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.ylabel("PDR")
Out[104]:
In [105]:
# Strip plot of unweighted methylation by "type", coloured by "bio" group.
# seaborn is already imported as `sns` in the notebook's import cell.
sns.set(style="whitegrid", palette="muted")
muted = ["#4878CF", "#6ACC65", "#D65F5F", "#B47CC7", "#C4AD66", "#77BEDB"]
# Fixed colour per "bio" value so the groups keep the same colour across figures.
newPal = dict(normal_B = muted[5], CD19CD27m = muted[2],CD19CD27p = muted[1], CD19p = muted[3], CLL=muted[0])
ax = sns.stripplot(x=final_real2["type"], y=final_real2["methylation_unweighted"], hue=final_real2.bio, jitter=True, palette=newPal)
# `sns.plt` is not exposed by current seaborn releases; call pyplot directly.
plt.title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, unweighted")
plt.xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
# Anchor the legend's upper-left corner just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.ylabel("Methylation")
Out[105]:
In [106]:
# Strip plot of unweighted PDR by "type", coloured by "bio" group.
sns.set(style="whitegrid", palette="muted")
muted = ["#4878CF", "#6ACC65", "#D65F5F", "#B47CC7", "#C4AD66", "#77BEDB"]
# Fixed colour per "bio" value so the groups keep the same colour across figures.
newPal = dict(normal_B = muted[5], CD19CD27m = muted[2],CD19CD27p = muted[1], CD19p = muted[3], CLL=muted[0])
ax = sns.stripplot(x=final_real2["type"], y=final_real2["PDR_unweighted"], hue=final_real2.bio, jitter=True, palette=newPal)
# `sns.plt` is not exposed by current seaborn releases; call pyplot directly.
plt.title("Biological sorting: PDR per cell, normal vs CLL, all chromosomes, unweighted")
plt.xlabel("Normal cells = 342 *.anno files; CLL cells = 104 *.anno files")
# Anchor the legend's upper-left corner just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.ylabel("PDR")
Out[106]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: