In [ ]:
%matplotlib inline

In [ ]:
import glob
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
# Show up to 50 columns when a DataFrame is displayed (this option limits columns, not rows).
pd.set_option('display.max_columns', 50)


import os
# Directory containing the RRBS statistics CSV. The default is the original
# author's location; set the RRBS_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
DATA_DIR = os.environ.get('RRBS_DATA_DIR', '/Users/evanbiederstedt/Downloads/RRBS_data_files')
os.chdir(DATA_DIR)

In [ ]:
stats = pd.read_csv("RRBS_anno_statistics_full_446files_filter50K.csv")

In [ ]:
stats.shape

In [ ]:
normal = stats[stats["type"]=="normal"]
CLL = stats[stats["type"]=="CLL"]

In [ ]:
len(normal)

In [ ]:
len(CLL)

In [ ]:
import seaborn as sns
sns.set_style("whitegrid")
ax = sns.stripplot(x=stats["type"], y=stats["methylation"])
sns.plt.title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("Percent Methylation, weighted")

In [ ]:
import seaborn as sns
sns.set_style("whitegrid")
ax = sns.stripplot(x=stats["type"], y=stats["methylation"], jitter=True)
sns.plt.title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("Percent Methylation, weighted")

In [ ]:
ax = sns.violinplot(x=stats["type"],  y=stats["methylation"])
sns.plt.title("Violin plot: Methylation per cell, normal vs CLL")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("Percent Methylation, weighted")
print("violin plot features a kernel density estimation of the underlying distribution")

In [ ]:
ax = sns.boxplot(x=stats["type"],y=stats["methylation"], linewidth=1.5)
sns.plt.title("Box Whisker plot: Methylation per cell, normal vs CLL")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("Percent Methylation, weighted")
print("Box whisker plot")

In [ ]:
ax = sns.boxplot(x=stats["type"],y=stats["methylation"], linewidth=1.5)
sns.plt.title("Violin plot: Methylation per cell, normal vs CLL")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("Percent Methylation, weighted")
print("Box whisker plot")
#plt.ylim(0.35, 0.7)

In [ ]:
ax = sns.boxplot(y=stats["type"], x=stats["methylation"], linewidth=1.5)
sns.plt.title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("Cell Type: Normal cells vs. CLL samples")

In [ ]:
ax = sns.boxplot(x=stats["type"], y=stats["methylation"], linewidth=1.5)
ax = sns.stripplot(x=stats["type"], y=stats["methylation"], color=".25", linewidth=1.5, jitter=True)
sns.plt.title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")

In [ ]:
combined = stats

In [ ]:
ax = sns.boxplot(y=combined["type"],  x=combined["methylation"], linewidth=1.5)
ax = sns.stripplot(y=combined["type"], x=combined["methylation"], color=".25", linewidth=1.5, jitter=True)
sns.plt.title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("Percent Methylation, weighted")

In [ ]:
ax = sns.boxplot(x=combined["type"], y=combined["methylation"], linewidth=1.5)
ax = sns.swarmplot(x=combined["type"], y=combined["methylation"], color=".25", linewidth=1.5)
sns.plt.title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("Percent Methylation, weighted")

In [ ]:
ax = sns.boxplot(y=combined["type"], x=combined["methylation"], linewidth=1.5)
ax = sns.swarmplot(y=combined["type"], x=combined["methylation"], color=".25", linewidth=1.5)
sns.plt.title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("Percent Methylation, weighted")

In [ ]:
ax = sns.boxplot(y=combined["type"], x=combined["methylation"], linewidth=1.5)
ax = sns.swarmplot(y=combined["type"], x=combined["methylation"], color=".25", linewidth=1.5)
sns.plt.title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("Percent Methylation, weighted")

In [ ]:
combined2 = stats

In [ ]:
import seaborn as sns
sns.set_style("whitegrid")
ax = sns.stripplot(x=combined2["type"], y=combined2["methylation"], hue=combined2.bio)
sns.plt.title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.ylabel("Percent Methylation, weighted")

In [ ]:
import seaborn as sns
sns.set(style="whitegrid", palette="muted")
ax = sns.stripplot(x=combined2["type"], y=combined2["methylation"], hue=combined2.bio, 
                   jitter=True, palette=dict(normal_B = 'b', CD19CD27m = 'g', CD19CD27p = 'r', CLL='m', CD19p = 'c'))
sns.plt.title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("Percent Methylation, weighted")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)

In [ ]:
import seaborn as sns
sns.set_style("whitegrid")
ax = sns.stripplot(x=combined2["type"], y=combined2["methylation"], hue=combined2.bio, jitter=True)
sns.plt.title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.ylabel("Percent Methylation, weighted")

In [ ]:
ax = sns.boxplot(x=combined2["type"],y=combined2["methylation"], linewidth=1.5)
plt.ylim(0.15,0.8)
sns.set_style("whitegrid")
ax = sns.stripplot(x=combined2["type"], y=combined2["methylation"], hue=combined2.bio, jitter=True, linewidth=1.5)
sns.plt.title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("Percent Methylation, weighted")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)

In [ ]:
combined2.boxplot(column = 'methylation', by='type', fontsize=13)

sns.set_style("whitegrid")
ax = sns.stripplot(x=combined2["type"], y=combined2["methylation"], hue=combined2.bio, jitter=True, linewidth=1.5)
sns.plt.title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("Percent Methylation, weighted")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)

In [ ]:
combined2.boxplot(column = 'methylation', by='type', fontsize=13)
plt.title('corrected')

In [ ]:
ax = sns.violinplot(x=combined2["type"],y=combined2["methylation"], palette="Set3")
sns.set_style("whitegrid")
ax = sns.stripplot(x=combined2["type"], y=combined2["methylation"], hue=combined2.bio, jitter=True, linewidth=1.5)
sns.plt.title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("Percent Methylation, weighted")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)

In [ ]:
sns.set_style("whitegrid")
ax = sns.stripplot(x=combined2["type"], y=combined2["methylation"], hue=combined2.protocol, jitter=True)
sns.plt.title("Technical/protocol sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("Percent Methylation, weighted")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)

In [ ]:
sns.set_style("whitegrid")
ax = sns.stripplot(x=combined2["type"],y=combined2["methylation"], hue=combined2.protocol, jitter=True, linewidth=1.0)
sns.plt.title("Technical/protocol sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("Percent Methylation, weighted")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)

In [ ]:
ax = sns.violinplot(x=combined2["type"], y=combined2["methylation"], palette="Set1")
sns.set_style("whitegrid")
ax = sns.stripplot(x=combined2["type"], y=combined2["methylation"], hue=combined2.protocol, jitter=True, linewidth=1.0)
sns.plt.title("Technical/protocol sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("Percent Methylation, weighted")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)

In [ ]:
ax = sns.boxplot(x=combined["type"], y=combined["methylation"], linewidth=1.5)
ax = sns.stripplot(x=combined["type"], y=combined["methylation"], color=".25", linewidth=1.5, jitter=True)
sns.plt.title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")

In [ ]:
from scipy.stats import ttest_ind

cat1 = combined[combined['type']=='CLL']
cat2 = combined[combined['type']=='normal']

t, p = ttest_ind(cat1.methylation, cat2.methylation)
print(str("t-statistic is ") + str(t))
print(str("p-value is ") + str(p))

In [ ]:
import scipy.stats
z, p = scipy.stats.mannwhitneyu(cat1.methylation, cat2.methylation)
p_value = p * 2
print(str("Mann-Whitney U statistic is ") + str(z))
print(str("p-value is ") + str(p_value))

In [ ]:
sns.boxplot(x="bio", y="methylation", hue="type", data=combined2, palette="PRGn")
sns.despine(offset=10, trim=True)

In [ ]:
sns.plt.title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
sns.boxplot(x="bio", y="methylation", data=combined2)
sns.despine(offset=10, trim=True)

In [ ]:
sns.plt.title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
sns.boxplot(x="bio", y="methylation", data=combined2)
sns.stripplot(x="bio", y="methylation", data=combined2, color=".25", linewidth=0.5, jitter=True)
sns.despine(offset=10, trim=True)

In [ ]:
sns.plt.title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
sns.violinplot(x="bio", y="methylation", data=combined2)
sns.despine(offset=10, trim=True)

In [ ]:
sns.plt.title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
sns.stripplot(x="bio", y="methylation", data=combined2, color=".25", linewidth=0.5, jitter=True)
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
sns.violinplot(x="bio", y="methylation", data=combined2)
sns.despine(offset=10, trim=True)

In [ ]:
final_real2 = stats

In [ ]:
norm = final_real2[final_real2['type'] == 'normal']
print(len(norm))
cll = final_real2[final_real2['type'] == 'CLL']
print(len(cll))

In [ ]:
ax = sns.boxplot(x=final_real2["type"], y=final_real2["methylation"], linewidth=1.5)
ax = sns.stripplot(x=final_real2["type"], y=final_real2["methylation"], color=".25", linewidth=1.5, jitter=True)
sns.plt.title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")

In [ ]:
print(norm['methylation'].mean())  # mean
print(cll['methylation'].mean())

print(norm['methylation'].std())   # standard deviation
print(cll['methylation'].std())

In [ ]:
t, p = ttest_ind(norm.methylation, cll.methylation)
print(str("t-statistic is ") + str(t))
print(str("p-value is ") + str(p))

#
# The classic Student's t-test is a parametric test that only works when the data are normally distributed.
# A good non-parametric alternative to the t-test is the Mann-Whitney U-test (called ranksum in matlab, 
#     or Wilcoxon test, or Man-Whitney-Wilcoxon)
#
import scipy.stats
z, p = scipy.stats.mannwhitneyu(norm.methylation, cll.methylation)
p_value = p * 2
print(str("Mann-Whitney U statistic is ") + str(z))
print(str("p-value is ") + str(p_value))

In [ ]:
ax = sns.boxplot(x=final_real2["type"], y=final_real2["methylation"], linewidth=1.5)
ax = sns.stripplot(x=final_real2["type"], y=final_real2["methylation"], color=".25", linewidth=1.5, jitter=True)
sns.plt.title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
x1, x2 = 0, 1   # columns 'Sat' and 'Sun' (first column: 0, see plt.xticks())
y, h, col = final_real2["methylation"].max() + .025, .01, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r'p = $3.56 \times 10^{-20}$', ha='center', va='bottom', color=col)

In [ ]:
ax = sns.boxplot(x=final_real2["type"], y=final_real2["PDR_total"], linewidth=1.5)
ax = sns.stripplot(x=final_real2["type"], y=final_real2["PDR_total"], color=".25", linewidth=1.5, jitter=True)
sns.plt.title("PDR per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("PDR")

In [ ]:
print(norm['PDR_total'].mean())  # mean
print(cll['PDR_total'].mean())

print(norm['PDR_total'].std())   # standard deviation
print(cll['PDR_total'].std())

In [ ]:
t, p = ttest_ind(norm.PDR_total, cll.PDR_total)
print(str("t-statistic is ") + str(t))
print(str("p-value is ") + str(p))

#
# The classic Student's t-test is a parametric test that only works when the data are normally distributed.
# A good non-parametric alternative to the t-test is the Mann-Whitney U-test (called ranksum in matlab, 
#     or Wilcoxon test, or Man-Whitney-Wilcoxon)
#
import scipy.stats
z, p = scipy.stats.mannwhitneyu(norm.PDR_total, cll.PDR_total)
p_value = p * 2
print(str("Mann-Whitney U statistic is ") + str(z))
print(str("p-value is ") + str(p_value))

In [ ]:
ax = sns.boxplot(x=final_real2["type"], y=final_real2["PDR_total"], linewidth=1.5)
ax = sns.stripplot(x=final_real2["type"], y=final_real2["PDR_total"], color=".25", linewidth=1.5, jitter=True)
sns.plt.title("PDR per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
x1, x2 = 0, 1   # columns 'Sat' and 'Sun' (first column: 0, see plt.xticks())
y, h, col = final_real2["PDR_total"].max() + .025, .01, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r'p = $1.30 \times 10^{-31}$', ha='center', va='bottom', color=col)
plt.ylabel("PDR")
plt.savefig('PDR_normal_CLL_weighted.eps', format='eps', dpi=1000000)

In [ ]:
ax = sns.boxplot(x=final_real2["type"], y=final_real2["methylation_unweighted"], linewidth=1.5)
ax = sns.stripplot(x=final_real2["type"], y=final_real2["methylation_unweighted"], color=".25", linewidth=1.5, jitter=True)
sns.plt.title("Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")

In [ ]:
print(norm['methylation_unweighted'].mean())  # mean
print(cll['methylation_unweighted'].mean())

print(norm['methylation_unweighted'].std())   # standard deviation
print(cll['methylation_unweighted'].std())

In [ ]:
t, p = ttest_ind(norm.methylation_unweighted, cll.methylation_unweighted)
print(str("t-statistic is ") + str(t))
print(str("p-value is ") + str(p))

#
# The classic Student's t-test is a parametric test that only works when the data are normally distributed.
# A good non-parametric alternative to the t-test is the Mann-Whitney U-test (called ranksum in matlab, 
#     or Wilcoxon test, or Man-Whitney-Wilcoxon)
#
import scipy.stats
z, p = scipy.stats.mannwhitneyu(norm.methylation_unweighted, cll.methylation_unweighted)
p_value = p * 2
print(str("Mann-Whitney U statistic is ") + str(z))
print(str("p-value is ") + str(p_value))

In [ ]:
ax = sns.boxplot(x=final_real2["type"], y=final_real2["methylation_unweighted"], linewidth=1.5)
ax = sns.stripplot(x=final_real2["type"], y=final_real2["methylation_unweighted"], color=".25", linewidth=1.5, jitter=True)
sns.plt.title("Methylation per cell, normal vs CLL, all chromosomes, unweighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
x1, x2 = 0, 1   # columns 'Sat' and 'Sun' (first column: 0, see plt.xticks())
y, h, col = final_real2["methylation_unweighted"].max() + .025, .01, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r'p = $1.86 \times 10^{-9}$', ha='center', va='bottom', color=col)
plt.ylabel("Methylation")

In [ ]:
ax = sns.boxplot(x=final_real2["type"], y=final_real2["PDR_unweighted"], linewidth=1.5)
ax = sns.stripplot(x=final_real2["type"], y=final_real2["PDR_unweighted"], color=".25", linewidth=1.5, jitter=True)
sns.plt.title("PDR per cell, normal vs CLL, all chromosomes, unweighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
plt.ylabel("PDR")

In [ ]:
print(norm['PDR_unweighted'].mean())  # mean
print(cll['PDR_unweighted'].mean())

print(norm['PDR_unweighted'].std())   # standard deviation
print(cll['PDR_unweighted'].std())

In [ ]:
t, p = ttest_ind(norm.PDR_unweighted, cll.PDR_unweighted)
print(str("t-statistic is ") + str(t))
print(str("p-value is ") + str(p))

#
# The classic Student's t-test is a parametric test that only works when the data are normally distributed.
# A good non-parametric alternative to the t-test is the Mann-Whitney U-test (called ranksum in matlab, 
#     or Wilcoxon test, or Man-Whitney-Wilcoxon)
#
import scipy.stats
z, p = scipy.stats.mannwhitneyu(norm.PDR_unweighted, cll.PDR_unweighted)
p_value = p * 2
print(str("Mann-Whitney U statistic is ") + str(z))
print(str("p-value is ") + str(p_value))

In [ ]:
ax = sns.boxplot(x=final_real2["type"], y=final_real2["PDR_unweighted"], linewidth=1.5)
ax = sns.stripplot(x=final_real2["type"], y=final_real2["PDR_unweighted"], color=".25", linewidth=1.5, jitter=True)
sns.plt.title("PDR per cell, normal vs CLL, all chromosomes, unweighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
x1, x2 = 0, 1   # columns 'Sat' and 'Sun' (first column: 0, see plt.xticks())
y, h, col = final_real2["PDR_unweighted"].max() + .025, .01, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r'p = $1.11 \times 10^{-29}$', ha='center', va='bottom', color=col)
plt.ylabel("PDR")

In [ ]:
CD27p = final_real2[final_real2['bio'] == 'CD19CD27p']
print(len(CD27p))
CD27m = final_real2[final_real2['bio'] == 'CD19CD27m']
print(len(CD27m))

In [ ]:
# CD19CD27m

CD27cells = pd.concat([CD27p, CD27m])

In [ ]:
CD27cells = CD27cells.reset_index(drop=True)

In [ ]:
ax = sns.boxplot(x=CD27cells["bio"], y=CD27cells["methylation"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27cells["bio"], y=CD27cells["methylation"], color=".25", linewidth=1.5, jitter=True)
sns.plt.title("Methylation per cell, CD27p vs. CD27m, all chromosomes, weighted")
plt.xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
plt.ylabel("Methylation")

In [ ]:
print(CD27p['methylation'].mean())  # mean
print(CD27m['methylation'].mean())

print(CD27p['methylation'].std())   # standard deviation
print(CD27m['methylation'].std())

In [ ]:
t, p = ttest_ind(CD27p.methylation, CD27m.methylation)
print(str("t-statistic is ") + str(t))
print(str("p-value is ") + str(p))

#
# The classic Student's t-test is a parametric test that only works when the data are normally distributed.
# A good non-parametric alternative to the t-test is the Mann-Whitney U-test (called ranksum in matlab, 
#     or Wilcoxon test, or Man-Whitney-Wilcoxon)
#
import scipy.stats
z, p = scipy.stats.mannwhitneyu(CD27p.methylation, CD27m.methylation)
p_value = p * 2
print(str("Mann-Whitney U statistic is ") + str(z))
print(str("p-value is ") + str(p_value))

In [ ]:
ax = sns.stripplot(x=CD27cells["bio"], y=CD27cells["methylation"], hue=CD27cells.protocol, linewidth=1.5, jitter=True)
sns.plt.title("Methylation per cell, CD27p vs. CD27m, all chromosomes, weighted")
plt.xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
plt.ylabel("Methylation")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)

In [ ]:
CD27p122 = CD27p[CD27p['protocol'] == 'NormalBCD19pCD27pcell1_22_']
CD27p2344 = CD27p[CD27p['protocol'] == 'NormalBCD19pCD27pcell23_44']
CD27p4566 = CD27p[CD27p['protocol'] == 'NormalBCD19pCD27pcell45_66']
CD27p6788 = CD27p[CD27p['protocol'] == 'NormalBCD19pCD27pcell67_88']

CD27m122 = CD27m[CD27m['protocol'] == 'NormalBCD19pCD27mcell1_22_']
CD27m2344 = CD27m[CD27m['protocol'] == 'NormalBCD19pCD27mcell23_44']
CD27m4566 = CD27m[CD27m['protocol'] == 'NormalBCD19pCD27mcell45_66']
CD27m6788 = CD27m[CD27m['protocol'] == 'NormalBCD19pCD27mcell67_88']

In [ ]:
print("MEAN")
print("CD27p122")
print(CD27p122['methylation'].mean()) 
print("CD27p2344")
print(CD27p2344['methylation'].mean())
print("CD27p4566")
print(CD27p4566['methylation'].mean()) 
print("CD27p6788")
print(CD27p6788['methylation'].mean())


print("***")
print("CD27m122")
print(CD27m122['methylation'].mean()) 
print("CD27m2344")
print(CD27m2344['methylation'].mean())
print("CD27m4566")
print(CD27m4566['methylation'].mean()) 
print("CD27m6788")
print(CD27m6788['methylation'].mean())

In [ ]:
print("total")
print("CD27p122")
print(len(CD27p122['methylation']))
print("CD27p2344")
print(len(CD27p2344['methylation']))
print("CD27p4566")
print(len(CD27p4566['methylation']))
print("CD27p6788")
print(len(CD27p6788['methylation']))


print("***")
print("CD27m122")
print(len(CD27m122['methylation']))
print("CD27m2344")
print(len(CD27m2344['methylation']))
print("CD27m4566")
print(len(CD27m4566['methylation']))
print("CD27m6788")
print(len(CD27m6788['methylation']))

In [ ]:
print("STD")
print("CD27p122")
print(CD27p122['methylation'].std()) 
print("CD27p2344")
print(CD27p2344['methylation'].std())
print("CD27p4566")
print(CD27p4566['methylation'].std()) 
print("CD27p6788")
print(CD27p6788['methylation'].std())


print("***")
print("CD27m122")
print(CD27m122['methylation'].std()) 
print("CD27m2344")
print(CD27m2344['methylation'].std())
print("CD27m4566")
print(CD27m4566['methylation'].std()) 
print("CD27m6788")
print(CD27m6788['methylation'].std())

In [ ]:
ax = sns.boxplot(x=CD27cells["protocol"], y=CD27cells["methylation"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27cells["protocol"], y=CD27cells["methylation"], color=".25", linewidth=1.5, jitter=True)
ax.xaxis.set_major_formatter(plt.NullFormatter())
sns.plt.title("Methylation per cell, CD27p vs. CD27m, all chromosomes, weighted")
plt.xlabel("CD27p122 CD27p2344 CD27p4566 CD27p6788 CD27m122 CD27m2344 CD27m4566 CD27m6788")
plt.ylabel("Methylation")

In [ ]:
ax = sns.boxplot(x=CD27p["protocol"], y=CD27p["methylation"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27p["protocol"], y=CD27p["methylation"], color=".25", linewidth=1.5, jitter=True)
ax.xaxis.set_major_formatter(plt.NullFormatter())
sns.plt.title("Methylation per cell, CD27p, all chromosomes, weighted")
plt.xlabel("CD27p122 CD27p2344 CD27p4566 CD27p6788")
plt.ylabel("Methylation")

In [ ]:
ax = sns.boxplot(x=CD27m["protocol"], y=CD27m["methylation"], linewidth=1.5, palette='Set1')
ax = sns.stripplot(x=CD27m["protocol"], y=CD27m["methylation"], color=".25", linewidth=1.5, jitter=True)
ax.xaxis.set_major_formatter(plt.NullFormatter())
sns.plt.title("Methylation per cell, CD27m, all chromosomes, weighted")
plt.xlabel("CD27m122 CD27m2344 CD27m4566 CD27m6788")
plt.ylabel("Methylation")

In [ ]:
print("MEAN: methylation unweighted")
print("CD27p122")
print(CD27p122['methylation_unweighted'].mean()) 
print("CD27p2344")
print(CD27p2344['methylation_unweighted'].mean())
print("CD27p4566")
print(CD27p4566['methylation_unweighted'].mean()) 
print("CD27p6788")
print(CD27p6788['methylation_unweighted'].mean())


print("***")
print("CD27m122")
print(CD27m122['methylation_unweighted'].mean()) 
print("CD27m2344")
print(CD27m2344['methylation_unweighted'].mean())
print("CD27m4566")
print(CD27m4566['methylation_unweighted'].mean()) 
print("CD27m6788")
print(CD27m6788['methylation_unweighted'].mean())

In [ ]:
print("STD: methylation unweighted")
print("CD27p122")
print(CD27p122['methylation_unweighted'].std()) 
print("CD27p2344")
print(CD27p2344['methylation_unweighted'].std())
print("CD27p4566")
print(CD27p4566['methylation_unweighted'].std()) 
print("CD27p6788")
print(CD27p6788['methylation_unweighted'].std())


print("***")
print("CD27m122")
print(CD27m122['methylation_unweighted'].std()) 
print("CD27m2344")
print(CD27m2344['methylation_unweighted'].std())
print("CD27m4566")
print(CD27m4566['methylation_unweighted'].std()) 
print("CD27m6788")
print(CD27m6788['methylation_unweighted'].std())

In [ ]:
ax = sns.stripplot(x=CD27cells["bio"], y=CD27cells["methylation_unweighted"], hue=CD27cells.protocol, linewidth=1.5, jitter=True)
sns.plt.title("Methylation per cell, CD27p vs. CD27m, all chromosomes, unweighted")
plt.xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
plt.ylabel("Methylation")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)

In [ ]:
ax = sns.boxplot(x=CD27cells["protocol"], y=CD27cells["methylation_unweighted"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27cells["protocol"], y=CD27cells["methylation_unweighted"], color=".25", linewidth=1.5, jitter=True)
ax.xaxis.set_major_formatter(plt.NullFormatter())
sns.plt.title("Methylation per cell, CD27p vs. CD27m, all chromosomes, unweighted")
plt.xlabel("CD27p122 CD27p2344 CD27p4566 CD27p6788 CD27m122 CD27m2344 CD27m4566 CD27m6788")
plt.ylabel("Methylation")

In [ ]:
ax = sns.boxplot(x=CD27p["protocol"], y=CD27p["methylation_unweighted"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27p["protocol"], y=CD27p["methylation_unweighted"], color=".25", linewidth=1.5, jitter=True)
ax.xaxis.set_major_formatter(plt.NullFormatter())
sns.plt.title("Methylation per cell, CD27p, all chromosomes, unweighted")
plt.xlabel("CD27p122 CD27p2344 CD27p4566 CD27p6788")
plt.ylabel("Methylation")

In [ ]:
ax = sns.boxplot(x=CD27m["protocol"], y=CD27m["methylation_unweighted"], linewidth=1.5, palette='Set1')
ax = sns.stripplot(x=CD27m["protocol"], y=CD27m["methylation_unweighted"], color=".25", linewidth=1.5, jitter=True)
ax.xaxis.set_major_formatter(plt.NullFormatter())
sns.plt.title("Methylation per cell, CD27m, all chromosomes, unweighted")
plt.xlabel("CD27m122 CD27m2344 CD27m4566 CD27m6788")
plt.ylabel("Methylation")

In [ ]:
# (label, subset) pairs in reporting order. Each label is printed followed by one summary
# value, with "***" separating the CD27p subsets from the CD27m subsets.
cd27p_batches = [("CD27p122", CD27p122), ("CD27p2344", CD27p2344),
                 ("CD27p4566", CD27p4566), ("CD27p6788", CD27p6788)]
cd27m_batches = [("CD27m122", CD27m122), ("CD27m2344", CD27m2344),
                 ("CD27m4566", CD27m4566), ("CD27m6788", CD27m6788)]

# Mean of "PDR_total" for each protocol subset.
print("MEAN: PDR, weighted")
for label, batch in cd27p_batches:
    print(label)
    print(batch['PDR_total'].mean())
print("***")
for label, batch in cd27m_batches:
    print(label)
    print(batch['PDR_total'].mean())

# Visual separator between the mean and standard-deviation sections.
print("***")
print("*** *** *** *** *** ***")
print("***")

# Standard deviation of "PDR_total" for each protocol subset.
print("STD: PDR, weighted")
for label, batch in cd27p_batches:
    print(label)
    print(batch['PDR_total'].std())
print("***")
for label, batch in cd27m_batches:
    print(label)
    print(batch['PDR_total'].std())

In [ ]:
ax = sns.stripplot(x=CD27cells["bio"], y=CD27cells["PDR_total"], hue=CD27cells.protocol, linewidth=1.5, jitter=True)
sns.plt.title("PDR total per cell, CD27p vs. CD27m, all chromosomes, weighted")
plt.xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
plt.ylabel("PDR total, weighted")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)

In [ ]:
ax = sns.boxplot(x=CD27cells["protocol"], y=CD27cells["PDR_total"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27cells["protocol"], y=CD27cells["PDR_total"], color=".25", linewidth=1.5, jitter=True)
ax.xaxis.set_major_formatter(plt.NullFormatter())
sns.plt.title("PDR total per cell, CD27p vs. CD27m, all chromosomes, weighted")
plt.xlabel("CD27p122 CD27p2344 CD27p4566 CD27p6788 CD27m122 CD27m2344 CD27m4566 CD27m6788")
plt.ylabel("PDR total, weighted")

In [ ]:
ax = sns.boxplot(x=CD27p["protocol"], y=CD27p["PDR_total"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27p["protocol"], y=CD27p["PDR_total"], color=".25", linewidth=1.5, jitter=True)
ax.xaxis.set_major_formatter(plt.NullFormatter())
sns.plt.title("PDR total per cell, CD27p, all chromosomes, weighted")
plt.xlabel("CD27p122 CD27p2344 CD27p4566 CD27p6788")
plt.ylabel("PDR total, weighted")

In [ ]:
ax = sns.boxplot(x=CD27m["protocol"], y=CD27m["PDR_total"], linewidth=1.5, palette='Set1')
ax = sns.stripplot(x=CD27m["protocol"], y=CD27m["PDR_total"], color=".25", linewidth=1.5, jitter=True)
ax.xaxis.set_major_formatter(plt.NullFormatter())
sns.plt.title("PDR total per cell, CD27m, all chromosomes, weighted")
plt.xlabel("CD27m122 CD27m2344 CD27m4566 CD27m6788")
plt.ylabel("PDR total, weighted")

In [ ]:
# (label, subset) pairs in reporting order. Each label is printed followed by one summary
# value, with "***" separating the CD27p subsets from the CD27m subsets.
cd27p_batches = [("CD27p122", CD27p122), ("CD27p2344", CD27p2344),
                 ("CD27p4566", CD27p4566), ("CD27p6788", CD27p6788)]
cd27m_batches = [("CD27m122", CD27m122), ("CD27m2344", CD27m2344),
                 ("CD27m4566", CD27m4566), ("CD27m6788", CD27m6788)]

# Mean of "PDR_unweighted" for each protocol subset.
print("MEAN: PDR, unweighted")
for label, batch in cd27p_batches:
    print(label)
    print(batch['PDR_unweighted'].mean())
print("***")
for label, batch in cd27m_batches:
    print(label)
    print(batch['PDR_unweighted'].mean())

# Visual separator between the mean and standard-deviation sections.
print("***")
print("*** *** *** *** *** ***")
print("***")

# Standard deviation of "PDR_unweighted" for each protocol subset.
print("STD: PDR, unweighted")
for label, batch in cd27p_batches:
    print(label)
    print(batch['PDR_unweighted'].std())
print("***")
for label, batch in cd27m_batches:
    print(label)
    print(batch['PDR_unweighted'].std())

In [ ]:
ax = sns.stripplot(x=CD27cells["bio"], y=CD27cells["PDR_unweighted"], hue=CD27cells.protocol, linewidth=1.5, jitter=True)
sns.plt.title("PDR per cell, CD27p vs. CD27m, all chromosomes, unweighted")
plt.xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
plt.ylabel("PDR total, unweighted")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)

In [ ]:
ax = sns.boxplot(x=CD27cells["protocol"], y=CD27cells["PDR_unweighted"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27cells["protocol"], y=CD27cells["PDR_unweighted"], color=".25", linewidth=1.5, jitter=True)
ax.xaxis.set_major_formatter(plt.NullFormatter())
sns.plt.title("PDR per cell, CD27p vs. CD27m, all chromosomes, unweighted")
plt.xlabel("CD27p122 CD27p2344 CD27p4566 CD27p6788 CD27m122 CD27m2344 CD27m4566 CD27m6788")
plt.ylabel("PDR, unweighted")

In [ ]:
ax = sns.boxplot(x=CD27p["protocol"], y=CD27p["PDR_unweighted"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27p["protocol"], y=CD27p["PDR_unweighted"], color=".25", linewidth=1.5, jitter=True)
ax.xaxis.set_major_formatter(plt.NullFormatter())
sns.plt.title("PDR per cell, CD27p, all chromosomes, unweighted")
plt.xlabel("CD27p122 CD27p2344 CD27p4566 CD27p6788")
plt.ylabel("PDR, unweighted")

In [ ]:
ax = sns.boxplot(x=CD27m["protocol"], y=CD27m["PDR_unweighted"], linewidth=1.5, palette='Set1')
ax = sns.stripplot(x=CD27m["protocol"], y=CD27m["PDR_unweighted"], color=".25", linewidth=1.5, jitter=True)
ax.xaxis.set_major_formatter(plt.NullFormatter())
sns.plt.title("PDR total per cell, CD27m, all chromosomes, unweighted")
plt.xlabel("CD27m122 CD27m2344 CD27m4566 CD27m6788")
plt.ylabel("PDR, unweighted")

In [ ]:
ax = sns.boxplot(x=CD27cells["bio"], y=CD27cells["methylation"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27cells["bio"], y=CD27cells["methylation"], color=".25", linewidth=1.5, jitter=True)
sns.plt.title("Methylation per cell, CD27p vs. CD27m, all chromosomes, weighted")
plt.xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
x1, x2 = 0, 1   # columns 'Sat' and 'Sun' (first column: 0, see plt.xticks())
y, h, col = CD27cells["methylation"].max() + .025, .01, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r'p = $0.0232$', ha='center', va='bottom', color=col)
plt.ylabel("Methylation")
plt.ylim(0.40, 0.63)

In [ ]:
print(CD27p['methylation'].mean())  # mean
print(CD27m['methylation'].mean())

print(CD27p['methylation'].std())   # standard deviation
print(CD27m['methylation'].std())

In [ ]:
# Compare the "methylation" column of CD27p against CD27m.
# `ttest_ind` is expected to be in scope from an earlier cell.
t, p = ttest_ind(CD27p.methylation, CD27m.methylation)
print("t-statistic is " + str(t))
print("p-value is " + str(p))

#
# The classic Student's t-test is a parametric test that only works when the data are normally distributed.
# A good non-parametric alternative to the t-test is the Mann-Whitney U-test (called ranksum in matlab,
#     or Wilcoxon test, or Mann-Whitney-Wilcoxon)
#
import scipy.stats
# Request the two-sided p-value explicitly rather than doubling the returned value by hand:
# doubling over-reports p (and can exceed 1) on SciPy versions whose default is already two-sided.
z, p = scipy.stats.mannwhitneyu(CD27p.methylation, CD27m.methylation, alternative='two-sided')
p_value = p
print("Mann-Whitney U statistic is " + str(z))
print("p-value is " + str(p_value))

In [ ]:
# PDR_total per cell, split by the "bio" column of CD27cells:
# box plot with the individual cells overlaid as jittered points.
ax = sns.boxplot(x=CD27cells["bio"], y=CD27cells["PDR_total"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27cells["bio"], y=CD27cells["PDR_total"], color=".25", linewidth=1.5, jitter=True)
# Call pyplot directly: the `sns.plt` alias is not available in current seaborn releases.
plt.title("PDR per cell, CD27p vs. CD27m, all chromosomes, weighted")
plt.xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
plt.ylabel("PDR")

In [ ]:
# Summary statistics of the "PDR_total" column, one value per line, CD27p before CD27m.
print(CD27p.PDR_total.mean(), CD27m.PDR_total.mean(), sep="\n")  # mean

print(CD27p.PDR_total.std(), CD27m.PDR_total.std(), sep="\n")    # standard deviation

In [ ]:
# Compare the "PDR_total" column of CD27p against CD27m.
# `ttest_ind` is expected to be in scope from an earlier cell.
t, p = ttest_ind(CD27p.PDR_total, CD27m.PDR_total)
print("t-statistic is " + str(t))
print("p-value is " + str(p))

#
# The classic Student's t-test is a parametric test that only works when the data are normally distributed.
# A good non-parametric alternative to the t-test is the Mann-Whitney U-test (called ranksum in matlab,
#     or Wilcoxon test, or Mann-Whitney-Wilcoxon)
#
import scipy.stats
# Request the two-sided p-value explicitly rather than doubling the returned value by hand:
# doubling over-reports p (and can exceed 1) on SciPy versions whose default is already two-sided.
z, p = scipy.stats.mannwhitneyu(CD27p.PDR_total, CD27m.PDR_total, alternative='two-sided')
p_value = p
print("Mann-Whitney U statistic is " + str(z))
print("p-value is " + str(p_value))

In [ ]:
# PDR_total per cell, split by the "bio" column of CD27cells:
# box plot plus jittered points, with a significance bracket drawn above the data.
ax = sns.boxplot(x=CD27cells["bio"], y=CD27cells["PDR_total"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27cells["bio"], y=CD27cells["PDR_total"], color=".25", linewidth=1.5, jitter=True)
# Call pyplot directly: the `sns.plt` alias is not available in current seaborn releases.
plt.title("PDR per cell, CD27p vs. CD27m, all chromosomes, weighted")
plt.xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
x1, x2 = 0, 1   # x positions of the first two "bio" categories (first category: 0, see plt.xticks())
# Bracket geometry: legs start just above the tallest point (y) and rise by h; drawn in black.
y, h, col = CD27cells["PDR_total"].max() + .025, .01, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
# The p-value label is a hard-coded string, not computed in this cell.
plt.text((x1+x2)*.5, y+h, r'p = $1.69 \times 10^{-24}$', ha='center', va='bottom', color=col)
plt.ylabel("PDR")

In [ ]:
# methylation_unweighted per cell, split by the "bio" column of CD27cells:
# box plot with the individual cells overlaid as jittered points.
ax = sns.boxplot(x=CD27cells["bio"], y=CD27cells["methylation_unweighted"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27cells["bio"], y=CD27cells["methylation_unweighted"], color=".25", linewidth=1.5, jitter=True)
# Call pyplot directly: the `sns.plt` alias is not available in current seaborn releases.
plt.title("Methylation per cell, CD27p vs. CD27m, all chromosomes, unweighted")
plt.xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
plt.ylabel("Methylation")

In [ ]:
# Summary statistics of the "methylation_unweighted" column, one value per line, CD27p before CD27m.
print(CD27p.methylation_unweighted.mean(), CD27m.methylation_unweighted.mean(), sep="\n")  # mean

print(CD27p.methylation_unweighted.std(), CD27m.methylation_unweighted.std(), sep="\n")    # standard deviation

In [ ]:
# Compare the "methylation_unweighted" column of CD27p against CD27m.
# `ttest_ind` is expected to be in scope from an earlier cell.
t, p = ttest_ind(CD27p.methylation_unweighted, CD27m.methylation_unweighted)
print("t-statistic is " + str(t))
print("p-value is " + str(p))

#
# The classic Student's t-test is a parametric test that only works when the data are normally distributed.
# A good non-parametric alternative to the t-test is the Mann-Whitney U-test (called ranksum in matlab,
#     or Wilcoxon test, or Mann-Whitney-Wilcoxon)
#
import scipy.stats
# Request the two-sided p-value explicitly rather than doubling the returned value by hand:
# doubling over-reports p (and can exceed 1) on SciPy versions whose default is already two-sided.
z, p = scipy.stats.mannwhitneyu(CD27p.methylation_unweighted, CD27m.methylation_unweighted, alternative='two-sided')
p_value = p
print("Mann-Whitney U statistic is " + str(z))
print("p-value is " + str(p_value))

In [ ]:
# methylation_unweighted per cell, split by the "bio" column of CD27cells:
# box plot plus jittered points, with a significance bracket drawn above the data.
ax = sns.boxplot(x=CD27cells["bio"], y=CD27cells["methylation_unweighted"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27cells["bio"], y=CD27cells["methylation_unweighted"], color=".25", linewidth=1.5, jitter=True)
# Call pyplot directly: the `sns.plt` alias is not available in current seaborn releases.
plt.title("Methylation per cell, CD27p vs. CD27m, all chromosomes, unweighted")
plt.xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
x1, x2 = 0, 1   # x positions of the first two "bio" categories (first category: 0, see plt.xticks())
# Bracket geometry: legs start just above the tallest point (y) and rise by h; drawn in black.
y, h, col = CD27cells["methylation_unweighted"].max() + .025, .01, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
# The p-value label is a hard-coded string, not computed in this cell.
plt.text((x1+x2)*.5, y+h, r'p = $2.42 \times 10^{-14}$', ha='center', va='bottom', color=col)
plt.ylabel("Methylation")

In [ ]:
# PDR_unweighted per cell, split by the "bio" column of CD27cells:
# box plot with the individual cells overlaid as jittered points.
ax = sns.boxplot(x=CD27cells["bio"], y=CD27cells["PDR_unweighted"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27cells["bio"], y=CD27cells["PDR_unweighted"], color=".25", linewidth=1.5, jitter=True)
# Call pyplot directly: the `sns.plt` alias is not available in current seaborn releases.
plt.title("PDR per cell, CD27p vs. CD27m, all chromosomes, unweighted")
plt.xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
plt.ylabel("PDR")

In [ ]:
# Summary statistics of the "PDR_unweighted" column, one value per line, CD27p before CD27m.
print(CD27p.PDR_unweighted.mean(), CD27m.PDR_unweighted.mean(), sep="\n")  # mean

print(CD27p.PDR_unweighted.std(), CD27m.PDR_unweighted.std(), sep="\n")    # standard deviation

In [ ]:
# Compare the "PDR_unweighted" column of CD27p against CD27m.
# `ttest_ind` is expected to be in scope from an earlier cell.
t, p = ttest_ind(CD27p.PDR_unweighted, CD27m.PDR_unweighted)
print("t-statistic is " + str(t))
print("p-value is " + str(p))

#
# The classic Student's t-test is a parametric test that only works when the data are normally distributed.
# A good non-parametric alternative to the t-test is the Mann-Whitney U-test (called ranksum in matlab,
#     or Wilcoxon test, or Mann-Whitney-Wilcoxon)
#
import scipy.stats
# Request the two-sided p-value explicitly rather than doubling the returned value by hand:
# doubling over-reports p (and can exceed 1) on SciPy versions whose default is already two-sided.
z, p = scipy.stats.mannwhitneyu(CD27p.PDR_unweighted, CD27m.PDR_unweighted, alternative='two-sided')
p_value = p
print("Mann-Whitney U statistic is " + str(z))
print("p-value is " + str(p_value))

In [ ]:
# PDR_unweighted per cell, split by the "bio" column of CD27cells:
# box plot plus jittered points, with a significance bracket drawn above the data.
ax = sns.boxplot(x=CD27cells["bio"], y=CD27cells["PDR_unweighted"], linewidth=1.5, palette='Set2')
ax = sns.stripplot(x=CD27cells["bio"], y=CD27cells["PDR_unweighted"], color=".25", linewidth=1.5, jitter=True)
# Call pyplot directly: the `sns.plt` alias is not available in current seaborn releases.
plt.title("PDR per cell, CD27p vs. CD27m, all chromosomes, unweighted")
plt.xlabel("CD27p = 68 *.anno files; CD27m = 74 *.anno files")
x1, x2 = 0, 1   # x positions of the first two "bio" categories (first category: 0, see plt.xticks())
# Bracket geometry: legs start just above the tallest point (y) and rise by h; drawn in black.
y, h, col = CD27cells["PDR_unweighted"].max() + .025, .01, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
# The p-value label is a hard-coded string, not computed in this cell.
plt.text((x1+x2)*.5, y+h, r'p = $1.623 \times 10^{-24}$', ha='center', va='bottom', color=col)
plt.ylabel("PDR")

In [ ]:
# Strip plot of "methylation" by "type" from final_real2, coloured by the "bio" column.
import seaborn as sns
sns.set(style="whitegrid", palette="muted")
# Hex colours used to build an explicit bio-label -> colour mapping.
muted = ["#4878CF", "#6ACC65", "#D65F5F", "#B47CC7", "#C4AD66", "#77BEDB"]
newPal = {
    "normal_B": muted[5],
    "CD19CD27m": muted[2],
    "CD19CD27p": muted[1],
    "CD19p": muted[3],
    "CLL": muted[0],
}
ax = sns.stripplot(x=final_real2["type"], y=final_real2["methylation"], hue=final_real2.bio, jitter=True, palette=newPal)
# Call pyplot directly: the `sns.plt` alias is not available in current seaborn releases.
plt.title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
# Anchor the legend's upper-left corner just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.ylabel("Methylation")

In [ ]:
# Strip plot of "PDR_total" by "type" from final_real2, coloured by the "bio" column.
import seaborn as sns
sns.set(style="whitegrid", palette="muted")
# Hex colours used to build an explicit bio-label -> colour mapping.
muted = ["#4878CF", "#6ACC65", "#D65F5F", "#B47CC7", "#C4AD66", "#77BEDB"]
newPal = {
    "normal_B": muted[5],
    "CD19CD27m": muted[2],
    "CD19CD27p": muted[1],
    "CD19p": muted[3],
    "CLL": muted[0],
}
ax = sns.stripplot(x=final_real2["type"], y=final_real2["PDR_total"], hue=final_real2.bio, jitter=True, palette=newPal)
# Call pyplot directly: the `sns.plt` alias is not available in current seaborn releases.
plt.title("Biological sorting: PDR per cell, normal vs CLL, all chromosomes, weighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
# Anchor the legend's upper-left corner just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.ylabel("PDR")

In [ ]:
# Strip plot of "methylation_unweighted" by "type" from final_real2, coloured by the "bio" column.
import seaborn as sns
sns.set(style="whitegrid", palette="muted")
# Hex colours used to build an explicit bio-label -> colour mapping.
muted = ["#4878CF", "#6ACC65", "#D65F5F", "#B47CC7", "#C4AD66", "#77BEDB"]
newPal = {
    "normal_B": muted[5],
    "CD19CD27m": muted[2],
    "CD19CD27p": muted[1],
    "CD19p": muted[3],
    "CLL": muted[0],
}
ax = sns.stripplot(x=final_real2["type"], y=final_real2["methylation_unweighted"], hue=final_real2.bio, jitter=True, palette=newPal)
# Call pyplot directly: the `sns.plt` alias is not available in current seaborn releases.
plt.title("Biological sorting: Methylation per cell, normal vs CLL, all chromosomes, unweighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
# Anchor the legend's upper-left corner just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.ylabel("Methylation")

In [ ]:
# Strip plot of "PDR_unweighted" by "type" from final_real2, coloured by the "bio" column.
sns.set(style="whitegrid", palette="muted")
# Hex colours used to build an explicit bio-label -> colour mapping.
muted = ["#4878CF", "#6ACC65", "#D65F5F", "#B47CC7", "#C4AD66", "#77BEDB"]
newPal = {
    "normal_B": muted[5],
    "CD19CD27m": muted[2],
    "CD19CD27p": muted[1],
    "CD19p": muted[3],
    "CLL": muted[0],
}
ax = sns.stripplot(x=final_real2["type"], y=final_real2["PDR_unweighted"], hue=final_real2.bio, jitter=True, palette=newPal)
# Call pyplot directly: the `sns.plt` alias is not available in current seaborn releases.
plt.title("Biological sorting: PDR per cell, normal vs CLL, all chromosomes, unweighted")
plt.xlabel("Normal cells = 336 *.anno files; CLL cells = 102 *.anno files")
# Anchor the legend's upper-left corner just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.ylabel("PDR")

In [ ]:


In [ ]:


In [ ]:


In [ ]: