In [1]:
import pyparanoid.pyparanoid as pp

In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Load the homolog-group size data for the Pseudo dataset. The call also
# prints a short summary (total groups / near-universal single-copy groups).
sizes = pp.get_groupsizes("../../data/Pseudo/")

# Wrap the two returned sequences as named Series so the plot axes pick up
# readable labels. Presumably sizes[0] is a per-family index and sizes[1] the
# member count of each family -- TODO confirm against pyparanoid.
x = pd.Series(sizes[0], name="gene families")
y = pd.Series(sizes[1], name="size of family")


24066 total groups in Pseudo
1784 groups are present in a single copy in >95% of strains.

In [4]:
# Scatter of family size against family index, drawn on the current axes.
ax = plt.gca()
ax.set_xlim(0, len(x))
# NOTE(review): the upper y-limit is scaled from y[1] (the entry labelled 1),
# not from max(y) as the rarefaction plot does -- confirm this is intentional.
ax.set_ylim(0, 1.1 * y[1])
# fit_reg=False: plot the points only, without a regression line.
ax = sns.regplot(x=x, y=y, fit_reg=False, ax=ax)



In [5]:
# Compute rarefaction data for the Pseudo dataset. The call reports progress
# on stdout (every 100 genomes in the recorded run) and prints "Done!" at the end.
# NOTE(review): likely slow on large datasets -- consider caching the result.
rarefaction = pp.get_rarefaction("../../data/Pseudo")


on genome number 100 of 203
on genome number 200 of 203
Done!

In [6]:
# Rarefaction scatter: element 0 of the result supplies the x values and
# element 1 the y values; the Series names become the axis labels.
x = pd.Series(rarefaction[0], name="strains sampled")
y = pd.Series(rarefaction[1], name="unique gene families")

# Leave 10% headroom beyond the largest value on each axis.
ax = plt.gca()
ax.set_xlim(0, 1.1 * max(x))
ax.set_ylim(0, 1.1 * max(y))
# fit_reg=False: plot the points only, without a regression line.
ax = sns.regplot(x=x, y=y, fit_reg=False, ax=ax)