Looking at Normality

Structural Equation Modeling (SEM) assumes that the data are multivariate normal. We use normalized data sets for both the DSPR and the Tester-cross populations. This notebook looks at the distributions of the data for the genes in the sex hierarchy.


In [ ]:
# Run the start-up script before any other cell.
# NOTE(review): later cells use `plt` without importing it in this notebook,
# so it is presumably defined by this script -- confirm.
%run 'ipython_startup.py'

In [ ]:
# Import Additional Libs
import sas7bdat

In [ ]:
# Load the Tester-cross table from its SAS dataset into a DataFrame and
# use the 'line' column as the row index.
with sas7bdat.SAS7BDAT('../sas_data/cegsv_by_gene_sbs.sas7bdat') as sas_reader:
    dfCEGS = sas_reader.to_data_frame().set_index('line')

In [ ]:
# Keep every column whose name does not contain 'FBgn'.
# NOTE(review): presumably the remaining columns are the sex hierarchy genes
# described in the notebook introduction -- confirm against the SAS dataset.
dfCEGSSex = dfCEGS.loc[
    :, [col_name for col_name in dfCEGS.columns if 'FBgn' not in col_name]
]

In [ ]:
# Centre each column on zero by subtracting that column's own mean.
dfCEGSSex = dfCEGSSex.apply(lambda column: column.sub(column.mean()), axis=0)

# One histogram per column, arranged on a 4 x 5 grid.
# NOTE(review): the grid has 20 panels, so this assumes at most 20 columns.
fig = dfCEGSSex.plot(
    kind='hist',
    subplots=True,
    layout=(4, 5),
    figsize=(20, 10),
)

# Title and save the current pyplot figure.
plt.suptitle('Tester-cross F1-hybrid', fontsize=18)
plt.savefig(
    '../manuscript/images/cegsV_sex_det_distribution.png',
    bbox_inches='tight',
)

In [ ]:
# Load the DSPR table from its SAS dataset into a DataFrame and index the
# rows by the ('matRIL', 'patRIL') pair.
# NOTE(review): the file name is spelled 'dsrp', not 'dspr'; it is kept as is
# because it has to match the file on disk -- confirm.
with sas7bdat.SAS7BDAT('../sas_data/dsrp_sbs_gene_level_sym.sas7bdat') as sas_reader:
    dfDSPR = sas_reader.to_data_frame().set_index(['matRIL', 'patRIL'])

In [ ]:
# Keep every column whose name does not contain 'CG'.
# NOTE(review): presumably the remaining columns are the sex hierarchy genes
# described in the notebook introduction -- confirm against the SAS dataset.
dfDSPRSex = dfDSPR.loc[
    :, [col_name for col_name in dfDSPR.columns if 'CG' not in col_name]
]

In [ ]:
# One histogram per column, arranged on a 4 x 5 grid.
# NOTE(review): unlike the Tester-cross plotting cell, no mean-centring is
# applied before plotting here -- confirm this is intentional.
fig = dfDSPRSex.plot(
    kind='hist',
    subplots=True,
    layout=(4, 5),
    figsize=(20, 10),
)

# Title and save the current pyplot figure.
plt.suptitle('DSPR F1-hybrid', fontsize=18)
plt.savefig(
    '../manuscript/images/dspr_sex_det_distribution.png',
    bbox_inches='tight',
)

In [ ]: