Directory Information
| Directory | |
|---|---|
| Libref | CEGS |
| Engine | V9 |
| Physical Name | /home/jfear/mclab/cegs_ase_paper/sas_data |
| Filename | /home/jfear/mclab/cegs_ase_paper/sas_data |
| Inode Number | 6 |
| Access Permission | rwx------ |
| Owner Name | jfear |
| File Size (bytes) | 0 |
In [1]:
/* Assign the CEGS library and list its members (NODETAILS suppresses the
   per-member detail columns). */
libname CEGS '/home/jfear/mclab/cegs_ase_paper/sas_data/';
proc datasets library=cegs nodetails;
run;
/* PROC DATASETS is interactive: without QUIT it stays active until the next
   step boundary is submitted. */
quit;
Out[1]:
In [2]:
/* Disabled preview of CEGS.r2_ct_models. A block comment is used because a
   "* ...;" comment stops at the first semicolon, which would leave the
   trailing RUN statement active. */
/* proc print data=CEGS.r2_ct_models (obs=10); run; */
Out[2]:
In [3]:
/* Disabled preview of CEGS.CIS_EST_V13. A block comment is used because a
   "* ...;" comment stops at the first semicolon, which would leave the
   trailing RUN statement active. */
/* proc print data=CEGS.CIS_EST_V13 (obs=10); run; */
Out[3]:
In [4]:
/* Assign the DMEL library and list its members (NODETAILS suppresses the
   per-member detail columns). */
libname DMEL '/home/jfear/mclab/useful_dmel_data/flybase551/sasdata';
proc datasets library=DMEL nodetails;
run;
/* PROC DATASETS is interactive: without QUIT it stays active until the next
   step boundary is submitted. */
quit;
Out[4]:
In [5]:
/* Preview the first 10 observations of DMEL.FB551_SI_FUSIONS_UNIQUE_FLAGGED. */
proc print data=DMEL.FB551_SI_FUSIONS_UNIQUE_FLAGGED (obs=10);
run;
Out[5]:
In [6]:
/* Preview the first 10 observations of CEGS.ai_reg_fit_full2. */
proc print data=CEGS.ai_reg_fit_full2 (obs=10);
run;
Out[6]:
In [7]:
/* Preview the first 10 observations of CEGS.ai_reg_parms_full. */
proc print data=CEGS.ai_reg_parms_full (obs=10);
run;
Out[7]:
In [8]:
/* Preview the first 10 observations of CEGS.ai_reg_fit_full. */
proc print data=CEGS.ai_reg_fit_full (obs=10);
run;
Out[8]:
In [9]:
/* Build a small working copy (first 10 rows only) of the cis estimates and
   add the interaction term int = c_i * t_i_1a. */
data cis_int;
    set cegs.cis_est_v13 (obs=10);
    int = c_i * t_i_1a;
run;

/* BY-group processing in PROC REG needs the rows ordered by the BY variables. */
proc sort data=cis_int;
    by fusion_id mating_status;
run;

/* Full model: q5_mean_theta regressed on c_i, t_i_1a and their product,
   fit separately for every fusion_id x mating_status group.
   Parameter estimates go to WORK.parms_full, fit statistics to WORK.fit_full. */
proc reg data=cis_int;
    by fusion_id mating_status;
    model q5_mean_theta = c_i t_i_1a int;
    ods output ParameterEstimates=parms_full fitstatistics=fit_full;
run;
/* PROC REG is interactive; QUIT ends it explicitly rather than leaving it
   active until the next step is submitted. */
quit;
Out[9]:
In [10]:
/* Preview the first 10 observations of CEGS.r2_ct_models. */
proc print data=CEGS.r2_ct_models (obs=10);
run;
Out[10]:
In [11]:
/* Distribution summary of the R-square columns with normality tests (NORMAL)
   and plots (PLOT). Only the first 2 observations are read (obs=2). */
proc univariate data=CEGS.r2_ct_models (obs=2) normal plot;
    var R2_full
        R2_noint
        R2_diff_int
        R2_diff_trans
        r2_cis;
run;
Out[11]:
Now that I have selected a model, I need to make a dataset with significance flags. While Lauren made a dataset for comparing model fit, I need to make my own significance flags for the coefficients using the probt values. I am going to go ahead and merge mated and virgin side-by-side.
WORK.merge_sig
In [12]:
/* Disabled preview of CEGS.ai_reg_parms_full. A block comment is used because
   a "* ...;" comment stops at the first semicolon, which would leave the
   trailing RUN statement active. */
/* proc print data=CEGS.ai_reg_parms_full (obs=10); run; */
Out[12]:
In [13]:
/*
 * Mated (mating_status = 'M') significance flags.
 *
 * For each model term (c_i, T_i_1a, int) keep one row per fusion_id with a
 * 0/1 flag that is 1 when the term's p-value (probt) is <= 0.05, then merge
 * the three flags side by side into WORK.merge_sig_m.
 *
 * A missing probt is flagged 0. SAS orders a missing numeric below every
 * number, so a bare "probt le 0.05" evaluates TRUE for missing p-values and
 * would mark them as significant.
 *
 * NOTE(review): the BY merge assumes CEGS.ai_reg_parms_full is already
 * ordered by fusion_id within a mating status - confirm.
 */
data sig_cis;
    set CEGS.ai_reg_parms_full;
    where Variable eq 'c_i' and mating_status eq 'M';
    flag_sig_cis_m = (not missing(probt) and probt le 0.05);
    keep fusion_id flag_sig_cis_m;
run;

data sig_trans;
    set CEGS.ai_reg_parms_full;
    where Variable eq 'T_i_1a' and mating_status eq 'M';
    flag_sig_trans_m = (not missing(probt) and probt le 0.05);
    keep fusion_id flag_sig_trans_m;
run;

data sig_int;
    set CEGS.ai_reg_parms_full;
    where Variable eq 'int' and mating_status eq 'M';
    flag_sig_int_m = (not missing(probt) and probt le 0.05);
    keep fusion_id flag_sig_int_m;
run;

/* One row per fusion_id carrying the three mated flags. */
data merge_sig_m;
    merge sig_cis sig_trans sig_int;
    by fusion_id;
run;
Out[13]:
In [14]:
/*
 * Virgin (mating_status = 'V') significance flags.
 *
 * For each model term (c_i, T_i_1a, int) keep one row per fusion_id with a
 * 0/1 flag that is 1 when the term's p-value (probt) is <= 0.05, then merge
 * the three flags side by side into WORK.merge_sig_v.
 *
 * A missing probt is flagged 0. SAS orders a missing numeric below every
 * number, so a bare "probt le 0.05" evaluates TRUE for missing p-values and
 * would mark them as significant.
 *
 * NOTE(review): the BY merge assumes CEGS.ai_reg_parms_full is already
 * ordered by fusion_id within a mating status - confirm.
 */
data sig_cis;
    set CEGS.ai_reg_parms_full;
    where Variable eq 'c_i' and mating_status eq 'V';
    flag_sig_cis_v = (not missing(probt) and probt le 0.05);
    keep fusion_id flag_sig_cis_v;
run;

data sig_trans;
    set CEGS.ai_reg_parms_full;
    where Variable eq 'T_i_1a' and mating_status eq 'V';
    flag_sig_trans_v = (not missing(probt) and probt le 0.05);
    keep fusion_id flag_sig_trans_v;
run;

data sig_int;
    set CEGS.ai_reg_parms_full;
    where Variable eq 'int' and mating_status eq 'V';
    flag_sig_int_v = (not missing(probt) and probt le 0.05);
    keep fusion_id flag_sig_int_v;
run;

/* One row per fusion_id carrying the three virgin flags. */
data merge_sig_v;
    merge sig_cis sig_trans sig_int;
    by fusion_id;
run;
Out[14]:
In [15]:
/* Put the mated and virgin flag sets side by side, matched on fusion_id. */
data merge_sig;
    merge merge_sig_m
          merge_sig_v;
    by fusion_id;
run;

/* Order the combined table by fusion_id. */
proc sort data=merge_sig;
    by fusion_id;
run;
Out[15]:
In [16]:
/* Preview the first 10 observations of WORK.merge_sig. */
proc print data=merge_sig (obs=10);
run;
Out[16]:
In [17]:
/* One-way frequency tables of the three mated significance flags. */
proc freq data=merge_sig;
    tables flag_sig_cis_m
           flag_sig_trans_m
           flag_sig_int_m;
run;
Out[17]:
In [18]:
/* One-way frequency tables of the three virgin significance flags. */
proc freq data=merge_sig;
    tables flag_sig_cis_v
           flag_sig_trans_v
           flag_sig_int_v;
run;
Out[18]:
In [19]:
/* Mated */
/* Percent of rows per fusion_id whose flag_ai_combined_m is set.
   Result: WORK.m_freq_ai with fusion_id and m_pct_ai (0-100). */
data mated;
    set CEGS.clean_ase_sbs;
    keep line fusion_id flag_ai_combined_m;
run;

/* PROC MEANS with a BY statement stops with an error when the input is not
   ordered by the BY variable, so order the working copy first. PROC SORT is
   stable, so already-ordered input is left unchanged. */
proc sort data=mated;
    by fusion_id;
run;

proc means data=mated noprint;
    by fusion_id;
    output out=sum sum(flag_ai_combined_m)=sum_ai;
run;

/* _FREQ_ is the number of rows in the BY group, including rows whose flag is
   missing, so the denominator is all rows for the fusion. */
data m_freq_ai;
    set sum;
    if _FREQ_ gt 0 then m_pct_ai = sum_ai / _FREQ_ * 100;
    else m_pct_ai = 0;
    keep fusion_id m_pct_ai;
run;
Out[19]:
In [20]:
/* Virgin */
/* Percent of rows per fusion_id whose flag_ai_combined_v is set.
   Result: WORK.v_freq_ai with fusion_id and v_pct_ai (0-100). */
data virgin;
    set CEGS.clean_ase_sbs;
    keep line fusion_id flag_ai_combined_v;
run;

/* PROC MEANS with a BY statement stops with an error when the input is not
   ordered by the BY variable, so order the working copy first. PROC SORT is
   stable, so already-ordered input is left unchanged. */
proc sort data=virgin;
    by fusion_id;
run;

proc means data=virgin noprint;
    by fusion_id;
    output out=sum sum(flag_ai_combined_v)=sum_ai;
run;

/* _FREQ_ is the number of rows in the BY group, including rows whose flag is
   missing, so the denominator is all rows for the fusion. */
data v_freq_ai;
    set sum;
    if _FREQ_ gt 0 then v_pct_ai = sum_ai / _FREQ_ * 100;
    else v_pct_ai = 0;
    keep fusion_id v_pct_ai;
run;
Out[20]:
In [21]:
/* Merge */
/* Combine the mated and virgin percentages into one row per fusion_id. */
data pct_ai;
    merge m_freq_ai
          v_freq_ai;
    by fusion_id;
run;

/* Preview the first 10 observations of the combined table. */
proc print data=pct_ai (obs=10);
run;
Out[21]:
In [22]:
/* Join the percent-AI table with the significance flags on fusion_id and
   replace every missing percentage or flag with 0, so fusions present in
   only one input still carry complete values.
   MISSING() is used instead of comparing to the character literal '.', which
   forces an implicit character-to-numeric conversion on every row. */
data pct_ai_model;
    merge pct_ai merge_sig;
    by fusion_id;

    array _fill0 {*} m_pct_ai v_pct_ai
                     flag_sig_cis_m flag_sig_trans_m flag_sig_int_m
                     flag_sig_cis_v flag_sig_trans_v flag_sig_int_v;
    do _k = 1 to dim(_fill0);
        if missing(_fill0{_k}) then _fill0{_k} = 0;
    end;
    drop _k;
run;
Out[22]:
In [23]:
/* Preview the first 10 observations of WORK.pct_ai_model. */
proc print data=pct_ai_model (obs=10);
run;
Out[23]:
In [24]:
/* Pull the fusion-to-gene columns out of the DMEL fusion table. */
data genes;
    set DMEL.FB551_SI_FUSIONS_UNIQUE_FLAGGED
        (keep=fusion_id FBgn_cat symbol_cat genes_per_fusion);
run;

/* Order by fusion_id so the table can be merged on it. */
proc sort data=genes;
    by fusion_id;
run;

/* Preview the first 10 observations. */
proc print data=genes (obs=10);
run;
Out[24]:
In [25]:
/* Attach gene annotation to every row of pct_ai_model; rows that exist only
   in genes are discarded. */
data mg;
    merge pct_ai_model (in=in1)
          genes        (in=in2);
    by fusion_id;
    if not in1 then delete;
run;
Out[25]:
In [26]:
/* Preview the first 10 observations of WORK.mg. */
proc print data=mg (obs=10);
run;
Out[26]:
In [27]:
/* Keep only the rows where genes_per_fusion equals 1. */
data noMulitGene;
    set mg (where=(genes_per_fusion eq 1));
run;
Out[27]:
In [28]:
/* Preview the first 10 observations of WORK.noMulitGene. */
proc print data=noMulitGene (obs=10);
run;
Out[28]:
In [29]:
/* Order by gene so the statistics can be computed per FBgn_cat. */
proc sort data=noMulitGene;
    by FBgn_cat;
run;

/* Per gene: mean percent AI for mated and virgin, and the number of rows
   with each significance flag set. */
proc means data=noMulitGene noprint;
    by FBgn_cat;
    output out=means
        mean(m_pct_ai v_pct_ai) = m_pct_ai_bar v_pct_ai_bar
        sum(flag_sig_cis_m flag_sig_trans_m flag_sig_int_m
            flag_sig_cis_v flag_sig_trans_v flag_sig_int_v) =
            flag_sig_cis_m_sum flag_sig_trans_m_sum flag_sig_int_m_sum
            flag_sig_cis_v_sum flag_sig_trans_v_sum flag_sig_int_v_sum;
run;

/* Collapse each count to an indicator: any positive count becomes 1,
   other values are left as they are. */
data means;
    set means;
    array _cnt {*} flag_sig_cis_m_sum flag_sig_trans_m_sum flag_sig_int_m_sum
                   flag_sig_cis_v_sum flag_sig_trans_v_sum flag_sig_int_v_sum;
    do _j = 1 to dim(_cnt);
        if _cnt{_j} > 0 then _cnt{_j} = 1;
    end;
    drop _j;
run;
Out[29]:
In [30]:
/* Preview the first 10 observations of WORK.means. */
proc print data=means (obs=10);
run;
Out[30]:
In [31]:
/* Show every row of WORK.noMulitGene for gene FBgn0000064. */
proc print data=noMulitGene;
    where FBgn_cat eq 'FBgn0000064';
run;
Out[31]:
Clone the GitHub repository that I created, which contains some transcription factor gene lists.
WORK.TF2
In [32]:
%%shell
# Clone the gene-list repository into /home/jfear/devel.
# Chained with && so the clone is skipped when the directory change fails,
# rather than cloning into whatever the current directory happens to be.
cd /home/jfear/devel && git clone https://github.com/Oliver-Lab/genelists.git
Import gene list
In [33]:
/* Read the gene list file as CSV into WORK.tf, replacing any existing copy.
   GETNAMES=NO: the first line is data, so columns get default names (VAR1, ...). */
proc import
        datafile='!HOME/devel/genelists/transcription_factors/Rhee_2014/genesList'
        out=tf
        dbms=csv
        replace;
    getnames=no;
run;
Out[33]:
In [34]:
/* Preview the first 10 observations of WORK.tf. */
proc print data=tf (obs=10);
run;
Out[34]:
In [35]:
/* Order the imported list by its identifier column so it can be merged. */
proc sort data=tf;
    by VAR1;
run;
Out[35]:
In [36]:
/* Extract the primary_fbgn column from DMEL.FBGN2COORD. */
data FBgns;
    set DMEL.FBGN2COORD (keep=primary_fbgn);
run;

/* Order by primary_fbgn and drop duplicate records. */
proc sort data=FBgns nodups;
    by primary_FBgn;
run;
Out[36]:
In [37]:
/* Find identifiers that are in the imported list (tf) but absent from FBgns. */
data mgFbgn_Test;
    merge FBgns (in=in_annot)
          tf    (in=in_list rename=(VAR1=primary_fbgn));
    by primary_fbgn;
    if in_annot or not in_list then delete;
run;

/* Print every unmatched identifier. */
proc print data=mgFbgn_Test;
run;
Out[37]:
There are 2 FBgns that are not in my annotation.
FBgn0014467 comes up as FBgn0265784 (Dmel\CrebB)
FBgn0083919 comes up as FBgn0265991 (Dmel\Zasp52)
Check if these are in my big FBgn List.
In [38]:
/* Check whether the two replacement identifiers are present in FBgns. */
proc print data=FBgns (where=(primary_fbgn in ('FBgn0265784', 'FBgn0265991')));
run;
Out[38]:
Yes they are present, so I can just rename FBgn0014467 and FBgn0083919.
In [39]:
/* Copy tf, replacing FBgn0014467 and FBgn0083919 with FBgn0265784 and
   FBgn0265991, and write the column out under the name FBgn_cat. */
data tf2;
    set tf;
    select (VAR1);
        when ('FBgn0014467') VAR1 = 'FBgn0265784';
        when ('FBgn0083919') VAR1 = 'FBgn0265991';
        otherwise;
    end;
    rename VAR1 = FBgn_cat;
run;
Out[39]:
In [40]:
/* Order the corrected list by FBgn_cat. */
proc sort data=tf2;
    by FBgn_cat;
run;

/* Repeat the check: identifiers in tf2 that are absent from FBgns. */
data mgFbgn_Test;
    merge FBgns (in=in_annot)
          tf2   (in=in_list rename=(FBgn_cat=primary_fbgn));
    by primary_fbgn;
    if in_annot or not in_list then delete;
run;
Out[40]:
In [41]:
/* Keep every row of means and add flag_tf: 1 when the gene is also in TF2,
   0 otherwise. Rows found only in TF2 are dropped. */
data CEGS.mgTFsig;
    merge TF2   (in=is_tf)
          means (in=has_stats);
    by FBgn_cat;
    if has_stats;
    flag_tf = is_tf;
run;
Out[41]:
In [42]:
/* Preview the first 10 observations of CEGS.mgTFsig. */
proc print data=CEGs.mgTFsig (obs=10);
run;
Out[42]:
In [43]:
/* Cross-tabulate flag_tf against the mated cis flag, with chi-square tests. */
proc freq data=CEGs.mgTFsig;
    tables flag_tf * flag_sig_cis_m_sum / chisq;
run;
Out[43]:
In [44]:
/* Cross-tabulate flag_tf against the virgin cis flag, with chi-square tests. */
proc freq data=CEGs.mgTFsig;
    tables flag_tf * flag_sig_cis_v_sum / chisq;
run;
Out[44]:
In [45]:
/* Cross-tabulate flag_tf against the mated trans flag, with chi-square tests. */
proc freq data=CEGs.mgTFsig;
    tables flag_tf * flag_sig_trans_m_sum / chisq;
run;
Out[45]:
In [46]:
/* Cross-tabulate flag_tf against the virgin trans flag, with chi-square tests. */
proc freq data=CEGs.mgTFsig;
    tables flag_tf * flag_sig_trans_v_sum / chisq;
run;
Out[46]:
In [47]:
/* Cross-tabulate flag_tf against the mated interaction flag, with chi-square tests. */
proc freq data=CEGs.mgTFsig;
    tables flag_tf * flag_sig_int_m_sum / chisq;
run;
Out[47]:
In [48]:
/* Cross-tabulate flag_tf against the virgin interaction flag, with chi-square tests. */
proc freq data=CEGs.mgTFsig;
    tables flag_tf * flag_sig_int_v_sum / chisq;
run;
Out[48]:
In [49]:
/* Subset to genes whose mean mated percent AI is at least 50. */
data mm;
    set CEGS.mgTFsig (where=(m_pct_ai_bar ge 50));
run;

/* Within that subset, cross-tabulate flag_tf against each mated flag
   (cis, trans, interaction), with chi-square tests for every table. */
proc freq data=mm;
    tables flag_tf * (flag_sig_cis_m_sum
                      flag_sig_trans_m_sum
                      flag_sig_int_m_sum) / chisq;
run;
Out[49]:
In [50]:
/* Subset to genes whose mean virgin percent AI is at least 50. */
data vv;
    set CEGS.mgTFsig (where=(v_pct_ai_bar ge 50));
run;

/* Within that subset, cross-tabulate flag_tf against each virgin flag
   (cis, trans, interaction), with chi-square tests for every table. */
proc freq data=vv;
    tables flag_tf * (flag_sig_cis_v_sum
                      flag_sig_trans_v_sum
                      flag_sig_int_v_sum) / chisq;
run;
Out[50]:
In [ ]: