In [1]:
# settings and modules
%config InlineBackend.figure_format = 'retina'
%pylab inline
from nsaba.nsaba import nsaba
from nsaba.nsaba import analysis


Populating the interactive namespace from numpy and matplotlib

In [2]:
# Input locations: the Neurosynth dump and the ABA donor microarray folder.
aba_path = '/Users/Torben/Documents/ABI analysis/normalized_microarray_donor9861/'
ns_path = "/Users/Torben/Documents/ABI analysis/current_data_new/"
# The cached gene-expression pickle sits inside the ABA donor folder.
ge_pickle_path = aba_path + 'Nsaba_ABA_ge.pkl'

# Both loaders are invoked on the Nsaba class itself, before any instance
# exists; Neurosynth is loaded first, then the ABA data.
nsaba.Nsaba.ns_load(ns_path)
nsaba.Nsaba.aba_load(aba_path)

# Working instance used by every later cell.
N = nsaba.Nsaba()
N.load_ge_pickle(pkl_file=ge_pickle_path)


This may take a minute or two ...
database.txt loaded.
features.txt loaded.
Nsaba.ns['mni_coords'] initialized.

This may take a minute or two ...
Initializing gene data from normalized_microarray_donor9861
SampleAnnot.csv loaded.
MicroarrayExpression.csv loaded.
Probes.csv loaded.
Nsaba.aba['mni_coords'] initialized.

This may take a minute or two ...
'ge' dictionary successfully loaded

Loading in terms with given method and smoothing parameters


In [3]:
# Neurosynth terms to estimate, in the order used throughout the notebook.
ns_terms = (
    'depression',
    'dopamine',
    'reward',
    'serotonin',
    'anxiety',
    'schizophrenia',
)

# Same estimator settings for every term.
for ns_term in ns_terms:
    N.get_ns_act(ns_term, thresh=-1, method='knn', smoothing='sum')


This may take a few minutes...
This may take a few minutes...
This may take a few minutes...
This may take a few minutes...
This may take a few minutes...
This may take a few minutes...

Loading in gene lists


In [4]:
# All gene lists are read from one folder; only the file name changes per term.
gene_list_dir = '/Users/Torben/Documents/ABI analysis/gene_collections/'

depression_genes = analysis.load_gene_list(gene_list_dir, 'DepressionGenes.csv')
dopamine_genes = analysis.load_gene_list(gene_list_dir, 'DopamineGenes2.csv')
reward_genes = analysis.load_gene_list(gene_list_dir, 'rewardGenes2.csv')
serotonin_genes = analysis.load_gene_list(gene_list_dir, 'SerotoninGenes.csv')
anxiety_genes = analysis.load_gene_list(gene_list_dir, 'AnxietyGenes.csv')
schizophrenia_genes = analysis.load_gene_list(gene_list_dir, 'SchizophreniaGenes.csv')

Performing a one-sample t test on the association scores of the genes linked to each term.

I.e., are these genes associated with this term more than would be expected by chance? I do this with four association measures: Pearson's r, Spearman's rho, the slope of a linear regression, and a t test.


In [5]:
import scipy.stats as stats

A = analysis.NsabaAnalysis(N)

# One row per term (in this order), one column per association method.
term_genes = [
    ('depression', depression_genes),
    ('dopamine', dopamine_genes),
    ('reward', reward_genes),
    ('serotonin', serotonin_genes),
    ('anxiety', anxiety_genes),
    ('schizophrenia', schizophrenia_genes),
]
methods = ['pearson', 'spearman', 'regression', 't_test']
all_analyses = np.zeros((len(term_genes), len(methods)))

# Methods form the outer loop so the calls run in the same order as before.
for m, method in enumerate(methods):
    for row, (term, genes) in enumerate(term_genes):
        # Element [0] of the validation result is tested against a mean of 0
        # (presumably the per-gene scores -- confirm against nsaba.analysis).
        scores = A.validate_with_t_test(term, genes, method=method, quant=85)[0]
        # ttest_1samp returns (statistic, p-value); only the p-value is stored.
        all_analyses[row, m] = stats.ttest_1samp(scores, 0)[1]

print(all_analyses)


To use inline plotting functionality in Jupyter, '%matplotlib inline' must be enabled
This may take a couple of minutes ...
This may take a couple of minutes ...
This may take a couple of minutes ...
This may take a couple of minutes ...
This may take a couple of minutes ...
This may take a couple of minutes ...
[[ 0.03221204  0.19113684  0.1112404   0.17955216]
 [ 0.02855893  0.01201999  0.02917607  0.00788237]
 [ 0.28522727  0.13528047  0.51525944  0.06936555]
 [ 0.2433367   0.58826992  0.66014865  0.33719902]
 [ 0.05435975  0.01091574  0.03152745  0.0121238 ]
 [ 0.66021186  0.33179933  0.25655699  0.32716676]]

Testing different cutoff values (the `quant` parameter) and alternative methods for splitting the data into term/non-term groups for the t tests.

The machine-learning splitting methods are k-means (`kmeans`) and a mixture of Gaussians (`mog`).


In [ ]:
# One row per term (in this order); columns 0-3 are the quantile cutoffs,
# columns 4-5 the two clustering-based split methods.
split_term_genes = [
    ('depression', depression_genes),
    ('dopamine', dopamine_genes),
    ('reward', reward_genes),
    ('serotonin', serotonin_genes),
    ('anxiety', anxiety_genes),
    ('schizophrenia', schizophrenia_genes),
]
t_test_analyses = np.zeros((len(split_term_genes), 6))
quants = [50, 75, 85, 95]

# Part 1: vary the cutoff, leaving every other argument at its default.
for q, cutoff in enumerate(quants):
    for row, (term, genes) in enumerate(split_term_genes):
        scores = A.validate_with_t_test(term, genes, quant=cutoff)[0]
        t_test_analyses[row, q] = stats.ttest_1samp(scores, 0)[1]

# Part 2: let a clustering method choose the split instead of a cutoff.
# NOTE: `methods` and `m` are rebound here, and a later cell reads them.
methods = ['kmeans', 'mog']
for m, split in enumerate(methods):
    column = m + 4  # placed right after the four cutoff columns
    for row, (term, genes) in enumerate(split_term_genes):
        scores = A.validate_with_t_test(term, genes, split_method=split)[0]
        t_test_analyses[row, column] = stats.ttest_1samp(scores, 0)[1]

print(t_test_analyses)

In [ ]:


In [30]:



0
6
5
0
0
0

In [7]:
import csv

# Persist both p-value tables, one CSV file per table.
#
# Previously both tables were written to 'summed_validation.csv' in write
# mode, so the second write truncated the first and `all_analyses` was never
# saved. 'summed_validation.csv' keeps holding `t_test_analyses` (what it
# ended up containing before); `all_analyses` now goes to its own file.
validation_dir = '/Users/Torben/Documents/ABI analysis/validation/'
tables_to_save = [
    ('summed_validation_methods.csv', all_analyses),
    ('summed_validation.csv', t_test_analyses),
]

for file_name, table in tables_to_save:
    # 'wb' is the mode the Python 2 csv module expects for output files.
    with open(validation_dir + file_name, 'wb') as csvfile:
        # One CSV row per row of the 2-D array.
        csv.writer(csvfile).writerows(table)

In [12]:
# NOTE(review): `methods` and `m` are not defined in this cell; they are
# whatever the most recently executed loop cell left behind, so the split
# method used here depends on execution order -- confirm the intended value.
anxiety_validation = A.validate_with_t_test('anxiety', anxiety_genes, split_method=methods[m])
a = anxiety_validation[0]


This may take a couple of minutes ...

In [8]:
# Scratch cell: evaluates to a list of ten 1s; the result is only displayed, not stored.
[1 for t in xrange(10)]


Out[8]:
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [ ]: