In [1]:
%reload_ext autoreload

In [2]:
%autoreload 2

In [16]:
def train_ncRNA_model(fname=None, model_fname=None, n_iter=40, verbose=False):
    #parameters
    times=2
    size=200
    active_set_size=400
    threshold=1
    n_active_learning_iterations=3
    train_test_split=0.7
    
    
    def rfam_uri(family_id):
        return 'http://rfam.xfam.org/family/%s/alignment?acc=%s&format=fastau&download=0'%(family_id,family_id)
    
    def pre_processor( data, **args):
        #from eden.converter.rna.rnashapes import rnashapes_to_eden
        #graphs = rnashapes_to_eden( data, **args )
        from eden.converter.rna.rnafold import rnafold_to_eden
        graphs = rnafold_to_eden( data, **args )
        return graphs

    
    from eden.graph import Vectorizer
    vectorizer = Vectorizer()

    from sklearn.linear_model import SGDClassifier
    estimator = SGDClassifier(class_weight='auto', shuffle=True)
    

    #create iterable from files
    from eden.converter.fasta import fasta_to_sequence
    seqs = fasta_to_sequence( rfam_uri( rfam_id ) )
    from itertools import tee
    seqs,seqs_=tee(seqs)
    iterable_pos = seqs
    from eden.modifier.seq import seq_to_seq, shuffle_modifier
    iterable_neg = seq_to_seq( seqs_, modifier=shuffle_modifier, times=times, order=2 )

    #consier only first 'size' elements
    from itertools import islice
    iterable_pos = islice(iterable_pos,size)
    iterable_neg = islice(iterable_neg,size*times)

    #split train/test
    from eden.util import random_bipartition_iter
    iterable_pos_train, iterable_pos_test = random_bipartition_iter(iterable_pos, relative_size=train_test_split)
    iterable_neg_train, iterable_neg_test = random_bipartition_iter(iterable_neg, relative_size=train_test_split)
    
    #make predictive model
    from eden.model import ActiveLearningBinaryClassificationModel
    model = ActiveLearningBinaryClassificationModel( pre_processor, estimator=estimator, vectorizer=vectorizer )

    #optimize hyperparameters and fit model
    from numpy.random import randint
    from numpy.random import uniform
#    pre_processor_parameters={'max_num':[1,2,3], 
#                              'shape_type':[3,4,5], 
#                              'energy_range':randint(10, 40, size=n_iter)}
    pre_processor_parameters={} 

    vectorizer_parameters={'complexity':[1,2]}

    estimator_parameters={'n_iter':randint(5, 100, size=n_iter),
                          'penalty':['l1','l2','elasticnet'],
                          'l1_ratio':uniform(0.1,0.9, size=n_iter), 
                          'loss':['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron'],
                          'power_t':uniform(0.1, size=n_iter),
                          'alpha': [10**x for x in range(-8,0)],
                          'eta0': [10**x for x in range(-4,-1)],
                          'learning_rate': ["invscaling", "constant", "optimal"]}

    model.optimize(iterable_pos_train, iterable_neg_train, 
                   n_active_learning_iterations=n_active_learning_iterations,
                   size_positive=-1,
                   size_negative=active_set_size,
                   n_iter=n_iter, cv=3, n_jobs=1, verbose=verbose,
                   pre_processor_parameters=pre_processor_parameters, 
                   vectorizer_parameters=vectorizer_parameters, 
                   estimator_parameters=estimator_parameters)

    #save model
    model.save(model_fname)
    print 'Saved model in %s'%model_fname
    
    #estimate predictive performance
    model.estimate( iterable_pos_test, iterable_neg_test, cv=5 )
    
    
    
    
def test_ncRNA_model(fname=None, model_fname=None):
    from eden.model import ActiveLearningBinaryClassificationModel

    model = ActiveLearningBinaryClassificationModel()
    model.load(model_fname)

    def rfam_uri(family_id):
        return 'http://rfam.xfam.org/family/%s/alignment?acc=%s&format=fastau&download=0'%(family_id,family_id)

    from eden.converter.fasta import fasta_to_sequence
    seqs = fasta_to_sequence( rfam_uri( rfam_id ) )
    from itertools import tee
    seqs,seqs_=tee(seqs)
    
    predictions= model.decision_function( seqs_ )
    
    from itertools import izip
    seqs,seqs_=tee(seqs)
    results = [(p,s) for s,p in izip(seqs_,predictions)]
    
    return results

In [4]:
def rfam_url(family_id):
    return 'http://rfam.xfam.org/family/%s/alignment?acc=%s&format=fastau&download=0'%(family_id,family_id)

In [5]:
import requests
import numpy

def rfam_size(rfam_id):
    uri = rfam_url( rfam_id )
    from eden.modifier.fasta import fasta_to_fasta, one_line_modifier
    iterable = fasta_to_fasta( uri , modifier = one_line_modifier, header_only = True)
    size = sum(1 for x in iterable)
    return size

def rfam_stats(rfam_id):
    uri = rfam_url( rfam_id )
    from eden.modifier.fasta import fasta_to_fasta, one_line_modifier
    iterable = fasta_to_fasta( uri , modifier = one_line_modifier, sequence_only = True)
    len_seqs = [len(seq) for seq in iterable]
    median = numpy.percentile(len_seqs, 50)
    lower_quartile = numpy.percentile(len_seqs, 25)
    upper_quartile = numpy.percentile(len_seqs, 75)
    return lower_quartile, median, upper_quartile

In [6]:
%%time
#identify non available rfam families

blacklist = []

size_limit = 1000
num = 2600
rfam_ids = ['RF%0.5d'%i for i in range(1,num)]
print 'Determining non valid Rfam family accession or ID...'
counter = 0
for rfam_id in rfam_ids:
    try:
        size = rfam_size( rfam_id )
    except:
        counter += 1
        blacklist.append(rfam_id)
        print counter, rfam_id
    else:
        if size > size_limit:
            counter += 1
            blacklist.append(rfam_id)
            print counter, rfam_id, '>>'
    finally:
        pass
    
arfam_ids = [rfam_id for rfam_id in rfam_ids if rfam_id not in blacklist]
print 'Tried %d families, found %d non valid Rfam family accession or ID' % (num, len(blacklist))


Determining non valid Rfam family accession or ID...
Tried 2600 families, found 0 non valid Rfam family accession or ID
CPU times: user 8.27 s, sys: 2.39 s, total: 10.7 s
Wall time: 6min 30s

In [7]:
%%time
#select families with num instances within specified limits
size_upper_limit = 300
size_lower_limit = 150
selected_rfam_ids = [rfam_id for rfam_id in arfam_ids if rfam_size( rfam_id ) >= size_lower_limit and rfam_size( rfam_id ) <= size_upper_limit]
print 'Rfam families stisfying the num seqs limits > %d and < %d are: %d' % (size_lower_limit, size_upper_limit, len(selected_rfam_ids) )


Rfam families stisfying the num seqs limits > 150 and < 300 are: 22
CPU times: user 7.7 s, sys: 2.2 s, total: 9.9 s
Wall time: 7min 39s

In [8]:
%%time
#print stats
rfam_legths = {}
print 'Rfam statistics:'
for i,rfam_id in enumerate(selected_rfam_ids):
    size = rfam_size( rfam_id )
    lower_quartile, median, upper_quartile = rfam_stats( rfam_id )
    print "%0.5d rfam: %s num seqs: %0.4d length stats: Q1: %0.4d Q2: %0.4d Q3: %0.4d" % ( i+1, rfam_id, size, lower_quartile, median, upper_quartile )
    rfam_legths[rfam_id] = median


Rfam statistics:
00001 rfam: RF00004 num seqs: 0208 length stats: Q1: 0191 Q2: 0193 Q3: 0195
00002 rfam: RF00015 num seqs: 0170 length stats: Q1: 0135 Q2: 0141 Q3: 0146
00003 rfam: RF00020 num seqs: 0180 length stats: Q1: 0115 Q2: 0116 Q3: 0119
00004 rfam: RF00026 num seqs: 0188 length stats: Q1: 0103 Q2: 0107 Q3: 0108
00005 rfam: RF00169 num seqs: 0261 length stats: Q1: 0081 Q2: 0094 Q3: 0098
00006 rfam: RF00380 num seqs: 0157 length stats: Q1: 0165 Q2: 0168 Q3: 0172
00007 rfam: RF00386 num seqs: 0160 length stats: Q1: 0090 Q2: 0090 Q3: 0091
00008 rfam: RF01051 num seqs: 0155 length stats: Q1: 0085 Q2: 0087 Q3: 0089
00009 rfam: RF01055 num seqs: 0160 length stats: Q1: 0135 Q2: 0142 Q3: 0147
00010 rfam: RF01234 num seqs: 0160 length stats: Q1: 0136 Q2: 0138 Q3: 0138
00011 rfam: RF01699 num seqs: 0194 length stats: Q1: 0167 Q2: 0168 Q3: 0172
00012 rfam: RF01701 num seqs: 0265 length stats: Q1: 0066 Q2: 0068 Q3: 0069
00013 rfam: RF01705 num seqs: 0201 length stats: Q1: 0076 Q2: 0080 Q3: 0082
00014 rfam: RF01731 num seqs: 0210 length stats: Q1: 0164 Q2: 0167 Q3: 0169
00015 rfam: RF01734 num seqs: 0287 length stats: Q1: 0063 Q2: 0067 Q3: 0074
00016 rfam: RF01745 num seqs: 0189 length stats: Q1: 0186 Q2: 0188 Q3: 0193
00017 rfam: RF01750 num seqs: 0182 length stats: Q1: 0084 Q2: 0086 Q3: 0101
00018 rfam: RF01942 num seqs: 0171 length stats: Q1: 0118 Q2: 0119 Q3: 0121
00019 rfam: RF01998 num seqs: 0237 length stats: Q1: 0079 Q2: 0084 Q3: 0089
00020 rfam: RF02005 num seqs: 0174 length stats: Q1: 0220 Q2: 0223 Q3: 0229
00021 rfam: RF02012 num seqs: 0244 length stats: Q1: 0139 Q2: 0148 Q3: 0150
00022 rfam: RF02034 num seqs: 0219 length stats: Q1: 0189 Q2: 0191 Q3: 0192
CPU times: user 212 ms, sys: 54.5 ms, total: 267 ms
Wall time: 9.98 s

In [18]:
%%time
for rfam_id in selected_rfam_ids:
    print rfam_id
    model_fname=rfam_id+'.model'
    train_ncRNA_model(fname=rfam_id, model_fname=model_fname, n_iter=10, verbose=False)
    results = test_ncRNA_model(fname=rfam_id, model_fname=model_fname)


RF00004
Saved model in RF00004.model
Classifier:
SGDClassifier(alpha=0.0001, class_weight='auto', epsilon=0.1, eta0=0.0001,
       fit_intercept=True, l1_ratio=0.10158070470654268,
       learning_rate='constant', loss='squared_hinge', n_iter=87, n_jobs=1,
       penalty='l2', power_t=0.48091437952204807, random_state=None,
       shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.856 +- 0.087
           precision: 0.738 +- 0.120
              recall: 0.917 +- 0.129
                  f1: 0.812 +- 0.109
   average_precision: 0.967 +- 0.046
             roc_auc: 0.978 +- 0.031
--------------------------------------------------------------------------------
RF00015
Saved model in RF00015.model
Classifier:
SGDClassifier(alpha=0.01, class_weight='auto', epsilon=0.1, eta0=0.001,
       fit_intercept=True, l1_ratio=0.64640302916124981,
       learning_rate='invscaling', loss='log', n_iter=23, n_jobs=1,
       penalty='l2', power_t=0.91995260126272083, random_state=None,
       shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.638 +- 0.258
           precision: 0.269 +- 0.389
              recall: 0.600 +- 0.490
                  f1: 0.225 +- 0.278
   average_precision: 0.918 +- 0.043
             roc_auc: 0.957 +- 0.041
--------------------------------------------------------------------------------
RF00020
Saved model in RF00020.model
Classifier:
SGDClassifier(alpha=0.01, class_weight='auto', epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.63227765595103635,
       learning_rate='invscaling', loss='squared_hinge', n_iter=36,
       n_jobs=1, penalty='l2', power_t=0.43674388179763302,
       random_state=None, shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.964 +- 0.023
           precision: 0.922 +- 0.069
              recall: 0.982 +- 0.036
                  f1: 0.948 +- 0.031
   average_precision: 0.998 +- 0.003
             roc_auc: 0.999 +- 0.002
--------------------------------------------------------------------------------
RF00026
Saved model in RF00026.model
Classifier:
SGDClassifier(alpha=0.01, class_weight='auto', epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.13475884399289795,
       learning_rate='constant', loss='perceptron', n_iter=39, n_jobs=1,
       penalty='l2', power_t=0.20114541710393255, random_state=None,
       shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.927 +- 0.091
           precision: 0.967 +- 0.067
              recall: 0.782 +- 0.273
                  f1: 0.903 +- 0.107
   average_precision: 0.996 +- 0.006
             roc_auc: 0.998 +- 0.003
--------------------------------------------------------------------------------
RF00169
Saved model in RF00169.model
Classifier:
SGDClassifier(alpha=0.0001, class_weight='auto', epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.54444744563017045,
       learning_rate='constant', loss='perceptron', n_iter=69, n_jobs=1,
       penalty='elasticnet', power_t=0.74764244335882957,
       random_state=None, shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.872 +- 0.054
           precision: 0.906 +- 0.052
              recall: 0.717 +- 0.155
                  f1: 0.776 +- 0.088
   average_precision: 0.883 +- 0.086
             roc_auc: 0.918 +- 0.048
--------------------------------------------------------------------------------
RF00380
Saved model in RF00380.model
Classifier:
SGDClassifier(alpha=0.1, class_weight='auto', epsilon=0.1, eta0=0.0001,
       fit_intercept=True, l1_ratio=0.64265903304770333,
       learning_rate='optimal', loss='perceptron', n_iter=43, n_jobs=1,
       penalty='l2', power_t=0.98982760378145185, random_state=None,
       shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.951 +- 0.042
           precision: 1.000 +- 0.000
              recall: 0.873 +- 0.155
                  f1: 0.942 +- 0.056
   average_precision: 0.998 +- 0.005
             roc_auc: 1.000 +- 0.000
--------------------------------------------------------------------------------
RF00386
Saved model in RF00386.model
Classifier:
SGDClassifier(alpha=1e-05, class_weight='auto', epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.37602958317424173,
       learning_rate='constant', loss='perceptron', n_iter=52, n_jobs=1,
       penalty='l2', power_t=0.18949858556654495, random_state=None,
       shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.993 +- 0.014
           precision: 1.000 +- 0.000
              recall: 0.978 +- 0.044
                  f1: 0.978 +- 0.027
   average_precision: 1.000 +- 0.000
             roc_auc: 0.999 +- 0.002
--------------------------------------------------------------------------------
RF01051
Saved model in RF01051.model
Classifier:
SGDClassifier(alpha=0.01, class_weight='auto', epsilon=0.1, eta0=0.0001,
       fit_intercept=True, l1_ratio=0.80173625561833473,
       learning_rate='optimal', loss='modified_huber', n_iter=8, n_jobs=1,
       penalty='l2', power_t=0.23465038101144742, random_state=None,
       shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.971 +- 0.043
           precision: 1.000 +- 0.000
              recall: 0.933 +- 0.089
                  f1: 0.886 +- 0.143
   average_precision: 1.000 +- 0.000
             roc_auc: 1.000 +- 0.000
--------------------------------------------------------------------------------
RF01055
Saved model in RF01055.model
Classifier:
SGDClassifier(alpha=0.0001, class_weight='auto', epsilon=0.1, eta0=0.0001,
       fit_intercept=True, l1_ratio=0.64391470405668427,
       learning_rate='optimal', loss='perceptron', n_iter=9, n_jobs=1,
       penalty='elasticnet', power_t=0.33670895830666792,
       random_state=None, shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.958 +- 0.053
           precision: 0.982 +- 0.036
              recall: 0.918 +- 0.076
                  f1: 0.915 +- 0.053
   average_precision: 1.000 +- 0.000
             roc_auc: 0.992 +- 0.006
--------------------------------------------------------------------------------
RF01234
Saved model in RF01234.model
Classifier:
SGDClassifier(alpha=0.001, class_weight='auto', epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.64261133610654331,
       learning_rate='invscaling', loss='perceptron', n_iter=14, n_jobs=1,
       penalty='l2', power_t=0.16616271616774891, random_state=None,
       shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 1.000 +- 0.000
           precision: 1.000 +- 0.000
              recall: 1.000 +- 0.000
                  f1: 1.000 +- 0.000
   average_precision: 1.000 +- 0.000
             roc_auc: 1.000 +- 0.000
--------------------------------------------------------------------------------
RF01699
Saved model in RF01699.model
Classifier:
SGDClassifier(alpha=1e-07, class_weight='auto', epsilon=0.1, eta0=0.0001,
       fit_intercept=True, l1_ratio=0.45235173381275839,
       learning_rate='constant', loss='log', n_iter=50, n_jobs=1,
       penalty='elasticnet', power_t=0.49561474770449854,
       random_state=None, shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.779 +- 0.117
           precision: 0.629 +- 0.111
              recall: 0.983 +- 0.033
                  f1: 0.760 +- 0.089
   average_precision: 0.994 +- 0.011
             roc_auc: 0.996 +- 0.009
--------------------------------------------------------------------------------
RF01701
Saved model in RF01701.model
Classifier:
SGDClassifier(alpha=1e-08, class_weight='auto', epsilon=0.1, eta0=0.001,
       fit_intercept=True, l1_ratio=0.55998861689157009,
       learning_rate='invscaling', loss='perceptron', n_iter=99, n_jobs=1,
       penalty='l1', power_t=0.30156293415887159, random_state=None,
       shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.944 +- 0.043
           precision: 0.969 +- 0.062
              recall: 0.900 +- 0.097
                  f1: 0.929 +- 0.041
   average_precision: 0.992 +- 0.011
             roc_auc: 0.993 +- 0.009
--------------------------------------------------------------------------------
RF01705
Saved model in RF01705.model
Classifier:
SGDClassifier(alpha=0.0001, class_weight='auto', epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.65407415607929464,
       learning_rate='constant', loss='modified_huber', n_iter=69,
       n_jobs=1, penalty='l2', power_t=0.28178398019804851,
       random_state=None, shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.989 +- 0.014
           precision: 1.000 +- 0.000
              recall: 0.967 +- 0.041
                  f1: 0.983 +- 0.021
   average_precision: 0.999 +- 0.003
             roc_auc: 0.999 +- 0.001
--------------------------------------------------------------------------------
RF01731
Saved model in RF01731.model
Classifier:
SGDClassifier(alpha=0.001, class_weight='auto', epsilon=0.1, eta0=0.001,
       fit_intercept=True, l1_ratio=0.77395618830631885,
       learning_rate='constant', loss='squared_hinge', n_iter=97, n_jobs=1,
       penalty='elasticnet', power_t=0.24447368124995239,
       random_state=None, shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.956 +- 0.065
           precision: 0.983 +- 0.033
              recall: 0.883 +- 0.194
                  f1: 0.917 +- 0.129
   average_precision: 0.997 +- 0.006
             roc_auc: 0.999 +- 0.003
--------------------------------------------------------------------------------
RF01734
Saved model in RF01734.model
Classifier:
SGDClassifier(alpha=0.001, class_weight='auto', epsilon=0.1, eta0=0.001,
       fit_intercept=True, l1_ratio=0.35770528087577169,
       learning_rate='invscaling', loss='squared_hinge', n_iter=91,
       n_jobs=1, penalty='l2', power_t=0.26619060893857599,
       random_state=None, shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.722 +- 0.056
           precision: 0.560 +- 0.064
              recall: 0.933 +- 0.062
                  f1: 0.688 +- 0.040
   average_precision: 0.855 +- 0.074
             roc_auc: 0.901 +- 0.064
--------------------------------------------------------------------------------
RF01745
Saved model in RF01745.model
Classifier:
SGDClassifier(alpha=0.0001, class_weight='auto', epsilon=0.1, eta0=0.0001,
       fit_intercept=True, l1_ratio=0.37211447829516431,
       learning_rate='constant', loss='squared_hinge', n_iter=90, n_jobs=1,
       penalty='l2', power_t=0.68342934202312244, random_state=None,
       shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.953 +- 0.023
           precision: 1.000 +- 0.000
              recall: 0.861 +- 0.066
                  f1: 0.924 +- 0.039
   average_precision: 0.967 +- 0.035
             roc_auc: 0.974 +- 0.028
--------------------------------------------------------------------------------
RF01750
Saved model in RF01750.model
Classifier:
SGDClassifier(alpha=1e-08, class_weight='auto', epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.12942585708270446,
       learning_rate='invscaling', loss='perceptron', n_iter=24, n_jobs=1,
       penalty='l2', power_t=0.26601266866057927, random_state=None,
       shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.909 +- 0.047
           precision: 1.000 +- 0.000
              recall: 0.818 +- 0.057
                  f1: 0.918 +- 0.053
   average_precision: 0.965 +- 0.032
             roc_auc: 0.974 +- 0.021
--------------------------------------------------------------------------------
RF01942
Saved model in RF01942.model
Classifier:
SGDClassifier(alpha=0.1, class_weight='auto', epsilon=0.1, eta0=0.001,
       fit_intercept=True, l1_ratio=0.82324455289727794,
       learning_rate='constant', loss='log', n_iter=12, n_jobs=1,
       penalty='l2', power_t=0.23732531046483163, random_state=None,
       shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.815 +- 0.073
           precision: 0.659 +- 0.101
              recall: 1.000 +- 0.000
                  f1: 0.790 +- 0.071
   average_precision: 1.000 +- 0.000
             roc_auc: 1.000 +- 0.000
--------------------------------------------------------------------------------
RF01998
Saved model in RF01998.model
Classifier:
SGDClassifier(alpha=1e-05, class_weight='auto', epsilon=0.1, eta0=0.001,
       fit_intercept=True, l1_ratio=0.65980206038345846,
       learning_rate='constant', loss='squared_hinge', n_iter=98, n_jobs=1,
       penalty='elasticnet', power_t=0.91790557640332981,
       random_state=None, shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.944 +- 0.039
           precision: 0.950 +- 0.067
              recall: 0.883 +- 0.085
                  f1: 0.913 +- 0.062
   average_precision: 0.962 +- 0.032
             roc_auc: 0.974 +- 0.024
--------------------------------------------------------------------------------
RF02005
Saved model in RF02005.model
Classifier:
SGDClassifier(alpha=1e-05, class_weight='auto', epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.33523353554900681,
       learning_rate='constant', loss='squared_hinge', n_iter=64, n_jobs=1,
       penalty='l1', power_t=0.39476574010678822, random_state=None,
       shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.861 +- 0.053
           precision: 0.783 +- 0.128
              recall: 0.851 +- 0.094
                  f1: 0.814 +- 0.068
   average_precision: 0.910 +- 0.065
             roc_auc: 0.936 +- 0.055
--------------------------------------------------------------------------------
RF02012
Saved model in RF02012.model
Classifier:
SGDClassifier(alpha=0.0001, class_weight='auto', epsilon=0.1, eta0=0.0001,
       fit_intercept=True, l1_ratio=0.73412284676305983,
       learning_rate='constant', loss='squared_hinge', n_iter=69, n_jobs=1,
       penalty='l2', power_t=0.73243001023244714, random_state=None,
       shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.694 +- 0.077
           precision: 0.522 +- 0.058
              recall: 0.933 +- 0.062
                  f1: 0.672 +- 0.060
   average_precision: 0.944 +- 0.034
             roc_auc: 0.948 +- 0.039
--------------------------------------------------------------------------------
RF02034
Saved model in RF02034.model
Classifier:
SGDClassifier(alpha=0.0001, class_weight='auto', epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.73947453315103839,
       learning_rate='constant', loss='modified_huber', n_iter=77,
       n_jobs=1, penalty='l2', power_t=0.57060176400075679,
       random_state=None, shuffle=True, verbose=0, warm_start=False)
--------------------------------------------------------------------------------
Predictive performance:
            accuracy: 0.994 +- 0.011
           precision: 1.000 +- 0.000
              recall: 0.983 +- 0.033
                  f1: 0.991 +- 0.017
   average_precision: 1.000 +- 0.000
             roc_auc: 1.000 +- 0.000
--------------------------------------------------------------------------------
CPU times: user 2h 57min 31s, sys: 23min 42s, total: 3h 21min 14s
Wall time: 5h 16min 1s