In [1]:
import pprint

from sklearn.svm import LinearSVC, NuSVC, SVC
from sklearn.datasets import make_classification
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.linear_model import LogisticRegressionCV, LogisticRegression, SGDClassifier
from sklearn.ensemble import BaggingClassifier, ExtraTreesClassifier, RandomForestClassifier

from yellowbrick.utils.timer import Timer
from yellowbrick.classifier import ClassificationReport

In [2]:
# Try them all!
# TODO add MLP, new XGBoost

models = [
    LinearSVC(), 
    SVC(gamma='auto'), 
    NuSVC(gamma='auto'),
    BaggingClassifier(), 
    KNeighborsClassifier(), 
    LogisticRegressionCV(cv=3), 
    LogisticRegression(solver='lbfgs'), 
    SGDClassifier(max_iter=100, tol=1e-3), 
    ExtraTreesClassifier(n_estimators=100), 
    RandomForestClassifier(n_estimators=100)
]

In [3]:
# for n in range(8):
#     instances = 500*10**n
#     for x in range(3):
#         features = 5*10**x
#         X, y = make_classification(n_samples=instances, n_features=features)

In [ ]:
# make classification datasets

# 500 instances, 5 features
X, y = make_classification(n_samples=500, n_features=5)
# 500 instances, 50 features
# 500 instances, 500 features
# 5000 instances, 5 features
# 5000 instances, 50 features
# 5000 instances, 500 features
# 50000 instances, 5 features
# 50000 instances, 50 features
# 50000 instances, 500 features
# 500000 instances, 5 features
# 500000 instances, 50 features
# 500000 instances, 500 features
# 5000000 instances, 5 features
# 5000000 instances, 50 features
# 5000000 instances, 500 features
# 50000000 instances, 5 features
# 50000000 instances, 50 features
# 50000000 instances, 500 features
# 500000000 instances, 5 features
# 500000000 instances, 50 features
# 500000000 instances, 500 features
# 5000000000 instances, 5 features
# 5000000000 instances, 50 features
# 5000000000 instances, 500 features

In [ ]:
def time_models(X, y, estimators):
    """
    Test various estimators.
    """ 
    fit_times = dict()
    
    for estimator in estimators:
        
        # Use Timer context manager to track fit time
        with Timer() as timer:
            
            # Instantiate the classification model and visualizer
            estimator.fit(X, y)  
            y_hat = estimator.predict(X)
            
            # TODO: stop execution if the fit time is greater than 15min

        fit_times[estimator.__class__.__name__] = timer.interval
        
    return fit_times

In [ ]:
# use on classifiers & get fit time (create time-out if > 10 min)

for n in range(3):
    instances = 500*10**n
    for x in range(3):
        features = 5*10**x
        X, y = make_classification(n_samples=instances, n_features=features)
        fit_times = time_models(X, y, models)
        print("{} Instances, {} Features".format(instances, features))
        pprint.pprint(fit_times)


500 Instances, 5 Features
{'BaggingClassifier': 0.013997793197631836,
 'ExtraTreesClassifier': 0.06728887557983398,
 'KNeighborsClassifier': 0.012450933456420898,
 'LinearSVC': 0.003692150115966797,
 'LogisticRegression': 0.0018661022186279297,
 'LogisticRegressionCV': 0.035315752029418945,
 'NuSVC': 0.006707906723022461,
 'RandomForestClassifier': 0.08254480361938477,
 'SGDClassifier': 0.0013248920440673828,
 'SVC': 0.0037508010864257812}
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/svm/base.py:929: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  "the number of iterations.", ConvergenceWarning)
500 Instances, 50 Features
{'BaggingClassifier': 0.07693982124328613,
 'ExtraTreesClassifier': 0.09247803688049316,
 'KNeighborsClassifier': 0.026669025421142578,
 'LinearSVC': 0.025574207305908203,
 'LogisticRegression': 0.0031859874725341797,
 'LogisticRegressionCV': 0.08308291435241699,
 'NuSVC': 0.02192521095275879,
 'RandomForestClassifier': 0.15237998962402344,
 'SGDClassifier': 0.0023882389068603516,
 'SVC': 0.02039194107055664}
500 Instances, 500 Features
{'BaggingClassifier': 0.6162397861480713,
 'ExtraTreesClassifier': 0.15839815139770508,
 'KNeighborsClassifier': 0.18971896171569824,
 'LinearSVC': 0.01911616325378418,
 'LogisticRegression': 0.007774829864501953,
 'LogisticRegressionCV': 0.1060340404510498,
 'NuSVC': 0.274137020111084,
 'RandomForestClassifier': 0.36962294578552246,
 'SGDClassifier': 0.004414796829223633,
 'SVC': 0.26242899894714355}
5000 Instances, 5 Features
{'BaggingClassifier': 0.10015106201171875,
 'ExtraTreesClassifier': 0.22339200973510742,
 'KNeighborsClassifier': 0.11458396911621094,
 'LinearSVC': 0.06003093719482422,
 'LogisticRegression': 0.0046520233154296875,
 'LogisticRegressionCV': 0.08202719688415527,
 'NuSVC': 0.6230740547180176,
 'RandomForestClassifier': 0.502263069152832,
 'SGDClassifier': 0.005421161651611328,
 'SVC': 0.11417269706726074}
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/svm/base.py:929: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  "the number of iterations.", ConvergenceWarning)
5000 Instances, 50 Features
{'BaggingClassifier': 1.3166677951812744,
 'ExtraTreesClassifier': 0.5566577911376953,
 'KNeighborsClassifier': 1.7136907577514648,
 'LinearSVC': 0.4286210536956787,
 'LogisticRegression': 0.0043981075286865234,
 'LogisticRegressionCV': 0.0815432071685791,
 'NuSVC': 1.866575002670288,
 'RandomForestClassifier': 2.0283870697021484,
 'SGDClassifier': 0.04179263114929199,
 'SVC': 1.2708790302276611}
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/svm/base.py:929: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  "the number of iterations.", ConvergenceWarning)
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/stochastic_gradient.py:561: ConvergenceWarning: Maximum number of iteration reached before convergence. Consider increasing max_iter to improve the fit.
  ConvergenceWarning)
5000 Instances, 500 Features
{'BaggingClassifier': 28.349195957183838,
 'ExtraTreesClassifier': 1.7987909317016602,
 'KNeighborsClassifier': 19.721530199050903,
 'LinearSVC': 3.6333279609680176,
 'LogisticRegression': 0.033267974853515625,
 'LogisticRegressionCV': 0.4466688632965088,
 'NuSVC': 20.55934476852417,
 'RandomForestClassifier': 8.674499034881592,
 'SGDClassifier': 0.38391613960266113,
 'SVC': 18.37574815750122}
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/svm/base.py:929: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  "the number of iterations.", ConvergenceWarning)
50000 Instances, 5 Features
{'BaggingClassifier': 2.2388899326324463,
 'ExtraTreesClassifier': 3.6898550987243652,
 'KNeighborsClassifier': 1.2156751155853271,
 'LinearSVC': 2.207770824432373,
 'LogisticRegression': 0.02276301383972168,
 'LogisticRegressionCV': 0.3271970748901367,
 'NuSVC': 68.04271364212036,
 'RandomForestClassifier': 10.067652702331543,
 'SGDClassifier': 0.059821128845214844,
 'SVC': 29.71822690963745}
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/svm/base.py:929: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  "the number of iterations.", ConvergenceWarning)
50000 Instances, 50 Features
{'BaggingClassifier': 29.504643201828003,
 'ExtraTreesClassifier': 7.222272872924805,
 'KNeighborsClassifier': 374.22185587882996,
 'LinearSVC': 8.88854193687439,
 'LogisticRegression': 0.058763980865478516,
 'LogisticRegressionCV': 0.6985459327697754,
 'NuSVC': 327.74772596359253,
 'RandomForestClassifier': 38.99566125869751,
 'SGDClassifier': 0.30802416801452637,
 'SVC': 184.66295671463013}
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/svm/base.py:929: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  "the number of iterations.", ConvergenceWarning)

In [ ]:
# use on YB visualizers & get fit time (create time-out if > 10 min)