In [1]:
# Stochastic Gradient Descent is a fundamental technique for fitting
# a regression model; here we use it for classification. There are
# natural connections between the two techniques.

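In [ ]:
# As a hedged aside (not part of the original recipe), the regression
# counterpart lives in the same module as SGDClassifier; a minimal
# sketch on synthetic data, assuming default SGDRegressor settings:
from sklearn import datasets, linear_model
X_r, y_r = datasets.make_regression(n_samples=100, n_features=5, noise=1.0)
sgd_reg = linear_model.SGDRegressor()
sgd_reg.fit(X_r, y_r)
print(sgd_reg.coef_)  # one learned coefficient per feature
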
In [2]:
# In regression, we minimized a cost function that penalized bad
# predictions on a continuous scale; for classification, we minimize
# a cost function that penalizes predictions falling into the wrong
# one of two (or more) discrete cases.

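In [ ]:
# A minimal numeric sketch of the contrast above (not from the original
# recipe): squared error penalizes on a continuous scale, while the
# hinge loss used for classification only penalizes predictions that
# fall on the wrong side of (or inside) the margin.
import numpy as np
t = 1.0                                   # true target / class (+1)
preds = np.array([-0.5, 0.3, 0.9, 1.5])   # hypothetical raw model outputs
print((preds - t) ** 2)              # squared error: every miss is penalized
print(np.maximum(0, 1 - t * preds))  # hinge: zero once the margin is cleared
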
In [3]:
from sklearn import datasets
X, y = datasets.make_classification()

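In [ ]:
# Sanity check on the synthetic data (an added aside): with its default
# arguments, make_classification returns 100 samples, 20 features, and
# two roughly balanced classes, which matches the support of 50/50
# reported by classification_report below.
import numpy as np
print(X.shape)          # (100, 20) with the defaults
print(np.bincount(y))   # roughly [50 50]
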
In [4]:
from sklearn import linear_model

In [5]:
sgd_clf = linear_model.SGDClassifier()

In [6]:
sgd_clf.fit(X, y)


Out[6]:
SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=1,
       penalty='l2', power_t=0.5, random_state=None, shuffle=True,
       verbose=0, warm_start=False)

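In [ ]:
# Because SGD updates the coefficients from one example (or small batch)
# at a time, SGDClassifier can also be trained incrementally with
# partial_fit -- useful when the data does not fit in memory. A hedged
# sketch, assuming we simply feed the same X, y in two halves:
import numpy as np
sgd_inc = linear_model.SGDClassifier()
classes = np.unique(y)                      # must be passed on the first call
sgd_inc.partial_fit(X[:50], y[:50], classes=classes)
sgd_inc.partial_fit(X[50:], y[50:])
print(sgd_inc.score(X, y))                  # training accuracy of the incremental fit
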
In [8]:
# The hinge loss function is: max(0, 1 - t*y)
# t is the true classification (+1 for one case, -1 for the other)
# y is the raw model output (the decision value computed from the
# coefficients), so the loss is zero once a sample is correctly
# classified with a margin of at least 1.

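In [ ]:
# To connect the formula above to the fitted model (an added sketch):
# decision_function gives the raw score y for each sample, t is the
# class label recoded as -1/+1, and the hinge loss is max(0, 1 - t*y).
import numpy as np
scores = sgd_clf.decision_function(X)   # raw model output y = w.x + b
t = np.where(y == 1, 1, -1)             # recode the 0/1 labels as -1/+1
hinge = np.maximum(0, 1 - t * scores)
print(hinge.mean())                     # average hinge loss on the training data
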
In [9]:
from sklearn.metrics import classification_report

In [10]:
predictions = sgd_clf.predict(X)
print(classification_report(y, predictions))  # true labels first, predictions second


             precision    recall  f1-score   support

          0       0.96      0.96      0.96        50
          1       0.96      0.96      0.96        50

avg / total       0.96      0.96      0.96       100


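In [ ]:
# The report above scores the model on the same data it was trained on,
# which is optimistic. A hedged sketch of a fairer check, assuming a
# scikit-learn version that provides model_selection (older releases
# used sklearn.cross_validation instead):
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
sgd_clf2 = linear_model.SGDClassifier()
sgd_clf2.fit(X_train, y_train)
print(classification_report(y_test, sgd_clf2.predict(X_test)))
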
In [ ]: