notebook.community

Edit and run



In [1]:

    
from sklearn import datasets
# Load the iris and digits datasets through sklearn.datasets.
# Only `digits` is referenced by the cells visible in this notebook;
# `iris` is loaded but not used again here.
iris = datasets.load_iris()
digits = datasets.load_digits()



In [2]:

    
# Show the digits feature matrix (the array later passed to clf.fit as the
# training inputs). NumPy abbreviates large arrays with "..." when printed.
print(digits.data)









    



[[  0.   0.   5. ...,   0.   0.   0.]
 [  0.   0.   0. ...,  10.   0.   0.]
 [  0.   0.   0. ...,  16.   9.   0.]
 ..., 
 [  0.   0.   1. ...,   6.   0.   0.]
 [  0.   0.   2. ...,  12.   0.   0.]
 [  0.   0.  10. ...,  12.   1.   0.]]



In [3]:

    
# Label array for the digits samples; used as the fit target further down.
digits.target









    Out[3]:





array([0, 1, 2, ..., 8, 9, 8])



In [4]:

    
# First entry of digits.images; the recorded output shows it as an 8x8 array.
digits.images[0]









    Out[4]:





array([[  0.,   0.,   5.,  13.,   9.,   1.,   0.,   0.],
       [  0.,   0.,  13.,  15.,  10.,  15.,   5.,   0.],
       [  0.,   3.,  15.,   2.,   0.,  11.,   8.,   0.],
       [  0.,   4.,  12.,   0.,   0.,   8.,   8.,   0.],
       [  0.,   5.,   8.,   0.,   0.,   9.,   8.,   0.],
       [  0.,   4.,  11.,   0.,   1.,  12.,   7.,   0.],
       [  0.,   2.,  14.,   5.,  10.,  12.,   0.,   0.],
       [  0.,   0.,   6.,  13.,  10.,   0.,   0.,   0.]])



In [5]:

    
from sklearn import svm



In [6]:

    
# Support vector classifier with hand-picked gamma and C values.
# The kernel is left at its default; the recorded estimator repr in this
# notebook shows kernel='rbf'.
clf = svm.SVC(gamma=0.001, C=100.)



In [8]:

    
# Train on every sample except the last one ([:-1]); the final sample is
# held back so the next cell can predict it.
clf.fit(digits.data[:-1], digits.target[:-1])









    Out[8]:





SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)



In [9]:

    
# Predict the label of the single held-out sample. The [-1:] slice (rather
# than [-1]) yields a one-row 2-D array instead of a 1-D row.
clf.predict(digits.data[-1:])









    Out[9]:





array([8])



In [10]:

    
import numpy as np
import pandas as pd
import statsmodels.formula.api as sm



In [14]:

    
# Load the gym crowdedness CSV into a DataFrame.
# The location can be overridden with the GYM_DATA_CSV environment variable
# so the notebook can run on other machines; when it is unset, the original
# author's local path is used unchanged.
import os

gym_csv_path = os.environ.get('GYM_DATA_CSV', '/Users/Dan/Downloads/Crowdedness gym/data.csv')
gym = pd.read_csv(gym_csv_path)



In [17]:

    
# Iterating a DataFrame yields its column labels, so this lists the columns.
list(gym)









    Out[17]:





['number_people',
 'date',
 'timestamp',
 'day_of_week',
 'is_weekend',
 'is_holiday',
 'temperature',
 'is_start_of_semester',
 'is_during_semester',
 'month',
 'hour']



In [18]:

    
# Fit an ordinary least squares model with the formula interface:
# number_people regressed on is_weekend and temperature, using `gym` columns.
# Note: `sm` in this notebook is statsmodels.formula.api (see the import
# cell), not the more commonly aliased statsmodels.api.
# NOTE(review): the recorded summary output reports Durbin-Watson ~0.098,
# which suggests strongly autocorrelated residuals; the default nonrobust
# standard errors may be too small. Consider a robust covariance
# (e.g. fit(cov_type='HAC', ...)) - TODO confirm before relying on p-values.
result = sm.ols(formula="number_people ~ is_weekend + temperature", data=gym).fit()



In [19]:

    
# Print the fitted coefficients (intercept and one per regressor).
# Uses the function-call form of print so the cell runs on Python 3 as well
# as Python 2 (a single parenthesised argument prints identically on both).
print(result.params)









    



Intercept     -47.654294
is_weekend     -9.155930
temperature     1.354520
dtype: float64



In [20]:

    
# Print the full regression summary table for the fitted model.
# Uses the function-call form of print so the cell runs on Python 3 as well
# as Python 2 (a single parenthesised argument prints identically on both).
print(result.summary())









    



                            OLS Regression Results                            
==============================================================================
Dep. Variable:          number_people   R-squared:                       0.172
Model:                            OLS   Adj. R-squared:                  0.172
Method:                 Least Squares   F-statistic:                     6476.
Date:                Wed, 07 Jun 2017   Prob (F-statistic):               0.00
Time:                        14:21:52   Log-Likelihood:            -2.7648e+05
No. Observations:               62184   AIC:                         5.530e+05
Df Residuals:                   62181   BIC:                         5.530e+05
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
===============================================================================
                  coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------
Intercept     -47.6543      0.773    -61.674      0.000       -49.169   -46.140
is_weekend     -9.1559      0.184    -49.809      0.000        -9.516    -8.796
temperature     1.3545      0.013    103.339      0.000         1.329     1.380
==============================================================================
Omnibus:                     3458.878   Durbin-Watson:                   0.098
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             4060.444
Skew:                           0.616   Prob(JB):                         0.00
Kurtosis:                       3.223   Cond. No.                         550.
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]: