In [1]:
from sklearn import datasets
# Load the iris and digits example datasets through sklearn.datasets.
# Only `digits` is referenced by the cells visible below; `iris` is kept available.
digits = datasets.load_digits()
iris = datasets.load_iris()

In [2]:
# Print the digits feature array (the same array later passed to clf.fit as X).
# The printed form is abbreviated with "..." for the omitted rows and columns.
print(digits.data)


[[  0.   0.   5. ...,   0.   0.   0.]
 [  0.   0.   0. ...,  10.   0.   0.]
 [  0.   0.   0. ...,  16.   9.   0.]
 ..., 
 [  0.   0.   1. ...,   6.   0.   0.]
 [  0.   0.   2. ...,  12.   0.   0.]
 [  0.   0.  10. ...,  12.   1.   0.]]

In [3]:
# Labels that accompany digits.data; later passed to clf.fit as the training target.
digits.target


Out[3]:
array([0, 1, 2, ..., 8, 9, 8])

In [4]:
# Inspect the first entry of digits.images; the output below shows it as an 8x8 array.
digits.images[0]


Out[4]:
array([[  0.,   0.,   5.,  13.,   9.,   1.,   0.,   0.],
       [  0.,   0.,  13.,  15.,  10.,  15.,   5.,   0.],
       [  0.,   3.,  15.,   2.,   0.,  11.,   8.,   0.],
       [  0.,   4.,  12.,   0.,   0.,   8.,   8.,   0.],
       [  0.,   5.,   8.,   0.,   0.,   9.,   8.,   0.],
       [  0.,   4.,  11.,   0.,   1.,  12.,   7.,   0.],
       [  0.,   2.,  14.,   5.,  10.,  12.,   0.,   0.],
       [  0.,   0.,   6.,  13.,  10.,   0.,   0.,   0.]])

In [5]:
from sklearn import svm

In [6]:
# Support vector classifier with fixed hyperparameters (kernel left at the library default).
clf = svm.SVC(C=100.0, gamma=0.001)

In [8]:
# Train on every sample except the last one; that final sample is used
# for the prediction in the following cell.
clf.fit(X=digits.data[:-1], y=digits.target[:-1])


Out[8]:
SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [9]:
# Predict the label of the one sample excluded from training.
# Slicing with [-1:] (rather than indexing with [-1]) keeps the input two-dimensional.
clf.predict(X=digits.data[-1:])


Out[9]:
array([8])

In [10]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as sm

In [14]:
# Location of the gym crowdedness CSV.
# The default is the original author's machine-specific path; set the
# GYM_DATA_CSV environment variable to point the notebook at a copy stored
# elsewhere instead of editing this cell.
import os

GYM_DATA_CSV = os.environ.get('GYM_DATA_CSV', '/Users/Dan/Downloads/Crowdedness gym/data.csv')

# Read the CSV into a DataFrame used by the regression cells below.
gym = pd.read_csv(GYM_DATA_CSV)

In [17]:
# Column labels of the gym DataFrame as a plain Python list.
gym.columns.tolist()


Out[17]:
['number_people',
 'date',
 'timestamp',
 'day_of_week',
 'is_weekend',
 'is_holiday',
 'temperature',
 'is_start_of_semester',
 'is_during_semester',
 'month',
 'hour']

In [18]:
# Ordinary least squares via the formula interface:
# regress number_people on is_weekend and temperature (an intercept is added, see output).
result = sm.ols(
    formula="number_people ~ is_weekend + temperature",
    data=gym,
).fit()

In [19]:
# Show the fitted coefficients (intercept plus one per regressor).
# Call-form print produces the same output on Python 2 and also runs on Python 3,
# and matches the print(...) usage earlier in this notebook.
print(result.params)


Intercept     -47.654294
is_weekend     -9.155930
temperature     1.354520
dtype: float64

In [20]:
# Print the full regression report for the fitted model.
# Call-form print produces the same output on Python 2 and also runs on Python 3.
# NOTE(review): the Durbin-Watson value in the recorded output (0.098) is far from 2,
# which usually points to autocorrelated residuals; the nonrobust standard errors
# may therefore be unreliable -- confirm before interpreting the t-statistics.
print(result.summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:          number_people   R-squared:                       0.172
Model:                            OLS   Adj. R-squared:                  0.172
Method:                 Least Squares   F-statistic:                     6476.
Date:                Wed, 07 Jun 2017   Prob (F-statistic):               0.00
Time:                        14:21:52   Log-Likelihood:            -2.7648e+05
No. Observations:               62184   AIC:                         5.530e+05
Df Residuals:                   62181   BIC:                         5.530e+05
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
===============================================================================
                  coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------
Intercept     -47.6543      0.773    -61.674      0.000       -49.169   -46.140
is_weekend     -9.1559      0.184    -49.809      0.000        -9.516    -8.796
temperature     1.3545      0.013    103.339      0.000         1.329     1.380
==============================================================================
Omnibus:                     3458.878   Durbin-Watson:                   0.098
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             4060.444
Skew:                           0.616   Prob(JB):                         0.00
Kurtosis:                       3.223   Cond. No.                         550.
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: