In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import statsmodels.formula.api as sm
from scipy import stats
def _chisqprob(chisq, df):
    """Return the upper-tail probability of a chi-square statistic.

    Parameters
    ----------
    chisq : float or array-like
        Chi-square statistic(s).
    df : int or array-like
        Degrees of freedom.

    Returns
    -------
    float or ndarray
        ``stats.chi2.sf(chisq, df)``, i.e. the chi-square survival function.
    """
    return stats.chi2.sf(chisq, df)


# Compatibility shim: expose `stats.chisqprob` as a wrapper around `chi2.sf`.
# NOTE(review): presumably needed because some library code still calls
# `scipy.stats.chisqprob`, which the installed SciPy does not provide — confirm.
# Install it only when missing so an existing implementation is never clobbered.
if not hasattr(stats, 'chisqprob'):
    stats.chisqprob = _chisqprob
In [2]:
# Load the tips data and derive the modelling columns in a single chain:
#   * tip_percent   - tip as a percentage of the total bill
#   * tip_above_avg - 1 when tip_percent is at or above its mean, otherwise 0
# Finally, every 'Yes'/'No' value anywhere in the frame is recoded to 1/0.
tips = (
    pd.read_csv('input/tips.csv')
    .assign(tip_percent=lambda frame: frame['tip'] / frame['total_bill'] * 100)
    .assign(
        tip_above_avg=lambda frame: np.where(
            frame['tip_percent'] >= frame['tip_percent'].mean(), 1, 0
        )
    )
    .replace({'Yes': 1, 'No': 0})
)
Reference: [How to interpret a correlation coefficient r](http://www.dummies.com/education/math/statistics/how-to-interpret-a-correlation-coefficient-r/)
In [3]:
# R-style formula: tip_percent is the response; terms wrapped in C(...) are
# treated as categorical predictors, the rest enter as numeric predictors.
formula = 'tip_percent ~ total_bill + party_size + C(ordered_alc_bev) + C(gender) + C(day) + C(time)'

# Specify the ordinary-least-squares model from the formula, then estimate it.
model = sm.ols(formula=formula, data=tips)
results = model.fit()

# Keep the summary as the final expression so the notebook renders the table.
results.summary()
Out[3]:
In [4]:
# Score one hand-written observation with the fitted model currently bound to
# `results` (the OLS fit when the cells are run top to bottom).
# The order of `columns` matches the order of the values in the row of `data`.
columns = [
    'total_bill', 'gender', 'ordered_alc_bev',
    'day', 'time', 'party_size',
]
data = [
    [15.52, 'Female', 0, 'Sun', 'Lunch', 1],
]
df = pd.DataFrame(data=data, columns=columns)

# `.tolist()` converts the prediction output into a plain Python list.
predictions = results.predict(df).tolist()
predictions
Out[4]:
In [5]:
# Logistic regression of the binary `tip_above_avg` flag on the three
# predictors listed in `columns`.
#
# The estimator class is imported from its defining module: `sm` in this
# notebook is `statsmodels.formula.api`, which only guarantees the lowercase
# formula constructors (`ols`, `logit`, ...). Reaching the class as `sm.Logit`
# raises AttributeError on current statsmodels releases.
from statsmodels.discrete.discrete_model import Logit

columns = ['total_bill', 'party_size', 'ordered_alc_bev']
training_columns = tips[columns].columns

# NOTE(review): the design matrix holds only the three predictors and no
# constant column, so the model is estimated without an intercept — confirm
# that this is intended.
logit = Logit(tips['tip_above_avg'], tips[training_columns])  # Describe model
results = logit.fit()  # Fit model (rebinds `results`, replacing the earlier fit)
results.summary()  # Summarize model
Out[5]:
In [6]:
# Score one hand-written observation with the fitted model currently bound to
# `results`. The values follow the order of the `columns` list defined in an
# earlier cell, which is reused here for the frame's column labels.
data = [
    [10.52, 4, 0],
]
df = pd.DataFrame(data=data, columns=columns)

# `.tolist()` converts the prediction output into a plain Python list.
predictions = results.predict(df).tolist()
predictions
Out[6]:
In [7]:
from requests import get, post
# Endpoint this cell posts to.
# NOTE(review): assumes a prediction service is listening locally on port 5000 — confirm.
PREDICTION_URL = 'http://localhost:5000/'
# Upper bound (seconds) on waiting for the service; without a timeout a dead
# or stalled server would block the kernel indefinitely.
REQUEST_TIMEOUT_SECONDS = 10

# One dict per observation; the list is sent as the JSON body of the request.
request_data = [
{"total_bill": 22.50, "party_size": 3, "ordered_alc_bev": 1, "gender": "Female", "day": "Sat", "time": "Dinner"},
{"total_bill": 18.62, "party_size": 3, "ordered_alc_bev": 1, "gender": "Female", "day": "Sat", "time": "Dinner"},
{"total_bill": 17.14, "party_size": 3, "ordered_alc_bev": 0, "gender": "Female", "day": "Sat", "time": "Dinner"},
]

response = post(PREDICTION_URL, json=request_data, timeout=REQUEST_TIMEOUT_SECONDS)
# Surface HTTP 4xx/5xx as an explicit HTTPError instead of a confusing
# JSON-decoding failure on an error page.
response.raise_for_status()
response.json()
Out[7]:
In [ ]: