In [1]:
    
import pandas as pd
import numpy as np
import seaborn as sns
import statsmodels.formula.api as sm
from scipy import stats
def _chi2_upper_tail(chisq, df):
    """Return the chi-squared survival function (upper-tail probability) at `chisq` with `df` degrees of freedom."""
    return stats.chi2.sf(chisq, df)


# Install the helper under the name `stats.chisqprob`.
# NOTE(review): presumably a compatibility shim for code that still looks up
# `scipy.stats.chisqprob` -- confirm which caller needs it.
stats.chisqprob = _chi2_upper_tail
    
In [2]:
    
# Load the tips data set from the CSV file.
tips = pd.read_csv('input/tips.csv')

# Tip expressed as a percentage of the total bill.
tips['tip_percent'] = tips['tip'] / tips['total_bill'] * 100

# Binary flag: 1 when the tip percentage is at or above the sample mean, else 0.
tips['tip_above_avg'] = np.where(tips['tip_percent'] >= tips['tip_percent'].mean(), 1, 0)

# Encode every 'Yes'/'No' value in the frame as 1/0.
# - Rebinding instead of `inplace=True` keeps the step chainable.
# - `infer_objects()` makes the integer conversion of the replaced columns
#   explicit: recent pandas releases deprecate the silent downcast that
#   `replace` used to perform, which would otherwise leave those columns as
#   object dtype.
tips = tips.replace({'Yes': 1, 'No': 0}).infer_objects()
    
Reference: how to interpret a correlation coefficient (r): http://www.dummies.com/education/math/statistics/how-to-interpret-a-correlation-coefficient-r/
In [3]:
    
# Build the R-style formula from its parts: response on the left of `~`,
# predictor terms joined with `+` on the right. Terms wrapped in C(...) are
# passed to the formula as categorical.
ols_response = 'tip_percent'
ols_terms = [
    'total_bill',
    'party_size',
    'C(ordered_alc_bev)',
    'C(gender)',
    'C(day)',
    'C(time)',
]
formula = ols_response + ' ~ ' + ' + '.join(ols_terms)

# Specify the OLS model on the `tips` frame, then fit it.
model = sm.ols(formula, data=tips)
results = model.fit()

# Last expression of the cell: display the fit summary.
results.summary()
    
    Out[3]:
In [4]:
    
# One hypothetical visit to score with the fitted model held in `results`.
new_visit = {
    'total_bill': 15.52,
    'gender': 'Female',
    'ordered_alc_bev': 0,
    'day': 'Sun',
    'time': 'Lunch',
    'party_size': 1,
}

# Split the mapping into the column labels and a single data row.
columns = list(new_visit)
data = [list(new_visit.values())]
df = pd.DataFrame(data, columns=columns)

# `.tolist()` turns the prediction into a plain Python list for display.
predictions = results.predict(df).tolist()
predictions
    
    Out[4]:
In [5]:
    
# Predictor columns for the logistic model of `tip_above_avg`.
columns = ['total_bill', 'party_size', 'ordered_alc_bev']
training_columns = tips[columns].columns

# Separate the response vector from the design matrix.
logit_endog = tips['tip_above_avg']
logit_exog = tips[training_columns]
# NOTE(review): the design matrix holds only the three columns above; no
# constant column is added here, so unless statsmodels supplies one the model
# has no intercept -- confirm that is intended.
# NOTE(review): `sm` is `statsmodels.formula.api`; confirm the installed
# statsmodels version exposes the `Logit` class through that module.

logit = sm.Logit(logit_endog, logit_exog)

# Fit the model. This rebinds `results`, replacing the OLS fit stored under
# the same name by the earlier cell.
results = logit.fit()

# Last expression of the cell: display the fit summary.
results.summary()
    
    
    Out[5]:
In [6]:
    
# A single observation; values are positional and are labelled with whatever
# `columns` currently holds (total_bill, party_size, ordered_alc_bev when the
# cells are run in order).
data = [[10.52, 4, 0]]
df = pd.DataFrame(data=data, columns=columns)

# Score the observation with the model currently bound to `results`, then
# convert the output to a plain Python list for display.
predicted = results.predict(df)
predictions = predicted.tolist()
predictions
    
    Out[6]:
In [7]:
    
from requests import get, post

# Three records that differ only in bill amount and the alcohol flag; the
# remaining fields are the same for every record. Key order matches the
# hand-written payload so the serialized JSON is unchanged.
request_data = [
    {
        "total_bill": bill,
        "party_size": 3,
        "ordered_alc_bev": alc_flag,
        "gender": "Female",
        "day": "Sat",
        "time": "Dinner",
    }
    for bill, alc_flag in [(22.50, 1), (18.62, 1), (17.14, 0)]
]

# POST the records as a JSON array to the service listening on localhost:5000
# and display the decoded JSON reply.
reply = post('http://localhost:5000/', json=request_data)
reply.json()
    
    Out[7]:
In [ ]: