In [ ]:


In [ ]:


In [ ]:
# Per-user activity bounds: latest BetDate and earliest FirstDepositDate.
df_max_BetDate = (
    df_bc.groupby('UserId')['BetDate']
    .max()
    .rename('max_BetDate')
    .reset_index()
)
df_min_FirstDepositDate = (
    df_bc.groupby('UserId')['FirstDepositDate']
    .min()
    .rename('min_FirstDepositDate')
    .reset_index()
)

# Attach both per-user values to every row of df_bc (left joins keep all of its rows).
df_ltv = (
    df_bc
    .merge(df_max_BetDate, how='left', on='UserId')
    .merge(df_min_FirstDepositDate, how='left', on='UserId')
)

# Lifetime = span between the user's earliest first-deposit date and latest bet date.
df_ltv['lifetime'] = df_ltv['max_BetDate'] - df_ltv['min_FirstDepositDate']

df_ltv.head()

In [ ]:
# Total amount staked per user across all of df_bc.
df_Value = (
    df_bc.groupby('UserId')['BetAmount']
    .sum()
    .rename('Value')
    .reset_index()
)

# Drop any 'Value' column left over from a previous run of this cell before
# merging; otherwise a re-run would produce 'Value_x'/'Value_y' and break the
# later df_ltv['Value'] lookup.
df_ltv = (
    df_ltv
    .drop(columns='Value', errors='ignore')
    .merge(df_Value, how='left', on='UserId')
)

df_ltv.head()

In [ ]:
# value/lifetime
# Lifetime in whole days. `.dt.days` replaces `.astype('<m8[D]')`, which raises
# on pandas >= 2.0 because only s/ms/us/ns timedelta resolutions are supported.
lifetime_days = df_ltv['lifetime'].dt.days
# Count a zero-day lifetime as one day so the division below never divides by
# zero. Missing lifetimes stay missing (NaN != 0 is True, so they are kept).
df_ltv['lifetime_int'] = lifetime_days.where(lifetime_days != 0, 1)
# Average value per day of lifetime.
df_ltv['Value_lifetime'] = df_ltv['Value'] / df_ltv['lifetime_int']

In [ ]:
# age
# Naive local "now", equivalent to pd.Timestamp(datetime.now()) but without
# needing a separate `datetime` import.
now = pd.Timestamp.now()
# Read BirthDate from df_ltv itself (it carries every df_bc column after the
# left merge) so the result does not depend on df_bc and df_ltv sharing an index.
# assumes BirthDate is a datetime64 column - TODO confirm
age_in_days = (now - df_ltv['BirthDate']).dt.days
# Whole years, using the mean Gregorian year length (365.2425 days). This
# replaces `.astype('<m8[Y]')`, which raises on pandas >= 2.0.
df_ltv['age'] = age_in_days // 365.2425

In [ ]:
# Whole-day lifetime. Zero-day lifetimes are counted as one day so this column
# keeps matching the denominator that was used to compute Value_lifetime
# (a plain `.dt.days` here would silently reintroduce the zeros).
df_ltv['lifetime_int'] = df_ltv['lifetime'].dt.days.replace(0, 1)

In [ ]:
from datetime import timedelta

In [ ]:
# Keep the rows whose BetDate is no later than 15 days after that row's
# FirstDepositDate.
window_end = df_ltv['FirstDepositDate'] + timedelta(days=15)
bet_inside_window = df_ltv['BetDate'] <= window_end
df_ltv_15 = df_ltv[bet_inside_window]

In [ ]:
# Per-user BetAmount statistics over the filtered rows in df_ltv_15.
# Each result is a two-column frame: 'UserId' plus the named statistic.
bet_amount_by_user = df_ltv_15.groupby('UserId')['BetAmount']

mean_bet_by_user = bet_amount_by_user.mean().rename('mean_bet').reset_index()
min_bet_by_user = bet_amount_by_user.min().rename('min_bet').reset_index()
max_bet_by_user = bet_amount_by_user.max().rename('max_bet').reset_index()
median_bet_by_user = bet_amount_by_user.median().rename('median_bet').reset_index()
sum_bet_by_user = bet_amount_by_user.sum().rename('sum_bet').reset_index()

# Number of non-null BetId values per user.
count_bet_by_user = (
    df_ltv_15.groupby('UserId')['BetId']
    .count()
    .rename('count_bet')
    .reset_index()
)

In [ ]:
# Per-user AmountWon statistics over the filtered rows in df_ltv_15.
# Each result is a two-column frame: 'UserId' plus the named statistic.
amount_won_by_user = df_ltv_15.groupby('UserId')['AmountWon']

mean_bet_won_by_user = amount_won_by_user.mean().rename('mean_bet_won').reset_index()
min_bet_won_by_user = amount_won_by_user.min().rename('min_bet_won').reset_index()
max_bet_won_by_user = amount_won_by_user.max().rename('max_bet_won').reset_index()
median_bet_won_by_user = amount_won_by_user.median().rename('median_bet_won').reset_index()
sum_bet_won_by_user = amount_won_by_user.sum().rename('sum_bet_won').reset_index()

# Number of bets per user with a strictly positive AmountWon. Users without any
# such bet do not appear in this table at all.
winning_bets = df_ltv_15[df_ltv_15['AmountWon'] > 0]
count_bet_won_by_user = (
    winning_bets.groupby('UserId')['BetId']
    .count()
    .rename('count_bet_won')
    .reset_index()
)

In [ ]:
# Dummy-encode Application, then sum the encoded columns per user.
# The result is indexed by UserId.
application_dummies = pd.get_dummies(df_ltv_15[['UserId', 'Application']])
application_used = application_dummies.groupby('UserId').sum()

In [ ]:
# Mean Odds per user, as a Series named 'Odds' indexed by UserId.
mean_ods_by_user = df_ltv_15['Odds'].groupby(df_ltv_15['UserId']).mean()

In [ ]:
# Largest IsLive value per user, as a Series named 'IsLive' indexed by UserId.
# presumably IsLive is a 0/1 flag, making this "has the user ever bet live" - verify
islive_by_user = df_ltv_15['IsLive'].groupby(df_ltv_15['UserId']).max()

In [ ]:
# Dummy-encode PartnerType, then sum the encoded columns per user.
# The result is indexed by UserId.
partnertype_dummies = pd.get_dummies(df_ltv_15[['UserId', 'PartnerType']])
partnertype_used = partnertype_dummies.groupby('UserId').sum()

In [ ]:
# Per-user sums of the dummy-encoded Gender columns, collapsed to 0.0/1.0 flags:
# 1.0 where the per-user sum is non-zero, 0.0 otherwise.
gender_counts = pd.get_dummies(df_ltv_15[['UserId', 'Gender']]).groupby('UserId').sum()
gender_used = gender_counts.ne(0).astype(float)

In [ ]:
# Per-user sums of the dummy-encoded Country columns, collapsed to 0.0/1.0 flags:
# 1.0 where the per-user sum is non-zero, 0.0 otherwise.
country_counts = pd.get_dummies(df_ltv_15[['UserId', 'Country']]).groupby('UserId').sum()
Country_used = country_counts.ne(0).astype(float)

In [ ]:
# Assemble one wide per-user feature table by left-joining every per-user
# feature onto the mean-bet table, in this fixed order.
# The tables built with groupby(...) but without reset_index() carry UserId as
# their index name; merge resolves 'UserId' against that index level.
feature_tables = [
    min_bet_by_user,
    max_bet_by_user,
    median_bet_by_user,
    sum_bet_by_user,
    count_bet_by_user,
    mean_bet_won_by_user,
    min_bet_won_by_user,
    max_bet_won_by_user,
    median_bet_won_by_user,
    sum_bet_won_by_user,
    count_bet_won_by_user,
    application_used,
    mean_ods_by_user,
    islive_by_user,
    partnertype_used,
    gender_used,
    Country_used,
]

df_res = mean_bet_by_user
for feature_table in feature_tables:
    df_res = pd.merge(
        df_res,
        feature_table,
        how='left',
        left_on=['UserId'],
        right_on=['UserId'],
    )

In [ ]:
# Share of each user's counted bets that had a positive AmountWon.
won_bet_counts = df_res['count_bet_won']
all_bet_counts = df_res['count_bet']
df_res['ratio_win'] = won_bet_counts.div(all_bet_counts)

In [ ]:
# Replace every remaining missing value in the feature table with 0
# (for example, the win counts of users who had no row in the joined table).
df_res = df_res.fillna(value=0)

In [ ]:
# Regression target: the distinct (UserId, Value_lifetime) pairs found in df_ltv.
target_columns = ['UserId', 'Value_lifetime']
ltv_to_evaluation = df_ltv.loc[:, target_columns].drop_duplicates()

In [ ]:
# Attach the target (Value_lifetime) to the per-user feature table;
# the left join keeps every row of df_res.
df_final = df_res.merge(ltv_to_evaluation, how='left', on='UserId')

In [ ]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Feature columns: everything except the user identifier and the regression
# target. Iterating df_final.columns keeps a stable column order; the previous
# set difference gave an arbitrary order, so the positions of the scaled
# columns (and of any fitted coefficients) could change between kernel sessions.
# UserId is excluded because it is an identifier, not a predictor.
feature_columns = [
    column for column in df_final.columns
    if column not in ('UserId', 'Value_lifetime')
]

scaler = MinMaxScaler()

# Scale every feature to the [0, 1] range; the result is a NumPy array whose
# columns follow feature_columns.
# NOTE(review): the scaler is fitted on all rows, i.e. before any train/test
# split, so test-set minima/maxima influence the scaling - confirm this is acceptable.
dfTest = scaler.fit_transform(df_final[feature_columns])

dfTest

In [ ]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Features (scaled matrix) and target (value per day of lifetime).
X = dfTest
y = df_final['Value_lifetime']

# Hold out 33% of the rows for evaluation; fixed seed for a reproducible split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

reg = LinearRegression().fit(X_train, y_train)

y_pred = reg.predict(X_test)
# The coefficients
print('Coefficients: \n', reg.coef_)
# R^2 on the training rows only. The previous bare `reg.score(X, y)` mixed
# train and test rows, and its value was never displayed.
print('Train R^2: %.2f' % reg.score(X_train, y_train))
# The mean squared error on the held-out rows
print("Mean squared error: %.2f"
      % mean_squared_error(y_test, y_pred))
# R^2 on the held-out rows: 1 is perfect prediction
print('Variance score: %.2f' % r2_score(y_test, y_pred))

In [ ]:
from sklearn import svm

# Features (scaled matrix) and target (value per day of lifetime).
X = dfTest
y = df_final['Value_lifetime']

# Same split parameters as the linear model so both models are evaluated on
# identical held-out rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

# Support vector regression with scikit-learn's default settings.
clf = svm.SVR()
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
# No coefficients are printed: SVR only exposes `coef_` for a linear kernel,
# and the default kernel is RBF.
# R^2 on the training rows (previously computed but never displayed).
print('Train R^2: %.2f' % clf.score(X_train, y_train))
# The mean squared error on the held-out rows
print("Mean squared error: %.2f"
      % mean_squared_error(y_test, y_pred))
# R^2 on the held-out rows: 1 is perfect prediction
print('Variance score: %.2f' % r2_score(y_test, y_pred))

In [ ]:
# R^2 of the most recent predictions (y_pred) against the held-out targets.
held_out_r2 = r2_score(y_test, y_pred)
print('Variance score: %.2f' % held_out_r2)

In [ ]: