In [ ]:
import pandas as pd
from sklearn.linear_model import LinearRegression

Read data


In [ ]:
# Load the train and test splits; both paths are relative to the working directory.
df_train = pd.read_csv(filepath_or_buffer='nettebad_train_set.csv')
df_test = pd.read_csv(filepath_or_buffer='nettebad_test_set.csv')

In [ ]:
# Preview the first five rows of the training data
df_train.head(n=5)

Baseline model 1: Mean


In [ ]:
# Baseline prediction: the training-set mean of total pool visitors.
# int() drops the fractional part so the prediction is a whole number.
mean_visitors_raw = df_train['visitors_pool_total'].mean()
visitors_mean = int(mean_visitors_raw)
visitors_mean

In [ ]:
# Build the submission frame: one row per test date, each carrying the constant
# mean prediction. Selecting only 'date' and using assign() yields a fresh frame,
# so nothing is written into a selection of df_test (the pattern that triggers
# pandas' SettingWithCopyWarning) and no helper column has to be dropped afterwards.
# Resulting columns, in order: 'date', 'visitors_pool_total'.
df_submit = df_test[['date']].assign(visitors_pool_total=visitors_mean)
df_submit.head()

Baseline model 2: Median


In [ ]:
# Calculate median
visitors_median = int(df_train['visitors_pool_total'].median())
visitors_median

In [ ]:
# Create DataFrame
# Replace the predictions with the median baseline. assign() rebinds df_submit to a
# new frame instead of writing through .loc into the existing one, which avoids
# SettingWithCopyWarning when df_submit originated as a selection from df_test.
# NOTE: after this cell df_submit no longer holds the mean predictions.
df_submit = df_submit.assign(visitors_pool_total=visitors_median)

In [ ]:
# Save submission csv
# Write the median-baseline submission to disk, omitting the index column
df_submit.to_csv(path_or_buf='submission_baseline_median.csv', index=False)

In [ ]:

Baseline model 3: Linear regression


In [ ]:


In [ ]:
# Save submission csv for the mean baseline.
# By this point df_submit has been overwritten with the median predictions, so
# writing it out under the "mean" filename would store median values. Rebuild the
# frame from visitors_mean so the file content matches its name.
df_submit_mean = df_test[['date']].assign(visitors_pool_total=visitors_mean)
df_submit_mean.to_csv('submission_baseline_mean.csv', index=False)

In [ ]: