In [ ]:
import pandas as pd
from sklearn.linear_model import LinearRegression
In [ ]:
# Load the training and test splits from the CSV files in the working directory.
df_train, df_test = map(
    pd.read_csv,
    ('nettebad_train_set.csv', 'nettebad_test_set.csv'),
)
In [ ]:
# Display the first rows of the training set (head() defaults to 5 rows).
df_train.head()
In [ ]:
# Baseline value 1: mean of the training target column.
visitors_mean = df_train['visitors_pool_total'].mean()
# int() drops the fractional part, so the prediction is a whole number.
visitors_mean = int(visitors_mean)
visitors_mean
In [ ]:
# Build the submission frame: the test-set 'date' column plus a constant
# prediction column 'visitors_pool_total' filled with the training mean.
# .copy() makes df_submit an independent frame, so adding the column does not
# trigger pandas' SettingWithCopyWarning and cannot write through to df_test.
# Only 'date' is selected; 'sportbad_closed' was previously selected and then
# dropped again without being used.
df_submit = df_test[['date']].copy()
df_submit['visitors_pool_total'] = visitors_mean
df_submit.head()
In [ ]:
# Baseline value 2: median of the training target column.
visitors_median = df_train['visitors_pool_total'].median()
# int() drops the fractional part, so the prediction is a whole number.
visitors_median = int(visitors_median)
visitors_median
In [ ]:
# Overwrite the prediction column of the existing submission frame with the
# median baseline (every row gets the same value).
df_submit['visitors_pool_total'] = visitors_median
In [ ]:
# Write the median-baseline submission; index=False keeps the row index out
# of the file.
df_submit.to_csv(
    path_or_buf='submission_baseline_median.csv',
    index=False,
)
In [ ]:
In [ ]:
In [ ]:
# Save submission csv
# By this point the prediction column of df_submit has been overwritten with
# the median, so writing df_submit as-is would make this file a duplicate of
# the median submission. assign() returns a new frame with the mean put back
# in place and leaves df_submit itself unchanged.
df_submit.assign(visitors_pool_total=visitors_mean).to_csv(
    'submission_baseline_mean.csv', index=False
)
In [ ]: