In [2]:
%matplotlib inline
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
In [45]:
# Relative path of the cleaned weekly dengue/malaria CSV.
DISEASE_PATH = '../Data/interim/disease_SG/weekly-dengue-malaria-cleaned.csv'
# Relative path of the directory holding the weather files.
WEATHER_DIR = '../Data/interim/weather_SG'
In [55]:
# Load the disease table and collect the names of the weather files.
# The path constants are spelled in upper case (DISEASE_PATH / WEATHER_DIR);
# the lower-case names do not exist on a fresh kernel.
dfDisease = pd.read_csv(DISEASE_PATH)
# os.listdir returns entries in arbitrary order and includes sub-directories and
# hidden entries, so keep only regular, non-hidden files and sort them to make
# the load order reproducible.
weather_files = sorted(
    name for name in os.listdir(WEATHER_DIR)
    if not name.startswith('.') and os.path.isfile(os.path.join(WEATHER_DIR, name))
)
In [56]:
# Order the records by year, then week, and renumber the rows from 0.
dfDisease = dfDisease.sort_values(by=['year', 'week']).reset_index(drop=True)
dfDisease.head()
Out[56]:
In [7]:
# Marker shapes and the seaborn 'colorblind' palette used by the plotting cells.
markers = ['*', 's', 'p', '8', 'h']
colors = sns.color_palette('colorblind')
In [134]:
# Weekly malaria cases, one line per year from 2012 to 2019, saved to malaria.png.
# The figure and axes are created explicitly instead of through plt.gcf(), so
# nothing is drawn onto a figure left open by an earlier cell.
fig, ax = plt.subplots(figsize=(18.5, 10.5))
for i, year in enumerate(range(2012, 2020)):
    df_year = dfDisease[dfDisease['year'] == year]
    ax.plot(df_year.week, df_year.Malaria, color=colors[i], label=str(year))
ax.legend()
ax.set_xlabel('Week number')
ax.set_ylabel('Number of cases')
ax.set_title('Malaria weekly trend')
fig.savefig('malaria.png')
In [135]:
# Weekly dengue cases, one line per year from 2012 to 2019, saved to dengue.png.
# The figure and axes are created explicitly instead of through plt.gcf(), so
# nothing is drawn onto a figure left open by an earlier cell.
fig, ax = plt.subplots(figsize=(18.5, 10.5))
for i, year in enumerate(range(2012, 2020)):
    df_year = dfDisease[dfDisease['year'] == year]
    ax.plot(df_year.week, df_year.Dengue, color=colors[i], label=str(year))
ax.legend()
ax.set_xlabel('Week number')
ax.set_ylabel('Number of cases')
ax.set_title('Dengue Fever weekly trend')
fig.savefig('dengue.png')
In [59]:
from datetime import datetime
# Date for each record: 1 January of the record's year plus 7 days per week number.
# The offset is built numerically (unit='D') rather than by formatting each
# value as an "<n> days" string and parsing it back.
year_start = pd.to_datetime(dfDisease['year'].astype(str), format='%Y')
week_offset = pd.to_timedelta(dfDisease['week'] * 7, unit='D')
dfDisease['date'] = year_start + week_offset
In [132]:
# Number of rows in the disease table.
len(dfDisease)
Out[132]:
In [61]:
# Dengue case counts for every record, plotted against the row index.
# The figure and axes are created explicitly instead of through plt.gcf(), so
# nothing is drawn onto a figure left open by an earlier cell.
fig, ax = plt.subplots(figsize=(18.5, 10.5))
ax.plot(dfDisease.index, dfDisease.Dengue, color=colors[0], label='Dengue')
# The x values are bare row positions, so the axis is hidden.
ax.get_xaxis().set_visible(False)
ax.legend()
ax.set_ylabel('Number of cases')
ax.set_title('Dengue historical weekly 2012-2019')
Out[61]:
In [62]:
# Malaria case counts for every record, plotted against the row index.
# The figure and axes are created explicitly instead of through plt.gcf(), so
# nothing is drawn onto a figure left open by an earlier cell.
fig, ax = plt.subplots(figsize=(18.5, 10.5))
ax.plot(dfDisease.index, dfDisease.Malaria, color=colors[1], label='Malaria')
# The x values are bare row positions, so the axis is hidden.
ax.get_xaxis().set_visible(False)
# The line is labelled, so draw the legend (as the matching dengue plot does).
ax.legend()
ax.set_ylabel('Number of cases')
ax.set_title('Malaria historical weekly trend 2012-2019')
Out[62]:
In [103]:
# Read each entry of weather_files from WEATHER_DIR into its own DataFrame.
ddWeather = [pd.read_csv(os.path.join(WEATHER_DIR, name)) for name in weather_files]
In [104]:
# Number of weather tables loaded (one per entry in weather_files).
len(ddWeather)
Out[104]:
In [105]:
# Place the weather tables side by side, aligned on their row index.
df_concat = pd.concat(ddWeather, axis='columns')
In [107]:
# Remove the 'location' column(s) and preview what is left.
df_concat = df_concat.drop(columns='location')
df_concat.head()
Out[107]:
In [112]:
# Collapse the side-by-side weather tables into a single table:
#   stack()               -> long form, indexed by (row label, column label)
#   groupby(level=[0,1])  -> group the values sharing a row label and a column label
#   mean()                -> average within each group
#   unstack()             -> back to wide form, one column per distinct column label
# NOTE(review): presumably every weather file carries the same column names, so
# this averages each variable across files row by row -- confirm against the inputs.
df_weather_mean = df_concat.stack().groupby(level=[0,1]).mean().unstack()
In [123]:
# Preview the averaged weather table.
df_weather_mean.head()
Out[123]:
In [122]:
# Cast the year/week key columns to integers before they are used as merge keys.
# A vectorised astype replaces the deprecated element-wise DataFrame.applymap(int).
# NOTE(review): presumably these columns became floats through the mean() above -- confirm.
df_weather_mean[['year', 'week']] = df_weather_mean[['year', 'week']].astype('int64')
In [114]:
# Compare table sizes before merging. `dfWeather` is not defined anywhere in the
# notebook, so the concatenated weather table (df_concat) is reported instead.
print(dfDisease.shape, df_concat.shape, df_weather_mean.shape)
In [124]:
# Join weather and disease records on (year, week), then show the rows from
# position 350 onward.
dfData = pd.merge(df_weather_mean, dfDisease, on=['year', 'week'])
dfData.iloc[350:]
Out[124]:
In [68]:
# List the column labels of the merged table.
dfData.columns
Out[68]:
In [ ]:
# Drop the 'lo' column if it is present. errors='ignore' stops the cell raising
# KeyError when the column does not exist or when the cell is run a second time.
# NOTE(review): 'lo' may be a truncated 'location' (dropped from df_concat earlier) -- confirm.
dfData = dfData.drop(columns=['lo'], errors='ignore')
In [78]:
# Summary statistics for the columns of dfData.
dfData.describe()
Out[78]:
In [125]:
# Summary statistics for the combined (merged weather + disease) table.
dfData.describe()
Out[125]:
In [136]:
# One stacked panel per series in `labels`, all sharing the x axis (row index of
# dfData), saved to stacked-combined.png.
labels = ['Dengue', 'Rainfall Total', 'Max Temperature', 'Min Temperature',
          'Mean Wind Speed', 'Max Wind Speed', 'Malaria']
num_of_axes = len(labels)
# The final size is passed directly rather than creating the figure at (8, 8)
# and resizing it immediately afterwards.
fig, axes = plt.subplots(nrows=num_of_axes, ncols=1, sharex=True, sharey=False,
                         figsize=(18.5, 30))
for i, (ax, label) in enumerate(zip(axes, labels)):
    ax.plot(dfData.index, dfData[label], label=label, color=colors[i])
    ax.set_ylabel(label)
    ax.legend(loc='upper left', framealpha=0.5, prop={'size': 'small'})
# Save through the figure object: plt.figure(1) followed by plt.savefig writes
# whichever figure is numbered 1, which need not be the one built here.
fig.savefig('stacked-combined.png')
In [130]:
# Create the output directory; exist_ok=True makes this a no-op if it already exists.
os.makedirs('../Data/interim/data_SG/', exist_ok=True)
In [131]:
# Write the merged table to CSV (the row index is written as the first column).
dfData.to_csv('../Data/interim/data_SG/combined.csv')
In [ ]: