In [1]:
%matplotlib inline
In [2]:
import numpy as np
import pandas as pd
In [3]:
# Load the 2017 weather CSV, decoded as Shift_JIS. skiprows=3 discards the first
# three lines of the file, so the fourth line is used as the column header.
df_weather2017 = pd.read_csv("data/気象庁千葉2017.csv", encoding='shift_jis', skiprows=3)
2017年1月以降の気象データを読み込みます。
以下でデータフレームの加工を行い、学習時と同じ形式にします。
In [4]:
# (rows, columns) of the frame as loaded, before any column or row filtering.
df_weather2017.shape
Out[4]:
In [5]:
# Preview the first rows of the raw frame to see which rows/columns need cleaning.
df_weather2017.head()
Out[5]:
In [6]:
# Keep only the columns whose value in the row at position 1 is missing.
# NOTE(review): presumably row 1 carries sub-header labels that are filled only for
# auxiliary (non-measurement) columns — confirm against the raw CSV.
df_weather2017 = df_weather2017.iloc[:, df_weather2017.iloc[1].isnull().values]
In [7]:
# Check which columns remain after the column filter.
df_weather2017.head()
Out[7]:
In [8]:
# Remove the rows labelled 0 and 1.
# NOTE(review): presumably these are leftover sub-header rows rather than
# observations — confirm against the preview above.
df_weather2017 = df_weather2017.drop(labels=[0, 1], axis=0)
In [9]:
# Parse the 年月日 (date) column into datetimes, replacing the column in position.
df_weather2017 = df_weather2017.assign(**{"年月日": pd.to_datetime(df_weather2017["年月日"])})
In [10]:
# Use the parsed dates as the index. 年月日 stops being a column here, so re-running
# the preceding cell (which reads df_weather2017["年月日"]) after this one raises KeyError.
df_weather2017 = df_weather2017.set_index("年月日")
In [11]:
# Confirm the frame is now indexed by the parsed 年月日 dates.
df_weather2017.head()
Out[11]:
In [12]:
# Inspect the last rows to see how far the loaded data extends.
df_weather2017.tail()
Out[12]:
In [13]:
# (rows, columns) of the cleaned frame that is passed to the model below.
df_weather2017.shape
Out[13]:
In [14]:
# joblib is a standalone package. The copy vendored as sklearn.externals.joblib was
# deprecated in scikit-learn 0.21 and removed in 0.23, so import the top-level
# module and fall back to the vendored one only in old environments.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
In [15]:
# Load the previously saved estimator. joblib.load unpickles the file, which can
# execute arbitrary code, so "clf_rf.db" must come from a trusted source.
# NOTE(review): presumably the model saved by the training notebook — confirm.
clf = joblib.load("clf_rf.db")
In [16]:
# Show the class of the loaded estimator.
type(clf)
Out[16]:
In [17]:
# Run the loaded estimator on every row of the cleaned 2017 frame.
# NOTE(review): assumes the columns match the features (and their order) the model
# was fitted on — confirm against the training notebook.
pred = clf.predict(df_weather2017)
In [18]:
# Display the raw prediction array.
pred
Out[18]:
In [19]:
from matplotlib import rcParams

# Global matplotlib settings for every figure drawn after this cell, applied in
# one call: sans-serif font family, font weight, and title / tick-label sizes.
# NOTE(review): the font is presumably chosen so the Japanese plot titles render —
# it must be installed on the machine running the notebook.
rcParams.update({
    'font.sans-serif': "Source Han Code JP",
    'font.weight': "regular",
    'axes.titlesize': 15,
    'ytick.labelsize': 12,
    'xtick.labelsize': 12,
})
In [20]:
# Wrap the predictions in a one-column frame ("予測" = prediction) that shares the
# date index of the weather frame, so they can be plotted and grouped by date.
df = pd.DataFrame(data=pred, index=df_weather2017.index, columns=["予測"])
In [21]:
# Preview the first predictions together with their dates.
df.head()
Out[21]:
In [22]:
# Quick line plot of the predictions over the date index (no title set here).
df.plot()
Out[22]:
In [23]:
# Load the influenza table, decoded as Shift_JIS.
# NOTE(review): presumably the reported case counts to compare the predictions with — confirm.
df_influ_2017 = pd.read_csv("data/201712influenza.csv", encoding="shift_jis")
In [24]:
# Take the row at position 1 without its first column and turn that Series into
# a single-column frame (one row per remaining CSV column).
df_2017 = df_influ_2017.iloc[1, 1:].to_frame()
In [25]:
# Name the single column '報告数' (number of reports); later cells refer to it by this name.
df_2017.columns = ['報告数']
In [26]:
# Shape of the raw influenza table. Its column count minus one is the number of
# rows in df_2017, which has to equal the 53 periods of the date index built next.
df_influ_2017.shape
Out[26]:
In [27]:
# Weekly index for the report counts: 53 consecutive Mondays, the first being 2017-01-02.
date_index = pd.date_range("2017-1-2", freq="W-MON", periods=53)
In [28]:
# Label each value with a Monday date. The assignment raises ValueError unless
# df_2017 has exactly as many rows as date_index (53).
df_2017.index = date_index
In [29]:
# Check that the first values line up with the first Mondays of the index.
df_2017.head()
Out[29]:
In [30]:
# Inspect the last rows of the weekly frame (before missing values are dropped).
df_2017.tail()
Out[30]:
In [31]:
# Drop the rows that contain a missing value.
# NOTE(review): presumably weeks with no reported figure yet — confirm against the tail above.
df_2017 = df_2017.dropna()
In [32]:
# Check the column dtype before the numeric cast.
df_2017.dtypes
Out[32]:
In [33]:
# Cast the report counts to float32 so the column is numeric for plotting.
# NOTE(review): the dtype checks around this cell suggest the column was not
# numeric beforehand — confirm against the displayed dtypes.
df_2017['報告数'] = df_2017['報告数'].astype(np.float32)
In [34]:
# Confirm the column dtype after the cast.
df_2017.dtypes
Out[34]:
In [35]:
# Plot the weekly report counts; the title reads "Influenza report counts in 2017".
df_2017.plot(title="2017年のインフルエンザ報告件数")
Out[35]:
In [36]:
# Plot the predictions at their original date resolution; the title reads "Prediction results".
df.plot(title="予測結果")
Out[36]:
In [37]:
# (rows, columns) of the prediction frame: one row per date in df_weather2017's index.
df.shape
Out[37]:
In [38]:
# Look at the first ten predictions before aggregating them by week.
df.head(10)
Out[38]:
In [39]:
# Average the predictions per week and plot them (title: "Prediction results,
# averaged per week"). "W" is the documented weekly alias and is equivalent to
# "W-SUN": each bin ends on, and is labelled by, a Sunday. The lowercase spelling
# "w" is non-canonical, is deprecated in recent pandas, and was inconsistent with
# the uppercase "W-MON" used for the report index.
# NOTE(review): df_2017 is labelled by Mondays ("W-MON"), so the x-axis labels of
# this chart and the report chart are offset — confirm whether they should align.
df.groupby(pd.Grouper(freq="W"))["予測"].mean().plot(title="予測結果を1週ごとの平均")
Out[39]:
In [40]:
# Plot the weekly report counts again (title: "Report count data") so they can be
# compared with the weekly-averaged predictions.
df_2017.plot(title="報告数データ")
Out[40]:
In [ ]: