In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from include.feature_lists import L0_stations, L1_stations, L2_stations, L3_stations
%matplotlib inline
In [2]:
# Load the CSV, using its 'Id' column as the row index.
csv_path = "item_station_date.csv"
df = pd.read_csv(csv_path, index_col=['Id'])
In [ ]:
# One sub-frame per L0..L3 group: the group's station columns followed by its
# Minimum / Maximum / Duration columns.
L0_columns = L0_stations + ['L0_Minimum', 'L0_Maximum', 'L0_Duration']
L1_columns = L1_stations + ['L1_Minimum', 'L1_Maximum', 'L1_Duration']
L2_columns = L2_stations + ['L2_Minimum', 'L2_Maximum', 'L2_Duration']
L3_columns = L3_stations + ['L3_Minimum', 'L3_Maximum', 'L3_Duration']

L0_df = df[L0_columns]
L1_df = df[L1_columns]
L2_df = df[L2_columns]
L3_df = df[L3_columns]
In [ ]:
# Pairwise Pearson correlation between all L0 columns.
L0_df_corr = L0_df.corr(method='pearson')
In [ ]:
# Annotated heatmap of the L0 correlation matrix on a 20x20 inch canvas.
fig, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(L0_df_corr, annot=True, ax=ax)
plt.show()
In [ ]:
# Pairwise Pearson correlation between all L1 columns.
L1_df_corr = L1_df.corr(method='pearson')
In [ ]:
# Annotated heatmap of the L1 correlation matrix on a 20x20 inch canvas.
fig, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(L1_df_corr, annot=True, ax=ax)
plt.show()
In [ ]:
In [ ]:
# Pairwise Pearson correlation between all L2 columns.
L2_df_corr = L2_df.corr(method='pearson')
In [ ]:
# Annotated heatmap of the L2 correlation matrix on a 20x20 inch canvas.
fig, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(L2_df_corr, annot=True, ax=ax)
plt.show()
In [ ]:
In [ ]:
# Pairwise Pearson correlation between all L3 columns.
L3_df_corr = L3_df.corr(method='pearson')
In [ ]:
# Annotated heatmap of the L3 correlation matrix on a 20x20 inch canvas.
fig, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(L3_df_corr, annot=True, ax=ax)
plt.show()
In [ ]:
# Preview the first ten rows of the L3 sub-frame.
L3_df.iloc[:10]
In [130]:
# Gather the per-group Minimum / Maximum / Duration columns together with the
# overall timing columns into a single frame.
timing_columns = [
    'L0_Minimum', 'L0_Maximum', 'L0_Duration',
    'L1_Minimum', 'L1_Maximum', 'L1_Duration',
    'L2_Minimum', 'L2_Maximum', 'L2_Duration',
    'L3_Minimum', 'L3_Maximum', 'L3_Duration',
    'Minimum', 'Maximum', 'Total_Duration', 'Duration_Sum', 'Duration_Lag',
]
time_df = df[timing_columns]
In [107]:
# Duration-only view: the four per-group durations plus the two overall ones.
# NOTE(review): this rebinds `time_df`, so on a top-to-bottom run it replaces
# the wider selection assigned in the cell above. The execution counts suggest
# that cell was the one run last; confirm which frame the cells below expect.
duration_columns = [
    'L0_Duration', 'L1_Duration', 'L2_Duration', 'L3_Duration',
    'Duration_Sum', 'Total_Duration',
]
time_df = df[duration_columns]
In [ ]:
# Show the pairwise Pearson correlation table for the timing columns.
time_df.corr(method='pearson')
In [132]:
# Keep the timing correlation matrix for plotting.
time_df_corr = time_df.corr(method='pearson')
In [133]:
# Annotated heatmap of the timing correlation matrix on a 10x10 inch canvas.
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(time_df_corr, annot=True, ax=ax)
plt.show()
In [131]:
# Preview the first ten rows of the timing frame.
time_df.iloc[:10]
Out[131]:
In [33]:
# Order the rows by every station column, L0 stations first and L3 last.
# Note that this rebinds `df`, so cells above see the sorted frame if re-run.
df = df.sort_values(by=L0_stations + L1_stations + L2_stations + L3_stations)
In [34]:
# Keep only the station columns, in L0..L3 order.
all_station_columns = L0_stations + L1_stations + L2_stations + L3_stations
df_flow = df[all_station_columns]
In [35]:
# Row count divided by the 4000-row sampling step used further down.
len(df_flow) / 4000
Out[35]:
In [36]:
# Convert the station columns into a presence mask: 1.0 where the source frame
# holds a value, 0.0 where it is missing.
# The mask is computed from `df` in a single expression instead of two masked
# writes into `df_flow`. Those writes target a frame derived from `df` (which
# can raise SettingWithCopyWarning) and are not re-runnable: once the frame is
# all 0/1 nothing is null any more, so a second run would set every cell to 1.
df_flow = (
    df[L0_stations + L1_stations + L2_stations + L3_stations]
    .notnull()
    .astype(float)
)
In [14]:
# Keep only the rows whose index label is a multiple of 4000.
# A conditional expression inside a comprehension needs a value after `else`,
# so leaving it dangling is a SyntaxError. A boolean mask expresses the
# intended filter directly and yields a DataFrame rather than a list of rows.
img = df_flow[df_flow.index % 4000 == 0]
In [37]:
# Replace the 'Id' index with a positional 0..n-1 index.
# drop=True discards the old index instead of materialising it as an 'Id'
# column that then has to be deleted. It also keeps the cell re-runnable: a
# second reset_index() + `del df_flow['Id']` would raise KeyError because the
# column no longer exists.
df_flow = df_flow.reset_index(drop=True)
In [38]:
# Sample the rows whose index value is a multiple of 4000.
is_multiple_of_4000 = (df_flow.index % 4000) == 0
img = df_flow[is_multiple_of_4000]
In [39]:
# Render the sampled rows as an image on a 20x20 inch figure.
plt.figure(figsize=(20, 20))
flow_pixels = img.values
plt.imshow(flow_pixels)
Out[39]:
In [ ]: