In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from include.feature_lists import L0_stations, L1_stations, L2_stations, L3_stations

%matplotlib inline

In [2]:
df = pd.read_csv("item_station_date.csv", index_col=['Id'])


d:\Anaconda\envs\Deep2\lib\site-packages\numpy\lib\arraysetops.py:395: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  mask |= (ar1 == a)

In [ ]:
L0_df = df[L0_stations + ['L0_Minimum', 'L0_Maximum', 'L0_Duration']]
L1_df = df[L1_stations + ['L1_Minimum', 'L1_Maximum', 'L1_Duration']]
L2_df = df[L2_stations + ['L2_Minimum', 'L2_Maximum', 'L2_Duration']]
L3_df = df[L3_stations + ['L3_Minimum', 'L3_Maximum', 'L3_Duration']]

In [ ]:
L0_df_corr = L0_df.corr()

In [ ]:
plt.figure(figsize=(20,20))
sns.heatmap(L0_df_corr, annot=True)
plt.show()

In [ ]:
L1_df_corr = L1_df.corr()

In [ ]:
plt.figure(figsize=(20,20))
sns.heatmap(L1_df_corr, annot=True)
plt.show()

In [ ]:


In [ ]:
L2_df_corr = L2_df.corr()

In [ ]:
plt.figure(figsize=(20,20))
sns.heatmap(L2_df_corr, annot=True)
plt.show()

In [ ]:


In [ ]:
L3_df_corr = L3_df.corr()

In [ ]:
plt.figure(figsize=(20,20))
sns.heatmap(L3_df_corr, annot=True)
plt.show()

In [ ]:
L3_df.head(10)

In [130]:
time_df = df[['L0_Minimum', 'L0_Maximum', 'L0_Duration', 'L1_Minimum', 'L1_Maximum', 'L1_Duration', 'L2_Minimum', 'L2_Maximum', 'L2_Duration', 'L3_Minimum', 'L3_Maximum', 'L3_Duration', 'Minimum', 'Maximum', 'Total_Duration', 'Duration_Sum', 'Duration_Lag']]

In [107]:
time_df = df[['L0_Duration', 'L1_Duration', 'L2_Duration', 'L3_Duration', 'Duration_Sum', 'Total_Duration']]

In [ ]:
time_df.corr()

In [132]:
time_df_corr = time_df.corr()

In [133]:
plt.figure(figsize=(10,10))
sns.heatmap(time_df_corr, annot=True)
plt.show()



In [131]:
time_df.head(10)


Out[131]:
L0_Minimum L0_Maximum L0_Duration L1_Minimum L1_Maximum L1_Duration L2_Minimum L2_Maximum L2_Duration L3_Minimum L3_Maximum L3_Duration Minimum Maximum Total_Duration Duration_Sum Duration_Lag
Id
510783 0.00 0.47 0.47 NaN NaN 0.0 NaN NaN 0.0 1.58 1.61 0.03 0.00 1.61 1.61 0.50 1.11
651542 0.00 0.23 0.23 NaN NaN 0.0 NaN NaN 0.0 1.49 1.53 0.04 0.00 1.53 1.53 0.27 1.26
108193 0.01 0.44 0.43 NaN NaN 0.0 NaN NaN 0.0 1.59 1.61 0.02 0.01 1.61 1.60 0.45 1.15
430412 0.01 0.23 0.22 NaN NaN 0.0 NaN NaN 0.0 1.50 1.53 0.03 0.01 1.53 1.52 0.25 1.27
443497 0.01 0.11 0.10 NaN NaN 0.0 NaN NaN 0.0 1.49 1.53 0.04 0.01 1.53 1.52 0.14 1.38
513365 0.01 0.23 0.22 NaN NaN 0.0 NaN NaN 0.0 1.49 1.53 0.04 0.01 1.53 1.52 0.26 1.26
517076 0.01 0.23 0.22 NaN NaN 0.0 NaN NaN 0.0 1.49 1.53 0.04 0.01 1.53 1.52 0.26 1.26
520216 0.01 0.23 0.22 NaN NaN 0.0 NaN NaN 0.0 1.49 1.52 0.03 0.01 1.52 1.51 0.25 1.26
521262 0.01 0.23 0.22 NaN NaN 0.0 NaN NaN 0.0 1.49 1.53 0.04 0.01 1.53 1.52 0.26 1.26
574529 0.01 0.23 0.22 NaN NaN 0.0 NaN NaN 0.0 1.49 1.52 0.03 0.01 1.52 1.51 0.25 1.26

In [33]:
df = df.sort_values(L0_stations + L1_stations + L2_stations + L3_stations)

In [34]:
df_flow = df[L0_stations + L1_stations + L2_stations + L3_stations]

In [35]:
df_flow.shape[0] / 4000


Out[35]:
591

In [36]:
df_flow[df_flow.notnull()] = 1
df_flow[df_flow.isnull()] = 0


d:\Anaconda\envs\Deep2\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
d:\Anaconda\envs\Deep2\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  

In [14]:
img = [df_flow.loc[idx] if (idx % 4000 == 0) else for idx in df_flow.index]


  File "<ipython-input-14-082fa3c1ed29>", line 1
    img = [df_flow.loc[idx] if (idx % 4000 == 0) for idx in df_flow.idx]
                                                   ^
SyntaxError: invalid syntax

In [37]:
df_flow = df_flow.reset_index()
del df_flow['Id']

In [38]:
img = df_flow[df_flow.index % 4000 == 0]

In [39]:
plt.figure(figsize=(20,20))
plt.imshow(img.values)


Out[39]:
<matplotlib.image.AxesImage at 0x4dbcfef0>

In [ ]: