In [2]:
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
In [3]:
# Load the raw training data. The path is relative to the notebook's
# working directory.
train = pd.read_csv('../data/raw/train.csv')
# Use the function-call form of print: it behaves identically on Python 2
# for a single argument and is the only valid form on Python 3.
print(train.shape)
In [4]:
# Print a per-column summary of the frame (dtypes and non-null counts).
# memory_usage='deep' asks pandas to introspect object-dtype columns so the
# reported memory figure is the real footprint rather than an estimate.
train.info(memory_usage='deep')
In [5]:
# Reorder the rows by ascending `time` (ascending is the sort_values default).
# The original index labels are kept, so they are no longer monotonic.
train = train.sort_values('time')
In [6]:
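# NOTE(review): disabled experiment. `dataset` is not defined in the cells
# shown here (the loaded frame is `train`), and the unit of `time` has not
# been confirmed to be milliseconds -- verify both before re-enabling.
#dataset['InstallDate'] = pd.to_datetime(dataset['time'], unit='ms')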
In [7]:
# Display the rows at positions 1 through 9 of the sorted frame.
# NOTE(review): position 0 (the very first row) is not shown -- confirm this
# is intentional rather than an off-by-one.
train.iloc[1:10]
Out[7]:
In [7]:
# Display the final five rows of the sorted frame.
train.tail(n=5)
Out[7]:
In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the frame.
train.describe()
Out[8]:
In [ ]:
# Plot the distribution of each column on its OWN axes.
#
# Calling sb.distplot repeatedly without `ax=` draws every histogram onto the
# same implicit Axes, so columns with different value ranges are overlaid
# into one figure that cannot be read. The original cell also plotted
# `accuracy` twice; each column is listed once here.
#
# NOTE(review): `place_id` looks like an identifier rather than a measurement;
# its histogram is kept because the original cell plotted it -- confirm it is
# useful.
dist_columns = ['accuracy', 'x', 'y', 'time', 'place_id']

fig, axes = plt.subplots(len(dist_columns), 1,
                         figsize=(8, 4 * len(dist_columns)))
for ax, column in zip(axes, dist_columns):
    sb.distplot(train[column], ax=ax)
    ax.set_title('Distribution of ' + column)

# Keep titles and tick labels of stacked subplots from overlapping.
fig.tight_layout();
In [9]:
# Hexbin joint distribution of the `x` and `y` columns.
# The jointplot call must be indented under the `with` statement: written
# flush-left it is an IndentationError, and the temporary "white" axes style
# would not apply to the figure.
with sb.axes_style("white"):
    sb.jointplot(x='x', y='y', data=train, kind="hex", color="k")
In [10]:
# Hexbin joint distribution of `accuracy` (x-axis) against `time` (y-axis).
# The jointplot call must be indented under the `with` statement: written
# flush-left it is an IndentationError, and the temporary "white" axes style
# would not apply to the figure.
with sb.axes_style("white"):
    sb.jointplot(x='accuracy', y='time', data=train, kind="hex", color="k")
In [ ]:
# Hexbin joint distribution of `place_id` (x-axis) against `accuracy` (y-axis).
# The jointplot call must be indented under the `with` statement: written
# flush-left it is an IndentationError, and the temporary "white" axes style
# would not apply to the figure.
# NOTE(review): `place_id` looks like an identifier, so binning it on a
# numeric axis may not be meaningful -- confirm this plot is wanted.
with sb.axes_style("white"):
    sb.jointplot(x='place_id', y='accuracy', data=train, kind="hex", color="k")
In [ ]: