In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
In [2]:
# Load the raw training set; the path is relative to the notebook's working directory.
train = pd.read_csv('../data/raw/train.csv')
# Call-form print works on both Python 2 and Python 3 (the statement form
# `print train.shape` is a SyntaxError on Python 3).
print(train.shape)
In [3]:
# Print a concise summary of the frame. memory_usage='deep' asks pandas to
# measure the real memory held by object-dtype columns instead of estimating it.
train.info(memory_usage='deep')
In [4]:
# Reorder rows by the 'time' column, smallest first (ascending is the default).
# The original index labels are kept, so they are no longer in order afterwards.
train = train.sort_values('time')
In [5]:
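# Disabled leftover snippet: `dataset` is not defined in the cells above (the
# frame here is `train`), and the 'ms' unit for `time` is unconfirmed -- TODO verify
# before re-enabling.
#dataset['InstallDate'] = pd.to_datetime(dataset['time'], unit='ms')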
In [6]:
# Show the first five rows (after the sort by 'time' above).
train.head(n=5)
Out[6]:
In [7]:
# Show the last five rows (after the sort by 'time' above).
train.tail(n=5)
Out[7]:
In [8]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the
# frame's numeric columns.
train.describe()
Out[8]:
In [ ]:
# Plot the distribution of each column on its own axes.
# Previously every distplot call in this cell drew onto the same implicit
# axes (and 'accuracy' was drawn twice), so all columns were overlaid on one
# shared x-axis regardless of their scale.
# NOTE(review): 'place_id' looks like an identifier rather than a measurement;
# confirm that a density plot of it is meaningful.
# NOTE(review): distplot is deprecated in newer seaborn releases; it is kept
# here to stay compatible with the seaborn version this notebook targets.
dist_columns = ['accuracy', 'x', 'y', 'time', 'place_id']
fig, axes = plt.subplots(nrows=len(dist_columns), ncols=1,
                         figsize=(8, 3 * len(dist_columns)))
for ax, column in zip(axes, dist_columns):
    sb.distplot(train[column], ax=ax)
    ax.set_title('Distribution of %s' % column)
fig.tight_layout()
In [ ]:
# Joint kernel-density plot of the 'x' and 'y' columns of the training frame.
sb.jointplot(
    data=train,
    x="x",
    y="y",
    kind="kde",
)
In [ ]: