In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
%cd ~/minibook/chapter2/
In [3]:
!wget https://raw.githubusercontent.com/ipython-books/minibook-2nd-data/master/nyc_taxi.zip
!unzip nyc_taxi.zip
In [4]:
%ls data
Out[4]:
In [5]:
# Relative paths (from the current working directory) of the two CSV files
# that the following cells load.
data_filename, fare_filename = (
    'data/nyc_data.csv',
    'data/nyc_fare.csv',
)
In [6]:
# Load the two CSV files into DataFrames. The columns listed in `parse_dates`
# are parsed as dates while reading instead of being kept as raw strings.
data = pd.read_csv(
    data_filename,
    parse_dates=['pickup_datetime', 'dropoff_datetime'],
)
fare = pd.read_csv(
    fare_filename,
    parse_dates=['pickup_datetime'],
)
In [7]:
data.head(3)
In [8]:
data.columns
Out[8]:
In [9]:
# Pull the pickup (p_*) and dropoff (d_*) coordinate columns out of `data`
# as individual Series for the projection and plotting cells below.
p_lng = data['pickup_longitude']
p_lat = data['pickup_latitude']
d_lng = data['dropoff_longitude']
d_lat = data['dropoff_latitude']
In [10]:
p_lng
Out[10]:
In [11]:
def lat_lng_to_pixels(lat, lng):
    """Project latitude/longitude in degrees onto a plane (Mercator formula).

    Works element-wise: `lat` and `lng` may be scalars, NumPy arrays or
    pandas Series, and the outputs have the same kind/shape as the inputs.

    Parameters
    ----------
    lat : latitude(s) in degrees.
    lng : longitude(s) in degrees.

    Returns
    -------
    (x, y) : tuple
        x is the longitude rescaled linearly so that -180..180 degrees maps
        to 0..100. y is the Mercator ordinate m = log(tan(pi/4 + lat/2))
        rescaled as 100 * (m - pi) / (2 * pi), so the equator (m = 0) maps
        to -50. The projection diverges at latitudes of +/-90 degrees.
    """
    lat_rad = lat * np.pi / 180.0
    # Mercator ordinate; kept under its own name rather than overwriting
    # `lat_rad`, which holds the latitude in radians.
    merc_y = np.log(np.tan((lat_rad + np.pi / 2.0) / 2.0))
    x = 100 * (lng + 180.0) / 360.0
    y = 100 * (merc_y - np.pi) / (2.0 * np.pi)
    return (x, y)
In [12]:
px, py = lat_lng_to_pixels(p_lat, p_lng)
In [13]:
px
Out[13]:
In [14]:
plt.scatter(px, py)
In [15]:
# Same scatter of (px, py) as before, but with very small (s=.1), almost
# transparent (alpha=.03) markers, equal axis scaling, a fixed x/y window,
# and the axes frame hidden.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(px, py, s=.1, alpha=.03)
ax.axis('equal')
ax.set_xlim(29.40, 29.55)
ax.set_ylim(-37.63, -37.54)
ax.axis('off')
In [16]:
px.count(), px.min(), px.max()
Out[16]:
In [17]:
px.mean(), px.median(), px.std()
Out[17]:
In [18]:
# Install seaborn with conda from inside the notebook via a shell escape.
# -q reduces conda's output; -y answers confirmation prompts automatically so
# the cell does not wait for input.
# NOTE(review): `!` runs whichever `conda` the shell finds on PATH — confirm it
# installs into the environment the kernel is actually running in.
!conda install seaborn -q -y
In [19]:
import seaborn as sns
sns.__version__
Out[19]:
In [20]:
# Histogram of trip distances. linspace(0, 10, 100) yields 100 evenly spaced
# bin edges between 0 and 10, i.e. 99 bins.
distance_bin_edges = np.linspace(0., 10., 100)
data['trip_distance'].hist(bins=distance_bin_edges)