In [8]:
%matplotlib inline
import sys
import numpy as np
import pandas as pd
import pandas.io.data
import matplotlib.pyplot as plt
# Record the interpreter and pandas versions this notebook was run with.
# Single-argument print(...) calls behave the same under Python 2 and 3,
# whereas the print statement form is a SyntaxError on Python 3.
print(sys.version)
print("Pandas: %s" % pd.__version__)
In [11]:
# Page whose HTML tables are parsed to obtain the CSV's column names.
the_url = 'http://stat-computing.org/dataexpo/2009/the-data.html'

# parse_dates mapping: combine these three columns into one parsed 'Date' column.
datedict = {'Date': ['Year', 'Month', 'DayofMonth']}

# The CSV is read without a header row (header=None); the names come from the
# table at index 1 on the page, its column labelled 1, with the first row
# skipped (presumably that row is the table's own heading -- TODO confirm).
df = pd.read_csv(
    '../june_airplane_data.csv',
    header=None,
    names=pd.read_html(the_url)[1][1][1:],
    parse_dates=datedict
)
In [10]:
# Preview the first five rows of the loaded frame.
df.head(n=5)
Out[10]:
In [12]:
# Bin edges: each consecutive pair of edges delimits one bucket,
# so five edges produce four buckets.
ranges = [
    0,
    600,
    1200,
    1800,
    2400,
]

# One label per bucket, in the same order as the edge pairs above.
labels = [
    'Early Morning',
    'Morning',
    'Early Afternoon',
    'Evening',
]
In [13]:
# Bucket each DepTime value into its labelled bin and store it as DepTime2.
# include_lowest=True closes the first bin on the left, so a value equal to
# ranges[0] receives the first label instead of silently becoming NaN.
# pd.cut on a Series with labels already returns a categorical Series, so no
# extra .astype('category') conversion is required.
df['DepTime2'] = pd.cut(df.DepTime, ranges, labels=labels, include_lowest=True)
In [16]:
# Bucket each ArrTime value into its labelled bin and store it as ArrTime2.
# include_lowest=True closes the first bin on the left, so a value equal to
# ranges[0] receives the first label instead of silently becoming NaN.
# pd.cut on a Series with labels already returns a categorical Series, so no
# extra .astype('category') conversion is required.
df['ArrTime2'] = pd.cut(df.ArrTime, ranges, labels=labels, include_lowest=True)
In [17]:
# Show the first five bucketed arrival values.
df['ArrTime2'].head(n=5)
Out[17]:
In [19]:
# Summary statistics for the two bucketed columns.
df.loc[:, ['DepTime2', 'ArrTime2']].describe()
Out[19]: