In [ ]:
########################
# A notebook for exploratory analysis of Livorno cruiseship data
########################
import warnings
warnings.filterwarnings('ignore')
from pylab import *
import psycopg2
%timeit
import seaborn as sns
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
sns.set_style("white")
sns.set(font_scale=1.0)
%matplotlib inline
In [ ]:
###### Connect to db and load the exported Livorno data
# Call connect through the psycopg2 module: only the module itself is
# imported, so a bare `connect` does not refer to psycopg2's function.
# NOTE(review): host is blank here, presumably redacted; fill it in before running.
connection = psycopg2.connect(host='', port=5432)
# read_csv takes the file path as its first argument. Passing the (not yet
# defined) livorno_data name first raised NameError on a fresh kernel and
# would have turned the path into the `sep` argument.
livorno_data = pd.read_csv('../src/output/livorno_data.csv')
In [16]:
# Get data for summer 2016
# Rows are selected by the Month column, whose labels are presumably Italian
# month abbreviations (Giu/Lug/Ago/Set) — verify against the CSV.
June = livorno_data[livorno_data.Month == 'Giu']
July = livorno_data[livorno_data.Month == 'Lug']
August = livorno_data[livorno_data.Month == 'Ago']
September = livorno_data[livorno_data.Month == 'Set']


def daily_passenger_totals(month_rows):
    """Sum PassTrans per Arrival_Date for the given rows.

    Parameters:
        month_rows: DataFrame with 'Arrival_Date' and 'PassTrans' columns.

    Returns:
        DataFrame indexed by Arrival_Date with a 'PassTrans' column holding the
        per-date sum and a 'date' column that copies the index.
    """
    # Select PassTrans before aggregating so unrelated (possibly non-numeric)
    # columns are never summed.
    totals = month_rows.groupby('Arrival_Date')['PassTrans'].sum().to_frame()
    totals['date'] = totals.index
    return totals


## How many passengers were there per month of summer 2016?
june_pass_ts = daily_passenger_totals(June)
july_pass_ts = daily_passenger_totals(July)
august_pass_ts = daily_passenger_totals(August)
sept_pass_ts = daily_passenger_totals(September)

# How many people arriving in June, July, August, September?
print('June: ', june_pass_ts['PassTrans'].sum())
print('July: ', july_pass_ts['PassTrans'].sum())
print('August: ', august_pass_ts['PassTrans'].sum())
print('September: ', sept_pass_ts['PassTrans'].sum())
# Total over every row of the dataset, not only the four months above.
print('Total Passengers: ', livorno_data['PassTrans'].sum())
In [18]:
# How many ships arriving per day, on average?
# Count the non-null Ship entries for each arrival date, kept as a one-column frame.
daily_ships = livorno_data.groupby('Arrival_Date')['Ship'].count().to_frame()
# Mean of the per-date counts, rounded to the nearest whole number.
average_daily_ships = daily_ships['Ship'].mean().round()
print('Average ships arriving per day:', average_daily_ships)
In [20]:
# How many ships total
# Counts non-null entries of the Ship column (one per row), so a ship name that
# appears on several rows is counted each time.
total_ship_rows = livorno_data['Ship'].count()
print('Total number of ships over summer:', total_ship_rows)
In [26]:
# Avg passengers per day arriving in Livorno
# Passenger totals per arrival clock time, with the time also exposed as a column.
pass_ts = livorno_data.groupby('ArrivalTime')['PassTrans'].sum().to_frame()
pass_ts['time'] = pass_ts.index
# Sum passengers per arrival date first, then average across dates. A plain
# mean of PassTrans averages over rows instead of over days.
# Dates that have no row in the data do not contribute to this average.
daily_pass_totals = livorno_data.groupby('Arrival_Date')['PassTrans'].sum()
print('Average number of passengers arriving daily in Livorno:', daily_pass_totals.mean())
In [6]:
# How many total passengers over summer?
# Sum of the PassTrans column across every row; displayed as the cell output.
livorno_data['PassTrans'].sum()
Out[6]:
In [7]:
#### Total days & hours in port
# Clock times are parsed with a fixed HH:MM:SS format; the departure/arrival
# difference is stored on livorno_data as a timedelta column.
CLOCK_FORMAT = "%H:%M:%S"
arrivaltimes = pd.to_datetime(livorno_data['ArrivalTime'], format=CLOCK_FORMAT)
departtimes = pd.to_datetime(livorno_data['DepartureTime'], format=CLOCK_FORMAT)
# NOTE(review): only clock times are subtracted here (dates are handled
# separately below), so a stay that spans midnight presumably gets a
# misleading value — confirm before relying on it.
livorno_data['hours_in_port'] = departtimes - arrivaltimes

# Calendar dates are parsed without an explicit format; their difference is
# stored as a second timedelta column.
arrivaldays = pd.to_datetime(livorno_data['Arrival_Date'])
departdays = pd.to_datetime(livorno_data['Departure_Date'])
livorno_data['days_in_port'] = departdays - arrivaldays

# Column sums per departure time and per arrival time.
grouped_by_departuretime = livorno_data.groupby('DepartureTime').sum()
grouped_by_arrivaltime = livorno_data.groupby('ArrivalTime').sum()
## Most boats arrive morning, leave in late evening
grouped_by_arrivaltime
Out[7]:
In [8]:
# How many passengers on ships that stay 2 days?
# Boolean mask of rows whose days_in_port equals exactly two days.
stays_two_days = livorno_data['days_in_port'] == '2 days'
two_day_ships = livorno_data[stays_two_days]
# Passenger total for those rows; displayed as the cell output.
two_day_ships.PassTrans.sum()
Out[8]:
In [9]:
### How many overnight boats? 8
# A row counts as "overnight" when its PassOver value is present (non-null);
# all remaining rows go to not_overnight_boats.
has_pass_over = livorno_data['PassOver'].notnull()
overnight_boats = livorno_data[has_pass_over]
not_overnight_boats = livorno_data[~has_pass_over]
# How many people total on these overnight boats? 3990
# (The figures in these comments were recorded by the author, not recomputed here.)
overnight_boats['PassTrans'].sum()
Out[9]:
In [10]:
# On overnight boats, do passengers sleep on the boats? YES!
# Show PassOver next to PassTrans for the overnight rows.
# .loc replaces the removed .ix indexer (gone since pandas 1.0); selection by
# column label is unchanged.
sleep_on_ship = overnight_boats.loc[:, ['PassOver', 'PassTrans']]
sleep_on_ship
Out[10]:
In [11]:
# Mean and maximum of hours_in_port over the not_overnight_boats rows,
# printed one per line (mean first, then max).
stay_lengths = not_overnight_boats['hours_in_port']
mean_time = stay_lengths.mean()
max_time = stay_lengths.max()
print(mean_time, max_time, sep='\n')
In [12]:
### Who are the main shipowners?
# NOTE(review): this tallies values of the Ship column (row count per ship
# name), not an owner column — confirm that Ship is the intended field.
livorno_data.Ship.value_counts()
Out[12]:
In [13]:
# Stem plots of daily PassTrans totals, one panel per month in a 2x2 grid
# (row-major order: June, July, August, September).
plt.close('all')
f, ax = plt.subplots(2, 2, figsize=(10, 10), dpi=180, facecolor='w', edgecolor='k')
monthly_panels = [
    ('June', june_pass_ts),
    ('July', july_pass_ts),
    ('August', august_pass_ts),
    ('September', sept_pass_ts),
]
for panel, (month_name, month_ts) in zip(ax.flat, monthly_panels):
    panel.stem(month_ts['date'], month_ts['PassTrans'])
    # Label each panel so the figure can be read on its own.
    panel.set_title(month_name)
    panel.set_ylabel('Passengers (PassTrans)')
    # Every panel ends up with visible, 30-degree tick labels. The earlier
    # sequence of calls hid the top row and then re-showed it, so this is the
    # same net result without the contradictory step.
    plt.setp(panel.get_xticklabels(), visible=True, rotation=30)
# Keep rotated tick labels from overlapping the titles of the row below.
f.tight_layout()
plt.show()