In [2]:
%matplotlib inline
In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Column labels handed to read_csv via `names=`.
names = [
    "date", "city", "state", "country", "shape",
    "duration_seconds", "duration_reported", "description",
    "report_date", "latitude", "longitude",
]
# Read the CSV and parse both date columns as datetimes.
ufo = pd.read_csv(
    "data-readonly/ufo-scrubbed-geocoded-time-standardized.csv",
    names=names,
    parse_dates=["date", "report_date"],
)
# Index the frame by the "date" column and add the hour component of each
# index timestamp as its own column.
ufo_dates = ufo.set_index("date").assign(
    hour_of_day=lambda frame: frame.index.hour
)
In [4]:
# Group duration_seconds by hour once, then derive both summaries from it:
# the number of non-null values per hour and their per-hour total.
seconds_by_hour = ufo_dates.groupby("hour_of_day")["duration_seconds"]
which_hour = seconds_by_hour.count()
duration_by_hour = seconds_by_hour.sum()
In [5]:
# Display the per-hour sum of duration_seconds.
duration_by_hour
Out[5]:
In [6]:
# Rebinds duration_by_hour: the sum is now taken per (state, hour_of_day)
# pair, so the result carries a two-level index instead of an hour-only one.
duration_by_hour = ufo_dates.groupby(["state","hour_of_day"])["duration_seconds"].sum()
In [8]:
# Select every hour for the entries whose "state" level is "ak".
duration_by_hour.loc["ak",:]
Out[8]:
In [10]:
# Label-based slices on both index levels: states "ab" through "ak" and
# hours 5 through 10 (.loc slices include both endpoints).
duration_by_hour.loc["ab":"ak", 5:10]
Out[10]:
In [17]:
# Inspect the index of ufo_dates (built from the "date" column).
ufo_dates.index
Out[17]:
In [23]:
# Number of days in the month of each index timestamp.
ufo_dates.index.days_in_month
Out[23]:
In [24]:
# Hour component of each index timestamp.
ufo_dates.index.hour
Out[24]:
In [25]:
# Minute component of each index timestamp.
ufo_dates.index.minute
Out[25]:
In [31]:
# Grand total of duration_seconds across every (state, hour_of_day) group.
duration_by_hour.sum()
Out[31]:
In [33]:
# Total duration_seconds for state "ak", summed over all of its hours.
duration_by_hour.loc["ak",:].sum()
Out[33]:
In [34]:
# Throwaway line plot through (1, 4), (2, 5), (3, 6).
plt.plot([1, 2, 3], [4, 5, 6])
Out[34]:
In [41]:
# Polar axes arranged like a clock face: the zero angle is rotated by pi/2
# and angles increase clockwise.
ax = plt.subplot(1, 1, 1, projection="polar")
ax.set_theta_direction(-1)
ax.set_theta_offset(np.pi / 2)
In [42]:
# Count of non-null duration_seconds values for each hour_of_day.
which_hour = ufo_dates.groupby("hour_of_day")["duration_seconds"].count()
Visualize the number of sightings as a function of the hour of the day, split up into AM and PM
In [43]:
# Boolean mask over the hour index: True where the hour is greater than 11.
which_hour.index > 11
Out[43]:
In [44]:
# Label-based slice from the start up to and including hour 12.
which_hour.loc[:12]
Out[44]:
In [52]:
# Clock-style polar axes: zero angle rotated by pi/2, angles run clockwise.
ax = plt.subplot(1, 1, 1, projection="polar")
ax.set_theta_direction(-1)
ax.set_theta_offset(np.pi/2)

# Hours up to and including 11, placed on a 12-hour dial.
am_counts = which_hour.loc[:11]
am_radians = am_counts.index * 2*np.pi/12

# Hours from 12 onwards, placed on the same 12-hour dial.
pm_counts = which_hour.loc[12:]
pm_radians = pm_counts.index * 2*np.pi/12

# One curve per half of the day.
ax.plot(am_radians, am_counts)
ax.plot(pm_radians, pm_counts)
Out[52]:
In [47]:
# Label-based slice from hour 12 (inclusive) to the end.
which_hour.loc[12:]
Out[47]:
In [82]:
# Default figure size for subsequent figures: 10 wide by 12 tall.
plt.rcParams["figure.figsize"] = (10, 12)
In [83]:
# Hourly counts on a 12-hour dial, one curve for AM and one for PM.
# The AM slice runs through hour 12 and the PM series gets hour 0 appended,
# so each curve ends where the other begins.
am_counts = which_hour.loc[:12]
# Series.append was removed in pandas 2.0; pd.concat is the replacement.
pm_counts = pd.concat([which_hour.loc[12:], which_hour.loc[0:0]])
pm_radians = pm_counts.index * 2*np.pi/12
am_radians = am_counts.index * 2*np.pi/12

# Clock-style polar axes: zero angle rotated by pi/2, angles run clockwise.
ax = plt.subplot(111, polar=True)
ax.set_theta_offset(np.pi/2)
ax.set_theta_direction(-1)

ax.plot(am_radians, am_counts, label = 'AM', linewidth = 2.5)
ax.plot(pm_radians, pm_counts, label = 'PM', linewidth = 2.5)

# Exactly one tick per dial label. Reusing am_radians here would pass one
# position per AM hour *including* hour 12, which does not match the twelve
# labels below and is rejected by newer matplotlib releases.
ax.set_xticks(np.arange(12) * 2*np.pi/12)
ax.set_xticklabels(['12', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11'])
ax.set_yticks([])
ax.legend(loc = 'upper left')
Out[83]:
In [87]:
# Hourly counts on a 24-hour dial, drawn as a filled closed curve.
# Hour 0 is appended at the end so the outline returns to its starting point.
# Series.append was removed in pandas 2.0; pd.concat is the replacement.
all_counts = pd.concat([which_hour, which_hour.loc[0:0]])
all_radians = all_counts.index * 2*np.pi/24

# Clock-style polar axes: zero angle rotated by pi/2, angles run clockwise.
ax = plt.subplot(111, polar=True)
ax.set_theta_offset(np.pi/2)
ax.set_theta_direction(-1)

ax.plot(all_radians, all_counts, linewidth = 2.5)
ax.fill_between(all_radians, np.zeros(all_counts.size), all_counts)

# Exactly 24 tick positions for the 24 labels. all_radians also contains the
# repeated closing point, so using it for ticks would give one position more
# than there are labels.
ax.set_xticks(np.arange(24) * 2*np.pi/24)
ax.set_xticklabels([str(hour) for hour in range(24)])
ax.set_yticks([])
Out[87]:
In [115]:
# Row count per (state, hour_of_day) pair; counting the "state" column
# counts its non-null entries within each group.
bystate = ufo_dates.groupby(["state", "hour_of_day"])["state"].count()
In [141]:
# Per-hour counts for the "tx" and "il" states, each indexed by hour 0..23.
# Selecting with a single label drops the state level, and the reindex
# guarantees all 24 hours are present (0 where a state has no rows for an
# hour). The plots below pair these Series with 24 angles and divide one by
# the other, so both must share the same 24-entry hour index.
texas_counts = bystate.loc["tx"].reindex(np.arange(24), fill_value=0)
illinois_counts = bystate.loc["il"].reindex(np.arange(24), fill_value=0)
In [131]:
# Two side-by-side 24-hour polar stackplots of the Texas and Illinois counts,
# one for each stacking order.
all_radians = np.arange(24) * 2*np.pi/24

# (subplot position, series in bottom-to-top stacking order).
# The right-hand panel originally stacked illinois_counts on top of itself;
# it now shows the reverse order of the left-hand panel, matching the
# Cartesian stackplots drawn in the following cell.
panels = (
    (121, (texas_counts, illinois_counts)),
    (122, (illinois_counts, texas_counts)),
)
for position, stack_order in panels:
    # Clock-style polar axes: zero angle rotated by pi/2, angles run clockwise.
    ax = plt.subplot(position, polar=True)
    ax.set_theta_offset(np.pi/2)
    ax.set_theta_direction(-1)
    ax.stackplot(all_radians, *stack_order)
    ax.set_xticks(all_radians)
    ax.set_xticklabels([str(hour) for hour in range(24)])
    ax.set_yticks([])
Out[131]:
In [133]:
# Left panel: Illinois at the bottom of the stack, Texas on top.
plt.subplot(1, 2, 1)
plt.stackplot(np.arange(24), illinois_counts, texas_counts)
# Right panel: the same data with the stacking order reversed.
plt.subplot(1, 2, 2)
plt.stackplot(np.arange(24), texas_counts, illinois_counts)
Out[133]:
In [150]:
# Ratio of Texas to Illinois counts per hour on a 24-hour polar dial.
all_radians = np.arange(24) * 2*np.pi/24

# Clock-style polar axes: zero angle rotated by pi/2, angles run clockwise.
ax = plt.subplot(1, 1, 1, projection="polar")
ax.set_theta_direction(-1)
ax.set_theta_offset(np.pi/2)

# NOTE(review): Series division aligns on the index, so this relies on both
# Series sharing the same index labels; the cells that assign a 0..23 index
# to each Series appear further down the file. Confirm on a fresh run.
ax.plot(all_radians, texas_counts/illinois_counts)
#ax.set_yticks([])
ax.set_xticks(all_radians)
ax.set_xticklabels([str(hour) for hour in range(24)])
Out[150]:
In [143]:
# Number of angle positions in all_radians.
all_radians.size
Out[143]:
In [144]:
# Number of entries in texas_counts, for comparison with all_radians.size.
texas_counts.size
Out[144]:
In [147]:
# Replace the index with the integers 0..23 (in place). Assigning an index
# requires the Series to hold exactly 24 entries.
texas_counts.index = np.arange(24)
In [151]:
# Replace the index with the integers 0..23 (in place). Assigning an index
# requires the Series to hold exactly 24 entries.
illinois_counts.index = np.arange(24)
In [156]:
# Stacked bars per hour: Texas at the base, Illinois stacked on top of it.
plt.bar(x=np.arange(24), height=texas_counts)
plt.bar(x=np.arange(24), height=illinois_counts, bottom=texas_counts)
Out[156]:
In [157]:
import seaborn as sns
In [ ]:
# Default figure size for subsequent figures: 12 wide by 10 tall.
plt.rcParams["figure.figsize"] = (12, 10)
In [168]:
# Histogram of the hour_of_day column.
# sns.distplot is deprecated and absent from recent seaborn releases;
# histplot is its replacement. discrete=True gives every integer hour its
# own bar instead of letting automatic binning merge or split hours.
sns.histplot(ufo_dates["hour_of_day"], kde=False, discrete=True)
Out[168]:
In [170]:
# Month number of each index timestamp, stored as a column for plotting.
ufo_dates["month"] = ufo_dates.index.month
In [177]:
# Hex-binned joint distribution of hour_of_day (x) against month (y).
p = sns.jointplot(data=ufo_dates, x="hour_of_day", y="month", kind="hex")
In [178]:
# IPython help lookup for sns.jointplot (interactive aid only; it produces no
# analysis result — NOTE(review): consider removing from the final notebook).
sns.jointplot?
In [183]:
# Time-of-day part of each index timestamp, stored as a column.
ufo_dates["time"] = ufo_dates.index.time
In [185]:
# Day of the year of each index timestamp, stored as a column.
ufo_dates["dayofyear"] = ufo_dates.index.dayofyear
# Preview the frame with the new column. The original line referenced the
# undefined name `ufo_date`, which raises NameError.
ufo_dates.head()
In [191]:
# Seconds elapsed since midnight for each row: the hour_of_day column plus
# the minute and second components of the index timestamp.
ufo_dates["seconds_since_midnight"] = (
    ufo_dates.hour_of_day * 3600
    + ufo_dates.index.minute * 60
    + ufo_dates.index.second
)
In [193]:
# Kernel-density joint plot of seconds_since_midnight (x) against dayofyear (y).
sns.jointplot(x="seconds_since_midnight", y="dayofyear", data = ufo_dates, kind="kde")
Out[193]:
In [196]:
# Strip plot of duration_seconds for each value of the "shape" column.
sns.stripplot(x="shape", y="duration_seconds", data=ufo_dates)
Out[196]:
In [199]:
# Number of rows with a non-null "state" value for each shape.
ufo_dates.groupby("shape")["state"].count()
Out[199]:
In [215]:
# Re-index the frame by the "shape" column; set_index returns a new frame,
# so ufo_dates itself is left unchanged.
ufo_shapes = ufo_dates.set_index("shape")
# Display the re-indexed frame.
ufo_shapes
Out[215]:
In [217]:
# Order the rows by their "shape" index labels.
ufo_shapes = ufo_shapes.sort_index()
In [219]:
# Display the "shape" index of ufo_shapes. The original line ended in a
# dangling attribute access (`ufo_shapes.index.`), which is a syntax error.
ufo_shapes.index
Out[219]:
In [ ]: