In [2]:
%matplotlib inline

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

names = ["date", "city", "state", "country", "shape", "duration_seconds", "duration_reported", "description", "report_date", "latitude", "longitude"]

ufo = pd.read_csv("data-readonly/ufo-scrubbed-geocoded-time-standardized.csv",
                 names = names, parse_dates = ["date", "report_date"])

ufo_dates = ufo.set_index("date")

ufo_dates["hour_of_day"] = ufo_dates.index.hour

In [4]:
which_hour = ufo_dates.groupby("hour_of_day")["duration_seconds"].count()
duration_by_hour = ufo_dates.groupby("hour_of_day")["duration_seconds"].sum()

In [5]:
duration_by_hour


Out[5]:
hour_of_day
0     2.336747e+07
1     7.321914e+07
2     1.358630e+07
3     4.977840e+06
4     3.644581e+06
5     8.395434e+06
6     1.453030e+06
7     5.967030e+05
8     9.840235e+05
9     8.306253e+06
10    1.510430e+06
11    2.607532e+06
12    1.983128e+07
13    1.323662e+06
14    2.861558e+06
15    3.820363e+06
16    3.221071e+06
17    1.034460e+08
18    7.279388e+07
19    1.086671e+07
20    3.135144e+07
21    8.992292e+07
22    5.478192e+07
23    1.874752e+08
Name: duration_seconds, dtype: float64

In [6]:
duration_by_hour = ufo_dates.groupby(["state","hour_of_day"])["duration_seconds"].sum()

In [8]:
duration_by_hour.loc["ak",:]


Out[8]:
state  hour_of_day
ak     0               30887.0
       1               74031.0
       2               10993.0
       3               11347.0
       4                1682.0
       5                2530.0
       6                4800.0
       7                 820.0
       8                5162.0
       9               19382.0
       10               1230.0
       11              17400.0
       12               2540.0
       13               3710.0
       14               8580.0
       15               1692.0
       16              17000.0
       17               2133.0
       18             612573.0
       19              14602.0
       20              20189.0
       21              60412.0
       22              53817.0
       23             520556.0
Name: duration_seconds, dtype: float64

In [10]:
duration_by_hour.loc["ab":"ak", 5:10]


Out[10]:
state  hour_of_day
ab     5               4220.0
       6               5115.0
       7               1040.0
       8              16200.0
       9                 30.0
       10              2825.0
ak     5               2530.0
       6               4800.0
       7                820.0
       8               5162.0
       9              19382.0
       10              1230.0
Name: duration_seconds, dtype: float64

In [17]:
ufo_dates.index


Out[17]:
DatetimeIndex(['1949-10-10 20:30:00', '1949-10-10 21:00:00',
               '1955-10-10 17:00:00', '1956-10-10 21:00:00',
               '1960-10-10 20:00:00', '1961-10-10 19:00:00',
               '1965-10-10 21:00:00', '1965-10-10 23:45:00',
               '1966-10-10 20:00:00', '1966-10-10 21:00:00',
               ...
               '2013-09-09 21:00:00', '2013-09-09 21:00:00',
               '2013-09-09 21:00:00', '2013-09-09 21:00:00',
               '2013-09-09 21:00:00', '2013-09-09 21:15:00',
               '2013-09-09 22:00:00', '2013-09-09 22:00:00',
               '2013-09-09 22:20:00', '2013-09-09 23:00:00'],
              dtype='datetime64[ns]', name='date', length=80332, freq=None)

In [23]:
ufo_dates.index.days_in_month


Out[23]:
Int64Index([31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
            ...
            30, 30, 30, 30, 30, 30, 30, 30, 30, 30],
           dtype='int64', name='date', length=80332)

In [24]:
ufo_dates.index.hour


Out[24]:
Int64Index([20, 21, 17, 21, 20, 19, 21, 23, 20, 21,
            ...
            21, 21, 21, 21, 21, 21, 22, 22, 22, 23],
           dtype='int64', name='date', length=80332)

In [25]:
ufo_dates.index.minute


Out[25]:
Int64Index([30,  0,  0,  0,  0,  0,  0, 45,  0,  0,
            ...
             0,  0,  0,  0,  0, 15,  0,  0, 20,  0],
           dtype='int64', name='date', length=80332)

In [31]:
duration_by_hour.sum()


Out[31]:
493235482.85100001

In [33]:
duration_by_hour.loc["ak",:].sum()


Out[33]:
1498068.0

In [34]:
plt.plot( [ 1, 2, 3 ], [ 4, 5, 6 ])


Out[34]:
[<matplotlib.lines.Line2D at 0x7f793f402828>]

In [41]:
ax = plt.subplot(111, polar=True)
ax.set_theta_offset(np.pi/2)
ax.set_theta_direction(-1)



In [42]:
which_hour = ufo_dates.groupby("hour_of_day")["duration_seconds"].count()

Visualize the number of sightings as a function of the hour of the day, split up into AM and PM


In [43]:
which_hour.index > 11


Out[43]:
array([False, False, False, False, False, False, False, False, False,
       False, False, False,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True], dtype=bool)

In [44]:
which_hour.loc[:12]


Out[44]:
hour_of_day
0     4108
1     3210
2     2357
3     2004
4     1529
5     1591
6     1224
7      905
8      803
9      958
10    1166
11    1144
12    1368
Name: duration_seconds, dtype: int64

In [52]:
am_counts = which_hour.loc[:11]
pm_counts = which_hour.loc[12:]
pm_radians = pm_counts.index * 2*np.pi/12
am_radians = am_counts.index * 2*np.pi/12
ax = plt.subplot(111, polar=True)
ax.set_theta_offset(np.pi/2)
ax.set_theta_direction(-1)
ax.plot(am_radians, am_counts)
ax.plot(pm_radians, pm_counts)


Out[52]:
[<matplotlib.lines.Line2D at 0x7f793e9c2048>]

In [47]:
which_hour.loc[12:]


Out[47]:
hour_of_day
12     1368
13     1303
14     1322
15     1433
16     1620
17     2592
18     4002
19     6147
20     8617
21    11445
22    10837
23     8647
Name: duration_seconds, dtype: int64

In [82]:
plt.rcParams["figure.figsize"] = (10, 12)

In [83]:
am_counts = which_hour.loc[:12]
pm_counts = which_hour.loc[12:].append(which_hour.loc[0:0])
pm_radians = pm_counts.index * 2*np.pi/12
am_radians = am_counts.index * 2*np.pi/12
ax = plt.subplot(111, polar=True)
ax.set_theta_offset(np.pi/2)
ax.set_theta_direction(-1)
ax.plot(am_radians, am_counts, label = 'AM', linewidth = 2.5)
ax.plot(pm_radians, pm_counts, label = 'PM', linewidth = 2.5)
ax.set_xticks(am_radians)
ax.set_xticklabels(['12', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11'])
ax.set_yticks([])
ax.legend(loc = 'upper left')


Out[83]:
<matplotlib.legend.Legend at 0x7f793e324240>

In [87]:
all_counts = which_hour.append(which_hour.loc[0:0])
all_radians = all_counts.index * 2*np.pi/24
ax = plt.subplot(111, polar=True)
ax.set_theta_offset(np.pi/2)
ax.set_theta_direction(-1)
ax.plot(all_radians, all_counts, linewidth = 2.5)
ax.fill_between(all_radians, np.zeros(all_counts.size), all_counts)
ax.set_xticks(all_radians)
ax.set_xticklabels([str(_) for _ in range(24)])
ax.set_yticks([])
#ax.legend(loc = 'upper left')


Out[87]:
[]

In [115]:
bystate = ufo_dates.groupby(["state", "hour_of_day"])["state"].count()

In [141]:
texas_counts = bystate.loc["tx",:]
illinois_counts = bystate.loc["il",:]

In [131]:
all_radians = np.arange(24) * 2*np.pi/24
ax = plt.subplot(121, polar=True)
ax.set_theta_offset(np.pi/2)
ax.set_theta_direction(-1)
ax.stackplot(all_radians, texas_counts, illinois_counts)
ax.set_xticks(all_radians)
ax.set_xticklabels([str(_) for _ in range(24)])
ax.set_yticks([])

ax = plt.subplot(122, polar=True)
ax.set_theta_offset(np.pi/2)
ax.set_theta_direction(-1)
ax.stackplot(all_radians, illinois_counts, illinois_counts)
ax.set_xticks(all_radians)
ax.set_xticklabels([str(_) for _ in range(24)])
ax.set_yticks([])
#ax.legend(loc = 'upper left')


Out[131]:
[]

In [133]:
plt.subplot(121)
plt.stackplot(np.arange(24), illinois_counts, texas_counts)
plt.subplot(122)
plt.stackplot(np.arange(24), texas_counts, illinois_counts)


Out[133]:
[<matplotlib.collections.PolyCollection at 0x7f793dc0ddd8>,
 <matplotlib.collections.PolyCollection at 0x7f793dbe7ba8>]

In [150]:
all_radians = np.arange(24) * 2*np.pi/24
ax = plt.subplot(111, polar=True)
ax.set_theta_offset(np.pi/2)
ax.set_theta_direction(-1)
ax.plot(all_radians, texas_counts/illinois_counts)
ax.set_xticks(all_radians)
ax.set_xticklabels([str(_) for _ in range(24)])
#ax.set_yticks([])


Out[150]:
[<matplotlib.text.Text at 0x7f793ebb7908>,
 <matplotlib.text.Text at 0x7f793deed080>,
 <matplotlib.text.Text at 0x7f793a460710>,
 <matplotlib.text.Text at 0x7f793a460e48>,
 <matplotlib.text.Text at 0x7f793de5b5c0>,
 <matplotlib.text.Text at 0x7f793dee8128>,
 <matplotlib.text.Text at 0x7f793eccdc18>,
 <matplotlib.text.Text at 0x7f793deed470>,
 <matplotlib.text.Text at 0x7f793de3e5c0>,
 <matplotlib.text.Text at 0x7f793de3ec88>,
 <matplotlib.text.Text at 0x7f793e8f1898>,
 <matplotlib.text.Text at 0x7f793de46518>,
 <matplotlib.text.Text at 0x7f793de46ba8>,
 <matplotlib.text.Text at 0x7f793de0a320>,
 <matplotlib.text.Text at 0x7f793de0aa58>,
 <matplotlib.text.Text at 0x7f793de201d0>,
 <matplotlib.text.Text at 0x7f793de20908>,
 <matplotlib.text.Text at 0x7f793de08be0>,
 <matplotlib.text.Text at 0x7f793de08ac8>,
 <matplotlib.text.Text at 0x7f793de08198>,
 <matplotlib.text.Text at 0x7f793de0b668>,
 <matplotlib.text.Text at 0x7f793de0bda0>,
 <matplotlib.text.Text at 0x7f793ddec518>,
 <matplotlib.text.Text at 0x7f793ddecc50>]

In [143]:
all_radians.size


Out[143]:
24

In [144]:
texas_counts.size


Out[144]:
24

In [147]:
texas_counts.index = np.arange(24)

In [151]:
illinois_counts.index = np.arange(24)

In [156]:
plt.bar(np.arange(24), texas_counts)
plt.bar(np.arange(24), illinois_counts, bottom=texas_counts)


Out[156]:
<Container object of 24 artists>

In [157]:
import seaborn as sns

In [ ]:
plt.rcParams["figure.figsize"] = (12, 10)

In [168]:
sns.distplot(ufo_dates["hour_of_day"], kde=False)


Out[168]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f792b63ff98>

In [170]:
ufo_dates["month"] = ufo_dates.index.month

In [177]:
p = sns.jointplot(x="hour_of_day", y="month", data=ufo_dates, kind='hex', )



In [178]:
sns.jointplot?

In [183]:
ufo_dates["time"] = ufo_dates.index.time

In [185]:
ufo_dates["dayofyear"] = ufo_dates.index.dayofyear
ufo_date

In [191]:
ufo_dates["seconds_since_midnight"] = ufo_dates.hour_of_day * 3600 + ufo_dates.index.minute * 60 + ufo_dates.index.second

In [193]:
sns.jointplot(x="seconds_since_midnight", y="dayofyear", data = ufo_dates, kind="kde")


Out[193]:
<seaborn.axisgrid.JointGrid at 0x7f79399b5dd8>

In [196]:
sns.stripplot(x="shape", y="duration_seconds", data=ufo_dates)


Out[196]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f79398a3e10>

In [199]:
ufo_dates.groupby("shape")["state"].count()


Out[199]:
shape
changed          1
changing      1819
chevron        912
cigar         1894
circle        6946
cone           277
crescent         2
cross          212
cylinder      1195
delta            7
diamond       1074
disk          4786
dome             1
egg            678
fireball      5825
flare            1
flash         1243
formation     2283
hexagon          1
light        15438
other         5184
oval          3446
pyramid          1
rectangle     1217
round            2
sphere        4989
teardrop       674
triangle      7395
unknown       5239
Name: state, dtype: int64

In [215]:
ufo_shapes = ufo_dates.set_index("shape")
ufo_shapes


Out[215]:
city state country duration_seconds duration_reported description report_date latitude longitude hour_of_day month time dayofyear seconds_since_midnight
shape
cylinder san marcos tx us 2700.0 45 minutes This event took place in early fall around 194... 2004-04-27 29.883056 -97.941111 20 10 20:30:00 283 73800
light lackland afb tx NaN 7200.0 1-2 hrs 1949 Lackland AFB&#44 TX. Lights racing acros... 2005-12-16 29.384210 -98.581082 21 10 21:00:00 283 75600
circle chester (uk/england) NaN gb 20.0 20 seconds Green/Orange circular disc over Chester&#44 En... 2008-01-21 53.200000 -2.916667 17 10 17:00:00 283 61200
circle edna tx us 20.0 1/2 hour My older brother and twin sister were leaving ... 2004-01-17 28.978333 -96.645833 21 10 21:00:00 284 75600
light kaneohe hi us 900.0 15 minutes AS a Marine 1st Lt. flying an FJ4B fighter/att... 2004-01-22 21.418056 -157.803611 20 10 20:00:00 284 72000
sphere bristol tn us 300.0 5 minutes My father is now 89 my brother 52 the girl wit... 2007-04-27 36.595000 -82.188889 19 10 19:00:00 283 68400
circle penarth (uk/wales) NaN gb 180.0 about 3 mins penarth uk circle 3mins stayed 30ft above m... 2006-02-14 51.434722 -3.180000 21 10 21:00:00 283 75600
disk norwalk ct us 1200.0 20 minutes A bright orange color changing to reddish colo... 1999-10-02 41.117500 -73.408333 23 10 23:45:00 283 85500
disk pell city al us 180.0 3 minutes Strobe Lighted disk shape object observed clos... 2009-03-19 33.586111 -86.286111 20 10 20:00:00 283 72000
disk live oak fl us 120.0 several minutes Saucer zaps energy from powerline as my pregna... 2005-05-11 30.294722 -82.984167 21 10 21:00:00 283 75600
circle hawthorne ca us 300.0 5 min. ROUND &#44 ORANGE &#44 WITH WHAT I WOULD SAY W... 2003-10-31 33.916389 -118.351667 13 10 13:00:00 284 46800
fireball brevard nc us 180.0 3 minutes silent red /orange mass of energy floated by t... 2008-06-12 35.233333 -82.734444 19 10 19:00:00 284 68400
disk bellmore ny us 1800.0 30 min. silver disc seen by family and neighbors 2000-05-11 40.668611 -73.527500 16 10 16:00:00 283 57600
unknown manchester ky us 180.0 3 minutes Slow moving&#44 silent craft accelerated at an... 2008-02-14 37.153611 -83.761944 19 10 19:00:00 283 68400
oval lexington nc us 30.0 30 seconds green oval shaped light over my local church&#... 2010-02-14 35.823889 -80.253611 21 10 21:00:00 283 75600
circle harlan county ky us 1200.0 20minutes On october 10&#44 1972 myself&#44my 5yrs.daugh... 2005-09-15 36.843056 -83.321944 19 10 19:00:00 284 68400
disk west bloomfield mi us 120.0 2 minutes The UFO was so close&#44 my battery in the car... 2007-08-14 42.537778 -83.233056 22 10 22:30:00 284 81000
disk niantic ct us 1800.0 20-30 min Oh&#44 what a night &#33 Two (2) saucer-shape... 2003-09-24 41.325278 -72.193611 19 10 19:00:00 283 68400
light bermuda nas NaN NaN 20.0 20 sec. saw fast moving blip on the radar scope thin w... 2002-01-11 32.364167 -64.678611 23 10 23:00:00 283 82800
other hudson ma us 2700.0 45 minutes Not sure of the eact month or year of this sig... 1999-08-10 42.391667 -71.566667 19 10 19:30:00 283 70200
disk cardiff (uk/wales) NaN gb 1200.0 20 minutes back in 1974 I was 19 at the time and lived i... 2007-02-01 51.500000 -3.200000 21 10 21:30:00 283 77400
light hudson ks us 1200.0 one hour? The light chased us. 2004-07-25 38.105556 -98.659722 23 10 23:00:00 283 82800
light north charleston sc us 360.0 5-6 minutes Several Flashing UFO lights over Charleston Na... 2008-02-14 32.854444 -79.975000 17 10 17:00:00 283 61200
oval washougal wa us 60.0 1 minute Three extremely large lights hanging above nea... 2014-02-07 45.582778 -122.352222 20 10 20:30:00 284 73800
cigar stoke mandeville (uk/england) NaN gb 3.0 3 seconds White object over Buckinghamshire UK. 2009-12-12 51.783333 -0.783333 22 10 22:00:00 284 79200
other san antonio tx us 30.0 30 seconds i was about six or seven and my family and me ... 2005-02-24 29.423889 -98.493333 12 10 12:00:00 283 43200
light louisville ky us 30.0 approx: 30 seconds HBCCUFO CANADIAN REPORT: Pilot Sighting Of Un... 2004-03-17 38.254167 -85.759444 22 10 22:00:00 283 79200
rectangle elmont ny us 300.0 5min A memory I will never forget that happened men... 2007-02-01 40.700833 -73.713333 2 10 02:00:00 283 7200
chevron poughkeepsie ny us 900.0 15 minutes 1/4 moon-like&#44 its &#39chord&#39 or flat s... 2005-04-16 41.700278 -73.921389 0 10 00:00:00 283 0
triangle saddle lake (canada) ab NaN 270.0 4.5 or more min. Lights far above&#44 that glance; then flee f... 2005-01-19 53.970571 -111.689885 22 10 22:00:00 283 79200
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
light wilson nc us 10800.0 3 hours Bright orb being chased by a jet along with se... 2012-09-24 35.721111 -77.915833 20 9 20:00:00 253 72000
circle elmont ny us 600.0 10 minutes Orange lights seen in Elmont&#44 Long Island&#... 2012-09-24 40.700833 -73.713333 20 9 20:10:00 253 72600
light mt. juliet tn us 120.0 2 minutes Bright white light moving slowly across sky wi... 2012-09-24 36.200000 -86.518611 20 9 20:30:00 253 73800
chevron ventura ca us 900.0 15 minutes Beautiful bright blue delta shaped aerobatics. 2012-09-24 34.278333 -119.292222 20 9 20:30:00 253 73800
circle south jordan ut us 10.0 10 seconds Circular disk with blinking lights scares two ... 2012-09-24 40.562222 -111.928889 20 9 20:52:00 253 75120
oval elkhart in us 600.0 10 minutes It was the night of sept 9 between 9 and 10 pm... 2012-09-24 41.681944 -85.976667 21 9 21:00:00 253 75600
light new york city (brooklyn) ny us 1290.0 21:30 Glowing&#44 circular lights visible in the clo... 2012-09-24 40.714167 -74.006389 21 9 21:00:00 253 75600
oval pawleys island sc us 60.0 less than a minute One large bright orange flanked by three small... 2012-09-24 33.433056 -79.121667 21 9 21:00:00 253 75600
circle ventura ca us 300.0 5 minutes Bright Blue Object seen floating in sky near C... 2012-09-24 34.278333 -119.292222 21 9 21:00:00 253 75600
flash charleston sc us 900.0 15 minutes Orb of light flashing reds and blues&#44 stati... 2012-09-24 32.776389 -79.931111 21 9 21:55:00 253 78900
light gainesville ga us 5.0 5 seconds Ball of light 2012-09-24 34.297778 -83.824167 23 9 23:00:00 253 82800
unknown norfolk va us 1.0 split second Two or three lights shoot across sky over nava... 2013-09-30 36.846667 -76.285556 0 9 00:15:00 252 900
triangle buffalo (west of; on highway 90 west) ny us 180.0 3 minutes Massive Flat Black triangle with 3 red lights. 2013-09-30 42.886389 -78.878611 1 9 01:50:00 252 6600
unknown struthers oh us 120.0 2 minutes I saw a routaing line of stares that seemed to... 2013-09-09 41.052500 -80.608056 3 9 03:00:00 252 10800
light san diego ca us 4.0 ~4 seconds 2 white lights zig-zag over Qualcomm Stadium (... 2013-09-30 32.715278 -117.156389 9 9 09:51:00 252 35460
cigar cedar park tx us 8.0 5-8 seconds Cigar Shaped Object Descending in the Directio... 2013-09-09 30.505000 -97.820000 12 9 12:34:00 252 45240
unknown calmar (canada) ab ca 90.0 45-90 seconds Fastest dot I have ever seen in the sky&#33 2013-09-09 53.250000 -113.783333 13 9 13:10:00 252 47400
other clifton nj NaN 3600.0 ~1hr+ Luminous line seen in New Jersey sky. 2013-09-30 40.858433 -74.163755 20 9 20:15:00 252 72900
fireball tuscaloosa al us 60.0 1:00 White/green object much larger than &quot;shoo... 2013-09-30 33.209722 -87.569167 20 9 20:20:00 252 73200
fireball clarksville tn us 3.0 3 seconds Green fireball like object shooting across the... 2013-09-30 36.529722 -87.359444 20 9 20:21:00 252 73260
light aleksandrow (poland) NaN NaN 15.0 15 seconds Two points of light following one another in a... 2013-09-30 50.465843 22.891814 21 9 21:00:00 252 75600
triangle gainesville fl us 60.0 1 minute Three lights in the sky that didn&#39t look li... 2013-09-30 29.651389 -82.325000 21 9 21:00:00 252 75600
light hamstead (hollyridge) nc NaN 120.0 2 minutes 8 to ten lights bright orange in color large t... 2013-09-30 34.367594 -77.710548 21 9 21:00:00 252 75600
fireball milton (canada) on ca 180.0 3 minutes Massive Bright Orange Fireball in Sky 2013-09-30 46.300000 -63.216667 21 9 21:00:00 252 75600
sphere woodstock ga us 20.0 20 seconds Driving 575 at 21:00 hrs saw a white and green... 2013-09-30 34.101389 -84.519444 21 9 21:00:00 252 75600
light nashville tn us 600.0 10 minutes Round from the distance/slowly changing colors... 2013-09-30 36.165833 -86.784444 21 9 21:15:00 252 76500
circle boise id us 1200.0 20 minutes Boise&#44 ID&#44 spherical&#44 20 min&#44 10 r... 2013-09-30 43.613611 -116.202500 22 9 22:00:00 252 79200
other napa ca us 1200.0 hour Napa UFO&#44 2013-09-30 38.297222 -122.284444 22 9 22:00:00 252 79200
circle vienna va us 5.0 5 seconds Saw a five gold lit cicular craft moving fastl... 2013-09-30 38.901111 -77.265556 22 9 22:20:00 252 80400
cigar edmond ok us 1020.0 17 minutes 2 witnesses 2 miles apart&#44 Red &amp; White... 2013-09-30 35.652778 -97.477778 23 9 23:00:00 252 82800

80332 rows × 14 columns


In [217]:
ufo_shapes.sort_index(inplace=True)

In [219]:
ufo_shapes.index.


Out[219]:
Index([  'changed',  'changing',   'chevron',     'cigar',    'circle',
            'cone',  'crescent',     'cross',  'cylinder',     'delta',
         'diamond',      'disk',      'dome',       'egg',  'fireball',
           'flare',     'flash', 'formation',   'hexagon',     'light',
           'other',      'oval',   'pyramid', 'rectangle',     'round',
          'sphere',  'teardrop',  'triangle',   'unknown',         nan],
      dtype='object', name='shape')

In [ ]: