In [1]:
# for dates
import datetime
# data processing
import numpy as np
import pandas
# plotting
import matplotlib.pyplot as plt
# downloading
import requests
# for inline plotting
%matplotlib inline
In [2]:
# Step 1: the menu endpoint -- it is requested first to collect a cookie.
url_menu = 'http://www.marinetraffic.com/en/users/ajax_user_menu/home:1'
# Step 2: the JSON endpoint that is queried once the cookie is in hand.
# The path carries sw_x/sw_y and ne_x/ne_y values (presumably the south-west
# and north-east corners of the map view), a zoom level and a station id.
url = (
    'http://www.marinetraffic.com/map/getDataJson'
    '/sw_x:-4.0/sw_y:50.0'
    '/ne_x:12.0/ne_y:54.0'
    '/zoom:7/station:0'
)
In [3]:
# Pose as a desktop browser that arrived from the home page.
user_agent = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/48.0.2564.116 Safari/537.36"
)
headers = {"Referer": "http://www.marinetraffic.com", "User-Agent": user_agent}
# Request the menu page first ...
response = requests.get(url_menu, headers=headers)
# ... because its response carries the cookie needed for the data request.
cookies = response.cookies
# Display the cookie jar as a plain dict.
dict(cookies)
Out[3]:
In [4]:
# Fetch the ship data, presenting the cookie collected from the menu page.
# The timeout keeps this cell from hanging indefinitely on an unresponsive server.
response = requests.get(url, headers=headers, cookies=cookies, timeout=30)
# Stop here with a clear HTTP error (e.g. 403) instead of letting an error
# page surface as a confusing JSON decoding failure on the next line.
response.raise_for_status()
# Decode the JSON body into Python objects.
data = response.json()
In [5]:
# Everything of interest sits under the top-level 'data' key.
payload = data['data']
# The per-ship records that the rest of the notebook works with.
rows = payload['rows']
# Remaining metadata, displayed for reference only.
(data['type'], payload['areaShips'], payload['totalShips'])
Out[5]:
In [6]:
def _ship_record(row):
    """Build one table record from a raw row.

    Every value in the response is a string, so the fields needed as numbers
    are converted and stored under lower-case keys. All raw fields of the row
    are then added as well; on a key collision the raw value wins.
    """
    record = {
        "id": int(row["SHIP_ID"]),
        "lat": float(row['LAT']),
        "lon": float(row['LON']),
        "heading": int(row['HEADING']),
        # the raw speed value is divided by 10
        "speed": float(row["SPEED"])/10.0,
    }
    record.update(row)
    return record


# One record per ship ...
ships = [_ship_record(row) for row in rows]
# ... loaded into a dataframe that is indexed by the ship id.
df = pandas.DataFrame.from_records(ships).set_index('id')
In [7]:
# A heading of 511 means "missing" (the column still holds ints, so == is safe).
df.loc[df['heading'] == 511, 'heading'] = None
# Unfortunately 0 is used for missing as well, so genuine headings of 0 are
# discarded together with the missing ones.
df.loc[df['heading'] == 0, 'heading'] = None
# Convert the heading (0 = up, increasing clockwise) into a direction in
# radians (0 = right, increasing counterclockwise): direction = pi/2 - heading.
# deg2rad is applied to the whole column at once.
df['direction'] = -np.deg2rad(df['heading']) + 0.5*np.pi
# Split the direction into unit u,v components for the quiver plots.
df['u'] = np.cos(df['direction'])
df['v'] = np.sin(df['direction'])
# Suppose we run this every 10 minutes or so: store each extract in its own
# file, named after the current POSIX timestamp.
# Use a timezone-aware "now": timestamp() treats a naive datetime (such as the
# one utcnow() returns) as local time, which shifts the result by the
# machine's UTC offset.
now = datetime.datetime.now(datetime.timezone.utc)
filename = 'marinetraffic-extract-%f.json' % (now.timestamp(), )
# The file is written before the rows with missing values are dropped below.
df.to_json(filename)
# Toss out the missings.
# NOTE(review): dropna() without a subset drops a ship when ANY column is
# missing, not only the heading -- confirm that this is intended.
df = df.dropna()
In [8]:
# Two side-by-side quiver plots: every ship, and only the slow ones.
fig, axes = plt.subplots(1, 2, figsize=(18, 6))
# Label both panels with the columns plotted on each axis.
for ax in axes:
    ax.set_xlabel('lon')
    ax.set_ylabel('lat')
# Left panel: one arrow per ship at (lon, lat), pointing along (u, v).
axes[0].set_title('all ships')
axes[0].quiver(df['lon'], df['lat'], df['u'], df['v'])
# Ships whose speed value is below 1.0. DataFrame.ix was removed in
# pandas 1.0; .loc is the supported way to select rows with a boolean mask.
selection = df.loc[df['speed'] < 1.0]
# Right panel: the same plot restricted to the slow ships.
axes[1].set_title('slow ships')
axes[1].quiver(selection['lon'], selection['lat'], selection['u'], selection['v'])
Out[8]:
In [10]:
# Information that we collected, shown for the first remaining ship: the
# converted fields (lat, lon, heading, speed), the derived direction/u/v
# columns and every raw field from the response.
df.iloc[0]
Out[10]:
In [11]:
# Number of ships left after the rows with missing values were dropped.
len(df)
Out[11]:
In [ ]: