In [1]:
import pandas
In [2]:
# Load both tables with every column read as a string (dtype=str). IDs are
# therefore compared as text, and coordinate columns must be converted with
# float() / astype(float) at the point where they are used numerically.
users = pandas.read_csv("users.csv", dtype=str)
trans = pandas.read_csv("transactions.csv", dtype=str)
In [3]:
# Preview the first rows of the users table.
users.head()
Out[3]:
In [4]:
# Preview the first rows of the transactions table.
trans.head()
Out[4]:
In [5]:
import math
def haversine(lon1, lat1, lon2, lat2, radius_km=6367):
    """Return the great-circle distance between two points, in kilometres.

    Parameters
    ----------
    lon1, lat1 : number or numeric string
        Longitude and latitude of the first point, in degrees. Each value is
        passed through ``float()``, so strings such as ``"12.5"`` are accepted.
    lon2, lat2 : number or numeric string
        Longitude and latitude of the second point, in degrees.
    radius_km : float, optional
        Radius of the sphere in kilometres. Defaults to 6367, the constant
        this function has always used.

    Returns
    -------
    float
        Distance along the sphere's surface in kilometres. If any coordinate
        is NaN the result is NaN.

    Raises
    ------
    ValueError
        If a coordinate is a string that ``float()`` cannot parse.
    TypeError
        If a coordinate is an object ``float()`` does not accept (e.g. None).
    """
    # Parse to float, then convert degrees to radians.
    lon1, lat1, lon2, lat2 = (
        math.radians(float(value)) for value in (lon1, lat1, lon2, lat2)
    )

    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2

    # Mathematically `a` lies in [0, 1], but floating-point rounding can push
    # it marginally outside (e.g. for near-antipodal points), which would make
    # sqrt/asin raise ValueError. Plain comparisons are used instead of
    # min()/max() so that a NaN value is left untouched and still propagates.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0

    c = 2 * math.asin(math.sqrt(a))
    return radius_km * c
In [6]:
def calc_dist(row):
    """Distance in km between the seller and buyer of one transaction row.

    Both parties are looked up by id in the notebook-level ``users`` table
    (first matching row wins) and their coordinates are handed to
    ``haversine``.

    Returns 0 when either user id has no match in ``users`` (IndexError from
    ``.iloc[0]`` on an empty selection) or when a ValueError is raised while
    computing the distance, e.g. for a coordinate string that cannot be parsed.

    NOTE(review): a coordinate that is NaN rather than an unparsable string
    does not raise (``float(nan)`` succeeds), so such rows would yield NaN
    instead of 0 - confirm how missing coordinates appear in users.csv.
    """
    try:
        # Resolve the seller first, then the buyer.
        seller_row = users.loc[users["id"] == row["seller_id"]].iloc[0]
        buyer_row = users.loc[users["id"] == row["buyer_id"]].iloc[0]
        return haversine(
            buyer_row["longitude"],
            buyer_row["latitude"],
            seller_row["longitude"],
            seller_row["latitude"],
        )
    except (ValueError, IndexError):
        # Best-effort: unmatched users or bad coordinates count as distance 0.
        return 0
In [8]:
# Compute the seller-to-buyer distance (km) for the first 500 transactions.
# axis=1 makes apply() call calc_dist once per row; each call scans `users`
# for both parties. The 500-row limit is presumably there to keep the runtime
# manageable - TODO confirm before widening the slice.
trans_dist = trans[:500].apply(calc_dist, axis=1)
In [9]:
import matplotlib.pyplot as plt
%matplotlib inline
# in km
plt.hist(trans_dist, bins=20)
Out[9]:
In [10]:
from mpl_toolkits.basemap import Basemap

# World map in a Mercator projection, spanning latitudes -80..80 and
# longitudes -180..180. resolution='c' selects Basemap's crude
# (lowest-detail) coastline data.
m = Basemap(
    projection='merc',
    llcrnrlat=-80,
    urcrnrlat=80,
    llcrnrlon=-180,
    urcrnrlon=180,
    lat_ts=20,
    resolution='c',
)

# Outline the continents and the frame of the map.
m.drawcoastlines()
m.drawmapboundary()

# The coordinate columns were loaded as strings, so cast them to float before
# projecting longitude/latitude into the map's x/y coordinates.
user_lons = list(users["longitude"].astype(float))
user_lats = list(users["latitude"].astype(float))
x, y = m(user_lons, user_lats)

# One small red dot (marker size 1) per user.
m.scatter(x, y, 1, marker='o', color='red')

# Render the figure.
plt.show()
In [12]:
# Start a fresh Mercator world map and outline the continents.
m = Basemap(
    projection='merc',
    llcrnrlat=-80,
    urcrnrlat=80,
    llcrnrlon=-180,
    urcrnrlon=180,
    lat_ts=20,
    resolution='c',
)
m.drawcoastlines()

# Draw one arc per transaction for the first 1000 transactions.
for _index, row in trans[:1000].iterrows():
    try:
        # First matching user row for each party; .iloc[0] raises IndexError
        # when the id is not present in `users`.
        seller = users[users["id"] == row["seller_id"]].iloc[0]
        buyer = users[users["id"] == row["buyer_id"]].iloc[0]

        seller_lon = float(seller["longitude"])
        buyer_lon = float(buyer["longitude"])

        # Skip long arcs for now: only pairs less than 90 degrees of longitude
        # apart are drawn. Written as `not (... < 90)` so that a NaN longitude
        # is skipped as well.
        if not (abs(seller_lon - buyer_lon) < 90):
            continue

        # Great circle from the seller's location to the buyer's.
        m.drawgreatcircle(
            seller_lon,
            float(seller["latitude"]),
            buyer_lon,
            float(buyer["latitude"]),
            linewidth=1,
            color='b',
        )
    except (ValueError, IndexError):
        # Best-effort: transactions with unknown users or unparsable
        # coordinates are simply not drawn.
        pass

# Render the figure.
plt.show()
In [ ]: