In [162]:
import os
import folium
import pandas as pd
import numpy as np
from folium import plugins
import branca.colormap as cm
from tqdm import tqdm
import matplotlib.pyplot as plt
import json
In [120]:
import requests
import time
# Download the real-time Velib station availability export from the Paris
# open-data portal and cache it locally as velib.csv.
response = requests.get("http://opendata.paris.fr/api/records/1.0/download/?dataset=stations-velib-disponibilites-en-temps-reel&facet=banking&facet=bonus&facet=status&facet=contract_name&rows=-1", timeout=60)
# Fail loudly on an HTTP error instead of saving an error page as the dataset.
response.raise_for_status()
txt = response.text
# The context manager closes (and flushes) the file even if the write fails.
# UTF-8 is pinned so the file matches the default encoding pd.read_csv expects,
# regardless of the platform's locale.
with open('velib.csv', 'w+', encoding='utf-8') as f:
    f.write(txt)
Out[120]:
In [121]:
# Load the cached station export; the file uses ';' as its field delimiter.
velibs = pd.read_csv('velib.csv', delimiter=";")
In [122]:
# Preview the first rows of the freshly loaded station table.
velibs.head()
Out[122]:
In [123]:
# (rows, columns) of the station table before any filtering.
velibs.shape
Out[123]:
In [124]:
# Keep only stations whose status is 'OPEN'. The explicit copy makes the result
# an independent frame, so the columns added to it in later cells do not
# trigger SettingWithCopyWarning or write into a view of the raw table.
velibs = velibs[velibs.status == 'OPEN'].copy()
In [125]:
def _arrondissement_from_address(address):
    """Return the arrondissement number encoded in a station address.

    The second-to-last whitespace-separated token of the address is treated as
    the postal code. When it starts with '75', the digits after the third
    character are returned as an int (e.g. '75011' -> 11). Any other address,
    including ones that are missing, too short, or carry a malformed postal
    code, yields NaN instead of raising.
    """
    tokens = address.split() if isinstance(address, str) else []
    if len(tokens) < 2:
        return np.nan
    postal_code = tokens[-2]
    if postal_code[:2] != '75':
        return np.nan
    try:
        return int(postal_code[3:])
    except ValueError:
        return np.nan


# np.nan (lower case) is used because the np.NaN alias no longer exists in NumPy 2.
velibs['arron'] = velibs['address'].map(_arrondissement_from_address)
In [126]:
# (rows, columns) after keeping open stations and adding the 'arron' column.
velibs.shape
Out[126]:
In [127]:
# Preview the filtered table, now including the derived 'arron' column.
velibs.head()
Out[127]:
In [135]:
# Total number of bike stands per arrondissement (rows with a NaN 'arron' are
# dropped by groupby). The series is reused by the choropleth cell below.
bike_stands_arron = velibs.groupby('arron')['bike_stands'].sum()

# Explicit figure/axes with labels so the chart is readable on its own.
fig, ax = plt.subplots()
ax.bar(bike_stands_arron.index.astype(int), bike_stands_arron.values)
ax.set_xlabel('Arrondissement')
ax.set_ylabel('Number of bike stands')
ax.set_title('Bike stands per arrondissement')
plt.show()
In [136]:
# Boundary file source:
# https://github.com/codeforamerica/click_that_hood/raw/master/public/data/paris.geojson
state_geo = r'paris.json'

# Base map centred on the coordinates used throughout this notebook.
m = folium.Map(tiles='Stamen Toner', zoom_start=13, location=[48.856614, 2.3522219])

# Shade each polygon by the summed bike stands of the matching arrondissement;
# polygons are matched to the series index through 'properties.cartodb_id'.
# NOTE(review): `Map.choropleth` and its `geo_path` argument belong to an older
# folium API - confirm against the installed folium version before upgrading.
choropleth_options = dict(
    geo_path=state_geo,
    data=bike_stands_arron,
    columns=['arron', 'bike_stands'],
    key_on='properties.cartodb_id',
    fill_color='PuBu',
    fill_opacity=0.9,
    line_opacity=0.2,
    legend_name='Number of bike stands',
    highlight=1,
)
m.choropleth(**choropleth_options)
m
Out[136]:
In [137]:
from colour import Color
# Ten-step gradient running from red (index 0) to green (index 9).
_gradient_start = Color("red")
colors = list(_gradient_start.range_to(Color("green").hex, 10))


def red(brightness):
    """Map a ratio in [0.0, 1.0] onto the red-to-green gradient.

    Parameters
    ----------
    brightness : float
        Fraction to visualise; 0.0 selects the red end of `colors`, 1.0 the
        green end. Values outside [0, 1] are clamped, and NaN is treated as
        0.0, so a malformed ratio can never index outside the gradient.

    Returns
    -------
    The entry of `colors` closest to the requested fraction.
    """
    if brightness != brightness:  # NaN is the only value not equal to itself
        brightness = 0.0
    brightness = min(max(brightness, 0.0), 1.0)
    # Scale 0.0-1.0 onto the valid index range 0..len(colors)-1.
    step = int(round((len(colors) - 1) * brightness))
    return colors[step]
In [138]:
# Quick check: show the hex code of the gradient entry selected for a ratio of 0.
red(0).hex
Out[138]:
In [139]:
# 'position' holds "<lat>,<long>" as text; split it once and convert both parts.
coords = velibs['position'].str.split(",", expand=True)
velibs['lat'] = coords[0].astype(float)
velibs['long'] = coords[1].astype(float)

# Share of stands currently holding a bike. Stations reporting zero stands would
# divide by zero, so their ratio is defined as 0.0, and the ratio is clipped to
# [0, 1] so that red() always receives a valid fraction.
stands = velibs['bike_stands'].astype(float)
fill_ratio = (velibs['available_bikes'] / stands.where(stands > 0)).fillna(0.0).clip(0.0, 1.0)
velibs['color'] = fill_ratio.map(lambda ratio: red(ratio).hex)
In [140]:
# Fresh base map for the availability heat map.
m = folium.Map(tiles='Stamen Toner', zoom_start=13, location=[48.856614, 2.3522219])

# One heat point per station: latitude, longitude, and available bikes as weight.
heat_points = velibs[['lat','long', 'available_bikes']].values
heat_layer = plugins.HeatMap(heat_points, radius = 20)
m.add_child(heat_layer, name='Training location')

# Colour legend attached to the map, scaled to the 0-200 range.
legend_colors = ['blue', 'green', 'yellow', 'orange', 'red']
legend_stops = [0, 0.25, 0.5, 0.75, 1.0]
colormap = cm.LinearColormap(legend_colors, index=legend_stops).scale(0,200)
colormap.caption = 'Number of velibs'
m.add_child(colormap)
m
Out[140]:
In [141]:
# One circle per station, coloured by the share of stands holding a bike and
# labelled "<available bikes> / <bike stands>".
m = folium.Map(location=[48.856614, 2.3522219], zoom_start=13, tiles='Stamen Toner')
for k, v in velibs.iterrows():
    # Pass real numbers to folium rather than the raw text fragments.
    lat_text, long_text = v.position.split(",")[:2]
    # A station reporting zero stands would divide by zero; treat it as empty,
    # and clamp the ratio so red() always receives a value in [0, 1].
    stands = float(v.bike_stands)
    fill_ratio = v.available_bikes / stands if stands > 0 else 0.0
    fill_ratio = min(max(fill_ratio, 0.0), 1.0)
    folium.CircleMarker(location=[float(lat_text), float(long_text)],
                        fill_color=red(fill_ratio).hex,
                        popup=str(v.available_bikes) + " / " + str(v.bike_stands),
                        radius=7).add_to(m)
In [19]:
# Render the most recently built map as this cell's output.
m
Out[19]:
In [211]:
# Build one row per snapshot file found in path_data: the timestamp text taken
# from the file name plus the available bikes of every station listed in
# `velibs`. Stations absent from a snapshot are left as NaN, and stations not
# listed in `velibs` are ignored.
path_data = 'data/raw/velib/'
columns = np.append("Date", velibs.number.values)

snapshots = []
# Sorting makes the row order deterministic; wrapping the list itself (not an
# enumerate object) lets tqdm display the total number of files.
for name in tqdm(sorted(os.listdir(path_data))):
    velibs_temp = pd.read_csv(path_data + name, sep=";")
    # Map station number (as text, matching `columns`) to available bikes in one
    # vectorised pass instead of iterating over rows.
    positions = dict(zip(velibs_temp['number'].astype(str), velibs_temp['available_bikes']))
    positions['Date'] = name.split("velib")[1].split(".")[0]
    snapshots.append(positions)

# Create the frame once; growing it row by row with df.loc[i] copies the whole
# frame on every insertion and leaves every column with object dtype.
df = pd.DataFrame(snapshots, columns=columns)
In [212]:
# Turn the timestamp text into datetimes; values that do not match the
# year-month-day-hour-minute pattern become NaT instead of raising.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce', format='%Y-%m-%d-%H-%M')
In [213]:
# Order the snapshots chronologically, then renumber the rows from zero.
df = df.sort_values(by='Date')
df = df.reset_index(drop=True)
In [214]:
# Print (rows, columns) of the snapshot table, then display its first rows.
print(df.shape)
df.head()
Out[214]:
In [215]:
# Sum every non-'Date' column per row (one total per snapshot), then average
# those totals over all snapshots.
df.loc[:, df.columns != 'Date'].sum(axis=1).mean()
Out[215]:
In [284]:
# Station columns only, coerced to numbers so the mean is well defined even if
# the frame was assembled with object dtype. 'Date' is kept out of the
# aggregation and serves purely as the grouping key.
station_counts = df.drop('Date', axis=1).apply(pd.to_numeric, errors='coerce')

# Mean available bikes per station for every (day of week, hour) pair.
hourly_mean = station_counts.groupby([df.Date.dt.dayofweek, df.Date.dt.hour]).mean()

# Change between consecutive (day of week, hour) rows, keeping only increases
# (negative differences are clipped to 0), summed over all stations.
df_grouped = hourly_mean.diff().clip(lower=0).sum(axis=1)
In [303]:
# Day names indexed by pandas' dayofweek (0 = monday). Defined here because the
# only other definitions live in later cells, which made this cell raise
# NameError when the notebook is run top to bottom on a fresh kernel.
weekday = ['monday','tuesday','wednesday','thursday','friday','saturday','sunday']

# One line per day of the week, indexed by hour.
plt.figure(figsize=(15,4))
for i in range(len(weekday)):
    plt.plot(df_grouped.loc[i], label=weekday[i])
plt.xlabel('Hour of day')
plt.legend()
plt.show()
In [289]:
# Export the per-weekday hourly profile as JSON:
# {'hourly': {<day name>: {'hour': [...], 'velibs': [...]}}}
weekday = ['monday','tuesday','wednesday','thursday','friday','saturday','sunday']
json_file = dict()
for i in range(len(weekday)):
    day_profile = df_grouped.loc[i]
    json_file[weekday[i]] = {'hour': day_profile.index.values.tolist(),
                             'velibs': day_profile.round(1).values.tolist()}

# Create the target folder on first run so the export does not fail with
# FileNotFoundError on a fresh checkout.
os.makedirs('output', exist_ok=True)
with open('output/velibs.json', 'w') as outfile:
    json.dump({'hourly': json_file},
              outfile)
In [34]:
import pygal
from IPython.display import SVG, HTML
In [35]:
# HTML page template used to embed a rendered pygal chart in the notebook.
# The {pygal_render} placeholder is filled through str.format with the chart's
# SVG markup; the two <script> tags load pygal's tooltip JavaScript.
# NOTE(review): the scripts are referenced over plain http - confirm they still
# load when the notebook itself is served over https.
html_pygal = """
<!DOCTYPE html>
<html>
<head>
<script type="text/javascript" src="http://kozea.github.com/pygal.js/javascripts/svg.jquery.js"></script>
<script type="text/javascript" src="http://kozea.github.com/pygal.js/javascripts/pygal-tooltips.js"></script>
<!-- ... -->
</head>
<body>
<figure>
{pygal_render}
</figure>
</body>
</html>
"""
In [38]:
# Interactive line chart with one series per day, each point being the summed
# positive change in available bikes between consecutive snapshots of that day.
line_chart = pygal.Line(x_title='Time [hour]',y_title='Number of used bikes')
line_chart.title = 'Number of used bikes'
line_chart.x_labels = range(0, 24)
weekday = ['monday','tuesday','wednesday','thursday','friday','saturday','sunday']
# NOTE(review): days 15..21 of the month are mapped onto monday..sunday, which
# presumes the 15th of the recorded month is a Monday - confirm against the data.
for i in range(15,22):
    subdf = df[df['Date'].dt.day == i]
    # .loc replaces the .ix indexer, which no longer exists in pandas >= 1.0.
    # The first value is dropped because diff() has no predecessor for the
    # first snapshot of the day.
    tmp_df = subdf.loc[:, subdf.columns != 'Date'].diff().clip(0).sum(axis=1).values[1:]
    line_chart.add(weekday[i-15], tmp_df)
# is_unicode=True makes render() return text; the default returns bytes on
# Python 3, which str.format would embed as a "b'...'" literal.
HTML(html_pygal.format(pygal_render=line_chart.render(is_unicode=True)))
Out[38]: