In [1]:
import tqdm
import tarfile
import pandas
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import PIL
import gzip
from urllib.request import urlopen
%matplotlib inline
matplotlib.style.use('seaborn-bright')
# progress bar
tqdm.tqdm.pandas()
# 檔案名稱格式
filename_format="../Week03/M06A_{year:04d}{month:02d}{day:02d}.tar.gz".format
xz_filename_format="xz/M06A_{year:04d}{month:02d}{day:02d}.tar.xz".format
csv_format = "M06A/{year:04d}{month:02d}{day:02d}/{hour:02d}/TDCS_M06A_{year:04d}{month:02d}{day:02d}_{hour:02d}0000.csv".format
# 設定欄位名稱
M06A_fields = ['VehicleType',
'DetectionTime_O','GantryID_O',
'DetectionTime_D','GantryID_D ',
'TripLength', 'TripEnd', 'TripInformation']
import datetime
# 用來解析時間格式
def strptime(x):
return datetime.datetime(int(x[:4]), int(x[5:7]), int(x[8:10]),
int(x[11:13]), int(x[14:16]), int(x[17:19]) )
def parse_tripinfo(tripinfo):
split1 = tripinfo.split("; ")
return [(strptime(t), t[20:]) for t in split1]
In [2]:
## 讀取檔案
node_data=pandas.read_json('../Week03/node_data.json')
speedlimit = pandas.read_json('../Week03/speedlimit.json')
In [3]:
# 這個改了一點, SpeedInfo 這個 named tuple 同時紀錄了區段的起始時間和兩端點
from collections import namedtuple
SpeedInfo = namedtuple("SpeedInfo", ["speed", "time","loc1", "loc2"])
def compute_speed(trip):
rtn = []
for (t1,n1), (t2,n2) in zip(trip[:-1], trip[1:]):
if n1[:3] != n2[:3] or n1[-1]!=n2[-1]:
continue
if n1[3]=='R' or n2[3]=='R':
continue
km1 = int(n1[3:-1])/10
km2 = int(n2[3:-1])/10
hr_delta = (t2-t1).total_seconds()/60/60
speed = abs(km2-km1)/hr_delta
rtn.append(SpeedInfo(speed, t1, n1, n2))
return rtn
In [4]:
# 打開剛才下載的檔案試試
data_config ={"year":2016, "month":12, "day":18}
tar = tarfile.open(filename_format(**data_config), 'r')
# 如果沒有下載,可以試試看 xz 檔案
#data_dconfig ={"year":2016, "month":11, "day":18}
#tar = tarfile.open(xz_filename_format(**data_config), 'r')
In [ ]:
csvs = (tar.extractfile(csv_format(hour=hr, **data_config)) for hr in tqdm.trange(24))
data = pandas.concat([pandas.read_csv(csv, names=M06A_fields) for csv in csvs], ignore_index=True)
print("資料大小", data.shape)
# 檢查異常的資料
print("異常資料數:", data[data.TripEnd == 'N'].shape[0])
# 去除異常資料
data = data[data.TripEnd == 'Y']
# 只保留 TripInformation 和 VehicleType
data = data[['VehicleType', "TripInformation"]]
data['Trip'] = data.TripInformation.progress_apply(parse_tripinfo)
data['Speed'] = data.Trip.progress_apply(compute_speed)
In [ ]:
speedlimit_lookup = node_data.速限.to_dict()
def is_speeding(info):
speedlimit = max(speedlimit_lookup[info.loc1], speedlimit_lookup[info.loc2])
return info.speed > speedlimit + 10
# 存下所有超速的判斷
data['超速紀錄'] = data.Speed.progress_apply(lambda l:[is_speeding(x) for x in l])
In [ ]:
data.head(10)
In [ ]:
node_data
In [ ]:
S01F_list = list(sorted(x for x in node_data.編號 if x.startswith('01F') and x.endswith('S')))
In [ ]:
S01F = {s:i for i,s in enumerate(S01F_list)}
In [ ]:
len(S01F)
In [ ]:
speedlimit_lookup = node_data.速限.to_dict()
start_time = datetime.datetime(data_config['year'],data_config['month'],data_config['day'])
end_time = datetime.datetime(data_config['year'],data_config['month'],data_config['day']+1)
total_statistics=np.zeros((24*4, len(S01F)), dtype=int)
speeding_statistics=np.zeros((24*4, len(S01F)), dtype=int)
def count_speeding(info):
if not info.loc1.startswith('01F'):
return
if not info.loc1.endswith('S'):
return
if info.loc1 > info.loc2:
return
idx = int((info.time-start_time).seconds/(15*60))
idx2 = S01F[info.loc1]
total_statistics[idx][idx2] += 1
speedlimit = max(speedlimit_lookup[info.loc1], speedlimit_lookup[info.loc2])
if info.speed > speedlimit + 10:
speeding_statistics[idx][idx2] += 1
# 存下所有超速的判斷
data.Speed.apply(lambda l:[count_speeding(x) for x in l]);
In [ ]:
plt.imshow(speeding_statistics/(total_statistics+0.001), aspect=0.3);
In [ ]:
# 下面的中文方式,可能需要 matplotlib 2.0.0 以上
matplotlib.__version__
In [ ]:
# 如果你知道自己系統中的中文字形,可以直接用省略下面的步驟
# matplotlib.font_manager.fontManager.ttffiles # 列出你的字體
# matplotlib.font_manager.fontManager.ttflist # 列出你的字體
# plt.rcParams['font.family'] = 'AR PL Mingti2L Big5' # works on Ubuntu with `apt-get install fonts-arphic-bsmi00lp`
In [ ]:
# 從 00Download.ipynb 來的
import os.path
from urllib.request import urlopen
def download_req(req, filename):
total = int(req.getheader("Content-Length"))
tqdm_conf = dict(total=total, desc=filename, unit='B', unit_scale=True)
with tqdm.tqdm(**tqdm_conf) as pbar:
with open(filename,'wb') as f:
for data in iter(lambda: req.read(8192), b""):
pbar.update(f.write(data))
# 字體下載
font_filename = 'NotoSansCJKtc-hinted.zip'
font_url = "https://noto-website-2.storage.googleapis.com/pkgs/"+font_filename
# 改變這行才能真正下載
if not (os.path.isfile(font_filename) and os.path.getsize(font_filename)==121247052):
with urlopen(font_url) as req:
download_req(req, "NotoSansCJKtc-hinted.zip")
# Extract Font files
import zipfile
with zipfile.ZipFile(font_filename) as zf:
for f in zf.namelist():
if f.endswith('.otf'):
print("extract", f)
zf.extract(f)
fp = matplotlib.font_manager.FontProperties(fname = 'NotoSansCJKtc-Regular.otf')
matplotlib.font_manager.fontManager.ttffiles.append(fp.get_file())
font_entry = matplotlib.font_manager.FontEntry(fp.get_file(), name=fp.get_name(),
style=fp.get_style(), variant=fp.get_variant(),
weight=fp.get_weight(), stretch=fp.get_stretch(), size=fp.get_size())
matplotlib.font_manager.fontManager.ttflist.append(font_entry)
plt.rcParams['font.family'] = fp.get_name()
In [ ]:
plt.figure(figsize=(30,40));
plt.imshow((speeding_statistics/(total_statistics+0.001)), aspect=1,
interpolation='none',
extent=[0,72,24,0]
)
ax = plt.gca()
ax.set_yticks(range(0,24,2));
plt.xticks(range(73), [node_data['交流道(起)'][S01F_list[i]] for i in range(73)],
rotation='vertical');
plt.grid(True)
In [ ]:
plt.figure(figsize=(15,20));
plt.imshow((speeding_statistics/(total_statistics+0.001)).T, aspect=1,
interpolation='none',
extent=[0,24,72,0])
ax = plt.gca()
ax.set_xticks(range(0,26,2));
plt.yticks(range(73), [node_data['交流道(起)'][S01F_list[i]] for i in range(73)],
#fontproperties=fp, #or fontname='AR PL Mingti2L Big5' #'Droid Sans Fallback');
);
plt.grid(True)
In [ ]:
plt.plot(speeding_statistics.sum(axis=1)/(total_statistics.sum(axis=1)+0.00001) );
plt.xticks(range(0,24*4,4), range(24));
plt.grid(True);
plt.xlim(0,24*4-1);
plt.ylim(0, 0.35);
In [ ]:
matplotlib.style.use('seaborn-deep')
In [ ]:
plt.figure(figsize=(30,20))
plt.plot(speeding_statistics.sum(axis=0)/(total_statistics.sum(axis=0)+0.00001),
linewidth=5);
plt.xticks(range(73), [node_data['交流道(起)'][S01F_list[i]] for i in range(73)],
rotation=90, fontsize=18);
plt.grid(True);
plt.xlim(0,72);
plt.ylim(0, 0.35);
In [ ]:
# 將 data.Speed 展開成 DataFrame
from itertools import chain
speed_data = pandas.DataFrame.from_records(chain(*data.Speed), columns=SpeedInfo._fields )
speed_data.head(10)
In [ ]:
# 計算是否超速(完全跟前面一樣,雖然現在 info 是 pandas.Series)
def is_speeding(info):
speedlimit = max(speedlimit_lookup[info.loc1], speedlimit_lookup[info.loc2])
return info.speed > speedlimit + 10
# 存下所有超速的判斷
# 這樣會很慢,而且撞到 tdqm 的雷
# speed_data['超速'] = speed_data.progress_apply(is_speeding, axis=1)
In [ ]:
# 用 pandas 的方法做
speed_loc1 = speed_data.loc1.progress_apply(speedlimit_lookup.get)
speed_loc2 = speed_data.loc1.progress_apply(speedlimit_lookup.get)
speed_max = pandas.DataFrame({'1':speed_loc1, '2:':speed_loc2}).max(axis=1)
speed_data['超速'] = speed_data.speed > speed_max +10
In [ ]:
ts = speed_data.time.progress_apply(lambda t:t.hour*4+t.minute//15)
In [ ]:
groups = speed_data[['speed','超速']].groupby([speed_data.loc1, ts])
In [ ]:
statistics = groups.mean()
statistics
In [ ]:
statistics.loc['01F0005S']['超速'].plot()
plt.xticks(range(0,24*4,4), range(24));
In [ ]:
from ipywidgets import interact
import numpy as np
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
output_notebook()
In [ ]:
from bokeh.charts import Bar, show
from bokeh.models import FixedTicker
from bokeh.models.formatters import TickFormatter, String, List
class FixedTickFormatter(TickFormatter):
"""
Class used to allow custom axis tick labels on a bokeh chart
Extends bokeh.model.formatters.TickFormatte
"""
JS_CODE = """
import {Model} from "model"
import * as p from "core/properties"
export class FixedTickFormatter extends Model
type: 'FixedTickFormatter'
doFormat: (ticks) ->
labels = @labels
return (labels[tick] ? "" for tick in ticks)
@define {
labels: [ p.Any ]
}
"""
labels = List(String, help="""
A mapping of integer ticks values to their labels.
""")
__implementation__ = JS_CODE
In [ ]:
x = range(73)
y = speeding_statistics.sum(axis=0)/(total_statistics.sum(axis=0)+0.00001)
p = figure(title="超速比例", plot_height=500,
plot_width=800, x_range=(0,72), y_range=(0,y.max()*1.05))
r = p.line(x, y, color="#2222aa", line_width=3)
#node_data['交流道(起)'][S01F_list[i]
labels = [node_data['交流道(起)'][S01F_list[i]] for i in range(73)]
p.xaxis[0].formatter = FixedTickFormatter(labels=labels)
p.xaxis[0].ticker = FixedTicker(ticks=list(range(73)))
p.xaxis.major_label_orientation = 3.14/2
In [ ]:
from bokeh.plotting import ColumnDataSource
from bokeh.models import HoverTool, BoxSelectTool
x = range(73)
y = speeding_statistics.sum(axis=0)/(total_statistics.sum(axis=0)+0.00001)
labels = [node_data['交流道(起)'][S01F_list[i]] for i in range(73)]
datasource = ColumnDataSource(
data = {
'x': x,
'y': y,
'labels': labels,
'ypercent': ["{:4.2f}%".format(100*i) for i in y]
}
)
TOOLS = [BoxSelectTool(), HoverTool( tooltips=[("站", "@labels"), ("比例", "@ypercent")])]
p = figure(title="超速比例", plot_height=500,
plot_width=800, x_range=(0,72), y_range=(0,y.max()*1.05), tools=TOOLS)
r = p.line(x='x', y='y', color="#2222aa", line_width=3, source=datasource)
#node_data['交流道(起)'][S01F_list[i]
p.xaxis[0].formatter = FixedTickFormatter(labels=labels)
p.xaxis[0].ticker = FixedTicker(ticks=list(range(73)))
p.xaxis.major_label_orientation = 3.14/2
In [ ]:
show(p, notebook_handle=True);
In [ ]:
highway_name = speed_data.loc1.progress_apply(lambda x:x[:3])
In [ ]:
# 依照高速公路和時間區隔來分組平均
mean_highway_time = speed_data[['speed','超速']].groupby([highway_name, ts]).mean()
# 依照高速公路來分組平均
mean_highway = speed_data[['speed','超速']].groupby(highway_name).mean()
In [ ]:
mean_highway_time.index.get_level_values('loc1').unique()
In [ ]:
x = np.arange(0,24,0.25)
p = figure(title="超速比例", plot_height=720,
plot_width=960, x_range=(0,24), y_range=(0,0.5))
groups = mean_highway_time.groupby(mean_highway_time.index.get_level_values('loc1'))
for hw in groups.groups:
y = groups.get_group(hw)['超速']
r = p.line(x, y, legend=hw, color=tuple(np.random.randint(0,255, size=3)), line_width=3, alpha=0.7)
show(p)
In [ ]:
x = np.arange(0,24,0.25)
p = figure(title="速率", plot_height=720,
plot_width=960, x_range=(0,24), y_range=(0,150))
groups = mean_highway_time.groupby(mean_highway_time.index.get_level_values('loc1'))
for hw in groups.groups:
y = groups.get_group(hw)['speed']
r = p.line(x, y, legend=hw, color=tuple(np.random.randint(0,255, size=3)), line_width=3, alpha=0.7)
show(p)
In [ ]:
In [ ]:
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.tile_providers import STAMEN_TONER
from pyproj import Proj, transform
def trans(lng_lat):
from_proj = Proj(init="epsg:4326")
to_proj = Proj(init="epsg:3857")
return transform(from_proj, to_proj, *lng_lat)
fig = figure(tools='pan, wheel_zoom', x_range=(13410000, 13590000), y_range=(2610000, 2790000))
fig.axis.visible = False
fig.add_tile(STAMEN_TONER)
lng_lat = np.mgrid[120:122:0.1, 23:25:0.1].reshape(2,-1).T
xy = np.array(list(map(trans, lng_lat)))
dots = fig.circle(xy[:,0], xy[:,1])
map_handler = show(fig, notebook_handle=True)
In [ ]:
In [ ]:
import time
import math
for i in range(100):
dots.glyph.fill_color=['red', 'green', 'blue'][(i//5)%3]
shift = [math.cos(i/20), math.sin(i/20)]
xy = np.array(list(map(trans, lng_lat + shift)))
dots.data_source.data['x'] = xy[:,0]
dots.data_source.data['y'] = xy[:,1]
#push_notebook(handle=map_handler)
push_notebook()
time.sleep(0.05)
In [ ]:
from bqplot import pyplot as plt
from bqplot import topo_load
from bqplot.interacts import panzoom
In [ ]:
mean_highway.index
In [ ]:
x = np.arange(len(mean_highway))
y = mean_highway['超速']
plt.figure()
bar = plt.bar(list(mean_highway.index), y)
plt.show()
In [ ]:
# 互動式圖表
x = np.arange(0,24,0.25)
fig= plt.figure()
groups = mean_highway_time.groupby(mean_highway_time.index.get_level_values('loc1'))
def toggle_alpha(a,b):
a.opacities=[1.2-a.opacities[0]]
colors=['blue', 'red', 'purple', 'black', 'green']
for i,hw in enumerate(groups.groups):
y = groups.get_group(hw)['超速']
pp = plt.plot(x,y, colors=[colors[i]],
display_legend=True, line_width=3, opacities=[0.2], labels=[hw])
pp.on_legend_click(toggle_alpha)
plt.show()
In [ ]: