In order to run this notebook with the correct PYTHONPATH, you can use
$ ./e-mission-jupyter.bash notebook
This notebook makes heavy use of the following python libraries.
pandas: from the python computing stack (http://pandas.pydata.org/pandas-docs/stable/)
geojson: standard JSON representation of geographic data (http://geojson.org/)
folium: python bridge to leaflet (http://leafletjs.com/), see https://github.com/python-visualization/folium
In [ ]:
import emission.core.get_database as edb
import pandas as pd
# Pull only the user_email and uuid fields (excluding mongo's _id) for every
# entry in the uuid db, and tabulate them so a UUID can be picked below
uuid_projection = {"user_email":1, "uuid": 1, "_id": 0}
user_docs = list(edb.get_uuid_db().find({}, uuid_projection))
all_users = pd.DataFrame(user_docs)
all_users
In [ ]:
from uuid import UUID
In [ ]:
test_user_id = all_users.iloc[1].uuid # replace with UUID from above
The preferred technique to access wrapper objects from the timeseries is to use the abstract timeseries interface. This makes it easier for us to switch to alternative timeseries implementations later.
In [ ]:
import emission.storage.timeseries.abstract_timeseries as esta
import emission.storage.decorations.analysis_timeseries_queries as esda
import emission.core.wrapper.entry as ecwe
import emission.storage.decorations.trip_queries as esdt
In [ ]:
ts = esta.TimeSeries.get_time_series(test_user_id)
In [ ]:
entry_it = ts.find_entries(["analysis/cleaned_trip"], time_query=None)
All keys and their mapping to data model objects can be found in https://github.com/e-mission/e-mission-server/blob/master/emission/core/wrapper/entry.py
In [ ]:
# Walk every cleaned trip returned by the timeseries and, for each one,
# list the cleaned sections that belong to it
for ct in entry_it:
    # Wrap the raw entry so that its fields are available as attributes
    cte = ecwe.Entry(ct)
    print("=== Trip:", cte.data.start_loc, "->", cte.data.end_loc)
    # Sections are looked up by the id of the trip that contains them
    section_it = esdt.get_sections_for_trip("analysis/cleaned_section", test_user_id, cte.get_id())
    for sec in section_it:
        print(" --- Section:", sec.data.start_loc, "->", sec.data.end_loc, " on ", sec.data.sensed_mode)
In [ ]:
ct_df = ts.get_data_df("analysis/cleaned_trip", time_query=None)
In [ ]:
len(ct_df)
In [ ]:
ct_df.columns
In [ ]:
ct_df[["start_loc", "end_loc", "start_ts", "end_ts"]]
In [ ]:
cs_df = ts.get_data_df("analysis/cleaned_section", time_query=None)
In [ ]:
len(cs_df)
In [ ]:
cs_df.columns
In [ ]:
cs_df[["start_loc", "end_loc", "start_ts", "end_ts", "sensed_mode"]]
You can also use direct mongodb queries during exploratory work. I do ask that you create a storage decorator (in emission/storage/decorations) when you submit a pull request for ongoing use.
In [ ]:
import emission.core.get_database as edb
In [ ]:
edb.get_timeseries_db().find_one()
In [ ]:
edb.get_timeseries_db().distinct("metadata.key")
Note that in this case, you need to know whether to use the timeseries or the analysis_timeseries collection.
In [ ]:
edb.get_analysis_timeseries_db().distinct("metadata.key")
In [ ]:
# Count the cleaned trips stored for the test user. Cursor.count() is deprecated
# since pymongo 3.7 and removed in pymongo 4.0, so count on the collection instead.
edb.get_analysis_timeseries_db().count_documents({"user_id": test_user_id, "metadata.key": "analysis/cleaned_trip"})
In particular, you can use this to access entries that are not in the timeseries
In [ ]:
edb.get_uuid_db().distinct("uuid")
The trips and places maintain links to each other - e.g. start_place, end_place.
In [ ]:
ct_df[["start_place", "end_place"]]
These are primary key links to other entries in the database. It would be useful to have a doubly linked list representing this properly. The Timeline helps with that.
In [ ]:
import emission.storage.decorations.timeline as esdl
In [ ]:
tl = esdl.get_cleaned_timeline(test_user_id, ct_df.iloc[0].start_ts, ct_df.iloc[-1].end_ts)
In [ ]:
# Print one line per timeline entry: the entry key followed by its time range
for e in tl:
    if 'enter_ts' in e.data:
        # Must be place-like
        print(e.metadata.key, e.data.enter_fmt_time, "->", e.data.exit_fmt_time)
    else:
        # No enter_ts field, so use the start/end fields instead
        print(e.metadata.key, e.data.start_fmt_time, "->", e.data.end_fmt_time)
In [ ]:
# The timeline is an iterator, so after it is consumed, it is empty
# (this repeats the previous loop on the same `tl` to demonstrate that)
for e in tl:
    if 'enter_ts' in e.data:
        # Must be place-like
        print(e.metadata.key, e.data.enter_fmt_time, "->", e.data.exit_fmt_time)
    else:
        # No enter_ts field, so use the start/end fields instead
        print(e.metadata.key, e.data.start_fmt_time, "->", e.data.end_fmt_time)
In [ ]:
stl = esdt.get_cleaned_timeline_for_trip(test_user_id, tl.first_trip().get_id())
In [ ]:
# List the key of every entry in the timeline fetched for the first trip
for e in stl:
    print(e.metadata.key)
In [ ]:
stl = esdt.get_cleaned_timeline_for_trip(test_user_id, tl.last_trip().get_id())
In [ ]:
# List the key of every entry in the timeline fetched for the last trip
for e in stl:
    print(e.metadata.key)
Once we have trip and section objects, we can retrieve the sensed data associated with them by querying for data in various streams that falls within the time ranges associated with the trip/section. Here again, our architecture of storing the analysis results as a separate datastream makes it easy to retrieve data at various levels of processing.
In [ ]:
import emission.analysis.plotting.geojson.geojson_feature_converter as gfc
import emission.analysis.plotting.leaflet_osm.our_plotter as lo
In [ ]:
# Fetch geojson for everything between the start of the first cleaned trip
# and the end of the last one, padded by a small fuzz factor on both ends
trip_start_end_fuzz = 10 # seconds
first_trip_for_user = ct_df.iloc[0]
first_trip_start_ts = first_trip_for_user.start_ts
first_trip_end_ts = first_trip_for_user.end_ts
range_start_ts = first_trip_start_ts - trip_start_end_fuzz
range_end_ts = ct_df.iloc[-1].end_ts + trip_start_end_fuzz
trips_geojson_list = gfc.get_geojson_for_ts(test_user_id, range_start_ts, range_end_ts)
In [ ]:
len(trips_geojson_list)
In [ ]:
map_list = lo.get_maps_for_geojson_trip_list(trips_geojson_list)
In [ ]:
len(map_list)
In [ ]:
map_list[0]
In [ ]:
map_list[-1]
In [ ]:
import branca.element as bre
In [ ]:
# Lay out the first nrows x ncols maps in a single figure
nrows = 2
ncols = 3
fig = bre.Figure()
# Derive the number of maps from the grid size so the two cannot drift apart
for i, m in enumerate(map_list[:nrows * ncols]):
    # subplot positions are 1-based
    fig.add_subplot(nrows,ncols,i+1).add_child(m)
fig
In [ ]:
# Lay out the last nrows x ncols maps in a single figure
nrows = 2
ncols = 3
fig = bre.Figure()
# Use `m` rather than `map` for the loop variable: in a notebook the binding
# outlives the cell and would shadow the builtin map() for all later cells
for i, m in enumerate(map_list[-(nrows * ncols):]):
    # subplot positions are 1-based
    fig.add_subplot(nrows,ncols,i+1).add_child(m)
fig
In [ ]:
# Compare how many location points exist at each level of processing
# (raw -> filtered -> resampled) over the same time range.
# NOTE(review): the id below is taken from ct_df (the cleaned *trip* dataframe)
# but is looked up under the "analysis/cleaned_section" key - confirm this is
# intended; cs_df.iloc[0]._id or the "analysis/cleaned_trip" key may have been meant.
# Build the time query once and reuse it, instead of repeating the lookup per stream
trip_like_tq = esda.get_time_query_for_trip_like(
    "analysis/cleaned_section", ct_df.iloc[0]._id)
all_locs = ts.get_data_df("background/location", time_query=trip_like_tq)
filtered_locs = ts.get_data_df("background/filtered_location", time_query=trip_like_tq)
resampled_locs = ts.get_data_df("analysis/recreated_location", time_query=trip_like_tq)
print("Locations go from all = %d -> filtered = %d -> resampled = %d" % (len(all_locs),
    len(filtered_locs),
    len(resampled_locs)))
In [ ]:
all_locs[["_id", "latitude", "longitude", "fmt_time"]]
In [ ]:
# Retrieve the motion activity entries that fall within the same time query
# used for the location streams
activity_tq = esda.get_time_query_for_trip_like("analysis/cleaned_section", ct_df.iloc[0]._id)
all_activity = ts.get_data_df("background/motion_activity", activity_tq)
In [ ]:
all_activity.columns
In [ ]:
import emission.core.wrapper.motionactivity as ecwm
# Count the motion activity entries for each activity type of interest,
# then report them
n_walking = len(all_activity[all_activity.type == ecwm.MotionTypes.WALKING.value])
n_on_foot = len(all_activity[all_activity.type == ecwm.MotionTypes.ON_FOOT.value])
n_in_vehicle = len(all_activity[all_activity.type == ecwm.MotionTypes.IN_VEHICLE.value])
print("Found %d walking entries, %d on_foot entries" % (n_walking, n_on_foot))
print("Found %d motorized entries" % n_in_vehicle)
In [ ]:
# Convert each location dataframe (raw, filtered, resampled) to a feature list
# and build one map per list, in that order
location_dfs = [all_locs, filtered_locs, resampled_locs]
feature_lists = [gfc.get_feature_list_from_df(loc_df) for loc_df in location_dfs]
map_list = lo.get_maps_for_geojson_unsectioned(feature_lists)
In [ ]:
# Show the maps side by side in a single row of three
fig = bre.Figure()
# Use `m` rather than `map` for the loop variable: in a notebook the binding
# outlives the cell and would shadow the builtin map() for all later cells
for i, m in enumerate(map_list):
    # subplot positions are 1-based
    fig.add_subplot(1,3,i+1).add_child(m)
fig
In [ ]: