In [1]:
import pandas as pd

In [3]:
# Mobike SODA-2017 Shanghai sample CSV.
# NOTE(review): absolute, user-specific path -- the notebook only runs on a
# machine with this exact directory layout unless the path is edited.
mobike_csv_path = '/Users/chengjun/bigdata/soda-2017-sample/2017/Mobike_SODA_Sample/shanghai_sample.csv'
df = pd.read_csv(mobike_csv_path)

In [4]:
# Preview the first five trips to check the column layout.
df.head(n=5)


Out[4]:
orderid bikeid userid start_time start_location_x start_location_y end_time end_location_x end_location_y track
0 57099 4717 4717 2016-08-02 17:23:43 121.410 31.306 2016-08-02 17:31:08 121.402 31.311 121.402,31.310#121.402,31.311#121.403,31.309#1...
1 165262 413 413 2016-08-04 20:17:09 121.407 31.288 2016-08-04 20:19:58 121.408 31.291 121.407,31.288#121.408,31.289#121.408,31.290#1...
2 8023 3993 3993 2016-08-02 08:16:52 121.462 31.312 2016-08-02 08:25:10 121.449 31.305 121.449,31.305#121.450,31.305#121.451,31.305#1...
3 148871 1340 1340 2016-08-05 18:37:20 121.531 31.313 2016-08-05 18:45:19 121.519 31.308 121.519,31.308#121.519,31.309#121.520,31.309#1...
4 10222 4174 4174 2016-08-06 11:04:50 121.401 31.232 2016-08-06 11:21:04 121.376 31.237 121.374,31.234#121.375,31.233#121.375,31.235#1...

In [5]:
# Number of trip records (rows) in the sample.
df.shape[0]


Out[5]:
101259

In [21]:
# Trips per user, most active users first.
df.userid.value_counts()


Out[21]:
5054    37
6913    36
101     35
1718    35
726     35
5290    35
6590    33
3627    33
6354    33
1011    32
5235    32
2439    32
3651    32
197     31
2899    31
5447    31
5928    31
4591    31
5858    31
5663    31
6131    31
4518    31
20      30
4550    30
1491    30
2483    30
6337    30
4714    30
2869    30
6239    30
        ..
7627     1
9137     1
800      1
9073     1
8868     1
7947     1
3913     1
8075     1
2676     1
8831     1
8612     1
8867     1
8548     1
8973     1
9008     1
2366     1
8305     1
5259     1
1289     1
7836     1
3046     1
1439     1
8462     1
2379     1
7268     1
8590     1
714      1
9038     1
9166     1
9056     1
Name: userid, dtype: int64

In [15]:
# Trip endpoints as (y, x) tuples -- the same order the folium calls in this
# notebook are given (y is ~31.x and x is ~121.x in this data, i.e. presumably
# latitude, longitude).
# Built column-wise with zip over the underlying arrays instead of per-row
# label lookups (df.col[i]), which cost eight scalar Series lookups per trip
# on a ~100k-row frame. Values, scalar types and ordering are unchanged.
starts = list(zip(df.start_location_y.values, df.start_location_x.values))
ends = list(zip(df.end_location_y.values, df.end_location_x.values))

# One [start, end] pair per trip, i.e. a two-point segment for each ride.
edges = list(map(list, zip(starts, ends)))

In [34]:
# NOTE(review): the names say 5054 (the most active user in the value_counts
# output), but the filter selects userid 20 -- confirm which user is intended.
# The names are left as they are because a later cell references them.
df5054 = df[df['userid'] == 20]

# One [(start_y, start_x), (end_y, end_x)] pair per trip of the selected user.
edges5054 = [
    [(start_y, start_x), (end_y, end_x)]
    for start_y, start_x, end_y, end_x in zip(
        df5054.start_location_y.values,
        df5054.start_location_x.values,
        df5054.end_location_y.values,
        df5054.end_location_x.values,
    )
]

In [13]:
# Spot-check the first five (y, x) start points.
starts[0:5]


Out[13]:
[(31.305999999999997, 121.41),
 (31.288, 121.40700000000001),
 (31.311999999999998, 121.462),
 (31.313000000000002, 121.53100000000001),
 (31.231999999999999, 121.40100000000001)]

In [6]:
import folium, jinja2, vincent
from IPython.display import IFrame
from IPython.core.display import HTML
# Report which folium installation is imported and its release number.
for attr_name in ('__file__', '__version__'):
    print(getattr(folium, attr_name))


/Users/chengjun/anaconda/lib/python2.7/site-packages/folium/__init__.pyc
0.2.0

In [7]:
# Base map centred on (y=31.306, x=121.410), the start point of the first
# trip in the sample.
shanghai = folium.Map(zoom_start=10, location=(31.306, 121.410))

In [8]:
# Render the map object as this cell's output.
shanghai


Out[8]:

In [36]:
from folium import plugins

# Fresh base map carrying a heat layer built from the first 1000 start points.
shanghai = folium.Map(zoom_start=10, location=(31.306, 121.410))
start_heat = plugins.HeatMap(starts[:1000])
shanghai.add_children(start_heat)

shanghai


Out[36]:

In [19]:
# Fresh base map with one two-point polyline (start -> end) for each of the
# first 1000 trips.
shanghai = folium.Map(zoom_start=10, location=(31.306, 121.410))

for segment in edges[:1000]:
    shanghai.add_children(folium.PolyLine(locations=segment))

In [20]:
# Render the map object as this cell's output.
shanghai


Out[20]:

In [35]:
# Separate map with one two-point polyline (start -> end) per segment in
# edges5054.
shanghai5054 = folium.Map(zoom_start=10, location=(31.306, 121.410))

for segment in edges5054:
    shanghai5054.add_children(folium.PolyLine(locations=segment))

shanghai5054


Out[35]:

In [ ]: