In [16]:
from itertools import chain
import nltk
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelBinarizer
import sklearn
import pycrfsuite
from bs4 import BeautifulSoup
import pandas as pd
from os import listdir
from os.path import isfile, join
import xml.etree.ElementTree as ET
import codecs
import numpy as np
from dateutil import parser
import scipy as sp

In [3]:
# Transportation Mode (TM) Data
FOLDER_DATA_TM = 'data/gpx/'
FOLDER_DATA_BUS = 'data/gpx/bus/'
FOLDER_DATA_WALK = 'data/gpx/wk/'
FOLDER_DATA_CAR = 'data/gpx/car/'

Read in data

Read in data from the three folders. Each data file contains a set of trkpt (traking point), and each traking point contains latitude, longitude, elevation and timestamp information.

The sampling rate varied from files to files, and varied within a file as well. Thus the time between every trkpots are not necessarily the same. I found out that for some trace (each file contains a trace), the sampling rate for the end of the trace are sometimes quite slow because sometime people stop moving.


In [4]:
"""
READ IN ALL DATA FILE PATHS
"""
files = dict()
files["bus"] = [f for f in listdir(FOLDER_DATA_BUS) if isfile(join(FOLDER_DATA_BUS, f))]
files["walk"] = [f for f in listdir(FOLDER_DATA_WALK) if isfile(join(FOLDER_DATA_WALK, f))]
files["car"] = [f for f in listdir(FOLDER_DATA_CAR) if isfile(join(FOLDER_DATA_CAR, f))]

In [4]:
# tm = transportation mode
df = pd.DataFrame(columns=("trace_id", "ts", "lat", "lon", "tm"))

In [5]:
for tm in ["bus", "walk", "car"]:
    if tm == "walk":
        folder_root = FOLDER_DATA_WALK
    elif tm == "car":
        folder_root = FOLDER_DATA_CAR
    elif tm == "bus":
        folder_root = FOLDER_DATA_BUS
        
    for f_name in files[tm]:
        f = codecs.open(folder_root+f_name, 'r', encoding='utf8')
        print f_name
        try:
            # Read in traking points, each contains lat, longitude and timestamp
            soup = BeautifulSoup(f)
            trkpts = soup.find_all('trkpt')
            n, d = df.shape
            for trkpt in trkpts:
                trace_id = f_name.lower().replace(" ", "_")
                df.loc[n-1] = [trace_id, trkpt.time.string, trkpt["lat"], trkpt["lon"], tm]
                n = n+1
        except:
#             print trace_id
            print f_name

        f.close()


2011-07-21 0943 bus.gpx
2011-07-28 1119 bus.gpx
2011-07-28 1749 bus.gpx
2011-10-06 0805 bus-1.gpx
2011-10-06 0805 bus-2.gpx
2011-11-23 1004 bus-2.gpx
2011-11-23 1004 bus1.gpx
2011-11-28 1637 Riute 4A.gpx
2011-11-28 1648 Route 15 .gpx
2011-11-30 1108 Route 15.gpx
2011-12-01 1139 Route 4 B.gpx
2011-12-01 1147 Route 15.gpx
2011-12-06 1104 Route 15.gpx
2011-12-07 1118 Route3.gpx
2011-12-08 0812 Route15.gpx
2011-12-08 1146 Route 15.gpx
2011-12-09 0935 Route 15.gpx
2012-01-05 1637 Route 4b.gpx
2012-01-05 1647 15.gpx
2012-01-06 0937 ROUTE 15.gpx
2012-01-06 0942 Route 4.gpx
2012-01-06 1638 Route 3 (1).gpx
2012-01-06 1649 15 (1).gpx
2012-01-10 1108 15 (1).gpx
2012-01-10 1121 3 FromD (1).gpx
2012-01-10 1745 3 Tod (1).gpx
2012-01-12 1108 15.gpx
2012-01-13 1119 Route 3 ToNQ.gpx
2012-02-07 1103 Bus.gpx
April 10, 2012 at 10_49bus.gpx
April 6, 2012 at 9_29 Bus.gpx
April 9, 2012 at 16_52 Bus.gpx
December 12, 2011 at 2_36 PM Route 4A És15.gpx
December 12, 2011 at 2_36 PM Route 4A És15.gpx
December 14, 2011 at 8_40 AM Route 15 2A-2.gpx
December 14, 2011 at 8_40 AM Route 15 2A.gpx
December 6, 2011 at 9_21 AM 4A.gpx
November 30, 2011 at 5_15 PM 15.gpx
2011-06-20 1632 wk.gpx
2011-06-20 1752 wk.gpx
2011-07-19 1801 wk.gpx
2011-07-21 0943 wk.gpx
2011-07-21 1334 wk.gpx
2011-07-21 1453 wk.gpx
2011-07-25 1947 wk.gpx
2011-07-27 1521 wk.gpx
2011-07-27 1727 wk.gpx
2011-07-27 1734 wk.gpx
2011-07-28 1146 wk.gpx
2011-07-28 1749 wk.gpx
2011-08-04 0945 wk.gpx
2011-08-04 1135 wk.gpx
2011-08-04 1920 wk.gpx
2011-08-25 1319 wk.gpx
2011-08-26 1037 wk.gpx
2011-08-26 1114 wk.gpx
2011-08-26 1227 wk.gpx
2011-09-20 0857 walk.gpx
2011-09-28 1534 walk.gpx
2011-09-30 2046 wk.gpx
2012-02-07 1103 wk.gpx
2012-02-07 1429 Walk.gpx
2012-02-07 1741 Walk.gpx
2012-02-07 1752 Wall.gpx
2012-02-21 0935 walk.gpx
2012-03-07 1712 Walk.gpx
2012-03-19 0909 Wk.gpx
2012-03-19 1058 Wk.gpx
2012-03-21 0903 Wk.gpx
2012-03-21 1115 Wk.gpx
2012-03-21 1559 Wk.gpx
2012-03-22 1343 Wk.gpx
2012-03-22 1425 Wk.gpx
2012-03-22 1605 Wk.gpx
2012-03-23 0952 Wk.gpx
2012-03-23 1239 Wk.gpx
2012-03-23 1341 Wk.gpx
2012-03-23 1605 Wk.gpx
2012-03-27 1436 Wk.gpx
2012-03-27 1603 Wk.gpx
2012-03-28 1048 wk.gpx
2012-03-29 1426 wk.gpx
2012-03-29 1557 Wk.gpx
2012-03-30 0956 Wk.gpx
2012-03-31 1700 Wk.gpx
2012-04-03 1434 Wk.gpx
2012-04-03 1530 Wk.gpx
2012-04-03 1959 Wk.gpx
2012-04-05 1436 WK.gpx
2012-04-09 1133 Wk.gpx
April 6, 2012 at 11_44 Walk.gpx
Tk Wu wk.gpx
wk.gpx
2011-06-18 0947 dr.gpx
2011-06-18 1056 dr.gpx
2011-06-19 0959 dr.gpx
2011-06-19 1004 dr.gpx
2011-06-19 1947 dr.gpx
2011-06-20 0913 dr.gpx
2011-06-20 1344 dr.gpx
2011-07-01 1948 dr.gpx
2011-07-01 2333 dr.gpx
2011-07-02 0912 dr.gpx
2011-07-04 1314 dr.gpx
2011-07-05 1849 dr.gpx
2011-07-06 2351 dr.gpx
2011-07-15 1942 dr.gpx
2011-07-19 0029 dr.gpx
2011-07-20 1430 dr.gpx
2011-07-20 1603 dr.gpx
2011-07-20 1948 dr.gpx
2011-07-21 0051 dr.gpx
2011-07-22 1908 dr.gpx
2011-07-22 2120 dr.gpx
2011-07-23 1814 dr.gpx
2011-07-24 0029 dr.gpx
2011-07-24 1214 dr.gpx
2011-07-24 1220 dr.gpx
2011-07-24 1838 dr.gpx
2011-07-24 2103 dr.gpx
2011-07-24 2315 dr.gpx
2011-07-24 2325 dr.gpx
2011-07-26 2048 dr.gpx
2011-07-26 2106 dr.gpx
2011-07-26 2120 dr.gpx
2011-07-27 0006 state toward north Dr.gpx
2011-07-28 2209 dr.gpx
2011-07-29 0930 dr.gpx
2011-07-29 0938 dr.gpx
2011-07-29 0948 dr.gpx
2011-07-29 1203 dr.gpx
2011-07-30 2114 dr.gpx
2011-07-31 1416 dr.gpx
2011-07-31 1801 dr.gpx
2011-08-01 0138 dr.gpx
2011-08-01 1331 dr.gpx
2011-08-01 1654 dr.gpx
2011-08-02 1256 dr.gpx
2011-08-02 2153 dr.gpx
2011-08-03 1257 dr.gpx
2011-08-03 1320 dr.gpx
2011-08-03 1423 dr.gpx
2011-08-04 1141 dr.gpx
2011-08-04 1833 dr.gpx
2011-08-12 1612 dr.gpx
2011-08-23 1915 dr.gpx
2011-08-25 1246 dr.gpx
2011-08-25 1827 dr.gpx
2011-08-26 0943 dr.gpx
2011-08-26 0943 From north  campus to Dairy on main st DR.gpx
2011-08-26 2031 dr.gpx
2011-08-27 1452 dr.gpx
2011-08-27 1958 dr.gpx
2011-08-27 2111 dr.gpx
2011-08-29 1032 dr.gpx
2011-09-08 1449 dr.gpx
2011-09-15 1742 dr.gpx
2011-09-15 2018 dr.gpx
2011-09-17 0928 dr.gpx
2011-10-08 1017 dr.gpx
2011-10-09 1434 dr.gpx
2011-10-30 1610 dr.gpx
2011-11-04 1943 drive.gpx
2011-11-06 1852 dr.gpx
2011-11-11 0807 dr.gpx
2011-11-11 1314 dr.gpx
2011-11-11 1324 dr.gpx
2011-11-11 1743 dr.gpx
2011-11-12 1457 dr.gpx
2011-11-16 1719 dr.gpx
2011-11-16 1732 dr.gpx
2011-11-16 2018 dr.gpx
2011-11-19 1359 dr.gpx
2011-11-19 1445 dr.gpx
2011-11-19 1630 dr.gpx
2011-11-19 2059 dr.gpx
2011-11-21 1018 dr.gpx
2011-11-21 1637 dr.gpx
2011-11-21 1901 dr.gpx
2011-11-22 2053 dr.gpx
2011-11-25 0707 dr.gpx
2011-11-25 1038 dr.gpx
2011-11-26 1354 dr.gpx
2011-11-26 1630 dr.gpx
2011-11-27 0930 dr.gpx
2011-11-27 1254 dr.gpx
2011-11-27 1623 dr.gpx
2011-11-27 1740 dr.gpx
2011-11-27 1843 dr.gpx
2011-12-03 1758 dr.gpx
2011-12-11 1337 dr.gpx
2011-12-21 1927 dr.gpx
2011-12-22 1258 dr.gpx
2011-12-22 1542 dr.gpx
2012-01-14 2011 dr.gpx
2012-01-23 1043 dr.gpx
2012-01-27 1452 dr.gpx
2012-01-28 1423 dr.gpx
2012-01-31 2047 Dr.gpx
2012-01-31 2113 dr.gpx
2012-02-02 1137 Dr.gpx
2012-02-03 1927 Dr.gpx
2012-02-04 1323 Dr.gpx
2012-02-04 1909 Drive.gpx
2012-02-04 2315 Dr.gpx
2012-02-04 2326dr.gpx
2012-02-06 0845 Dr.gpx
2012-02-06 1850 Drive.gpx
2012-02-07 0939 Drive.gpx
2012-02-07 1834 Drive.gpx
2012-02-08 1013 Dribr.gpx
2012-02-27 1833 dr.gpx
2012-02-29 1322chicago.gpx
2012-03-02 2124 Dr.gpx
2012-03-03 1345 Dr.gpx
2012-03-03 1501 Dr.gpx
2012-03-03 1527 Dr.gpx
2012-03-04 1002 Dr.gpx
2012-03-04 1115 Dr.gpx
2012-03-04 1247 Dr.gpx
2012-03-04 1427 Dr.gpx
2012-03-07 1807 Dr.gpx
2012-03-09 1755 Dr.gpx
2012-03-09 1942 Dr.gpx
2012-03-10 1025 Dr.gpx
2012-03-10 1232 Dr_2.gpx
2012-03-10 1915 Dr.gpx
2012-03-13 1640 Dr.gpx
2012-03-16 1829 Dr.gpx
2012-03-17 2245 Dr.gpx
2012-03-18 1323 Dr.gpx
2012-03-19 0857 Dr.gpx
2012-03-19 1029 Dr.gpx
2012-03-20 1145 Dr.gpx
2012-03-21 0856 Dr.gpx
2012-03-21 1106 Dr.gpx
2012-03-22 0233 Dr.gpx
2012-03-22 0245 Dr.gpx
2012-03-23 0945 Dr.gpx
2012-03-23 1608 Dr.gpx
2012-03-23 1623 dr.gpx
2012-03-24 1214 Dr.gpx
2012-03-25 1304 Dr.gpx
2012-03-25 1719 Dr.gpx
2012-03-26 0102 Dr.gpx
2012-03-26 0849 Dr.gpx
2012-03-26 1038 Dr.gpx
2012-03-26 2001 Dr.gpx
2012-03-28 0851 Dr.gpx
2012-03-28 1037 Dr.gpx
2012-03-28 1706 Dr.gpx
2012-03-28 1911 Dr.gpx
2012-03-29 1103 Dr.gpx

Export files for future usage.


In [8]:
df.to_csv(FOLDER_DATA_TM+"raw_tm_dataframe~.csv")

In [414]:
df = pd.read_csv(FOLDER_DATA_TM+"raw_tm_dataframe~.csv")

Number of transportation mode instances


In [420]:
len(df)


Out[420]:
92157

Segmentation

There are three major units in this transportation dataset.

  • A trace, which is contained within a file;
  • A segment of the trace;
  • A tracking point.

Trace and traking points were introduced above, thus here I would only talk about segments.

In this dataset, an individual's manually record each trace, and each trace has quite clear start and end, and contains only one type of transportation mode. (Thus it ignores the transition between different transportation modes, which is not a good idea actually.)

There are multiple ways to group the traking points in each trace into segments so to compute the required features. For example, we could group the points by fixed time, or dynamic time given the speed of users, or group by distance or number of points.

The below code segments the data by time, and the duration is set to 120 seconds. The segmentation is done by calculating the time lapse from the beginning of a trace to a specific traking point, and then we can easily get the segment index from that time lapse.


In [7]:
"""
CREATE SEGMENTATION
"""
SEGMENTATION_DURATION = 120
def get_time_lapse(group):
    trace_id = group.iloc[0]["trace_id"]
    starttime = parser.parse(group.iloc[0]["ts"])
    group["time_lapse"] = group["ts"].apply(lambda x: (parser.parse(x) - starttime).total_seconds())
    group["seg_name"] = group["time_lapse"].apply(lambda x: trace_id+"__"+str(int(x/SEGMENTATION_DURATION)))
    group["seg_id"] = group["time_lapse"].apply(lambda x: int(x/SEGMENTATION_DURATION))
    
    return group

df = df.groupby("trace_id").apply(get_time_lapse)

Given the GPS information, we want to calculate the distance between segments


In [9]:
from math import radians, cos, sin, asin, sqrt

def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 

    # 6367 km is the radius of the Earth
    km = 6367 * c
    return km

In [10]:
"""
Get distance and velocity
"""
for index, row in df.iterrows():
    try:
        cur_trace = df.ix[index, "trace_id"]
        next_trace = df.ix[index+1, "trace_id"]
        
#         print lat1, lon1
        
        if cur_trace == next_trace:
            # distance
            lat1, lon1 = df.ix[index, ["lat", "lon"]]
            lat2, lon2 = df.ix[index+1, ["lat", "lon"]]
            dist = haversine(float(lon1), float(lat1), float(lon2), float(lat2)) * 1000
            df.ix[index, "dist_m"] = dist
        
            # duration
            t1 = df.ix[index, "time_lapse"]
            t2 = df.ix[index+1, "time_lapse"]
            dur = t2 - t1
        
            # speed m/s
            df.ix[index, "speed"] = dist/dur
            
        else:
            df.ix[index, "dist_m"] = None
            df.ix[index, "speed"] = None
    except:
        df.ix[index, "dist_m"] = None
        df.ix[index, "speed"] = None

Since to get acceleration we need velocity information of the future data point, we need to separate acceleration extraction from the above code.


In [11]:
"""
Get acceleration
"""
for index, row in df.iterrows():
    try:
        cur_trace = df.ix[index, "trace_id"]
        next_trace = df.ix[index+1, "trace_id"]
        
#         print lat1, lon1
        
        if cur_trace == next_trace:
            # acceleration
            v1 = df.ix[index, "speed"]
            v2 = df.ix[index+1, "speed"]
            t1 = df.ix[index, "time_lapse"]
            t2 = df.ix[index+1, "time_lapse"]
            dur = t2 - t1
            acce = (v2 - v1)/dur
            df.ix[index, "acce"] = acce
        else:
            df.ix[index, "dist_m"] = None
    except:
        df.ix[index, "dist_m"] = None

In [12]:
df.to_csv(FOLDER_DATA_TM+"tm_dataframe~.csv")

In [13]:
df.dropna(subset=["speed", "dist_m", "acce"], inplace=True)

Pandas treat speed, distance and acceleration as an object, however, we want it to be float.


In [14]:
df[["speed", "dist_m", "acce"]] = df[["speed", "dist_m", "acce"]].astype(float)
df_seg = df.groupby(["trace_id", "seg_id"])

Obtain features for segments

Now we have the distance between each tracking points as well as speed and acceleration, we could obtain features for segments (i.e., a group of tracking points.)

These features include maximum speed of a group of tracking points, mean and variation of speed; other features related to acceleration.


In [17]:
"""
SPEED RELATED FEATURES
"""
def get_top3_speed(data):
    data.sort(ascending=False)
    return data.iloc[0:3].mean() 

df_seg = df.groupby(["trace_id", "seg_id"])["speed"].agg({"max_speed": np.max,
                                            "avg_speed": np.mean,
                                            "avg_top3_speed": get_top3_speed,
                                            "var_speed": np.var
                                            })

df_seg


Out[17]:
avg_speed max_speed avg_top3_speed var_speed
trace_id seg_id
2011-06-18_0947_dr.gpx 0 11.538154 54.752102 30.830751 46.466349
1 7.706204 16.443149 14.541437 7.974436
2 15.391269 28.559408 25.841018 15.343339
3 14.444493 24.677293 21.270534 25.976733
4 5.307677 7.018591 6.559883 1.374237
2011-06-18_1056_dr.gpx 0 4.651058 12.223762 10.795077 9.897790
1 16.516460 20.673961 20.673892 12.906775
2 13.723858 18.223492 18.189504 19.199767
3 7.085528 17.424587 15.745808 41.682044
4 7.713151 15.234336 14.884946 28.962270
5 0.030393 0.030393 0.030393 NaN
6 0.010181 0.010181 0.010181 NaN
10 0.005702 0.005702 0.005702 NaN
18 0.011152 0.011152 0.011152 NaN
2011-06-19_0959_dr.gpx 0 14.730080 21.027324 20.204101 20.120307
1 9.056485 17.263750 16.193957 13.881236
2011-06-19_1004_dr.gpx 0 14.542129 20.532938 20.503918 13.108442
1 14.948355 21.834748 21.696186 26.525654
2 18.715354 34.536957 34.124536 87.884409
3 32.769273 36.432512 35.979849 3.957461
4 34.470555 37.858140 37.836353 5.070147
5 35.571595 37.194107 37.153577 0.630312
6 36.899610 43.136093 42.494370 5.425723
7 35.614048 38.684000 37.838210 0.556182
8 35.863728 38.968410 38.489758 1.166677
9 32.397064 37.471299 37.324761 13.994028
10 18.732742 33.377951 33.377950 59.550911
11 35.070955 37.818208 37.797401 5.418521
12 35.881081 38.845640 38.264049 0.791673
13 35.509204 38.971507 38.208492 2.418115
... ... ... ... ... ...
april_6,_2012_at_11_44_walk.gpx 0 5.819242 13.957784 13.021371 24.796552
1 2.172601 2.685488 2.172601 0.721471
april_6,_2012_at_9_29_bus.gpx 0 13.009367 19.599746 19.355153 33.694260
1 10.666147 19.613936 18.320933 23.856696
2 6.773855 11.575950 10.484743 14.140069
april_9,_2012_at_16_52_bus.gpx 0 10.240259 19.539810 17.940452 26.516630
1 8.886948 15.193357 14.494787 16.767020
december_14,_2011_at_8_40_am_route_15_2a-2.gpx 0 3.659817 8.844575 7.389603 6.247185
1 5.638129 17.520835 12.752477 18.547910
december_14,_2011_at_8_40_am_route_15_2a.gpx 0 6.007853 10.396240 10.045129 11.939613
1 6.034304 12.785005 12.018296 16.488222
2 7.316891 14.930311 12.463165 8.174243
3 5.561768 10.016671 9.902695 9.982387
4 4.036449 17.530768 11.933783 13.494974
5 0.844765 0.844765 0.844765 NaN
december_6,_2011_at_9_21_am_4a.gpx 0 4.164905 10.722410 10.619580 12.127728
1 8.786454 12.233028 12.131521 12.890448
2 6.239269 13.311453 11.988907 9.654239
3 4.825693 9.170460 7.875622 5.963478
november_30,_2011_at_5_15_pm_15.gpx 0 1.564586 5.628504 4.217703 2.270539
1 6.725017 52.353157 26.477631 91.999173
2 5.633766 12.048296 10.410869 9.766548
tk_wu_wk.gpx 0 1.980868 5.529358 5.371810 1.243029
1 1.857171 4.520404 3.758065 0.901414
wk.gpx 0 1.878076 4.673194 4.119896 0.872023
1 1.882308 5.616758 5.031863 2.123816
2 2.281483 10.646294 5.079502 8.165915
3 1.770278 4.445005 4.127848 0.918916
4 1.781270 4.682886 4.266384 0.737536
5 2.183998 8.516822 3.416336 12.693845

1386 rows × 4 columns


In [18]:
def get_top3_acce(data):
    data.sort(ascending=False)
    return data.iloc[0:3].mean()

df_seg_acce = df.groupby(["trace_id", "seg_id"])["acce"].agg({"avg_acce": np.mean,
                                             "top_acce": np.max,
                                             "avg_top3_acce": get_top3_acce,
                                             "var_acce": np.var})
df_seg_acce


Out[18]:
top_acce var_acce avg_top3_acce avg_acce
trace_id seg_id
2011-06-18_0947_dr.gpx 0 5.737922 22.524166 3.673204 -0.803202
1 4.879425 2.580380 3.611650 0.038851
2 10.731275 9.000715 9.211271 -0.062543
3 6.453855 3.917804 4.735725 -0.216421
4 1.157104 0.177065 0.468718 0.047855
2011-06-18_1056_dr.gpx 0 3.844423 2.804401 2.380079 -0.270697
1 3.842815 2.034245 3.082494 0.106425
2 2.602753 1.805781 2.267103 -0.283843
3 0.774225 0.232625 0.613609 0.091476
4 0.930638 0.276874 0.531812 -0.150022
5 -0.000114 NaN -0.000114 -0.000114
6 -0.000010 NaN -0.000010 -0.000010
10 0.000005 NaN 0.000005 0.000005
18 -0.000017 NaN -0.000017 -0.000017
2011-06-19_0959_dr.gpx 0 4.215468 1.545063 3.624399 0.094218
1 2.530010 2.083043 2.226849 -0.388055
2011-06-19_1004_dr.gpx 0 5.054952 2.263633 4.545702 0.001071
1 3.861887 2.394634 3.483079 -0.044449
2 3.392235 1.702455 3.285336 0.187826
3 2.216019 1.161041 2.189478 -0.030641
4 3.284631 1.316259 3.031594 0.069257
5 2.681952 1.416252 2.506367 0.001439
6 3.123779 1.604212 3.113585 -0.018504
7 4.144663 1.324183 3.092399 0.006549
8 2.597708 1.425680 2.519140 -0.019254
9 3.359982 1.858651 3.087741 -0.012728
10 4.169359 2.683024 3.570229 -0.115639
11 3.254743 1.843704 2.659779 0.080165
12 2.783200 1.937480 2.714856 0.013593
13 2.221872 1.242524 2.209210 -0.040990
... ... ... ... ... ...
april_6,_2012_at_11_44_walk.gpx 0 1.114246 1.710861 0.768834 -0.336250
1 -0.004533 0.000615 -0.031116 -0.031116
april_6,_2012_at_9_29_bus.gpx 0 3.482772 2.362665 1.993037 -0.257359
1 2.651707 5.786086 2.332451 -0.573997
2 1.145005 2.931728 0.832442 -0.564865
april_9,_2012_at_16_52_bus.gpx 0 3.135459 3.236233 2.107357 -0.308058
1 1.211598 1.187395 1.016238 -0.226627
december_14,_2011_at_8_40_am_route_15_2a-2.gpx 0 1.290801 0.548269 1.017371 -0.193513
1 3.266618 2.137572 1.567656 -0.263413
december_14,_2011_at_8_40_am_route_15_2a.gpx 0 1.784168 0.777785 1.253936 -0.108496
1 2.005384 1.330671 1.480908 -0.192867
2 2.325987 1.160495 1.470254 -0.038940
3 1.475464 0.761920 1.233487 -0.212175
4 2.226260 2.890826 1.995514 -0.198894
5 0.002637 NaN 0.002637 0.002637
december_6,_2011_at_9_21_am_4a.gpx 0 0.986310 0.332616 0.916140 -0.009714
1 1.336505 0.564952 0.977997 -0.087539
2 1.145576 0.536762 0.897760 -0.122037
3 1.385260 1.080696 1.116700 -0.056140
november_30,_2011_at_5_15_pm_15.gpx 0 0.616260 0.129100 0.237585 -0.146056
1 8.084065 16.690987 3.702041 -0.487813
2 1.263224 0.929744 1.202770 -0.256439
tk_wu_wk.gpx 0 4.234532 1.662940 3.379092 -0.068441
1 3.698219 1.778431 2.457884 0.015324
wk.gpx 0 2.764669 1.206307 2.537047 -0.070713
1 2.516915 1.651628 2.106118 -0.254529
2 3.248170 2.942989 1.253711 -0.344853
3 2.586934 1.192527 2.302683 -0.068433
4 2.497953 1.229380 2.385920 -0.072211
5 1.204856 2.219767 0.511072 -0.250595

1386 rows × 4 columns


In [19]:
def get_tm(data):
    return data.iloc[0]
df_seg_tm = df.groupby(["trace_id", "seg_id"])["tm"].agg({"tm": get_tm})

In [20]:
df_seg = df_seg.join(df_seg_acce)
df_seg = df_seg.join(df_seg_tm)

In [21]:
df_seg.dropna(inplace=True)

In [22]:
df_seg["tm"].value_counts()


Out[22]:
car     1031
walk     222
bus      111
dtype: int64

Discretize the data into N quantiles.


In [71]:
N_BINS_MAX_SPEED = 6

for col in df_seg.columns:
    try:
        field_name = "d_" + col
        df_seg[field_name] = pd.qcut(df_seg[col], N_BINS_MAX_SPEED, labels=[x+1 for x in range(N_BINS_MAX_SPEED)])
    except:
        continue

In [72]:
def get_d_tm(tm):
    tm_to_d = dict()
    tm_to_d = {
        "walk": 1,
        "car": 2,
        "bus": 3
    }
    return tm_to_d[tm]

df_seg["d_tm"] = df_seg["tm"].apply(get_d_tm)

In [396]:
df_seg


Out[396]:
avg_speed max_speed avg_top3_speed var_speed top_acce var_acce avg_top3_acce avg_acce tm d_avg_speed ... d_avg_acce d_tm d_d_avg_speed d_d_max_speed d_d_avg_top3_speed d_d_var_speed d_d_top_acce d_d_var_acce d_d_avg_top3_acce d_d_avg_acce
trace_id seg_id
2011-06-18_0947_dr.gpx 0 11.538154 54.752102 30.830751 46.466349 5.737922 22.524166 3.673204 -0.803202 car 4 ... 1 2 4 6 6 6 6 6 5 1
1 7.706204 16.443149 14.541437 7.974436 4.879425 2.580380 3.611650 0.038851 car 3 ... 6 2 3 3 3 3 5 5 5 6
2 15.391269 28.559408 25.841018 15.343339 10.731275 9.000715 9.211271 -0.062543 car 5 ... 3 2 5 6 6 4 6 6 6 3
3 14.444493 24.677293 21.270534 25.976733 6.453855 3.917804 4.735725 -0.216421 car 5 ... 1 2 5 5 5 5 6 6 6 1
4 5.307677 7.018591 6.559883 1.374237 1.157104 0.177065 0.468718 0.047855 car 2 ... 6 2 2 2 2 2 2 1 1 6
2011-06-18_1056_dr.gpx 0 4.651058 12.223762 10.795077 9.897790 3.844423 2.804401 2.380079 -0.270697 car 2 ... 1 2 2 2 2 3 4 5 3 1
1 16.516460 20.673961 20.673892 12.906775 3.842815 2.034245 3.082494 0.106425 car 5 ... 6 2 5 5 5 4 4 4 4 6
2 13.723858 18.223492 18.189504 19.199767 2.602753 1.805781 2.267103 -0.283843 car 5 ... 1 2 5 4 4 5 3 4 3 1
3 7.085528 17.424587 15.745808 41.682044 0.774225 0.232625 0.613609 0.091476 car 3 ... 6 2 3 4 3 6 1 1 1 6
4 7.713151 15.234336 14.884946 28.962270 0.930638 0.276874 0.531812 -0.150022 car 3 ... 2 2 3 3 3 6 1 1 1 2
2011-06-19_0959_dr.gpx 0 14.730080 21.027324 20.204101 20.120307 4.215468 1.545063 3.624399 0.094218 car 5 ... 6 2 5 5 5 5 5 3 5 6
1 9.056485 17.263750 16.193957 13.881236 2.530010 2.083043 2.226849 -0.388055 car 4 ... 1 2 4 4 3 4 3 4 3 1
2011-06-19_1004_dr.gpx 0 14.542129 20.532938 20.503918 13.108442 5.054952 2.263633 4.545702 0.001071 car 5 ... 5 2 5 5 5 4 5 4 6 5
1 14.948355 21.834748 21.696186 26.525654 3.861887 2.394634 3.483079 -0.044449 car 5 ... 4 2 5 5 5 6 4 5 5 4
2 18.715354 34.536957 34.124536 87.884409 3.392235 1.702455 3.285336 0.187826 car 6 ... 6 2 6 6 6 6 4 3 4 6
3 32.769273 36.432512 35.979849 3.957461 2.216019 1.161041 2.189478 -0.030641 car 6 ... 4 2 6 6 6 2 2 2 3 4
4 34.470555 37.858140 37.836353 5.070147 3.284631 1.316259 3.031594 0.069257 car 6 ... 6 2 6 6 6 2 3 3 4 6
5 35.571595 37.194107 37.153577 0.630312 2.681952 1.416252 2.506367 0.001439 car 6 ... 5 2 6 6 6 1 3 3 3 5
6 36.899610 43.136093 42.494370 5.425723 3.123779 1.604212 3.113585 -0.018504 car 6 ... 4 2 6 6 6 2 3 3 4 4
7 35.614048 38.684000 37.838210 0.556182 4.144663 1.324183 3.092399 0.006549 car 6 ... 5 2 6 6 6 1 5 3 4 5
8 35.863728 38.968410 38.489758 1.166677 2.597708 1.425680 2.519140 -0.019254 car 6 ... 4 2 6 6 6 2 3 3 3 4
9 32.397064 37.471299 37.324761 13.994028 3.359982 1.858651 3.087741 -0.012728 car 6 ... 5 2 6 6 6 4 4 4 4 5
10 18.732742 33.377951 33.377950 59.550911 4.169359 2.683024 3.570229 -0.115639 car 6 ... 2 2 6 6 6 6 5 5 5 2
11 35.070955 37.818208 37.797401 5.418521 3.254743 1.843704 2.659779 0.080165 car 6 ... 6 2 6 6 6 2 3 4 3 6
12 35.881081 38.845640 38.264049 0.791673 2.783200 1.937480 2.714856 0.013593 car 6 ... 5 2 6 6 6 1 3 4 3 5
13 35.509204 38.971507 38.208492 2.418115 2.221872 1.242524 2.209210 -0.040990 car 6 ... 4 2 6 6 6 2 2 3 3 4
14 32.687807 37.929132 37.545561 8.975534 2.895628 1.615188 2.864404 0.032111 car 6 ... 5 2 6 6 6 3 3 3 4 5
15 35.734783 37.022590 37.022393 0.480017 2.447435 1.084308 2.222972 0.000699 car 6 ... 5 2 6 6 6 1 3 2 3 5
16 35.910076 40.167184 40.162056 3.015150 2.441900 1.212370 2.216436 -0.058016 car 6 ... 3 2 6 6 6 2 3 2 3 3
17 24.020829 33.752188 33.494711 43.963003 4.429903 2.551099 3.663095 -0.197392 car 6 ... 1 2 6 6 6 6 5 5 5 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
april_10,_2012_at_10_49bus.gpx 2 7.377441 11.082855 10.121430 11.868290 0.611137 1.560010 0.569477 -0.312668 bus 3 ... 1 3 3 2 2 3 1 3 1 1
april_6,_2012_at_11_44_walk.gpx 0 5.819242 13.957784 13.021371 24.796552 1.114246 1.710861 0.768834 -0.336250 walk 2 ... 1 1 2 3 3 5 2 3 2 1
1 2.172601 2.685488 2.172601 0.721471 -0.004533 0.000615 -0.031116 -0.031116 walk 2 ... 4 1 2 1 1 1 1 1 1 4
april_6,_2012_at_9_29_bus.gpx 0 13.009367 19.599746 19.355153 33.694260 3.482772 2.362665 1.993037 -0.257359 bus 5 ... 1 3 5 4 5 6 4 4 3 1
1 10.666147 19.613936 18.320933 23.856696 2.651707 5.786086 2.332451 -0.573997 bus 4 ... 1 3 4 4 4 5 3 6 3 1
2 6.773855 11.575950 10.484743 14.140069 1.145005 2.931728 0.832442 -0.564865 bus 3 ... 1 3 3 2 2 4 2 5 2 1
april_9,_2012_at_16_52_bus.gpx 0 10.240259 19.539810 17.940452 26.516630 3.135459 3.236233 2.107357 -0.308058 bus 4 ... 1 3 4 4 4 6 3 5 3 1
1 8.886948 15.193357 14.494787 16.767020 1.211598 1.187395 1.016238 -0.226627 bus 3 ... 1 3 3 3 3 4 2 2 2 1
december_14,_2011_at_8_40_am_route_15_2a-2.gpx 0 3.659817 8.844575 7.389603 6.247185 1.290801 0.548269 1.017371 -0.193513 bus 2 ... 1 3 2 2 2 3 2 2 2 1
1 5.638129 17.520835 12.752477 18.547910 3.266618 2.137572 1.567656 -0.263413 bus 2 ... 1 3 2 4 3 5 3 4 2 1
december_14,_2011_at_8_40_am_route_15_2a.gpx 0 6.007853 10.396240 10.045129 11.939613 1.784168 0.777785 1.253936 -0.108496 bus 2 ... 3 3 2 2 2 3 2 2 2 3
1 6.034304 12.785005 12.018296 16.488222 2.005384 1.330671 1.480908 -0.192867 bus 2 ... 1 3 2 2 3 4 2 3 2 1
2 7.316891 14.930311 12.463165 8.174243 2.325987 1.160495 1.470254 -0.038940 bus 3 ... 4 3 3 3 3 3 2 2 2 4
3 5.561768 10.016671 9.902695 9.982387 1.475464 0.761920 1.233487 -0.212175 bus 2 ... 1 3 2 2 2 3 2 2 2 1
4 4.036449 17.530768 11.933783 13.494974 2.226260 2.890826 1.995514 -0.198894 bus 2 ... 1 3 2 4 2 4 2 5 3 1
december_6,_2011_at_9_21_am_4a.gpx 0 4.164905 10.722410 10.619580 12.127728 0.986310 0.332616 0.916140 -0.009714 bus 2 ... 5 3 2 2 2 3 2 1 2 5
1 8.786454 12.233028 12.131521 12.890448 1.336505 0.564952 0.977997 -0.087539 bus 3 ... 3 3 3 2 3 4 2 2 2 3
2 6.239269 13.311453 11.988907 9.654239 1.145576 0.536762 0.897760 -0.122037 bus 3 ... 2 3 3 3 2 3 2 2 2 2
3 4.825693 9.170460 7.875622 5.963478 1.385260 1.080696 1.116700 -0.056140 bus 2 ... 3 3 2 2 2 3 2 2 2 3
november_30,_2011_at_5_15_pm_15.gpx 0 1.564586 5.628504 4.217703 2.270539 0.616260 0.129100 0.237585 -0.146056 bus 1 ... 2 3 1 1 1 2 1 1 1 2
1 6.725017 52.353157 26.477631 91.999173 8.084065 16.690987 3.702041 -0.487813 bus 3 ... 1 3 3 6 6 6 6 6 5 1
2 5.633766 12.048296 10.410869 9.766548 1.263224 0.929744 1.202770 -0.256439 bus 2 ... 1 3 2 2 2 3 2 2 2 1
tk_wu_wk.gpx 0 1.980868 5.529358 5.371810 1.243029 4.234532 1.662940 3.379092 -0.068441 walk 1 ... 3 1 1 1 2 2 5 3 5 3
1 1.857171 4.520404 3.758065 0.901414 3.698219 1.778431 2.457884 0.015324 walk 1 ... 5 1 1 1 1 1 4 3 3 5
wk.gpx 0 1.878076 4.673194 4.119896 0.872023 2.764669 1.206307 2.537047 -0.070713 walk 1 ... 3 1 1 1 1 1 3 2 3 3
1 1.882308 5.616758 5.031863 2.123816 2.516915 1.651628 2.106118 -0.254529 walk 1 ... 1 1 1 1 2 2 3 3 3 1
2 2.281483 10.646294 5.079502 8.165915 3.248170 2.942989 1.253711 -0.344853 walk 2 ... 1 1 2 2 2 3 3 5 2 1
3 1.770278 4.445005 4.127848 0.918916 2.586934 1.192527 2.302683 -0.068433 walk 1 ... 3 1 1 1 1 1 3 2 3 3
4 1.781270 4.682886 4.266384 0.737536 2.497953 1.229380 2.385920 -0.072211 walk 1 ... 3 1 1 1 1 1 3 3 3 3
5 2.183998 8.516822 3.416336 12.693845 1.204856 2.219767 0.511072 -0.250595 walk 2 ... 1 1 2 2 1 4 2 4 1 1

1364 rows × 26 columns


In [74]:
from sklearn_pandas import DataFrameMapper

In [75]:
mapper = DataFrameMapper([
        ("d_tm", None),
        ("d_avg_speed", None),
        ("d_var_speed", None),
        ('d_max_speed', None),
        ('d_avg_top3_speed', None),
        ('d_top_acce', None),
        ('d_var_acce', None),
        ('d_avg_top3_acce', None),
        ('d_avg_acce', None)
])

In [76]:
n_feature = len(mapper.features)

In [77]:
import random

In [397]:
"""
RANDOM SAMPLING
"""
rows = random.sample(df_seg.index.levels[0].values, len(df_seg.index.levels[0])/2)
df_train = df_seg.ix[rows]

# Originally I used df_seg.drop(rows), however, for some reasons drop will not actually drop 
# the index of rows. 
rows_test = set(df_seg.index.levels[0].values) - set(rows)
df_test = df_seg.ix[rows_test]

In [398]:
df_train["tm"].value_counts()


Out[398]:
car     501
walk    120
bus      57
dtype: int64

In [399]:
df_test["tm"].value_counts()


Out[399]:
car     530
walk    102
bus      54
dtype: int64

In [81]:
from scipy import io

In [82]:
for dataset_type in ["training", "testing"]:
    if dataset_type == "training":
        df_temp = df_train.copy()
    else:
        df_temp = df_test.copy()
        
    output_matrix = np.zeros(shape=(n_feature, 0))
    for index, trace_id in enumerate(df_temp.index.levels[0].values):
        trace_matrix = mapper.fit_transform(df_temp.ix[trace_id])
        
        # I don't know why some index in the training set is still in the testing set
        n, d = trace_matrix.shape
        if n == 0:
            print dataset_type, trace_id
            
        if index == len(df_temp.index.levels[0].values) - 1:
            output_matrix = np.concatenate((output_matrix, trace_matrix.T), axis=1)
        else:
            trace_matrix = np.concatenate((trace_matrix.T, np.zeros(shape=(n_feature, 1))),axis=1)
            output_matrix = np.concatenate((output_matrix, trace_matrix), axis=1)
    
    sp.io.savemat(dataset_type, mdict={'X': output_matrix}, appendmat=True)

In [83]:
df_train.to_csv("")


Out[83]:
array([[ 2.,  2.,  2., ...,  1.,  1.,  1.],
       [ 4.,  3.,  5., ...,  1.,  1.,  1.],
       [ 6.,  3.,  4., ...,  1.,  1.,  1.],
       ..., 
       [ 6.,  5.,  6., ...,  1.,  1.,  1.],
       [ 5.,  5.,  6., ...,  1.,  1.,  1.],
       [ 1.,  6.,  3., ...,  4.,  5.,  3.]])

In [317]:
df_temp.ix["2011-06-18_1056_dr.gpx"]


Out[317]:
avg_speed max_speed avg_top3_speed var_speed top_acce var_acce avg_top3_acce avg_acce tm d_avg_speed d_max_speed d_avg_top3_speed d_var_speed d_top_acce d_var_acce d_avg_top3_acce d_avg_acce d_tm
seg_id

In [319]:
df_seg.ix["2011-06-18_1056_dr.gpx"]


Out[319]:
avg_speed max_speed avg_top3_speed var_speed top_acce var_acce avg_top3_acce avg_acce tm d_avg_speed d_max_speed d_avg_top3_speed d_var_speed d_top_acce d_var_acce d_avg_top3_acce d_avg_acce d_tm
seg_id
0 4.651058 12.223762 10.795077 9.897790 3.844423 2.804401 2.380079 -0.270697 car 1 1 1 2 3 4 2 0 2
1 16.516460 20.673961 20.673892 12.906775 3.842815 2.034245 3.082494 0.106425 car 4 4 4 3 3 3 3 5 2
2 13.723858 18.223492 18.189504 19.199767 2.602753 1.805781 2.267103 -0.283843 car 4 3 3 4 2 3 2 0 2
3 7.085528 17.424587 15.745808 41.682044 0.774225 0.232625 0.613609 0.091476 car 2 3 2 5 0 0 0 5 2
4 7.713151 15.234336 14.884946 28.962270 0.930638 0.276874 0.531812 -0.150022 car 2 2 2 5 0 0 0 1 2

In [404]:
df_train.to_csv("transportation_train.csv", index=False)

In [405]:
df_test.to_csv("transportation_test.csv", index=False)

In [409]:
len(df_train)


Out[409]:
678

In [307]:
output_matrix


Out[307]:
array([['2', '2', '2', ..., '1', '1', '0.0'],
       ['3', '2', '4', ..., '0', '0', '0.0'],
       ['5', '2', '3', ..., '1', '0', '0.0'],
       ..., 
       ['5', '4', '5', ..., '2', '2', '0.0'],
       ['4', '4', '5', ..., '4', '2', '0.0'],
       ['0', '5', '2', ..., '2', '4', '0.0']], 
      dtype='|S64')

In [68]:
X = sp.io.loadmat("training.mat")

In [403]:
df_


Out[403]:
0 1 2 3 4 5 6 7 8 9 ... 552 553 554 555 556 557 558 559 560 label
0 0.257178 -0.023285 -0.014654 -0.938404 -0.920091 -0.667683 -0.952501 -0.925249 -0.674302 -0.894088 ... -0.330370 -0.705974 0.006462 0.162920 -0.825886 0.271151 -0.720009 0.276801 -0.057978 5
1 0.286027 -0.013163 -0.119083 -0.975415 -0.967458 -0.944958 -0.986799 -0.968401 -0.945823 -0.894088 ... -0.121845 -0.594944 -0.083495 0.017500 -0.434375 0.920593 -0.698091 0.281343 -0.083898 5
2 0.275485 -0.026050 -0.118152 -0.993819 -0.969926 -0.962748 -0.994403 -0.970735 -0.963483 -0.939260 ... -0.190422 -0.640736 -0.034956 0.202302 0.064103 0.145068 -0.702771 0.280083 -0.079346 5
3 0.270298 -0.032614 -0.117520 -0.994743 -0.973268 -0.967091 -0.995274 -0.974471 -0.968897 -0.938610 ... -0.344418 -0.736124 -0.017067 0.154438 0.340134 0.296407 -0.698954 0.284114 -0.077108 5
4 0.274833 -0.027848 -0.129527 -0.993852 -0.967445 -0.978295 -0.994111 -0.965953 -0.977346 -0.938610 ... -0.534685 -0.846595 -0.002223 -0.040046 0.736715 -0.118545 -0.692245 0.290722 -0.073857 5
5 0.279220 -0.018620 -0.113902 -0.994455 -0.970417 -0.965316 -0.994585 -0.969481 -0.965897 -0.937856 ... -0.493517 -0.857565 -0.095681 0.048849 0.760684 -0.072216 -0.689816 0.294896 -0.068471 5
6 0.279746 -0.018271 -0.104000 -0.995819 -0.976354 -0.977725 -0.995996 -0.973665 -0.979253 -0.937856 ... -0.093855 -0.391286 -0.309083 -0.000857 0.728514 -0.333771 -0.690085 0.295282 -0.067065 5
7 0.274601 -0.025035 -0.116831 -0.995594 -0.982069 -0.985262 -0.995341 -0.981485 -0.984610 -0.941263 ... -0.338123 -0.672358 -0.025526 -0.214033 0.437640 -0.366674 -0.688785 0.295981 -0.067706 5
8 0.272529 -0.020954 -0.114472 -0.996784 -0.975906 -0.986597 -0.997029 -0.973735 -0.985556 -0.941263 ... -0.629599 -0.853248 0.144626 -0.035564 -0.181800 0.087320 -0.685390 0.299394 -0.065491 5
9 0.275746 -0.010372 -0.099776 -0.998373 -0.986933 -0.991022 -0.998663 -0.987140 -0.991084 -0.943761 ... -0.469743 -0.741435 0.157584 -0.033487 -0.350893 0.343146 -0.684110 0.300822 -0.064263 5
10 0.278596 -0.015232 -0.098908 -0.998785 -0.981943 -0.991379 -0.998828 -0.980015 -0.991409 -0.943761 ... -0.929881 -0.986342 -0.047689 0.077152 -0.786023 -0.285308 -0.686013 0.298478 -0.066625 5
11 0.279152 -0.021879 -0.109731 -0.997781 -0.992951 -0.985680 -0.997710 -0.992678 -0.984940 -0.943979 ... -0.633997 -0.842658 -0.240531 0.220424 0.079633 -0.258711 -0.685710 0.297687 -0.069092 5
12 0.274544 -0.023145 -0.112540 -0.996205 -0.991573 -0.987518 -0.996520 -0.992061 -0.987128 -0.941286 ... -0.766249 -0.939058 -0.019406 -0.202773 -0.114963 -0.157723 -0.683619 0.299401 -0.068704 5
13 0.269066 -0.027686 -0.110178 -0.996884 -0.986440 -0.988479 -0.997498 -0.987389 -0.989487 -0.941286 ... -0.605233 -0.879209 0.021211 -0.145114 -0.018868 0.648561 -0.680503 0.302289 -0.067274 5
14 0.275579 -0.018936 -0.097410 -0.996065 -0.968225 -0.980696 -0.996218 -0.964627 -0.982347 -0.943252 ... -0.720919 -0.930019 -0.046559 0.119279 0.060107 0.397350 -0.674944 0.306351 -0.067373 5
15 0.281931 -0.004881 -0.086106 -0.989076 -0.959006 -0.973024 -0.993782 -0.967953 -0.977848 -0.920505 ... 0.000577 -0.447324 0.016685 0.214136 0.770044 0.890289 -0.672948 0.306460 -0.070759 5
16 0.311078 -0.019431 -0.101866 -0.936688 -0.840186 -0.816826 -0.941337 -0.848833 -0.812606 -0.841582 ... 0.063405 -0.271344 -0.777828 0.123597 -0.428679 -0.756152 -0.591690 0.379258 -0.004655 5
17 0.262328 -0.023257 -0.125525 -0.984561 -0.913380 -0.912673 -0.983951 -0.907203 -0.900435 -0.938526 ... -0.621129 -0.818217 0.173074 0.080321 -0.459855 0.402487 -0.598653 0.374095 -0.008468 5
18 0.288416 -0.003485 -0.083828 -0.994571 -0.978360 -0.979943 -0.995337 -0.976731 -0.977797 -0.938281 ... -0.656707 -0.850874 0.034578 -0.339566 -0.709440 0.396758 -0.594755 0.377131 -0.004721 5
19 0.271166 -0.025973 -0.094923 -0.970124 -0.901878 -0.965300 -0.977387 -0.908967 -0.968392 -0.911152 ... 0.841485 0.737524 -0.065276 0.156377 -0.154816 0.131187 -0.601416 0.371978 -0.010461 5
20 0.253095 -0.044192 -0.139298 -0.968922 -0.910384 -0.918860 -0.975386 -0.918031 -0.909892 -0.911152 ... 0.824804 0.758511 -0.009371 0.110696 -0.242067 0.445223 -0.595470 0.375913 -0.012193 5
21 0.271339 -0.029954 -0.118595 -0.960931 -0.905035 -0.871270 -0.973400 -0.905701 -0.856332 -0.864369 ... -0.078417 -0.515394 -0.114588 -0.129403 0.444381 0.208119 -0.579426 0.387695 -0.005265 5
22 0.299267 0.011799 -0.031653 -0.948754 -0.844182 -0.880705 -0.958405 -0.853729 -0.884842 -0.838404 ... -0.181260 -0.570589 0.034920 0.630781 0.369759 -0.700671 -0.573604 0.391731 -0.005176 5
23 0.293897 0.011151 -0.069281 -0.962413 -0.863340 -0.859957 -0.972184 -0.869011 -0.856146 -0.838404 ... -0.037322 -0.395694 0.071300 0.095884 0.223739 -0.524834 -0.587638 0.379843 -0.023736 5
24 0.275639 -0.015566 -0.125458 -0.989161 -0.968516 -0.968723 -0.990635 -0.969443 -0.967756 -0.930510 ... -0.390130 -0.720884 0.128368 -0.399203 -0.008751 -0.333382 -0.601530 0.368305 -0.034342 5
25 0.272255 -0.025051 -0.133184 -0.991153 -0.963549 -0.973161 -0.991803 -0.962576 -0.970892 -0.934426 ... -0.440970 -0.765823 0.005536 -0.036520 0.058946 0.216278 -0.604059 0.367321 -0.030170 5
26 0.276427 -0.026278 -0.126941 -0.992989 -0.965620 -0.965738 -0.993417 -0.964456 -0.962185 -0.934426 ... -0.472949 -0.790058 -0.098328 0.058556 -0.303365 0.186886 -0.600177 0.371195 -0.023182 5
27 0.280170 -0.014362 -0.100471 -0.993592 -0.971903 -0.977940 -0.994521 -0.971907 -0.975690 -0.940317 ... -0.808991 -0.967055 -0.006488 0.461108 -0.295405 -0.375529 -0.596625 0.374298 -0.018806 5
28 0.277461 -0.015845 -0.106393 -0.995027 -0.971593 -0.978725 -0.995896 -0.971922 -0.976470 -0.940710 ... -0.778033 -0.948229 0.181142 -0.035276 -0.323056 0.711005 -0.597761 0.373296 -0.020333 5
29 0.288375 -0.008548 -0.107059 -0.993340 -0.961974 -0.984493 -0.993322 -0.958215 -0.982440 -0.936207 ... -0.871803 -0.967049 -0.037590 -0.053452 0.065760 -0.045327 -0.598912 0.372410 -0.020935 5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2917 0.376577 -0.018081 -0.109896 -0.314382 -0.152071 -0.213623 -0.393594 -0.180600 -0.265530 -0.072573 ... -0.323285 -0.752765 -0.829318 0.048275 0.913474 -0.903792 -0.694649 0.245906 0.172715 2
2918 0.297434 -0.045270 -0.190596 -0.360521 -0.164580 -0.188636 -0.414821 -0.238855 -0.235164 -0.149166 ... -0.221525 -0.650547 -0.241364 -0.181262 -0.946507 0.033174 -0.722767 0.244779 0.144695 2
2919 0.253323 -0.024865 -0.170146 -0.307713 -0.188579 -0.141310 -0.377426 -0.226048 -0.220538 0.092017 ... -0.141846 -0.564142 0.004509 0.357282 -0.945578 0.613976 -0.694613 0.259140 0.157683 2
2920 0.208102 -0.008046 -0.075870 -0.323045 -0.095884 -0.217678 -0.424824 -0.130102 -0.322741 0.092017 ... -0.297224 -0.721821 0.752448 -0.835923 -0.918626 0.109159 -0.671194 0.265751 0.174766 2
2921 0.144149 -0.039858 -0.045135 -0.354687 -0.015797 -0.233415 -0.465077 -0.036371 -0.327340 -0.017856 ... -0.374529 -0.779528 0.556469 -0.206396 -0.943786 0.297510 -0.661045 0.270618 0.179726 2
2922 0.230852 -0.042286 -0.089920 -0.309347 -0.079126 -0.151727 -0.391110 -0.087048 -0.256664 0.056244 ... -0.138929 -0.589311 0.273411 0.855750 -0.962498 0.953145 -0.657085 0.276184 0.177337 2
2923 0.296221 -0.052292 -0.115492 -0.284000 -0.110110 -0.234592 -0.359815 -0.136375 -0.290923 0.056244 ... -0.291001 -0.704020 -0.300646 0.225721 0.868469 -0.461542 -0.663506 0.273305 0.173953 2
2924 0.357298 -0.044599 -0.129532 -0.314497 -0.055580 -0.173090 -0.385964 -0.057525 -0.216827 0.026249 ... -0.162555 -0.593285 -0.710861 -0.061232 -0.706116 0.064574 -0.660163 0.274327 0.176291 2
2925 0.344328 0.004793 -0.122453 -0.319987 -0.066677 -0.181803 -0.380404 -0.071025 -0.244559 -0.116867 ... 0.181042 -0.250170 -0.402779 -0.706228 0.738571 0.870613 -0.652636 0.277868 0.180047 2
2926 0.283644 -0.007958 -0.119018 -0.308558 -0.080392 -0.211476 -0.368919 -0.097063 -0.301231 -0.116867 ... 0.024468 -0.392944 -0.076085 -0.238582 0.960357 0.086598 -0.656760 0.271648 0.182987 2
2927 0.206988 0.024602 -0.103940 -0.364650 -0.169378 -0.215814 -0.449295 -0.185957 -0.326199 -0.175652 ... -0.351854 -0.734494 0.535018 -0.256868 0.927325 -0.084328 -0.657011 0.266990 0.187901 2
2928 0.392804 -0.017788 -0.090166 -0.096349 -0.174368 -0.256686 -0.152987 -0.207809 -0.265253 0.485680 ... 0.274132 -0.036768 -0.742630 -0.080227 0.927331 -0.652179 -0.807271 0.189885 0.118456 3
2929 0.301162 -0.030034 -0.120333 -0.056181 -0.151753 -0.254848 -0.093334 -0.206796 -0.275332 0.485680 ... 0.430681 0.204816 -0.464962 0.291873 0.921466 -0.869951 -0.814760 0.190418 0.110325 3
2930 0.288214 0.011493 -0.155453 -0.063550 -0.035160 -0.230059 -0.093119 -0.116987 -0.258696 0.160374 ... 0.055902 -0.299539 -0.057336 -0.592802 0.964828 0.606231 -0.809277 0.192232 0.113997 3
2931 0.114866 -0.000427 -0.116904 -0.189544 0.078012 -0.312489 -0.268158 -0.047632 -0.341972 0.160374 ... -0.148767 -0.564841 0.849907 -0.643277 0.973227 0.062755 -0.809257 0.185954 0.120358 3
2932 0.126970 -0.013276 -0.073441 -0.173816 0.041540 -0.342513 -0.253156 -0.093578 -0.350155 0.285773 ... 0.048103 -0.252670 0.872466 0.491872 -0.155724 -0.762511 -0.800932 0.190968 0.123600 3
2933 0.363943 -0.022212 -0.125437 -0.041180 0.020777 -0.243607 -0.113519 -0.120400 -0.221709 0.411506 ... 0.118344 -0.205856 -0.895624 0.648047 -0.624180 -0.441569 -0.803888 0.191732 0.119944 3
2934 0.331044 -0.063979 -0.117333 -0.068002 0.156431 -0.317060 -0.148922 0.070107 -0.290868 0.411506 ... -0.086279 -0.468001 -0.351287 -0.335934 0.966914 -0.715323 -0.810091 0.184712 0.120724 3
2935 0.272259 -0.007579 -0.072642 -0.072559 0.005472 -0.225400 -0.161118 -0.074187 -0.280956 0.402615 ... -0.104077 -0.408741 0.181631 0.491898 -0.977969 -0.124243 -0.797719 0.194819 0.122937 3
2936 0.277271 0.001084 -0.074001 -0.068523 -0.245059 -0.144552 -0.148731 -0.303187 -0.198912 0.402615 ... 0.160861 -0.125531 0.133749 0.882511 -0.994293 0.475236 -0.803914 0.196823 0.114498 3
2937 0.305631 -0.052702 -0.155684 -0.343078 -0.151250 -0.177296 -0.383228 -0.181989 -0.287325 -0.108216 ... -0.285985 -0.714939 -0.338596 0.363565 -0.951163 -0.228412 -0.691122 0.241953 0.180141 2
2938 0.321443 -0.023799 -0.119695 -0.336548 -0.183307 -0.154297 -0.396342 -0.224409 -0.220786 -0.073510 ... -0.172403 -0.563389 -0.874477 -0.684506 -0.948809 0.472612 -0.677946 0.256877 0.177768 2
2939 0.267413 -0.021596 -0.070550 -0.323426 -0.118042 -0.323907 -0.395468 -0.129623 -0.335741 0.053516 ... -0.388926 -0.761280 0.218079 -0.690839 -0.922779 0.232523 -0.672635 0.261034 0.178609 2
2940 0.147146 -0.046908 -0.069338 -0.348686 0.018057 -0.364716 -0.437199 0.019297 -0.411240 0.053516 ... -0.552567 -0.850109 0.524082 0.041970 -0.922941 0.489178 -0.660366 0.272243 0.178547 2
2941 0.192275 -0.033643 -0.105949 -0.354841 -0.092504 -0.312910 -0.433579 -0.088741 -0.336060 -0.041624 ... -0.629657 -0.916493 0.535983 0.689306 -0.936606 0.562375 -0.646754 0.282150 0.181152 2
2942 0.310155 -0.053391 -0.099109 -0.287866 -0.140589 -0.215088 -0.356083 -0.148775 -0.232057 0.185361 ... -0.376278 -0.750809 -0.337422 0.346295 0.884904 -0.698885 -0.651732 0.274627 0.184784 2
2943 0.363385 -0.039214 -0.105915 -0.305388 0.028148 -0.196373 -0.373540 -0.030036 -0.270237 0.185361 ... -0.320418 -0.700274 -0.736701 -0.372889 -0.657421 0.322549 -0.655181 0.273578 0.182412 2
2944 0.349966 0.030077 -0.115788 -0.329638 -0.042143 -0.250181 -0.388017 -0.133257 -0.347029 0.007471 ... -0.118854 -0.467179 -0.181560 0.088574 0.696663 0.363139 -0.655357 0.274479 0.181184 2
2945 0.237594 0.018467 -0.096499 -0.323114 -0.229775 -0.207574 -0.392380 -0.279610 -0.289477 0.007471 ... -0.205445 -0.617737 0.444558 -0.819188 0.929294 -0.008398 -0.659719 0.264782 0.187563 2
2946 0.153627 -0.018437 -0.137018 -0.330046 -0.195253 -0.164339 -0.430974 -0.218295 -0.229933 -0.111527 ... -0.072237 -0.436940 0.598808 -0.287951 0.876030 -0.024965 -0.660080 0.263936 0.188103 2

2947 rows × 562 columns


In [ ]:


In [ ]: