In [2]:
import pandas as pd
import numpy as np
In [3]:
%matplotlib inline
In [16]:
day_trip_pdf = pd.read_csv("/Volumes/KINGSTON/nhts/DAYPUB.csv")
In [17]:
day_trip_pdf.columns
Out[17]:
Index([u'HOUSEID', u'PERSONID', u'TDCASEID', u'TDTRPNUM', u'MSACAT', u'RAIL', u'CDIVMSAR', u'TRPTRANS', u'TRPPUB', u'PSGR_FLG', u'DRVR_FLG', u'VEHUSED', u'WHODROVE', u'TRPHHVEH', u'HHMEMDRV', u'URBRUR', u'WHYFROM', u'HTHUR', u'HBHUR', u'IMPTAGE', u'IMPTSEX', u'IMPTRACE', u'IMPTMILE', u'IMPTMIN', u'IMPTHOWN', u'IMPTHTYP', u'IMPTENTM', u'IMPTSTTM', u'IMPTTPUB', u'IMPTTRIP', u'TDMSDTRP', u'TPOVRLAP', u'AWAYHOME', u'DRIVER', u'ONTD_P1', u'ONTD_P2', u'ONTD_P3', u'ONTD_P4', u'ONTD_P5', u'ONTD_P6', u'ONTD_P7', u'ONTD_P8', u'ONTD_P9', u'ONTD_P10', u'ONTD_P11', u'ONTD_P12', u'PASSPURP', u'PUBTYPE', u'R_RELAT', u'TRACC1', u'TRACC2', u'TRACC3', u'TRACC4', u'TRACC5', u'TREGR1', u'TREGR2', u'TREGR3', u'TREGR4', u'TREGR5', u'WORKER', u'TRPHHACC', u'WHYTO', u'TDWKND', u'ONTD_P13', u'ONTD_P14', u'WHYTRP1S', u'STRTTIME', u'TRPDIST', u'ENDHOUR', u'TRACCTM', u'TRWAITTM', u'TRPBLKS', u'EDUC', u'OUTOFTWN', u'PROXY', u'PROXCAT', u'HHR_HISP', u'HHR_RACE', u'HHR_DRVR', u'HHR_WRKR', u'R_SEX', u'TRAVDAY', u'TDAYDATE', u'TDBOA911', u'SMPLAREA', u'SMPLFIRM', u'SMPLSRCE', u'HOMEOWN', u'HOMETYPE', u'HHR_EDUC', u'LANG', u'HHRESP', u'HHFAMINC', u'HHSIZE', u'URBAN', u'MSASIZE', u'CENSUS_R', u'CENSUS_D', u'HH_ONTD', u'NUMONTRP', ...], dtype='object')
In [18]:
impt_cols = [col for col in day_trip_pdf.columns if col.startswith("IMPT")]
In [19]:
impt_cols
Out[19]:
['IMPTAGE',
'IMPTSEX',
'IMPTRACE',
'IMPTMILE',
'IMPTMIN',
'IMPTHOWN',
'IMPTHTYP',
'IMPTENTM',
'IMPTSTTM',
'IMPTTPUB',
'IMPTTRIP']
In [20]:
day_trip_pdf.HOUSEID.unique()
Out[20]:
array([10000018, 10000045, 10000474, ..., 915637008, 915637240, 915637259], dtype=object)
In [21]:
day_trip_pdf = day_trip_pdf.drop(impt_cols, axis=1)
In [22]:
day_trip_pdf.columns
Out[22]:
Index([u'HOUSEID', u'PERSONID', u'TDCASEID', u'TDTRPNUM', u'MSACAT', u'RAIL', u'CDIVMSAR', u'TRPTRANS', u'TRPPUB', u'PSGR_FLG', u'DRVR_FLG', u'VEHUSED', u'WHODROVE', u'TRPHHVEH', u'HHMEMDRV', u'URBRUR', u'WHYFROM', u'HTHUR', u'HBHUR', u'TDMSDTRP', u'TPOVRLAP', u'AWAYHOME', u'DRIVER', u'ONTD_P1', u'ONTD_P2', u'ONTD_P3', u'ONTD_P4', u'ONTD_P5', u'ONTD_P6', u'ONTD_P7', u'ONTD_P8', u'ONTD_P9', u'ONTD_P10', u'ONTD_P11', u'ONTD_P12', u'PASSPURP', u'PUBTYPE', u'R_RELAT', u'TRACC1', u'TRACC2', u'TRACC3', u'TRACC4', u'TRACC5', u'TREGR1', u'TREGR2', u'TREGR3', u'TREGR4', u'TREGR5', u'WORKER', u'TRPHHACC', u'WHYTO', u'TDWKND', u'ONTD_P13', u'ONTD_P14', u'WHYTRP1S', u'STRTTIME', u'TRPDIST', u'ENDHOUR', u'TRACCTM', u'TRWAITTM', u'TRPBLKS', u'EDUC', u'OUTOFTWN', u'PROXY', u'PROXCAT', u'HHR_HISP', u'HHR_RACE', u'HHR_DRVR', u'HHR_WRKR', u'R_SEX', u'TRAVDAY', u'TDAYDATE', u'TDBOA911', u'SMPLAREA', u'SMPLFIRM', u'SMPLSRCE', u'HOMEOWN', u'HOMETYPE', u'HHR_EDUC', u'LANG', u'HHRESP', u'HHFAMINC', u'HHSIZE', u'URBAN', u'MSASIZE', u'CENSUS_R', u'CENSUS_D', u'HH_ONTD', u'NUMONTRP', u'DWELTIME', u'EDITMODE', u'EDITPURP', u'EDITSTTM', u'EDITENTM', u'EDITMILE', u'EDITMIN', u'FLGNXTDY', u'STRTHR', u'STRTMIN', u'ENDMIN', ...], dtype='object')
In [23]:
edit_cols = [col for col in day_trip_pdf.columns if col.startswith("EDIT")]
In [24]:
edit_cols
Out[24]:
['EDITMODE', 'EDITPURP', 'EDITSTTM', 'EDITENTM', 'EDITMILE', 'EDITMIN']
In [25]:
day_trip_pdf = day_trip_pdf.drop(edit_cols, axis=1)
In [28]:
day_trip_pdf.columns
Out[28]:
Index([u'HOUSEID', u'PERSONID', u'TDCASEID', u'TDTRPNUM', u'MSACAT', u'RAIL', u'CDIVMSAR', u'TRPTRANS', u'TRPPUB', u'PSGR_FLG', u'DRVR_FLG', u'VEHUSED', u'WHODROVE', u'TRPHHVEH', u'HHMEMDRV', u'URBRUR', u'WHYFROM', u'HTHUR', u'HBHUR', u'TDMSDTRP', u'TPOVRLAP', u'AWAYHOME', u'DRIVER', u'ONTD_P1', u'ONTD_P2', u'ONTD_P3', u'ONTD_P4', u'ONTD_P5', u'ONTD_P6', u'ONTD_P7', u'ONTD_P8', u'ONTD_P9', u'ONTD_P10', u'ONTD_P11', u'ONTD_P12', u'PASSPURP', u'PUBTYPE', u'R_RELAT', u'TRACC1', u'TRACC2', u'TRACC3', u'TRACC4', u'TRACC5', u'TREGR1', u'TREGR2', u'TREGR3', u'TREGR4', u'TREGR5', u'WORKER', u'TRPHHACC', u'WHYTO', u'TDWKND', u'ONTD_P13', u'ONTD_P14', u'WHYTRP1S', u'STRTTIME', u'TRPDIST', u'ENDHOUR', u'TRACCTM', u'TRWAITTM', u'TRPBLKS', u'EDUC', u'OUTOFTWN', u'PROXY', u'PROXCAT', u'HHR_HISP', u'HHR_RACE', u'HHR_DRVR', u'HHR_WRKR', u'R_SEX', u'TRAVDAY', u'TDAYDATE', u'TDBOA911', u'SMPLAREA', u'SMPLFIRM', u'SMPLSRCE', u'HOMEOWN', u'HOMETYPE', u'HHR_EDUC', u'LANG', u'HHRESP', u'HHFAMINC', u'HHSIZE', u'URBAN', u'MSASIZE', u'CENSUS_R', u'CENSUS_D', u'HH_ONTD', u'NUMONTRP', u'DWELTIME', u'FLGNXTDY', u'STRTHR', u'STRTMIN', u'ENDMIN', u'WHYTRP90', u'TRVL_MIN', u'WHYTRP01', u'TRVLCMIN', u'HHINCTTL', u'TRPNUMSQ', ...], dtype='object')
In [29]:
hhr_cols = [col for col in day_trip_pdf.columns if col.startswith("HHR")]
In [32]:
trp_cols = [col for col in day_trip_pdf.columns if "TRP" in col]
In [33]:
trp_cols
Out[33]:
['TDTRPNUM',
'TRPTRANS',
'TRPPUB',
'TRPHHVEH',
'TDMSDTRP',
'TRPHHACC',
'WHYTRP1S',
'TRPDIST',
'TRPBLKS',
'NUMONTRP',
'WHYTRP90',
'WHYTRP01',
'TRPNUMSQ',
'TRPMILES']
In [36]:
time_cols = [col for col in day_trip_pdf.columns if "TIME" in col]
In [49]:
time_cols
Out[49]:
['STRTTIME', 'DWELTIME', 'ENDTIME']
In [50]:
time_cols = ['STRTTIME', 'ENDTIME', 'DWELTIME']
In [51]:
other_cols = ['HOUSEID', 'PERSONID', 'TDCASEID', 'TDTRPNUM', 'TDAYDATE', 'TRAVDAY']
In [52]:
access_cols = [col for col in day_trip_pdf.columns if "TRACC" in col or "TREGR" in col]
In [53]:
all_cols = []
all_cols.extend(other_cols)
all_cols.extend(time_cols)
all_cols.extend(trp_cols)
all_cols.extend(access_cols)
In [54]:
sel_cols_df = day_trip_pdf[all_cols]
In [55]:
sel_cols_df
Out[55]:
HOUSEID
PERSONID
TDCASEID
TDTRPNUM
TDAYDATE
TRAVDAY
STRTTIME
ENDTIME
DWELTIME
TDTRPNUM
...
TRACC3
TRACC4
TRACC5
TREGR1
TREGR2
TREGR3
TREGR4
TREGR5
TRACCTM
TREGRTM
0
10000018
1
100000180101
1
200105
1
830
845
45
1
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
1
10000018
1
100000180102
2
200105
1
930
945
495
2
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
2
10000018
1
100000180103
3
200105
1
1800
1802
43
3
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
3
10000018
1
100000180104
4
200105
1
1845
1910
0
4
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
4
10000018
1
100000180105
5
200105
1
1910
1930
-1
5
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
5
10000018
2
100000180201
1
200105
1
830
845
45
1
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
6
10000018
2
100000180202
2
200105
1
930
945
495
2
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
7
10000018
2
100000180203
3
200105
1
1800
1802
43
3
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
8
10000018
2
100000180204
4
200105
1
1845
1910
0
4
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
9
10000018
2
100000180205
5
200105
1
1910
1930
-1
5
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
10
10000045
1
100000450101
1
200104
3
1345
1354
29
1
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
11
10000045
1
100000450102
2
200104
3
1423
1428
15
2
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
12
10000045
1
100000450103
3
200104
3
1443
1456
609
3
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
13
10000045
1
100000450104
4
200104
3
105
125
40
4
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
14
10000045
1
100000450105
5
200104
3
205
230
-1
5
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
15
10000474
1
100004740101
1
200104
2
810
815
2
1
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
16
10000474
1
100004740102
2
200104
2
817
820
1
2
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
17
10000474
1
100004740103
3
200104
2
821
823
37
3
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
18
10000474
1
100004740104
4
200104
2
900
916
7
4
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
19
10000474
1
100004740105
5
200104
2
923
928
252
5
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
20
10000474
1
100004740106
6
200104
2
1340
1351
94
6
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
21
10000474
1
100004740107
7
200104
2
1525
1529
-1
7
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
22
10000577
1
100005770101
1
200112
4
1000
1015
235
1
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
23
10000577
1
100005770102
2
200112
4
1410
1425
45
2
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
24
10000577
1
100005770103
3
200112
4
1510
2025
-1
3
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
25
10000577
2
100005770201
1
200112
4
815
825
515
1
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
26
10000577
2
100005770202
2
200112
4
1700
1710
-1
2
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
27
10000652
1
100006520101
1
200104
6
1500
1520
-1
1
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
28
10000652
2
100006520201
1
200104
6
1500
1520
-1
1
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
29
10000894
1
100008940101
1
200103
7
1100
1105
20
1
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
642262
915637240
1
9156372400104
4
200204
6
1120
1125
125
4
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642263
915637240
1
9156372400105
5
200204
6
1330
1345
30
5
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642264
915637240
1
9156372400106
6
200204
6
1415
1500
10
6
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642265
915637240
1
9156372400107
7
200204
6
1510
1515
0
7
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642266
915637240
1
9156372400108
8
200204
6
1515
1530
30
8
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642267
915637240
1
9156372400109
9
200204
6
1600
1615
45
9
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642268
915637240
1
9156372400110
10
200204
6
1700
1715
15
10
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642269
915637240
1
9156372400111
11
200204
6
1730
1745
120
11
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642270
915637240
1
9156372400112
12
200204
6
1945
2000
-1
12
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642271
915637240
2
9156372400201
1
200204
6
915
925
55
1
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642272
915637240
2
9156372400202
2
200204
6
1020
1024
16
2
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642273
915637240
2
9156372400203
3
200204
6
1040
1100
20
3
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642274
915637240
2
9156372400204
4
200204
6
1120
1125
125
4
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642275
915637240
2
9156372400205
5
200204
6
1330
1345
30
5
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642276
915637240
2
9156372400206
6
200204
6
1415
1500
10
6
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642277
915637240
2
9156372400207
7
200204
6
1510
1515
0
7
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642278
915637240
2
9156372400208
8
200204
6
1515
1530
30
8
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642279
915637240
2
9156372400209
9
200204
6
1600
1615
45
9
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642280
915637240
2
9156372400210
10
200204
6
1700
1715
15
10
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642281
915637240
2
9156372400211
11
200204
6
1730
1745
120
11
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642282
915637240
2
9156372400212
12
200204
6
1945
2000
-1
12
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642283
915637259
1
9156372590101
1
200203
2
1040
1100
1
1
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642284
915637259
1
9156372590102
2
200203
2
1101
1103
7
2
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642285
915637259
1
9156372590103
3
200203
2
1110
1113
37
3
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642286
915637259
1
9156372590104
4
200203
2
1150
1153
1
4
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642287
915637259
1
9156372590105
5
200203
2
1154
1156
44
5
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642288
915637259
1
9156372590106
6
200203
2
1240
1300
-1
6
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642289
915637259
2
9156372590201
1
200203
2
1040
1100
54
1
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642290
915637259
2
9156372590202
2
200203
2
1154
1156
44
2
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642291
915637259
2
9156372590203
3
200203
2
1240
1300
-1
3
...
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
642292 rows × 35 columns
In [12]:
np.count_nonzero(day_trip_pdf.TRPTRANS == 23)
Out[12]:
118
In [13]:
day_trip_pdf.TRPTRANS.value_counts()
Out[13]:
1 314884
2 88625
3 79555
4 73811
26 51526
12 11544
25 5184
10 4892
5 2340
17 2123
91 1844
22 971
7 942
-8 795
8 562
13 526
16 484
11 306
14 265
6 203
21 175
15 149
24 125
23 118
18 102
20 95
-7 56
9 39
-1 21
-9 19
19 11
dtype: int64
In [14]:
trip_chain_df = pd.read_csv("/Volumes/KINGSTON/nhts_trip_chaining/TripChainingCSV/tour09.csv")
In [17]:
trip_chain_df.MODE_D.value_counts()
Out[17]:
1 387009
3 154635
4 99873
2 84139
23 60025
11 14621
22 6453
9 4724
97 4414
5 3113
7 2624
17 1242
8 1154
14 938
21 898
19 897
10 728
-8 703
16 571
24 562
12 527
-7 481
6 335
-1 240
18 133
15 101
20 81
-9 53
13 43
dtype: int64
In [19]:
trip_chain_df[.DIST_M.value_counts()
Out[19]:
2.000000 82923
3.000000 74762
1.000000 63140
5.000000 60218
4.000000 57820
6.000000 42324
7.000000 36812
8.000000 33883
10.000000 32762
0.555556 24847
15.000000 21202
12.000000 21077
9.000000 19330
0.222222 18633
11.000000 14174
20.000000 13811
13.000000 11966
0.111111 11711
-8.000000 11334
14.000000 10320
0.500000 8600
18.000000 8470
25.000000 8386
16.000000 8193
17.000000 8120
0.777778 7564
0.333333 7068
30.000000 6847
22.000000 5546
0.444444 4961
...
562.000000 1
1291.000000 1
1287.000000 1
1267.000000 1
965.000000 1
968.000000 1
969.000000 1
980.000000 1
999.000000 1
1005.000000 1
18.222222 1
1021.000000 1
1030.000000 1
566.000000 1
1050.000000 1
1056.000000 1
1062.000000 1
565.000000 1
1074.000000 1
1086.000000 1
1096.000000 1
1097.000000 1
1120.000000 1
564.000000 1
1163.000000 1
1191.000000 1
1192.000000 1
1245.000000 1
347.000000 1
1629.000000 1
dtype: int64
In [ ]:
Content source: joshzarrabi/e-mission-server
Similar notebooks: