In [2]:
import pandas as pd
import numpy as np

In [3]:
%matplotlib inline

In [16]:
day_trip_pdf = pd.read_csv("/Volumes/KINGSTON/nhts/DAYPUB.csv")

In [17]:
day_trip_pdf.columns


Out[17]:
Index([u'HOUSEID', u'PERSONID', u'TDCASEID', u'TDTRPNUM', u'MSACAT', u'RAIL', u'CDIVMSAR', u'TRPTRANS', u'TRPPUB', u'PSGR_FLG', u'DRVR_FLG', u'VEHUSED', u'WHODROVE', u'TRPHHVEH', u'HHMEMDRV', u'URBRUR', u'WHYFROM', u'HTHUR', u'HBHUR', u'IMPTAGE', u'IMPTSEX', u'IMPTRACE', u'IMPTMILE', u'IMPTMIN', u'IMPTHOWN', u'IMPTHTYP', u'IMPTENTM', u'IMPTSTTM', u'IMPTTPUB', u'IMPTTRIP', u'TDMSDTRP', u'TPOVRLAP', u'AWAYHOME', u'DRIVER', u'ONTD_P1', u'ONTD_P2', u'ONTD_P3', u'ONTD_P4', u'ONTD_P5', u'ONTD_P6', u'ONTD_P7', u'ONTD_P8', u'ONTD_P9', u'ONTD_P10', u'ONTD_P11', u'ONTD_P12', u'PASSPURP', u'PUBTYPE', u'R_RELAT', u'TRACC1', u'TRACC2', u'TRACC3', u'TRACC4', u'TRACC5', u'TREGR1', u'TREGR2', u'TREGR3', u'TREGR4', u'TREGR5', u'WORKER', u'TRPHHACC', u'WHYTO', u'TDWKND', u'ONTD_P13', u'ONTD_P14', u'WHYTRP1S', u'STRTTIME', u'TRPDIST', u'ENDHOUR', u'TRACCTM', u'TRWAITTM', u'TRPBLKS', u'EDUC', u'OUTOFTWN', u'PROXY', u'PROXCAT', u'HHR_HISP', u'HHR_RACE', u'HHR_DRVR', u'HHR_WRKR', u'R_SEX', u'TRAVDAY', u'TDAYDATE', u'TDBOA911', u'SMPLAREA', u'SMPLFIRM', u'SMPLSRCE', u'HOMEOWN', u'HOMETYPE', u'HHR_EDUC', u'LANG', u'HHRESP', u'HHFAMINC', u'HHSIZE', u'URBAN', u'MSASIZE', u'CENSUS_R', u'CENSUS_D', u'HH_ONTD', u'NUMONTRP', ...], dtype='object')

In [18]:
impt_cols = [col for col in day_trip_pdf.columns if col.startswith("IMPT")]

In [19]:
impt_cols


Out[19]:
['IMPTAGE',
 'IMPTSEX',
 'IMPTRACE',
 'IMPTMILE',
 'IMPTMIN',
 'IMPTHOWN',
 'IMPTHTYP',
 'IMPTENTM',
 'IMPTSTTM',
 'IMPTTPUB',
 'IMPTTRIP']

In [20]:
day_trip_pdf.HOUSEID.unique()


Out[20]:
array([10000018, 10000045, 10000474, ..., 915637008, 915637240, 915637259], dtype=object)

In [21]:
day_trip_pdf = day_trip_pdf.drop(impt_cols, axis=1)

In [22]:
day_trip_pdf.columns


Out[22]:
Index([u'HOUSEID', u'PERSONID', u'TDCASEID', u'TDTRPNUM', u'MSACAT', u'RAIL', u'CDIVMSAR', u'TRPTRANS', u'TRPPUB', u'PSGR_FLG', u'DRVR_FLG', u'VEHUSED', u'WHODROVE', u'TRPHHVEH', u'HHMEMDRV', u'URBRUR', u'WHYFROM', u'HTHUR', u'HBHUR', u'TDMSDTRP', u'TPOVRLAP', u'AWAYHOME', u'DRIVER', u'ONTD_P1', u'ONTD_P2', u'ONTD_P3', u'ONTD_P4', u'ONTD_P5', u'ONTD_P6', u'ONTD_P7', u'ONTD_P8', u'ONTD_P9', u'ONTD_P10', u'ONTD_P11', u'ONTD_P12', u'PASSPURP', u'PUBTYPE', u'R_RELAT', u'TRACC1', u'TRACC2', u'TRACC3', u'TRACC4', u'TRACC5', u'TREGR1', u'TREGR2', u'TREGR3', u'TREGR4', u'TREGR5', u'WORKER', u'TRPHHACC', u'WHYTO', u'TDWKND', u'ONTD_P13', u'ONTD_P14', u'WHYTRP1S', u'STRTTIME', u'TRPDIST', u'ENDHOUR', u'TRACCTM', u'TRWAITTM', u'TRPBLKS', u'EDUC', u'OUTOFTWN', u'PROXY', u'PROXCAT', u'HHR_HISP', u'HHR_RACE', u'HHR_DRVR', u'HHR_WRKR', u'R_SEX', u'TRAVDAY', u'TDAYDATE', u'TDBOA911', u'SMPLAREA', u'SMPLFIRM', u'SMPLSRCE', u'HOMEOWN', u'HOMETYPE', u'HHR_EDUC', u'LANG', u'HHRESP', u'HHFAMINC', u'HHSIZE', u'URBAN', u'MSASIZE', u'CENSUS_R', u'CENSUS_D', u'HH_ONTD', u'NUMONTRP', u'DWELTIME', u'EDITMODE', u'EDITPURP', u'EDITSTTM', u'EDITENTM', u'EDITMILE', u'EDITMIN', u'FLGNXTDY', u'STRTHR', u'STRTMIN', u'ENDMIN', ...], dtype='object')

In [23]:
edit_cols = [col for col in day_trip_pdf.columns if col.startswith("EDIT")]

In [24]:
edit_cols


Out[24]:
['EDITMODE', 'EDITPURP', 'EDITSTTM', 'EDITENTM', 'EDITMILE', 'EDITMIN']

In [25]:
day_trip_pdf = day_trip_pdf.drop(edit_cols, axis=1)

In [28]:
day_trip_pdf.columns


Out[28]:
Index([u'HOUSEID', u'PERSONID', u'TDCASEID', u'TDTRPNUM', u'MSACAT', u'RAIL', u'CDIVMSAR', u'TRPTRANS', u'TRPPUB', u'PSGR_FLG', u'DRVR_FLG', u'VEHUSED', u'WHODROVE', u'TRPHHVEH', u'HHMEMDRV', u'URBRUR', u'WHYFROM', u'HTHUR', u'HBHUR', u'TDMSDTRP', u'TPOVRLAP', u'AWAYHOME', u'DRIVER', u'ONTD_P1', u'ONTD_P2', u'ONTD_P3', u'ONTD_P4', u'ONTD_P5', u'ONTD_P6', u'ONTD_P7', u'ONTD_P8', u'ONTD_P9', u'ONTD_P10', u'ONTD_P11', u'ONTD_P12', u'PASSPURP', u'PUBTYPE', u'R_RELAT', u'TRACC1', u'TRACC2', u'TRACC3', u'TRACC4', u'TRACC5', u'TREGR1', u'TREGR2', u'TREGR3', u'TREGR4', u'TREGR5', u'WORKER', u'TRPHHACC', u'WHYTO', u'TDWKND', u'ONTD_P13', u'ONTD_P14', u'WHYTRP1S', u'STRTTIME', u'TRPDIST', u'ENDHOUR', u'TRACCTM', u'TRWAITTM', u'TRPBLKS', u'EDUC', u'OUTOFTWN', u'PROXY', u'PROXCAT', u'HHR_HISP', u'HHR_RACE', u'HHR_DRVR', u'HHR_WRKR', u'R_SEX', u'TRAVDAY', u'TDAYDATE', u'TDBOA911', u'SMPLAREA', u'SMPLFIRM', u'SMPLSRCE', u'HOMEOWN', u'HOMETYPE', u'HHR_EDUC', u'LANG', u'HHRESP', u'HHFAMINC', u'HHSIZE', u'URBAN', u'MSASIZE', u'CENSUS_R', u'CENSUS_D', u'HH_ONTD', u'NUMONTRP', u'DWELTIME', u'FLGNXTDY', u'STRTHR', u'STRTMIN', u'ENDMIN', u'WHYTRP90', u'TRVL_MIN', u'WHYTRP01', u'TRVLCMIN', u'HHINCTTL', u'TRPNUMSQ', ...], dtype='object')

In [29]:
hhr_cols = [col for col in day_trip_pdf.columns if col.startswith("HHR")]

In [32]:
trp_cols = [col for col in day_trip_pdf.columns if "TRP" in col]

In [33]:
trp_cols


Out[33]:
['TDTRPNUM',
 'TRPTRANS',
 'TRPPUB',
 'TRPHHVEH',
 'TDMSDTRP',
 'TRPHHACC',
 'WHYTRP1S',
 'TRPDIST',
 'TRPBLKS',
 'NUMONTRP',
 'WHYTRP90',
 'WHYTRP01',
 'TRPNUMSQ',
 'TRPMILES']

In [36]:
time_cols = [col for col in day_trip_pdf.columns if "TIME" in col]

In [49]:
time_cols


Out[49]:
['STRTTIME', 'DWELTIME', 'ENDTIME']

In [50]:
time_cols = ['STRTTIME', 'ENDTIME', 'DWELTIME']

In [51]:
other_cols = ['HOUSEID', 'PERSONID', 'TDCASEID', 'TDTRPNUM', 'TDAYDATE', 'TRAVDAY']

In [52]:
access_cols = [col for col in day_trip_pdf.columns if "TRACC" in col or "TREGR" in col]

In [53]:
all_cols = []
all_cols.extend(other_cols)
all_cols.extend(time_cols)
all_cols.extend(trp_cols)
all_cols.extend(access_cols)

In [54]:
sel_cols_df = day_trip_pdf[all_cols]

In [55]:
sel_cols_df


Out[55]:
HOUSEID PERSONID TDCASEID TDTRPNUM TDAYDATE TRAVDAY STRTTIME ENDTIME DWELTIME TDTRPNUM ... TRACC3 TRACC4 TRACC5 TREGR1 TREGR2 TREGR3 TREGR4 TREGR5 TRACCTM TREGRTM
0 10000018 1 100000180101 1 200105 1 830 845 45 1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
1 10000018 1 100000180102 2 200105 1 930 945 495 2 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
2 10000018 1 100000180103 3 200105 1 1800 1802 43 3 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
3 10000018 1 100000180104 4 200105 1 1845 1910 0 4 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
4 10000018 1 100000180105 5 200105 1 1910 1930 -1 5 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
5 10000018 2 100000180201 1 200105 1 830 845 45 1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
6 10000018 2 100000180202 2 200105 1 930 945 495 2 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
7 10000018 2 100000180203 3 200105 1 1800 1802 43 3 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
8 10000018 2 100000180204 4 200105 1 1845 1910 0 4 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
9 10000018 2 100000180205 5 200105 1 1910 1930 -1 5 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
10 10000045 1 100000450101 1 200104 3 1345 1354 29 1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
11 10000045 1 100000450102 2 200104 3 1423 1428 15 2 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
12 10000045 1 100000450103 3 200104 3 1443 1456 609 3 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
13 10000045 1 100000450104 4 200104 3 105 125 40 4 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
14 10000045 1 100000450105 5 200104 3 205 230 -1 5 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
15 10000474 1 100004740101 1 200104 2 810 815 2 1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
16 10000474 1 100004740102 2 200104 2 817 820 1 2 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
17 10000474 1 100004740103 3 200104 2 821 823 37 3 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
18 10000474 1 100004740104 4 200104 2 900 916 7 4 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
19 10000474 1 100004740105 5 200104 2 923 928 252 5 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
20 10000474 1 100004740106 6 200104 2 1340 1351 94 6 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
21 10000474 1 100004740107 7 200104 2 1525 1529 -1 7 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
22 10000577 1 100005770101 1 200112 4 1000 1015 235 1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
23 10000577 1 100005770102 2 200112 4 1410 1425 45 2 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
24 10000577 1 100005770103 3 200112 4 1510 2025 -1 3 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
25 10000577 2 100005770201 1 200112 4 815 825 515 1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
26 10000577 2 100005770202 2 200112 4 1700 1710 -1 2 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
27 10000652 1 100006520101 1 200104 6 1500 1520 -1 1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
28 10000652 2 100006520201 1 200104 6 1500 1520 -1 1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
29 10000894 1 100008940101 1 200103 7 1100 1105 20 1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
642262 915637240 1 9156372400104 4 200204 6 1120 1125 125 4 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642263 915637240 1 9156372400105 5 200204 6 1330 1345 30 5 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642264 915637240 1 9156372400106 6 200204 6 1415 1500 10 6 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642265 915637240 1 9156372400107 7 200204 6 1510 1515 0 7 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642266 915637240 1 9156372400108 8 200204 6 1515 1530 30 8 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642267 915637240 1 9156372400109 9 200204 6 1600 1615 45 9 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642268 915637240 1 9156372400110 10 200204 6 1700 1715 15 10 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642269 915637240 1 9156372400111 11 200204 6 1730 1745 120 11 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642270 915637240 1 9156372400112 12 200204 6 1945 2000 -1 12 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642271 915637240 2 9156372400201 1 200204 6 915 925 55 1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642272 915637240 2 9156372400202 2 200204 6 1020 1024 16 2 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642273 915637240 2 9156372400203 3 200204 6 1040 1100 20 3 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642274 915637240 2 9156372400204 4 200204 6 1120 1125 125 4 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642275 915637240 2 9156372400205 5 200204 6 1330 1345 30 5 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642276 915637240 2 9156372400206 6 200204 6 1415 1500 10 6 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642277 915637240 2 9156372400207 7 200204 6 1510 1515 0 7 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642278 915637240 2 9156372400208 8 200204 6 1515 1530 30 8 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642279 915637240 2 9156372400209 9 200204 6 1600 1615 45 9 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642280 915637240 2 9156372400210 10 200204 6 1700 1715 15 10 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642281 915637240 2 9156372400211 11 200204 6 1730 1745 120 11 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642282 915637240 2 9156372400212 12 200204 6 1945 2000 -1 12 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642283 915637259 1 9156372590101 1 200203 2 1040 1100 1 1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642284 915637259 1 9156372590102 2 200203 2 1101 1103 7 2 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642285 915637259 1 9156372590103 3 200203 2 1110 1113 37 3 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642286 915637259 1 9156372590104 4 200203 2 1150 1153 1 4 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642287 915637259 1 9156372590105 5 200203 2 1154 1156 44 5 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642288 915637259 1 9156372590106 6 200203 2 1240 1300 -1 6 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642289 915637259 2 9156372590201 1 200203 2 1040 1100 54 1 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642290 915637259 2 9156372590202 2 200203 2 1154 1156 44 2 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
642291 915637259 2 9156372590203 3 200203 2 1240 1300 -1 3 ... -1 -1 -1 -1 -1 -1 -1 -1 -1 -1

642292 rows × 35 columns


In [12]:
np.count_nonzero(day_trip_pdf.TRPTRANS == 23)


Out[12]:
118

In [13]:
day_trip_pdf.TRPTRANS.value_counts()


Out[13]:
 1     314884
 2      88625
 3      79555
 4      73811
 26     51526
 12     11544
 25      5184
 10      4892
 5       2340
 17      2123
 91      1844
 22       971
 7        942
-8        795
 8        562
 13       526
 16       484
 11       306
 14       265
 6        203
 21       175
 15       149
 24       125
 23       118
 18       102
 20        95
-7         56
 9         39
-1         21
-9         19
 19        11
dtype: int64

In [14]:
trip_chain_df = pd.read_csv("/Volumes/KINGSTON/nhts_trip_chaining/TripChainingCSV/tour09.csv")

In [17]:
trip_chain_df.MODE_D.value_counts()


Out[17]:
 1     387009
 3     154635
 4      99873
 2      84139
 23     60025
 11     14621
 22      6453
 9       4724
 97      4414
 5       3113
 7       2624
 17      1242
 8       1154
 14       938
 21       898
 19       897
 10       728
-8        703
 16       571
 24       562
 12       527
-7        481
 6        335
-1        240
 18       133
 15       101
 20        81
-9         53
 13        43
dtype: int64

In [19]:
trip_chain_df[.DIST_M.value_counts()


Out[19]:
 2.000000       82923
 3.000000       74762
 1.000000       63140
 5.000000       60218
 4.000000       57820
 6.000000       42324
 7.000000       36812
 8.000000       33883
 10.000000      32762
 0.555556       24847
 15.000000      21202
 12.000000      21077
 9.000000       19330
 0.222222       18633
 11.000000      14174
 20.000000      13811
 13.000000      11966
 0.111111       11711
-8.000000       11334
 14.000000      10320
 0.500000        8600
 18.000000       8470
 25.000000       8386
 16.000000       8193
 17.000000       8120
 0.777778        7564
 0.333333        7068
 30.000000       6847
 22.000000       5546
 0.444444        4961
                ...  
 562.000000         1
 1291.000000        1
 1287.000000        1
 1267.000000        1
 965.000000         1
 968.000000         1
 969.000000         1
 980.000000         1
 999.000000         1
 1005.000000        1
 18.222222          1
 1021.000000        1
 1030.000000        1
 566.000000         1
 1050.000000        1
 1056.000000        1
 1062.000000        1
 565.000000         1
 1074.000000        1
 1086.000000        1
 1096.000000        1
 1097.000000        1
 1120.000000        1
 564.000000         1
 1163.000000        1
 1191.000000        1
 1192.000000        1
 1245.000000        1
 347.000000         1
 1629.000000        1
dtype: int64

In [ ]: