TxDOT Project



In [1]:

    
import os
import math
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt

import csv
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn import feature_selection, linear_model

Load in the Data



In [16]:

    
df = pd.read_csv('data/Bid_Data.csv')
len(df)









    



C:\Users\Collin\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:2717: DtypeWarning: Columns (27,32) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)






    Out[16]:





32447

Formatting & Identifying the Data



In [18]:

    
df = df[df['Rank'] == 1]
df = df[df['Type'] == 'Construction']

df = df.drop(' From', 1)
df = df.drop('To', 1)
df = df.drop('Contract Description', 1)
df = df.drop('Contractor', 1)
df = df.drop('Contract Category', 1)
df = df.drop('LNG MON', 1)
df = df.drop('MONTH', 1)

df['Award Amount'] = df['Award Amount'].str.lstrip('$')
df['Engineers Estimate'] = df['Engineers Estimate'].str.lstrip('$')
df['Award Amount'] = df['Award Amount'].str.replace(',','').astype(float)
df['Engineers Estimate'] = df['Engineers Estimate'].str.replace(',','').astype(float)

#Renaming Variables
df['EngEst'] = df['Engineers Estimate']
df['NBidders'] = df['Number of Bidders']
df['Date'] = pd.to_datetime(df['Letting Date'])
df.set_index('Date' , inplace=True)
df['Year'] = df.index.year
df['Month'] = df.index.month
df['WinBid'] = df['Award Amount']

# Creating New Varialbes
df['Diff'] = df['EngEst'] - df['WinBid']
df['lnWinBid'] = np.log(df['WinBid'])
df['lnEngEst'] = np.log(df['EngEst'])
df['DiffLn'] = df['lnWinBid'] - df['lnEngEst']
df['Within10Percent'] = 1 
df['PercentOff'] = df['Diff'] / df['EngEst']
df['MoreOrLessThan10'] = 0
df['LessThan10'] = 0
df['MoreThan10'] = 0

df.loc[(df.PercentOff > .10) , 'Within10Percent'] = 0
df.loc[(df.PercentOff < -.10) , 'Within10Percent'] = 0
df.loc[(df.PercentOff > .10) , 'MoreOrLessThan10'] = 1
df.loc[(df.PercentOff < -.10) , 'MoreOrLessThan10'] = 2
df.loc[(df.PercentOff > .10) , 'MoreThan10'] = 1
df.loc[(df.PercentOff < -.10) , 'LessThan10'] = 1

print(len(df))



In [15]:

    
df









    Out[15]:






  
    
      
      Type
      CCSJ
      Letting Call Number
      Project
      Length
      Highway
      Letting Date
      District
      County
      Number of Bidders
      ...
      WinBid
      Diff
      lnWinBid
      lnEngEst
      DiffLn
      Within10Percent
      PercentOff
      MoreOrLessThan10
      LessThan10
      MoreThan10
    
    
      Date
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      2010-10-21
      Construction
      0001-05-016
      10103012
      STP 2009(816)ES
      0.200
      FM 259
      10/21/2010
      El Paso
      El Paso
      1
      ...
      116285.82
      7730.08
      11.663806
      11.728165
      -0.064359
      1
      0.062331
      0
      0
      0
    
    
      2010-08-11
      Construction
      0002-01-083
      8103285
      STP 2011(321)
      9.050
      SH 20
      8/11/2010
      El Paso
      El Paso
      6
      ...
      2477913.10
      1365614.90
      14.722927
      15.161901
      -0.438974
      0
      0.355302
      1
      0
      1
    
    
      2010-04-07
      Construction
      0002-08-050
      4103999
      C 2-8-50
      1.250
      IH 10
      4/7/2010
      El Paso
      Hudspeth
      1
      ...
      228662.50
      37724.88
      12.340002
      12.492707
      -0.152704
      0
      0.141617
      1
      0
      1
    
    
      2010-08-11
      Construction
      0002-11-056
      8103286
      IM 0101(256)
      4.316
      IH 10
      8/11/2010
      El Paso
      Culberson
      2
      ...
      1094961.15
      492577.51
      13.906229
      14.277695
      -0.371466
      0
      0.310277
      1
      0
      1
    
    
      2010-05-12
      Construction
      0002-14-033
      5103213
      STP 2010(756)ES
      0.100
      FM 258
      5/12/2010
      El Paso
      El Paso
      4
      ...
      149069.50
      109807.00
      11.912168
      12.464106
      -0.551938
      0
      0.424168
      1
      0
      1
    
    
      2010-02-09
      Construction
      0003-01-051
      2103035
      IM 0101(254)
      1.069
      IH 10
      2/9/2010
      El Paso
      Culberson
      2
      ...
      409447.31
      -66821.41
      12.922564
      12.744394
      0.178169
      0
      -0.195027
      2
      1
      0
    
    
      2010-05-12
      Construction
      0003-05-046
      5103208
      IM 0201(176)
      3.806
      IH 20
      5/12/2010
      Odessa
      Reeves
      4
      ...
      2284464.17
      550755.83
      14.641642
      14.857630
      -0.215988
      0
      0.194255
      1
      0
      1
    
    
      2010-02-10
      Construction
      0003-05-048
      2103209
      IM 0201(175)
      57.050
      IH 20
      2/10/2010
      Odessa
      Reeves
      3
      ...
      3460027.14
      193621.95
      15.056787
      15.111237
      -0.054450
      1
      0.052994
      0
      0
      0
    
    
      2010-08-11
      Construction
      0004-04-076
      8103305
      STP 2000(400)TE
      1.181
      IH 20
      8/11/2010
      Odessa
      Ward
      8
      ...
      14473728.60
      -1989505.68
      16.487846
      16.339976
      0.147870
      0
      -0.159362
      2
      1
      0
    
    
      2010-12-08
      Construction
      0005-05-105
      12103231
      IM 0202(231)
      78.025
      IH 20
      12/8/2010
      Abilene
      Howard
      5
      ...
      2555217.75
      66828.32
      14.753648
      14.779466
      -0.025818
      1
      0.025487
      0
      0
      0
    
    
      2010-10-22
      Construction
      0005-07-049
      10103227
      STP 2011(623)ES
      18.838
      IH 20
      10/22/2010
      Abilene
      Mitchell
      6
      ...
      7827281.73
      1083350.06
      15.873126
      16.002756
      -0.129630
      0
      0.121579
      1
      0
      1
    
    
      2010-03-09
      Construction
      0005-08-097
      3103012
      STP 2010(601)HES
      0.001
      IH 20
      3/9/2010
      Abilene
      Mitchell
      4
      ...
      85238.00
      51205.42
      11.353203
      11.823665
      -0.470463
      0
      0.375287
      1
      0
      1
    
    
      2010-08-10
      Construction
      0005-13-053
      8103084
      IM 0201(177)
      9.164
      IH 20
      8/10/2010
      Odessa
      Ector
      1
      ...
      6438932.93
      -639101.05
      15.677873
      15.573339
      0.104534
      0
      -0.110193
      2
      1
      0
    
    
      2010-04-06
      Construction
      0006-01-091
      4103008
      IM 0202(230)
      9.040
      IH 20
      4/6/2010
      Abilene
      Mitchell
      6
      ...
      3272602.41
      -254919.33
      15.001096
      14.920000
      0.081096
      1
      -0.084475
      0
      0
      0
    
    
      2010-08-11
      Construction
      0006-02-104
      8103239
      C 6-2-104
      8.284
      IH 20
      8/11/2010
      Abilene
      Nolan
      6
      ...
      1038393.49
      82349.54
      13.853185
      13.929502
      -0.076317
      1
      0.073478
      0
      0
      0
    
    
      2010-08-10
      Construction
      0006-02-106
      8103085
      STP 2011(292)HES
      9.889
      IH 20
      8/10/2010
      Abilene
      Nolan
      8
      ...
      589049.95
      92165.21
      13.286266
      13.431633
      -0.145367
      0
      0.135295
      1
      0
      1
    
    
      2010-01-05
      Construction
      0006-04-068
      1103023
      STP 2010(233)SB
      17.488
      IH 20
      1/5/2010
      Abilene
      Taylor
      14
      ...
      1191750.29
      126077.35
      13.990934
      14.091495
      -0.100562
      1
      0.095671
      0
      0
      0
    
    
      2010-04-07
      Construction
      0006-05-108
      4103213
      STP 2010(559)ES
      2.986
      IH 20
      4/7/2010
      Abilene
      Taylor
      3
      ...
      548674.30
      36426.05
      13.215260
      13.279539
      -0.064278
      1
      0.062256
      0
      0
      0
    
    
      2010-05-12
      Construction
      0006-18-052
      5103221
      STP 2010(845)
      3.283
      BI 20-R
      5/12/2010
      Abilene
      Taylor
      5
      ...
      595110.27
      84426.13
      13.296502
      13.429166
      -0.132664
      0
      0.124241
      1
      0
      1
    
    
      2010-03-10
      Construction
      0007-02-047
      3103218
      STP 2010(602)ES
      8.914
      IH 20
      3/10/2010
      Abilene
      Callahan
      7
      ...
      2294673.03
      301437.12
      14.646101
      14.769525
      -0.123424
      0
      0.116111
      1
      0
      1
    
    
      2010-01-05
      Construction
      0007-03-083
      1103006
      IM 0203(077)
      308.483
      IH 20
      1/5/2010
      Brownwood
      Eastland
      4
      ...
      6349325.82
      408080.75
      15.663859
      15.726150
      -0.062291
      1
      0.060390
      0
      0
      0
    
    
      2010-02-09
      Construction
      0007-04-109
      2103024
      STP 2010(322)HES
      7.724
      IH 20
      2/9/2010
      Brownwood
      Eastland
      13
      ...
      515542.26
      393293.65
      13.152975
      13.719920
      -0.566945
      0
      0.432744
      1
      0
      1
    
    
      2010-03-10
      Construction
      0007-06-076
      3103217
      IM 0203(078)
      5.594
      IH 20
      3/10/2010
      Brownwood
      Eastland
      10
      ...
      2803289.11
      120462.21
      14.846304
      14.888378
      -0.042074
      1
      0.041201
      0
      0
      0
    
    
      2010-06-03
      Construction
      0007-06-081
      6103009
      IM 0203(079)
      2.244
      IH 20
      6/3/2010
      Brownwood
      Eastland
      4
      ...
      1554812.14
      -141855.95
      14.256865
      14.161195
      0.095671
      0
      -0.100397
      2
      1
      0
    
    
      2010-08-10
      Construction
      0007-10-054
      8103032
      STP 2011(203)
      7.380
      US 180
      8/10/2010
      Fort Worth
      Palo Pinto
      9
      ...
      1517352.16
      296081.24
      14.232477
      14.410733
      -0.178255
      0
      0.163271
      1
      0
      1
    
    
      2010-10-21
      Construction
      0008-02-070
      10103013
      STP 2011(472)
      179.385
      US 180
      10/21/2010
      Fort Worth
      Parker
      5
      ...
      7357575.84
      1156674.43
      15.811241
      15.957252
      -0.146011
      0
      0.135852
      1
      0
      1
    
    
      2010-04-06
      Construction
      0008-03-098
      4103032
      IM 0204(271)
      5.170
      IH 20
      4/6/2010
      Fort Worth
      Parker
      6
      ...
      1730019.68
      140955.22
      14.363643
      14.441970
      -0.078327
      1
      0.075338
      0
      0
      0
    
    
      2010-08-10
      Construction
      0008-06-048
      8103035
      BR 2011(098)
      0.169
      SH 180
      8/10/2010
      Fort Worth
      Tarrant
      5
      ...
      1447722.05
      48917.86
      14.185502
      14.218733
      -0.033231
      1
      0.032685
      0
      0
      0
    
    
      2010-12-07
      Construction
      0008-13-223
      12103027
      IM 8204(276)
      2.341
      IH 820
      12/7/2010
      Fort Worth
      Tarrant
      4
      ...
      1243621.79
      317799.96
      14.033538
      14.261107
      -0.227569
      0
      0.203532
      1
      0
      1
    
    
      2010-09-08
      Construction
      0008-15-044
      9103021
      IM 8204(273)
      0.100
      IH 820
      9/8/2010
      Fort Worth
      Tarrant
      5
      ...
      96000.00
      31793.18
      11.472103
      11.758168
      -0.286065
      0
      0.248786
      1
      0
      1
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      2016-03-03
      Construction
      1186-01-089
      3163028
      STP 2016(636)
      4.103
      FM 969
      3/3/2016
      Austin
      Travis
      6
      ...
      2067551.68
      284564.36
      14.541876
      14.670826
      -0.128950
      0
      0.120982
      1
      0
      1
    
    
      2016-03-04
      Construction
      1259-01-041
      3163228
      STP 2016(812)HES
      1.336
      FM 1097
      3/4/2016
      Houston
      Montgomery
      3
      ...
      238826.40
      -41172.80
      12.383492
      12.194271
      0.189221
      0
      -0.208308
      2
      1
      0
    
    
      2016-02-10
      Construction
      1277-01-015
      2163228
      BR 2016(411)
      0.318
      FM 1012
      2/10/2016
      Beaumont
      Newton
      3
      ...
      1777740.74
      -130944.23
      14.390854
      14.314342
      0.076511
      1
      -0.079515
      0
      0
      0
    
    
      2016-04-06
      Construction
      1289-01-029
      4163237
      BR 1602(046)
      3.551
      FM 1126
      4/6/2016
      Dallas
      Navarro
      8
      ...
      6714487.74
      268564.38
      15.719778
      15.758997
      -0.039219
      1
      0.038459
      0
      0
      0
    
    
      2016-01-06
      Construction
      1397-01-031
      1163240
      C 1397-1-31
      3.094
      FM 1836
      1/6/2016
      Dallas
      Kaufman
      5
      ...
      5943147.30
      -906869.64
      15.597749
      15.432178
      0.165572
      0
      -0.180067
      2
      1
      0
    
    
      2016-01-06
      Construction
      1402-03-011
      1163242
      STP 2016(732)
      20.298
      FM 1375
      1/6/2016
      Houston
      Montgomery
      4
      ...
      1297509.45
      352076.70
      14.075957
      14.316035
      -0.240078
      0
      0.213433
      1
      0
      1
    
    
      2016-04-06
      Construction
      1494-03-001
      4163222
      C 1494-3-1
      0.018
      FM 3486
      4/6/2016
      Dallas
      Kaufman
      5
      ...
      4849821.37
      -215531.22
      15.394452
      15.348994
      0.045459
      1
      -0.046508
      0
      0
      0
    
    
      2016-02-10
      Construction
      1526-03-015
      2163233
      STP 2016(678)
      1.865
      FM 1606
      2/10/2016
      Abilene
      Scurry
      5
      ...
      6876410.33
      1435554.66
      15.743607
      15.933207
      -0.189599
      0
      0.172709
      1
      0
      1
    
    
      2016-02-09
      Construction
      1531-02-012
      2163022
      STP 2016(679)
      0.001
      FM 1610
      2/9/2016
      Abilene
      Scurry
      7
      ...
      5397845.79
      369148.18
      15.501511
      15.567662
      -0.066151
      1
      0.064011
      0
      0
      0
    
    
      2016-02-09
      Construction
      1606-02-019
      2163020
      STP 2014(884)
      0.062
      FM 2123
      2/9/2016
      Fort Worth
      Wise
      6
      ...
      2779919.35
      676795.35
      14.837932
      15.055829
      -0.217897
      0
      0.195791
      1
      0
      1
    
    
      2016-02-10
      Construction
      1685-03-088
      2163237
      STP 2016(731)
      1.762
      FM 1960
      2/10/2016
      Houston
      Harris
      3
      ...
      6636000.51
      113980.17
      15.708020
      15.725050
      -0.017030
      1
      0.016886
      0
      0
      0
    
    
      2016-01-05
      Construction
      1685-03-096
      1163024
      STP 2016(474)
      2.088
      FM 1960
      1/5/2016
      Houston
      Harris
      3
      ...
      191207.77
      81159.98
      12.161116
      12.514908
      -0.353793
      0
      0.297979
      1
      0
      1
    
    
      2016-03-04
      Construction
      2003-01-008
      3163217
      STP 2016(839)HES
      3.594
      FM 2122
      3/4/2016
      Paris
      Lamar
      6
      ...
      763854.27
      -9884.53
      13.546132
      13.533108
      0.013025
      1
      -0.013110
      0
      0
      0
    
    
      2016-01-05
      Construction
      2079-01-043
      1163037
      STP 2013(901)
      2.879
      FM 1220
      1/5/2016
      Fort Worth
      Tarrant
      2
      ...
      8350775.03
      -2311318.34
      15.937865
      15.613825
      0.324040
      0
      -0.382703
      2
      1
      0
    
    
      2016-03-04
      Construction
      2263-03-041
      3163212
      C 2263-3-41
      5.830
      SH 361
      3/4/2016
      Corpus Christi
      Nueces
      4
      ...
      5552811.60
      302405.80
      15.529815
      15.582844
      -0.053029
      1
      0.051647
      0
      0
      0
    
    
      2016-01-06
      Construction
      2266-02-138
      1163205
      NH 2016(594)
      0.056
      SH 360
      1/6/2016
      Fort Worth
      Tarrant
      2
      ...
      2126639.56
      -165737.61
      14.570054
      14.488915
      0.081139
      1
      -0.084521
      0
      0
      0
    
    
      2016-01-05
      Construction
      2296-01-051
      1163035
      NH 2016(565)
      7.055
      SH 191
      1/5/2016
      Odessa
      Ector
      3
      ...
      4387311.96
      -338356.71
      15.294227
      15.213969
      0.080258
      1
      -0.083566
      0
      0
      0
    
    
      2016-01-05
      Construction
      2560-01-076
      1163033
      STP 2016(425)
      3.901
      SL 224
      1/5/2016
      Lufkin
      Nacogdoches
      3
      ...
      789787.24
      12523.33
      13.579519
      13.595251
      -0.015732
      1
      0.015609
      0
      0
      0
    
    
      2016-04-06
      Construction
      2607-01-011
      4163205
      STP 2016(962)
      1.492
      SH 188
      4/6/2016
      Corpus Christi
      Aransas
      2
      ...
      1142011.55
      -117358.92
      13.948302
      13.839864
      0.108438
      0
      -0.114535
      2
      1
      0
    
    
      2016-04-05
      Construction
      2754-01-008
      4163012
      STP 2016(370)
      3.626
      FM 2705
      4/5/2016
      Waco
      Limestone
      3
      ...
      4733942.79
      -79712.79
      15.370269
      15.353287
      0.016982
      1
      -0.017127
      0
      0
      0
    
    
      2016-04-06
      Construction
      2887-01-012
      4163206
      STP 2016(958)HES
      4.942
      FM 2830
      4/6/2016
      Beaumont
      Liberty
      5
      ...
      999523.59
      168362.80
      13.815034
      13.970706
      -0.155672
      0
      0.144160
      1
      0
      1
    
    
      2016-02-10
      Construction
      3050-02-024
      2163202
      STP 2016(119)MM
      0.001
      FM 2978
      2/10/2016
      Houston
      Montgomery
      5
      ...
      15788778.28
      612127.00
      16.574810
      16.612847
      -0.038037
      1
      0.037323
      0
      0
      0
    
    
      2016-02-10
      Construction
      3136-01-177
      2163239
      STP 2016(637)
      0.054
      SL 1
      2/10/2016
      Austin
      Travis
      4
      ...
      787593.55
      94266.36
      13.576737
      13.689788
      -0.113051
      0
      0.106895
      1
      0
      1
    
    
      2016-02-09
      Construction
      3256-02-091
      2163032
      NH 2016(728)
      0.026
      SL 8
      2/9/2016
      Houston
      Harris
      3
      ...
      596804.70
      -98636.20
      13.299345
      13.118694
      0.180652
      0
      -0.197998
      2
      1
      0
    
    
      2016-04-05
      Construction
      3271-01-011
      4163009
      STP 2016(959)
      0.016
      FM 3180
      4/5/2016
      Beaumont
      Chambers
      4
      ...
      6439873.51
      -1090631.57
      15.678019
      15.492465
      0.185554
      0
      -0.203885
      2
      1
      0
    
    
      2016-03-03
      Construction
      3277-01-022
      3163005
      STP 2014(116)TE
      30.194
      FM 3177
      3/3/2016
      Austin
      Travis
      5
      ...
      2045353.10
      67623.37
      14.531081
      14.563608
      -0.032527
      1
      0.032004
      0
      0
      0
    
    
      2016-01-05
      Construction
      3348-01-019
      1163018
      STP 1502(549)HES
      0.001
      FM 3136
      1/5/2016
      Fort Worth
      Johnson
      2
      ...
      6829235.17
      -2143649.77
      15.736723
      15.360001
      0.376722
      0
      -0.457499
      2
      1
      0
    
    
      2016-02-10
      Construction
      3417-01-028
      2163210
      CC 3417-1-28
      3.834
      FM 734
      2/10/2016
      Austin
      Travis
      4
      ...
      839340.91
      -60727.46
      13.640372
      13.565270
      0.075102
      1
      -0.077994
      0
      0
      0
    
    
      2016-04-05
      Construction
      3538-01-041
      4163027
      STP 2016(963)
      0.030
      SH 242
      4/5/2016
      Houston
      Montgomery
      2
      ...
      3635039.14
      841444.90
      15.106130
      15.314348
      -0.208218
      0
      0.187970
      1
      0
      1
    
    
      2016-02-09
      Construction
      3538-01-044
      2163036
      STP 2016(722)HES
      3.529
      SH 242
      2/9/2016
      Houston
      Montgomery
      3
      ...
      275772.88
      9539.89
      12.527333
      12.561341
      -0.034008
      1
      0.033437
      0
      0
      0
    
  

5177 rows × 43 columns

Exploring the Data

We see that Bids are Log Normally Distributed



In [26]:

    
sns.jointplot(x="EngEst", y="WinBid", data=df, kind="reg"); sns.jointplot(x="lnEngEst", y="lnWinBid", data=df, kind="reg");









    



C:\Users\Collin\Anaconda3\lib\site-packages\statsmodels\nonparametric\kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j



In [20]:

    
cmap = {'0': 'g', '1': 'r', '2': 'b' }
df['cMoreOrLessThan10'] = df.MoreOrLessThan10.apply(lambda x: cmap[str(x)])
print (df.plot('lnEngEst', 'lnWinBid', kind='scatter', c=df.cMoreOrLessThan10))









    



Axes(0.125,0.125;0.775x0.755)



In [24]:

    
df_test = df[(df.Year == 2016) & (df.Month == 4)]
print(len(df_test) , 'projects in April 2016')

df_train = df[(df.Year != 2016) | (df.Month != 4)]
print(len(df_train) ,'projects from Jan 2010 to April 2016')

#df_train[['Year','Month']].tail()









    



67 projects in April 2016
5110 projects from Jan 2010 to April 2016



In [28]:

    
#Using ALL the Data

Percent = float(df.Within10Percent.sum()) / len(df)
print(round((Percent)*100,2) , '% of All the TxDOT estimates were within 10% of actual bid')

Percent_April_2016 = float(df[(df.Year == 2016) & (df.Month == 4)].Within10Percent.sum()) / len(df_test)
print (round((Percent_April_2016)*100,2) , '% of the April 2016 TxDOT estimates were within 10% of actual bid')









    



50.34 % of All the TxDOT estimates were within 10% of actual bid
46.27 % of the April 2016 TxDOT estimates were within 10% of actual bid



In [31]:

    
names_X = ['Length','NBidders','Year','Month','lnEngEst','Time']

def X_y(df):
    X = df[ names_X ]
    y_more = df['MoreThan10']
    y_less =df['LessThan10']
    return X, y_more, y_less

train_X, train_y_more, train_y_less = X_y(df_train)
test_X, test_y_more, test_y_less = X_y(df_test)

print(len(train_y_more))
print(len(train_y_less))
print(len(test_y_more))
print(len(test_y_less))



In [32]:

    
test_X.head()



In [45]:

    
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
def RFC_model(X, y):
    """ Performs grid search over the 'n_estimators' parameter for a 
        random forest regressor trained on the input data [X, y]. """
    
    # Create cross-validation sets from the training data
    cv_sets = KFold(n_splits = 30)
    cv_sets.split(X,y)

    # Create a decision tree regressor object
    clf = RandomForestClassifier()

    # Create a dictionary for the parameter 'max_depth' with a range from 1 to 100
    params = {'n_estimators':range(1,len(X.columns))}

    # Transform 'performance_metric' into a scoring function using 'make_scorer' 
    #scoring_fnc = make_scorer(performance_metric)

    # Create the grid search object
    grid = GridSearchCV(clf, params, cv=cv_sets)

    # Fit the grid search object to the data to compute the optimal model
    grid = grid.fit(X, y)

    # Return the optimal model after fitting the data
    return grid.best_estimator_



In [46]:

    
model_1 = RFC_model(train_X,train_y_more)



In [47]:

    
print('correct training classification = ', model_1.score(train_X, train_y_more))
print ('correct testing classification = ', model_1.score(test_X, test_y_more))









    



correct training classification =  0.929549902153
correct testing classification =  0.641791044776



In [ ]:

	Type	CCSJ	Letting Call Number	Project	Length	Highway	Letting Date	District	County	Number of Bidders	...	WinBid	Diff	lnWinBid	lnEngEst	DiffLn	Within10Percent	PercentOff	MoreOrLessThan10	LessThan10	MoreThan10
Date
2010-10-21	Construction	0001-05-016	10103012	STP 2009(816)ES	0.200	FM 259	10/21/2010	El Paso	El Paso	1	...	116285.82	7730.08	11.663806	11.728165	-0.064359	1	0.062331	0	0	0
2010-08-11	Construction	0002-01-083	8103285	STP 2011(321)	9.050	SH 20	8/11/2010	El Paso	El Paso	6	...	2477913.10	1365614.90	14.722927	15.161901	-0.438974	0	0.355302	1	0	1
2010-04-07	Construction	0002-08-050	4103999	C 2-8-50	1.250	IH 10	4/7/2010	El Paso	Hudspeth	1	...	228662.50	37724.88	12.340002	12.492707	-0.152704	0	0.141617	1	0	1
2010-08-11	Construction	0002-11-056	8103286	IM 0101(256)	4.316	IH 10	8/11/2010	El Paso	Culberson	2	...	1094961.15	492577.51	13.906229	14.277695	-0.371466	0	0.310277	1	0	1
2010-05-12	Construction	0002-14-033	5103213	STP 2010(756)ES	0.100	FM 258	5/12/2010	El Paso	El Paso	4	...	149069.50	109807.00	11.912168	12.464106	-0.551938	0	0.424168	1	0	1
2010-02-09	Construction	0003-01-051	2103035	IM 0101(254)	1.069	IH 10	2/9/2010	El Paso	Culberson	2	...	409447.31	-66821.41	12.922564	12.744394	0.178169	0	-0.195027	2	1	0
2010-05-12	Construction	0003-05-046	5103208	IM 0201(176)	3.806	IH 20	5/12/2010	Odessa	Reeves	4	...	2284464.17	550755.83	14.641642	14.857630	-0.215988	0	0.194255	1	0	1
2010-02-10	Construction	0003-05-048	2103209	IM 0201(175)	57.050	IH 20	2/10/2010	Odessa	Reeves	3	...	3460027.14	193621.95	15.056787	15.111237	-0.054450	1	0.052994	0	0	0
2010-08-11	Construction	0004-04-076	8103305	STP 2000(400)TE	1.181	IH 20	8/11/2010	Odessa	Ward	8	...	14473728.60	-1989505.68	16.487846	16.339976	0.147870	0	-0.159362	2	1	0
2010-12-08	Construction	0005-05-105	12103231	IM 0202(231)	78.025	IH 20	12/8/2010	Abilene	Howard	5	...	2555217.75	66828.32	14.753648	14.779466	-0.025818	1	0.025487	0	0	0
2010-10-22	Construction	0005-07-049	10103227	STP 2011(623)ES	18.838	IH 20	10/22/2010	Abilene	Mitchell	6	...	7827281.73	1083350.06	15.873126	16.002756	-0.129630	0	0.121579	1	0	1
2010-03-09	Construction	0005-08-097	3103012	STP 2010(601)HES	0.001	IH 20	3/9/2010	Abilene	Mitchell	4	...	85238.00	51205.42	11.353203	11.823665	-0.470463	0	0.375287	1	0	1
2010-08-10	Construction	0005-13-053	8103084	IM 0201(177)	9.164	IH 20	8/10/2010	Odessa	Ector	1	...	6438932.93	-639101.05	15.677873	15.573339	0.104534	0	-0.110193	2	1	0
2010-04-06	Construction	0006-01-091	4103008	IM 0202(230)	9.040	IH 20	4/6/2010	Abilene	Mitchell	6	...	3272602.41	-254919.33	15.001096	14.920000	0.081096	1	-0.084475	0	0	0
2010-08-11	Construction	0006-02-104	8103239	C 6-2-104	8.284	IH 20	8/11/2010	Abilene	Nolan	6	...	1038393.49	82349.54	13.853185	13.929502	-0.076317	1	0.073478	0	0	0
2010-08-10	Construction	0006-02-106	8103085	STP 2011(292)HES	9.889	IH 20	8/10/2010	Abilene	Nolan	8	...	589049.95	92165.21	13.286266	13.431633	-0.145367	0	0.135295	1	0	1
2010-01-05	Construction	0006-04-068	1103023	STP 2010(233)SB	17.488	IH 20	1/5/2010	Abilene	Taylor	14	...	1191750.29	126077.35	13.990934	14.091495	-0.100562	1	0.095671	0	0	0
2010-04-07	Construction	0006-05-108	4103213	STP 2010(559)ES	2.986	IH 20	4/7/2010	Abilene	Taylor	3	...	548674.30	36426.05	13.215260	13.279539	-0.064278	1	0.062256	0	0	0
2010-05-12	Construction	0006-18-052	5103221	STP 2010(845)	3.283	BI 20-R	5/12/2010	Abilene	Taylor	5	...	595110.27	84426.13	13.296502	13.429166	-0.132664	0	0.124241	1	0	1
2010-03-10	Construction	0007-02-047	3103218	STP 2010(602)ES	8.914	IH 20	3/10/2010	Abilene	Callahan	7	...	2294673.03	301437.12	14.646101	14.769525	-0.123424	0	0.116111	1	0	1
2010-01-05	Construction	0007-03-083	1103006	IM 0203(077)	308.483	IH 20	1/5/2010	Brownwood	Eastland	4	...	6349325.82	408080.75	15.663859	15.726150	-0.062291	1	0.060390	0	0	0
2010-02-09	Construction	0007-04-109	2103024	STP 2010(322)HES	7.724	IH 20	2/9/2010	Brownwood	Eastland	13	...	515542.26	393293.65	13.152975	13.719920	-0.566945	0	0.432744	1	0	1
2010-03-10	Construction	0007-06-076	3103217	IM 0203(078)	5.594	IH 20	3/10/2010	Brownwood	Eastland	10	...	2803289.11	120462.21	14.846304	14.888378	-0.042074	1	0.041201	0	0	0
2010-06-03	Construction	0007-06-081	6103009	IM 0203(079)	2.244	IH 20	6/3/2010	Brownwood	Eastland	4	...	1554812.14	-141855.95	14.256865	14.161195	0.095671	0	-0.100397	2	1	0
2010-08-10	Construction	0007-10-054	8103032	STP 2011(203)	7.380	US 180	8/10/2010	Fort Worth	Palo Pinto	9	...	1517352.16	296081.24	14.232477	14.410733	-0.178255	0	0.163271	1	0	1
2010-10-21	Construction	0008-02-070	10103013	STP 2011(472)	179.385	US 180	10/21/2010	Fort Worth	Parker	5	...	7357575.84	1156674.43	15.811241	15.957252	-0.146011	0	0.135852	1	0	1
2010-04-06	Construction	0008-03-098	4103032	IM 0204(271)	5.170	IH 20	4/6/2010	Fort Worth	Parker	6	...	1730019.68	140955.22	14.363643	14.441970	-0.078327	1	0.075338	0	0	0
2010-08-10	Construction	0008-06-048	8103035	BR 2011(098)	0.169	SH 180	8/10/2010	Fort Worth	Tarrant	5	...	1447722.05	48917.86	14.185502	14.218733	-0.033231	1	0.032685	0	0	0
2010-12-07	Construction	0008-13-223	12103027	IM 8204(276)	2.341	IH 820	12/7/2010	Fort Worth	Tarrant	4	...	1243621.79	317799.96	14.033538	14.261107	-0.227569	0	0.203532	1	0	1
2010-09-08	Construction	0008-15-044	9103021	IM 8204(273)	0.100	IH 820	9/8/2010	Fort Worth	Tarrant	5	...	96000.00	31793.18	11.472103	11.758168	-0.286065	0	0.248786	1	0	1
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
2016-03-03	Construction	1186-01-089	3163028	STP 2016(636)	4.103	FM 969	3/3/2016	Austin	Travis	6	...	2067551.68	284564.36	14.541876	14.670826	-0.128950	0	0.120982	1	0	1
2016-03-04	Construction	1259-01-041	3163228	STP 2016(812)HES	1.336	FM 1097	3/4/2016	Houston	Montgomery	3	...	238826.40	-41172.80	12.383492	12.194271	0.189221	0	-0.208308	2	1	0
2016-02-10	Construction	1277-01-015	2163228	BR 2016(411)	0.318	FM 1012	2/10/2016	Beaumont	Newton	3	...	1777740.74	-130944.23	14.390854	14.314342	0.076511	1	-0.079515	0	0	0
2016-04-06	Construction	1289-01-029	4163237	BR 1602(046)	3.551	FM 1126	4/6/2016	Dallas	Navarro	8	...	6714487.74	268564.38	15.719778	15.758997	-0.039219	1	0.038459	0	0	0
2016-01-06	Construction	1397-01-031	1163240	C 1397-1-31	3.094	FM 1836	1/6/2016	Dallas	Kaufman	5	...	5943147.30	-906869.64	15.597749	15.432178	0.165572	0	-0.180067	2	1	0
2016-01-06	Construction	1402-03-011	1163242	STP 2016(732)	20.298	FM 1375	1/6/2016	Houston	Montgomery	4	...	1297509.45	352076.70	14.075957	14.316035	-0.240078	0	0.213433	1	0	1
2016-04-06	Construction	1494-03-001	4163222	C 1494-3-1	0.018	FM 3486	4/6/2016	Dallas	Kaufman	5	...	4849821.37	-215531.22	15.394452	15.348994	0.045459	1	-0.046508	0	0	0
2016-02-10	Construction	1526-03-015	2163233	STP 2016(678)	1.865	FM 1606	2/10/2016	Abilene	Scurry	5	...	6876410.33	1435554.66	15.743607	15.933207	-0.189599	0	0.172709	1	0	1
2016-02-09	Construction	1531-02-012	2163022	STP 2016(679)	0.001	FM 1610	2/9/2016	Abilene	Scurry	7	...	5397845.79	369148.18	15.501511	15.567662	-0.066151	1	0.064011	0	0	0
2016-02-09	Construction	1606-02-019	2163020	STP 2014(884)	0.062	FM 2123	2/9/2016	Fort Worth	Wise	6	...	2779919.35	676795.35	14.837932	15.055829	-0.217897	0	0.195791	1	0	1
2016-02-10	Construction	1685-03-088	2163237	STP 2016(731)	1.762	FM 1960	2/10/2016	Houston	Harris	3	...	6636000.51	113980.17	15.708020	15.725050	-0.017030	1	0.016886	0	0	0
2016-01-05	Construction	1685-03-096	1163024	STP 2016(474)	2.088	FM 1960	1/5/2016	Houston	Harris	3	...	191207.77	81159.98	12.161116	12.514908	-0.353793	0	0.297979	1	0	1
2016-03-04	Construction	2003-01-008	3163217	STP 2016(839)HES	3.594	FM 2122	3/4/2016	Paris	Lamar	6	...	763854.27	-9884.53	13.546132	13.533108	0.013025	1	-0.013110	0	0	0
2016-01-05	Construction	2079-01-043	1163037	STP 2013(901)	2.879	FM 1220	1/5/2016	Fort Worth	Tarrant	2	...	8350775.03	-2311318.34	15.937865	15.613825	0.324040	0	-0.382703	2	1	0
2016-03-04	Construction	2263-03-041	3163212	C 2263-3-41	5.830	SH 361	3/4/2016	Corpus Christi	Nueces	4	...	5552811.60	302405.80	15.529815	15.582844	-0.053029	1	0.051647	0	0	0
2016-01-06	Construction	2266-02-138	1163205	NH 2016(594)	0.056	SH 360	1/6/2016	Fort Worth	Tarrant	2	...	2126639.56	-165737.61	14.570054	14.488915	0.081139	1	-0.084521	0	0	0
2016-01-05	Construction	2296-01-051	1163035	NH 2016(565)	7.055	SH 191	1/5/2016	Odessa	Ector	3	...	4387311.96	-338356.71	15.294227	15.213969	0.080258	1	-0.083566	0	0	0
2016-01-05	Construction	2560-01-076	1163033	STP 2016(425)	3.901	SL 224	1/5/2016	Lufkin	Nacogdoches	3	...	789787.24	12523.33	13.579519	13.595251	-0.015732	1	0.015609	0	0	0
2016-04-06	Construction	2607-01-011	4163205	STP 2016(962)	1.492	SH 188	4/6/2016	Corpus Christi	Aransas	2	...	1142011.55	-117358.92	13.948302	13.839864	0.108438	0	-0.114535	2	1	0
2016-04-05	Construction	2754-01-008	4163012	STP 2016(370)	3.626	FM 2705	4/5/2016	Waco	Limestone	3	...	4733942.79	-79712.79	15.370269	15.353287	0.016982	1	-0.017127	0	0	0
2016-04-06	Construction	2887-01-012	4163206	STP 2016(958)HES	4.942	FM 2830	4/6/2016	Beaumont	Liberty	5	...	999523.59	168362.80	13.815034	13.970706	-0.155672	0	0.144160	1	0	1
2016-02-10	Construction	3050-02-024	2163202	STP 2016(119)MM	0.001	FM 2978	2/10/2016	Houston	Montgomery	5	...	15788778.28	612127.00	16.574810	16.612847	-0.038037	1	0.037323	0	0	0
2016-02-10	Construction	3136-01-177	2163239	STP 2016(637)	0.054	SL 1	2/10/2016	Austin	Travis	4	...	787593.55	94266.36	13.576737	13.689788	-0.113051	0	0.106895	1	0	1
2016-02-09	Construction	3256-02-091	2163032	NH 2016(728)	0.026	SL 8	2/9/2016	Houston	Harris	3	...	596804.70	-98636.20	13.299345	13.118694	0.180652	0	-0.197998	2	1	0
2016-04-05	Construction	3271-01-011	4163009	STP 2016(959)	0.016	FM 3180	4/5/2016	Beaumont	Chambers	4	...	6439873.51	-1090631.57	15.678019	15.492465	0.185554	0	-0.203885	2	1	0
2016-03-03	Construction	3277-01-022	3163005	STP 2014(116)TE	30.194	FM 3177	3/3/2016	Austin	Travis	5	...	2045353.10	67623.37	14.531081	14.563608	-0.032527	1	0.032004	0	0	0
2016-01-05	Construction	3348-01-019	1163018	STP 1502(549)HES	0.001	FM 3136	1/5/2016	Fort Worth	Johnson	2	...	6829235.17	-2143649.77	15.736723	15.360001	0.376722	0	-0.457499	2	1	0
2016-02-10	Construction	3417-01-028	2163210	CC 3417-1-28	3.834	FM 734	2/10/2016	Austin	Travis	4	...	839340.91	-60727.46	13.640372	13.565270	0.075102	1	-0.077994	0	0	0
2016-04-05	Construction	3538-01-041	4163027	STP 2016(963)	0.030	SH 242	4/5/2016	Houston	Montgomery	2	...	3635039.14	841444.90	15.106130	15.314348	-0.208218	0	0.187970	1	0	1
2016-02-09	Construction	3538-01-044	2163036	STP 2016(722)HES	3.529	SH 242	2/9/2016	Houston	Montgomery	3	...	275772.88	9539.89	12.527333	12.561341	-0.034008	1	0.033437	0	0	0

	Length	NBidders	Year	Month	lnEngEst	Time
Date
2016-04-05	15.055	2	2016	4	14.979827	0.0
2016-04-06	0.016	5	2016	4	14.722602	0.0
2016-04-06	2.024	4	2016	4	16.836501	0.0
2016-04-06	6.294	3	2016	4	14.893941	0.0
2016-04-06	0.455	3	2016	4	13.837735	0.0