notebook.community

Edit and run



In [1]:

    
using DataFrames
using JSON
using Iterators
using taxis
using HDF5, JLD
using Stats
using kNN
using sequenceCompare
#reload("taxis")
#reload("sequenceCompare")
nprocs()









    Out[1]:





8



In [2]:

    
println("Begin")

println("loading csv files")
taxi_df = readtable("/home/tony/ML/taxi/taxi2_time/train_100k.csv")
taxi_validation_df = readtable("/home/tony/ML/taxi/taxi2_time/test.csv")

println("loading coords")
taxi_df[:COORDS] = [float(hcat(JSON.parse(x)...)) for x in  taxi_df[:POLYLINE]]
taxi_validation_df[:COORDS] = [float(hcat(JSON.parse(x)...)) for x in taxi_validation_df[:POLYLINE]]

println("getting coords counts")
taxi_df[:NUM_COORDS] = [length(x)::Int64 for x in taxi_df[:COORDS]]
taxi_validation_df[:NUM_COORDS] = [length(x)::Int64 for x in taxi_validation_df[:COORDS]]

println("deleting unneeded data rows/columns")
delete!(taxi_validation_df, :POLYLINE)
delete!(taxi_df, :POLYLINE)

println("adding start/end point columns")
taxi_df[:START] = [x[:,1] for x in taxi_df[:COORDS]]
taxi_validation_df[:START] = [x[:,1] for x in taxi_validation_df[:COORDS]]

taxi_df[:END] = [x[:,end] for x in taxi_df[:COORDS]]
taxi_validation_df[:END] = [x[:,end] for x in taxi_validation_df[:COORDS]]

println("deleting training examples with no coords!")
#These examples are not going to be useful!
deleterows!(taxi_df, find(taxi_df[:NUM_COORDS] .== 0))

println("generating test coords column")
taxi_df[:COORDS_TEST] = [x[1:2,1:round(rand(1)[1]*size(x,2))] for x in taxi_df[:COORDS]]

println("done!")









    



Begin
loading csv files
loading coords
getting coords counts
deleting unneeded data rows/columns
adding start/end point columns
deleting training examples with no coords!
generating test coords column
done!



In [3]:

    
println("finding unique number of coords")
all_coords_val = hcat(taxi_validation_df[:COORDS]...)
all_coords = hcat(taxi_df[:COORDS]...)









    



finding unique number of coords






    Out[3]:





2x4774976 Array{Float64,2}:
 -8.61864  -8.6185  -8.62033  -8.62215  …  -8.6304  -8.63042  -8.63041
 41.1414   41.1414  41.1425   41.1438      41.1579  41.158    41.1579

Creating coord dict



In [58]:

    
#small_taxi_df = GetTableOrderedSubset(taxi_df, 10000)
#coordsDB = ConstructCoordsDatabase(small_taxi_df, 4)

Creating new features



In [ ]:

    
function GetDateInfo(df)
    if haskey(df, :DAYOFWEEK)
        return df
end

function GetDistanceData(df)
    if haskey(df, :DISTANCE)
        return df
end

Training models



In [58]:

    
all_train_coords = taxi_df[:COORDS][1:200]
all_validation_coords = taxi_validation_df[:COORDS]
test_guess_paths = findClosestTrainingExampleForTestSet(all_train_coords, all_validation_coords, 2)
taxi_validation_df[:GUESS_PATHS] = test_guess_paths









    



20/320 for 200 train path examples
40/320 for 200 train path examples
60/320 for 200 train path examples
80/320 for 200 train path examples
100/320 for 200 train path examples
120/320 for 200 train path examples
140/320 for 200 train path examples
160/320 for 200 train path examples
180/320 for 200 train path examples
200/320 for 200 train path examples
220/320 for 200 train path examples
240/320 for 200 train path examples
260/320 for 200 train path examples
280/320 for 200 train path examples
300/320 for 200 train path examples
320/320 for 200 train path examples






    Out[58]:





320-element Array{Any,1}:
 2x4 Array{Float64,2}:
 -8.59926  -8.59849  -8.5967  -8.59458
 41.1492   41.1484   41.1494  41.1507                                                                      
 2x25 Array{Float64,2}:
 -8.6108  -8.6108  -8.6108  -8.61088  …  -8.60394  -8.60395  -8.60395
 41.1449  41.1449  41.1449  41.1452      41.1615   41.1615   41.1616       
 2x31 Array{Float64,2}:
 -8.58581  -8.58576  -8.58664  -8.58831  …  -8.5805  -8.58051  -8.58049
 41.1487   41.149    41.149    41.1494      41.1649  41.1649   41.1649   
 2x17 Array{Float64,2}:
 -8.61248  -8.6123  -8.61161  -8.61086  …  -8.61542  -8.61538  -8.61536
 41.1461   41.146   41.1461   41.1457      41.1426   41.1426   41.1426   
 2x1 Array{Float64,2}:
 -8.61766
 41.1464                                                                                                                                
 2x42 Array{Float64,2}:
 -8.62834  -8.62834  -8.62806  -8.62793  …  -8.62623  -8.62623  -8.62623
 41.1577   41.1578   41.1578   41.158       41.1745   41.1745   41.1745 
 2x4 Array{Float64,2}:
 -8.59926  -8.59849  -8.5967  -8.59458
 41.1492   41.1484   41.1494  41.1507                                                                      
 2x3 Array{Float64,2}:
 -8.60127  -8.60125  -8.60106
 41.1818   41.1818   41.1822                                                                                        
 2x35 Array{Float64,2}:
 -8.61756  -8.61755  -8.61724  -8.61574  …  -8.60553  -8.60558  -8.60616
 41.1461   41.1459   41.1449   41.1454      41.1255   41.1266   41.1261 
 2x43 Array{Float64,2}:
 -8.58564  -8.58567  -8.58641  -8.58625  …  -8.57739  -8.57677  -8.57659
 41.1486   41.1489   41.1489   41.148       41.1897   41.1895   41.1895 
 2x17 Array{Float64,2}:
 -8.59924  -8.59868  -8.59794  -8.59696  …  -8.58477  -8.58432  -8.58433
 41.1492   41.1493   41.1506   41.1523      41.1685   41.1693   41.1693 
 2x4 Array{Float64,2}:
 -8.59926  -8.59849  -8.5967  -8.59458
 41.1492   41.1484   41.1494  41.1507                                                                      
 2x4 Array{Float64,2}:
 -8.59926  -8.59849  -8.5967  -8.59458
 41.1492   41.1484   41.1494  41.1507                                                                      
 ⋮                                                                                                                                                                       
 2x20 Array{Float64,2}:
 -8.61085  -8.6109  -8.61092  -8.60998  …  -8.58616  -8.5861  -8.58618
 41.1457   41.1458  41.146    41.1466      41.1488   41.1488  41.1488     
 2x33 Array{Float64,2}:
 -8.61991  -8.61987  -8.62    -8.62115  …  -8.64491  -8.64612  -8.64652
 41.148    41.148    41.1478  41.1474      41.1406   41.1412   41.1416   
 2x25 Array{Float64,2}:
 -8.68934  -8.68887  -8.68769  -8.68648  …  -8.65397  -8.65405  -8.65407
 41.1681   41.1674   41.1663   41.1641      41.1493   41.1493   41.1493 
 2x53 Array{Float64,2}:
 -8.61397  -8.61372  -8.61338  -8.613   …  -8.67073  -8.67072  -8.67071
 41.1466   41.1474   41.1475   41.1482     41.1648   41.1648   41.1648   
 2x20 Array{Float64,2}:
 -8.61085  -8.6109  -8.61092  -8.60998  …  -8.58616  -8.5861  -8.58618
 41.1457   41.1458  41.146    41.1466      41.1488   41.1488  41.1488     
 2x46 Array{Float64,2}:
 -8.64938  -8.64943  -8.65016  -8.64953  …  -8.56612  -8.56612  -8.56614
 41.1543   41.1543   41.1541   41.154       41.1537   41.1537   41.1537 
 2x38 Array{Float64,2}:
 -8.6125  -8.61179  -8.61173  -8.61119  …  -8.59717  -8.59716  -8.59718
 41.146   41.146    41.146    41.1462      41.1824   41.1824   41.1824   
 2x64 Array{Float64,2}:
 -8.57362  -8.57406  -8.57344  -8.57364  …  -8.61544  -8.61733  -8.61767
 41.1708   41.1706   41.1718   41.172       41.1473   41.1473   41.1464 
 2x23 Array{Float64,2}:
 -8.61864  -8.6185  -8.62033  -8.62215  …  -8.63083  -8.63083  -8.63084
 41.1414   41.1414  41.1425   41.1438      41.1545   41.1545   41.1545   
 2x39 Array{Float64,2}:
 -8.64768  -8.64767  -8.64909  …  -8.6474  -8.6474  -8.6474  -8.64739
 41.1507   41.1507   41.1509      41.1486  41.1486  41.1486  41.1486       
 2x25 Array{Float64,2}:
 -8.57868  -8.57945  -8.57907  -8.57769  …  -8.56042  -8.56041  -8.56041
 41.1458   41.1465   41.1482   41.1498      41.1545   41.1545   41.1545 
 2x35 Array{Float64,2}:
 -8.60221  -8.60165  -8.60027  -8.59898  …  -8.60425  -8.60426  -8.56856
 41.1634   41.1638   41.1648   41.1655      41.1609   41.1609   41.1933



In [59]:

    
all_train_coords = taxi_df[:COORDS][1000:1200]
test_df = tail(taxi_df, 100)
all_test_coords = test_df[:COORDS_TEST]
test_guess_paths = findClosestTrainingExampleForTestSet(all_train_coords, all_test_coords, 2)
test_df[:GUESS_PATHS] = test_guess_paths









    



20/100 for 201 train path examples
40/100 for 201 train path examples
60/100 for 201 train path examples
80/100 for 201 train path examples
100/100 for 201 train path examples






    Out[59]:





100-element Array{Any,1}:
 2x36 Array{Float64,2}:
 -8.61116  -8.6112  -8.61087  -8.61088  …  -8.60263  -8.6026  -8.60257
 41.1493   41.1493  41.1492   41.1493      41.1616   41.1617  41.1617     
 2x1 Array{Float64,2}:
 -8.621 
 41.1501                                                                                                                                 
 2x43 Array{Float64,2}:
 -8.60153  -8.60109  -8.59983  -8.59957  …  -8.56193  -8.56193  -8.56189
 41.1457   41.1458   41.1459   41.1459      41.1413   41.1413   41.1413 
 2x7 Array{Float64,2}:
 -8.61071  -8.61082  -8.61055  -8.61039  -8.61008  -8.61191  -8.61199
 41.1457   41.1458   41.146    41.1461   41.1464   41.1486   41.1486        
 2x34 Array{Float64,2}:
 -8.61405  -8.61403  -8.61404  -8.61465  …  -8.61293  -8.61295  -8.61297
 41.1412   41.1412   41.1412   41.1411      41.1335   41.1335   41.1335 
 2x33 Array{Float64,2}:
 -8.60787  -8.60783  -8.60781  -8.60783  …  -8.60202  -8.60202  -8.60201
 41.1675   41.1674   41.1674   41.1674      41.1796   41.1796   41.1796 
 2x24 Array{Float64,2}:
 -8.61097  -8.61096  -8.6107  -8.60992  …  -8.6105  -8.61155  -8.61183
 41.1457   41.1457   41.146   41.1465      41.1431  41.1429   41.1425     
 2x24 Array{Float64,2}:
 -8.64961  -8.64974  -8.65026  -8.65071  …  -8.64407  -8.6441  -8.64411
 41.1673   41.1668   41.1652   41.1635      41.159    41.1591  41.1591   
 2x20 Array{Float64,2}:
 -8.58433  -8.58481  -8.58553  -8.58534  …  -8.58365  -8.58397  -8.58421
 41.1631   41.1629   41.1627   41.1613      41.1505   41.1495   41.1488 
 2x30 Array{Float64,2}:
 -8.61094  -8.61063  -8.60998  -8.60897  …  -8.60563  -8.60603  -8.60607
 41.1457   41.1459   41.1464   41.1467      41.1443   41.1444   41.1445 
 2x47 Array{Float64,2}:
 -8.65282  -8.65282  -8.65282  -8.65283  …  -8.622   -8.62193  -8.62184
 41.1513   41.1513   41.1513   41.1513      41.1476  41.1476   41.1476   
 2x7 Array{Float64,2}:
 -8.61071  -8.61082  -8.61055  -8.61039  -8.61008  -8.61191  -8.61199
 41.1457   41.1458   41.146    41.1461   41.1464   41.1486   41.1486        
 2x1 Array{Float64,2}:
 -8.621 
 41.1501                                                                                                                                 
 ⋮                                                                                                                                                                       
 2x7 Array{Float64,2}:
 -8.61071  -8.61082  -8.61055  -8.61039  -8.61008  -8.61191  -8.61199
 41.1457   41.1458   41.146    41.1461   41.1464   41.1486   41.1486        
 2x46 Array{Float64,2}:
 -8.61078  -8.61086  -8.61042  -8.61012  …  -8.63747  -8.63753  -8.63754
 41.1457   41.1458   41.1461   41.1465      41.1594   41.1594   41.1594 
 2x16 Array{Float64,2}:
 -8.62001  -8.62007  -8.62008  -8.62002  …  -8.62007  -8.62005  -8.62  
 41.1468   41.1467   41.1467   41.1467      41.1467   41.1467   41.1467  
 2x65 Array{Float64,2}:
 -8.60689  -8.60687  -8.6069  -8.60692  …  -8.58568  -8.58568  -8.58568
 41.1457   41.1458   41.1459  41.1459      41.1486   41.1486   41.1487   
 2x22 Array{Float64,2}:
 -8.66597  -8.66746  -8.66807  -8.66808  …  -8.66759  -8.66775  -8.66776
 41.1763   41.1756   41.1757   41.1757      41.1668   41.1667   41.1667 
 2x28 Array{Float64,2}:
 -8.58587  -8.58577  -8.58577  -8.58582  …  -8.60582  -8.60642  -8.6072
 41.1485   41.1486   41.1487   41.1489      41.1445   41.1446   41.1446  
 2x17 Array{Float64,2}:
 -8.61087  -8.61086  -8.61017  -8.60931  …  -8.61079  -8.61202  -8.61241
 41.1456   41.1457   41.1463   41.1467      41.1536   41.1545   41.1552 
 2x81 Array{Float64,2}:
 -8.62093  -8.621  -8.62103  -8.62101  …  -8.67019  -8.67019  -8.6702
 41.1499   41.15   41.15     41.15        41.2375   41.2375   41.2375      
 2x27 Array{Float64,2}:
 -8.63053  -8.63056  -8.63105  -8.63229  …  -8.64841  -8.64895  -8.64907
 41.1548   41.1548   41.1545   41.1536      41.1508   41.1504   41.1495 
 2x39 Array{Float64,2}:
 -8.61401  -8.61403  -8.61402  -8.61403  …  -8.62753  -8.62826  -8.62834
 41.1412   41.1412   41.1412   41.1412      41.1557   41.1565   41.1565 
 2x47 Array{Float64,2}:
 -8.60655  -8.60674  -8.6072  -8.60727  …  -8.63673  -8.63671  -8.63664
 41.1446   41.1446   41.1451  41.1453      41.1573   41.1573   41.1574   
 2x32 Array{Float64,2}:
 -8.60647  -8.60671  -8.60729  -8.60711  …  -8.6288  -8.6288  -8.62879
 41.1447   41.1448   41.1453   41.1459      41.1592  41.1591  41.1591



In [88]:

    
function score_path_guess(test_df)
    pred_paths = test_df[:GUESS_PATHS]
    actual_paths = test_df[:COORDS]
    pred_times = [(length(x)*15)::Int64 for x in pred_paths]
    actual_times = [(length(x)*15)::Int64 for x in actual_paths]
    score = sqrt(mean((log(max(1100, pred_times)+1)-log(actual_times-1)).^2))
    println("time score: ", score) 
    for k=1:length(pred_times)
        println("pred: ", pred_times[k], ", actual: ", actual_times[k]) 
    end
end

score_path_guess(test_df)









    



time score: 0.5085077313659201
pred: 1080, actual: 1290
pred: 30, actual: 1530
pred: 1290, actual: 2250
pred: 210, actual: 810
pred: 1020, actual: 390
pred: 990, actual: 1230
pred: 720, actual: 810
pred: 720, actual: 780
pred: 600, actual: 270
pred: 900, actual: 1020
pred: 1410, actual: 1200
pred: 210, actual: 780
pred: 30, actual: 1830
pred: 30, actual: 750
pred: 1920, actual: 8130
pred: 720, actual: 1140
pred: 600, actual: 1140
pred: 1620, actual: 1680
pred: 720, actual: 1980
pred: 600, actual: 1230
pred: 870, actual: 780
pred: 600, actual: 1500
pred: 2220, actual: 1500
pred: 900, actual: 690
pred: 1470, actual: 1770
pred: 600, actual: 960
pred: 1350, actual: 1290
pred: 900, actual: 2490
pred: 30, actual: 2370
pred: 840, actual: 510
pred: 630, actual: 1950
pred: 30, actual: 1950
pred: 30, actual: 2550
pred: 810, actual: 1800
pred: 600, actual: 1680
pred: 210, actual: 960
pred: 210, actual: 1170
pred: 720, actual: 1020
pred: 210, actual: 1440
pred: 1380, actual: 720
pred: 2070, actual: 870
pred: 1950, actual: 2790
pred: 2130, actual: 2400
pred: 1740, actual: 1680
pred: 1590, actual: 1320
pred: 930, actual: 810
pred: 2130, actual: 3600
pred: 510, actual: 1650
pred: 1410, actual: 2730
pred: 30, actual: 1380
pred: 30, actual: 1410
pred: 1530, actual: 1410
pred: 30, actual: 900
pred: 600, actual: 870
pred: 1710, actual: 1470
pred: 840, actual: 870
pred: 1620, actual: 360
pred: 960, actual: 2610
pred: 480, actual: 1260
pred: 1620, actual: 840
pred: 600, actual: 1290
pred: 1080, actual: 480
pred: 1350, actual: 1380
pred: 1080, actual: 1230
pred: 840, actual: 1770
pred: 2580, actual: 2700
pred: 930, actual: 1530
pred: 1170, actual: 900
pred: 450, actual: 1650
pred: 1230, actual: 1290
pred: 30, actual: 1440
pred: 30, actual: 1290
pred: 1410, actual: 1080
pred: 660, actual: 450
pred: 600, actual: 1410
pred: 30, actual: 1470
pred: 1410, actual: 2280
pred: 30, actual: 1860
pred: 1410, actual: 1710
pred: 1410, actual: 1650
pred: 870, actual: 810
pred: 810, actual: 630
pred: 480, actual: 1140
pred: 840, actual: 1050
pred: 600, actual: 930
pred: 810, actual: 570
pred: 2580, actual: 2100
pred: 1380, actual: 990
pred: 210, actual: 480
pred: 1380, actual: 1350
pred: 480, actual: 1110
pred: 1950, actual: 1560
pred: 660, actual: 3420
pred: 840, actual: 720
pred: 510, actual: 690
pred: 2430, actual: 2430
pred: 810, actual: 3990
pred: 1170, actual: 1680
pred: 1410, actual: 2490
pred: 960, actual: 960



In [126]:

    
log([1 2 3])









    Out[126]:





1x3 Array{Float64,2}:
 0.0  0.693147  1.09861



In [62]:

    
taxi_validation_df[:GUESS_PATHS] = test_guess_paths
#guess_times = [length(x)*15 for x in test_guess_paths]
num_test_examples = length(test_guess_paths)

guess_times = Array(Int64, num_test_examples)
dest_coords = cell(num_test_examples)

all_test_paths = taxi_validation_df[:COORDS]
for k=1:num_test_examples
    test_path = all_test_paths[k]
    best_guess_path = test_guess_paths[k]
    
    test_path_time = length(test_path)*15
    best_guess_time = length(best_guess_path)*15
    
    if test_path_time > best_guess_time
        println(k, ":  guessing ", best_guess_time, " but existing time is ", test_path_time)
        best_guess_time = test_path_time + 100
    end
    guess_times[k] = best_guess_time
end

submission_validation = guess_times









    



1:  guessing 30 but existing time is 330
2:  guessing 750 but existing time is 1200
3:  guessing 1050 but existing time is 1200
5:  guessing 30 but existing time is 60
6:  guessing 1170 but existing time is 4110
7:  guessing 300 but existing time is 720
8:  guessing 390 but existing time is 510
10:  guessing 1050 but existing time is 2370
11:  guessing 600 but existing time is 1890
12:  guessing 330 but existing time is 510
13:  guessing 30 but existing time is 150
14:  guessing 1200 but existing time is 2280
15:  guessing 930 but existing time is 1620
16:  guessing 1920 but existing time is 2910
18:  guessing 840 but existing time is 1890
19:  guessing 780 but existing time is 930
20:  guessing 600 but existing time is 4920
21:  guessing 30 but existing time is 180
23:  guessing 450 but existing time is 540
24:  guessing 30 but existing time is 120
30:  guessing 960 but existing time is 1200
31:  guessing 960 but existing time is 1350
32:  guessing 510 but existing time is 810
33:  guessing 1080 but existing time is 1140
36:  guessing 2070 but existing time is 4710
37:  guessing 900 but existing time is 2100
38:  guessing 660 but existing time is 720
39:  guessing 90 but existing time is 210
40:  guessing 480 but existing time is 630
41:  guessing 1350 but existing time is 1440
42:  guessing 90 but existing time is 210
43:  guessing 930 but existing time is 1200
44:  guessing 1140 but existing time is 1770
46:  guessing 120 but existing time is 390
47:  guessing 840 but existing time is 930
50:  guessing 1200 but existing time is 1710
51:  guessing 1350 but existing time is 1920
52:  guessing 420 but existing time is 690
54:  guessing 930 but existing time is 1320
57:  guessing 1140 but existing time is 1590
59:  guessing 990 but existing time is 4020
60:  guessing 480 but existing time is 1140
61:  guessing 330 but existing time is 480
63:  guessing 330 but existing time is 600
66:  guessing 1140 but existing time is 1770
69:  guessing 420 but existing time is 1230
70:  guessing 30 but existing time is 270
71:  guessing 4080 but existing time is 14100
72:  guessing 2340 but existing time is 3360
74:  guessing 1740 but existing time is 2340
75:  guessing 1200 but existing time is 2040
76:  guessing 180 but existing time is 330
77:  guessing 30 but existing time is 60
80:  guessing 1500 but existing time is 2400
81:  guessing 810 but existing time is 990
82:  guessing 360 but existing time is 570
83:  guessing 1260 but existing time is 1470
84:  guessing 810 but existing time is 2910
88:  guessing 930 but existing time is 1110
89:  guessing 990 but existing time is 1050
90:  guessing 1170 but existing time is 1260
92:  guessing 1020 but existing time is 1260
95:  guessing 1230 but existing time is 6750
96:  guessing 3660 but existing time is 4140
98:  guessing 1260 but existing time is 2010
99:  guessing 570 but existing time is 660
101:  guessing 1050 but existing time is 1980
108:  guessing 540 but existing time is 1920
109:  guessing 90 but existing time is 300
114:  guessing 600 but existing time is 690
117:  guessing 1200 but existing time is 1920
118:  guessing 1500 but existing time is 4560
119:  guessing 1200 but existing time is 1530
121:  guessing 600 but existing time is 1080
126:  guessing 3930 but existing time is 11610
127:  guessing 390 but existing time is 870
132:  guessing 1200 but existing time is 1380
133:  guessing 600 but existing time is 5760
136:  guessing 720 but existing time is 1350
139:  guessing 930 but existing time is 1110
141:  guessing 480 but existing time is 750
143:  guessing 690 but existing time is 2490
144:  guessing 2760 but existing time is 4650
147:  guessing 180 but existing time is 390
148:  guessing 600 but existing time is 870
150:  guessing 1380 but existing time is 2130
151:  guessing 60 but existing time is 360
153:  guessing 240 but existing time is 690
154:  guessing 60 but existing time is 330
158:  guessing 30 but existing time is 150
160:  guessing 690 but existing time is 780
161:  guessing 330 but existing time is 990
162:  guessing 1050 but existing time is 1380
164:  guessing 360 but existing time is 1440
165:  guessing 390 but existing time is 600
166:  guessing 1590 but existing time is 1800
167:  guessing 3660 but existing time is 11070
169:  guessing 30 but existing time is 90
171:  guessing 1230 but existing time is 2160
172:  guessing 990 but existing time is 2130
174:  guessing 1350 but existing time is 3300
176:  guessing 30 but existing time is 90
177:  guessing 1260 but existing time is 2370
179:  guessing 600 but existing time is 1710
181:  guessing 750 but existing time is 1710
183:  guessing 660 but existing time is 900
184:  guessing 30 but existing time is 240
185:  guessing 690 but existing time is 1020
187:  guessing 480 but existing time is 810
188:  guessing 60 but existing time is 180
190:  guessing 1110 but existing time is 6600
191:  guessing 570 but existing time is 630
192:  guessing 960 but existing time is 1350
194:  guessing 690 but existing time is 900
195:  guessing 1020 but existing time is 1380
196:  guessing 90 but existing time is 360
198:  guessing 600 but existing time is 1020
199:  guessing 870 but existing time is 9810
200:  guessing 1050 but existing time is 2400
201:  guessing 1710 but existing time is 18360
202:  guessing 30 but existing time is 270
204:  guessing 1050 but existing time is 3210
208:  guessing 60 but existing time is 180
209:  guessing 120 but existing time is 300
210:  guessing 840 but existing time is 3330
211:  guessing 600 but existing time is 2520
213:  guessing 1350 but existing time is 4890
214:  guessing 390 but existing time is 510
215:  guessing 1440 but existing time is 1620
216:  guessing 600 but existing time is 3450
217:  guessing 2640 but existing time is 7140
218:  guessing 960 but existing time is 2820
221:  guessing 1620 but existing time is 6450
223:  guessing 2400 but existing time is 10830
224:  guessing 840 but existing time is 1620
225:  guessing 30 but existing time is 120
226:  guessing 660 but existing time is 930
227:  guessing 510 but existing time is 540
228:  guessing 420 but existing time is 660
229:  guessing 30 but existing time is 90
231:  guessing 810 but existing time is 1080
232:  guessing 930 but existing time is 1140
234:  guessing 390 but existing time is 1140
235:  guessing 480 but existing time is 660
243:  guessing 600 but existing time is 690
244:  guessing 90 but existing time is 180
246:  guessing 1530 but existing time is 2550
250:  guessing 720 but existing time is 930
251:  guessing 30 but existing time is 150
252:  guessing 30 but existing time is 120
253:  guessing 1410 but existing time is 1440
256:  guessing 300 but existing time is 450
259:  guessing 30 but existing time is 150
260:  guessing 420 but existing time is 450
262:  guessing 390 but existing time is 450
263:  guessing 1020 but existing time is 1140
264:  guessing 780 but existing time is 1350
265:  guessing 210 but existing time is 240
266:  guessing 450 but existing time is 570
270:  guessing 780 but existing time is 2130
272:  guessing 30 but existing time is 60
273:  guessing 1740 but existing time is 1890
275:  guessing 480 but existing time is 510
277:  guessing 210 but existing time is 240
282:  guessing 60 but existing time is 90
293:  guessing 1560 but existing time is 1590
297:  guessing 150 but existing time is 300
300:  guessing 540 but existing time is 2280
301:  guessing 750 but existing time is 1140
302:  guessing 30 but existing time is 120
305:  guessing 570 but existing time is 780
306:  guessing 570 but existing time is 780
307:  guessing 480 but existing time is 600
310:  guessing 480 but existing time is 750
313:  guessing 1200 but existing time is 1350
314:  guessing 1410 but existing time is 8010
315:  guessing 1140 but existing time is 1410
316:  guessing 1410 but existing time is 1440
317:  guessing 690 but existing time is 2820
320:  guessing 1440 but existing time is 1560






    Out[62]:





320-element Array{Int64,1}:
  430
 1300
 1300
  510
  160
 4210
  820
  610
 1470
 2470
 1990
  610
  250
    ⋮
 1080
  850
  690
 3210
 1450
 8110
 1510
 1540
 2920
  390
  870
 1660

Generating Submission



In [53]:

    
# beat the benchmark example
#mean_time = mean(times_validation)
#submission_validation = [max(x, mean_time) for x in times_validation]

#submission_validation

df_submission = DataFrame()
df_submission[:TRIP_ID] = taxi_validation_df[:TRIP_ID]
df_submission[:TRAVEL_TIME] = submission_validation
writetable("second_submission.csv", df_submission)



In [50]:









    



`!` has no method matching !(::Function)
while loading In[50], in expression starting on line 1



In [388]:

    
immutable Point2{T}
  x::T
  y::T
end

D = [Point2(1.,2.) => 42]
haskey(D, Point2(1., 2.))  #False!









    Out[388]:





true



In [42]:









    



ArgumentError("setindex!(::DataFrame, ...) only broadcasts scalars, not arrays")
while loading In[42], in expression starting on line 2

 in setindex! at /home/tony/.julia/v0.3/DataFrames/src/dataframe/dataframe.jl:356
 in DataFrame at /home/tony/.julia/v0.3/DataFrames/src/dataframe/dataframe.jl:104



In [39]:

    
taxi_validation_df[:COORDS]









    Out[39]:





320-element DataArray{Any,1}:
 2x11 Array{Float64,2}:
 -8.58568  -8.58571  -8.58568  -8.58573  …  -8.587   -8.58658  -8.58488
 41.1485   41.1486   41.1489   41.1489      41.1475  41.1472   41.1466    
 2x40 Array{Float64,2}:
 -8.61088  -8.61086  -8.6109  -8.61044  …  -8.60293  -8.60255  -8.60189
 41.1456   41.1456   41.1458  41.1462      41.1628   41.1631   41.1636    
 2x40 Array{Float64,2}:
 -8.58574  -8.58573  -8.58572  -8.58629  …  -8.57695  -8.5759  -8.5749
 41.1486   41.1488   41.149    41.149       41.1664   41.1672  41.1677     
 2x8 Array{Float64,2}:
 -8.61396  -8.61412  -8.61509  -8.61528  …  -8.61524  -8.61505  -8.61464
 41.1412   41.1411   41.1409   41.1408      41.1408   41.1408   41.141    
 2x2 Array{Float64,2}:
 -8.6199  -8.61989
 41.148   41.148                                                                                                                
 2x137 Array{Float64,2}:
 -8.63061  -8.63061  -8.63074  -8.63151  …  -8.62639  -8.6264  -8.62641
 41.1782   41.1782   41.1782   41.1781      41.172    41.172   41.172    
 2x24 Array{Float64,2}:
 -8.58562  -8.58564  -8.58592  -8.58637  …  -8.58156  -8.58181  -8.58205
 41.1489   41.1489   41.1489   41.1489      41.1533   41.1535   41.1538  
 2x17 Array{Float64,2}:
 -8.58292  -8.582   -8.58108  -8.58011  …  -8.57703  -8.57753  -8.57877
 41.1811   41.1818  41.183    41.184       41.1861   41.1861   41.1852    
 2x43 Array{Float64,2}:
 -8.60653  -8.60667  -8.6068  -8.60679  …  -8.60548  -8.60549  -8.60549
 41.1447   41.1447   41.1447  41.1447      41.1257   41.1258   41.1258    
 2x79 Array{Float64,2}:
 -8.58566  -8.5857  -8.58573  -8.58574  …  -8.59117  -8.58826  -8.58631
 41.1486   41.1486  41.1486   41.1486      41.1942   41.1974   41.1993    
 2x63 Array{Float64,2}:
 -8.59123  -8.59123  -8.59122  -8.591   …  -8.58767  -8.5881  -8.58823
 41.1627   41.1627   41.1627   41.1626     41.1687   41.1689  41.1689      
 2x17 Array{Float64,2}:
 -8.58569  -8.58576  -8.58571  -8.58576  …  -8.59455  -8.59592  -8.59665
 41.1486   41.1487   41.1489   41.149       41.1507   41.1499   41.1494  
 2x5 Array{Float64,2}:
 -8.5801  -8.58023  -8.58152  -8.58252  -8.58433
 41.1594  41.1594   41.1593   41.1589   41.1583                                                   
 ⋮                                                                                                                                                                        
 2x21 Array{Float64,2}:
 -8.61072  -8.61049  -8.6094  -8.6085  …  -8.59074  -8.58956  -8.58816
 41.1445   41.1437   41.1432  41.1431     41.1469   41.1471   41.1473      
 2x25 Array{Float64,2}:
 -8.6406  -8.64005  -8.64022  -8.63974  …  -8.63605  -8.63605  -8.63605
 41.1549  41.1547   41.1536   41.1533      41.1405   41.1405   41.1406    
 2x19 Array{Float64,2}:
 -8.68929  -8.6893  -8.68873  -8.68765  …  -8.678   -8.67778  -8.67773
 41.1682   41.1682  41.1674   41.1663      41.1521  41.1517   41.1515      
 2x72 Array{Float64,2}:
 -8.60636  -8.60636  -8.60711  -8.6073  …  -8.68831  -8.6866  -8.68486
 41.1445   41.1446   41.1451   41.1457     41.1728   41.1734  41.1734      
 2x45 Array{Float64,2}:
 -8.61253  -8.61253  -8.61287  -8.61289  …  -8.58566  -8.58584  -8.58584
 41.1595   41.1595   41.1595   41.1595      41.1489   41.149    41.149   
 2x267 Array{Float64,2}:
 -8.66747  -8.66735  -8.66717  -8.66798  …  -8.53496  -8.53497  -8.53498
 41.2381   41.2383   41.2384   41.2387      41.1433   41.1433   41.1433 
 2x47 Array{Float64,2}:
 -8.60647  -8.60648  -8.60649  -8.60667  …  -8.5917  -8.59579  -8.60045
 41.1447   41.1447   41.1447   41.1448      41.1973  41.1973   41.1988    
 2x48 Array{Float64,2}:
 -8.5702  -8.57019  -8.56947  -8.56733  …  -8.59311  -8.59333  -8.59331
 41.1595  41.159    41.1591   41.1606      41.1511   41.151    41.1511    
 2x94 Array{Float64,2}:
 -8.61387  -8.61388  -8.61472  -8.61584  …  -8.62978  -8.62977  -8.62979
 41.1412   41.1412   41.1411   41.1407      41.1526   41.1526   41.1527  
 2x6 Array{Float64,2}:
 -8.6481  -8.64746  -8.64688  -8.64593  -8.64534  -8.6433
 41.1525  41.1524   41.1531   41.1538   41.1544   41.1543                                
 2x15 Array{Float64,2}:
 -8.5717  -8.57058  -8.569   -8.57006  …  -8.5658  -8.56669  -8.56921
 41.1561  41.1559   41.1555  41.1561      41.1647  41.1667   41.1676        
 2x52 Array{Float64,2}:
 -8.57456  -8.57225  -8.57049  -8.56883  …  -8.59046  -8.59078  -8.59234
 41.1802   41.1799   41.1795   41.1806      41.1978   41.1952   41.1922



In [ ]: