In [1]:
from __future__ import division
import pandas as pd
import numpy as np
import os
import graphlab as gl

In [16]:
# Tokens that split_row treats as missing readings.
na_values = [
    '-99900.0',
    '-99901.0',
    '-99903.0',
    '999.0',
    'nan',
]
# Load the raw training set from the local data directory.
train_csv_path = os.path.join("data", "train_2013.csv")
train = gl.SFrame.read_csv(train_csv_path)


PROGRESS: Finished parsing file /media/vladimir/1ab2d5e6-a134-47e7-ba27-b2d70ac5ffc5/workspace/kaggle_rain/data/train_2013.csv
PROGRESS: Parsing completed. Parsed 100 lines in 5.15926 secs.
PROGRESS: Read 55681 lines. Lines per second: 67416
PROGRESS: Read 668996 lines. Lines per second: 105953
PROGRESS: Finished parsing file /media/vladimir/1ab2d5e6-a134-47e7-ba27-b2d70ac5ffc5/workspace/kaggle_rain/data/train_2013.csv
PROGRESS: Parsing completed. Parsed 1126694 lines in 10.1901 secs.
------------------------------------------------------
Inferred types from first line of file as 
column_type_hints=[int,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,float]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------

In [17]:
# Preview the first rows of the raw frame as loaded from CSV.
train.head()


Out[17]:
Id TimeToEnd DistanceToRadar Composite
1 56.0 37.0 31.0 25.0 19.0
13.0 7.0 2.0 ...
30.0 30.0 30.0 30.0 30.0
30.0 30.0 30.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
2 58.0 48.0 38.0 29.0 19.0
9.0 ...
77.0 77.0 77.0 77.0 77.0
77.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
3 59.0 20.0 75.0 75.0 -99900.0 -99900.0
4 53.0 43.0 34.0 24.0 14.0
5.0 ...
21.0 21.0 21.0 21.0 21.0
21.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
5 56.0 52.0 43.0 59.0 54.0
48.0 42.0 36.0 31.0 57.0 ...
69.0 69.0 69.0 83.0 83.0
83.0 83.0 83.0 83.0 54.0 ...
23.0 24.0 22.0 15.5 14.5
16.0 15.0 18.5 12.5 16.0 ...
6 56.0 47.0 37.0 27.0 18.0
8.0 ...
1.0 1.0 1.0 1.0 1.0 1.0 -99900.0 -99900.0
-99900.0 -4.0 -99900.0 ...
7 59.0 55.0 51.0 46.0 42.0
38.0 33.0 29.0 25.0 20.0 ...
42.0 42.0 42.0 42.0 42.0
42.0 42.0 42.0 42.0 42.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
8 57.0 54.0 51.0 48.0 44.0
41.0 38.0 35.0 22.0 43.0 ...
10.0 10.0 10.0 10.0 10.0
10.0 10.0 10.0 10.0 8.0 ...
26.0 38.0 39.5 36.0 33.0
37.0 37.5 36.5 -99900.0 ...
9 36.0 26.0 92.0 92.0 -99900.0 -99900.0
10 15.0 5.0 53.0 43.0 33.0
14.0 9.0 3.0 ...
90.0 90.0 63.0 63.0 63.0
12.0 12.0 12.0 ...
13.0 12.0 -99900.0
-99900.0 -99900.0 9.5 ...
HybridScan HydrometeorType Kdp RR1
-99900.0 -99900.0
-99900.0 -99900.0 ...
8.0 8.0 8.0 8.0 8.0 8.0
8.0 8.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
8.0 8.0 8.0 8.0 8.0 8.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
-99900.0 -99900.0 8.0 8.0 0.0 0.0 0.0 0.0
-99900.0 -99900.0
-99900.0 -99900.0 ...
8.0 8.0 8.0 8.0 8.0 8.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
13.5 15.5 19.0 -99900.0
-99900.0 -99900.0 ...
9.0 9.0 9.0 8.0 8.0 8.0
8.0 9.0 9.0 9.0 9.0 9.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0 0.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 1.27899 0.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
8.0 8.0 8.0 8.0 8.0 8.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
-99900.0 -99900.0
-99900.0 -99900.0 ...
8.0 8.0 8.0 8.0 8.0 8.0
8.0 8.0 8.0 8.0 8.0 8.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0 0.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 -99900.0 ...
47.0 44.0 42.0 32.0 41.5
26.5 30.5 -99900.0 ...
9.0 13.0 13.0 13.0 9.0
9.0 13.0 9.0 8.0 8.0 8.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0 0.0 ...
0.0 6.048 4.66107 4.46988
3.07344 4.99969 4.3752 ...
-99900.0 -99900.0 8.0 8.0 0.0 0.0 0.0 0.0
11.0 13.5 -99900.0
-99900.0 -5.0 9.5 11.5 ...
8.0 9.0 8.0 8.0 8.0 8.0
8.0 8.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 ...
RR2 RR3 RadarQualityIndex Reflectivity
-99900.0 -99900.0
-99900.0 -99900.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
0.006246 0.0200476
0.0113924 0.217157 ...
13.0 17.5 14.0 8.5 7.0
11.0 9.0 9.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
999.0 999.0 999.0 999.0
999.0 999.0 ...
15.0 18.5 10.5 3.0 0.5
-3.0 ...
-99900.0 -99900.0 -99900.0 -99900.0 999.0 999.0 6.5 4.0
-99900.0 -99900.0
-99900.0 -99900.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
0.0 0.0 0.0 0.0 0.0 0.0 11.0 14.0 12.0 11.0 13.0
15.5 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
1.0 1.0 1.0 0.0 0.0 0.0
0.0 1.0 0.0 0.996433 0.0 ...
14.0 14.0 17.0 24.5 23.5
21.5 25.0 16.0 21.0 16.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
999.0 999.0 999.0 999.0
999.0 999.0 ...
-13.5 -8.5 9.5 14.0 13.0
15.5 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
0.0483593 0.0583249
0.103542 0.0419694 ...
15.5 16.0 3.5 -6.5 13.5
15.0 18.0 3.0 17.5 21.0 ...
-99900.0 9.72906 6.48322
6.28992 2.42506 4.78497 ...
-99900.0 -1.31778 18.6753
-6.69155 12.8562 -8.9 ...
1.0 1.0 1.0 1.0 1.0 1.0
1.0 0.948379 0.407035 ...
26.0 38.0 37.0 36.0 32.5
37.0 37.5 8.5 11.0 17.5 ...
-99900.0 -99900.0 -99900.0 -99900.0 999.0 999.0 17.0 5.5
-99900.0 -99900.0
-99900.0 -99900.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
0.0789454 0.765964
0.178427 0.0386251 ...
13.0 12.0 4.5 16.0 2.5
9.5 11.5 12.0 ...
ReflectivityQC RhoHV Velocity Zdr
-99900.0 -99900.0
-99900.0 -99900.0 ...
0.865 0.841667 0.765
0.985 0.768333 0.491667 ...
-99901.0 -99901.0
-99901.0 -99901.0 ...
7.9375 4.5 4.1875 5.5625
3.375 7.0625 5.3125 6 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
0.635 0.851667 0.891667
0.638333 0.791667 ...
-4.0 -3.0 -2.0 -0.5 -4.0
3.0 ...
2.6875 3.0 2.375 6.25
3.125 6.0625 ...
-99900.0 -99900.0 0.998333 0.891667 -99900.0 -3.5 -6.5 -4.6875
-99900.0 -99900.0
-99900.0 -99900.0 ...
0.688333 0.518333
0.708333 0.805 0.708333 ...
-7.0 -12.0 -11.5 -8.5
-8.0 -13.0 ...
-0.375 5.0625 1.1875 2.0
2.0625 0.3125 ...
14.0 14.0 17.0 -99900.0
-99900.0 -99900.0 ...
1.01833 1.01167 0.991667
1.015 1.015 1.005 1.0 ...
14.0 13.5 12.5 -13.5
-19.5 -16.0 -15.0 -14.0 ...
0.9375 -0.875 -0.75 0.0
0.0625 0.3125 0.5625 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
1.05167 0.988333 0.298333
0.215 0.301667 0.235 ...
15.0 8.0 5.5 7.5 7.0 7.0 -0.9375 0.8125 3.1875
3.3125 -1.1875 1.6875 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
0.901667 0.661667
0.688333 0.465 0.845 ...
-2.0 13.5 -3.5 5.5 -4.5
3.0 -1.5 1.0 0.5 3.5 7.5 ...
7.5 -0.0625 5.5625
-4.5625 1.5 1.5625 2. ...
26.0 38.0 37.0 36.0 32.5
37.0 37.5 8.5 -99900.0 ...
0.958333 0.978333
0.988333 0.991667 0.995 ...
10.5 10.0 10.0 12.0 12.5
12.5 14.5 -99900.0 16.0 ...
0.375 -0.3125 0.5625
1.0625 1.5625 0.8125 ...
-99900.0 -99900.0 0.948333 0.641667 -10.0 -99900.0 0.5 -5.8125
13.0 12.0 -99900.0
-99900.0 -99900.0 9.5 ...
0.971667 1.05167 0.915
0.208333 0.888333 1.0 ...
-12.0 -9.0 -18.5 -8.5
-99900.0 13.5 14.0 13.0 ...
2.375 0.0 0.25 4.1875
4.0625 0.625 0.875 -1.5 ...
LogWaterVolume MassWeightedMean MassWeightedSD Expected
nan nan nan nan nan nan
nan nan ...
nan nan nan nan nan nan
nan nan ...
nan nan nan nan nan nan
nan nan ...
0.0
nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 0.0
nan nan nan nan nan nan 0.0
nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 0.0
-13.4793885769
-12.1370512402 ...
1.86413642918
1.27740873124 ...
0.755068594278
0.502681241559 ...
0.0
nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 0.0
nan nan nan nan nan nan
nan nan nan nan nan nan ...
nan nan nan nan nan nan
nan nan nan nan nan nan ...
nan nan nan nan nan nan
nan nan nan nan nan nan ...
0.0
-10.3712052779
-7.19622143405 ...
1.73805086725
1.51845864912 ...
0.653683396104
0.585207001984 ...
5.6
nan nan nan nan nan nan 0.0
-14.7984398705
-13.315794926 nan nan ...
2.54496998173
1.57016791355 nan nan ...
1.05628634415
0.639012953381 nan nan ...
0.0
[10 rows x 20 columns]

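Each feature cell holds a space-separated string of readings. Let's split each row's cells into lists of floats, dropping the missing-value codes.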


In [4]:
# String tokens that split_row skips when parsing a cell (compared as exact text).
na_values = ['-99900.0','-99901.0','-99903.0','999.0','nan']

In [30]:
def split_row(x, missing_tokens=None):
    """Parse a whitespace-separated string of readings into a list of floats.

    Parameters
    ----------
    x : str
        Raw cell text, e.g. ``"56.0 37.0 -99900.0"``.
    missing_tokens : collection of str, optional
        Tokens to drop before conversion. They are compared as exact
        strings, not as numbers. When omitted, the notebook-level
        ``na_values`` list is used.

    Returns
    -------
    list of float
        The kept readings in their original order, or ``[np.nan]`` when
        every token was dropped (or the text was empty).

    Raises
    ------
    ValueError
        If a kept token cannot be converted by ``float``.
    """
    if missing_tokens is None:
        # Fall back to the notebook-wide list so existing one-argument
        # callers (e.g. SArray.apply(split_row)) behave as before.
        missing_tokens = na_values
    # str.split() with no argument already ignores leading/trailing whitespace.
    readings = [float(token) for token in x.split()
                if token not in missing_tokens]
    # Keep a one-element NaN placeholder so the row is never an empty list.
    return readings if readings else [np.nan]

In [31]:
# List the column names of the raw frame.
features = train.column_names()
print(features)


['Id', 'TimeToEnd', 'DistanceToRadar', 'Composite', 'HybridScan', 'HydrometeorType', 'Kdp', 'RR1', 'RR2', 'RR3', 'RadarQualityIndex', 'Reflectivity', 'ReflectivityQC', 'RhoHV', 'Velocity', 'Zdr', 'LogWaterVolume', 'MassWeightedMean', 'MassWeightedSD', 'Expected']

In [32]:
# Id and Expected are single numbers per row; every other column is a
# space-separated string of readings that split_row turns into a list of
# floats. The column list is derived from the frame itself rather than
# hard-coded, so it cannot drift from the actual schema.
scalar_columns = ('Id', 'Expected')
train_splitted = gl.SFrame()
for column in train.column_names():
    if column in scalar_columns:
        continue
    train_splitted[column] = train[column].apply(split_row)

In [33]:
# Preview the parsed frame: each cell is now a list of floats, or [nan] when nothing was kept.
train_splitted.head()


Out[33]:
TimeToEnd DistanceToRadar Composite HybridScan
[56.0, 37.0, 31.0, 25.0,
19.0, 13.0, 7.0, 2.0] ...
[30.0, 30.0, 30.0, 30.0,
30.0, 30.0, 30.0, 30.0] ...
[nan] [nan]
[58.0, 48.0, 38.0, 29.0,
19.0, 9.0] ...
[77.0, 77.0, 77.0, 77.0,
77.0, 77.0] ...
[nan] [nan]
[59.0, 20.0] [75.0, 75.0] [nan] [nan]
[53.0, 43.0, 34.0, 24.0,
14.0, 5.0] ...
[21.0, 21.0, 21.0, 21.0,
21.0, 21.0] ...
[nan] [nan]
[56.0, 52.0, 43.0, 59.0,
54.0, 48.0, 42.0, 36.0, ...
[69.0, 69.0, 69.0, 83.0,
83.0, 83.0, 83.0, 83.0, ...
[23.0, 24.0, 22.0, 15.5,
14.5, 16.0, 15.0, 18.5, ...
[13.5, 15.5, 19.0, 19.5,
17.5, 20.0, 18.0, 23.5, ...
[56.0, 47.0, 37.0, 27.0,
18.0, 8.0] ...
[1.0, 1.0, 1.0, 1.0, 1.0,
1.0] ...
[-4.0] [nan]
[59.0, 55.0, 51.0, 46.0,
42.0, 38.0, 33.0, 29.0, ...
[42.0, 42.0, 42.0, 42.0,
42.0, 42.0, 42.0, 42.0, ...
[nan] [nan]
[57.0, 54.0, 51.0, 48.0,
44.0, 41.0, 38.0, 35.0, ...
[10.0, 10.0, 10.0, 10.0,
10.0, 10.0, 10.0, 10.0, ...
[26.0, 38.0, 39.5, 36.0,
33.0, 37.0, 37.5, 36.5, ...
[47.0, 44.0, 42.0, 32.0,
41.5, 26.5, 30.5, 17.5] ...
[36.0, 26.0] [92.0, 92.0] [nan] [nan]
[15.0, 5.0, 53.0, 43.0,
33.0, 14.0, 9.0, 3.0] ...
[90.0, 90.0, 63.0, 63.0,
63.0, 12.0, 12.0, 12.0] ...
[13.0, 12.0, 9.5, 11.5] [11.0, 13.5, -5.0, 9.5,
11.5] ...
HydrometeorType Kdp RR1 RR2
[8.0, 8.0, 8.0, 8.0, 8.0,
8.0, 8.0, 8.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0] ...
[nan]
[8.0, 8.0, 8.0, 8.0, 8.0,
8.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0] ...
[nan]
[8.0, 8.0] [0.0, 0.0] [0.0, 0.0] [nan]
[8.0, 8.0, 8.0, 8.0, 8.0,
8.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0] ...
[nan]
[9.0, 9.0, 9.0, 8.0, 8.0,
8.0, 8.0, 9.0, 9.0, 9.0, ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[0.42133, 0.5145,
0.518951] ...
[8.0, 8.0, 8.0, 8.0, 8.0,
8.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0] ...
[nan]
[8.0, 8.0, 8.0, 8.0, 8.0,
8.0, 8.0, 8.0, 8.0, 8.0, ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, ...
[nan]
[9.0, 13.0, 13.0, 13.0,
9.0, 9.0, 13.0, 9.0, ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, ...
[0.0, 6.048, 4.66107,
4.46988, 3.07344, ...
[9.72906, 6.48322,
6.28992, 2.42506, ...
[8.0, 8.0] [0.0, 0.0] [0.0, 0.0] [nan]
[8.0, 9.0, 8.0, 8.0, 8.0,
8.0, 8.0, 8.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0] ...
[nan]
RR3 RadarQualityIndex Reflectivity ReflectivityQC
[nan] [0.006246, 0.0200476,
0.0113924, 0.217157, ...
[13.0, 17.5, 14.0, 8.5,
7.0, 11.0, 9.0, 9.0] ...
[nan]
[nan] [nan] [15.0, 18.5, 10.5, 3.0,
0.5, -3.0] ...
[nan]
[nan] [nan] [6.5, 4.0] [nan]
[nan] [0.0, 0.0, 0.0, 0.0, 0.0,
0.0] ...
[11.0, 14.0, 12.0, 11.0,
13.0, 15.5] ...
[nan]
[7.53835, -4.73293,
-12.2587] ...
[1.0, 1.0, 1.0, 0.0, 0.0,
0.0, 0.0, 1.0, 0.0, ...
[14.0, 14.0, 17.0, 24.5,
23.5, 21.5, 25.0, 16.0, ...
[14.0, 14.0, 17.0, 16.0,
16.0, 19.5, 23.0, 13.5, ...
[nan] [nan] [-13.5, -8.5, 9.5, 14.0,
13.0, 15.5] ...
[nan]
[nan] [0.0483593, 0.0583249,
0.103542, 0.0419694, ...
[15.5, 16.0, 3.5, -6.5,
13.5, 15.0, 18.0, 3.0, ...
[nan]
[-1.31778, 18.6753,
-6.69155, 12.8562, ...
[1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 0.948379, ...
[26.0, 38.0, 37.0, 36.0,
32.5, 37.0, 37.5, 8.5, ...
[26.0, 38.0, 37.0, 36.0,
32.5, 37.0, 37.5, 8.5, ...
[nan] [nan] [17.0, 5.5] [nan]
[nan] [0.0789454, 0.765964,
0.178427, 0.0386251, ...
[13.0, 12.0, 4.5, 16.0,
2.5, 9.5, 11.5, 12.0] ...
[13.0, 12.0, 9.5, 11.5]
RhoHV Velocity Zdr LogWaterVolume
[0.865, 0.841667, 0.765,
0.985, 0.768333, ...
[nan] [7.9375, 4.5, 4.1875,
5.5625, 3.375, 7.0625, ...
[nan]
[0.635, 0.851667,
0.891667, 0.638333, ...
[-4.0, -3.0, -2.0, -0.5,
-4.0, 3.0] ...
[2.6875, 3.0, 2.375,
6.25, 3.125, 6.0625] ...
[nan]
[0.998333, 0.891667] [-3.5] [-6.5, -4.6875] [nan]
[0.688333, 0.518333,
0.708333, 0.805, ...
[-7.0, -12.0, -11.5,
-8.5, -8.0, -13.0] ...
[-0.375, 5.0625, 1.1875,
2.0, 2.0625, 0.3125] ...
[nan]
[1.01833, 1.01167,
0.991667, 1.015, 1.015, ...
[14.0, 13.5, 12.5, -13.5,
-19.5, -16.0, -15.0, ...
[0.9375, -0.875, -0.75,
0.0, 0.0625, 0.3125, ...
[-13.4793885769,
-12.1370512402, ...
[1.05167, 0.988333,
0.298333, 0.215, ...
[15.0, 8.0, 5.5, 7.5,
7.0, 7.0] ...
[-0.9375, 0.8125, 3.1875,
3.3125, -1.1875, 1.6875] ...
[nan]
[0.901667, 0.661667,
0.688333, 0.465, 0.845, ...
[-2.0, 13.5, -3.5, 5.5,
-4.5, 3.0, -1.5, 1.0, ...
[7.5, -0.0625, 5.5625,
-4.5625, 1.5, 1.5625, ...
[nan]
[0.958333, 0.978333,
0.988333, 0.991667, ...
[10.5, 10.0, 10.0, 12.0,
12.5, 12.5, 14.5, 16.0] ...
[0.375, -0.3125, 0.5625,
1.0625, 1.5625, 0.8125, ...
[-10.3712052779,
-7.19622143405, ...
[0.948333, 0.641667] [-10.0] [0.5, -5.8125] [nan]
[0.971667, 1.05167,
0.915, 0.208333, ...
[-12.0, -9.0, -18.5,
-8.5, 13.5, 14.0, 13.0] ...
[2.375, 0.0, 0.25,
4.1875, 4.0625, 0.625, ...
[-14.7984398705,
-13.315794926, ...
MassWeightedMean MassWeightedSD
[nan] [nan]
[nan] [nan]
[nan] [nan]
[nan] [nan]
[1.86413642918,
1.27740873124, ...
[0.755068594278,
0.502681241559, ...
[nan] [nan]
[nan] [nan]
[1.73805086725,
1.51845864912, ...
[0.653683396104,
0.585207001984, ...
[nan] [nan]
[2.54496998173,
1.57016791355, ...
[1.05628634415,
0.639012953381, ...
[10 rows x 18 columns]


In [34]:
# Sanity check: the mean of the [np.nan] placeholder returned by split_row is NaN.
np.mean([np.nan])


Out[34]:
nan

In [39]:
# Reduce each row's list of readings to its mean, one column at a time.
train_mean = gl.SFrame()
for column in train_splitted.column_names():
    print(column)  # progress trace: name of the column being reduced
    readings_per_row = train_splitted[column]
    train_mean[column] = readings_per_row.apply(np.mean)


TimeToEnd
DistanceToRadar
Composite
HybridScan
HydrometeorType
Kdp
RR1
RR2
RR3
RadarQualityIndex
Reflectivity
ReflectivityQC
RhoHV
Velocity
Zdr
LogWaterVolume
MassWeightedMean
MassWeightedSD

In [40]:
# Preview the first rows of the per-column means.
train_mean.head()


Out[40]:
TimeToEnd DistanceToRadar Composite HybridScan HydrometeorType Kdp RR1
23.75 30.0 nan nan 8.0 0.0 0.0
33.5 77.0 nan nan 8.0 0.0 0.0
39.5 75.0 nan nan 8.0 0.0 0.0
28.8333333333 21.0 nan nan 8.0 0.0 0.0
46.0 49.9696969697 19.7878787879 19.4772727273 8.75757575758 0.0 0.117660908988
32.1666666667 1.0 -4.0 nan 8.0 0.0 0.0
31.1428571429 42.0 nan nan 8.0 0.0 0.0
45.5384615385 12.1538461538 34.1111111111 35.125 9.92307692308 0.0 2.12517536603
31.0 92.0 nan nan 8.0 0.0 0.0
21.875 50.625 11.5 8.1 8.125 0.0 0.0
RR2 RR3 RadarQualityIndex Reflectivity ReflectivityQC RhoHV Velocity
nan nan 0.0569944148592 11.125 nan 0.817917119712 nan
nan nan nan 7.41666666667 nan 0.757777829965 -1.75
nan nan nan 5.25 nan 0.944999992847 -3.5
nan nan 0.0 12.75 nan 0.663888672988 -10.0
0.484927008549 -3.15109348297 0.636865119934 18.6515151515 17.8461538462 1.00193370581 -5.75
nan nan nan 5.0 nan 0.515000489851 8.33333333333
nan nan 0.0811721075858 10.4642857143 nan 0.665476210415 1.96428571429
6.08509997527 2.60059517622 0.92837933037 27.0384615385 30.6666666667 0.988703986009 12.25
nan nan nan 11.25 nan 0.795000016689 -10.0
nan nan 0.474300644128 10.125 11.5 0.887500366196 -1.07142857143
Zdr LogWaterVolume MassWeightedMean MassWeightedSD
5.5078125 nan nan nan
3.91666666667 nan nan nan
-5.59375 nan nan nan
1.70833333333 nan nan nan
-0.0325 -11.8417641696 1.57242560387 0.614196679171
1.14583333333 nan nan nan
2.66964285714 nan nan nan
0.263888888889 -8.53405387061 1.83302199841 0.705382491861
-2.65625 nan nan nan
1.359375 -14.0804076195 1.92869055271 0.784794360399
[10 rows x 18 columns]


In [41]:
# Copy the Id column over unchanged from the raw frame.
train_mean["Id"] = train["Id"]

In [42]:
# Copy the Expected column over unchanged from the raw frame.
train_mean["Expected"] = train["Expected"]

In [43]:
# Write train_mean to CSV via pandas, omitting the DataFrame index column.
train_mean_path = os.path.join("data", "train_mean.csv")
train_mean_df = train_mean.to_dataframe()
train_mean_df.to_csv(train_mean_path, index=False)

In [ ]: