In [1]:
from __future__ import division
import pandas as pd
import numpy as np
import os
import graphlab as gl
In [16]:
# Tokens that split_row drops from each cell (matched as exact strings).
na_values = ['-99900.0','-99901.0','-99903.0','999.0','nan']
# Pin the column types instead of relying on inference from the first line:
# Id is int, the 18 multi-reading columns stay as raw strings, Expected is
# float. This is the same list the inference step reports for this file.
train = gl.SFrame.read_csv(
    os.path.join("data", "train_2013.csv"),
    column_type_hints=[int] + [str] * 18 + [float],
)
PROGRESS: Finished parsing file /media/vladimir/1ab2d5e6-a134-47e7-ba27-b2d70ac5ffc5/workspace/kaggle_rain/data/train_2013.csv
PROGRESS: Parsing completed. Parsed 100 lines in 5.15926 secs.
PROGRESS: Read 55681 lines. Lines per second: 67416
PROGRESS: Read 668996 lines. Lines per second: 105953
PROGRESS: Finished parsing file /media/vladimir/1ab2d5e6-a134-47e7-ba27-b2d70ac5ffc5/workspace/kaggle_rain/data/train_2013.csv
PROGRESS: Parsing completed. Parsed 1126694 lines in 10.1901 secs.
------------------------------------------------------
Inferred types from first line of file as
column_type_hints=[int,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,float]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------
In [17]:
# Preview the first rows of the raw frame. Apart from Id and Expected, each
# cell is a single string holding several space-separated readings.
train.head()
Out[17]:
Id
TimeToEnd
DistanceToRadar
Composite
1
56.0 37.0 31.0 25.0 19.0
13.0 7.0 2.0 ...
30.0 30.0 30.0 30.0 30.0
30.0 30.0 30.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
2
58.0 48.0 38.0 29.0 19.0
9.0 ...
77.0 77.0 77.0 77.0 77.0
77.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
3
59.0 20.0
75.0 75.0
-99900.0 -99900.0
4
53.0 43.0 34.0 24.0 14.0
5.0 ...
21.0 21.0 21.0 21.0 21.0
21.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
5
56.0 52.0 43.0 59.0 54.0
48.0 42.0 36.0 31.0 57.0 ...
69.0 69.0 69.0 83.0 83.0
83.0 83.0 83.0 83.0 54.0 ...
23.0 24.0 22.0 15.5 14.5
16.0 15.0 18.5 12.5 16.0 ...
6
56.0 47.0 37.0 27.0 18.0
8.0 ...
1.0 1.0 1.0 1.0 1.0 1.0
-99900.0 -99900.0
-99900.0 -4.0 -99900.0 ...
7
59.0 55.0 51.0 46.0 42.0
38.0 33.0 29.0 25.0 20.0 ...
42.0 42.0 42.0 42.0 42.0
42.0 42.0 42.0 42.0 42.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
8
57.0 54.0 51.0 48.0 44.0
41.0 38.0 35.0 22.0 43.0 ...
10.0 10.0 10.0 10.0 10.0
10.0 10.0 10.0 10.0 8.0 ...
26.0 38.0 39.5 36.0 33.0
37.0 37.5 36.5 -99900.0 ...
9
36.0 26.0
92.0 92.0
-99900.0 -99900.0
10
15.0 5.0 53.0 43.0 33.0
14.0 9.0 3.0 ...
90.0 90.0 63.0 63.0 63.0
12.0 12.0 12.0 ...
13.0 12.0 -99900.0
-99900.0 -99900.0 9.5 ...
HybridScan
HydrometeorType
Kdp
RR1
-99900.0 -99900.0
-99900.0 -99900.0 ...
8.0 8.0 8.0 8.0 8.0 8.0
8.0 8.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
8.0 8.0 8.0 8.0 8.0 8.0
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0 0.0
-99900.0 -99900.0
8.0 8.0
0.0 0.0
0.0 0.0
-99900.0 -99900.0
-99900.0 -99900.0 ...
8.0 8.0 8.0 8.0 8.0 8.0
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0 0.0
13.5 15.5 19.0 -99900.0
-99900.0 -99900.0 ...
9.0 9.0 9.0 8.0 8.0 8.0
8.0 9.0 9.0 9.0 9.0 9.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0 0.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 1.27899 0.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
8.0 8.0 8.0 8.0 8.0 8.0
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0 0.0
-99900.0 -99900.0
-99900.0 -99900.0 ...
8.0 8.0 8.0 8.0 8.0 8.0
8.0 8.0 8.0 8.0 8.0 8.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0 0.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 -99900.0 ...
47.0 44.0 42.0 32.0 41.5
26.5 30.5 -99900.0 ...
9.0 13.0 13.0 13.0 9.0
9.0 13.0 9.0 8.0 8.0 8.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0 0.0 ...
0.0 6.048 4.66107 4.46988
3.07344 4.99969 4.3752 ...
-99900.0 -99900.0
8.0 8.0
0.0 0.0
0.0 0.0
11.0 13.5 -99900.0
-99900.0 -5.0 9.5 11.5 ...
8.0 9.0 8.0 8.0 8.0 8.0
8.0 8.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 ...
RR2
RR3
RadarQualityIndex
Reflectivity
-99900.0 -99900.0
-99900.0 -99900.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
0.006246 0.0200476
0.0113924 0.217157 ...
13.0 17.5 14.0 8.5 7.0
11.0 9.0 9.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
999.0 999.0 999.0 999.0
999.0 999.0 ...
15.0 18.5 10.5 3.0 0.5
-3.0 ...
-99900.0 -99900.0
-99900.0 -99900.0
999.0 999.0
6.5 4.0
-99900.0 -99900.0
-99900.0 -99900.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
0.0 0.0 0.0 0.0 0.0 0.0
11.0 14.0 12.0 11.0 13.0
15.5 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
1.0 1.0 1.0 0.0 0.0 0.0
0.0 1.0 0.0 0.996433 0.0 ...
14.0 14.0 17.0 24.5 23.5
21.5 25.0 16.0 21.0 16.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
999.0 999.0 999.0 999.0
999.0 999.0 ...
-13.5 -8.5 9.5 14.0 13.0
15.5 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
0.0483593 0.0583249
0.103542 0.0419694 ...
15.5 16.0 3.5 -6.5 13.5
15.0 18.0 3.0 17.5 21.0 ...
-99900.0 9.72906 6.48322
6.28992 2.42506 4.78497 ...
-99900.0 -1.31778 18.6753
-6.69155 12.8562 -8.9 ...
1.0 1.0 1.0 1.0 1.0 1.0
1.0 0.948379 0.407035 ...
26.0 38.0 37.0 36.0 32.5
37.0 37.5 8.5 11.0 17.5 ...
-99900.0 -99900.0
-99900.0 -99900.0
999.0 999.0
17.0 5.5
-99900.0 -99900.0
-99900.0 -99900.0 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
0.0789454 0.765964
0.178427 0.0386251 ...
13.0 12.0 4.5 16.0 2.5
9.5 11.5 12.0 ...
ReflectivityQC
RhoHV
Velocity
Zdr
-99900.0 -99900.0
-99900.0 -99900.0 ...
0.865 0.841667 0.765
0.985 0.768333 0.491667 ...
-99901.0 -99901.0
-99901.0 -99901.0 ...
7.9375 4.5 4.1875 5.5625
3.375 7.0625 5.3125 6 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
0.635 0.851667 0.891667
0.638333 0.791667 ...
-4.0 -3.0 -2.0 -0.5 -4.0
3.0 ...
2.6875 3.0 2.375 6.25
3.125 6.0625 ...
-99900.0 -99900.0
0.998333 0.891667
-99900.0 -3.5
-6.5 -4.6875
-99900.0 -99900.0
-99900.0 -99900.0 ...
0.688333 0.518333
0.708333 0.805 0.708333 ...
-7.0 -12.0 -11.5 -8.5
-8.0 -13.0 ...
-0.375 5.0625 1.1875 2.0
2.0625 0.3125 ...
14.0 14.0 17.0 -99900.0
-99900.0 -99900.0 ...
1.01833 1.01167 0.991667
1.015 1.015 1.005 1.0 ...
14.0 13.5 12.5 -13.5
-19.5 -16.0 -15.0 -14.0 ...
0.9375 -0.875 -0.75 0.0
0.0625 0.3125 0.5625 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
1.05167 0.988333 0.298333
0.215 0.301667 0.235 ...
15.0 8.0 5.5 7.5 7.0 7.0
-0.9375 0.8125 3.1875
3.3125 -1.1875 1.6875 ...
-99900.0 -99900.0
-99900.0 -99900.0 ...
0.901667 0.661667
0.688333 0.465 0.845 ...
-2.0 13.5 -3.5 5.5 -4.5
3.0 -1.5 1.0 0.5 3.5 7.5 ...
7.5 -0.0625 5.5625
-4.5625 1.5 1.5625 2. ...
26.0 38.0 37.0 36.0 32.5
37.0 37.5 8.5 -99900.0 ...
0.958333 0.978333
0.988333 0.991667 0.995 ...
10.5 10.0 10.0 12.0 12.5
12.5 14.5 -99900.0 16.0 ...
0.375 -0.3125 0.5625
1.0625 1.5625 0.8125 ...
-99900.0 -99900.0
0.948333 0.641667
-10.0 -99900.0
0.5 -5.8125
13.0 12.0 -99900.0
-99900.0 -99900.0 9.5 ...
0.971667 1.05167 0.915
0.208333 0.888333 1.0 ...
-12.0 -9.0 -18.5 -8.5
-99900.0 13.5 14.0 13.0 ...
2.375 0.0 0.25 4.1875
4.0625 0.625 0.875 -1.5 ...
LogWaterVolume
MassWeightedMean
MassWeightedSD
Expected
nan nan nan nan nan nan
nan nan ...
nan nan nan nan nan nan
nan nan ...
nan nan nan nan nan nan
nan nan ...
0.0
nan nan nan nan nan nan
nan nan nan nan nan nan
nan nan nan nan nan nan
0.0
nan nan
nan nan
nan nan
0.0
nan nan nan nan nan nan
nan nan nan nan nan nan
nan nan nan nan nan nan
0.0
-13.4793885769
-12.1370512402 ...
1.86413642918
1.27740873124 ...
0.755068594278
0.502681241559 ...
0.0
nan nan nan nan nan nan
nan nan nan nan nan nan
nan nan nan nan nan nan
0.0
nan nan nan nan nan nan
nan nan nan nan nan nan ...
nan nan nan nan nan nan
nan nan nan nan nan nan ...
nan nan nan nan nan nan
nan nan nan nan nan nan ...
0.0
-10.3712052779
-7.19622143405 ...
1.73805086725
1.51845864912 ...
0.653683396104
0.585207001984 ...
5.6
nan nan
nan nan
nan nan
0.0
-14.7984398705
-13.315794926 nan nan ...
2.54496998173
1.57016791355 nan nan ...
1.05628634415
0.639012953381 nan nan ...
0.0
[10 rows x 20 columns]
In [4]:
# Tokens that split_row discards while parsing a cell. They are compared as
# exact strings, so the spelling must match the CSV text (e.g. '-99900.0').
# NOTE(review): presumably sentinel codes for missing/invalid readings -
# confirm against the dataset description.
na_values = ['-99900.0','-99901.0','-99903.0','999.0','nan']
In [30]:
def split_row(x, missing_tokens=None):
    """Parse one whitespace-separated cell into a list of floats.

    Parameters
    ----------
    x : str or None
        Raw cell text, e.g. ``"56.0 37.0 -99900.0"``. ``None`` is treated
        like a cell with no usable readings.
    missing_tokens : collection of str, optional
        Tokens to discard. They are compared as exact strings *before*
        conversion to float. Defaults to the notebook-level ``na_values``.

    Returns
    -------
    list of float
        The remaining readings in their original order, or ``[np.nan]``
        when nothing is left, so every row yields a non-empty list.

    Raises
    ------
    ValueError
        If a token that is not in ``missing_tokens`` cannot be parsed as
        a float.
    """
    if x is None:
        return [np.nan]
    if missing_tokens is None:
        # Fall back to the notebook-level token list for one-argument calls.
        missing_tokens = na_values
    # str.split() with no argument already ignores leading/trailing blanks.
    result = [float(token) for token in x.split() if token not in missing_tokens]
    if not result:
        return [np.nan]
    return result
In [31]:
# Column names of the raw frame, in file order.
features = train.column_names()
print(features)
['Id', 'TimeToEnd', 'DistanceToRadar', 'Composite', 'HybridScan', 'HydrometeorType', 'Kdp', 'RR1', 'RR2', 'RR3', 'RadarQualityIndex', 'Reflectivity', 'ReflectivityQC', 'RhoHV', 'Velocity', 'Zdr', 'LogWaterVolume', 'MassWeightedMean', 'MassWeightedSD', 'Expected']
In [32]:
# Parse every multi-reading column into a list of floats. The columns are
# taken from the frame itself rather than a hand-copied list: everything
# except the scalar Id and Expected columns, in file order.
train_splitted = gl.SFrame()
for column in train.column_names():
    if column in ("Id", "Expected"):
        continue
    train_splitted[column] = train[column].apply(split_row)
In [33]:
# Preview the parsed frame: each cell is now a list of floats, with [nan]
# where every token in the original string was dropped.
train_splitted.head()
Out[33]:
TimeToEnd
DistanceToRadar
Composite
HybridScan
[56.0, 37.0, 31.0, 25.0,
19.0, 13.0, 7.0, 2.0] ...
[30.0, 30.0, 30.0, 30.0,
30.0, 30.0, 30.0, 30.0] ...
[nan]
[nan]
[58.0, 48.0, 38.0, 29.0,
19.0, 9.0] ...
[77.0, 77.0, 77.0, 77.0,
77.0, 77.0] ...
[nan]
[nan]
[59.0, 20.0]
[75.0, 75.0]
[nan]
[nan]
[53.0, 43.0, 34.0, 24.0,
14.0, 5.0] ...
[21.0, 21.0, 21.0, 21.0,
21.0, 21.0] ...
[nan]
[nan]
[56.0, 52.0, 43.0, 59.0,
54.0, 48.0, 42.0, 36.0, ...
[69.0, 69.0, 69.0, 83.0,
83.0, 83.0, 83.0, 83.0, ...
[23.0, 24.0, 22.0, 15.5,
14.5, 16.0, 15.0, 18.5, ...
[13.5, 15.5, 19.0, 19.5,
17.5, 20.0, 18.0, 23.5, ...
[56.0, 47.0, 37.0, 27.0,
18.0, 8.0] ...
[1.0, 1.0, 1.0, 1.0, 1.0,
1.0] ...
[-4.0]
[nan]
[59.0, 55.0, 51.0, 46.0,
42.0, 38.0, 33.0, 29.0, ...
[42.0, 42.0, 42.0, 42.0,
42.0, 42.0, 42.0, 42.0, ...
[nan]
[nan]
[57.0, 54.0, 51.0, 48.0,
44.0, 41.0, 38.0, 35.0, ...
[10.0, 10.0, 10.0, 10.0,
10.0, 10.0, 10.0, 10.0, ...
[26.0, 38.0, 39.5, 36.0,
33.0, 37.0, 37.5, 36.5, ...
[47.0, 44.0, 42.0, 32.0,
41.5, 26.5, 30.5, 17.5] ...
[36.0, 26.0]
[92.0, 92.0]
[nan]
[nan]
[15.0, 5.0, 53.0, 43.0,
33.0, 14.0, 9.0, 3.0] ...
[90.0, 90.0, 63.0, 63.0,
63.0, 12.0, 12.0, 12.0] ...
[13.0, 12.0, 9.5, 11.5]
[11.0, 13.5, -5.0, 9.5,
11.5] ...
HydrometeorType
Kdp
RR1
RR2
[8.0, 8.0, 8.0, 8.0, 8.0,
8.0, 8.0, 8.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0] ...
[nan]
[8.0, 8.0, 8.0, 8.0, 8.0,
8.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0] ...
[nan]
[8.0, 8.0]
[0.0, 0.0]
[0.0, 0.0]
[nan]
[8.0, 8.0, 8.0, 8.0, 8.0,
8.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0] ...
[nan]
[9.0, 9.0, 9.0, 8.0, 8.0,
8.0, 8.0, 9.0, 9.0, 9.0, ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[0.42133, 0.5145,
0.518951] ...
[8.0, 8.0, 8.0, 8.0, 8.0,
8.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0] ...
[nan]
[8.0, 8.0, 8.0, 8.0, 8.0,
8.0, 8.0, 8.0, 8.0, 8.0, ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, ...
[nan]
[9.0, 13.0, 13.0, 13.0,
9.0, 9.0, 13.0, 9.0, ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, ...
[0.0, 6.048, 4.66107,
4.46988, 3.07344, ...
[9.72906, 6.48322,
6.28992, 2.42506, ...
[8.0, 8.0]
[0.0, 0.0]
[0.0, 0.0]
[nan]
[8.0, 9.0, 8.0, 8.0, 8.0,
8.0, 8.0, 8.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0] ...
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0] ...
[nan]
RR3
RadarQualityIndex
Reflectivity
ReflectivityQC
[nan]
[0.006246, 0.0200476,
0.0113924, 0.217157, ...
[13.0, 17.5, 14.0, 8.5,
7.0, 11.0, 9.0, 9.0] ...
[nan]
[nan]
[nan]
[15.0, 18.5, 10.5, 3.0,
0.5, -3.0] ...
[nan]
[nan]
[nan]
[6.5, 4.0]
[nan]
[nan]
[0.0, 0.0, 0.0, 0.0, 0.0,
0.0] ...
[11.0, 14.0, 12.0, 11.0,
13.0, 15.5] ...
[nan]
[7.53835, -4.73293,
-12.2587] ...
[1.0, 1.0, 1.0, 0.0, 0.0,
0.0, 0.0, 1.0, 0.0, ...
[14.0, 14.0, 17.0, 24.5,
23.5, 21.5, 25.0, 16.0, ...
[14.0, 14.0, 17.0, 16.0,
16.0, 19.5, 23.0, 13.5, ...
[nan]
[nan]
[-13.5, -8.5, 9.5, 14.0,
13.0, 15.5] ...
[nan]
[nan]
[0.0483593, 0.0583249,
0.103542, 0.0419694, ...
[15.5, 16.0, 3.5, -6.5,
13.5, 15.0, 18.0, 3.0, ...
[nan]
[-1.31778, 18.6753,
-6.69155, 12.8562, ...
[1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 0.948379, ...
[26.0, 38.0, 37.0, 36.0,
32.5, 37.0, 37.5, 8.5, ...
[26.0, 38.0, 37.0, 36.0,
32.5, 37.0, 37.5, 8.5, ...
[nan]
[nan]
[17.0, 5.5]
[nan]
[nan]
[0.0789454, 0.765964,
0.178427, 0.0386251, ...
[13.0, 12.0, 4.5, 16.0,
2.5, 9.5, 11.5, 12.0] ...
[13.0, 12.0, 9.5, 11.5]
RhoHV
Velocity
Zdr
LogWaterVolume
[0.865, 0.841667, 0.765,
0.985, 0.768333, ...
[nan]
[7.9375, 4.5, 4.1875,
5.5625, 3.375, 7.0625, ...
[nan]
[0.635, 0.851667,
0.891667, 0.638333, ...
[-4.0, -3.0, -2.0, -0.5,
-4.0, 3.0] ...
[2.6875, 3.0, 2.375,
6.25, 3.125, 6.0625] ...
[nan]
[0.998333, 0.891667]
[-3.5]
[-6.5, -4.6875]
[nan]
[0.688333, 0.518333,
0.708333, 0.805, ...
[-7.0, -12.0, -11.5,
-8.5, -8.0, -13.0] ...
[-0.375, 5.0625, 1.1875,
2.0, 2.0625, 0.3125] ...
[nan]
[1.01833, 1.01167,
0.991667, 1.015, 1.015, ...
[14.0, 13.5, 12.5, -13.5,
-19.5, -16.0, -15.0, ...
[0.9375, -0.875, -0.75,
0.0, 0.0625, 0.3125, ...
[-13.4793885769,
-12.1370512402, ...
[1.05167, 0.988333,
0.298333, 0.215, ...
[15.0, 8.0, 5.5, 7.5,
7.0, 7.0] ...
[-0.9375, 0.8125, 3.1875,
3.3125, -1.1875, 1.6875] ...
[nan]
[0.901667, 0.661667,
0.688333, 0.465, 0.845, ...
[-2.0, 13.5, -3.5, 5.5,
-4.5, 3.0, -1.5, 1.0, ...
[7.5, -0.0625, 5.5625,
-4.5625, 1.5, 1.5625, ...
[nan]
[0.958333, 0.978333,
0.988333, 0.991667, ...
[10.5, 10.0, 10.0, 12.0,
12.5, 12.5, 14.5, 16.0] ...
[0.375, -0.3125, 0.5625,
1.0625, 1.5625, 0.8125, ...
[-10.3712052779,
-7.19622143405, ...
[0.948333, 0.641667]
[-10.0]
[0.5, -5.8125]
[nan]
[0.971667, 1.05167,
0.915, 0.208333, ...
[-12.0, -9.0, -18.5,
-8.5, 13.5, 14.0, 13.0] ...
[2.375, 0.0, 0.25,
4.1875, 4.0625, 0.625, ...
[-14.7984398705,
-13.315794926, ...
MassWeightedMean
MassWeightedSD
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[1.86413642918,
1.27740873124, ...
[0.755068594278,
0.502681241559, ...
[nan]
[nan]
[nan]
[nan]
[1.73805086725,
1.51845864912, ...
[0.653683396104,
0.585207001984, ...
[nan]
[nan]
[2.54496998173,
1.57016791355, ...
[1.05628634415,
0.639012953381, ...
[10 rows x 18 columns]
In [34]:
# Sanity check: the mean of the [nan] placeholder is nan, so cells with no
# usable readings stay nan once the lists are averaged.
np.mean([np.nan])
Out[34]:
nan
In [39]:
# Collapse each list-valued column of train_splitted to its per-row mean.
train_mean = gl.SFrame()
for column in train_splitted.column_names():
    print(column)  # progress marker: one line per processed column
    train_mean[column] = train_splitted[column].apply(np.mean)
TimeToEnd
DistanceToRadar
Composite
HybridScan
HydrometeorType
Kdp
RR1
RR2
RR3
RadarQualityIndex
Reflectivity
ReflectivityQC
RhoHV
Velocity
Zdr
LogWaterVolume
MassWeightedMean
MassWeightedSD
In [40]:
# Preview the per-row means (the original `train_mean[.head()]` is a syntax error).
train_mean.head()
Out[40]:
TimeToEnd
DistanceToRadar
Composite
HybridScan
HydrometeorType
Kdp
RR1
23.75
30.0
nan
nan
8.0
0.0
0.0
33.5
77.0
nan
nan
8.0
0.0
0.0
39.5
75.0
nan
nan
8.0
0.0
0.0
28.8333333333
21.0
nan
nan
8.0
0.0
0.0
46.0
49.9696969697
19.7878787879
19.4772727273
8.75757575758
0.0
0.117660908988
32.1666666667
1.0
-4.0
nan
8.0
0.0
0.0
31.1428571429
42.0
nan
nan
8.0
0.0
0.0
45.5384615385
12.1538461538
34.1111111111
35.125
9.92307692308
0.0
2.12517536603
31.0
92.0
nan
nan
8.0
0.0
0.0
21.875
50.625
11.5
8.1
8.125
0.0
0.0
RR2
RR3
RadarQualityIndex
Reflectivity
ReflectivityQC
RhoHV
Velocity
nan
nan
0.0569944148592
11.125
nan
0.817917119712
nan
nan
nan
nan
7.41666666667
nan
0.757777829965
-1.75
nan
nan
nan
5.25
nan
0.944999992847
-3.5
nan
nan
0.0
12.75
nan
0.663888672988
-10.0
0.484927008549
-3.15109348297
0.636865119934
18.6515151515
17.8461538462
1.00193370581
-5.75
nan
nan
nan
5.0
nan
0.515000489851
8.33333333333
nan
nan
0.0811721075858
10.4642857143
nan
0.665476210415
1.96428571429
6.08509997527
2.60059517622
0.92837933037
27.0384615385
30.6666666667
0.988703986009
12.25
nan
nan
nan
11.25
nan
0.795000016689
-10.0
nan
nan
0.474300644128
10.125
11.5
0.887500366196
-1.07142857143
Zdr
LogWaterVolume
MassWeightedMean
MassWeightedSD
5.5078125
nan
nan
nan
3.91666666667
nan
nan
nan
-5.59375
nan
nan
nan
1.70833333333
nan
nan
nan
-0.0325
-11.8417641696
1.57242560387
0.614196679171
1.14583333333
nan
nan
nan
2.66964285714
nan
nan
nan
0.263888888889
-8.53405387061
1.83302199841
0.705382491861
-2.65625
nan
nan
nan
1.359375
-14.0804076195
1.92869055271
0.784794360399
[10 rows x 18 columns]
In [41]:
# Copy the Id column over from the raw frame.
# NOTE(review): relies on train_mean rows being in the same order as train
# (both are built column-wise from it) - confirm.
train_mean["Id"] = train["Id"]
In [42]:
# Copy the Expected column over from the raw frame unchanged.
# NOTE(review): presumably the prediction target - confirm.
train_mean["Expected"] = train["Expected"]
In [43]:
# Write train_mean to data/train_mean.csv via a pandas DataFrame, without
# the index column.
output_path = os.path.join("data", "train_mean.csv")
train_mean.to_dataframe().to_csv(output_path, index=False)
In [ ]:
Content source: ternaus/kaggle_rain
Similar notebooks: