The nearest-neighbor model should retrieve the region ID and date of the three forecasts that are most similar to the current conditions.
Targets are date and region_id.
Features are mountain weather elements and elements in the forecasts of the previous days.
Output should be the varsom.no link with the following format, e.g. https://www.varsom.no/snoskredvarsling/varsel/Lyngen/2019-03-13
In [0]:
import pandas as pd
import numpy as np
import json
from sklearn import preprocessing
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.simplefilter('ignore')
In [0]:
# Load the pre-processed Varsom forecast table from the APS repository.
# The first CSV column becomes the row index.
# TODO: where is the time stamp? (no datetime column is parsed at load time)
v_df = pd.read_csv(
    'https://raw.githubusercontent.com/kmunve/APS/master/aps/notebooks/ml_varsom/varsom_ml_preproc_3y.csv',
    index_col=0,
)
# For some reason all rows appear twice in the file; keep the first occurrence of each.
v_df = v_df.drop_duplicates(keep='first')
v_df.describe()
Out[0]:
avalanche_problem_1_cause_id
avalanche_problem_1_destructive_size_ext_id
avalanche_problem_1_distribution_id
avalanche_problem_1_exposed_height_1
avalanche_problem_1_exposed_height_2
avalanche_problem_1_ext_id
avalanche_problem_1_probability_id
avalanche_problem_1_problem_id
avalanche_problem_1_problem_type_id
avalanche_problem_1_trigger_simple_id
avalanche_problem_1_type_id
avalanche_problem_2_cause_id
avalanche_problem_2_destructive_size_ext_id
avalanche_problem_2_distribution_id
avalanche_problem_2_exposed_height_1
avalanche_problem_2_exposed_height_2
avalanche_problem_2_ext_id
avalanche_problem_2_probability_id
avalanche_problem_2_problem_id
avalanche_problem_2_problem_type_id
avalanche_problem_2_trigger_simple_id
avalanche_problem_2_type_id
avalanche_problem_3_cause_id
avalanche_problem_3_destructive_size_ext_id
avalanche_problem_3_distribution_id
avalanche_problem_3_exposed_height_1
avalanche_problem_3_exposed_height_2
avalanche_problem_3_ext_id
avalanche_problem_3_probability_id
avalanche_problem_3_problem_id
avalanche_problem_3_problem_type_id
avalanche_problem_3_trigger_simple_id
avalanche_problem_3_type_id
danger_level
mountain_weather_freezing_level
mountain_weather_precip_most_exposed
mountain_weather_precip_region
mountain_weather_temperature_elevation
mountain_weather_temperature_max
mountain_weather_temperature_min
...
author_Ingrid@NVE
author_John Smits
author_JonasD@ObsKorps
author_Julie@SVV
author_Jørgen@obskorps
author_Karsten@NVE
author_MSA@nortind
author_Matilda@MET
author_Odd-Arne@NVE
author_Ragnar@NVE
author_Ronny@NVE
author_Silje@svv
author_Tommy@NVE
author_ToreV@met
author_anitaaw@met
author_emma@nve
author_haso@nve.no
author_heidi@nve.no
author_jan arild@obskorps
author_jegu@NVE
author_jostein@nve
author_knutinge@svv
author_magnush@met
author_martin@svv
author_ragnhildn@met
author_rue@nve
author_siri@met
author_solveig@NVE
author_torehum@svv
author_torolav@obskorps
mountain_weather_wind_direction_E
mountain_weather_wind_direction_N
mountain_weather_wind_direction_NE
mountain_weather_wind_direction_NW
mountain_weather_wind_direction_None
mountain_weather_wind_direction_Not given
mountain_weather_wind_direction_S
mountain_weather_wind_direction_SE
mountain_weather_wind_direction_SW
mountain_weather_wind_direction_W
count
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.00000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
...
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
16632.000000
mean
7.504990
1.160955
1.012145
335.918711
34.704185
10.563672
1.642496
0.536195
7.453523
10.168951
6.006494
4.886003
0.669252
0.491282
201.569264
28.330928
5.575397
0.854738
0.571789
6.408009
4.907047
3.455988
0.434103
0.061748
0.043891
17.821068
2.15849
0.501744
0.077321
0.076659
0.577381
0.453944
0.315657
1.145924
142.281145
2.084055
1.024110
383.700096
-1.064400
-3.280363
...
0.010402
0.009319
0.031926
0.000782
0.024952
0.013167
0.008418
0.006734
0.009680
0.014971
0.020984
0.006794
0.021224
0.018939
0.004089
0.014250
0.020022
0.024711
0.010221
0.007335
0.016114
0.023449
0.002706
0.015512
0.010943
0.010161
0.005652
0.009440
0.012386
0.024411
0.027718
0.012205
0.013949
0.027898
0.004630
0.690536
0.034392
0.097042
0.051166
0.040464
std
7.876984
1.170615
1.027010
401.058530
150.562400
9.951601
1.561408
0.498703
10.808093
10.027457
6.075860
8.182476
1.108225
0.840283
374.693192
133.425354
8.967815
1.372375
0.903706
12.767937
8.321628
5.884819
2.813269
0.394446
0.288054
126.367889
37.35957
3.143473
0.482797
0.473408
4.352117
2.939294
2.063900
1.186906
421.728012
6.712413
3.818734
596.377894
3.767183
6.136854
...
0.101460
0.096089
0.175809
0.027947
0.155983
0.113995
0.091363
0.081787
0.097913
0.121441
0.143334
0.082149
0.144135
0.136315
0.063812
0.118522
0.140078
0.155249
0.100585
0.085334
0.125916
0.151328
0.051947
0.123582
0.104037
0.100292
0.074968
0.096701
0.110603
0.154325
0.164168
0.109805
0.117283
0.164686
0.067886
0.462286
0.182238
0.296024
0.220344
0.197051
min
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.00000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
-22.000000
-32.000000
...
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
25%
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.00000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
-5.000000
...
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
50%
10.000000
1.000000
1.000000
200.000000
0.000000
15.000000
3.000000
1.000000
5.000000
10.000000
10.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.00000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
...
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
75%
13.000000
2.000000
2.000000
600.000000
0.000000
20.000000
3.000000
1.000000
10.000000
21.000000
10.000000
10.000000
2.000000
1.000000
200.000000
0.000000
15.000000
3.000000
2.000000
5.000000
10.000000
10.000000
0.000000
0.000000
0.000000
0.000000
0.00000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
2.000000
0.000000
0.000000
0.000000
1100.000000
0.000000
0.000000
...
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
max
24.000000
4.000000
4.000000
2100.000000
2000.000000
25.000000
5.000000
1.000000
50.000000
22.000000
20.000000
24.000000
4.000000
3.000000
2300.000000
1500.000000
25.000000
5.000000
2.000000
50.000000
22.000000
20.000000
24.000000
4.000000
3.000000
2000.000000
1100.00000
25.000000
5.000000
3.000000
50.000000
22.000000
20.000000
4.000000
2800.000000
160.000000
90.000000
1800.000000
20.000000
10.000000
...
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
8 rows × 122 columns
In [0]:
# Peek at the first five rows to check the column layout.
v_df.head(n=5)
Out[0]:
avalanche_problem_1_cause_id
avalanche_problem_1_destructive_size_ext_id
avalanche_problem_1_distribution_id
avalanche_problem_1_exposed_height_1
avalanche_problem_1_exposed_height_2
avalanche_problem_1_ext_id
avalanche_problem_1_probability_id
avalanche_problem_1_problem_id
avalanche_problem_1_problem_type_id
avalanche_problem_1_trigger_simple_id
avalanche_problem_1_type_id
avalanche_problem_2_cause_id
avalanche_problem_2_destructive_size_ext_id
avalanche_problem_2_distribution_id
avalanche_problem_2_exposed_height_1
avalanche_problem_2_exposed_height_2
avalanche_problem_2_ext_id
avalanche_problem_2_probability_id
avalanche_problem_2_problem_id
avalanche_problem_2_problem_type_id
avalanche_problem_2_trigger_simple_id
avalanche_problem_2_type_id
avalanche_problem_3_cause_id
avalanche_problem_3_destructive_size_ext_id
avalanche_problem_3_distribution_id
avalanche_problem_3_exposed_height_1
avalanche_problem_3_exposed_height_2
avalanche_problem_3_ext_id
avalanche_problem_3_probability_id
avalanche_problem_3_problem_id
avalanche_problem_3_problem_type_id
avalanche_problem_3_trigger_simple_id
avalanche_problem_3_type_id
danger_level
mountain_weather_freezing_level
mountain_weather_precip_most_exposed
mountain_weather_precip_region
mountain_weather_temperature_elevation
mountain_weather_temperature_max
mountain_weather_temperature_min
...
author_Ingrid@NVE
author_John Smits
author_JonasD@ObsKorps
author_Julie@SVV
author_Jørgen@obskorps
author_Karsten@NVE
author_MSA@nortind
author_Matilda@MET
author_Odd-Arne@NVE
author_Ragnar@NVE
author_Ronny@NVE
author_Silje@svv
author_Tommy@NVE
author_ToreV@met
author_anitaaw@met
author_emma@nve
author_haso@nve.no
author_heidi@nve.no
author_jan arild@obskorps
author_jegu@NVE
author_jostein@nve
author_knutinge@svv
author_magnush@met
author_martin@svv
author_ragnhildn@met
author_rue@nve
author_siri@met
author_solveig@NVE
author_torehum@svv
author_torolav@obskorps
mountain_weather_wind_direction_E
mountain_weather_wind_direction_N
mountain_weather_wind_direction_NE
mountain_weather_wind_direction_NW
mountain_weather_wind_direction_None
mountain_weather_wind_direction_Not given
mountain_weather_wind_direction_S
mountain_weather_wind_direction_SE
mountain_weather_wind_direction_SW
mountain_weather_wind_direction_W
index
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.0
0.0
0.0
0.0
0.0
0.0
...
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
1
15
2
2
800
0
20
2
1
10
10
10
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
2
0.0
0.0
0.0
0.0
0.0
0.0
...
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
2
15
2
2
400
0
20
2
1
10
10
10
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
2
0.0
0.0
0.0
0.0
0.0
0.0
...
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
3
15
2
2
400
0
20
2
1
10
21
10
11
2
2
400
0
20
2
2
30
10
10
0
0
0
0
0
0
0
0
0
0
0
2
0.0
0.0
0.0
0.0
0.0
0.0
...
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
4
10
2
2
400
0
20
3
1
7
21
10
11
2
2
400
0
20
2
2
30
10
10
0
0
0
0
0
0
0
0
0
0
0
2
0.0
0.0
0.0
0.0
0.0
0.0
...
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
5 rows × 123 columns
In [0]:
# Open question: why does the dataset contain summer dates?
# List every distinct date value to see which days are covered.
pd.unique(v_df['date'])
Out[0]:
array(['2016-12-01', '2016-12-02', '2016-12-03', '2016-12-04',
'2016-12-05', '2016-12-06', '2016-12-07', '2016-12-08',
'2016-12-09', '2016-12-10', '2016-12-11', '2016-12-12',
'2016-12-13', '2016-12-14', '2016-12-15', '2016-12-16',
'2016-12-17', '2016-12-18', '2016-12-19', '2016-12-20',
'2016-12-21', '2016-12-22', '2016-12-23', '2016-12-24',
'2016-12-25', '2016-12-26', '2016-12-27', '2016-12-28',
'2016-12-29', '2016-12-30', '2016-12-31', '2017-01-01',
'2017-01-02', '2017-01-03', '2017-01-04', '2017-01-05',
'2017-01-06', '2017-01-07', '2017-01-08', '2017-01-09',
'2017-01-10', '2017-01-11', '2017-01-12', '2017-01-13',
'2017-01-14', '2017-01-15', '2017-01-16', '2017-01-17',
'2017-01-18', '2017-01-19', '2017-01-20', '2017-01-21',
'2017-01-22', '2017-01-23', '2017-01-24', '2017-01-25',
'2017-01-26', '2017-01-27', '2017-01-28', '2017-01-29',
'2017-01-30', '2017-01-31', '2017-02-01', '2017-02-02',
'2017-02-03', '2017-02-04', '2017-02-05', '2017-02-06',
'2017-02-07', '2017-02-08', '2017-02-09', '2017-02-10',
'2017-02-11', '2017-02-12', '2017-02-13', '2017-02-14',
'2017-02-15', '2017-02-16', '2017-02-17', '2017-02-18',
'2017-02-19', '2017-02-20', '2017-02-21', '2017-02-22',
'2017-02-23', '2017-02-24', '2017-02-25', '2017-02-26',
'2017-02-27', '2017-02-28', '2017-03-01', '2017-03-02',
'2017-03-03', '2017-03-04', '2017-03-05', '2017-03-06',
'2017-03-07', '2017-03-08', '2017-03-09', '2017-03-10',
'2017-03-11', '2017-03-12', '2017-03-13', '2017-03-14',
'2017-03-15', '2017-03-16', '2017-03-17', '2017-03-18',
'2017-03-19', '2017-03-20', '2017-03-21', '2017-03-22',
'2017-03-23', '2017-03-24', '2017-03-25', '2017-03-26',
'2017-03-27', '2017-03-28', '2017-03-29', '2017-03-30',
'2017-03-31', '2017-04-01', '2017-04-02', '2017-04-03',
'2017-04-04', '2017-04-05', '2017-04-06', '2017-04-07',
'2017-04-08', '2017-04-09', '2017-04-10', '2017-04-11',
'2017-04-12', '2017-04-13', '2017-04-14', '2017-04-15',
'2017-04-16', '2017-04-17', '2017-04-18', '2017-04-19',
'2017-04-20', '2017-04-21', '2017-04-22', '2017-04-23',
'2017-04-24', '2017-04-25', '2017-04-26', '2017-04-27',
'2017-04-28', '2017-04-29', '2017-04-30', '2017-05-01',
'2017-05-02', '2017-05-03', '2017-05-04', '2017-05-05',
'2017-05-06', '2017-05-07', '2017-05-08', '2017-05-09',
'2017-05-10', '2017-05-11', '2017-05-12', '2017-05-13',
'2017-05-14', '2017-05-15', '2017-05-16', '2017-05-17',
'2017-05-18', '2017-05-19', '2017-05-20', '2017-05-21',
'2017-05-22', '2017-05-23', '2017-05-24', '2017-05-25',
'2017-05-26', '2017-05-27', '2017-05-28', '2017-05-29',
'2017-05-30', '2017-05-31', '2017-06-01', '2017-06-02',
'2017-06-03', '2017-06-04', '2017-06-05', '2017-06-06',
'2017-06-07', '2017-06-08', '2017-06-09', '2017-06-10',
'2017-06-11', '2017-06-12', '2017-06-13', '2017-06-14',
'2017-06-15', '2017-06-16', '2017-06-17', '2017-06-18',
'2017-06-19', '2017-06-20', '2017-06-21', '2017-06-22',
'2017-06-23', '2017-06-24', '2017-06-25', '2017-06-26',
'2017-06-27', '2017-06-28', '2017-06-29', '2017-06-30',
'2017-07-01', '2017-07-02', '2017-07-03', '2017-07-04',
'2017-07-05', '2017-07-06', '2017-07-07', '2017-07-08',
'2017-07-09', '2017-07-10', '2017-07-11', '2017-07-12',
'2017-07-13', '2017-07-14', '2017-07-15', '2017-07-16',
'2017-07-17', '2017-07-18', '2017-07-19', '2017-07-20',
'2017-07-21', '2017-07-22', '2017-07-23', '2017-07-24',
'2017-07-25', '2017-07-26', '2017-07-27', '2017-07-28',
'2017-07-29', '2017-07-30', '2017-07-31', '2017-08-01',
'2017-08-02', '2017-08-03', '2017-08-04', '2017-08-05',
'2017-08-06', '2017-08-07', '2017-08-08', '2017-08-09',
'2017-08-10', '2017-08-11', '2017-08-12', '2017-08-13',
'2017-08-14', '2017-08-15', '2017-08-16', '2017-08-17',
'2017-08-18', '2017-08-19', '2017-08-20', '2017-08-21',
'2017-08-22', '2017-08-23', '2017-08-24', '2017-08-25',
'2017-08-26', '2017-08-27', '2017-08-28', '2017-08-29',
'2017-08-30', '2017-08-31', '2017-09-01', '2017-09-02',
'2017-09-03', '2017-09-04', '2017-09-05', '2017-09-06',
'2017-09-07', '2017-09-08', '2017-09-09', '2017-09-10',
'2017-09-11', '2017-09-12', '2017-09-13', '2017-09-14',
'2017-09-15', '2017-09-16', '2017-09-17', '2017-09-18',
'2017-09-19', '2017-09-20', '2017-09-21', '2017-09-22',
'2017-09-23', '2017-09-24', '2017-09-25', '2017-09-26',
'2017-09-27', '2017-09-28', '2017-09-29', '2017-09-30',
'2017-10-01', '2017-10-02', '2017-10-03', '2017-10-04',
'2017-10-05', '2017-10-06', '2017-10-07', '2017-10-08',
'2017-10-09', '2017-10-10', '2017-10-11', '2017-10-12',
'2017-10-13', '2017-10-14', '2017-10-15', '2017-10-16',
'2017-10-17', '2017-10-18', '2017-10-19', '2017-10-20',
'2017-10-21', '2017-10-22', '2017-10-23', '2017-10-24',
'2017-10-25', '2017-10-26', '2017-10-27', '2017-10-28',
'2017-10-29', '2017-10-30', '2017-10-31', '2017-11-01',
'2017-11-02', '2017-11-03', '2017-11-04', '2017-11-05',
'2017-11-06', '2017-11-07', '2017-11-08', '2017-11-09',
'2017-11-10', '2017-11-11', '2017-11-12', '2017-11-13',
'2017-11-14', '2017-11-15', '2017-11-16', '2017-11-17',
'2017-11-18', '2017-11-19', '2017-11-20', '2017-11-21',
'2017-11-22', '2017-11-23', '2017-11-24', '2017-11-25',
'2017-11-26', '2017-11-27', '2017-11-28', '2017-11-29',
'2017-11-30', '2017-12-01', '2017-12-02', '2017-12-03',
'2017-12-04', '2017-12-05', '2017-12-06', '2017-12-07',
'2017-12-08', '2017-12-09', '2017-12-10', '2017-12-11',
'2017-12-12', '2017-12-13', '2017-12-14', '2017-12-15',
'2017-12-16', '2017-12-17', '2017-12-18', '2017-12-19',
'2017-12-20', '2017-12-21', '2017-12-22', '2017-12-23',
'2017-12-24', '2017-12-25', '2017-12-26', '2017-12-27',
'2017-12-28', '2017-12-29', '2017-12-30', '2017-12-31',
'2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
'2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
'2018-01-09', '2018-01-10', '2018-01-11', '2018-01-12',
'2018-01-13', '2018-01-14', '2018-01-15', '2018-01-16',
'2018-01-17', '2018-01-18', '2018-01-19', '2018-01-20',
'2018-01-21', '2018-01-22', '2018-01-23', '2018-01-24',
'2018-01-25', '2018-01-26', '2018-01-27', '2018-01-28',
'2018-01-29', '2018-01-30', '2018-01-31', '2018-02-01',
'2018-02-02', '2018-02-03', '2018-02-04', '2018-02-05',
'2018-02-06', '2018-02-07', '2018-02-08', '2018-02-09',
'2018-02-10', '2018-02-11', '2018-02-12', '2018-02-13',
'2018-02-14', '2018-02-15', '2018-02-16', '2018-02-17',
'2018-02-18', '2018-02-19', '2018-02-20', '2018-02-21',
'2018-02-22', '2018-02-23', '2018-02-24', '2018-02-25',
'2018-02-26', '2018-02-27', '2018-02-28', '2018-03-01',
'2018-03-02', '2018-03-03', '2018-03-04', '2018-03-05',
'2018-03-06', '2018-03-07', '2018-03-08', '2018-03-09',
'2018-03-10', '2018-03-11', '2018-03-12', '2018-03-13',
'2018-03-14', '2018-03-15', '2018-03-16', '2018-03-17',
'2018-03-18', '2018-03-19', '2018-03-20', '2018-03-21',
'2018-03-22', '2018-03-23', '2018-03-24', '2018-03-25',
'2018-03-26', '2018-03-27', '2018-03-28', '2018-03-29',
'2018-03-30', '2018-03-31', '2018-04-01', '2018-04-02',
'2018-04-03', '2018-04-04', '2018-04-05', '2018-04-06',
'2018-04-07', '2018-04-08', '2018-04-09', '2018-04-10',
'2018-04-11', '2018-04-12', '2018-04-13', '2018-04-14',
'2018-04-15', '2018-04-16', '2018-04-17', '2018-04-18',
'2018-04-19', '2018-04-20', '2018-04-21', '2018-04-22',
'2018-04-23', '2018-04-24', '2018-04-25', '2018-04-26',
'2018-04-27', '2018-04-28', '2018-04-29', '2018-04-30',
'2018-05-01', '2018-05-02', '2018-05-03', '2018-05-04',
'2018-05-05', '2018-05-06', '2018-05-07', '2018-05-08',
'2018-05-09', '2018-05-10', '2018-05-11', '2018-05-12',
'2018-05-13', '2018-05-14', '2018-05-15', '2018-05-16',
'2018-05-17', '2018-05-18', '2018-05-19', '2018-05-20',
'2018-05-21', '2018-05-22', '2018-05-23', '2018-05-24',
'2018-05-25', '2018-05-26', '2018-05-27', '2018-05-28',
'2018-05-29', '2018-05-30', '2018-05-31', '2018-06-01',
'2018-06-02', '2018-06-03', '2018-06-04', '2018-06-05',
'2018-06-06', '2018-06-07', '2018-06-08', '2018-06-09',
'2018-06-10', '2018-06-11', '2018-06-12', '2018-06-13',
'2018-06-14', '2018-06-15', '2018-06-16', '2018-06-17',
'2018-06-18', '2018-06-19', '2018-06-20', '2018-06-21',
'2018-06-22', '2018-06-23', '2018-06-24', '2018-06-25',
'2018-06-26', '2018-06-27', '2018-06-28', '2018-06-29',
'2018-06-30', '2018-07-01', '2018-07-02', '2018-07-03',
'2018-07-04', '2018-07-05', '2018-07-06', '2018-07-07',
'2018-07-08', '2018-07-09', '2018-07-10', '2018-07-11',
'2018-07-12', '2018-07-13', '2018-07-14', '2018-07-15',
'2018-07-16', '2018-07-17', '2018-07-18', '2018-07-19',
'2018-07-20', '2018-07-21', '2018-07-22', '2018-07-23',
'2018-07-24', '2018-07-25', '2018-07-26', '2018-07-27',
'2018-07-28', '2018-07-29', '2018-07-30', '2018-07-31',
'2018-08-01', '2018-08-02', '2018-08-03', '2018-08-04',
'2018-08-05', '2018-08-06', '2018-08-07', '2018-08-08',
'2018-08-09', '2018-08-10', '2018-08-11', '2018-08-12',
'2018-08-13', '2018-08-14', '2018-08-15', '2018-08-16',
'2018-08-17', '2018-08-18', '2018-08-19', '2018-08-20',
'2018-08-21', '2018-08-22', '2018-08-23', '2018-08-24',
'2018-08-25', '2018-08-26', '2018-08-27', '2018-08-28',
'2018-08-29', '2018-08-30', '2018-08-31', '2018-09-01',
'2018-09-02', '2018-09-03', '2018-09-04', '2018-09-05',
'2018-09-06', '2018-09-07', '2018-09-08', '2018-09-09',
'2018-09-10', '2018-09-11', '2018-09-12', '2018-09-13',
'2018-09-14', '2018-09-15', '2018-09-16', '2018-09-17',
'2018-09-18', '2018-09-19', '2018-09-20', '2018-09-21',
'2018-09-22', '2018-09-23', '2018-09-24', '2018-09-25',
'2018-09-26', '2018-09-27', '2018-09-28', '2018-09-29',
'2018-09-30', '2018-10-01', '2018-10-02', '2018-10-03',
'2018-10-04', '2018-10-05', '2018-10-06', '2018-10-07',
'2018-10-08', '2018-10-09', '2018-10-10', '2018-10-11',
'2018-10-12', '2018-10-13', '2018-10-14', '2018-10-15',
'2018-10-16', '2018-10-17', '2018-10-18', '2018-10-19',
'2018-10-20', '2018-10-21', '2018-10-22', '2018-10-23',
'2018-10-24', '2018-10-25', '2018-10-26', '2018-10-27',
'2018-10-28', '2018-10-29', '2018-10-30', '2018-10-31',
'2018-11-01', '2018-11-02', '2018-11-03', '2018-11-04',
'2018-11-05', '2018-11-06', '2018-11-07', '2018-11-08',
'2018-11-09', '2018-11-10', '2018-11-11', '2018-11-12',
'2018-11-13', '2018-11-14', '2018-11-15', '2018-11-16',
'2018-11-17', '2018-11-18', '2018-11-19', '2018-11-20',
'2018-11-21', '2018-11-22', '2018-11-23', '2018-11-24',
'2018-11-25', '2018-11-26', '2018-11-27', '2018-11-28',
'2018-11-29', '2018-11-30', '2018-12-01', '2018-12-02',
'2018-12-03', '2018-12-04', '2018-12-05', '2018-12-06',
'2018-12-07', '2018-12-08', '2018-12-09', '2018-12-10',
'2018-12-11', '2018-12-12', '2018-12-13', '2018-12-14',
'2018-12-15', '2018-12-16', '2018-12-17', '2018-12-18',
'2018-12-19', '2018-12-20', '2018-12-21', '2018-12-22',
'2018-12-23', '2018-12-24', '2018-12-25', '2018-12-26',
'2018-12-27', '2018-12-28', '2018-12-29', '2018-12-30',
'2018-12-31', '2019-01-01', '2019-01-02', '2019-01-03',
'2019-01-04', '2019-01-05', '2019-01-06', '2019-01-07',
'2019-01-08', '2019-01-09', '2019-01-10', '2019-01-11',
'2019-01-12', '2019-01-13', '2019-01-14', '2019-01-15',
'2019-01-16', '2019-01-17', '2019-01-18', '2019-01-19',
'2019-01-20', '2019-01-21', '2019-01-22', '2019-01-23',
'2019-01-24', '2019-01-25', '2019-01-26', '2019-01-27',
'2019-01-28', '2019-01-29', '2019-01-30', '2019-01-31'],
dtype=object)
In [0]:
# Parse the date strings into datetimes in a single vectorized call rather than
# one pd.to_datetime call per row.
v_df['date'] = pd.to_datetime(v_df['date'])
# Calendar month (1-12) of each forecast, used below to filter by season.
v_df['month'] = v_df['date'].dt.month
In [0]:
# Which calendar months occur in the data?
pd.unique(v_df['month'])
Out[0]:
array([12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
In [0]:
# Remove the off-season months (June through November) so that only
# December-May forecasts remain.
# A boolean mask is used instead of dropping by index label: a label-based drop
# would also remove any other row sharing an index label with a matched row
# if the index is not unique.
off_season_months = [6, 7, 8, 9, 10, 11]
# .copy() gives an independent frame, so later column assignments do not act on a slice.
v_df = v_df[~v_df['month'].isin(off_season_months)].copy()
v_df.describe()
Out[0]:
avalanche_problem_1_cause_id
avalanche_problem_1_destructive_size_ext_id
avalanche_problem_1_distribution_id
avalanche_problem_1_exposed_height_1
avalanche_problem_1_exposed_height_2
avalanche_problem_1_ext_id
avalanche_problem_1_probability_id
avalanche_problem_1_problem_id
avalanche_problem_1_problem_type_id
avalanche_problem_1_trigger_simple_id
avalanche_problem_1_type_id
avalanche_problem_2_cause_id
avalanche_problem_2_destructive_size_ext_id
avalanche_problem_2_distribution_id
avalanche_problem_2_exposed_height_1
avalanche_problem_2_exposed_height_2
avalanche_problem_2_ext_id
avalanche_problem_2_probability_id
avalanche_problem_2_problem_id
avalanche_problem_2_problem_type_id
avalanche_problem_2_trigger_simple_id
avalanche_problem_2_type_id
avalanche_problem_3_cause_id
avalanche_problem_3_destructive_size_ext_id
avalanche_problem_3_distribution_id
avalanche_problem_3_exposed_height_1
avalanche_problem_3_exposed_height_2
avalanche_problem_3_ext_id
avalanche_problem_3_probability_id
avalanche_problem_3_problem_id
avalanche_problem_3_problem_type_id
avalanche_problem_3_trigger_simple_id
avalanche_problem_3_type_id
danger_level
mountain_weather_freezing_level
mountain_weather_precip_most_exposed
mountain_weather_precip_region
mountain_weather_temperature_elevation
mountain_weather_temperature_max
mountain_weather_temperature_min
...
author_John Smits
author_JonasD@ObsKorps
author_Julie@SVV
author_Jørgen@obskorps
author_Karsten@NVE
author_MSA@nortind
author_Matilda@MET
author_Odd-Arne@NVE
author_Ragnar@NVE
author_Ronny@NVE
author_Silje@svv
author_Tommy@NVE
author_ToreV@met
author_anitaaw@met
author_emma@nve
author_haso@nve.no
author_heidi@nve.no
author_jan arild@obskorps
author_jegu@NVE
author_jostein@nve
author_knutinge@svv
author_magnush@met
author_martin@svv
author_ragnhildn@met
author_rue@nve
author_siri@met
author_solveig@NVE
author_torehum@svv
author_torolav@obskorps
mountain_weather_wind_direction_E
mountain_weather_wind_direction_N
mountain_weather_wind_direction_NE
mountain_weather_wind_direction_NW
mountain_weather_wind_direction_None
mountain_weather_wind_direction_Not given
mountain_weather_wind_direction_S
mountain_weather_wind_direction_SE
mountain_weather_wind_direction_SW
mountain_weather_wind_direction_W
month
count
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
...
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
mean
13.938967
2.155600
1.878493
624.111335
64.185111
19.617147
3.049408
0.995752
13.847753
18.882070
11.155824
9.071205
1.242231
0.912028
374.480215
52.671585
10.352113
1.587078
1.061704
11.893360
9.108205
6.418511
0.807065
0.114800
0.081601
33.132126
4.012967
0.932819
0.143751
0.142522
1.073441
0.843953
0.586854
2.126649
263.013637
3.845741
1.892242
710.216857
-1.977990
-6.083054
...
0.017326
0.059356
0.001453
0.044154
0.023810
0.015649
0.012520
0.017997
0.027834
0.039012
0.012631
0.039459
0.035211
0.007601
0.025598
0.037223
0.045942
0.019003
0.013637
0.029958
0.043595
0.005030
0.028840
0.020344
0.018891
0.010507
0.017550
0.023027
0.045383
0.051531
0.022692
0.025822
0.051867
0.008607
0.427230
0.063827
0.178627
0.094567
0.075229
4.828638
std
5.058746
0.632049
0.572026
344.991990
200.034907
2.509094
0.481539
0.065040
11.341079
4.706317
3.337689
9.291766
1.251766
0.962794
442.964230
178.372604
9.993798
1.528074
0.998150
15.406953
9.498982
6.732525
3.796574
0.532152
0.388837
170.829574
50.868296
4.239090
0.651020
0.638199
5.889259
3.966576
2.785794
0.724212
545.759132
8.753089
5.040852
653.097378
4.955183
7.273690
...
0.130491
0.236303
0.038095
0.205448
0.152464
0.124122
0.111194
0.132947
0.164505
0.193634
0.111683
0.194695
0.184324
0.086858
0.157942
0.189319
0.209372
0.136543
0.115987
0.170480
0.204203
0.070749
0.167365
0.141183
0.136148
0.101972
0.131315
0.149998
0.208155
0.221091
0.148927
0.158612
0.221770
0.092380
0.494704
0.244459
0.383061
0.292633
0.263775
4.008575
min
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
-22.000000
-32.000000
...
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
25%
10.000000
2.000000
2.000000
400.000000
0.000000
20.000000
3.000000
1.000000
7.000000
21.000000
10.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
2.000000
0.000000
0.000000
0.000000
0.000000
-5.000000
-12.000000
...
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
2.000000
50%
11.000000
2.000000
2.000000
600.000000
0.000000
20.000000
3.000000
1.000000
10.000000
21.000000
10.000000
10.000000
1.000000
1.000000
100.000000
0.000000
15.000000
2.000000
2.000000
5.000000
10.000000
10.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
2.000000
0.000000
0.000000
0.000000
700.000000
0.000000
-3.000000
...
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
4.000000
75%
15.000000
3.000000
2.000000
900.000000
0.000000
20.000000
3.000000
1.000000
10.000000
21.000000
10.000000
18.000000
2.000000
2.000000
700.000000
0.000000
20.000000
3.000000
2.000000
30.000000
21.000000
10.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
3.000000
200.000000
4.000000
1.000000
1400.000000
0.000000
0.000000
...
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.000000
0.000000
0.000000
0.000000
5.000000
max
24.000000
4.000000
4.000000
2100.000000
2000.000000
25.000000
5.000000
1.000000
50.000000
22.000000
20.000000
24.000000
4.000000
3.000000
2300.000000
1500.000000
25.000000
5.000000
2.000000
50.000000
22.000000
20.000000
24.000000
4.000000
3.000000
2000.000000
1100.000000
25.000000
5.000000
3.000000
50.000000
22.000000
20.000000
4.000000
2800.000000
160.000000
90.000000
1800.000000
20.000000
10.000000
...
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
12.000000
8 rows × 123 columns
In [0]:
# Numeric representation of the date: the value returned by Timestamp.timestamp()
# for each row, so the date can be handled as a plain number.
v_df['num_date'] = v_df['date'].map(lambda ts: ts.timestamp())
In [0]:
# Put the forecasts in chronological order (in place). The later train/test
# split slices rows by position, so it depends on this ordering.
v_df.sort_values('date', inplace=True)
In [0]:
# Peek at the most recent forecasts after sorting (tail() defaults to 5 rows).
v_df.tail()
Out[0]:
avalanche_problem_1_cause_id
avalanche_problem_1_destructive_size_ext_id
avalanche_problem_1_distribution_id
avalanche_problem_1_exposed_height_1
avalanche_problem_1_exposed_height_2
avalanche_problem_1_ext_id
avalanche_problem_1_probability_id
avalanche_problem_1_problem_id
avalanche_problem_1_problem_type_id
avalanche_problem_1_trigger_simple_id
avalanche_problem_1_type_id
avalanche_problem_2_cause_id
avalanche_problem_2_destructive_size_ext_id
avalanche_problem_2_distribution_id
avalanche_problem_2_exposed_height_1
avalanche_problem_2_exposed_height_2
avalanche_problem_2_ext_id
avalanche_problem_2_probability_id
avalanche_problem_2_problem_id
avalanche_problem_2_problem_type_id
avalanche_problem_2_trigger_simple_id
avalanche_problem_2_type_id
avalanche_problem_3_cause_id
avalanche_problem_3_destructive_size_ext_id
avalanche_problem_3_distribution_id
avalanche_problem_3_exposed_height_1
avalanche_problem_3_exposed_height_2
avalanche_problem_3_ext_id
avalanche_problem_3_probability_id
avalanche_problem_3_problem_id
avalanche_problem_3_problem_type_id
avalanche_problem_3_trigger_simple_id
avalanche_problem_3_type_id
danger_level
mountain_weather_freezing_level
mountain_weather_precip_most_exposed
mountain_weather_precip_region
mountain_weather_temperature_elevation
mountain_weather_temperature_max
mountain_weather_temperature_min
...
author_JonasD@ObsKorps
author_Julie@SVV
author_Jørgen@obskorps
author_Karsten@NVE
author_MSA@nortind
author_Matilda@MET
author_Odd-Arne@NVE
author_Ragnar@NVE
author_Ronny@NVE
author_Silje@svv
author_Tommy@NVE
author_ToreV@met
author_anitaaw@met
author_emma@nve
author_haso@nve.no
author_heidi@nve.no
author_jan arild@obskorps
author_jegu@NVE
author_jostein@nve
author_knutinge@svv
author_magnush@met
author_martin@svv
author_ragnhildn@met
author_rue@nve
author_siri@met
author_solveig@NVE
author_torehum@svv
author_torolav@obskorps
mountain_weather_wind_direction_E
mountain_weather_wind_direction_N
mountain_weather_wind_direction_NE
mountain_weather_wind_direction_NW
mountain_weather_wind_direction_None
mountain_weather_wind_direction_Not given
mountain_weather_wind_direction_S
mountain_weather_wind_direction_SE
mountain_weather_wind_direction_SW
mountain_weather_wind_direction_W
month
num_date
index
2375
15
2
1
300
0
20
3
1
10
21
10
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
2
0.0
0.0
0.0
1100.0
-10.0
-18.0
...
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
1
1.548893e+09
29303
10
2
2
1000
0
20
3
1
10
21
10
19
2
1
1100
0
20
2
2
30
10
10
0
0
0
0
0
0
0
0
0
0
0
2
0.0
5.0
4.0
1400.0
-10.0
-14.0
...
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
1
1.548893e+09
791
10
3
2
0
0
20
3
1
7
21
10
13
3
1
0
0
20
3
2
30
10
10
0
0
0
0
0
0
0
0
0
0
0
3
0.0
10.0
4.0
700.0
-4.0
-7.0
...
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
1
1.548893e+09
21383
11
2
2
700
0
20
3
1
30
21
10
15
2
2
700
0
20
3
2
10
21
10
0
0
0
0
0
0
0
0
0
0
0
2
0.0
0.0
0.0
1400.0
-7.0
-15.0
...
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
1
1.548893e+09
32471
10
2
2
700
700
20
3
1
7
21
10
19
2
1
700
0
20
3
2
30
10
10
0
0
0
0
0
0
0
0
0
0
0
2
0.0
12.0
8.0
1400.0
-9.0
-14.0
...
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
1
1.548893e+09
5 rows × 125 columns
In [0]:
# Collect the names of all numeric columns; non-numeric ones (such as the
# 'date' column) cannot be fed to the scaler / neighbour search.
from pandas.api.types import is_numeric_dtype
num_cols = [name for name, dtype in v_df.dtypes.items() if is_numeric_dtype(dtype)]
print(len(num_cols))
num_cols
124
Out[0]:
['avalanche_problem_1_cause_id',
'avalanche_problem_1_destructive_size_ext_id',
'avalanche_problem_1_distribution_id',
'avalanche_problem_1_exposed_height_1',
'avalanche_problem_1_exposed_height_2',
'avalanche_problem_1_ext_id',
'avalanche_problem_1_probability_id',
'avalanche_problem_1_problem_id',
'avalanche_problem_1_problem_type_id',
'avalanche_problem_1_trigger_simple_id',
'avalanche_problem_1_type_id',
'avalanche_problem_2_cause_id',
'avalanche_problem_2_destructive_size_ext_id',
'avalanche_problem_2_distribution_id',
'avalanche_problem_2_exposed_height_1',
'avalanche_problem_2_exposed_height_2',
'avalanche_problem_2_ext_id',
'avalanche_problem_2_probability_id',
'avalanche_problem_2_problem_id',
'avalanche_problem_2_problem_type_id',
'avalanche_problem_2_trigger_simple_id',
'avalanche_problem_2_type_id',
'avalanche_problem_3_cause_id',
'avalanche_problem_3_destructive_size_ext_id',
'avalanche_problem_3_distribution_id',
'avalanche_problem_3_exposed_height_1',
'avalanche_problem_3_exposed_height_2',
'avalanche_problem_3_ext_id',
'avalanche_problem_3_probability_id',
'avalanche_problem_3_problem_id',
'avalanche_problem_3_problem_type_id',
'avalanche_problem_3_trigger_simple_id',
'avalanche_problem_3_type_id',
'danger_level',
'mountain_weather_freezing_level',
'mountain_weather_precip_most_exposed',
'mountain_weather_precip_region',
'mountain_weather_temperature_elevation',
'mountain_weather_temperature_max',
'mountain_weather_temperature_min',
'region_id',
'region_type_id',
'danger_level_prev1day',
'danger_level_prev2day',
'danger_level_prev3day',
'avalanche_problem_1_cause_id_prev1day',
'avalanche_problem_1_problem_type_id_prev1day',
'avalanche_problem_1_cause_id_prev2day',
'avalanche_problem_1_problem_type_id_prev2day',
'avalanche_problem_1_cause_id_prev3day',
'avalanche_problem_1_problem_type_id_prev3day',
'avalanche_problem_2_cause_id_prev1day',
'avalanche_problem_2_problem_type_id_prev1day',
'avalanche_problem_2_cause_id_prev2day',
'avalanche_problem_2_problem_type_id_prev2day',
'avalanche_problem_2_cause_id_prev3day',
'avalanche_problem_2_problem_type_id_prev3day',
'mountain_weather_precip_region_prev1day',
'mountain_weather_precip_most_exposed_prev1day',
'mountain_weather_precip_region_prev3daysum',
'mountain_weather_wind_speed_num',
'mountain_weather_wind_direction_num',
'avalanche_problem_1_problem_type_id_class',
'avalanche_problem_1_sensitivity_id_class',
'avalanche_problem_1_trigger_simple_id_class',
'avalanche_problem_2_problem_type_id_class',
'avalanche_problem_2_sensitivity_id_class',
'avalanche_problem_2_trigger_simple_id_class',
'avalanche_problem_3_problem_type_id_class',
'avalanche_problem_3_sensitivity_id_class',
'avalanche_problem_3_trigger_simple_id_class',
'region_group_id',
'aval_problem_1_combined',
'emergency_warning_Ikke gitt',
'emergency_warning_Naturlig utløste skred',
'author_Andreas@nve',
'author_Eldbjorg@MET',
'author_Espen Granan',
'author_EspenN',
'author_Halvor@NVE',
'author_HåvardT@met',
'author_Ida@met',
'author_Ingrid@NVE',
'author_John Smits',
'author_JonasD@ObsKorps',
'author_Julie@SVV',
'author_Jørgen@obskorps',
'author_Karsten@NVE',
'author_MSA@nortind',
'author_Matilda@MET',
'author_Odd-Arne@NVE',
'author_Ragnar@NVE',
'author_Ronny@NVE',
'author_Silje@svv',
'author_Tommy@NVE',
'author_ToreV@met',
'author_anitaaw@met',
'author_emma@nve',
'author_haso@nve.no',
'author_heidi@nve.no',
'author_jan arild@obskorps',
'author_jegu@NVE',
'author_jostein@nve',
'author_knutinge@svv',
'author_magnush@met',
'author_martin@svv',
'author_ragnhildn@met',
'author_rue@nve',
'author_siri@met',
'author_solveig@NVE',
'author_torehum@svv',
'author_torolav@obskorps',
'mountain_weather_wind_direction_E',
'mountain_weather_wind_direction_N',
'mountain_weather_wind_direction_NE',
'mountain_weather_wind_direction_NW',
'mountain_weather_wind_direction_None',
'mountain_weather_wind_direction_Not given',
'mountain_weather_wind_direction_S',
'mountain_weather_wind_direction_SE',
'mountain_weather_wind_direction_SW',
'mountain_weather_wind_direction_W',
'month',
'num_date']
In [0]:
# Columns removed from the feature matrix: everything that describes the
# forecast we want to look up, plus columns that should have no influence.
# The list is assembled group by group; names and order match the flat list
# this replaces.
drop_list = (
    # outcome of the current-day forecast
    ['danger_level', 'aval_problem_1_combined']
    # every attribute of the three current-day avalanche problems
    + ['avalanche_problem_{}_{}'.format(problem_no, field)
       for problem_no in (1, 2, 3)
       for field in ('cause_id', 'destructive_size_ext_id', 'distribution_id',
                     'exposed_height_1', 'exposed_height_2', 'ext_id',
                     'probability_id', 'problem_id', 'problem_type_id',
                     'trigger_simple_id', 'type_id')]
    # class-encoded variants of the current-day avalanche problems
    + ['avalanche_problem_{}_{}_class'.format(problem_no, field)
       for problem_no in (1, 2, 3)
       for field in ('problem_type_id', 'sensitivity_id', 'trigger_simple_id')]
    # emergency-warning indicator columns
    + ['emergency_warning_Ikke gitt', 'emergency_warning_Naturlig utløste skred']
    # forecast-author indicator columns
    + ['author_' + author
       for author in ('Andreas@nve', 'Eldbjorg@MET', 'Espen Granan', 'EspenN',
                      'Halvor@NVE', 'HåvardT@met', 'Ida@met', 'Ingrid@NVE',
                      'John Smits', 'JonasD@ObsKorps', 'Julie@SVV',
                      'Jørgen@obskorps', 'Karsten@NVE', 'MSA@nortind',
                      'Matilda@MET', 'Odd-Arne@NVE', 'Ragnar@NVE', 'Ronny@NVE',
                      'Silje@svv', 'Tommy@NVE', 'ToreV@met', 'anitaaw@met',
                      'emma@nve', 'haso@nve.no', 'heidi@nve.no',
                      'jan arild@obskorps', 'jegu@NVE', 'jostein@nve',
                      'knutinge@svv', 'magnush@met', 'martin@svv',
                      'ragnhildn@met', 'rue@nve', 'siri@met', 'solveig@NVE',
                      'torehum@svv', 'torolav@obskorps')]
    # wind-direction indicator columns
    # (mountain_weather_wind_direction_num is not dropped)
    + ['mountain_weather_wind_direction_' + direction
       for direction in ('E', 'N', 'NE', 'NW', 'None', 'Not given',
                         'S', 'SE', 'SW', 'W')]
)
In [0]:
# Reference information reported back for every neighbour: which region and
# which day (numeric and original form) a stored forecast belongs to.
reference_names = ['region_id', 'num_date', 'date']
y_df = v_df.loc[:, reference_names]
y = y_df.values

# Feature matrix: all numeric columns except those listed in drop_list.
# NOTE(review): region_id and num_date are not in drop_list, so they are used
# as features as well as reference values - confirm this is intended.
X_df = v_df.filter(items=num_cols).drop(columns=drop_list)
X = X_df.values
feature_names = X_df.columns.values
In [0]:
# Summary statistics of the feature matrix; an empty percentile list keeps
# the table short (only the 50% row is shown besides count/mean/std/min/max).
X_df.describe(percentiles=[])
Out[0]:
mountain_weather_freezing_level
mountain_weather_precip_most_exposed
mountain_weather_precip_region
mountain_weather_temperature_elevation
mountain_weather_temperature_max
mountain_weather_temperature_min
region_id
region_type_id
danger_level_prev1day
danger_level_prev2day
danger_level_prev3day
avalanche_problem_1_cause_id_prev1day
avalanche_problem_1_problem_type_id_prev1day
avalanche_problem_1_cause_id_prev2day
avalanche_problem_1_problem_type_id_prev2day
avalanche_problem_1_cause_id_prev3day
avalanche_problem_1_problem_type_id_prev3day
avalanche_problem_2_cause_id_prev1day
avalanche_problem_2_problem_type_id_prev1day
avalanche_problem_2_cause_id_prev2day
avalanche_problem_2_problem_type_id_prev2day
avalanche_problem_2_cause_id_prev3day
avalanche_problem_2_problem_type_id_prev3day
mountain_weather_precip_region_prev1day
mountain_weather_precip_most_exposed_prev1day
mountain_weather_precip_region_prev3daysum
mountain_weather_wind_speed_num
mountain_weather_wind_direction_num
region_group_id
month
num_date
count
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.0
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.00000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8946.000000
8.946000e+03
mean
263.013637
3.845741
1.892242
710.216857
-1.977990
-6.083054
3019.619048
10.0
2.116477
2.107758
2.096915
13.818913
13.754415
13.699642
13.681869
13.574000
13.589425
9.020791
11.839146
8.99061
11.783255
8.940756
11.692712
1.897943
3.858596
5.610776
2.879499
2.771518
3.952381
4.828638
1.510268e+09
std
545.759132
8.753089
5.040852
653.097378
4.955183
7.273690
9.424287
0.0
0.742289
0.758490
0.776037
5.158252
11.334931
5.240061
11.344558
5.328489
11.342609
9.285028
15.393470
9.28359
15.364615
9.280419
15.317852
5.042556
8.759708
12.352439
2.694094
2.788040
2.126511
4.008575
2.123845e+07
min
0.000000
0.000000
0.000000
0.000000
-22.000000
-32.000000
3003.000000
10.0
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.00000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
1.480550e+09
50%
0.000000
0.000000
0.000000
700.000000
0.000000
-3.000000
3017.000000
10.0
2.000000
2.000000
2.000000
10.000000
10.000000
10.000000
10.000000
10.000000
10.000000
10.000000
5.000000
10.00000
5.000000
10.000000
5.000000
0.000000
0.000000
0.000000
4.000000
3.000000
3.000000
4.000000
1.514722e+09
max
2800.000000
160.000000
90.000000
1800.000000
20.000000
10.000000
3035.000000
10.0
4.000000
4.000000
4.000000
24.000000
50.000000
24.000000
50.000000
24.000000
50.000000
24.000000
50.000000
24.00000
50.000000
24.000000
50.000000
90.000000
160.000000
165.000000
10.000000
8.000000
7.000000
12.000000
1.548893e+09
In [0]:
# Same compact summary for the reference columns (the non-numeric 'date'
# column does not appear in the table).
y_df.describe(percentiles=[])
Out[0]:
region_id
num_date
count
8946.000000
8.946000e+03
mean
3019.619048
1.510268e+09
std
9.424287
2.123845e+07
min
3003.000000
1.480550e+09
50%
3017.000000
1.514722e+09
max
3035.000000
1.548893e+09
In [0]:
In [0]:
import datetime as dt
In [0]:
# Reminder of how positive / negative slice bounds behave; the train/test
# split below uses the same [:-n] / [-n:] pattern.
a = np.arange(10)
for piece in (a[2:], a[:-2], a[-2:]):
    print(piece)
[2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7]
[8 9]
In [0]:
# Hold out the last `split_index` rows as the query/test set. v_df was sorted
# by date before X and y were built, so these are the most recent forecasts.
# NOTE(review): a fixed row count can cut through a single day - the recorded
# output shows 2019-01-17 in both train and test. Consider splitting on a date.
split_index = 300
X_train, X_test = X[:-split_index, :], X[-split_index:, :]
y_train, y_test = y[:-split_index, :], y[-split_index:, :]
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# Column 1 of y is num_date (POSIX seconds). pandas computes it relative to
# UTC for tz-naive dates, so convert back with pandas (also UTC) rather than
# datetime.fromtimestamp, which would shift the dates by the machine's local
# UTC offset.
print(pd.to_datetime(y_train[:, 1].min(), unit='s'), pd.to_datetime(y_train[:, 1].max(), unit='s'))
print(pd.to_datetime(y_test[:, 1].min(), unit='s'), pd.to_datetime(y_test[:, 1].max(), unit='s'))
(8646, 31) (8646, 3) (300, 31) (300, 3)
2016-12-01 00:00:00 2019-01-17 00:00:00
2019-01-17 00:00:00 2019-01-31 00:00:00
In [0]:
# Standardise the features using statistics from the training rows only, so
# the held-out rows do not influence the scaling. The fitted per-feature
# statistics are available as scaler.mean_ and scaler.scale_.
scaler = preprocessing.StandardScaler().fit(X_train)
# Only the last expression of a cell is rendered: show the scaled training data.
scaler.transform(X_train)
Out[0]:
array([[-0.49039145, -0.43240002, -0.37084221, ..., -1.85833447,
1.75425904, -1.39448436],
[-0.49039145, -0.43240002, -0.37084221, ..., -0.91771685,
1.75425904, -1.39448436],
[-0.49039145, -0.43240002, -0.37084221, ..., 1.43382719,
1.75425904, -1.39448436],
...,
[-0.49039145, -0.20593907, -0.17481007, ..., -0.44740804,
-0.98734905, 1.90218227],
[-0.49039145, -0.20593907, -0.17481007, ..., -0.91771685,
-0.98734905, 1.90218227],
[-0.49039145, -0.20593907, -0.17481007, ..., -1.38802566,
-0.98734905, 1.90218227]])
In [0]:
# Unsupervised nearest-neighbour index over the standardised training forecasts.
from sklearn.neighbors import NearestNeighbors
# n_neighbors=3: each query returns the three closest stored forecasts.
# %time reports how long building the ball-tree index takes.
%time nbrs = NearestNeighbors(n_neighbors=3, algorithm='ball_tree').fit(scaler.transform(X_train))
CPU times: user 64.6 ms, sys: 1.89 ms, total: 66.5 ms
Wall time: 68.5 ms
In [0]:
# Find the stored forecasts most similar to one held-out forecast.
i = 245  # row of the test set used as the query
query = scaler.transform(X_test[[i], :])  # keep 2-D shape (1, n_features)
distances, indices = nbrs.kneighbors(query)
print(distances, indices)

# Columns of y / y_train / y_test: 0 = region_id, 1 = num_date, 2 = date
print("Input:")
print("Region: {}".format(y_test[i, 0]), "Date: {}".format(y_test[i, 2]))
print("\nSuggestions:")
for match in y_train[indices[0]]:
    print("Region: {}".format(match[0]), "Date: {}".format(match[2]))
[[2.083942 2.13339542 2.14536812]] [[4865 4507 4889]]
Input:
Region: 3024 Date: 2019-01-29 00:00:00
Suggestions:
Region: 3022 Date: 2018-01-19 00:00:00
Region: 3028 Date: 2018-01-02 00:00:00
Region: 3022 Date: 2018-01-20 00:00:00
In [0]:
def get_varsom_link(region_id, valid_date, region_names=None):
    """Build the varsom.no avalanche-forecast URL for one region and day.

    The link has the form
    https://www.varsom.no/snoskredvarsling/varsel/<region name>/<YYYY-MM-DD>,
    e.g. https://www.varsom.no/snoskredvarsling/varsel/Lyngen/2019-03-13

    Parameters
    ----------
    region_id : str or int
        Either the region name itself (used as is) or a numeric region id
        that is looked up in ``region_names``.
    valid_date : date-like or str
        Day of the forecast. Objects with ``strftime`` (date, datetime,
        pandas Timestamp) are formatted directly; anything else is converted
        with ``str()`` and must start with an ISO ``YYYY-MM-DD`` date.
    region_names : mapping, optional
        Maps numeric region ids to region names (anything with ``.get``).

    Returns
    -------
    str or None
        The URL, or None when the region name cannot be resolved (no mapping
        given or id missing from it).

    Raises
    ------
    ValueError
        If ``valid_date`` is a string that does not start with a valid
        ISO date.
    """
    from datetime import date
    from urllib.parse import quote

    # Resolve the region name used in the URL path.
    if isinstance(region_id, str):
        region_name = region_id
    elif region_names is None:
        region_name = None
    else:
        region_name = region_names.get(region_id)
    if not region_name:
        return None

    # Normalise the day to YYYY-MM-DD, dropping any time-of-day part.
    if hasattr(valid_date, 'strftime'):
        day = valid_date.strftime('%Y-%m-%d')
    else:
        day = date.fromisoformat(str(valid_date).strip()[:10]).isoformat()

    # Percent-encode the name so spaces / non-ASCII letters yield a valid URL.
    # NOTE(review): assumes varsom.no accepts the percent-encoded region name
    # as the path segment - confirm for names containing spaces.
    return 'https://www.varsom.no/snoskredvarsling/varsel/{}/{}'.format(
        quote(region_name, safe=''), day)
In [0]:
Content source: kmunve/APS
Similar notebooks: