Find forecast for a similar conditions

The nearest neighbor model should retrieve the region Id and date for the three foecasts that are most similar to the current conditions.

Targets are date and region_id.

Features are mountain weather elements and elements in the forecasts of the previous days.

Output should be the varsom.no link with the following format, e.g. https://www.varsom.no/snoskredvarsling/varsel/Lyngen/2019-03-13


In [0]:
import pandas as pd
import numpy as np
import json

from sklearn import preprocessing

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.simplefilter('ignore')

Get data


In [0]:
# get the data

v_df = pd.read_csv('https://raw.githubusercontent.com/kmunve/APS/master/aps/notebooks/ml_varsom/varsom_ml_preproc_3y.csv', index_col=0) # where is the time stamp?)
v_df.drop_duplicates(keep='first', inplace=True) # for some reason all rows appear twice
v_df.describe()


Out[0]:
avalanche_problem_1_cause_id avalanche_problem_1_destructive_size_ext_id avalanche_problem_1_distribution_id avalanche_problem_1_exposed_height_1 avalanche_problem_1_exposed_height_2 avalanche_problem_1_ext_id avalanche_problem_1_probability_id avalanche_problem_1_problem_id avalanche_problem_1_problem_type_id avalanche_problem_1_trigger_simple_id avalanche_problem_1_type_id avalanche_problem_2_cause_id avalanche_problem_2_destructive_size_ext_id avalanche_problem_2_distribution_id avalanche_problem_2_exposed_height_1 avalanche_problem_2_exposed_height_2 avalanche_problem_2_ext_id avalanche_problem_2_probability_id avalanche_problem_2_problem_id avalanche_problem_2_problem_type_id avalanche_problem_2_trigger_simple_id avalanche_problem_2_type_id avalanche_problem_3_cause_id avalanche_problem_3_destructive_size_ext_id avalanche_problem_3_distribution_id avalanche_problem_3_exposed_height_1 avalanche_problem_3_exposed_height_2 avalanche_problem_3_ext_id avalanche_problem_3_probability_id avalanche_problem_3_problem_id avalanche_problem_3_problem_type_id avalanche_problem_3_trigger_simple_id avalanche_problem_3_type_id danger_level mountain_weather_freezing_level mountain_weather_precip_most_exposed mountain_weather_precip_region mountain_weather_temperature_elevation mountain_weather_temperature_max mountain_weather_temperature_min ... author_Ingrid@NVE author_John Smits author_JonasD@ObsKorps author_Julie@SVV author_Jørgen@obskorps author_Karsten@NVE author_MSA@nortind author_Matilda@MET author_Odd-Arne@NVE author_Ragnar@NVE author_Ronny@NVE author_Silje@svv author_Tommy@NVE author_ToreV@met author_anitaaw@met author_emma@nve author_haso@nve.no author_heidi@nve.no author_jan arild@obskorps author_jegu@NVE author_jostein@nve author_knutinge@svv author_magnush@met author_martin@svv author_ragnhildn@met author_rue@nve author_siri@met author_solveig@NVE author_torehum@svv author_torolav@obskorps mountain_weather_wind_direction_E mountain_weather_wind_direction_N mountain_weather_wind_direction_NE mountain_weather_wind_direction_NW mountain_weather_wind_direction_None mountain_weather_wind_direction_Not given mountain_weather_wind_direction_S mountain_weather_wind_direction_SE mountain_weather_wind_direction_SW mountain_weather_wind_direction_W
count 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.00000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 ... 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000 16632.000000
mean 7.504990 1.160955 1.012145 335.918711 34.704185 10.563672 1.642496 0.536195 7.453523 10.168951 6.006494 4.886003 0.669252 0.491282 201.569264 28.330928 5.575397 0.854738 0.571789 6.408009 4.907047 3.455988 0.434103 0.061748 0.043891 17.821068 2.15849 0.501744 0.077321 0.076659 0.577381 0.453944 0.315657 1.145924 142.281145 2.084055 1.024110 383.700096 -1.064400 -3.280363 ... 0.010402 0.009319 0.031926 0.000782 0.024952 0.013167 0.008418 0.006734 0.009680 0.014971 0.020984 0.006794 0.021224 0.018939 0.004089 0.014250 0.020022 0.024711 0.010221 0.007335 0.016114 0.023449 0.002706 0.015512 0.010943 0.010161 0.005652 0.009440 0.012386 0.024411 0.027718 0.012205 0.013949 0.027898 0.004630 0.690536 0.034392 0.097042 0.051166 0.040464
std 7.876984 1.170615 1.027010 401.058530 150.562400 9.951601 1.561408 0.498703 10.808093 10.027457 6.075860 8.182476 1.108225 0.840283 374.693192 133.425354 8.967815 1.372375 0.903706 12.767937 8.321628 5.884819 2.813269 0.394446 0.288054 126.367889 37.35957 3.143473 0.482797 0.473408 4.352117 2.939294 2.063900 1.186906 421.728012 6.712413 3.818734 596.377894 3.767183 6.136854 ... 0.101460 0.096089 0.175809 0.027947 0.155983 0.113995 0.091363 0.081787 0.097913 0.121441 0.143334 0.082149 0.144135 0.136315 0.063812 0.118522 0.140078 0.155249 0.100585 0.085334 0.125916 0.151328 0.051947 0.123582 0.104037 0.100292 0.074968 0.096701 0.110603 0.154325 0.164168 0.109805 0.117283 0.164686 0.067886 0.462286 0.182238 0.296024 0.220344 0.197051
min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -22.000000 -32.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -5.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
50% 10.000000 1.000000 1.000000 200.000000 0.000000 15.000000 3.000000 1.000000 5.000000 10.000000 10.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000
75% 13.000000 2.000000 2.000000 600.000000 0.000000 20.000000 3.000000 1.000000 10.000000 21.000000 10.000000 10.000000 2.000000 1.000000 200.000000 0.000000 15.000000 3.000000 2.000000 5.000000 10.000000 10.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 0.000000 0.000000 0.000000 1100.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000
max 24.000000 4.000000 4.000000 2100.000000 2000.000000 25.000000 5.000000 1.000000 50.000000 22.000000 20.000000 24.000000 4.000000 3.000000 2300.000000 1500.000000 25.000000 5.000000 2.000000 50.000000 22.000000 20.000000 24.000000 4.000000 3.000000 2000.000000 1100.00000 25.000000 5.000000 3.000000 50.000000 22.000000 20.000000 4.000000 2800.000000 160.000000 90.000000 1800.000000 20.000000 10.000000 ... 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000

8 rows × 122 columns


In [0]:
v_df.head()


Out[0]:
avalanche_problem_1_cause_id avalanche_problem_1_destructive_size_ext_id avalanche_problem_1_distribution_id avalanche_problem_1_exposed_height_1 avalanche_problem_1_exposed_height_2 avalanche_problem_1_ext_id avalanche_problem_1_probability_id avalanche_problem_1_problem_id avalanche_problem_1_problem_type_id avalanche_problem_1_trigger_simple_id avalanche_problem_1_type_id avalanche_problem_2_cause_id avalanche_problem_2_destructive_size_ext_id avalanche_problem_2_distribution_id avalanche_problem_2_exposed_height_1 avalanche_problem_2_exposed_height_2 avalanche_problem_2_ext_id avalanche_problem_2_probability_id avalanche_problem_2_problem_id avalanche_problem_2_problem_type_id avalanche_problem_2_trigger_simple_id avalanche_problem_2_type_id avalanche_problem_3_cause_id avalanche_problem_3_destructive_size_ext_id avalanche_problem_3_distribution_id avalanche_problem_3_exposed_height_1 avalanche_problem_3_exposed_height_2 avalanche_problem_3_ext_id avalanche_problem_3_probability_id avalanche_problem_3_problem_id avalanche_problem_3_problem_type_id avalanche_problem_3_trigger_simple_id avalanche_problem_3_type_id danger_level mountain_weather_freezing_level mountain_weather_precip_most_exposed mountain_weather_precip_region mountain_weather_temperature_elevation mountain_weather_temperature_max mountain_weather_temperature_min ... author_Ingrid@NVE author_John Smits author_JonasD@ObsKorps author_Julie@SVV author_Jørgen@obskorps author_Karsten@NVE author_MSA@nortind author_Matilda@MET author_Odd-Arne@NVE author_Ragnar@NVE author_Ronny@NVE author_Silje@svv author_Tommy@NVE author_ToreV@met author_anitaaw@met author_emma@nve author_haso@nve.no author_heidi@nve.no author_jan arild@obskorps author_jegu@NVE author_jostein@nve author_knutinge@svv author_magnush@met author_martin@svv author_ragnhildn@met author_rue@nve author_siri@met author_solveig@NVE author_torehum@svv author_torolav@obskorps mountain_weather_wind_direction_E mountain_weather_wind_direction_N mountain_weather_wind_direction_NE mountain_weather_wind_direction_NW mountain_weather_wind_direction_None mountain_weather_wind_direction_Not given mountain_weather_wind_direction_S mountain_weather_wind_direction_SE mountain_weather_wind_direction_SW mountain_weather_wind_direction_W
index
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
1 15 2 2 800 0 20 2 1 10 10 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0.0 0.0 0.0 0.0 0.0 0.0 ... 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
2 15 2 2 400 0 20 2 1 10 10 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0.0 0.0 0.0 0.0 0.0 0.0 ... 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
3 15 2 2 400 0 20 2 1 10 21 10 11 2 2 400 0 20 2 2 30 10 10 0 0 0 0 0 0 0 0 0 0 0 2 0.0 0.0 0.0 0.0 0.0 0.0 ... 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
4 10 2 2 400 0 20 3 1 7 21 10 11 2 2 400 0 20 2 2 30 10 10 0 0 0 0 0 0 0 0 0 0 0 2 0.0 0.0 0.0 0.0 0.0 0.0 ... 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0

5 rows × 123 columns


In [0]:
# Why are there summer dates in the dataset???
v_df['date'].unique()


Out[0]:
array(['2016-12-01', '2016-12-02', '2016-12-03', '2016-12-04',
       '2016-12-05', '2016-12-06', '2016-12-07', '2016-12-08',
       '2016-12-09', '2016-12-10', '2016-12-11', '2016-12-12',
       '2016-12-13', '2016-12-14', '2016-12-15', '2016-12-16',
       '2016-12-17', '2016-12-18', '2016-12-19', '2016-12-20',
       '2016-12-21', '2016-12-22', '2016-12-23', '2016-12-24',
       '2016-12-25', '2016-12-26', '2016-12-27', '2016-12-28',
       '2016-12-29', '2016-12-30', '2016-12-31', '2017-01-01',
       '2017-01-02', '2017-01-03', '2017-01-04', '2017-01-05',
       '2017-01-06', '2017-01-07', '2017-01-08', '2017-01-09',
       '2017-01-10', '2017-01-11', '2017-01-12', '2017-01-13',
       '2017-01-14', '2017-01-15', '2017-01-16', '2017-01-17',
       '2017-01-18', '2017-01-19', '2017-01-20', '2017-01-21',
       '2017-01-22', '2017-01-23', '2017-01-24', '2017-01-25',
       '2017-01-26', '2017-01-27', '2017-01-28', '2017-01-29',
       '2017-01-30', '2017-01-31', '2017-02-01', '2017-02-02',
       '2017-02-03', '2017-02-04', '2017-02-05', '2017-02-06',
       '2017-02-07', '2017-02-08', '2017-02-09', '2017-02-10',
       '2017-02-11', '2017-02-12', '2017-02-13', '2017-02-14',
       '2017-02-15', '2017-02-16', '2017-02-17', '2017-02-18',
       '2017-02-19', '2017-02-20', '2017-02-21', '2017-02-22',
       '2017-02-23', '2017-02-24', '2017-02-25', '2017-02-26',
       '2017-02-27', '2017-02-28', '2017-03-01', '2017-03-02',
       '2017-03-03', '2017-03-04', '2017-03-05', '2017-03-06',
       '2017-03-07', '2017-03-08', '2017-03-09', '2017-03-10',
       '2017-03-11', '2017-03-12', '2017-03-13', '2017-03-14',
       '2017-03-15', '2017-03-16', '2017-03-17', '2017-03-18',
       '2017-03-19', '2017-03-20', '2017-03-21', '2017-03-22',
       '2017-03-23', '2017-03-24', '2017-03-25', '2017-03-26',
       '2017-03-27', '2017-03-28', '2017-03-29', '2017-03-30',
       '2017-03-31', '2017-04-01', '2017-04-02', '2017-04-03',
       '2017-04-04', '2017-04-05', '2017-04-06', '2017-04-07',
       '2017-04-08', '2017-04-09', '2017-04-10', '2017-04-11',
       '2017-04-12', '2017-04-13', '2017-04-14', '2017-04-15',
       '2017-04-16', '2017-04-17', '2017-04-18', '2017-04-19',
       '2017-04-20', '2017-04-21', '2017-04-22', '2017-04-23',
       '2017-04-24', '2017-04-25', '2017-04-26', '2017-04-27',
       '2017-04-28', '2017-04-29', '2017-04-30', '2017-05-01',
       '2017-05-02', '2017-05-03', '2017-05-04', '2017-05-05',
       '2017-05-06', '2017-05-07', '2017-05-08', '2017-05-09',
       '2017-05-10', '2017-05-11', '2017-05-12', '2017-05-13',
       '2017-05-14', '2017-05-15', '2017-05-16', '2017-05-17',
       '2017-05-18', '2017-05-19', '2017-05-20', '2017-05-21',
       '2017-05-22', '2017-05-23', '2017-05-24', '2017-05-25',
       '2017-05-26', '2017-05-27', '2017-05-28', '2017-05-29',
       '2017-05-30', '2017-05-31', '2017-06-01', '2017-06-02',
       '2017-06-03', '2017-06-04', '2017-06-05', '2017-06-06',
       '2017-06-07', '2017-06-08', '2017-06-09', '2017-06-10',
       '2017-06-11', '2017-06-12', '2017-06-13', '2017-06-14',
       '2017-06-15', '2017-06-16', '2017-06-17', '2017-06-18',
       '2017-06-19', '2017-06-20', '2017-06-21', '2017-06-22',
       '2017-06-23', '2017-06-24', '2017-06-25', '2017-06-26',
       '2017-06-27', '2017-06-28', '2017-06-29', '2017-06-30',
       '2017-07-01', '2017-07-02', '2017-07-03', '2017-07-04',
       '2017-07-05', '2017-07-06', '2017-07-07', '2017-07-08',
       '2017-07-09', '2017-07-10', '2017-07-11', '2017-07-12',
       '2017-07-13', '2017-07-14', '2017-07-15', '2017-07-16',
       '2017-07-17', '2017-07-18', '2017-07-19', '2017-07-20',
       '2017-07-21', '2017-07-22', '2017-07-23', '2017-07-24',
       '2017-07-25', '2017-07-26', '2017-07-27', '2017-07-28',
       '2017-07-29', '2017-07-30', '2017-07-31', '2017-08-01',
       '2017-08-02', '2017-08-03', '2017-08-04', '2017-08-05',
       '2017-08-06', '2017-08-07', '2017-08-08', '2017-08-09',
       '2017-08-10', '2017-08-11', '2017-08-12', '2017-08-13',
       '2017-08-14', '2017-08-15', '2017-08-16', '2017-08-17',
       '2017-08-18', '2017-08-19', '2017-08-20', '2017-08-21',
       '2017-08-22', '2017-08-23', '2017-08-24', '2017-08-25',
       '2017-08-26', '2017-08-27', '2017-08-28', '2017-08-29',
       '2017-08-30', '2017-08-31', '2017-09-01', '2017-09-02',
       '2017-09-03', '2017-09-04', '2017-09-05', '2017-09-06',
       '2017-09-07', '2017-09-08', '2017-09-09', '2017-09-10',
       '2017-09-11', '2017-09-12', '2017-09-13', '2017-09-14',
       '2017-09-15', '2017-09-16', '2017-09-17', '2017-09-18',
       '2017-09-19', '2017-09-20', '2017-09-21', '2017-09-22',
       '2017-09-23', '2017-09-24', '2017-09-25', '2017-09-26',
       '2017-09-27', '2017-09-28', '2017-09-29', '2017-09-30',
       '2017-10-01', '2017-10-02', '2017-10-03', '2017-10-04',
       '2017-10-05', '2017-10-06', '2017-10-07', '2017-10-08',
       '2017-10-09', '2017-10-10', '2017-10-11', '2017-10-12',
       '2017-10-13', '2017-10-14', '2017-10-15', '2017-10-16',
       '2017-10-17', '2017-10-18', '2017-10-19', '2017-10-20',
       '2017-10-21', '2017-10-22', '2017-10-23', '2017-10-24',
       '2017-10-25', '2017-10-26', '2017-10-27', '2017-10-28',
       '2017-10-29', '2017-10-30', '2017-10-31', '2017-11-01',
       '2017-11-02', '2017-11-03', '2017-11-04', '2017-11-05',
       '2017-11-06', '2017-11-07', '2017-11-08', '2017-11-09',
       '2017-11-10', '2017-11-11', '2017-11-12', '2017-11-13',
       '2017-11-14', '2017-11-15', '2017-11-16', '2017-11-17',
       '2017-11-18', '2017-11-19', '2017-11-20', '2017-11-21',
       '2017-11-22', '2017-11-23', '2017-11-24', '2017-11-25',
       '2017-11-26', '2017-11-27', '2017-11-28', '2017-11-29',
       '2017-11-30', '2017-12-01', '2017-12-02', '2017-12-03',
       '2017-12-04', '2017-12-05', '2017-12-06', '2017-12-07',
       '2017-12-08', '2017-12-09', '2017-12-10', '2017-12-11',
       '2017-12-12', '2017-12-13', '2017-12-14', '2017-12-15',
       '2017-12-16', '2017-12-17', '2017-12-18', '2017-12-19',
       '2017-12-20', '2017-12-21', '2017-12-22', '2017-12-23',
       '2017-12-24', '2017-12-25', '2017-12-26', '2017-12-27',
       '2017-12-28', '2017-12-29', '2017-12-30', '2017-12-31',
       '2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
       '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
       '2018-01-09', '2018-01-10', '2018-01-11', '2018-01-12',
       '2018-01-13', '2018-01-14', '2018-01-15', '2018-01-16',
       '2018-01-17', '2018-01-18', '2018-01-19', '2018-01-20',
       '2018-01-21', '2018-01-22', '2018-01-23', '2018-01-24',
       '2018-01-25', '2018-01-26', '2018-01-27', '2018-01-28',
       '2018-01-29', '2018-01-30', '2018-01-31', '2018-02-01',
       '2018-02-02', '2018-02-03', '2018-02-04', '2018-02-05',
       '2018-02-06', '2018-02-07', '2018-02-08', '2018-02-09',
       '2018-02-10', '2018-02-11', '2018-02-12', '2018-02-13',
       '2018-02-14', '2018-02-15', '2018-02-16', '2018-02-17',
       '2018-02-18', '2018-02-19', '2018-02-20', '2018-02-21',
       '2018-02-22', '2018-02-23', '2018-02-24', '2018-02-25',
       '2018-02-26', '2018-02-27', '2018-02-28', '2018-03-01',
       '2018-03-02', '2018-03-03', '2018-03-04', '2018-03-05',
       '2018-03-06', '2018-03-07', '2018-03-08', '2018-03-09',
       '2018-03-10', '2018-03-11', '2018-03-12', '2018-03-13',
       '2018-03-14', '2018-03-15', '2018-03-16', '2018-03-17',
       '2018-03-18', '2018-03-19', '2018-03-20', '2018-03-21',
       '2018-03-22', '2018-03-23', '2018-03-24', '2018-03-25',
       '2018-03-26', '2018-03-27', '2018-03-28', '2018-03-29',
       '2018-03-30', '2018-03-31', '2018-04-01', '2018-04-02',
       '2018-04-03', '2018-04-04', '2018-04-05', '2018-04-06',
       '2018-04-07', '2018-04-08', '2018-04-09', '2018-04-10',
       '2018-04-11', '2018-04-12', '2018-04-13', '2018-04-14',
       '2018-04-15', '2018-04-16', '2018-04-17', '2018-04-18',
       '2018-04-19', '2018-04-20', '2018-04-21', '2018-04-22',
       '2018-04-23', '2018-04-24', '2018-04-25', '2018-04-26',
       '2018-04-27', '2018-04-28', '2018-04-29', '2018-04-30',
       '2018-05-01', '2018-05-02', '2018-05-03', '2018-05-04',
       '2018-05-05', '2018-05-06', '2018-05-07', '2018-05-08',
       '2018-05-09', '2018-05-10', '2018-05-11', '2018-05-12',
       '2018-05-13', '2018-05-14', '2018-05-15', '2018-05-16',
       '2018-05-17', '2018-05-18', '2018-05-19', '2018-05-20',
       '2018-05-21', '2018-05-22', '2018-05-23', '2018-05-24',
       '2018-05-25', '2018-05-26', '2018-05-27', '2018-05-28',
       '2018-05-29', '2018-05-30', '2018-05-31', '2018-06-01',
       '2018-06-02', '2018-06-03', '2018-06-04', '2018-06-05',
       '2018-06-06', '2018-06-07', '2018-06-08', '2018-06-09',
       '2018-06-10', '2018-06-11', '2018-06-12', '2018-06-13',
       '2018-06-14', '2018-06-15', '2018-06-16', '2018-06-17',
       '2018-06-18', '2018-06-19', '2018-06-20', '2018-06-21',
       '2018-06-22', '2018-06-23', '2018-06-24', '2018-06-25',
       '2018-06-26', '2018-06-27', '2018-06-28', '2018-06-29',
       '2018-06-30', '2018-07-01', '2018-07-02', '2018-07-03',
       '2018-07-04', '2018-07-05', '2018-07-06', '2018-07-07',
       '2018-07-08', '2018-07-09', '2018-07-10', '2018-07-11',
       '2018-07-12', '2018-07-13', '2018-07-14', '2018-07-15',
       '2018-07-16', '2018-07-17', '2018-07-18', '2018-07-19',
       '2018-07-20', '2018-07-21', '2018-07-22', '2018-07-23',
       '2018-07-24', '2018-07-25', '2018-07-26', '2018-07-27',
       '2018-07-28', '2018-07-29', '2018-07-30', '2018-07-31',
       '2018-08-01', '2018-08-02', '2018-08-03', '2018-08-04',
       '2018-08-05', '2018-08-06', '2018-08-07', '2018-08-08',
       '2018-08-09', '2018-08-10', '2018-08-11', '2018-08-12',
       '2018-08-13', '2018-08-14', '2018-08-15', '2018-08-16',
       '2018-08-17', '2018-08-18', '2018-08-19', '2018-08-20',
       '2018-08-21', '2018-08-22', '2018-08-23', '2018-08-24',
       '2018-08-25', '2018-08-26', '2018-08-27', '2018-08-28',
       '2018-08-29', '2018-08-30', '2018-08-31', '2018-09-01',
       '2018-09-02', '2018-09-03', '2018-09-04', '2018-09-05',
       '2018-09-06', '2018-09-07', '2018-09-08', '2018-09-09',
       '2018-09-10', '2018-09-11', '2018-09-12', '2018-09-13',
       '2018-09-14', '2018-09-15', '2018-09-16', '2018-09-17',
       '2018-09-18', '2018-09-19', '2018-09-20', '2018-09-21',
       '2018-09-22', '2018-09-23', '2018-09-24', '2018-09-25',
       '2018-09-26', '2018-09-27', '2018-09-28', '2018-09-29',
       '2018-09-30', '2018-10-01', '2018-10-02', '2018-10-03',
       '2018-10-04', '2018-10-05', '2018-10-06', '2018-10-07',
       '2018-10-08', '2018-10-09', '2018-10-10', '2018-10-11',
       '2018-10-12', '2018-10-13', '2018-10-14', '2018-10-15',
       '2018-10-16', '2018-10-17', '2018-10-18', '2018-10-19',
       '2018-10-20', '2018-10-21', '2018-10-22', '2018-10-23',
       '2018-10-24', '2018-10-25', '2018-10-26', '2018-10-27',
       '2018-10-28', '2018-10-29', '2018-10-30', '2018-10-31',
       '2018-11-01', '2018-11-02', '2018-11-03', '2018-11-04',
       '2018-11-05', '2018-11-06', '2018-11-07', '2018-11-08',
       '2018-11-09', '2018-11-10', '2018-11-11', '2018-11-12',
       '2018-11-13', '2018-11-14', '2018-11-15', '2018-11-16',
       '2018-11-17', '2018-11-18', '2018-11-19', '2018-11-20',
       '2018-11-21', '2018-11-22', '2018-11-23', '2018-11-24',
       '2018-11-25', '2018-11-26', '2018-11-27', '2018-11-28',
       '2018-11-29', '2018-11-30', '2018-12-01', '2018-12-02',
       '2018-12-03', '2018-12-04', '2018-12-05', '2018-12-06',
       '2018-12-07', '2018-12-08', '2018-12-09', '2018-12-10',
       '2018-12-11', '2018-12-12', '2018-12-13', '2018-12-14',
       '2018-12-15', '2018-12-16', '2018-12-17', '2018-12-18',
       '2018-12-19', '2018-12-20', '2018-12-21', '2018-12-22',
       '2018-12-23', '2018-12-24', '2018-12-25', '2018-12-26',
       '2018-12-27', '2018-12-28', '2018-12-29', '2018-12-30',
       '2018-12-31', '2019-01-01', '2019-01-02', '2019-01-03',
       '2019-01-04', '2019-01-05', '2019-01-06', '2019-01-07',
       '2019-01-08', '2019-01-09', '2019-01-10', '2019-01-11',
       '2019-01-12', '2019-01-13', '2019-01-14', '2019-01-15',
       '2019-01-16', '2019-01-17', '2019-01-18', '2019-01-19',
       '2019-01-20', '2019-01-21', '2019-01-22', '2019-01-23',
       '2019-01-24', '2019-01-25', '2019-01-26', '2019-01-27',
       '2019-01-28', '2019-01-29', '2019-01-30', '2019-01-31'],
      dtype=object)

Pre-processing for kNN

  • convert dates to correct format
  • take out _regionid and date as reference
  • normalize the other numeric features

In [0]:
v_df['date'] = v_df['date'].apply(lambda d: pd.to_datetime(d))
v_df['month'] = v_df['date'].apply(lambda d: d.month)

In [0]:
v_df['month'].unique()


Out[0]:
array([12,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [0]:
# remove summer month
v_df.drop(v_df[v_df['month'].isin([6, 7, 8, 9, 10, 11])].index, inplace=True)
v_df.describe()


Out[0]:
avalanche_problem_1_cause_id avalanche_problem_1_destructive_size_ext_id avalanche_problem_1_distribution_id avalanche_problem_1_exposed_height_1 avalanche_problem_1_exposed_height_2 avalanche_problem_1_ext_id avalanche_problem_1_probability_id avalanche_problem_1_problem_id avalanche_problem_1_problem_type_id avalanche_problem_1_trigger_simple_id avalanche_problem_1_type_id avalanche_problem_2_cause_id avalanche_problem_2_destructive_size_ext_id avalanche_problem_2_distribution_id avalanche_problem_2_exposed_height_1 avalanche_problem_2_exposed_height_2 avalanche_problem_2_ext_id avalanche_problem_2_probability_id avalanche_problem_2_problem_id avalanche_problem_2_problem_type_id avalanche_problem_2_trigger_simple_id avalanche_problem_2_type_id avalanche_problem_3_cause_id avalanche_problem_3_destructive_size_ext_id avalanche_problem_3_distribution_id avalanche_problem_3_exposed_height_1 avalanche_problem_3_exposed_height_2 avalanche_problem_3_ext_id avalanche_problem_3_probability_id avalanche_problem_3_problem_id avalanche_problem_3_problem_type_id avalanche_problem_3_trigger_simple_id avalanche_problem_3_type_id danger_level mountain_weather_freezing_level mountain_weather_precip_most_exposed mountain_weather_precip_region mountain_weather_temperature_elevation mountain_weather_temperature_max mountain_weather_temperature_min ... author_John Smits author_JonasD@ObsKorps author_Julie@SVV author_Jørgen@obskorps author_Karsten@NVE author_MSA@nortind author_Matilda@MET author_Odd-Arne@NVE author_Ragnar@NVE author_Ronny@NVE author_Silje@svv author_Tommy@NVE author_ToreV@met author_anitaaw@met author_emma@nve author_haso@nve.no author_heidi@nve.no author_jan arild@obskorps author_jegu@NVE author_jostein@nve author_knutinge@svv author_magnush@met author_martin@svv author_ragnhildn@met author_rue@nve author_siri@met author_solveig@NVE author_torehum@svv author_torolav@obskorps mountain_weather_wind_direction_E mountain_weather_wind_direction_N mountain_weather_wind_direction_NE mountain_weather_wind_direction_NW mountain_weather_wind_direction_None mountain_weather_wind_direction_Not given mountain_weather_wind_direction_S mountain_weather_wind_direction_SE mountain_weather_wind_direction_SW mountain_weather_wind_direction_W month
count 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 ... 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000
mean 13.938967 2.155600 1.878493 624.111335 64.185111 19.617147 3.049408 0.995752 13.847753 18.882070 11.155824 9.071205 1.242231 0.912028 374.480215 52.671585 10.352113 1.587078 1.061704 11.893360 9.108205 6.418511 0.807065 0.114800 0.081601 33.132126 4.012967 0.932819 0.143751 0.142522 1.073441 0.843953 0.586854 2.126649 263.013637 3.845741 1.892242 710.216857 -1.977990 -6.083054 ... 0.017326 0.059356 0.001453 0.044154 0.023810 0.015649 0.012520 0.017997 0.027834 0.039012 0.012631 0.039459 0.035211 0.007601 0.025598 0.037223 0.045942 0.019003 0.013637 0.029958 0.043595 0.005030 0.028840 0.020344 0.018891 0.010507 0.017550 0.023027 0.045383 0.051531 0.022692 0.025822 0.051867 0.008607 0.427230 0.063827 0.178627 0.094567 0.075229 4.828638
std 5.058746 0.632049 0.572026 344.991990 200.034907 2.509094 0.481539 0.065040 11.341079 4.706317 3.337689 9.291766 1.251766 0.962794 442.964230 178.372604 9.993798 1.528074 0.998150 15.406953 9.498982 6.732525 3.796574 0.532152 0.388837 170.829574 50.868296 4.239090 0.651020 0.638199 5.889259 3.966576 2.785794 0.724212 545.759132 8.753089 5.040852 653.097378 4.955183 7.273690 ... 0.130491 0.236303 0.038095 0.205448 0.152464 0.124122 0.111194 0.132947 0.164505 0.193634 0.111683 0.194695 0.184324 0.086858 0.157942 0.189319 0.209372 0.136543 0.115987 0.170480 0.204203 0.070749 0.167365 0.141183 0.136148 0.101972 0.131315 0.149998 0.208155 0.221091 0.148927 0.158612 0.221770 0.092380 0.494704 0.244459 0.383061 0.292633 0.263775 4.008575
min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -22.000000 -32.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000
25% 10.000000 2.000000 2.000000 400.000000 0.000000 20.000000 3.000000 1.000000 7.000000 21.000000 10.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 0.000000 0.000000 0.000000 0.000000 -5.000000 -12.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000
50% 11.000000 2.000000 2.000000 600.000000 0.000000 20.000000 3.000000 1.000000 10.000000 21.000000 10.000000 10.000000 1.000000 1.000000 100.000000 0.000000 15.000000 2.000000 2.000000 5.000000 10.000000 10.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 0.000000 0.000000 0.000000 700.000000 0.000000 -3.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 4.000000
75% 15.000000 3.000000 2.000000 900.000000 0.000000 20.000000 3.000000 1.000000 10.000000 21.000000 10.000000 18.000000 2.000000 2.000000 700.000000 0.000000 20.000000 3.000000 2.000000 30.000000 21.000000 10.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 200.000000 4.000000 1.000000 1400.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 5.000000
max 24.000000 4.000000 4.000000 2100.000000 2000.000000 25.000000 5.000000 1.000000 50.000000 22.000000 20.000000 24.000000 4.000000 3.000000 2300.000000 1500.000000 25.000000 5.000000 2.000000 50.000000 22.000000 20.000000 24.000000 4.000000 3.000000 2000.000000 1100.000000 25.000000 5.000000 3.000000 50.000000 22.000000 20.000000 4.000000 2800.000000 160.000000 90.000000 1800.000000 20.000000 10.000000 ... 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 12.000000

8 rows × 123 columns


In [0]:
v_df['num_date'] = v_df['date'].apply(lambda d: d.timestamp())

In [0]:
#v_df.set_index('date', inplace=True)
v_df.sort_values(by='date', inplace=True)

In [0]:
v_df.tail(5)


Out[0]:
avalanche_problem_1_cause_id avalanche_problem_1_destructive_size_ext_id avalanche_problem_1_distribution_id avalanche_problem_1_exposed_height_1 avalanche_problem_1_exposed_height_2 avalanche_problem_1_ext_id avalanche_problem_1_probability_id avalanche_problem_1_problem_id avalanche_problem_1_problem_type_id avalanche_problem_1_trigger_simple_id avalanche_problem_1_type_id avalanche_problem_2_cause_id avalanche_problem_2_destructive_size_ext_id avalanche_problem_2_distribution_id avalanche_problem_2_exposed_height_1 avalanche_problem_2_exposed_height_2 avalanche_problem_2_ext_id avalanche_problem_2_probability_id avalanche_problem_2_problem_id avalanche_problem_2_problem_type_id avalanche_problem_2_trigger_simple_id avalanche_problem_2_type_id avalanche_problem_3_cause_id avalanche_problem_3_destructive_size_ext_id avalanche_problem_3_distribution_id avalanche_problem_3_exposed_height_1 avalanche_problem_3_exposed_height_2 avalanche_problem_3_ext_id avalanche_problem_3_probability_id avalanche_problem_3_problem_id avalanche_problem_3_problem_type_id avalanche_problem_3_trigger_simple_id avalanche_problem_3_type_id danger_level mountain_weather_freezing_level mountain_weather_precip_most_exposed mountain_weather_precip_region mountain_weather_temperature_elevation mountain_weather_temperature_max mountain_weather_temperature_min ... author_JonasD@ObsKorps author_Julie@SVV author_Jørgen@obskorps author_Karsten@NVE author_MSA@nortind author_Matilda@MET author_Odd-Arne@NVE author_Ragnar@NVE author_Ronny@NVE author_Silje@svv author_Tommy@NVE author_ToreV@met author_anitaaw@met author_emma@nve author_haso@nve.no author_heidi@nve.no author_jan arild@obskorps author_jegu@NVE author_jostein@nve author_knutinge@svv author_magnush@met author_martin@svv author_ragnhildn@met author_rue@nve author_siri@met author_solveig@NVE author_torehum@svv author_torolav@obskorps mountain_weather_wind_direction_E mountain_weather_wind_direction_N mountain_weather_wind_direction_NE mountain_weather_wind_direction_NW mountain_weather_wind_direction_None mountain_weather_wind_direction_Not given mountain_weather_wind_direction_S mountain_weather_wind_direction_SE mountain_weather_wind_direction_SW mountain_weather_wind_direction_W month num_date
index
2375 15 2 1 300 0 20 3 1 10 21 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0.0 0.0 0.0 1100.0 -10.0 -18.0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1.548893e+09
29303 10 2 2 1000 0 20 3 1 10 21 10 19 2 1 1100 0 20 2 2 30 10 10 0 0 0 0 0 0 0 0 0 0 0 2 0.0 5.0 4.0 1400.0 -10.0 -14.0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1.548893e+09
791 10 3 2 0 0 20 3 1 7 21 10 13 3 1 0 0 20 3 2 30 10 10 0 0 0 0 0 0 0 0 0 0 0 3 0.0 10.0 4.0 700.0 -4.0 -7.0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1.548893e+09
21383 11 2 2 700 0 20 3 1 30 21 10 15 2 2 700 0 20 3 2 10 21 10 0 0 0 0 0 0 0 0 0 0 0 2 0.0 0.0 0.0 1400.0 -7.0 -15.0 ... 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1.548893e+09
32471 10 2 2 700 700 20 3 1 7 21 10 19 2 1 700 0 20 3 2 30 10 10 0 0 0 0 0 0 0 0 0 0 0 2 0.0 12.0 8.0 1400.0 -9.0 -14.0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1.548893e+09

5 rows × 125 columns


In [0]:
# keep only numeric columns
from pandas.api.types import is_numeric_dtype
num_cols = [var for var in v_df.columns.values if is_numeric_dtype(v_df[var])] 
    
print(len(num_cols))
num_cols


124
Out[0]:
['avalanche_problem_1_cause_id',
 'avalanche_problem_1_destructive_size_ext_id',
 'avalanche_problem_1_distribution_id',
 'avalanche_problem_1_exposed_height_1',
 'avalanche_problem_1_exposed_height_2',
 'avalanche_problem_1_ext_id',
 'avalanche_problem_1_probability_id',
 'avalanche_problem_1_problem_id',
 'avalanche_problem_1_problem_type_id',
 'avalanche_problem_1_trigger_simple_id',
 'avalanche_problem_1_type_id',
 'avalanche_problem_2_cause_id',
 'avalanche_problem_2_destructive_size_ext_id',
 'avalanche_problem_2_distribution_id',
 'avalanche_problem_2_exposed_height_1',
 'avalanche_problem_2_exposed_height_2',
 'avalanche_problem_2_ext_id',
 'avalanche_problem_2_probability_id',
 'avalanche_problem_2_problem_id',
 'avalanche_problem_2_problem_type_id',
 'avalanche_problem_2_trigger_simple_id',
 'avalanche_problem_2_type_id',
 'avalanche_problem_3_cause_id',
 'avalanche_problem_3_destructive_size_ext_id',
 'avalanche_problem_3_distribution_id',
 'avalanche_problem_3_exposed_height_1',
 'avalanche_problem_3_exposed_height_2',
 'avalanche_problem_3_ext_id',
 'avalanche_problem_3_probability_id',
 'avalanche_problem_3_problem_id',
 'avalanche_problem_3_problem_type_id',
 'avalanche_problem_3_trigger_simple_id',
 'avalanche_problem_3_type_id',
 'danger_level',
 'mountain_weather_freezing_level',
 'mountain_weather_precip_most_exposed',
 'mountain_weather_precip_region',
 'mountain_weather_temperature_elevation',
 'mountain_weather_temperature_max',
 'mountain_weather_temperature_min',
 'region_id',
 'region_type_id',
 'danger_level_prev1day',
 'danger_level_prev2day',
 'danger_level_prev3day',
 'avalanche_problem_1_cause_id_prev1day',
 'avalanche_problem_1_problem_type_id_prev1day',
 'avalanche_problem_1_cause_id_prev2day',
 'avalanche_problem_1_problem_type_id_prev2day',
 'avalanche_problem_1_cause_id_prev3day',
 'avalanche_problem_1_problem_type_id_prev3day',
 'avalanche_problem_2_cause_id_prev1day',
 'avalanche_problem_2_problem_type_id_prev1day',
 'avalanche_problem_2_cause_id_prev2day',
 'avalanche_problem_2_problem_type_id_prev2day',
 'avalanche_problem_2_cause_id_prev3day',
 'avalanche_problem_2_problem_type_id_prev3day',
 'mountain_weather_precip_region_prev1day',
 'mountain_weather_precip_most_exposed_prev1day',
 'mountain_weather_precip_region_prev3daysum',
 'mountain_weather_wind_speed_num',
 'mountain_weather_wind_direction_num',
 'avalanche_problem_1_problem_type_id_class',
 'avalanche_problem_1_sensitivity_id_class',
 'avalanche_problem_1_trigger_simple_id_class',
 'avalanche_problem_2_problem_type_id_class',
 'avalanche_problem_2_sensitivity_id_class',
 'avalanche_problem_2_trigger_simple_id_class',
 'avalanche_problem_3_problem_type_id_class',
 'avalanche_problem_3_sensitivity_id_class',
 'avalanche_problem_3_trigger_simple_id_class',
 'region_group_id',
 'aval_problem_1_combined',
 'emergency_warning_Ikke gitt',
 'emergency_warning_Naturlig utløste skred',
 'author_Andreas@nve',
 'author_Eldbjorg@MET',
 'author_Espen Granan',
 'author_EspenN',
 'author_Halvor@NVE',
 'author_HåvardT@met',
 'author_Ida@met',
 'author_Ingrid@NVE',
 'author_John Smits',
 'author_JonasD@ObsKorps',
 'author_Julie@SVV',
 'author_Jørgen@obskorps',
 'author_Karsten@NVE',
 'author_MSA@nortind',
 'author_Matilda@MET',
 'author_Odd-Arne@NVE',
 'author_Ragnar@NVE',
 'author_Ronny@NVE',
 'author_Silje@svv',
 'author_Tommy@NVE',
 'author_ToreV@met',
 'author_anitaaw@met',
 'author_emma@nve',
 'author_haso@nve.no',
 'author_heidi@nve.no',
 'author_jan arild@obskorps',
 'author_jegu@NVE',
 'author_jostein@nve',
 'author_knutinge@svv',
 'author_magnush@met',
 'author_martin@svv',
 'author_ragnhildn@met',
 'author_rue@nve',
 'author_siri@met',
 'author_solveig@NVE',
 'author_torehum@svv',
 'author_torolav@obskorps',
 'mountain_weather_wind_direction_E',
 'mountain_weather_wind_direction_N',
 'mountain_weather_wind_direction_NE',
 'mountain_weather_wind_direction_NW',
 'mountain_weather_wind_direction_None',
 'mountain_weather_wind_direction_Not given',
 'mountain_weather_wind_direction_S',
 'mountain_weather_wind_direction_SE',
 'mountain_weather_wind_direction_SW',
 'mountain_weather_wind_direction_W',
 'month',
 'num_date']

In [0]:
# drop features that are related to the forecast we want to predict and features that should have no influence

drop_list = [
    'danger_level',
    'aval_problem_1_combined',
    'avalanche_problem_1_cause_id',
    'avalanche_problem_1_destructive_size_ext_id',
    'avalanche_problem_1_distribution_id',
    'avalanche_problem_1_exposed_height_1',
    'avalanche_problem_1_exposed_height_2',
    'avalanche_problem_1_ext_id',
    'avalanche_problem_1_probability_id',
    'avalanche_problem_1_problem_id',
    'avalanche_problem_1_problem_type_id',
    'avalanche_problem_1_trigger_simple_id',
    'avalanche_problem_1_type_id',
    'avalanche_problem_2_cause_id',
    'avalanche_problem_2_destructive_size_ext_id',
    'avalanche_problem_2_distribution_id',
    'avalanche_problem_2_exposed_height_1',
    'avalanche_problem_2_exposed_height_2',
    'avalanche_problem_2_ext_id',
    'avalanche_problem_2_probability_id',
    'avalanche_problem_2_problem_id',
    'avalanche_problem_2_problem_type_id',
    'avalanche_problem_2_trigger_simple_id',
    'avalanche_problem_2_type_id',
    'avalanche_problem_3_cause_id',
    'avalanche_problem_3_destructive_size_ext_id',
    'avalanche_problem_3_distribution_id',
    'avalanche_problem_3_exposed_height_1',
    'avalanche_problem_3_exposed_height_2',
    'avalanche_problem_3_ext_id',
    'avalanche_problem_3_probability_id',
    'avalanche_problem_3_problem_id',
    'avalanche_problem_3_problem_type_id',
    'avalanche_problem_3_trigger_simple_id',
    'avalanche_problem_3_type_id',
    'avalanche_problem_1_problem_type_id_class',
    'avalanche_problem_1_sensitivity_id_class',
    'avalanche_problem_1_trigger_simple_id_class',
    'avalanche_problem_2_problem_type_id_class',
    'avalanche_problem_2_sensitivity_id_class',
    'avalanche_problem_2_trigger_simple_id_class',
    'avalanche_problem_3_problem_type_id_class',
    'avalanche_problem_3_sensitivity_id_class',
    'avalanche_problem_3_trigger_simple_id_class',
    'emergency_warning_Ikke gitt',
    'emergency_warning_Naturlig utløste skred',
    'author_Andreas@nve',
    'author_Eldbjorg@MET',
    'author_Espen Granan',
    'author_EspenN',
    'author_Halvor@NVE',
    'author_HåvardT@met',
    'author_Ida@met',
    'author_Ingrid@NVE',
    'author_John Smits',
    'author_JonasD@ObsKorps',
    'author_Julie@SVV',
    'author_Jørgen@obskorps',
    'author_Karsten@NVE',
    'author_MSA@nortind',
    'author_Matilda@MET',
    'author_Odd-Arne@NVE',
    'author_Ragnar@NVE',
    'author_Ronny@NVE',
    'author_Silje@svv',
    'author_Tommy@NVE',
    'author_ToreV@met',
    'author_anitaaw@met',
    'author_emma@nve',
    'author_haso@nve.no',
    'author_heidi@nve.no',
    'author_jan arild@obskorps',
    'author_jegu@NVE',
    'author_jostein@nve',
    'author_knutinge@svv',
    'author_magnush@met',
    'author_martin@svv',
    'author_ragnhildn@met',
    'author_rue@nve',
    'author_siri@met',
    'author_solveig@NVE',
    'author_torehum@svv',
    'author_torolav@obskorps',
    'mountain_weather_wind_direction_E',
    'mountain_weather_wind_direction_N',
    'mountain_weather_wind_direction_NE',
    'mountain_weather_wind_direction_NW',
    'mountain_weather_wind_direction_None',
    'mountain_weather_wind_direction_Not given',
    'mountain_weather_wind_direction_S',
    'mountain_weather_wind_direction_SE',
    'mountain_weather_wind_direction_SW',
    'mountain_weather_wind_direction_W'
]

In [0]:
reference_names = ['region_id', 'num_date', 'date']
y_df = v_df[reference_names]
y = y_df.values

X_df = v_df.filter(num_cols).drop(drop_list, axis='columns')
X = X_df.values
feature_names = X_df.columns.values

In [0]:
X_df.describe(percentiles=[])


Out[0]:
mountain_weather_freezing_level mountain_weather_precip_most_exposed mountain_weather_precip_region mountain_weather_temperature_elevation mountain_weather_temperature_max mountain_weather_temperature_min region_id region_type_id danger_level_prev1day danger_level_prev2day danger_level_prev3day avalanche_problem_1_cause_id_prev1day avalanche_problem_1_problem_type_id_prev1day avalanche_problem_1_cause_id_prev2day avalanche_problem_1_problem_type_id_prev2day avalanche_problem_1_cause_id_prev3day avalanche_problem_1_problem_type_id_prev3day avalanche_problem_2_cause_id_prev1day avalanche_problem_2_problem_type_id_prev1day avalanche_problem_2_cause_id_prev2day avalanche_problem_2_problem_type_id_prev2day avalanche_problem_2_cause_id_prev3day avalanche_problem_2_problem_type_id_prev3day mountain_weather_precip_region_prev1day mountain_weather_precip_most_exposed_prev1day mountain_weather_precip_region_prev3daysum mountain_weather_wind_speed_num mountain_weather_wind_direction_num region_group_id month num_date
count 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.0 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.00000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8946.000000 8.946000e+03
mean 263.013637 3.845741 1.892242 710.216857 -1.977990 -6.083054 3019.619048 10.0 2.116477 2.107758 2.096915 13.818913 13.754415 13.699642 13.681869 13.574000 13.589425 9.020791 11.839146 8.99061 11.783255 8.940756 11.692712 1.897943 3.858596 5.610776 2.879499 2.771518 3.952381 4.828638 1.510268e+09
std 545.759132 8.753089 5.040852 653.097378 4.955183 7.273690 9.424287 0.0 0.742289 0.758490 0.776037 5.158252 11.334931 5.240061 11.344558 5.328489 11.342609 9.285028 15.393470 9.28359 15.364615 9.280419 15.317852 5.042556 8.759708 12.352439 2.694094 2.788040 2.126511 4.008575 2.123845e+07
min 0.000000 0.000000 0.000000 0.000000 -22.000000 -32.000000 3003.000000 10.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.480550e+09
50% 0.000000 0.000000 0.000000 700.000000 0.000000 -3.000000 3017.000000 10.0 2.000000 2.000000 2.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 5.000000 10.00000 5.000000 10.000000 5.000000 0.000000 0.000000 0.000000 4.000000 3.000000 3.000000 4.000000 1.514722e+09
max 2800.000000 160.000000 90.000000 1800.000000 20.000000 10.000000 3035.000000 10.0 4.000000 4.000000 4.000000 24.000000 50.000000 24.000000 50.000000 24.000000 50.000000 24.000000 50.000000 24.00000 50.000000 24.000000 50.000000 90.000000 160.000000 165.000000 10.000000 8.000000 7.000000 12.000000 1.548893e+09

In [0]:
y_df.describe(percentiles=[])


Out[0]:
region_id num_date
count 8946.000000 8.946000e+03
mean 3019.619048 1.510268e+09
std 9.424287 2.123845e+07
min 3003.000000 1.480550e+09
50% 3017.000000 1.514722e+09
max 3035.000000 1.548893e+09

In [0]:

Classification


In [0]:
import datetime as dt

In [0]:
a = np.arange(10) 
print(a[2:])
print(a[:-2])
print(a[-2:])


[2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7]
[8 9]

In [0]:
split_index = 300
X_train = X[:-split_index, :]
X_test = X[-split_index:, :]


y_train = y[:-split_index, :]
y_test = y[-split_index:, :]

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
print(dt.datetime.fromtimestamp(y_train[:,1].min()), dt.datetime.fromtimestamp(y_train[:,1].max()))
print(dt.datetime.fromtimestamp(y_test[:,1].min()), dt.datetime.fromtimestamp(y_test[:,1].max()))


(8646, 31) (8646, 3) (300, 31) (300, 3)
2016-12-01 00:00:00 2019-01-17 00:00:00
2019-01-17 00:00:00 2019-01-31 00:00:00

In [0]:
scaler = preprocessing.StandardScaler().fit(X_train)
scaler

scaler.mean_                                      
scaler.scale_                                       

scaler.transform(X_train)


Out[0]:
array([[-0.49039145, -0.43240002, -0.37084221, ..., -1.85833447,
         1.75425904, -1.39448436],
       [-0.49039145, -0.43240002, -0.37084221, ..., -0.91771685,
         1.75425904, -1.39448436],
       [-0.49039145, -0.43240002, -0.37084221, ...,  1.43382719,
         1.75425904, -1.39448436],
       ...,
       [-0.49039145, -0.20593907, -0.17481007, ..., -0.44740804,
        -0.98734905,  1.90218227],
       [-0.49039145, -0.20593907, -0.17481007, ..., -0.91771685,
        -0.98734905,  1.90218227],
       [-0.49039145, -0.20593907, -0.17481007, ..., -1.38802566,
        -0.98734905,  1.90218227]])

In [0]:
from sklearn.neighbors import NearestNeighbors

%time nbrs = NearestNeighbors(n_neighbors=3, algorithm='ball_tree').fit(scaler.transform(X_train))


CPU times: user 64.6 ms, sys: 1.89 ms, total: 66.5 ms
Wall time: 68.5 ms

In [0]:
#print(X_test[4,:].reshape(1,-1).shape)
i = 245
distances, indices = nbrs.kneighbors(scaler.transform(X_test[i,:].reshape(1,-1)))   
#print(y_pred, y_test[i])
print(distances, indices)
print("Input:")
print("Region: {}".format(y_test[i, :][0]), "Date: {}".format(y_test[i, :][2]))

print("\nSuggestions:")
for k in range(len(indices[0])):
  #print("Region: {}".format(y_train[indices[0][k], :][0]), "Date: {}".format(dt.datetime.fromtimestamp(y_train[indices[0][k], :][1])))
  print("Region: {}".format(y_train[indices[0][k], :][0]), "Date: {}".format(y_train[indices[0][k], :][2]))


[[2.083942   2.13339542 2.14536812]] [[4865 4507 4889]]
Input:
Region: 3024 Date: 2019-01-29 00:00:00

Suggestions:
Region: 3022 Date: 2018-01-19 00:00:00
Region: 3028 Date: 2018-01-02 00:00:00
Region: 3022 Date: 2018-01-20 00:00:00

In [0]:
def get_varsom_link(region_id, valid_date):
  pass

In [0]: