In [1]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn import metrics
import seaborn; seaborn.set()
In [2]:
# Load the two comma-separated exports covering 2015-01-01 00:00 to 2015-01-02 00:00.
# Going by the file names: solarGe holds actual generation per production type,
# solarSz holds the day-ahead wind/solar generation forecasts.
solarGe = pd.read_csv(
    "Actual Generation per Production Type_201501010000-201501020000.csv"
)
solarSz = pd.read_csv(
    "Generation Forecasts - Day Ahead for Wind and Solar_201501010000-201501020000.csv"
)
In [3]:
# Display the first two rows of solarGe.
solarGe.head(2)
Out[3]:
In [4]:
# Display the leading rows of solarSz (head() shows five rows by default).
solarSz.head()
Out[4]:
In [5]:
# Place the two tables side by side (column-wise, aligned on the row index),
# then list the combined column labels.
solar = pd.concat([solarGe, solarSz], axis='columns')
solar.columns
Out[5]:
In [6]:
# Remove the columns that are not compared below and give the remaining
# ones short labels.
# NOTE(review): the rename assumes exactly two columns survive the drop, with
# the solarGe column first and the solarSz column second - confirm against
# the output of solar.columns.
solarC = solar.drop(
    columns=[
        'Area',
        'MTU',
        'Solar - Actual Consumption [MW]',
        'Generation - Sum [MW] (D) / Germany (DE)',
        'MTU (CET)',
    ]
)
solarC.columns = ['Gen', 'Szac']
In [7]:
# Print the index summary, column dtypes and non-null counts of solarC.
solarC.info()
In [8]:
# Replace the row index of solarC with quarter-hourly timestamps.
# The index is bound to `time_index` rather than `range`: rebinding `range`
# would hide the Python builtin for the rest of the kernel session and make
# any later `range(n)` call fail with a TypeError.
# NOTE(review): the labels run from 00:15 on 2015-01-01 to 00:00 on 2015-01-02,
# so each row appears to be stamped with the END of its 15-minute interval -
# confirm against the MTU column of the source files.
time_index = pd.date_range('2015-01-01 00:15:00', '2015-01-02 00:00:00', freq='15min')
solarC = solarC.set_index(time_index)
In [9]:
# Plot every column of solarC on one wide figure. The (Polish) title reads
# "Actual energy generation vs. estimated".
ax = solarC.plot(
    figsize=(20,10),
    x_compat=True,
    title='Rzeczywista generacja energii vs szacunkowa',
)
# One major tick per hour, labelled as "HH : MM"; the trailing semicolon
# suppresses the cell's text output.
ax.xaxis.set_major_locator(mdates.HourLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H : %M'));
In [10]:
# Row-wise absolute difference between the 'Gen' and 'Szac' columns.
solarC['error'] = (solarC['Gen'] - solarC['Szac']).abs()
In [11]:
# Show summary statistics of the 'error' column.
solarC['error'].describe()
Out[11]:
In [12]:
# Root-mean-square error between the 'Gen' and 'Szac' columns:
# take the mean squared error first, then print its square root.
mse = metrics.mean_squared_error(solarC['Gen'], solarC['Szac'])
print(np.sqrt(mse))