In [1]:
%matplotlib inline
%config InlineBackend.figure_format='retina'
from IPython.display import Image, HTML
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import cycle
import math
import datetime as dt
import pandas as pd
import numpy as np
import pytz
import matplotlib.dates as mpd
from mpl_toolkits.mplot3d import Axes3D
import scipy
from scipy import signal
from scipy.signal import medfilt
import numexpr as ne
from patsy import dmatrices
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.cross_validation import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures, Normalizer, MinMaxScaler, Binarizer
from sklearn import metrics
from sklearn.cluster import AgglomerativeClustering, KMeans, DBSCAN, Birch, AffinityPropagation
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA, KernelPCA, MiniBatchSparsePCA, IncrementalPCA
from hdbscan import HDBSCAN
import statsmodels.api as sm
import random
from enerpi.base import timeit
from enerpi.api import enerpi_data_catalog
from enerpi.enerplot import write_fig_to_svg, tableau20
from enerpi.sunposition import sun_position
from prettyprinting import *
def _f_perc(y):
def __perc(x):
return np.percentile(x[np.isfinite(x)], y) if np.isfinite(x).sum() > 5 else np.nan
__perc.__name__ = 'perc_{}'.format(y)
return __perc
def _step(x):
__name__ = 'step'
return x[-1] - x[0]
@timeit('gen_data_ldr_minutes', verbose=True)
def gen_data_ldr_minutes(homog):
rs = homog.resample('1min')
ldr_minutos_smin = rs.ldr.min()
minutos_no_nulos_mask = ldr_minutos_smin > 0
ldr_minutos = rs.median_filter.agg(['median', 'std', _step]).astype('int32') #.unstack()
ldr_minutos = ldr_minutos.join(sun_position(ldr_minutos.index, latitude_deg=LAT, longitude_deg=LONG, elevation=ELEV_M)) # .round(1)
cols_deriv_t = ['median', 'altitude', 'azimuth']
ldr_minutos_d1 = ldr_minutos[cols_deriv_t].diff().rename(columns=lambda x: x + '_d1')
ldr_minutos_d1.loc[ldr_minutos_d1['azimuth_d1'].abs() > 100, 'azimuth_d1'] += 360
ldr_minutos_d2 = ldr_minutos_d1.diff().rename(columns=lambda x: x[:-1] + '2')
ldr_minutos = ldr_minutos.join(ldr_minutos_d1.join(ldr_minutos_d2))
ldr_usar = ldr_minutos[minutos_no_nulos_mask].dropna()
print_red('LDR por minutos. {} --> {} válidos. Head:\n{}'.format(len(ldr_minutos), len(ldr_usar), ldr_usar.head()))
return ldr_usar
@timeit('LOAD LDR DATA', verbose=True)
def load_data():
# Catálogo y lectura de todos los datos.
cat = enerpi_data_catalog()
data, data_s = cat.get_all_data()
LDR = pd.DataFrame(data.ldr).tz_localize(TZ)
print_cyan('LDR Raw Data (from {:%d-%m-%y} to {:%d-%m-%y}):\n{}'.format(LDR.index[0], LDR.index[-1], LDR.describe().T.astype(int)))
homog = LDR.resample('1s').mean().fillna(method='ffill', limit=5).fillna(method='bfill', limit=5).fillna(-1)
homog['median_filter'] = medfilt(homog.ldr, kernel_size=7)
return homog, data, data_s
LAT = 38.631463
LONG = -0.866402
ELEV_M = 500
TZ = pytz.timezone('Europe/Madrid')
FS = (16, 10)
sns.set_style('ticks')
homog, raw, hourly_data = load_data()
ldr_minutos = gen_data_ldr_minutes(homog)
homog.info()
ldr_minutos.info()
print_info('Plot distribuciones de derivadas 1ª y 2ª de altura y azimut solar (presentan continuidad)')
_, axes = plt.subplots(2, 2, figsize=(12, 12))
columns = ['azimuth_d1', 'azimuth_d2', 'altitude_d1', 'altitude_d2']
for c, ax in zip(columns, axes.ravel()):
ldr_minutos[c].hist(bins=100, log=True, ax=ax)
ax.set_title(c.capitalize().replace('_d', ' ∆^') + '/∆t')
In [47]:
_, axes = plt.subplots(2, 2, figsize=(12, 12))
columns = ['std', 'azimuth', 'altitude', 'median']
for c, ax in zip(columns, axes.ravel()):
ldr_minutos[c].hist(bins=100, log=True, ax=ax)
ax.set_title(c.capitalize())
In [73]:
sns.pairplot(ldr_minutos, size=4,
plot_kws=dict(lw=0, alpha=.01, s=4, color=tableau20[6]),
diag_kind='hist', diag_kws=dict(bins=100, lw=0, log=True, facecolor=tableau20[4], alpha=.9))
Out[73]:
In [2]:
X = ldr_minutos.values
X_std = StandardScaler().fit_transform(X)
X_pca = PCA(n_components=2, whiten=True).fit_transform(X_std)
X_pca3 = PCA(n_components=3, whiten=True).fit_transform(X_std)
X.shape, X_std.shape, X_pca.shape, X_pca3.shape
Out[2]:
In [86]:
def _plot_scatter(x, labels=None):
cmap_ctableau = cycle(tableau20[::2])
f, axes = plt.subplots(1, 2, figsize=(12.5, 6))
if labels is None:
axes[0].scatter(x[:,0], x[:,1], c=tableau20[4], lw=0, s=2, alpha=.8)
axes[1].scatter(x[:,0], x[:,1], c=tableau20[4], lw=0, s=10, alpha=.05)
else:
for k, col in zip(sorted(set(labels)), cmap_ctableau):
if k == -1:
col = 'grey'
mask = labels == k
axes[0].scatter(x[mask,0], x[mask,1], c=col, lw=0, s=2, alpha=.8)
axes[1].scatter(x[mask,0], x[mask,1], c=col, lw=0, s=10, alpha=.05)
return axes
for x, x_plot in zip([X, X_std, X_pca, X_pca3], [X_pca, X_pca, X_pca, X_pca3]):
print_info(x.shape)
clusterer_hdbscan = HDBSCAN(min_cluster_size=100, min_samples=50).fit(x)
print_red(len(set(clusterer_hdbscan.labels_)))
_plot_scatter(x_plot, clusterer_hdbscan.labels_)
plt.show()
In [ ]:
tsne = TSNE(n_components=2, random_state=0)
X_tsne = tsne.fit_transform(X_pca3)
tsne
In [ ]:
#clusterer = AffinityPropagation(preference=-250).fit(X_std)
print_red(len(set(clusterer.labels_)))
_plot_scatter(X_pca, clusterer.labels_)
In [ ]:
clusterer = KMeans(n_clusters=7).fit(X_pca)
print_red(len(set(clusterer.labels_)))
_plot_scatter(X_pca, clusterer.labels_)
In [ ]:
ldr_minutos.head()
In [1]:
features = homog.median_filter.iloc[:1000000].resample('1min').agg(['min', 'max', 'median', 'std', _step])
print_cyan(features.head())
f_sol = sun_position(features.index)
features = f_sol.join(features)
features['minuteofday'] = features.index.hour * 60 + features.index.minute
features['dayofyear'] = features.index.dayofyear
features.tail()
for x in [X, X_kpca_std, X_pca, tsne_reduced]:
print_info(x.shape)
clusterer_hdbscan = HDBSCAN(min_cluster_size=10, min_samples=100).fit(x)
print_red(len(set(clusterer_hdbscan.labels_)))
_plot_scatter(x, clusterer_hdbscan.labels_)
_plot_scatter(X_pca, clusterer_hdbscan.labels_)
plt.show()
In [8]:
sm.OLS(X_std[:,0], X_std[:,1:]).fit().summary()
Out[8]:
In [9]:
sm.OLS(ldr_minutos['median'], ldr_minutos.drop(['median'], axis=1)).fit().summary()
Out[9]:
In [14]:
ldr_minutos.groupby(lambda x: x.time).median().plot()
Out[14]:
In [15]:
ldr_minutos.groupby(lambda x: x.time).mean().plot()
Out[15]:
In [19]:
ldr_minutos.groupby(lambda x: x.time).std().plot()
Out[19]:
In [36]:
median_daily = ldr_minutos.groupby(lambda x: x.time)['median'].median()
day = ldr_minutos.index[0].date()
median_daily.index = pd.DatetimeIndex(([dt.datetime.combine(day, t) for t in median_daily.index]))
median_daily.plot()
median_daily.rolling(15, center=True).median().plot()
Out[36]:
In [37]:
ldr_minutos['median'].plot(figsize=(15, 5))
Out[37]:
In [47]:
ldr_minutos.loc['2016-08-13'].set_index('azimuth')['median'].sort_index().plot()
#ldr_minutos.loc['2016-09-01'].set_index('azimuth')['median'].sort_index().plot()
ldr_minutos.loc['2016-09-11'].set_index('azimuth')['median'].sort_index().plot()
Out[47]:
In [53]:
ldr_minutos.loc[ldr_minutos['altitude'] > 0].loc['2016-08-13'].set_index('azimuth')['median'].sort_index().plot()
ldr_minutos.loc[ldr_minutos['altitude'] > 0].loc['2016-08-29'].set_index('azimuth')['median'].sort_index().plot()
Out[53]:
In [104]:
luz_solar = ldr_minutos.loc[ldr_minutos['altitude'] > 10].reset_index().set_index('azimuth').copy()
luz_solar['coef_alt'] = luz_solar['median'] / luz_solar['altitude']
luz_solar['coef_alt_cos'] = luz_solar['median'] * (luz_solar.altitude.apply(np.deg2rad).apply(np.cos))
luz_solar['coef_alt_sin'] = luz_solar['median'] * (luz_solar.altitude.apply(np.deg2rad).apply(np.sin))
plt.figure(figsize=(15, 5))
plt.subplot(1, 3, 1)
luz_solar.coef_alt.hist(log=True, bins=100)
plt.subplot(1, 3, 2)
luz_solar.coef_alt_cos.hist(log=True, bins=100)
plt.subplot(1, 3, 3)
luz_solar.coef_alt_sin.hist(log=True, bins=100)
Out[104]:
In [105]:
f = plt.figure(figsize=(12, 12))
for i, (c, ylabel) in enumerate(zip(['coef_alt', 'coef_alt_cos', 'coef_alt_sin', 'median', 'std'],
['Median LDR / Altura solar', 'Median LDR / cos(Altura solar)',
'Median LDR / sin(Altura solar)', 'Median LDR'])):
plt.subplot(2, 2, i + 1)
cmap = cycle(tableau20)
for k, day in luz_solar.groupby(luz_solar['ts'].dt.date):
day.sort_index()[c].plot(lw=.75, alpha=.8, color=next(cmap))
plt.ylabel(ylabel)
plt.show()
Out[105]:
In [106]:
for k, day in luz_solar.groupby(luz_solar['ts'].dt.date):
day.sort_index()['coef_alt_sin'].plot()
In [218]:
curva_ajuste = luz_solar.groupby(lambda x: round(x))['coef_alt_sin'].mean()
curva_ajuste.plot()
luz_solar.groupby(lambda x: round(x))['coef_alt_cos'].mean().plot()
luz_solar.groupby(lambda x: round(x))['coef_alt'].mean().plot()
curva_altura = luz_solar.groupby(lambda x: round(x))['altitude'].mean()
(curva_altura * 10).plot()
Out[218]:
In [145]:
x_azimut = np.linspace(-180, 180, num=201)
offset = 180
y_s = 100 * (1 - np.cos(np.deg2rad(x_azimut + offset)))
offset2 = 90
y_s2 = 50 * (1 - np.cos(np.deg2rad(x_azimut + offset2)))
plt.plot(x_azimut, y_s)
plt.plot(x_azimut, y_s2)
plt.plot(x_azimut, y_s + y_s2)
Out[145]:
In [170]:
from scipy.optimize import leastsq
def _fitfunc(p, x) :
return p[0] * (p[1] * np.cos(np.deg2rad(x + p[2]) * 2)) + p[3] * (p[4] * np.sin(np.deg2rad(x + p[5]) * 2)) + p[6]
def _residuals(p, y, x) :
return y - _fitfunc(p, x)
p0 = np.array([curva_ajuste.max() / 4, 0.5, 0, curva_ajuste.mean() / 2, 0.5, 90, 100])
x_ls, cov_ls, infodict_ls, mesg, ier = leastsq(residuals, p0, args=(luz_solar['coef_alt_sin'].values, luz_solar.index), factor=1, full_output=True)
curva_ajuste.plot()
if (ier > 0) and (ier < 5):
y_calc = _fitfunc(x_ls, curva_ajuste.index)
plt.plot(curva_ajuste.index, y_calc, color='r')
else:
print_warn('Solución no encontrada: "{}"\nNº llamadas: {}. infodict_ls: {}'.format(mesg, infodict_ls['nfev'], infodict_ls.keys()))
In [220]:
#x_ls, mesg
#luz_solar.corr()
night_value = ldr_minutos.loc[ldr_minutos['altitude'] < -10]['median'].median()
print_red('night_value = {}'.format(night_value))
sim_luz_solar = ldr_minutos[['median', 'std', '_step', 'altitude', 'azimuth']].copy()
sim_luz_solar.loc[sim_luz_solar['altitude'] < -5, 'median'] = night_value
sim_luz_solar.loc[sim_luz_solar['altitude'] < -5, 'altitude'] = -5
for c in ['altitude', 'azimuth']:
sim_luz_solar['sin_' + c] = sim_luz_solar[c].apply(lambda x: np.sin(np.deg2rad(x)))
sim_luz_solar['cos_' + c] = sim_luz_solar[c].apply(lambda x: np.cos(np.deg2rad(x)))
sim_luz_solar = sim_luz_solar.drop(c, axis=1)
sim_luz_solar = sim_luz_solar.loc[:'2016-08-30'].copy()
sim_luz_solar['median'].plot(figsize=FS, alpha=.9, color=tableau20[2])
sim_luz_solar.head()
#median_daily
Out[220]:
In [310]:
plt.figure(figsize=FS)
plt.plot(ldr_minutos.loc['2016-08-28 11:30':'2016-09-01 16:00', 'median'])
#plt.plot(raw.loc['2016-08-29':'2016-08-30', 'ldr'].resample('10s').median())
#plt.plot(ldr_minutos.loc['2016-08-29', 'median'])
#plt.plot(ldr_minutos.loc['2016-08-30', 'median'])
#plt.plot(ldr_minutos.loc['2016-09-01', 'median'])
Out[310]:
In [327]:
homog = raw.ldr.resample('1s').mean().fillna(method='bfill', limit=3).fillna(method='ffill', limit=3).fillna(-100)
mf = pd.DataFrame(medfilt(homog, kernel_size=7), index=homog.index, columns=['ldr']).astype('int32')
mf['delta'] = mf.diff().fillna(0).astype('int32')
print(type(mf), mf.shape, raw.shape, homog.shape)
mf.info()
mf.head()
Out[327]:
In [330]:
sum_deltas_roll7 = mf.delta.rolling(7, center=True).sum()
In [335]:
sum_deltas_roll7.abs().hist(log=True, bins=600, figsize=FS, lw=0, color=tableau20[2])
Out[335]:
In [338]:
intervalos_unif = (sum_deltas_roll7.abs() > 5).cumsum()
mf['interv_unif'] = intervalos_unif
mf.head()
Out[338]:
In [339]:
pd.concat([sum_deltas_roll7.abs().value_counts().sort_index(),
sum_deltas_roll7.abs().value_counts().sort_index().cumsum() / len(sum_deltas_roll7)], axis=1).head(10)
Out[339]:
In [356]:
intervalos_largos = mf[mf.ldr > 0].groupby('interv_unif').delta.count().sort_values(ascending=False).head(10)
dfs_largos = []
plt.figure(figsize=(18, 9))
for i, intv in enumerate(intervalos_largos.index):
df = mf[mf.interv_unif == intv]
dfs_largos.append(df)
plt.subplot(2, 5, i + 1)
plt.title('{:%-d de %B}'.format(df.index[0]))
plt.xlabel('')
df.ldr.plot()
In [ ]:
In [ ]:
In [ ]:
In [315]:
#curva_ajuste
#luz_solar = ldr_minutos.loc[ldr_minutos['altitude'] > 10].reset_index().set_index('azimuth').copy()
#luz_solar['coef_alt'] = luz_solar['median'] / luz_solar['altitude']
#luz_solar['coef_alt_cos'] = luz_solar['median'] * (luz_solar.altitude.apply(np.deg2rad).apply(np.cos))
#luz_solar['coef_alt_sin'] = luz_solar['median'] * (luz_solar.altitude.apply(np.deg2rad).apply(np.sin))
gb_time = sim_luz_solar.groupby(lambda x: x.time)
y_medday = gb_time['median'].median()
y_medday.plot()
y_medday.rolling(19, center=True).median().plot()
plt.show()
In [255]:
#plt.plot(np.fft.rfft(sim_luz_solar.loc[:'2016-08-30']['median']), lw=.5)
#y = sim_luz_solar.loc[:'2016-08-30']['median'].values
N = y.shape[0]
print(N)
T = 1 / (60)
x = np.linspace(0.0, N*T, N)
#yf = scipy.fftpack.fft(y)
yf = np.fft.rfft(y)
xf = np.linspace(0.0, 1.0/(2.0*T), N/2)
fig, ax = plt.subplots()
y_cal = 2.0/N * np.abs(yf[:N/2])
#plt.semilogy(xf, 2.0/N * np.abs(yf[0:N/2]))
ax.semilogy(xf, y_cal)
#ax.set_xlim((1, N/2))
#ax.set_ylim((0, np.max(y_cal[2:])))
plt.figure()
plt.plot(x, y)
Out[255]:
In [276]:
x = np.array(list(range(len(y_medday.values))))
y = y_medday.values
spline = inter.InterpolatedUnivariateSpline(x, y)
Out[276]:
In [284]:
#plt.plot(y_medday)
x = np.array(list(range(len(y_medday.values))))
y = y_medday.values
fit_poly = np.polyfit(x, y, 20)
plt.figure(figsize=FS)
plt.plot(x, y, color='darkgreen', lw=1)
plt.plot(x, np.polyval(fit_poly, x), color='darkred')
plt.plot(x, inter.InterpolatedUnivariateSpline(x[::50], y[::50])(x), color='darkblue', alpha=.5)
Out[284]:
In [272]:
import scipy.interpolate as inter
import numpy as np
import pylab as plt
x = np.array([13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1])
y = np.array([2.404070, 1.588134, 1.760112, 1.771360, 1.860087,
1.955789, 1.910408, 1.655911, 1.778952, 2.624719,
1.698099, 3.022607, 3.303135])
xx = np.arange(1,13.01,0.1)
s1 = inter.InterpolatedUnivariateSpline (x, y)
s1rev = inter.InterpolatedUnivariateSpline (x[::-1], y[::-1])
# Use a smallish value for s
s2 = inter.UnivariateSpline (x[::-1], y[::-1], s=0.1)
plt.plot (x, y, 'bo', label='Data')
plt.plot (xx, s1rev(xx), 'k--', label='Spline, correct order')
plt.plot (xx, s2(xx), 'r-', label='Spline, fit')
plt.minorticks_on()
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.show()
In [204]:
N = 600
# sample spacing
T = 1.0 / 800.0
x = np.linspace(0.0, N*T, N)
y = 6 * np.sin(50.0 * 2.0*np.pi*x) + 0.5*np.sin(80.0 * 2.0*np.pi*x)
yf = scipy.fftpack.fft(y)
xf = np.linspace(0.0, 1.0/(2.0*T), N/2)
fig, ax = plt.subplots()
ax.plot(xf, 2.0/N * np.abs(yf[:N/2]))
plt.show()
In [80]:
from scipy.optimize import leastsq
def fitfunc(p, x) :
return (p[0] * (1 - p[1] * np.sin(2 * np.pi / (24 * 3600) * (x + p[2]))))
def residuals(p, y, x) :
return y - fitfunc(p, x)
def fit(tsdf) :
tsgb = tsdf.groupby(tsdf.timeofday).mean()
p0 = np.array([tsgb['conns'].mean(), 1.0, 0.0])
plsq, suc = leastsq(_residuals, p0, args=(tsgb['conns'], np.array(tsgb.index)))
return plsq
suc
In [316]:
#np.fft.rfft(curva_ajuste)
In [95]:
(luz_solar['median'] / (luz_solar['altitude'] * luz_solar.altitude.apply(np.deg2rad).apply(np.cos))).sort_index().plot()
Out[95]:
In [148]:
Out[148]:
In [ ]: