In [3]:
from hourlypowerconsumptions import HourlyPowerConsumptions
from visualizations import plot_barchart
import numpy as np
from sklearn.cluster import KMeans


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-3-4d727dc0e3fe> in <module>()
      1 from hourlypowerconsumptions import HourlyPowerConsumptions
----> 2 from visualizations import plot_barchart
      3 import numpy as np
      4 from sklearn.cluster import KMeans

ImportError: No module named visualizations

In [2]:
dir_path = "/Users/zoraida/Desktop/TEFCON/all-country-data/hourly"
pattern = "/Hourly_201*month*.xls"

In [3]:
pc = HourlyPowerConsumptions(dir_path, pattern, skiprows=9, maxcolumns=26, hourchange='3B:00:00')

In [4]:
country = "ES"# country to analyse
df = pc.normalized_hourly_country_data(country)

In [5]:
df.head()


Out[5]:
H01 H02 H03 H04 H05 H06 H07 H08 H09 H10 ... H15 H16 H17 H18 H19 H20 H21 H22 H23 H24
Country year month weekday date Day
ES 2010 1 4 2010-01-01 2010-01-01 0.042322 0.039458 0.037003 0.035206 0.034550 0.034550 0.035132 0.034663 0.034548 0.036679 ... 0.040560 0.040003 0.041568 0.047490 0.049900 0.051403 0.052409 0.052646 0.049255 0.044677
5 2010-01-02 2010-01-02 0.034882 0.031838 0.030237 0.029740 0.030203 0.032157 0.034508 0.036928 0.039869 0.044210 ... 0.044156 0.043585 0.044473 0.049226 0.050569 0.050968 0.050885 0.050023 0.047008 0.043177
6 2010-01-03 2010-01-03 0.039333 0.035873 0.033641 0.032639 0.032790 0.033366 0.034053 0.034705 0.036517 0.040244 ... 0.043664 0.042735 0.043382 0.047867 0.049781 0.051046 0.051753 0.050986 0.047782 0.042932
0 2010-01-04 2010-01-04 0.032141 0.029628 0.028357 0.028110 0.028937 0.031947 0.036898 0.041733 0.045361 0.047912 ... 0.045734 0.045659 0.046631 0.048852 0.048892 0.048506 0.048206 0.046073 0.042039 0.037860
1 2010-01-05 2010-01-05 0.034756 0.032135 0.030840 0.030394 0.030932 0.033729 0.038231 0.042670 0.046062 0.048104 ... 0.044777 0.044654 0.044921 0.046676 0.046032 0.045558 0.045429 0.044337 0.041870 0.038519

5 rows × 24 columns


In [6]:
X = df.iloc[:, 4:28].values

In [7]:
X


Out[7]:
array([[ 0.03455016,  0.03455016,  0.03513193, ...,  0.05264601,
         0.04925547,  0.04467732],
       [ 0.03020292,  0.03215701,  0.03450772, ...,  0.05002273,
         0.04700776,  0.04317697],
       [ 0.03278973,  0.03336615,  0.03405267, ...,  0.05098591,
         0.04778158,  0.04293218],
       ..., 
       [ 0.03552622,  0.03593053,  0.03757593, ...,  0.04350139,
         0.04046741,  0.03814594],
       [ 0.0354403 ,  0.03566766,  0.03648358, ...,  0.04793214,
         0.04434209,  0.04116825],
       [ 0.0309379 ,  0.03465274,  0.04035536, ...,  0.04352465,
         0.03974652,  0.03713633]])

In [8]:
X.shape


Out[8]:
(1461, 20)

In [9]:
df.values.shape


Out[9]:
(1461, 24)

In [10]:
kmeans = KMeans(init='k-means++', n_clusters=2, n_init=10)

In [11]:
labels_ = kmeans.fit_predict(df.iloc[:, 4:28].values)

In [12]:
type(labels_)


Out[12]:
numpy.ndarray

In [13]:
labels_.shape


Out[13]:
(1461,)

In [14]:
print labels_


[1 1 1 ..., 1 1 0]

In [ ]: