notebook.community

Edit and run



In [3]:

    
from hourlypowerconsumptions import HourlyPowerConsumptions
from visualizations import plot_barchart
import numpy as np
from sklearn.cluster import KMeans









    



---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-3-4d727dc0e3fe> in <module>()
      1 from hourlypowerconsumptions import HourlyPowerConsumptions
----> 2 from visualizations import plot_barchart
      3 import numpy as np
      4 from sklearn.cluster import KMeans

ImportError: No module named visualizations



In [2]:

    
dir_path = "/Users/zoraida/Desktop/TEFCON/all-country-data/hourly"
pattern = "/Hourly_201*month*.xls"



In [3]:

    
pc = HourlyPowerConsumptions(dir_path, pattern, skiprows=9, maxcolumns=26, hourchange='3B:00:00')



In [4]:

    
country = "ES"# country to analyse
df = pc.normalized_hourly_country_data(country)



In [5]:

    
df.head()









    Out[5]:






  
    
      
      
      
      
      
      
      H01
      H02
      H03
      H04
      H05
      H06
      H07
      H08
      H09
      H10
      ...
      H15
      H16
      H17
      H18
      H19
      H20
      H21
      H22
      H23
      H24
    
    
      Country
      year
      month
      weekday
      date
      Day
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      ES
      2010
      1
      4
      2010-01-01
      2010-01-01
       0.042322
       0.039458
       0.037003
       0.035206
       0.034550
       0.034550
       0.035132
       0.034663
       0.034548
       0.036679
      ...
       0.040560
       0.040003
       0.041568
       0.047490
       0.049900
       0.051403
       0.052409
       0.052646
       0.049255
       0.044677
    
    
      5
      2010-01-02
      2010-01-02
       0.034882
       0.031838
       0.030237
       0.029740
       0.030203
       0.032157
       0.034508
       0.036928
       0.039869
       0.044210
      ...
       0.044156
       0.043585
       0.044473
       0.049226
       0.050569
       0.050968
       0.050885
       0.050023
       0.047008
       0.043177
    
    
      6
      2010-01-03
      2010-01-03
       0.039333
       0.035873
       0.033641
       0.032639
       0.032790
       0.033366
       0.034053
       0.034705
       0.036517
       0.040244
      ...
       0.043664
       0.042735
       0.043382
       0.047867
       0.049781
       0.051046
       0.051753
       0.050986
       0.047782
       0.042932
    
    
      0
      2010-01-04
      2010-01-04
       0.032141
       0.029628
       0.028357
       0.028110
       0.028937
       0.031947
       0.036898
       0.041733
       0.045361
       0.047912
      ...
       0.045734
       0.045659
       0.046631
       0.048852
       0.048892
       0.048506
       0.048206
       0.046073
       0.042039
       0.037860
    
    
      1
      2010-01-05
      2010-01-05
       0.034756
       0.032135
       0.030840
       0.030394
       0.030932
       0.033729
       0.038231
       0.042670
       0.046062
       0.048104
      ...
       0.044777
       0.044654
       0.044921
       0.046676
       0.046032
       0.045558
       0.045429
       0.044337
       0.041870
       0.038519
    
  

5 rows × 24 columns



In [6]:

    
X = df.iloc[:, 4:28].values



In [7]:

    
X









    Out[7]:





array([[ 0.03455016,  0.03455016,  0.03513193, ...,  0.05264601,
         0.04925547,  0.04467732],
       [ 0.03020292,  0.03215701,  0.03450772, ...,  0.05002273,
         0.04700776,  0.04317697],
       [ 0.03278973,  0.03336615,  0.03405267, ...,  0.05098591,
         0.04778158,  0.04293218],
       ..., 
       [ 0.03552622,  0.03593053,  0.03757593, ...,  0.04350139,
         0.04046741,  0.03814594],
       [ 0.0354403 ,  0.03566766,  0.03648358, ...,  0.04793214,
         0.04434209,  0.04116825],
       [ 0.0309379 ,  0.03465274,  0.04035536, ...,  0.04352465,
         0.03974652,  0.03713633]])



In [8]:

    
X.shape









    Out[8]:





(1461, 20)



In [9]:

    
df.values.shape









    Out[9]:





(1461, 24)



In [10]:

    
kmeans = KMeans(init='k-means++', n_clusters=2, n_init=10)



In [11]:

    
labels_ = kmeans.fit_predict(df.iloc[:, 4:28].values)



In [12]:

    
type(labels_)









    Out[12]:





numpy.ndarray



In [13]:

    
labels_.shape









    Out[13]:





(1461,)



In [14]:

    
print labels_









    



[1 1 1 ..., 1 1 0]



In [ ]:

						H01	H02	H03	H04	H05	H06	H07	H08	H09	H10	...	H15	H16	H17	H18	H19	H20	H21	H22	H23	H24
Country	year	month	weekday	date	Day
ES	2010	1	4	2010-01-01	2010-01-01	0.042322	0.039458	0.037003	0.035206	0.034550	0.034550	0.035132	0.034663	0.034548	0.036679	...	0.040560	0.040003	0.041568	0.047490	0.049900	0.051403	0.052409	0.052646	0.049255	0.044677
			5	2010-01-02	2010-01-02	0.034882	0.031838	0.030237	0.029740	0.030203	0.032157	0.034508	0.036928	0.039869	0.044210	...	0.044156	0.043585	0.044473	0.049226	0.050569	0.050968	0.050885	0.050023	0.047008	0.043177
			6	2010-01-03	2010-01-03	0.039333	0.035873	0.033641	0.032639	0.032790	0.033366	0.034053	0.034705	0.036517	0.040244	...	0.043664	0.042735	0.043382	0.047867	0.049781	0.051046	0.051753	0.050986	0.047782	0.042932
			0	2010-01-04	2010-01-04	0.032141	0.029628	0.028357	0.028110	0.028937	0.031947	0.036898	0.041733	0.045361	0.047912	...	0.045734	0.045659	0.046631	0.048852	0.048892	0.048506	0.048206	0.046073	0.042039	0.037860
			1	2010-01-05	2010-01-05	0.034756	0.032135	0.030840	0.030394	0.030932	0.033729	0.038231	0.042670	0.046062	0.048104	...	0.044777	0.044654	0.044921	0.046676	0.046032	0.045558	0.045429	0.044337	0.041870	0.038519