SolarProductionProducer (4)



In [3]:
cd git/SolarDataRESTfulAPI/


/root/git/SolarDataRESTfulAPI

In [4]:
import json
import pandas as pd
import InfluxDBInterface
import time
reload(InfluxDBInterface)
from ElasticsearchInterface import ESinterface
import sys


# Build the InfluxDB interface from the credentials file.
DataLink = InfluxDBInterface.InfluxDBInterface("influxInterfaceCredentials2.json")

# LogDB holds the series the calculations below read;
# ProductionDB receives (and is queried for) their results.
LogDB = DataLink.databases[u'SolarLogdata']
ProductionDB = DataLink.databases[u'SolarProductionSites']


# NOTE(review): `es` is not referenced by any other visible cell - confirm it is still needed.
es = ESinterface()

In [23]:
def CalculateProduction(LogDB,ProductionDB):
    """Update the energy counter of every series listed by ``LogDB``.

    Each series is handed to ``CalculateEnergyCounterForSite`` together
    with a window length of one week expressed in seconds.

    NOTE: a later cell defines another ``CalculateProduction`` with a
    different signature; once that cell has run it replaces this one.
    """
    SecondsPerWeek = 60*60*24*7

    for SiteName in LogDB.ListSeries():
        CalculateEnergyCounterForSite(LogDB,ProductionDB,SiteName,SecondsPerWeek)
    
            
            
            
            
            
def CalculatePowerForSite(LogDB,ProductionDB,Site,PeriodSize):
    """Sum the per-inverter power columns of ``Site`` and save them as "Power".

    The columns are the properties returned by
    ``LogDB.GetPropertiesPartiallyMatchingAbutNotB(Site,"Pac","Tot")``.
    Processing resumes at the last "Power" timestamp stored in
    ``ProductionDB`` or, when there is none, at the first timestamp of the
    log series, and then walks forward one window at a time.

    Database timestamps are divided by 1000 before being passed to
    ``GetDataPeriod`` and the window is advanced by ``PeriodSize*1000``,
    i.e. timestamps are handled as milliseconds and ``PeriodSize`` as seconds.

    Parameters:
        LogDB: database interface holding the raw log series.
        ProductionDB: database interface that receives the "Power" column.
        Site: name of the series to process.
        PeriodSize: window length (seconds).

    Returns:
        None. Progress is reported on stdout.
    """
    print("Processing Power for %s" % Site)
    Properties = LogDB.GetPropertiesPartiallyMatchingAbutNotB(Site,"Pac","Tot")
    print("\t%i inverters found" % len(Properties))

    LastUpdate = ProductionDB.GetLastTimestamp(Site,"Power")

    #No previous calculations done, start from the beginning of the log series.
    if LastUpdate is None:
        print("\tNo previous power data calculated for %s, starting from beginning." % Site)
        LastUpdate = LogDB.GetFirstTimestamp(Site)

        #No data.
        if LastUpdate is None:
            print("\tNo data found for %s" % Site)
            return
    else:
        print("\tStarting calculations from: %i" % LastUpdate)

    DataUntil = LogDB.GetLastTimestamp(Site)
    PeriodStart = LastUpdate

    #Nothing to compare against: same outcome as the loop not running,
    #but without relying on how an int compares to None.
    if DataUntil is None:
        return

    if DataUntil == PeriodStart:
        print("\tUp to date!")
        return

    #Loop through the time series one window at a time.
    while PeriodStart < DataUntil:
        # Floor division keeps the start an integer number of seconds.
        df = LogDB.GetDataPeriod(Site,Properties,PeriodStart // 1000,PeriodSize,10000)

        if isinstance(df, pd.DataFrame):
            SumColsIntoCol(df,Properties,"Power")
            row = ProductionDB.Save(Site,df[["Power"]])
            print("\t%i rows of data saved to %s" % (row,Site))
        else:
            #Anything that is not a frame is treated as a window without data.
            print("Missing data at: %i" % PeriodStart)

        PeriodStart += PeriodSize*1000
        
def CalculateEnergyCounterForSite(LogDB,ProductionDB,Site,PeriodSize):
    """Build a continuous "Energy" counter for ``Site`` and save it.

    The per-inverter counters are the properties returned by
    ``LogDB.GetPropertiesPartiallyMatchingAbutNotB(Site,"POWc","Tot")``.
    Each window has its counter resets removed (``RemoveResets``), the
    columns are summed into "Energy", and the result is shifted so that it
    continues from the last stored value.

    Processing resumes at the last stored "Energy" value of
    ``ProductionDB`` or, when there is none, at the first log timestamp
    with the counter starting at 0. Database timestamps are handled as
    milliseconds and ``PeriodSize`` as seconds (see the ``// 1000`` and
    ``* 1000`` conversions below).

    The first row of every window must carry the timestamp of the last
    processed row; otherwise a sync error is printed and processing stops.

    Parameters:
        LogDB: database interface holding the raw log series.
        ProductionDB: database interface that receives the "Energy" column.
        Site: name of the series to process.
        PeriodSize: window length (seconds).

    Returns:
        None. Progress is reported on stdout.
    """
    print("Processing Energy counter for %s" % Site)
    Properties = LogDB.GetPropertiesPartiallyMatchingAbutNotB(Site,"POWc","Tot")
    print("\t%i inverters found" % len(Properties))

    (LastUpdate,LastValue) = ProductionDB.GetLastValue(Site,"Energy")

    #No previous calculations done, start from the beginning of the log series.
    if LastUpdate is None:
        print("\tNo previous energy data calculated for %s, starting from beginning." % Site)

        #Start from where we have raw data
        LastUpdate = LogDB.GetFirstTimestamp(Site)

        #Counter starts from 0
        LastValue = 0

        #No data.
        if LastUpdate is None:
            print("\tNo data found for %s" % Site)
            return
    else:
        print("\tStarting calculations from: %s" % EpocToDate(LastUpdate // 1000))

    DataUntil = LogDB.GetLastTimestamp(Site)
    PeriodStart = LastUpdate

    if DataUntil == PeriodStart:
        print("\tUp to date!")

    #Loop through the time series. A missing end timestamp means there is
    #nothing to loop over; checked explicitly instead of comparing int < None.
    while DataUntil is not None and PeriodStart < DataUntil:
        StartSeconds = PeriodStart // 1000
        print("\tRunning period %s to %s" % (EpocToDate(StartSeconds),EpocToDate(StartSeconds + PeriodSize)))
        df = LogDB.GetDataPeriod(Site,Properties,StartSeconds,PeriodSize,10000)

        #Anything that is not a frame is treated as a window without data.
        if not isinstance(df, pd.DataFrame):
            print("\t\tNo data")
            PeriodStart += PeriodSize*1000
            continue

        print("\t\t%i rows of data found." % df.shape[0])

        #Remove the reset every 24h.
        df = df.apply(RemoveResets)

        #We need a continuous series: the window has to start on the row we stopped at.
        if df.index[0]*1000 != LastUpdate:
            print("\t*** Sync error")

            print("%s %s" % (df.index[0]*1000, LastUpdate))
            print(df.index[-1]*1000)
            print(df.shape)
            print(PeriodStart)
            break

        SumColsIntoCol(df,Properties,"Energy")

        #Add previous counter value so the first row equals the last stored value.
        Offset = LastValue - df["Energy"].iloc[0]
        df["Energy"] += Offset

        LastUpdate = df.index[-1]*1000
        LastValue = df["Energy"].iloc[-1]

        #Drop the duplicate first row and save if any data is left.
        if df.shape[0] > 1:
            row = ProductionDB.Save(Site,df.iloc[1:][["Energy"]])
            print("\t%i rows of data saved to %s" % (row,Site))

        PeriodStart += PeriodSize*1000

    print("\tEnergy calculations finnished!")
                
def SumColsIntoCol(df,Properties,Name):
    """Store the row-wise sum of the ``Properties`` columns in ``df[Name]``.

    ``df`` is modified in place. The target column is reset to 0 first, so
    an empty ``Properties`` list leaves a column of zeros. Columns are
    added one by one with ``+``, so a NaN in any of them makes the sum of
    that row NaN.
    """
    df[Name] = 0

    for Column in Properties:
        df[Name] = df[Name] + df[Column]

In [28]:
def EpocToDate(timestamp):
    """Return ``timestamp`` (epoch seconds) as a local-time "YYYY-MM-DD HH:MM:SS" string."""
    LocalTime = time.localtime(timestamp)
    return time.strftime("%Y-%m-%d %H:%M:%S", LocalTime)
        
def SecToHMS(sec):
    """Format a duration in seconds as ``"<h> h <m> min <s> s"``.

    ``sec`` is truncated to an integer first. ``divmod`` is used so the
    split into hours, minutes and seconds always uses floor division and
    does not depend on whether ``/`` divides integers exactly or not.
    """
    hour, remainder = divmod(int(sec), 3600)
    minutes, secs = divmod(remainder, 60)
    return "%i h %i min %i s" % (hour, minutes, secs)

def RemoveResets(series):
    """Turn a counter that occasionally drops back into a non-decreasing one.

    Row-to-row differences are taken, negative steps (the drops) are
    clipped to 0, the first difference is replaced by the first value of
    ``series`` and the differences are accumulated again.
    """
    Increments = series.diff().clip(lower=0)
    Increments.iloc[0] = series.iloc[0]
    return Increments.cumsum()

def CalculateProduction(Site,LogDB,ProductionDB,Recalculate=False):
    """Compute "Power" and "Energy" for ``Site`` and write them to ``ProductionDB``.

    Log data is fetched in chunks of 1000 rows. For every chunk the power
    columns (properties matching "Pac" but not "Tot") are summed into
    "Power"; the energy counters (properties matching "POWc" but not
    "Tot") have their resets removed and are summed into "Energy", which
    is then shifted to continue from the previous chunk / stored value.

    Parameters:
        Site: name of the series to process.
        LogDB: database interface holding the raw log series.
        ProductionDB: database interface that receives the result.
        Recalculate: when false (default) resume from the data already
            stored in ``ProductionDB``; otherwise start from the beginning
            of the log.

    Returns:
        The index label of the last row of the final chunk that was fetched.
    """
    #Create property lists
    EnergyProp = LogDB.GetPropertiesPartiallyMatchingAbutNotB(Site,"POWc","Tot")
    PowerProp = LogDB.GetPropertiesPartiallyMatchingAbutNotB(Site,"Pac","Tot")

    PreviousLastValidValue = 0
    # NOTE(review): on a full recalculation this stays 0 (not None), so the
    # first chunk is shifted to start at PreviousLastValidValue (0) - confirm
    # that this is the intended behaviour.
    PreviousLastValidValueTime = 0

    #Determine where to resume.
    if not Recalculate:
        (PreviousLastValidValueTime,PreviousLastValidValue) = ProductionDB.GetLastValue(Site,"Energy")
        TimestampP = ProductionDB.GetLastTimestamp(Site,"Power")

        if PreviousLastValidValueTime is None or TimestampP is None:
            #Nothing stored yet for at least one of the two columns: there is
            #no common point to resume from, so read the log from the start
            #(None is the same start marker the recalculate branch uses).
            PreviousLastValidValueTime = None
            PreviousLastValidValue = 0
            print("\tNo previous production data for %s, starting from beginning." % Site)
        else:
            #Start from where we have both power and energy values.
            # NOTE(review): when the power timestamp is the older one, the
            # energy value still belongs to the newer timestamp - verify.
            PreviousLastValidValueTime = min(TimestampP,PreviousLastValidValueTime) // 1000

            print("\tResuming calculation from: %s" % EpocToDate(PreviousLastValidValueTime))

        #Get last data.
        dfLog = LogDB.GetDataAfterTime(Site,EnergyProp + PowerProp,PreviousLastValidValueTime,1000)

    else:
        #Get a log data chunk
        dfLog = LogDB.GetDataAfterTime(Site,EnergyProp + PowerProp,None,1000)

    while (dfLog.shape[0] > 1):

        #Create a production frame.
        dfProduction = pd.DataFrame(columns = ["Power","Energy"])

        #Calculate power
        dfProduction["Power"] = dfLog[PowerProp].sum(axis=1)

        #Calculate energy
        dfPOWc = dfLog[EnergyProp]
        dfProduction["Energy"] = dfPOWc.apply(RemoveResets).sum(axis=1)

        #Add offset from previous iteration.

        #The row with the smallest counter value is taken as the overlap
        #with the previously processed data.
        FirstValidValueTime = dfProduction["Energy"].idxmin()

        #First time ever... or just NaN values in data.
        if PreviousLastValidValueTime is None or pd.isnull(FirstValidValueTime):
            offset = 0
        #Normal overlap
        else:
            offset = PreviousLastValidValue - dfProduction["Energy"][FirstValidValueTime]

        dfProduction["Energy"] += offset

        #Update database
        ProductionDB.Replace(Site,dfProduction)

        #Keep track of counter max.
        MaxEnergyTime = dfProduction["Energy"].idxmax()

        if not pd.isnull(MaxEnergyTime):
            PreviousLastValidValue = dfProduction["Energy"][MaxEnergyTime]
            PreviousLastValidValueTime = MaxEnergyTime

        dfLog = LogDB.GetNextNRows(dfLog,1000)

    return dfLog.index[-1]

In [30]:
# Process every series found in the log database.
# (To debug a single site, use e.g. Sites = ["8b28b202-1da8-11e4-a510-f23c9173ce4a"].)
Sites = LogDB.ListSeries()

# Reference time for reporting how far behind "now" each site's data ends.
now = time.time()

for Site in Sites:
    print("Processing %s " % Site)
    sys.stdout.flush()

    # CalculateProduction returns the index of the last log row it fetched;
    # convert it into whole seconds before `now`.
    until = int(now - CalculateProduction(Site,LogDB,ProductionDB,False))

    # Split the lag into hours / minutes / seconds.
    hour = until // 3600
    minutes = (until % 3600) // 60
    secs = until % 60

    print("\tFinnished processing up to %i hours %i minutes and %i seconds from script start time" % (hour,minutes,secs))
    sys.stdout.flush()

print("Done")
sys.stdout.flush()


Processing 2a31fb24-347b-4924-ab89-5c434771a2ae 
	Resuming calculation from: 2014-09-04 19:30:00
	Finnished processing up to 5 hours 52 minutes and 37 seconds from script start time
Processing 32383bde-7648-4abe-9dac-44701eabd72f 
	Resuming calculation from: 2014-09-04 19:00:00
	Finnished processing up to 5 hours 52 minutes and 37 seconds from script start time
Processing 46d55815-f927-459f-a8e2-8bbcd88008ee 
	Resuming calculation from: 2014-09-03 09:40:00
	Finnished processing up to 6 hours 2 minutes and 37 seconds from script start time
Processing 4a39b124-f594-11e3-a510-f23c9173ce4a 
	Resuming calculation from: 2014-09-04 19:20:00
	Finnished processing up to 5 hours 52 minutes and 37 seconds from script start time
Processing 4cf6c743-8d0b-45f0-aa72-8a0c10315cf5 
	Resuming calculation from: 2014-09-04 19:00:00
	Finnished processing up to 5 hours 32 minutes and 37 seconds from script start time
Processing 6f075b69-b823-4e6b-8021-dd751cc79eea 
	Resuming calculation from: 2014-09-04 19:40:00
	Finnished processing up to 5 hours 42 minutes and 37 seconds from script start time
Processing 709e47a1-ca88-4c22-890f-2407e9cb131a 
	Resuming calculation from: 2014-09-04 19:00:00
	Finnished processing up to 5 hours 32 minutes and 37 seconds from script start time
Processing 71b0e5ec-1da8-11e4-a510-f23c9173ce4a 
	Resuming calculation from: 2014-08-06 20:20:00
	Finnished processing up to 691 hours 32 minutes and 37 seconds from script start time
Processing 79ea7d0e-1da8-11e4-a510-f23c9173ce4a 
	Resuming calculation from: 2014-08-15 19:50:00
	Finnished processing up to 5 hours 32 minutes and 37 seconds from script start time
Processing 802afb51-c5eb-4230-9b22-9a77ef7260b3 
	Resuming calculation from: 2014-09-04 19:30:00
	Finnished processing up to 5 hours 52 minutes and 37 seconds from script start time
Processing 845e5e54-1da8-11e4-a510-f23c9173ce4a 
	Resuming calculation from: 2014-08-07 20:30:00
	Finnished processing up to 5 hours 32 minutes and 37 seconds from script start time
Processing 8b28b202-1da8-11e4-a510-f23c9173ce4a 
	Resuming calculation from: 2014-09-04 19:10:00
	Finnished processing up to 5 hours 42 minutes and 37 seconds from script start time
Processing 916b6e8e-1da8-11e4-a510-f23c9173ce4a 
	Resuming calculation from: 2014-08-07 20:50:00
	Finnished processing up to 5 hours 32 minutes and 37 seconds from script start time
Processing b0dd71a8-efe8-41eb-bab1-633ebaaa778c 
	Resuming calculation from: 2014-09-04 19:20:00
	Finnished processing up to 5 hours 32 minutes and 37 seconds from script start time
Processing c6261166-f651-11e3-a510-f23c9173ce4a 
	Resuming calculation from: 2014-09-04 18:50:00
	Finnished processing up to 6 hours 2 minutes and 37 seconds from script start time
Processing e63710a4-78d9-4071-9a2e-ad35534355f4 
	Resuming calculation from: 2014-09-04 19:10:00
	Finnished processing up to 5 hours 52 minutes and 37 seconds from script start time
Processing e8a13982-f651-11e3-a510-f23c9173ce4a 
	Resuming calculation from: 2014-09-04 18:50:00
	Finnished processing up to 5 hours 52 minutes and 37 seconds from script start time
Processing f1136e33-d227-4fbd-ae72-d3488d70ba89 
	Resuming calculation from: 2014-09-04 19:30:00
	Finnished processing up to 5 hours 52 minutes and 37 seconds from script start time
Processing face0d13-b22d-48fe-bec1-d29732604133 
	Resuming calculation from: 2014-08-28 12:50:00
	Finnished processing up to 160 hours 22 minutes and 37 seconds from script start time
Done

In [8]:
# Status report: for every site show the first/last timestamps of the raw log
# data and of the derived production data, and how far each lags behind now.
print("SITES")

for site in Sites:
    print("-"*50)

    # The timestamp getters return None for a series without data; the values
    # are divided by 1000 below to get the epoch seconds EpocToDate expects.
    LogStart = LogDB.GetFirstTimestamp(site)
    LogStop = LogDB.GetLastTimestamp(site)
    ProdStart = ProductionDB.GetFirstTimestamp(site)
    ProdStop = ProductionDB.GetLastTimestamp(site)
    now = time.time()

    print(site +"\n")

    # A site without data must not abort the report for the remaining sites.
    if LogStart is None or LogStop is None:
        print("Log data: \n\tNo data")
    else:
        LogStart = LogStart // 1000
        LogStop = LogStop // 1000
        LogLag = now - LogStop
        print("Log data: \n\tLagging\t\t%s  \n\tFirst value at\t%s \n\tLast value at\t%s " %(SecToHMS(LogLag),EpocToDate(LogStart),EpocToDate(LogStop)))

    if ProdStart is None or ProdStop is None:
        print("Production data: \n\tNo data")
    else:
        ProdStart = ProdStart // 1000
        ProdStop = ProdStop // 1000
        ProdLag = now - ProdStop
        print("Production data: \n\tLagging\t\t%s  \n\tFirst value at\t%s \n\tLast value at\t%s " %(SecToHMS(ProdLag),EpocToDate(ProdStart),EpocToDate(ProdStop)))

print("-"*50)


SITES
--------------------------------------------------
8b28b202-1da8-11e4-a510-f23c9173ce4a

Log data: 
	Lagging		5 h 13 min 31 s  
	First value at	2014-06-11 19:10:00 
	Last value at	2014-09-04 20:10:00 
Production data: 
	Lagging		604 h 33 min 31 s  
	First value at	2014-06-11 19:20:00 
	Last value at	2014-08-10 20:50:00 
--------------------------------------------------

In [ ]:


In [31]:
#Debug: run the calculation for one hard-coded site in resume mode and
#print the value it returns (the index of the last log row it fetched).
Site = "71b0e5ec-1da8-11e4-a510-f23c9173ce4a"

print CalculateProduction(Site,LogDB,ProductionDB,False)


	Resuming calculation from: 2014-08-07 05:20:00
1407385200

In [12]:
# Look up the power ("Pac") and energy ("POWc") properties of Site, excluding
# those matching "Tot". Only the last bare expression (EnergyProp) is shown
# as the cell output.
PowerProp = LogDB.GetPropertiesPartiallyMatchingAbutNotB(Site,"Pac","Tot")
PowerProp
EnergyProp = LogDB.GetPropertiesPartiallyMatchingAbutNotB(Site,"POWc","Tot")
EnergyProp


Out[12]:
[u'POWc001']

In [32]:
# Debug: the resume part of CalculateProduction(Site, ...) replayed at module
# level so that the intermediate values can be inspected in later cells.
# Unlike the function, this copy fetches chunks of 100 rows instead of 1000.
Recalculate=False

#Create property lists
EnergyProp = LogDB.GetPropertiesPartiallyMatchingAbutNotB(Site,"POWc","Tot")
PowerProp = LogDB.GetPropertiesPartiallyMatchingAbutNotB(Site,"Pac","Tot")

PreviousLastValidValue = 0
PreviousLastValidValueTime = 0

#Determine where to resume.
if Recalculate == False:
    (PreviousLastValidValueTime,PreviousLastValidValue) = ProductionDB.GetLastValue(Site,"Energy")
    TimestampP = ProductionDB.GetLastTimestamp(Site,"Power")
    
 
    #Start from where we have both power and energy values.
    if TimestampP < PreviousLastValidValueTime:
        PreviousLastValidValueTime = TimestampP
        
    # Converted from the stored value to the seconds EpocToDate expects.
    PreviousLastValidValueTime = PreviousLastValidValueTime / 1000
        
    print "\tResuming calculation from: %s" % EpocToDate(PreviousLastValidValueTime)
        
    #Get last data. 
    dfLog = LogDB.GetDataAfterTime(Site,EnergyProp + PowerProp,PreviousLastValidValueTime,100)

else:  
    #Get a log data chunk
    dfLog = LogDB.GetDataAfterTime(Site,EnergyProp + PowerProp,None,100)


	Resuming calculation from: 2014-08-07 05:20:00

In [237]:
# Debug: one iteration of the chunk loop of CalculateProduction(Site, ...)
# replayed at module level. It relies on dfLog, PowerProp, EnergyProp,
# PreviousLastValidValue and PreviousLastValidValueTime left behind by
# earlier cells. Running it writes to ProductionDB via Replace().

# Shows whether the function's while-loop condition would hold.
print (dfLog.shape[0] > 1)

#Create a production frame.
dfProduction = pd.DataFrame(columns = ["Power","Energy"])


#Calculate power
dfProduction["Power"] = dfLog[PowerProp].sum(axis=1)

#Calculate energy
dfPOWc = dfLog[EnergyProp]
dfProduction["Energy"] = dfPOWc.apply(RemoveResets).sum(axis=1)

#Add offset from previous iteration.

#The row with the smallest counter value is taken as the overlap with the previously processed data.
FirstValidValueTime = dfProduction["Energy"].idxmin()

#First time ever...
# NOTE(review): unlike the function, this copy does not check
# pd.isnull(FirstValidValueTime) before indexing with it.
if PreviousLastValidValueTime == None:
    offset = 0
#Normal overlap
else: 
    offset = PreviousLastValidValue - dfProduction["Energy"][FirstValidValueTime]

dfProduction["Energy"] += offset

#Update database
ProductionDB.Replace(Site,dfProduction)

#Keep track of counter max.
MaxEnergyTime = dfProduction["Energy"].idxmax()

if not pd.isnull(MaxEnergyTime):
    PreviousLastValidValue = dfProduction["Energy"][MaxEnergyTime]
    PreviousLastValidValueTime = MaxEnergyTime

# Keep two rows from near the end of the current chunk (positions -3 and -2)
# together with the chunk's series/properties attributes, for inspection
# after dfLog is replaced below.
oldLog = dfLog.iloc[-3:-1]
oldLog.series = dfLog.series
oldLog.properties = dfLog.properties

dfLog = LogDB.GetNextNRows(dfLog,1000)
    
# Index of the last row of the newly fetched chunk.
print dfLog.index[-1]


True
1394779200

In [14]:
# Display the log chunk currently held in dfLog.
dfLog


Out[14]:
POWc001 Pac001
time
1407696000 96700 600

In [15]:
# Display the timestamp the next chunk would be matched against.
PreviousLastValidValueTime


Out[15]:
1407696000

In [35]:
# Display what the log database returns for Site after the current resume
# time, asking for up to 3000 rows of the energy and power properties.
LogDB.GetDataAfterTime(Site,EnergyProp + PowerProp,PreviousLastValidValueTime,3000)


Out[35]:
POWc001 Pac001
time
1407381600 100 600
1407382200 100 NaN
1407382800 100 NaN
1407383400 100 NaN
1407384000 100 NaN
1407384600 100 NaN
1407385200 100 NaN

In [ ]: