CSV Importer



In [202]:
import requests
import pandas
import time
import json
from influxdb import InfluxDBClient
import numpy

def download_file(url):
    """Stream the file at `url` into the working directory.

    Returns the local filename (the last path segment of the URL).

    Fixes vs. the original: the HTTP response was never closed (leaking
    the connection), and flushing after every 1 KiB chunk was wasteful —
    the `with` block closes/flushes the file once at the end.
    """
    local_filename = url.split('/')[-1]
    # NOTE the stream=True parameter: avoids loading the whole body into memory.
    r = requests.get(url, stream=True)
    try:
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk: # filter out keep-alive new chunks
                    f.write(chunk)
    finally:
        # Release the HTTP connection even if writing fails.
        r.close()
    return local_filename
    
def LoadSiteIds(file="SiteIDs.json"):
    """Load the site-id -> feed-id mapping from a JSON file.

    file -- path to the JSON config (default "SiteIDs.json").
    Returns the parsed dict.

    Fix vs. the original: the file handle leaked if json.load raised;
    `with` closes it on every path.
    """
    with open(file, "r") as fp:
        return json.load(fp)
    
def ParseSLBData(slb_id="h00t",start=time.time()-(24*60*60),stop=time.time()):
  """Fetch slb.nu solar data for station `slb_id` between epoch seconds
  `start` and `stop`; returns a pandas DataFrame whose "Time" column holds
  epoch-second timestamps.

  NOTE(review): the default start/stop are evaluated once at import time,
  so they are frozen at module load — confirm that is intended.
  """
  
  # slb.nu expects YYMMDDHHMM local-time stamps in the query string.
  starttime = time.strftime("%y%m%d%H%M",time.localtime(start))
  stoptime = time.strftime("%y%m%d%H%M",time.localtime(stop))
  url = "http://slb.nu/soldata/index.php?KEY=%s&start=%s&stop=%s" %(slb_id,starttime,stoptime)

  # First request pulls the data rows (headers skipped); the second fetches
  # the same URL again only to recover the column names from header row 7.
  df = pandas.read_csv(url,sep = ";",parse_dates=[[0, 1]],skiprows=8, header = None ,infer_datetime_format = True,na_values = ["     ","    ","  "," ",""])
  cl = pandas.read_csv(url,sep = ";", header = 6,error_bad_lines= False,na_values = [""],nrows=1)

  #Align keys to data and rename time col. 
  # The first two header columns (Date, Time) were merged by parse_dates,
  # so drop them and prepend a single "Time" label.
  cols = cl.keys()
  cols = cols[2:]
  col2 = cols.insert(0,"Time")
  col2 = col2.insert(-1,"NAN")
  
  #Set data keys as column descriptors
  df.columns = col2
  
  #Delete trailing columns with junk. 
  # Only the last few columns are inspected; any not tagged with the station
  # id is treated as parser junk and removed.
  for key in df.keys()[-5:df.shape[1]-1]:
      if key.find(slb_id) == -1:
          del df[key]

  #Reformat timestamps
  # NOTE(review): chained assignment (df["Time"][i] = ...) may write to a
  # copy in newer pandas, and dropping rows while iterating the original row
  # count can skip/fail on later indices — confirm before upgrading pandas.
  for i in range(0,df.shape[0]):
      try:
          timestamp = time.mktime(time.strptime(df["Time"][i],"%y-%m-%d %H:%M"))
          df["Time"][i] = timestamp
      except:
          # Rows whose first cell is not a parseable timestamp are dropped.
          df = df.drop(df.index[i])

  return df
  



def ParseCVS(file):
  """Read a CSV file into a pandas DataFrame, echo it, and return it.

  file -- path to the CSV file; it must contain a 'Line' column.

  Fixes vs. the original: the file handle was opened and never closed
  (and the open handle shadowed the `file` parameter), and the parsed
  frame was discarded — it is now returned so callers can use it.
  """
  with open(file, "r") as fp:
      df = pandas.read_csv(fp)

  # Parenthesized single-argument print works on both Python 2 and 3.
  print(df)
  print(df['Line'])

  return df


def SendToInfluxDB(df,FeedId,config_file="influx.json"):
    """Write every row of `df` to InfluxDB as points in "<FeedId>/raw_data".

    df          -- DataFrame whose first column holds epoch-second timestamps.
    FeedId      -- feed identifier used to build the series name.
    config_file -- JSON file with host/port/user/password/database keys.

    Bug fixes vs. the original: it iterated the notebook-global `Data`
    instead of the `df` argument, tested cells against the *strings*
    "NaN"/"nan" instead of real NaN values, used the deprecated irow(),
    and returned from inside the loop so only the first row was written.
    """
    #Series name
    series = FeedId + "/raw_data"

    #Load database credentials (with-block closes the handle on all paths)
    with open(config_file, "r") as fp:
        config = json.load(fp)

    #Connect
    client = InfluxDBClient(config["host"], config["port"], config["user"], config["password"], config["database"])

    #Save each row
    for i in range(0, df.shape[0]):
        timestamp = df.iloc[i, 0]
        column = ["time"]
        # Influx "m" precision expects milliseconds, hence *1000.
        data = [int(timestamp * 1000)]

        #Skip NaN cells so missing samples are not written
        for j in range(1, df.shape[1]):
            if pandas.isnull(df.iloc[i, j]):
                continue

            #Add key
            column.append(df.keys()[j])
            data.append(df.iloc[i, j])

        #Only the timestamp survived: nothing to write for this row.
        if len(column) == 1:
            continue

        fdata = [{
            "points": [data],
            "name": series,
            "columns": column
            }]

        client.write_points_with_precision(fdata, "m")

In [203]:
#ParseCVS("testdata/h00t_1310160720_1406030410.csv")
SiteIDs = LoadSiteIds()
print SiteIDs.keys()


[u'h00t']

In [228]:
# Fetch a 30-week window of data ending x weeks before the reference
# timestamp (1402015728 = 2014-06-06, captured from time.time() below).
x = 5
timestamp = 1402015728.10898
Data = ParseSLBData(SiteIDs.keys()[0],timestamp-(24*60*60*7*(x+30)),timestamp -(24*60*60*7*x))

In [234]:
Data.shape


Out[234]:
(30241, 86)

In [235]:
FeedId = SiteIDs["h00t"]
config_file="influx.json"

In [236]:
#Series name
# NOTE(review): this cell inlines the setup half of SendToInfluxDB() so the
# write loop in the next cell can be iterated on interactively; it relies on
# FeedId and config_file defined in the previous cell.
series = FeedId + "/raw_data" 
    
    #Load database credentials
fp = open(config_file,"r")
config = json.load(fp)
fp.close()
    
   #Connect
client = InfluxDBClient(config["host"], config["port"], config["user"], config["password"], config["database"])

In [237]:
#Save each row
# NOTE(review): inline copy of the SendToInfluxDB() loop; depends on the
# notebook globals Data, series and client created in earlier cells.
for i in range(0,Data.shape[0]):
    # irow() is deprecated pandas API; .iloc[i, 0] is the modern equivalent.
    timestamp = Data.irow(i)[0]
    column = ["time"]
    # Influx "m" precision expects milliseconds, hence *1000.
    data = [int(timestamp*1000)]
    
    
    #Remove NANs
    for j in range(1,Data.shape[1]):
        # NOTE(review): numpy.isnan raises TypeError on non-numeric cells —
        # confirm every non-time column is numeric.
        if numpy.isnan(Data.iloc[i,j]):
            continue
        
        #Add key
        column.append(Data.keys()[j])
        data.append(Data.iloc[i,j])
        
    fdata = [{
        "points": [data],
        "name": series,
        "columns": column
        }]

    client.write_points_with_precision(fdata,"m")

In [222]:
time.time()


Out[222]:
1402015728.10898

In [260]:
client


Out[260]:
<influxdb.client.InfluxDBClient at 0x7f78b0c17890>

In [293]:
class InfluxFeedLTSInterface(InfluxDBClient):
  """InfluxDBClient specialised for the feed long-term-storage series.

  Credentials are read from a JSON config file containing host, port,
  user, password and database keys.
  """

  def __init__(self,config_file="influx.json"):
    """Connect using credentials loaded from `config_file`."""

    #Load database credentials (with-block closes the handle on all paths)
    with open(config_file, "r") as fp:
      self.config = json.load(fp)

    #Connect
    InfluxDBClient.__init__(self,self.config["host"], self.config["port"], self.config["user"], self.config["password"], self.config["database"])

  def GetLastTimeStamp(self,FluxId):
    """Return the newest timestamp (epoch seconds) stored in series
    `FluxId`, or 0.0 when the series is empty or the result is malformed."""

    result = self.query('select time from %s order desc limit 1;' % FluxId, time_precision='m')

    try:
      # Influx returns milliseconds at "m" precision; convert to seconds.
      return float(result[0]["points"][0][0])/1000.0
    except (IndexError, KeyError, TypeError, ValueError):
      # Narrowed from a bare except: only treat a missing/malformed result
      # as "no previous data"; unrelated errors should surface.
      return 0.0

  def SendToInfluxDB(self,df,FeedId):
    """Write every row of `df` (first column = epoch-second timestamps)
    to the series "<FeedId>/raw_data", skipping NaN cells.

    Bug fixes vs. the original: it iterated the notebook-global `Data`
    instead of `df`, returned from inside the loop after the first row,
    and its all-NaN check tested len(column) == 0, which could never be
    true because "time" is always appended first.
    """
    #Series name
    series = FeedId + "/raw_data"

    #Save each row
    for i in range(0, df.shape[0]):
        timestamp = df.iloc[i, 0]
        column = ["time"]
        # Influx "m" precision expects milliseconds, hence *1000.
        data = [int(timestamp*1000)]

        #Remove NANs (pandas.isnull is safe on non-numeric cells too)
        for j in range(1, df.shape[1]):
          if pandas.isnull(df.iloc[i, j]):
            continue

          #Add key
          column.append(df.keys()[j])
          data.append(df.iloc[i, j])

        #If there were only NaNs on this row, continue to the next row.
        if len(column) == 1:
          continue

        fdata = [{
            "points": [data],
            "name": series,
            "columns": column
            }]

        self.write_points_with_precision(fdata,"m")

In [271]:
#ParseCVS("testdata/h00t_1310160720_1406030410.csv")
SiteIDs = LoadSiteIds()

Feeds = InfluxFeedLTSInterface()

#Get all data until now + 1h
StopTime = time.time() + 3600


PeriodLen = 60*60*24*7

In [267]:
# Backfill loop: for every site, resume from the newest stored timestamp
# and pull data forward one PeriodLen chunk at a time until StopTime.
for Site in SiteIDs:
  FeedId = "\"%s/raw_data\"" % SiteIDs[Site]
  StartTime = Feeds.GetLastTimeStamp(FeedId)
    
  Current = StartTime
    

  while Current < StopTime:
     print "Reading SLB data from: " + Site
     Data = ParseSLBData(Site,Current,Current + PeriodLen)
     Current += PeriodLen
     print "Sending data to influx as: " + FeedId
     # Upload deliberately disabled while testing the download path.
     #Feeds.SendToInfluxDB(Data,FeedId)


Reading SLB data from: h00t
Sending data to influx as: "59.344528-18.126394/raw_data"
Reading SLB data from: h00t
Sending data to influx as: "59.344528-18.126394/raw_data"
Reading SLB data from: h00t
Sending data to influx as: "59.344528-18.126394/raw_data"
Reading SLB data from: h00t
Sending data to influx as: "59.344528-18.126394/raw_data"
Reading SLB data from: h00t
Sending data to influx as: "59.344528-18.126394/raw_data"
Reading SLB data from: h00t
Sending data to influx as: "59.344528-18.126394/raw_data"
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-267-b082efe053e4> in <module>()
      6 while Current < StopTime:
      7   print "Reading SLB data from: " + Site
----> 8   Data = ParseSLBData(Site,Current,Current + PeriodLen)
      9   Current += PeriodLen
     10   print "Sending data to influx as: " + FeedId

<ipython-input-202-8c5ebc6ebf0f> in ParseSLBData(slb_id, start, stop)
     29   url = "http://slb.nu/soldata/index.php?KEY=%s&start=%s&stop=%s" %(slb_id,starttime,stoptime)
     30 
---> 31   df = pandas.read_csv(url,sep = ";",parse_dates=[[0, 1]],skiprows=8, header = None ,infer_datetime_format = True,na_values = ["     ","    ","  "," ",""])
     32   cl = pandas.read_csv(url,sep = ";", header = 6,error_bad_lines= False,na_values = [""],nrows=1)
     33 

/usr/lib/python2.7/dist-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, na_fvalues, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format)
    418                     infer_datetime_format=infer_datetime_format)
    419 
--> 420         return _read(filepath_or_buffer, kwds)
    421 
    422     parser_f.__name__ = name

/usr/lib/python2.7/dist-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    204 
    205     filepath_or_buffer, _ = get_filepath_or_buffer(filepath_or_buffer,
--> 206                                                    encoding)
    207 
    208     if kwds.get('date_parser', None) is not None:

/usr/lib/python2.7/dist-packages/pandas/io/common.pyc in get_filepath_or_buffer(filepath_or_buffer, encoding)
    116 
    117     if _is_url(filepath_or_buffer):
--> 118         req = _urlopen(str(filepath_or_buffer))
    119         return maybe_read_encoded_stream(req, encoding)
    120 

/usr/lib/python2.7/urllib2.pyc in urlopen(url, data, timeout)
    125     if _opener is None:
    126         _opener = build_opener()
--> 127     return _opener.open(url, data, timeout)
    128 
    129 def install_opener(opener):

/usr/lib/python2.7/urllib2.pyc in open(self, fullurl, data, timeout)
    402             req = meth(req)
    403 
--> 404         response = self._open(req, data)
    405 
    406         # post-process response

/usr/lib/python2.7/urllib2.pyc in _open(self, req, data)
    420         protocol = req.get_type()
    421         result = self._call_chain(self.handle_open, protocol, protocol +
--> 422                                   '_open', req)
    423         if result:
    424             return result

/usr/lib/python2.7/urllib2.pyc in _call_chain(self, chain, kind, meth_name, *args)
    380             func = getattr(handler, meth_name)
    381 
--> 382             result = func(*args)
    383             if result is not None:
    384                 return result

/usr/lib/python2.7/urllib2.pyc in http_open(self, req)
   1212 
   1213     def http_open(self, req):
-> 1214         return self.do_open(httplib.HTTPConnection, req)
   1215 
   1216     http_request = AbstractHTTPHandler.do_request_

/usr/lib/python2.7/urllib2.pyc in do_open(self, http_class, req)
   1185         else:
   1186             try:
-> 1187                 r = h.getresponse(buffering=True)
   1188             except TypeError: # buffering kw not supported
   1189                 r = h.getresponse()

/usr/lib/python2.7/httplib.pyc in getresponse(self, buffering)
   1043         response = self.response_class(*args, **kwds)
   1044 
-> 1045         response.begin()
   1046         assert response.will_close != _UNKNOWN
   1047         self.__state = _CS_IDLE

/usr/lib/python2.7/httplib.pyc in begin(self)
    407         # read until we get a non-100 response
    408         while True:
--> 409             version, status, reason = self._read_status()
    410             if status != CONTINUE:
    411                 break

/usr/lib/python2.7/httplib.pyc in _read_status(self)
    363     def _read_status(self):
    364         # Initialize with Simple-Response defaults
--> 365         line = self.fp.readline(_MAXLINE + 1)
    366         if len(line) > _MAXLINE:
    367             raise LineTooLong("header line")

/usr/lib/python2.7/socket.pyc in readline(self, size)
    474             while True:
    475                 try:
--> 476                     data = self._sock.recv(self._rbufsize)
    477                 except error, e:
    478                     if e.args[0] == EINTR:

KeyboardInterrupt: 
Reading SLB data from: h00t

In [308]:
# Main import driver: for each site, resume just after the newest stored
# sample (or from Oct 2013 for feeds with no data) and download the SLB
# history in week-sized chunks.
#ParseCVS("testdata/h00t_1310160720_1406030410.csv")
SiteIDs = LoadSiteIds()

Feeds = InfluxFeedLTSInterface()

#Get all data until now + 1h
StopTime = time.time() + 3600

for Site in SiteIDs:
    FeedId = "\"%s/raw_data\"" % SiteIDs[Site]
    StartTime = Feeds.GetLastTimeStamp(FeedId)
    
    if StartTime == 0:
      print "No previous records in:" + FeedId
      print "Starting from Okt 2013"
      StartTime = time.mktime(time.strptime("2013-10-01","%Y-%m-%d"))
    else:
      print "Last record in stream" + FeedId
      print "at: " + time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(StartTime))
    
    #Start a tiny bit after the last value.
    Current = StartTime + 0.5
    
    
    # One week per request keeps each slb.nu response a manageable size.
    PeriodLen = 60*60*24*7
    
    while Current < StopTime:
        print "Reading SLB data from: " + Site 
        print "From: " + time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(Current))
        print "To:   " + time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(Current + PeriodLen))
        Data = ParseSLBData(Site,Current,Current + PeriodLen)

        #Remove duplicate
        # NOTE(review): Data["Time"][0] indexes by *label* 0, which can be
        # missing after ParseSLBData drops rows — confirm; .iloc[0] is safer.
        if Data["Time"][0] == StartTime:
            Data = Data.drop(Data.index[0])
        
        Current += PeriodLen
        print "Sending data to influx as: " + FeedId
        # Upload deliberately disabled while testing the download path.
        #Feeds.SendToInfluxDB(Data,FeedId)


Last record in stream"59.344528-18.126394/raw_data"
at: 2014-06-05 22:10:00
Reading SLB data from: h00t
From: 2014-06-05 22:10:00
To:   2014-06-12 22:10:00
Sending data to influx as: "59.344528-18.126394/raw_data"
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-308-b28c735bd8ea> in <module>()
     29         print "From: " + time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(Current))
     30         print "To:   " + time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(Current + PeriodLen))
---> 31         Data = ParseSLBData(Site,Current,Current + PeriodLen)
     32 
     33         #Remove duplicate

<ipython-input-202-8c5ebc6ebf0f> in ParseSLBData(slb_id, start, stop)
     29   url = "http://slb.nu/soldata/index.php?KEY=%s&start=%s&stop=%s" %(slb_id,starttime,stoptime)
     30 
---> 31   df = pandas.read_csv(url,sep = ";",parse_dates=[[0, 1]],skiprows=8, header = None ,infer_datetime_format = True,na_values = ["     ","    ","  "," ",""])
     32   cl = pandas.read_csv(url,sep = ";", header = 6,error_bad_lines= False,na_values = [""],nrows=1)
     33 

/usr/lib/python2.7/dist-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, na_fvalues, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format)
    418                     infer_datetime_format=infer_datetime_format)
    419 
--> 420         return _read(filepath_or_buffer, kwds)
    421 
    422     parser_f.__name__ = name

/usr/lib/python2.7/dist-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    216 
    217     # Create the parser.
--> 218     parser = TextFileReader(filepath_or_buffer, **kwds)
    219 
    220     if nrows is not None:

/usr/lib/python2.7/dist-packages/pandas/io/parsers.pyc in __init__(self, f, engine, **kwds)
    500             self.options['has_index_names'] = kwds['has_index_names']
    501 
--> 502         self._make_engine(self.engine)
    503 
    504     def _get_options_with_defaults(self, engine):

/usr/lib/python2.7/dist-packages/pandas/io/parsers.pyc in _make_engine(self, engine)
    608     def _make_engine(self, engine='c'):
    609         if engine == 'c':
--> 610             self._engine = CParserWrapper(self.f, **self.options)
    611         else:
    612             if engine == 'python':

/usr/lib/python2.7/dist-packages/pandas/io/parsers.pyc in __init__(self, src, **kwds)
    970         kwds['allow_leading_cols'] = self.index_col is not False
    971 
--> 972         self._reader = _parser.TextReader(src, **kwds)
    973 
    974         # XXX

/usr/lib/python2.7/dist-packages/pandas/parser.so in pandas.parser.TextReader.__cinit__ (pandas/parser.c:4698)()

ValueError: No columns to parse from file
No previous records in:"59.4090146-17.9255376/raw_data"
Starting from Okt 2013
Reading SLB data from: b01
From: 2013-10-01 00:00:00
To:   2013-10-08 00:00:00

In [306]:



Out[306]:
1401999600.0

In [298]:
StartTime


Out[298]:
1401999000.0

In [302]:
Data = Data.drop(Data.index[0])

In [303]:
Data


Out[303]:
Time h00tM0Pac001 h00tM0Pac002 h00tM0Pac003 h00tM0Pac004 h00tM0Pac005 h00tM0Pac006 h00tM0Pac007 h00tM0Pac008 h00tM0Pac009 h00tM0Pac010 h00tM0Pac011 h00tM0PacTot h00tMErro001 h00tMErro002 h00tMErro003 h00tMErro004 h00tMErro005 h00tMErro006 h00tMErro007
1 1.402e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
2 1.402e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
3 1.402001e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
4 1.402001e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
5 1.402002e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
6 1.402003e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
7 1.402003e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
8 1.402004e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
9 1.402004e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
10 1.402005e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
11 1.402006e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
12 1.402006e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
13 1.402007e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
14 1.402007e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
15 1.402008e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
16 1.402009e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
17 1.402009e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
18 1.40201e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
19 1.40201e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
20 1.402011e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
21 1.402012e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
22 1.402012e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
23 1.402013e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
24 1.402013e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
25 1.402014e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
26 1.402015e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
27 1.402015e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
28 1.402016e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
29 1.402016e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
30 1.402017e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
31 1.402018e+09 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN NaN ...
32 1.402018e+09 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN NaN NaN NaN NaN NaN NaN ...
33 1.402019e+09 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
34 1.402019e+09 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
35 1.40202e+09 37 29 45 35 40 17 45 44 33 31 48 404 0 0 0 0 0 0 0 ...
36 1.402021e+09 145 140 153 156 161 114 178 173 137 141 173 1671 0 0 0 0 0 0 0 ...
37 1.402021e+09 256 258 290 282 285 221 307 302 248 252 300 3001 0 0 0 0 0 0 0 ...
38 1.402022e+09 365 369 410 400 408 319 436 433 367 367 432 4306 0 0 0 0 0 0 0 ...
39 1.402022e+09 427 457 504 485 503 395 540 532 427 441 519 5230 0 0 0 0 0 0 0 ...
40 1.402023e+09 403 483 536 507 527 407 579 566 429 441 546 5424 0 0 0 0 0 0 0 ...
41 1.402024e+09 398 539 612 555 592 443 656 646 453 465 607 5966 0 0 0 0 0 0 0 ...
42 1.402024e+09 438 636 735 661 690 506 792 789 517 523 728 7015 0 0 0 0 0 0 0 ...
43 1.402025e+09 522 805 918 833 857 616 983 981 615 620 928 8678 0 0 0 0 0 0 0 ...
44 1.402025e+09 629 1127 1260 1210 1219 783 1355 1350 753 767 1306 11759 0 0 0 0 0 0 0 ...
45 1.402026e+09 749 1429 1599 1540 1603 1041 1708 1701 894 909 1647 14820 0 0 0 0 0 0 0 ...
46 1.402027e+09 955 1714 2026 1961 2035 1507 2139 2131 1115 1113 2051 18747 0 0 0 0 0 0 0 ...
47 1.402027e+09 1138 1909 2334 2265 2333 1780 2436 2424 1289 1267 2351 21526 0 0 0 0 0 0 0 ...
48 1.402028e+09 1367 1767 2095 2052 2100 1638 2181 2172 1418 1403 2151 20344 0 0 0 0 0 0 0 ...
49 1.402028e+09 1343 1261 1384 1352 1377 1137 1456 1444 1265 1288 1443 14750 0 0 0 0 0 0 0 ...
50 1.402029e+09 919 864 944 911 932 780 979 976 885 906 977 10073 0 0 0 0 0 0 0 ...
51 1.40203e+09 532 509 560 539 562 453 587 585 534 534 586 5981 0 0 0 0 0 0 0 ...
52 1.40203e+09 516 487 536 529 539 437 561 557 515 506 560 5743 0 0 0 0 0 0 0 ...
53 1.402031e+09 468 440 484 475 489 393 515 509 469 443 513 5198 0 0 0 0 0 0 0 ...
54 1.402031e+09 524 488 536 532 537 438 558 555 518 503 555 5744 0 0 0 0 0 0 0 ...
55 1.402032e+09 823 765 840 821 834 699 867 861 805 762 865 8942 0 0 0 0 0 0 0 ...
56 1.402033e+09 812 755 826 803 826 687 859 856 797 759 860 8840 0 0 0 0 0 0 0 ...
57 1.402033e+09 849 774 849 832 843 700 867 864 829 820 869 9096 0 0 0 0 0 0 0 ...
58 1.402034e+09 1627 1438 1568 1556 1557 1305 1627 1611 1536 1563 1623 17011 0 0 0 0 0 0 0 ...
59 1.402034e+09 2323 2035 2221 2238 2238 1862 2325 2308 2186 2218 2330 24284 0 0 0 0 0 0 0 ...
60 1.402035e+09 2832 2505 2736 2758 2758 2294 2877 2859 2643 2581 2867 29710 0 0 0 0 0 0 0 ...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...

1008 rows × 86 columns


In [313]:
url = "http://www.slb.nu/soldata/index.php?KEY=b01t&start=1305310000&stop=1306080360"

In [314]:
df = pandas.read_csv(url,sep = ";",parse_dates=[[0, 1]],skiprows=8, header = None ,infer_datetime_format = True,na_values = ["     ","    ","  "," ",""])
cl = pandas.read_csv(url,sep = ";", header = 6,error_bad_lines= False,na_values = [""],nrows=1)

In [316]:
# Scratch re-run of the ParseSLBData() body against the b01t test URL,
# for debugging the column-alignment logic interactively.
#Align keys to data and rename time col. 
cols = cl.keys()
cols = cols[2:]
col2 = cols.insert(0,"Time")
col2 = col2.insert(-1,"NAN")

#Set data keys as column descriptors
df.columns = col2

#Delete trailing columns with junk. 
# NOTE(review): slb_id is not defined at this point in the notebook, so
# this loop raises NameError (see the traceback in this cell's output);
# it is only bound later, in the In[334] cell.
for key in df.keys()[-5:df.shape[1]-1]:
  if key.find(slb_id) == -1:
      del df[key]

#Reformat timestamps
for i in range(0,df.shape[0]):
  try:
      timestamp = time.mktime(time.strptime(df["Time"][i],"%y-%m-%d %H:%M"))
      df["Time"][i] = timestamp
  except:
      # Rows whose first cell is not a parseable timestamp are dropped.
      df = df.drop(df.index[i])


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-316-c59ec0c6d2f9> in <module>()
     10 #Delete trailing columns with junk.
     11 for key in df.keys()[-5:df.shape[1]-1]:
---> 12   if key.find(slb_id) == -1:
     13       del df[key]
     14 

NameError: name 'slb_id' is not defined

In [317]:
df


Out[317]:
Time b01tM0Pac001 b01tM0Pac002 b01tMGLOB003 b01tMGLOc003 b01tMPOWc001 b01tMPOWc002 b01tMTmpM003 NAN Unnamed: 9
0 13-05-31 00:00 NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 13-05-31 00:10 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 13-05-31 00:20 NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 13-05-31 00:30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 13-05-31 00:40 NaN NaN NaN NaN NaN NaN NaN NaN NaN
5 13-05-31 00:50 NaN NaN NaN NaN NaN NaN NaN NaN NaN
6 13-05-31 01:00 NaN NaN NaN NaN NaN NaN NaN NaN NaN
7 13-05-31 01:10 NaN NaN NaN NaN NaN NaN NaN NaN NaN
8 13-05-31 01:20 NaN NaN NaN NaN NaN NaN NaN NaN NaN
9 13-05-31 01:30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
10 13-05-31 01:40 NaN NaN NaN NaN NaN NaN NaN NaN NaN
11 13-05-31 01:50 NaN NaN NaN NaN NaN NaN NaN NaN NaN
12 13-05-31 02:00 NaN NaN NaN NaN NaN NaN NaN NaN NaN
13 13-05-31 02:10 NaN NaN NaN NaN NaN NaN NaN NaN NaN
14 13-05-31 02:20 NaN NaN NaN NaN NaN NaN NaN NaN NaN
15 13-05-31 02:30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
16 13-05-31 02:40 NaN NaN NaN NaN NaN NaN NaN NaN NaN
17 13-05-31 02:50 NaN NaN NaN NaN NaN NaN NaN NaN NaN
18 13-05-31 03:00 NaN NaN NaN NaN NaN NaN NaN NaN NaN
19 13-05-31 03:10 NaN NaN NaN NaN NaN NaN NaN NaN NaN
20 13-05-31 03:20 NaN NaN NaN NaN NaN NaN NaN NaN NaN
21 13-05-31 03:30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
22 13-05-31 03:40 NaN NaN NaN NaN NaN NaN NaN NaN NaN
23 13-05-31 03:50 NaN NaN NaN NaN NaN NaN NaN NaN NaN
24 13-05-31 04:00 NaN NaN NaN NaN NaN NaN NaN NaN NaN
25 13-05-31 04:10 NaN NaN NaN NaN NaN NaN NaN NaN NaN
26 13-05-31 04:20 NaN NaN NaN NaN NaN NaN NaN NaN NaN
27 13-05-31 04:30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
28 13-05-31 04:40 NaN NaN NaN NaN NaN NaN NaN NaN NaN
29 13-05-31 04:50 NaN NaN NaN NaN NaN NaN NaN NaN NaN
30 13-05-31 05:00 NaN NaN NaN NaN NaN NaN NaN NaN NaN
31 13-05-31 05:10 NaN NaN NaN NaN NaN NaN NaN NaN NaN
32 13-05-31 05:20 NaN NaN NaN NaN NaN NaN NaN NaN NaN
33 13-05-31 05:30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
34 13-05-31 05:40 NaN NaN NaN NaN NaN NaN NaN NaN NaN
35 13-05-31 05:50 NaN NaN NaN NaN NaN NaN NaN NaN NaN
36 13-05-31 06:00 NaN NaN NaN NaN NaN NaN NaN NaN NaN
37 13-05-31 06:10 NaN NaN NaN NaN NaN NaN NaN NaN NaN
38 13-05-31 06:20 NaN NaN NaN NaN NaN NaN NaN NaN NaN
39 13-05-31 06:30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
40 13-05-31 06:40 NaN NaN NaN NaN NaN NaN NaN NaN NaN
41 13-05-31 06:50 NaN NaN NaN NaN NaN NaN NaN NaN NaN
42 13-05-31 07:00 NaN NaN NaN NaN NaN NaN NaN NaN NaN
43 13-05-31 07:10 NaN NaN NaN NaN NaN NaN NaN NaN NaN
44 13-05-31 07:20 NaN NaN NaN NaN NaN NaN NaN NaN NaN
45 13-05-31 07:30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
46 13-05-31 07:40 NaN NaN NaN NaN NaN NaN NaN NaN NaN
47 13-05-31 07:50 NaN NaN NaN NaN NaN NaN NaN NaN NaN
48 13-05-31 08:00 NaN NaN NaN NaN NaN NaN NaN NaN NaN
49 13-05-31 08:10 NaN NaN NaN NaN NaN NaN NaN NaN NaN
50 13-05-31 08:20 NaN NaN NaN NaN NaN NaN NaN NaN NaN
51 13-05-31 08:30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
52 13-05-31 08:40 NaN NaN NaN NaN NaN NaN NaN NaN NaN
53 13-05-31 08:50 NaN NaN NaN NaN NaN NaN NaN NaN NaN
54 13-05-31 09:00 NaN NaN NaN NaN NaN NaN NaN NaN NaN
55 13-05-31 09:10 NaN NaN NaN NaN NaN NaN NaN NaN NaN
56 13-05-31 09:20 NaN NaN NaN NaN NaN NaN NaN NaN NaN
57 13-05-31 09:30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
58 13-05-31 09:40 NaN NaN NaN NaN NaN NaN NaN NaN NaN
59 13-05-31 09:50 NaN NaN NaN NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ...

1178 rows × 10 columns


In [318]:
url


Out[318]:
'http://www.slb.nu/soldata/index.php?KEY=b01t&start=1305310000&stop=1306080360'

In [319]:
cl


Out[319]:
KEY => Unnamed: 1 b01tM0Pac001 b01tM0Pac002 b01tMGLOB003 b01tMGLOc003 b01tMPOWc001 b01tMPOWc002 b01tMTmpM003 Unnamed: 9
0 Date Time Data Data Data Data Data Data Data NaN

1 rows × 10 columns


In [320]:
cl.keys()


Out[320]:
Index([u'KEY => ', u'Unnamed: 1', u'b01tM0Pac001', u'b01tM0Pac002', u'b01tMGLOB003', u'b01tMGLOc003', u'b01tMPOWc001', u'b01tMPOWc002', u'b01tMTmpM003', u'Unnamed: 9'], dtype='object')

In [321]:
cols = cl.keys()

In [322]:
type(col)


Out[322]:
list

In [324]:
cols[2] = cols[2].strip("b01t")


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-324-8274a635857f> in <module>()
----> 1 cols[2] = cols[2].strip("b01t")

/usr/lib/python2.7/dist-packages/pandas/core/base.pyc in _disabled(self, *args, **kwargs)
    178         """This method will not function because object is immutable."""
    179         raise TypeError("'%s' does not support mutable operations." %
--> 180                         self.__class__)
    181 
    182     __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled

TypeError: '<class 'pandas.core.index.Index'>' does not support mutable operations.

In [326]:
t = cols[2].strip("b01t")

In [328]:
type(cols)


Out[328]:
pandas.core.index.Index

In [330]:
df.columns = [1,2,3]


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-330-1813fda7135b> in <module>()
----> 1 df.columns = [1,2,3]

/usr/lib/python2.7/dist-packages/pandas/core/generic.pyc in __setattr__(self, name, value)
   1826                 existing = getattr(self, name)
   1827                 if isinstance(existing, Index):
-> 1828                     object.__setattr__(self, name, value)
   1829                 elif name in self._info_axis:
   1830                     self[name] = value

/usr/lib/python2.7/dist-packages/pandas/lib.so in pandas.lib.AxisProperty.__set__ (pandas/lib.c:31048)()

/usr/lib/python2.7/dist-packages/pandas/core/generic.pyc in _set_axis(self, axis, labels)
    394 
    395     def _set_axis(self, axis, labels):
--> 396         self._data.set_axis(axis, labels)
    397         self._clear_item_cache()
    398 

/usr/lib/python2.7/dist-packages/pandas/core/internals.pyc in set_axis(self, axis, value, maybe_rename, check_axis)
   2084 
   2085     def set_axis(self, axis, value, maybe_rename=True, check_axis=True):
-> 2086         cur_axis, value = self._set_axis(axis, value, check_axis)
   2087 
   2088         if axis == 0:

/usr/lib/python2.7/dist-packages/pandas/core/internals.pyc in _set_axis(self, axis, value, check_axis)
   2077             raise ValueError('Length mismatch: Expected axis has %d elements, '
   2078                              'new values have %d elements' % (len(cur_axis),
-> 2079                                                               len(value)))
   2080 
   2081         self.axes[axis] = value

ValueError: Length mismatch: Expected axis has 10 elements, new values have 3 elements

In [334]:
#Remove SLB station id from key.
slb_id = "b01t"

NewCols = []

for datakey in cols:
    NewCols.append(datakey.strip(slb_id))

In [335]:
NewCols


Out[335]:
['KEY => ',
 'Unnamed: ',
 'M0Pac',
 'M0Pac002',
 'MGLOB003',
 'MGLOc003',
 'MPOWc',
 'MPOWc002',
 'MTmpM003',
 'Unnamed: 9']

In [336]:
cl


Out[336]:
KEY => Unnamed: 1 b01tM0Pac001 b01tM0Pac002 b01tMGLOB003 b01tMGLOc003 b01tMPOWc001 b01tMPOWc002 b01tMTmpM003 Unnamed: 9
0 Date Time Data Data Data Data Data Data Data NaN

1 rows × 10 columns


In [337]:
cl.columns = NewCols

In [338]:
cl


Out[338]:
KEY => Unnamed: M0Pac M0Pac002 MGLOB003 MGLOc003 MPOWc MPOWc002 MTmpM003 Unnamed: 9
0 Date Time Data Data Data Data Data Data Data NaN

1 rows × 10 columns


In [341]:
SiteIDs = LoadSiteIds()

In [342]:
SiteIDs


Out[342]:
{u'b01t': u'f1136e33-d227-4fbd-ae72-d3488d70ba89',
 u'b03t': u'e63710a4-78d9-4071-9a2e-ad35534355f4',
 u'b04t': u'6f075b69-b823-4e6b-8021-dd751cc79eea',
 u'b05t': u'802afb51-c5eb-4230-9b22-9a77ef7260b3',
 u'b06t': u'2a31fb24-347b-4924-ab89-5c434771a2ae',
 u'f04t': u'32383bde-7648-4abe-9dac-44701eabd72f',
 u'f05t': u'face0d13-b22d-48fe-bec1-d29732604133',
 u'f06t': u'709e47a1-ca88-4c22-890f-2407e9cb131a',
 u'f07t': u'b0dd71a8-efe8-41eb-bab1-633ebaaa778c',
 u'f09t': u'4cf6c743-8d0b-45f0-aa72-8a0c10315cf5',
 u'h00t': u'46d55815-f927-459f-a8e2-8bbcd88008ee'}

In [ ]: