Testing file opening code


In [1]:
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import aq_plot
import openfunctions
#%matplotlib inline

In [2]:
# Location and name of the sample file used throughout this notebook.
# The file name is a dot-separated list of descriptive fields (station code,
# start date, component, duration, frequency, ...).
test_folder = '../../Ebas_150116_1110/'
test_file = 'GB0046R.20030107080000.20040302000000.low_vol_sampler.pm10_mass.pm10.6mo.1w.NO01L_lvs_uk17.NO01L_Thermo_Optical-Sunset_Lab.lev2.nas'

In [3]:
# Plot the sample file with the project's aq_plot helper (its implementation is
# not part of this notebook).
aq_plot.nice_fill_plot(test_folder+test_file)

In [6]:
# Load the same file twice through the project module: once as a DataFrame and
# once as the dict whose keys are listed in the next cell.
testDF = openfunctions.data_to_pandas_dataframe(test_folder+test_file)
dictionary = openfunctions.read_and_clean(test_folder+test_file)

In [14]:
# Show which fields openfunctions.read_and_clean returned.
dictionary.keys()


Out[14]:
['component',
 'units',
 'data_flag',
 'lat',
 'data',
 'station_name',
 'lon',
 'start_index',
 'end_index']

In [13]:
# Area chart of the 'pm10_mass' column; the y-axis label is the units string
# returned by read_and_clean.
axes = testDF['pm10_mass'].plot(kind='area', color='#7c8c93')
axes.set_ylabel(dictionary['units'])
axes.set_xlabel('Date of observation')
plt.show()

In [33]:



Out[33]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f60ea125990>]], dtype=object)

In [ ]:


In [6]:
# Re-read the file and keep only the measurements, keyed by the component name
# so that a DataFrame built from this dict gets a column named after it.
data_info = openfunctions.read_and_clean(test_folder+test_file)
slim_data = {data_info['component']:data_info['data']}

In [7]:
# Line plot of the measurements against their start times.
pd.DataFrame(slim_data,index=data_info['start_index']).plot()


Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x7feb796092d0>

In [ ]:


In [ ]:
# Print every dot-separated field of the file name with its position, to find
# the indices that filename_unpack should read.
for n, string in enumerate(test_file.split('.')):
    print(n,string)

In [16]:
def filename_unpack(filename):
    """Extract the descriptive fields encoded in a ``.nas`` file name.

    The name is a dot-separated list of fields, for example
    ``GB0046R.20030107080000.20040302000000.low_vol_sampler.pm10_mass.pm10.6mo.1w.<...>.nas``.

    Parameters
    ----------
    filename : str
        File name, optionally preceded by a directory path.

    Returns
    -------
    tuple of str
        ``(start_date, duration, frequency, component)``, read from fields
        1, 6, 7 and 4 of the name respectively.

    Raises
    ------
    IndexError
        If the name has fewer than eight dot-separated fields.
    """
    # Drop any directory part first: a relative path such as '../../dir/file'
    # contains dots of its own, which would shift every field index.
    fields = os.path.basename(filename).split('.')
    start_date = fields[1]
    component = fields[4]
    duration = fields[6]
    frequency = fields[7]
    return (start_date, duration, frequency, component)

In [17]:
# Check the parser against the first sample file name.
filename_unpack(test_file)


Out[17]:
('20030107080000', '6mo', '1w', 'pm10_mass')

In [18]:
# Second sample name, whose duration and frequency fields are '1y' and '1h'.
test_file_2 = r'GB0043R.20030101000000.20110101000000.low_vol_sampler.pm10_mass.pm10.1y.1h.GB02L_lvs_43.GB02L_gravimetri.lev2.nas'

In [19]:
# Check the parser against the second sample file name.
filename_unpack(test_file_2)


Out[19]:
('20030101000000', '1y', '1h', 'pm10_mass')

In [20]:
# Read the whole file into a list of lines. readlines() keeps each line's
# terminator (the dump below shows '\r\n'), so later cells must strip it.
with open(test_folder+test_file, 'rt') as opened_file:
    opened_lines = opened_file.readlines()

In [21]:
# Dump every line with its index to locate the header entries and the first
# row of the data table.
for n,line in enumerate(opened_lines):
    print(n,line)


(0, '45 1001\r\n')
(1, 'Cape, Niel J\r\n')
(2, 'NO01L, Norwegian Institute for Air Research, NILU, , Instituttveien 18, , 2007, Kjeller, Norway\r\n')
(3, 'Cape, Niel J\r\n')
(4, 'CAMPAIGN EMEP\r\n')
(5, '1 1\r\n')
(6, '2003 01 01 2004 03 02\r\n')
(7, '0\r\n')
(8, 'days from file reference point\r\n')
(9, '3\r\n')
(10, '1 1 1\r\n')
(11, '999.999999 999.99 9.999\r\n')
(12, 'end_time of measurement, days from the file reference point\r\n')
(13, 'pm10_mass, ug/m3\r\n')
(14, 'numflag pm10_mass, no unit\r\n')
(15, '0\r\n')
(16, '28\r\n')
(17, 'Data definition:              EBAS_1.1\r\n')
(18, 'Set type code:                TI\r\n')
(19, 'Timezone:                     UTC\r\n')
(20, 'Timeref:                      00_00\r\n')
(21, 'File name:                    GB0046R.20030107080000.20040302000000.low_vol_sampler.pm10_mass.pm10.6mo.1w.NO01L_lvs_uk17.NO01L_Thermo_Optical-Sunset_Lab.lev2.nas\r\n')
(22, 'File creation:                20150116101026\r\n')
(23, 'Startdate:                    20030107080000\r\n')
(24, 'Revision date:                20040302000000\r\n')
(25, 'Statistics:                   arithmetic mean\r\n')
(26, 'Period code:                  6mo\r\n')
(27, 'Resolution code:              1w\r\n')
(28, 'Station code:                 GB0046R\r\n')
(29, 'Platform code:                GB0046S\r\n')
(30, 'Station name:                 CEH Edingburgh\r\n')
(31, 'Station latitude:             55.86157\r\n')
(32, 'Station longitude:            -3.20647\r\n')
(33, 'Station altitude:             180.0m\r\n')
(34, 'Regime:                       IMG\r\n')
(35, 'Component:                    pm10_mass\r\n')
(36, 'Unit:                         ug/m3\r\n')
(37, 'Matrix:                       pm10\r\n')
(38, 'Instrument type:              low_vol_sampler\r\n')
(39, 'Laboratory code:              NO01L\r\n')
(40, 'Instrument name:              lvs_uk17\r\n')
(41, 'Method ref:                   NO01L_Thermo_Optical-Sunset_Lab\r\n')
(42, 'Originator:                   Cape, Niel J, , , , , , , , , \r\n')
(43, 'Submitter:                    Cape, Niel J, , , , , , , , , \r\n')
(44, 'starttime endtime PM10 flag_PM10\r\n')
(45, '  6.333333   7.333333  13.79 0.000\r\n')
(46, ' 13.333333  14.333333   7.16 0.000\r\n')
(47, ' 20.333333  21.333333  16.68 0.000\r\n')
(48, ' 27.333333  28.333333   3.42 0.000\r\n')
(49, ' 35.333333  36.333333  11.15 0.000\r\n')
(50, ' 41.333333  42.333333  15.06 0.000\r\n')
(51, ' 48.333333  49.333333  23.68 0.000\r\n')
(52, ' 55.333333  56.333333  62.26 0.000\r\n')
(53, ' 62.333333  63.333333   5.14 0.000\r\n')
(54, ' 69.333333  70.333333   8.00 0.000\r\n')
(55, ' 76.333333  77.333333  41.59 0.000\r\n')
(56, ' 83.333333  84.333333  30.38 0.000\r\n')
(57, ' 90.333333  91.333333   7.55 0.000\r\n')
(58, ' 97.333333  98.333333  36.14 0.000\r\n')
(59, '104.333333 105.333333  77.73 0.000\r\n')
(60, '111.333333 112.333333  12.85 0.000\r\n')
(61, '118.333333 119.333333   7.61 0.000\r\n')
(62, '125.333333 126.333333  11.87 0.000\r\n')
(63, '132.333333 133.333333   5.27 0.000\r\n')
(64, '139.333333 140.333333   4.44 0.000\r\n')
(65, '146.333333 147.333333   9.48 0.000\r\n')
(66, '156.333333 157.333333  13.40 0.000\r\n')
(67, '160.333333 161.333333  13.03 0.000\r\n')
(68, '167.333333 168.333333  11.33 0.000\r\n')
(69, '174.333333 175.333333   8.16 0.000\r\n')
(70, '181.333333 182.333333 999.99 0.999\r\n')

In [ ]:
# Header line 30 reads "Station name: <name>". Split on the first colon only, so
# a name that itself contains ': ' stays intact, and strip() the value instead
# of slicing off two characters: the slice assumed a '\r\n' terminator and would
# cut the last letter of the name when the line ends in a bare '\n'.
split_line = opened_lines[30].split(':', 1)
name = split_line[1].strip()

In [ ]:
# Display the parsed station name.
name

In [ ]:
# Header lines 31 and 32 hold "Station latitude: <value>" and
# "Station longitude: <value>".
latlon_ = []
for line in opened_lines[31:33]:
    split_line = line.split(':', 1)
    # float() ignores surrounding whitespace, including the line terminator.
    # Slicing off a fixed two characters would silently drop the last digit of
    # the coordinate whenever the line ends in '\n' rather than '\r\n'.
    latlon_.append(float(split_line[1]))

lat, lon = latlon_

In [ ]:
# Parse the data table, which starts at line 45 of the sample file. Each row
# holds four numbers: start time, end time, measured value and flag.
start_index = []
end_index = []
data = []
data_flag = []
for line in opened_lines[45:]:
    # str.split() with no argument splits on runs of whitespace, ignores
    # leading/trailing whitespace (line terminator included) and never yields
    # empty fields. re.split(r' *', ...) can match the empty string, which on
    # Python 3.7+ splits the line between every single character.
    split_line = line.split()
    if not split_line:
        continue  # tolerate blank lines, e.g. at the end of the file
    start_index.append(float(split_line[0]))
    end_index.append(float(split_line[1]))
    data.append(float(split_line[2]))
    data_flag.append(float(split_line[3]))

In [ ]:
def read_nas(filepath):
    """Read station coordinates and the measurement table from a ``.nas`` file.

    The file is expected to look like the sample dumped earlier in this
    notebook: the first line starts with the number of header lines, the
    header contains ``Station latitude:`` and ``Station longitude:`` entries,
    and every line after the header holds four whitespace-separated numbers
    (start time, end time, value, flag).

    Parameters
    ----------
    filepath : str
        Path of the file to read.

    Returns
    -------
    dict
        ``lat`` and ``lon`` as floats, plus the lists ``start_index``,
        ``end_index``, ``data`` and ``data_flag`` (floats, one entry per row).

    Raises
    ------
    ValueError
        If a coordinate entry is missing from the header, or a field cannot be
        converted to a number.
    IndexError
        If a data row has fewer than four fields.
    """
    with open(filepath, 'rt') as opened_file:
        # strip() drops the terminator whether a line ends in '\n' or '\r\n';
        # slicing off a fixed two characters eats a digit when it is just '\n'.
        opened_lines = [line.strip() for line in opened_file]

    # The first field of the first line is the header length, so the start of
    # the table does not have to be hard-coded. Fall back to the 45-line
    # layout of the sample file when that field cannot be read.
    try:
        header_length = int(opened_lines[0].split()[0])
    except (IndexError, ValueError):
        header_length = 45

    # Look the coordinates up by label rather than by fixed line number.
    wanted_labels = ('Station latitude', 'Station longitude')
    coordinates = {}
    for line in opened_lines[:header_length]:
        label, separator, value = line.partition(':')
        label = label.strip()
        if separator and label in wanted_labels:
            coordinates[label] = float(value)
    for label in wanted_labels:
        if label not in coordinates:
            raise ValueError('%s: no "%s:" entry in the header' % (filepath, label))
    lat = coordinates['Station latitude']
    lon = coordinates['Station longitude']

    start_index = []
    end_index = []
    data = []
    data_flag = []
    for line in opened_lines[header_length:]:
        # Split on runs of whitespace. re.split(r' *', ...) can match the empty
        # string and, on Python 3.7+, splits between every character.
        fields = line.split()
        if not fields:
            continue  # tolerate blank lines, e.g. at the end of the file
        start_index.append(float(fields[0]))
        end_index.append(float(fields[1]))
        data.append(float(fields[2]))
        data_flag.append(float(fields[3]))

    return {'lat': lat, 'lon': lon, 'start_index': start_index,
            'end_index': end_index, 'data': data, 'data_flag': data_flag}

In [ ]:
# Preview only the first values: the second sample file is an hourly series, so
# echoing the whole list would flood the notebook output.
read_nas(test_folder+test_file_2)['data'][:10]

In [ ]:


In [ ]:
# `start_index` and `data` come from the cell that parses `opened_lines` above.
# Labels follow the sample file's header ("days from file reference point",
# "pm10_mass, ug/m3"). Note that the last row still carries the 999.99
# missing-value sentinel at this stage, which shows up as a spike.
fig, ax = plt.subplots()
ax.plot(start_index, data)
ax.set_xlabel('Days from file reference point')
ax.set_ylabel('pm10_mass (ug/m3)')
plt.show()

In [ ]:
# NOTE(review): openfunctions is already imported in the first cell, so this
# statement does not re-read the module from disk. If the intent was to pick up
# edits made to openfunctions.py, an explicit reload is needed instead.
import openfunctions

In [ ]:
# Load the first sample file through the project module's read_nas (its
# implementation is not shown in this notebook).
raw_data_format = openfunctions.read_nas(test_folder+test_file)

In [ ]:
# List the fields returned by openfunctions.read_nas.
raw_data_format.keys()

In [ ]:
# Display the station name entry of the loaded file.
raw_data_format['station_name']

In [ ]:
# Replace missing-value sentinels with NaN. The sample file's header declares
# 999.99 as the missing value of the data column (and its last row carries it),
# so comparing against 9999 alone never masked anything. 9999 is still checked
# so that any file relying on the previous behaviour keeps working.
values = np.array(raw_data_format['data'])
is_missing = (values == 999.99) | (values == 9999)
raw_data_format['data'] = np.where(is_missing, np.nan, values)

In [ ]:
# Rebinds testDF (earlier the frame returned by data_to_pandas_dataframe) to a
# one-column frame of the cleaned values indexed by the 'start_index' entries.
testDF = pd.DataFrame(raw_data_format['data'],index=raw_data_format['start_index'])

In [ ]:
# Sanity check: turn one start offset (the sixth entry) into a calendar date.
# 2003-01-01 matches the first date on header line 6 of the sample file
# ('2003 01 01 2004 03 02') - presumably the file reference point the offsets
# are counted from; confirm against the format description.
# NOTE(review): consider moving this import to the first cell with the others.
from datetime import timedelta, datetime

d = timedelta(days=raw_data_format['start_index'][5])
st = datetime(2003,1,1)
date = st + d
print(date)

In [ ]:
# Start-date field ('YYYYMMDDhhmmss') taken from the second sample file name.
# NOTE(review): raw_data_format was read from test_file, but this start date
# comes from test_file_2. It happens to equal the 2003-01-01 date on test_file's
# header line 6; confirm which reference the offsets should really use.
dt_str = openfunctions.filename_unpack(test_file_2)[0]

In [ ]:
# Parse the full 'YYYYMMDDhhmmss' stamp. Building the datetime from hand-sliced
# year/month/day/hour pieces silently dropped the minutes and seconds.
start_dt = datetime.strptime(dt_str[:14], '%Y%m%d%H%M%S')

In [ ]:
# Display the parsed start datetime.
start_dt

In [ ]:
# Convert the start offsets (in days) to absolute timestamps, updating the list
# in place. Entries that are already datetimes are left untouched, so running
# this cell a second time no longer raises TypeError from timedelta(days=<datetime>).
raw_data_format['start_index'][:] = [
    stamp if isinstance(stamp, datetime) else start_dt + timedelta(days=stamp)
    for stamp in raw_data_format['start_index']
]

In [ ]:
# Convert the end offsets (in days) to absolute timestamps, updating the list
# in place. Entries that are already datetimes are left untouched, so running
# this cell a second time no longer raises TypeError from timedelta(days=<datetime>).
raw_data_format['end_index'][:] = [
    stamp if isinstance(stamp, datetime) else start_dt + timedelta(days=stamp)
    for stamp in raw_data_format['end_index']
]

In [ ]:
# Display the 'end_index' entries after the conversion above.
raw_data_format['end_index']

In [ ]: