In [1]:
    
import numpy as np
import pandas as pd
import re
import openfunctions
import aq_plot
import matplotlib.pyplot as plt
#%matplotlib inline
    
In [2]:
    
# Location of the downloaded data folder and the name of the `.nas` file
# examined in the cells below.
test_folder = '../../Ebas_150116_1110/'
test_file = (
    'GB0046R.20030107080000.20040302000000'
    '.low_vol_sampler.pm10_mass.pm10.6mo.1w'
    '.NO01L_lvs_uk17.NO01L_Thermo_Optical-Sunset_Lab.lev2.nas'
)
    
In [3]:
    
# Plot the example file with the project's aq_plot helper
# (what exactly is drawn is defined in aq_plot, not in this notebook).
aq_plot.nice_fill_plot(test_folder+test_file)
    
In [6]:
    
# Load the same file through two openfunctions helpers:
# `testDF` is indexed by the 'pm10_mass' column and plotted below;
# `dictionary` is read below for its 'units' entry.
testDF = openfunctions.data_to_pandas_dataframe(test_folder+test_file)
dictionary = openfunctions.read_and_clean(test_folder+test_file)
    
In [14]:
    
# Show which entries read_and_clean returned.
dictionary.keys()
    
    Out[14]:
In [13]:
    
# Area plot of the 'pm10_mass' column; the y-axis label comes from the
# 'units' entry of `dictionary`. The plt.* calls act on the axes created
# by the pandas plot call, so the order of these statements matters.
testDF['pm10_mass'].plot(kind='area',color='#7c8c93')
plt.ylabel(dictionary['units'])
plt.xlabel('Date of observation')
plt.show()
    
In [33]:
    
    
    Out[33]:
    
    
In [ ]:
    
    
In [6]:
    
# Read the file again with read_and_clean (same call that produced
# `dictionary` earlier) and keep only the data values, keyed by the
# value of the 'component' entry.
data_info = openfunctions.read_and_clean(test_folder+test_file)
slim_data = {data_info['component']:data_info['data']}
    
In [7]:
    
# One-column DataFrame indexed by the 'start_index' entry, plotted with pandas defaults.
pd.DataFrame(slim_data,index=data_info['start_index']).plot()
    
    Out[7]:
    
In [ ]:
    
    
In [ ]:
    
# Print each dot-separated field of the file name next to its position,
# to work out which index holds which piece of information.
for position, field in enumerate(test_file.split('.')):
    print(position,field)
    
In [16]:
    
def filename_unpack(filename):
    """Pick four dot-separated fields out of a data file name.

    The name is split on '.' and the fields are selected by position:
    field 1 (start date), field 6 (duration), field 7 (frequency) and
    field 4 (component).

    Any leading directory part is removed before splitting, so a full or
    relative path (e.g. '../../folder/NAME...nas') gives the same result
    as the bare file name. Without this, dots inside the directory part
    would shift every field index.

    Parameters
    ----------
    filename : str
        File name, optionally prefixed with a directory path.

    Returns
    -------
    tuple of str
        (start_date, duration, frequency, component)

    Raises
    ------
    IndexError
        If the file name has fewer than eight dot-separated fields.
    """
    import os  # local import so the function does not depend on the notebook's import cell

    list_of_values = os.path.basename(filename).split('.')
    start_date = list_of_values[1]
    duration = list_of_values[6]
    frequency = list_of_values[7]
    component = list_of_values[4]
    return (start_date, duration, frequency, component)
    
In [17]:
    
# Try the parser on the first example file name.
filename_unpack(test_file)
    
    Out[17]:
In [18]:
    
# Second example file name, used to check the parsers against another file.
test_file_2 = (
    'GB0043R.20030101000000.20110101000000'
    '.low_vol_sampler.pm10_mass.pm10.1y.1h'
    '.GB02L_lvs_43.GB02L_gravimetri.lev2.nas'
)
    
In [19]:
    
# Same check on the second example file name.
filename_unpack(test_file_2)
    
    Out[19]:
In [20]:
    
# Read the whole file into a list of lines (line endings kept) so the
# header layout can be inspected by line number in the next cells.
with open(test_folder+test_file, 'rt') as opened_file:
    opened_lines = list(opened_file)
    
In [21]:
    
# Print every line with its zero-based index; the hard-coded line numbers
# used in the following cells were read off this listing.
for line_number,text in enumerate(opened_lines):
    print(line_number,text)
    
    
In [ ]:
    
# Take the text after the first ':' on line 30 (presumably the station
# name — confirm against the file listing above).
# strip() removes the line ending whatever its length; the previous
# [:-2] slice cut off a real character when the line ended in '\n' only.
split_line = re.split(r': *',opened_lines[30])
name = split_line[1].strip()
    
In [ ]:
    
# Display the extracted value.
name
    
In [ ]:
    
# Parse the numeric value after the ':' on lines 31 and 32, unpacked
# below as latitude then longitude.
latlon_ = []
for line in opened_lines[31:33]:
    # Strip the line ending first: slicing with [:-2] dropped the last
    # digit of the number when the line ended in '\n' only.
    split_line = re.split(r': *',line.strip())
    if split_line[0] == '':
        split_line.pop(0)
    latlon_.append(float(split_line[1]))

lat, lon = latlon_
    
In [ ]:
    
# Parse the data rows (everything from line 45 on) into four parallel
# lists: start offset, end offset, value and flag.
start_index = []
end_index = []
data = []
data_flag = []
for line in opened_lines[45:]:
    # str.split() with no argument splits on runs of whitespace and
    # discards the line ending. The previous re.split(r' *', line) can
    # match the empty string, which on Python 3.7+ splits between every
    # character and makes the float() calls fail.
    split_line = line.split()
    if not split_line:
        continue  # skip blank lines (e.g. a trailing empty line)
    start_index.append(float(split_line[0]))
    end_index.append(float(split_line[1]))
    data.append(float(split_line[2]))
    data_flag.append(float(split_line[3]))
    
In [ ]:
    
def read_nas(filepath, latlon_start=31, data_start=45):
    """Read coordinates and the data table from a `.nas` text file.

    Two consecutive header lines starting at ``latlon_start`` are read as
    ``<label>: <number>`` and returned as latitude and longitude. Every
    non-blank line from ``data_start`` onward is read as four
    whitespace-separated numbers: start offset, end offset, value, flag.

    Parameters
    ----------
    filepath : str
        Path of the file to read.
    latlon_start : int, optional
        Zero-based index of the latitude line; the longitude line is the
        one after it. Defaults to 31, the value previously hard-coded.
    data_start : int, optional
        Zero-based index of the first data row. Defaults to 45, the value
        previously hard-coded.

    Returns
    -------
    dict
        Keys 'lat' and 'lon' (floats) and 'start_index', 'end_index',
        'data', 'data_flag' (lists of floats, one entry per data row).

    Raises
    ------
    ValueError
        If a field is not numeric or the two coordinate lines are missing.
    IndexError
        If a coordinate line has no ':' or a data row has fewer than four
        fields.
    """
    with open(filepath, 'rt') as opened_file:
        opened_lines = opened_file.readlines()

    latlon_ = []
    for line in opened_lines[latlon_start:latlon_start + 2]:
        # strip() removes the line ending whatever its length; slicing
        # with [:-2] dropped the last digit when the line ended in '\n'.
        split_line = re.split(r': *', line.strip())
        if split_line[0] == '':
            split_line.pop(0)
        latlon_.append(float(split_line[1]))
    lat, lon = latlon_

    start_index = []
    end_index = []
    data = []
    data_flag = []
    for line in opened_lines[data_start:]:
        # str.split() splits on runs of whitespace. re.split(r' *', ...)
        # can match the empty string and, on Python 3.7+, splits between
        # every character.
        fields = line.split()
        if not fields:
            continue  # tolerate blank lines, e.g. at the end of the file
        start_index.append(float(fields[0]))
        end_index.append(float(fields[1]))
        data.append(float(fields[2]))
        data_flag.append(float(fields[3]))

    return {'lat': lat, 'lon': lon, 'start_index': start_index,
            'end_index': end_index, 'data': data, 'data_flag': data_flag}
    
In [ ]:
    
# Run the reader on the second example file and show its data values.
read_nas(test_folder+test_file_2)['data']
    
In [ ]:
    
    
In [ ]:
    
# Quick line plot of the values against their start offsets, using the
# module-level lists filled by the row-parsing cell above.
plt.plot(start_index,data)
    
In [ ]:
    
import openfunctions
    
In [ ]:
    
# Read the first example file with the read_nas provided by openfunctions
# (not the notebook-local read_nas defined above).
raw_data_format = openfunctions.read_nas(test_folder+test_file)
    
In [ ]:
    
# Show which entries openfunctions.read_nas returned.
raw_data_format.keys()
    
In [ ]:
    
# Display the 'station_name' entry.
raw_data_format['station_name']
    
In [ ]:
    
# Convert the values to a NumPy array and replace every 9999 with NaN
# (presumably 9999 is the file's missing-value marker — confirm against the header).
# Note: this overwrites the 'data' entry of raw_data_format in place.
raw_data_format['data'] = np.array(raw_data_format['data'])
raw_data_format['data'] = np.where(raw_data_format['data']==9999,np.nan,raw_data_format['data'])
    
In [ ]:
    
# Build a DataFrame of the masked values indexed by 'start_index'.
# Note: this rebinds `testDF`, which earlier held the frame returned by
# openfunctions.data_to_pandas_dataframe.
testDF = pd.DataFrame(raw_data_format['data'],index=raw_data_format['start_index'])
    
In [ ]:
    
# Spot check: treat entry 5 of 'start_index' as a number of days, add it
# to 2003-01-01 and print the resulting date.
# NOTE(review): the 2003-01-01 reference is hard-coded — confirm it
# matches the reference date of the file being read.
# The names imported here (timedelta, datetime) are also used by later cells.
from datetime import timedelta, datetime
d = timedelta(days=raw_data_format['start_index'][5])
st = datetime(2003,1,1)
date = st + d
print(date)
    
In [ ]:
    
# First element returned by openfunctions.filename_unpack for the second file name.
# NOTE(review): this comes from test_file_2, whereas raw_data_format was
# read from test_file — confirm that mixing the two is intended.
dt_str = openfunctions.filename_unpack(test_file_2)[0]
    
In [ ]:
    
# Build the reference datetime from the first ten characters of dt_str
# (year, month, day, hour); anything after the hour is ignored.
start_dt = datetime(*(int(dt_str[lo:hi]) for lo, hi in ((0, 4), (4, 6), (6, 8), (8, 10))))
    
In [ ]:
    
# Display the parsed reference datetime.
start_dt
    
In [ ]:
    
# Replace each 'start_index' entry, in place, by start_dt plus that many days.
# Entries that are already datetimes are skipped so the cell can be re-run:
# without the guard a second run calls timedelta(days=<datetime>) and
# raises TypeError.
for i,d in enumerate(raw_data_format['start_index']):
    if isinstance(d, datetime):
        continue
    date = start_dt + timedelta(days=d)
    raw_data_format['start_index'][i] = date
    
In [ ]:
    
# Replace each 'end_index' entry, in place, by start_dt plus that many days.
# Entries that are already datetimes are skipped so the cell can be re-run:
# without the guard a second run calls timedelta(days=<datetime>) and
# raises TypeError.
for i,d in enumerate(raw_data_format['end_index']):
    if isinstance(d, datetime):
        continue
    date = start_dt + timedelta(days=d)
    raw_data_format['end_index'][i] = date
    
In [ ]:
    
# Display the converted end times.
raw_data_format['end_index']
    
In [ ]: