In [1]:
import numpy as np
import pandas as pd
import re
import openfunctions
import aq_plot
import matplotlib.pyplot as plt
#%matplotlib inline
In [2]:
# Folder (relative to this notebook) and file name of the sample .nas file
# that the cells below load, plot and parse.
test_folder = '../../Ebas_150116_1110/'
test_file = 'GB0046R.20030107080000.20040302000000.low_vol_sampler.pm10_mass.pm10.6mo.1w.NO01L_lvs_uk17.NO01L_Thermo_Optical-Sunset_Lab.lev2.nas'
In [3]:
# Hand the sample file's path to the project plotting helper
# (presumably renders a filled time-series plot -- confirm in aq_plot).
aq_plot.nice_fill_plot(test_folder+test_file)
In [6]:
# Load the sample file through two project helpers: once as a DataFrame and
# once via read_and_clean, whose result is used like a dict further down
# (`.keys()`, `['units']`).
testDF = openfunctions.data_to_pandas_dataframe(test_folder+test_file)
dictionary = openfunctions.read_and_clean(test_folder+test_file)
In [14]:
# Inspect which keys the read_and_clean result provides.
dictionary.keys()
Out[14]:
In [13]:
# Area plot of the 'pm10_mass' column; labels are set on the Axes that pandas
# returns rather than through the implicit "current axes".
ax = testDF['pm10_mass'].plot(kind='area', color='#7c8c93')
ax.set_ylabel(dictionary['units'])
ax.set_xlabel('Date of observation')
plt.show()
In [33]:
Out[33]:
In [ ]:
In [6]:
# Re-read the sample file and keep only the measured values, keyed by the
# component name, so the dict can be turned into a one-column DataFrame.
data_info = openfunctions.read_and_clean(test_folder+test_file)
slim_data = {data_info['component']:data_info['data']}
In [7]:
# Build a DataFrame indexed by the 'start_index' values and plot it.
pd.DataFrame(slim_data,index=data_info['start_index']).plot()
Out[7]:
In [ ]:
In [ ]:
# Show every dot-separated field of the file name next to its position, to
# work out which positions carry which piece of metadata.
for position, field in enumerate(test_file.split('.')):
    print(position, field)
In [16]:
def filename_unpack(filename):
    """Extract metadata fields from a dot-separated data file name.

    The name is split on '.' and the fields are read by position. Any leading
    directory part is removed first, so a full path (whose folders may
    themselves contain dots, e.g. '../../folder/') gives the same result as
    the bare file name.

    Parameters
    ----------
    filename : str
        File name, optionally prefixed with a directory path.

    Returns
    -------
    tuple of str
        ``(start_date, duration, frequency, component)``, taken from fields
        1, 6, 7 and 4 of the name respectively.

    Raises
    ------
    IndexError
        If the name has fewer than eight dot-separated fields.
    """
    import os  # local import so this cell does not depend on another cell

    # Dots in the directory part would shift every field position.
    list_of_values = os.path.basename(filename).split('.')
    start_date = list_of_values[1]
    duration = list_of_values[6]
    frequency = list_of_values[7]
    component = list_of_values[4]
    return (start_date, duration, frequency, component)
In [17]:
# Check the unpacking against the first sample file name.
filename_unpack(test_file)
Out[17]:
In [18]:
# Second sample file name; it differs from test_file in station code, dates
# and the duration/frequency fields (1y / 1h).
test_file_2 = r'GB0043R.20030101000000.20110101000000.low_vol_sampler.pm10_mass.pm10.1y.1h.GB02L_lvs_43.GB02L_gravimetri.lev2.nas'
In [19]:
# Check the unpacking against the second sample file name.
filename_unpack(test_file_2)
Out[19]:
In [20]:
# Read the whole sample file into a list of lines (line endings kept) so the
# header layout can be inspected by line number.
with open(test_folder + test_file, 'rt') as nas_file:
    opened_lines = list(nas_file)
In [21]:
# Print each line with its zero-based number; these numbers are the offsets
# used by the parsing cells.
for line_number, text in enumerate(opened_lines):
    print(line_number, text)
In [ ]:
# Line 30 holds a "label: value" pair; the value is the station name.
# Split only on the first colon so a name that itself contains ': ' survives.
split_line = re.split(r': *', opened_lines[30], maxsplit=1)
# Strip the line ending instead of chopping a fixed two characters: the
# number of trailing characters depends on the file's newline convention.
name = split_line[1].strip()
In [ ]:
# Display the parsed station name.
name
In [ ]:
# Lines 31 and 32 hold "label: value" pairs; the first value is stored as
# lat and the second as lon.
latlon_ = []
for line in opened_lines[31:33]:
    split_line = re.split(r': *', line)
    if split_line[0] == '':
        split_line.pop(0)
    # float() ignores surrounding whitespace (including the line ending), so
    # no fixed-width slice is needed; slicing off two characters would drop a
    # digit whenever the line ends in a single '\n'.
    latlon_.append(float(split_line[1]))
lat, lon = latlon_
In [ ]:
# Parse the data table, which starts at line 45: four whitespace-separated
# numeric columns per row (start, end, value, flag).
start_index = []
end_index = []
data = []
data_flag = []
for line in opened_lines[45:]:
    # str.split() splits on runs of whitespace and drops leading/trailing
    # whitespace and the line ending. The previous pattern r' *' can match
    # the empty string, which on Python 3.7+ splits between every character.
    split_line = line.split()
    if not split_line:
        continue  # tolerate blank lines, e.g. at the end of the file
    start_index.append(float(split_line[0]))
    end_index.append(float(split_line[1]))
    data.append(float(split_line[2]))
    data_flag.append(float(split_line[3]))
In [ ]:
def read_nas(filepath, latlon_lines=(31, 33), data_start=45):
    """Read the coordinates and the data table from a ``.nas`` text file.

    The header is addressed by fixed line offsets, which default to the
    layout of the sample files explored in this notebook; pass other offsets
    for files with a different header length.

    Parameters
    ----------
    filepath : str
        Path of the file to read.
    latlon_lines : tuple of int, optional
        ``(start, stop)`` slice bounds (zero-based, stop exclusive) of the two
        "label: value" lines holding latitude and longitude, in that order.
    data_start : int, optional
        Zero-based number of the first data row. Every following non-blank
        line must contain at least four whitespace-separated numbers.

    Returns
    -------
    dict
        Keys ``'lat'`` and ``'lon'`` (floats) and ``'start_index'``,
        ``'end_index'``, ``'data'``, ``'data_flag'`` (lists of floats, one
        entry per data row).

    Raises
    ------
    ValueError
        If a field cannot be converted to float, or the coordinate slice does
        not yield exactly two lines.
    IndexError
        If a coordinate line has no value or a data row has fewer than four
        columns.
    """
    with open(filepath, 'rt') as opened_file:
        opened_lines = opened_file.readlines()

    latlon_ = []
    for line in opened_lines[latlon_lines[0]:latlon_lines[1]]:
        split_line = re.split(r': *', line)
        if split_line[0] == '':
            split_line.pop(0)
        # float() ignores surrounding whitespace and the line ending, so no
        # fixed two-character slice is needed (it would drop a digit when the
        # line ends in a single '\n').
        latlon_.append(float(split_line[1]))
    lat, lon = latlon_

    start_index = []
    end_index = []
    data = []
    data_flag = []
    for line in opened_lines[data_start:]:
        # str.split() splits on whitespace runs; the former r' *' pattern can
        # match the empty string and, on Python 3.7+, splits every character.
        columns = line.split()
        if not columns:
            continue  # skip blank lines such as a trailing newline
        start_index.append(float(columns[0]))
        end_index.append(float(columns[1]))
        data.append(float(columns[2]))
        data_flag.append(float(columns[3]))

    return {'lat': lat, 'lon': lon, 'start_index': start_index,
            'end_index': end_index, 'data': data, 'data_flag': data_flag}
In [ ]:
# Try the reader on the second sample file and show its 'data' list.
read_nas(test_folder+test_file_2)['data']
In [ ]:
In [ ]:
# Quick line plot of the values parsed by the scratch cell above
# (module-level start_index / data lists) against their start offsets.
plt.plot(start_index,data)
In [ ]:
import openfunctions
In [ ]:
# Read the first sample file with the module version of read_nas.
raw_data_format = openfunctions.read_nas(test_folder+test_file)
In [ ]:
# Inspect which keys the module reader returns.
raw_data_format.keys()
In [ ]:
# Show the station name entry of the module reader's result.
raw_data_format['station_name']
In [ ]:
# Convert the values to an array and blank out every entry equal to 9999
# (presumably the file's missing-value marker -- confirm) with NaN.
_values = np.array(raw_data_format['data'])
_is_sentinel = _values == 9999
raw_data_format['data'] = np.where(_is_sentinel, np.nan, _values)
In [ ]:
# Rebuild testDF from the cleaned values, indexed by the start offsets.
# Note: this replaces the testDF created near the top of the notebook.
testDF = pd.DataFrame(raw_data_format['data'],index=raw_data_format['start_index'])
In [ ]:
# NOTE(review): later cells rely on `datetime` / `timedelta` from this
# mid-notebook import; consider moving it to the import cell at the top.
from datetime import timedelta, datetime
# Sanity check: interpret the sixth start offset as a number of days counted
# from 2003-01-01 and print the resulting date. The reference date is
# hard-coded here (presumably the file's reference date -- confirm).
d = timedelta(days=raw_data_format['start_index'][5])
st = datetime(2003,1,1)
date = st + d
print(date)
In [ ]:
# Take the first element returned by filename_unpack (the start timestamp
# field of the file name) as a string.
# NOTE(review): this uses test_file_2, whereas raw_data_format was read from
# test_file -- confirm which file's start date is intended here.
dt_str = openfunctions.filename_unpack(test_file_2)[0]
In [ ]:
# Parse year, month, day and hour from the first ten characters of the
# timestamp string; any minute/second digits that follow are ignored.
start_dt = datetime.strptime(dt_str[:10], '%Y%m%d%H')
In [ ]:
# Display the parsed start datetime.
start_dt
In [ ]:
# Replace each start offset (in days) with an absolute datetime, in place.
# Entries that are already datetimes are skipped so the cell can be re-run
# without raising TypeError on previously converted values.
for i, offset in enumerate(raw_data_format['start_index']):
    if isinstance(offset, datetime):
        continue
    raw_data_format['start_index'][i] = start_dt + timedelta(days=offset)
In [ ]:
# Replace each end offset (in days) with an absolute datetime, in place.
# Entries that are already datetimes are skipped so the cell can be re-run
# without raising TypeError on previously converted values.
for i, offset in enumerate(raw_data_format['end_index']):
    if isinstance(offset, datetime):
        continue
    raw_data_format['end_index'][i] = start_dt + timedelta(days=offset)
In [ ]:
# Show the converted end times.
raw_data_format['end_index']
In [ ]: