In [2]:
#Import modules
from urllib.request import urlopen
import pandas as pd
import numpy as np
import struct
import matplotlib.pyplot as plt
%matplotlib notebook

#Remote Files to be used
pza_espana_dataurl = 'https://raw.githubusercontent.com/carlosvega/MadridPollutionData/master/pza_espana.txt'
pollution_urlfiles = [pza_espana_dataurl]

In [3]:
#format specifiers

fieldwidths = (8, 2, 2, 2, 2, 2, 2) + (5,1)*24 #7 + 24
fmtstring = ' '.join('{}{}'.format(abs(fw), 'x' if fw < 0 else 's')
                    for fw in fieldwidths)
fieldstruct = struct.Struct(fmtstring)
parse = fieldstruct.unpack_from #parse function

In [4]:
rows = []
columns=['station', 'magnitude', 'value', 'time']
for urlfile in pollution_urlfiles:
    with urlopen(urlfile) as ufile:
        for line in ufile:
            fields = parse(line)
            fields = [f.decode('ascii') for f in fields] #from binary to ascii string
            station, magnitude, technique, interval, year, month, day = fields[:7]
            samples = fields[9:]
            row_base = [station, magnitude]
            row_base = [int(e) for e in row_base]
            hours = pd.date_range('{}/{}/{}'.format(day, month, year), periods=24, freq='H')
            j = 0
            for i in range(0, len(samples), 2):
                val  = samples[i]
                isok = samples[i+1]
                if isok != 'V':
                    val = np.nan
                else:
                    val = float(val) #string to float
                row = row_base + [val, hours[j]]
                j+=1
                rows.append(row)
df = pd.DataFrame(rows, columns=columns)
df


Out[4]:
station magnitude value time
0 28079004 8 47.97 2001-01-01 00:00:00
1 28079004 8 43.85 2001-01-01 01:00:00
2 28079004 8 44.50 2001-01-01 02:00:00
3 28079004 8 47.74 2001-01-01 03:00:00
4 28079004 8 58.38 2001-01-01 04:00:00
5 28079004 8 57.01 2001-01-01 05:00:00
6 28079004 8 57.60 2001-01-01 06:00:00
7 28079004 8 60.26 2001-01-01 07:00:00
8 28079004 8 61.25 2001-01-01 08:00:00
9 28079004 8 60.73 2001-01-01 09:00:00
10 28079004 8 52.66 2001-01-01 10:00:00
11 28079004 8 53.21 2001-01-01 11:00:00
12 28079004 8 59.70 2001-01-01 12:00:00
13 28079004 8 64.38 2001-01-01 13:00:00
14 28079004 8 60.02 2001-01-01 14:00:00
15 28079004 8 49.95 2001-01-01 15:00:00
16 28079004 8 59.31 2001-01-01 16:00:00
17 28079004 8 71.20 2001-01-01 17:00:00
18 28079004 8 71.33 2001-01-01 18:00:00
19 28079004 8 70.42 2001-01-01 19:00:00
20 28079004 8 65.06 2001-01-01 20:00:00
21 28079004 8 63.03 2001-01-01 21:00:00
22 28079004 8 53.51 2001-01-01 22:00:00
23 28079004 8 47.71 2001-02-01 00:00:00
24 28079004 8 45.02 2001-02-01 01:00:00
25 28079004 8 38.38 2001-02-01 02:00:00
26 28079004 8 33.18 2001-02-01 03:00:00
27 28079004 8 48.97 2001-02-01 04:00:00
28 28079004 8 59.32 2001-02-01 05:00:00
29 28079004 8 65.57 2001-02-01 06:00:00
... ... ... ... ...
135624 28079004 8 22.00 2017-02-27 16:00:00
135625 28079004 8 22.00 2017-02-27 17:00:00
135626 28079004 8 26.00 2017-02-27 18:00:00
135627 28079004 8 38.00 2017-02-27 19:00:00
135628 28079004 8 20.00 2017-02-27 20:00:00
135629 28079004 8 14.00 2017-02-27 21:00:00
135630 28079004 8 11.00 2017-02-27 22:00:00
135631 28079004 8 4.00 2017-02-28 00:00:00
135632 28079004 8 4.00 2017-02-28 01:00:00
135633 28079004 8 2.00 2017-02-28 02:00:00
135634 28079004 8 1.00 2017-02-28 03:00:00
135635 28079004 8 3.00 2017-02-28 04:00:00
135636 28079004 8 9.00 2017-02-28 05:00:00
135637 28079004 8 32.00 2017-02-28 06:00:00
135638 28079004 8 33.00 2017-02-28 07:00:00
135639 28079004 8 34.00 2017-02-28 08:00:00
135640 28079004 8 28.00 2017-02-28 09:00:00
135641 28079004 8 28.00 2017-02-28 10:00:00
135642 28079004 8 28.00 2017-02-28 11:00:00
135643 28079004 8 26.00 2017-02-28 12:00:00
135644 28079004 8 22.00 2017-02-28 13:00:00
135645 28079004 8 20.00 2017-02-28 14:00:00
135646 28079004 8 17.00 2017-02-28 15:00:00
135647 28079004 8 20.00 2017-02-28 16:00:00
135648 28079004 8 35.00 2017-02-28 17:00:00
135649 28079004 8 43.00 2017-02-28 18:00:00
135650 28079004 8 36.00 2017-02-28 19:00:00
135651 28079004 8 28.00 2017-02-28 20:00:00
135652 28079004 8 20.00 2017-02-28 21:00:00
135653 28079004 8 18.00 2017-02-28 22:00:00

135654 rows × 4 columns


In [5]:
#not used but serves as documentation too
magnitude_dicc = {
    1:  'SO2',
    6:  'CO',
    7:  'NO',
    8:  'NO2',
    9:  'PM2.5',
    10:  'PM10',
    12:  'NOx',
    14:  'O3',
    20:  'TOL',
    30:  'BEN',
    35:  'EBE',
    37:  'MXY',
    38:  'PXY',
    39:  'OXY',
    42:  'TCH',
    44:  'NMHC'
}
pza_espana = 28079004 #code of the station to be used

Using NO2 values

  • NO2 (8)

In [6]:
pza_espana_df = df[df['station'] == pza_espana].set_index('time').sort_index()
no2 = pza_espana_df[pza_espana_df['magnitude'] == 8]['value'] #NO2
formats = ['-.', '--', '-']
plt.figure(figsize=(7,9))
series = []
for year in range(2001, 2017):
    min_date = '{}-01-01'.format(year)
    max_date = '{}-12-31'.format(year)
    serie = no2.ix[min_date:max_date].groupby(lambda r: r.hour).quantile(0.95) 
    plot_series = plt.plot(serie, formats[len(series)%len(formats)], linewidth=1, label=year)
    series += [serie]
    
plt.axhline(y=200, c='tomato', linewidth=2, linestyle='--', label='EU 1hr Max. $NO_2$ level limit')
plt.xticks(serie.index)
plt.legend(prop={'size': 7})
plt.ylabel('$µgr / m^3$')
plt.xlabel('Hour of the day')

warnings = no2[no2 > 200].groupby(lambda r: r.hour).count()
ax = plt.gca()
ax2 = ax.twinx()
warns = ax2.bar(warnings.index, warnings, color='purple', label='Number of times over EU threshold', zorder=1, alpha=0.25)
ax2.yaxis.set_label_position('right')
plt.ylabel('Number of times over EU threshold from 2000 to 2016')
ax2.set_ylim(0, 40)
ax2.legend(loc=0)

#Grid
ax2.grid(False)
ax.yaxis.grid(True)
plt.title('95th Percentile $NO_2$ levels during different hours of the day \n for the years 2000-2016', fontsize=12)
ax.set_ylim(0,220);
plt.savefig('NO2levels_years.png', dpi=300, bbox_inches='tight')


/usr/local/lib/python3.6/site-packages/ipykernel_launcher.py:9: DeprecationWarning: 
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  if __name__ == '__main__':

In [ ]:


In [ ]:


In [6]:
#Calculate different series
no2_median = no2.groupby(lambda r: r.hour).median() # ugr/m3
no2_mean = no2.groupby(lambda r: r.hour).mean() 
no2_max = no2.groupby(lambda r: r.hour).max()
no2_min = no2.groupby(lambda r: r.hour).min() 
no2_std = no2.groupby(lambda r: r.hour).std()

#Interquartile range 
no2_q1 = no2.groupby(lambda r: r.hour).quantile(0.25)
no2_q3 = no2.groupby(lambda r: r.hour).quantile(0.75)
upper_bound = no2_q3
lower_bound = no2_q1

plt.figure(figsize=(7,9));
plt.plot(no2_median, '-', label='Median', linewidth=1, c='green', zorder=10);
plt.plot(no2_mean, '-.', label='Mean', c='green', zorder=10);
plt.plot(no2_max, '-', label='Max', c='tomato', linewidth=1, zorder=10);
plt.plot(no2_min, '-', label='Min', c='blue', linewidth=1, zorder=10);
plt.plot(series[-1], '.', linewidth=1, label='2016', c='gray', zorder=10);

plt.gca().fill_between(range(no2_median.index.size), 
                       upper_bound, lower_bound, 
                       facecolor='lightgreen', 
                       alpha=0.25, label='Interquartile range')

plt.axhline(y=200, c='purple', linewidth=1.5, linestyle='-.', label='EU 1hr Max. $NO_2$ level limit')
plt.legend(prop={'size':8});
plt.ylabel('$µgr / m^3$')
plt.xlabel('Hour of the day')

warnings = no2[no2 > 200].groupby(lambda r: r.hour).count()
ax = plt.gca()
ax2 = ax.twinx()
warns = ax2.bar(warnings.index, warnings, color='purple', label='Number of times over EU threshold', zorder=1, alpha=0.25)
ax2.set_ylim(0, 40)
ax2.legend(loc=0)
ax2.yaxis.set_label_position('right')
plt.ylabel('Number of times over EU threshold from 2000 to 2016')

#Grid
ax2.grid(False)
ax.yaxis.grid(True)

plt.title('$NO_2$ levels during different hours of the day\nusing data from the years 2000-2016', fontsize=12)
ax.set_ylim(0,350);
plt.xticks(no2_median.index);
plt.savefig('NO2levels.png', dpi=300, bbox_inches='tight')


WHO

European Union


In [ ]: