notebook.community



In [2]:

    
#Import modules
from urllib.request import urlopen
import pandas as pd
import numpy as np
import struct
import matplotlib.pyplot as plt
%matplotlib notebook

#Remote Files to be used
pza_espana_dataurl = 'https://raw.githubusercontent.com/carlosvega/MadridPollutionData/master/pza_espana.txt'
pollution_urlfiles = [pza_espana_dataurl]



In [3]:

    
#format specifiers

fieldwidths = (8, 2, 2, 2, 2, 2, 2) + (5,1)*24 #7 + 24
fmtstring = ' '.join('{}{}'.format(abs(fw), 'x' if fw < 0 else 's')
                    for fw in fieldwidths)
fieldstruct = struct.Struct(fmtstring)
parse = fieldstruct.unpack_from #parse function



In [4]:

    
rows = []
columns=['station', 'magnitude', 'value', 'time']
for urlfile in pollution_urlfiles:
    with urlopen(urlfile) as ufile:
        for line in ufile:
            fields = parse(line)
            fields = [f.decode('ascii') for f in fields] #from binary to ascii string
            station, magnitude, technique, interval, year, month, day = fields[:7]
            samples = fields[9:]
            row_base = [station, magnitude]
            row_base = [int(e) for e in row_base]
            hours = pd.date_range('{}/{}/{}'.format(day, month, year), periods=24, freq='H')
            j = 0
            for i in range(0, len(samples), 2):
                val  = samples[i]
                isok = samples[i+1]
                if isok != 'V':
                    val = np.nan
                else:
                    val = float(val) #string to float
                row = row_base + [val, hours[j]]
                j+=1
                rows.append(row)
df = pd.DataFrame(rows, columns=columns)
df









    Out[4]:







  
    
      
      station
      magnitude
      value
      time
    
  
  
    
      0
      28079004
      8
      47.97
      2001-01-01 00:00:00
    
    
      1
      28079004
      8
      43.85
      2001-01-01 01:00:00
    
    
      2
      28079004
      8
      44.50
      2001-01-01 02:00:00
    
    
      3
      28079004
      8
      47.74
      2001-01-01 03:00:00
    
    
      4
      28079004
      8
      58.38
      2001-01-01 04:00:00
    
    
      5
      28079004
      8
      57.01
      2001-01-01 05:00:00
    
    
      6
      28079004
      8
      57.60
      2001-01-01 06:00:00
    
    
      7
      28079004
      8
      60.26
      2001-01-01 07:00:00
    
    
      8
      28079004
      8
      61.25
      2001-01-01 08:00:00
    
    
      9
      28079004
      8
      60.73
      2001-01-01 09:00:00
    
    
      10
      28079004
      8
      52.66
      2001-01-01 10:00:00
    
    
      11
      28079004
      8
      53.21
      2001-01-01 11:00:00
    
    
      12
      28079004
      8
      59.70
      2001-01-01 12:00:00
    
    
      13
      28079004
      8
      64.38
      2001-01-01 13:00:00
    
    
      14
      28079004
      8
      60.02
      2001-01-01 14:00:00
    
    
      15
      28079004
      8
      49.95
      2001-01-01 15:00:00
    
    
      16
      28079004
      8
      59.31
      2001-01-01 16:00:00
    
    
      17
      28079004
      8
      71.20
      2001-01-01 17:00:00
    
    
      18
      28079004
      8
      71.33
      2001-01-01 18:00:00
    
    
      19
      28079004
      8
      70.42
      2001-01-01 19:00:00
    
    
      20
      28079004
      8
      65.06
      2001-01-01 20:00:00
    
    
      21
      28079004
      8
      63.03
      2001-01-01 21:00:00
    
    
      22
      28079004
      8
      53.51
      2001-01-01 22:00:00
    
    
      23
      28079004
      8
      47.71
      2001-02-01 00:00:00
    
    
      24
      28079004
      8
      45.02
      2001-02-01 01:00:00
    
    
      25
      28079004
      8
      38.38
      2001-02-01 02:00:00
    
    
      26
      28079004
      8
      33.18
      2001-02-01 03:00:00
    
    
      27
      28079004
      8
      48.97
      2001-02-01 04:00:00
    
    
      28
      28079004
      8
      59.32
      2001-02-01 05:00:00
    
    
      29
      28079004
      8
      65.57
      2001-02-01 06:00:00
    
    
      ...
      ...
      ...
      ...
      ...
    
    
      135624
      28079004
      8
      22.00
      2017-02-27 16:00:00
    
    
      135625
      28079004
      8
      22.00
      2017-02-27 17:00:00
    
    
      135626
      28079004
      8
      26.00
      2017-02-27 18:00:00
    
    
      135627
      28079004
      8
      38.00
      2017-02-27 19:00:00
    
    
      135628
      28079004
      8
      20.00
      2017-02-27 20:00:00
    
    
      135629
      28079004
      8
      14.00
      2017-02-27 21:00:00
    
    
      135630
      28079004
      8
      11.00
      2017-02-27 22:00:00
    
    
      135631
      28079004
      8
      4.00
      2017-02-28 00:00:00
    
    
      135632
      28079004
      8
      4.00
      2017-02-28 01:00:00
    
    
      135633
      28079004
      8
      2.00
      2017-02-28 02:00:00
    
    
      135634
      28079004
      8
      1.00
      2017-02-28 03:00:00
    
    
      135635
      28079004
      8
      3.00
      2017-02-28 04:00:00
    
    
      135636
      28079004
      8
      9.00
      2017-02-28 05:00:00
    
    
      135637
      28079004
      8
      32.00
      2017-02-28 06:00:00
    
    
      135638
      28079004
      8
      33.00
      2017-02-28 07:00:00
    
    
      135639
      28079004
      8
      34.00
      2017-02-28 08:00:00
    
    
      135640
      28079004
      8
      28.00
      2017-02-28 09:00:00
    
    
      135641
      28079004
      8
      28.00
      2017-02-28 10:00:00
    
    
      135642
      28079004
      8
      28.00
      2017-02-28 11:00:00
    
    
      135643
      28079004
      8
      26.00
      2017-02-28 12:00:00
    
    
      135644
      28079004
      8
      22.00
      2017-02-28 13:00:00
    
    
      135645
      28079004
      8
      20.00
      2017-02-28 14:00:00
    
    
      135646
      28079004
      8
      17.00
      2017-02-28 15:00:00
    
    
      135647
      28079004
      8
      20.00
      2017-02-28 16:00:00
    
    
      135648
      28079004
      8
      35.00
      2017-02-28 17:00:00
    
    
      135649
      28079004
      8
      43.00
      2017-02-28 18:00:00
    
    
      135650
      28079004
      8
      36.00
      2017-02-28 19:00:00
    
    
      135651
      28079004
      8
      28.00
      2017-02-28 20:00:00
    
    
      135652
      28079004
      8
      20.00
      2017-02-28 21:00:00
    
    
      135653
      28079004
      8
      18.00
      2017-02-28 22:00:00
    
  

135654 rows × 4 columns



In [5]:

    
#not used but serves as documentation too
magnitude_dicc = {
    1:  'SO2',
    6:  'CO',
    7:  'NO',
    8:  'NO2',
    9:  'PM2.5',
    10:  'PM10',
    12:  'NOx',
    14:  'O3',
    20:  'TOL',
    30:  'BEN',
    35:  'EBE',
    37:  'MXY',
    38:  'PXY',
    39:  'OXY',
    42:  'TCH',
    44:  'NMHC'
}
pza_espana = 28079004 #code of the station to be used

Using NO2 values

NO2 (8)



In [6]:

    
pza_espana_df = df[df['station'] == pza_espana].set_index('time').sort_index()
no2 = pza_espana_df[pza_espana_df['magnitude'] == 8]['value'] #NO2
formats = ['-.', '--', '-']
plt.figure(figsize=(7,9))
series = []
for year in range(2001, 2017):
    min_date = '{}-01-01'.format(year)
    max_date = '{}-12-31'.format(year)
    serie = no2.ix[min_date:max_date].groupby(lambda r: r.hour).quantile(0.95) 
    plot_series = plt.plot(serie, formats[len(series)%len(formats)], linewidth=1, label=year)
    series += [serie]
    
plt.axhline(y=200, c='tomato', linewidth=2, linestyle='--', label='EU 1hr Max. $NO_2$ level limit')
plt.xticks(serie.index)
plt.legend(prop={'size': 7})
plt.ylabel('$µgr / m^3$')
plt.xlabel('Hour of the day')

warnings = no2[no2 > 200].groupby(lambda r: r.hour).count()
ax = plt.gca()
ax2 = ax.twinx()
warns = ax2.bar(warnings.index, warnings, color='purple', label='Number of times over EU threshold', zorder=1, alpha=0.25)
ax2.yaxis.set_label_position('right')
plt.ylabel('Number of times over EU threshold from 2000 to 2016')
ax2.set_ylim(0, 40)
ax2.legend(loc=0)

#Grid
ax2.grid(False)
ax.yaxis.grid(True)
plt.title('95th Percentile $NO_2$ levels during different hours of the day \n for the years 2000-2016', fontsize=12)
ax.set_ylim(0,220);
plt.savefig('NO2levels_years.png', dpi=300, bbox_inches='tight')









    














    











    



/usr/local/lib/python3.6/site-packages/ipykernel_launcher.py:9: DeprecationWarning: 
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  if __name__ == '__main__':



In [ ]:



In [ ]:



In [6]:

    
#Calculate different series
no2_median = no2.groupby(lambda r: r.hour).median() # ugr/m3
no2_mean = no2.groupby(lambda r: r.hour).mean() 
no2_max = no2.groupby(lambda r: r.hour).max()
no2_min = no2.groupby(lambda r: r.hour).min() 
no2_std = no2.groupby(lambda r: r.hour).std()

#Interquartile range 
no2_q1 = no2.groupby(lambda r: r.hour).quantile(0.25)
no2_q3 = no2.groupby(lambda r: r.hour).quantile(0.75)
upper_bound = no2_q3
lower_bound = no2_q1

plt.figure(figsize=(7,9));
plt.plot(no2_median, '-', label='Median', linewidth=1, c='green', zorder=10);
plt.plot(no2_mean, '-.', label='Mean', c='green', zorder=10);
plt.plot(no2_max, '-', label='Max', c='tomato', linewidth=1, zorder=10);
plt.plot(no2_min, '-', label='Min', c='blue', linewidth=1, zorder=10);
plt.plot(series[-1], '.', linewidth=1, label='2016', c='gray', zorder=10);

plt.gca().fill_between(range(no2_median.index.size), 
                       upper_bound, lower_bound, 
                       facecolor='lightgreen', 
                       alpha=0.25, label='Interquartile range')

plt.axhline(y=200, c='purple', linewidth=1.5, linestyle='-.', label='EU 1hr Max. $NO_2$ level limit')
plt.legend(prop={'size':8});
plt.ylabel('$µgr / m^3$')
plt.xlabel('Hour of the day')

warnings = no2[no2 > 200].groupby(lambda r: r.hour).count()
ax = plt.gca()
ax2 = ax.twinx()
warns = ax2.bar(warnings.index, warnings, color='purple', label='Number of times over EU threshold', zorder=1, alpha=0.25)
ax2.set_ylim(0, 40)
ax2.legend(loc=0)
ax2.yaxis.set_label_position('right')
plt.ylabel('Number of times over EU threshold from 2000 to 2016')

#Grid
ax2.grid(False)
ax.yaxis.grid(True)

plt.title('$NO_2$ levels during different hours of the day\nusing data from the years 2000-2016', fontsize=12)
ax.set_ylim(0,350);
plt.xticks(no2_median.index);
plt.savefig('NO2levels.png', dpi=300, bbox_inches='tight')

Reference links for the recommended levels of NO2

WHO

European Union



In [ ]:

	station	magnitude	value	time
0	28079004	8	47.97	2001-01-01 00:00:00
1	28079004	8	43.85	2001-01-01 01:00:00
2	28079004	8	44.50	2001-01-01 02:00:00
3	28079004	8	47.74	2001-01-01 03:00:00
4	28079004	8	58.38	2001-01-01 04:00:00
5	28079004	8	57.01	2001-01-01 05:00:00
6	28079004	8	57.60	2001-01-01 06:00:00
7	28079004	8	60.26	2001-01-01 07:00:00
8	28079004	8	61.25	2001-01-01 08:00:00
9	28079004	8	60.73	2001-01-01 09:00:00
10	28079004	8	52.66	2001-01-01 10:00:00
11	28079004	8	53.21	2001-01-01 11:00:00
12	28079004	8	59.70	2001-01-01 12:00:00
13	28079004	8	64.38	2001-01-01 13:00:00
14	28079004	8	60.02	2001-01-01 14:00:00
15	28079004	8	49.95	2001-01-01 15:00:00
16	28079004	8	59.31	2001-01-01 16:00:00
17	28079004	8	71.20	2001-01-01 17:00:00
18	28079004	8	71.33	2001-01-01 18:00:00
19	28079004	8	70.42	2001-01-01 19:00:00
20	28079004	8	65.06	2001-01-01 20:00:00
21	28079004	8	63.03	2001-01-01 21:00:00
22	28079004	8	53.51	2001-01-01 22:00:00
23	28079004	8	47.71	2001-02-01 00:00:00
24	28079004	8	45.02	2001-02-01 01:00:00
25	28079004	8	38.38	2001-02-01 02:00:00
26	28079004	8	33.18	2001-02-01 03:00:00
27	28079004	8	48.97	2001-02-01 04:00:00
28	28079004	8	59.32	2001-02-01 05:00:00
29	28079004	8	65.57	2001-02-01 06:00:00
...	...	...	...	...
135624	28079004	8	22.00	2017-02-27 16:00:00
135625	28079004	8	22.00	2017-02-27 17:00:00
135626	28079004	8	26.00	2017-02-27 18:00:00
135627	28079004	8	38.00	2017-02-27 19:00:00
135628	28079004	8	20.00	2017-02-27 20:00:00
135629	28079004	8	14.00	2017-02-27 21:00:00
135630	28079004	8	11.00	2017-02-27 22:00:00
135631	28079004	8	4.00	2017-02-28 00:00:00
135632	28079004	8	4.00	2017-02-28 01:00:00
135633	28079004	8	2.00	2017-02-28 02:00:00
135634	28079004	8	1.00	2017-02-28 03:00:00
135635	28079004	8	3.00	2017-02-28 04:00:00
135636	28079004	8	9.00	2017-02-28 05:00:00
135637	28079004	8	32.00	2017-02-28 06:00:00
135638	28079004	8	33.00	2017-02-28 07:00:00
135639	28079004	8	34.00	2017-02-28 08:00:00
135640	28079004	8	28.00	2017-02-28 09:00:00
135641	28079004	8	28.00	2017-02-28 10:00:00
135642	28079004	8	28.00	2017-02-28 11:00:00
135643	28079004	8	26.00	2017-02-28 12:00:00
135644	28079004	8	22.00	2017-02-28 13:00:00
135645	28079004	8	20.00	2017-02-28 14:00:00
135646	28079004	8	17.00	2017-02-28 15:00:00
135647	28079004	8	20.00	2017-02-28 16:00:00
135648	28079004	8	35.00	2017-02-28 17:00:00
135649	28079004	8	43.00	2017-02-28 18:00:00
135650	28079004	8	36.00	2017-02-28 19:00:00
135651	28079004	8	28.00	2017-02-28 20:00:00
135652	28079004	8	20.00	2017-02-28 21:00:00
135653	28079004	8	18.00	2017-02-28 22:00:00