In [1]:

    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt



In [2]:

    
# Each year's readings live in their own CSV, addressed as head + <year> + tail.
head = 'data/Beijing_'
tail = '_HourlyPM25.csv'

# Years covered by the analysis: 2013 through 2016 inclusive.
years = range(2013, 2017)

# Calendar month numbers, plus the labels used in plot titles and output
# file names (months_spell[m - 1] is the label for month number m).
months = range(1, 13)
months_spell = 'Jan Feb Mar Apr May Jun Jul Aug Sept Oct Nov Dec'.split()

Daily comparison across months



In [41]:

    
# For every calendar month, plot the daily median 'Value' of each year on a
# shared day-of-month axis and save the figure as output/<MonthLabel>.png.
plt.style.use('ggplot')

# Load and filter each year's file once up front; the original re-read every
# CSV for every month. Rows flagged 'Missing' in 'QC Name' are discarded.
yearly_data = {}
for year in years:
    raw = pd.read_csv(head + str(year) + tail, encoding='cp1252', skiprows=3)
    yearly_data[year] = raw[raw['QC Name'] != 'Missing']

for month in months:
    # One row per possible day of month (1..31), one column per year.
    output = pd.DataFrame(index=range(1, 32))
    for year in years:
        data = yearly_data[year]
        # A groupby Series aligns on the 'Day' index when assigned as a
        # column and simply yields NaN for a month without any rows.
        daily_median = data[data['Month'] == month].groupby('Day')['Value'].median()
        output[str(year) + '-' + str(month)] = daily_median
        # The December 2016 column is forced to 0 so that the dropna() below
        # does not remove every row for that month.
        # NOTE(review): presumably the 2016 file has no December readings -
        # confirm, and consider omitting the column instead of plotting zeros.
        if year == 2016 and month == 12:
            output['2016-12'] = 0
        # Keep only the days for which every year loaded so far has a value.
        output = output.dropna()

    # output image
    ax = output.plot.line()
    ax.legend(loc='upper left')
    ax.set_title(str(years[0]) + ' - ' + str(years[-1]) + ' PM 2.5 per US embassy: ' + months_spell[month - 1], fontsize=10)
    fig = ax.get_figure()
    fig.savefig('output/' + months_spell[month - 1] + '.png')

    plt.show()
    # Release the figure; twelve are created in this loop.
    plt.close(fig)

Monthly comparison



In [20]:

    
# Build a 12 x len(years) table holding, for each month of each year, the
# largest 'Value' among rows not flagged 'Missing', truncated to an int.
output = pd.DataFrame(index=range(1, 13), columns=years)
for year in years:
    data = pd.read_csv(head + str(year) + tail, encoding='cp1252', skiprows=3)
    data = data[data['QC Name'] != 'Missing']
    # One groupby replaces a pivot_table per month and the removed `.ix`
    # indexer. Months with no usable readings become 0, which is what the
    # original bare `except:` produced for them.
    monthly_peak = data.groupby('Month')['Value'].max()
    output[year] = monthly_peak.reindex(months).fillna(0).astype(int)

ax = output.plot.line()
# NOTE(review): the title says "Monthly Mean" but the plotted values are
# monthly maxima - confirm which statistic is intended.
ax.set_title('2013 - 2016 Beijing PM2.5 Monthly Mean - US Embassy', fontname='Verdana', fontsize=10)
# Set the legend size on this axes directly; assigning
# rcParams['legend.fontsize'] after plotting does not restyle the existing
# legend and silently changes later figures instead.
ax.legend(fontsize=6)
ax.set_xticks(list(months))
ax.get_figure().savefig('output/2013_2016_summary.png')
plt.show()

Distribution of days with PM 2.5 >= 150



In [40]:

    
# Count, for each month of each year, the days whose highest 'Value'
# (among rows not flagged 'Missing') is at least 150.
threshold = 150
output = pd.DataFrame(index=range(1, 13), columns=years)
for year in years:
    data = pd.read_csv(head + str(year) + tail, encoding='cp1252', skiprows=3)
    data = data[data['QC Name'] != 'Missing']
    # Highest reading per (month, day).
    daily_peak = data.groupby(['Month', 'Day'])['Value'].max()
    # Summing the boolean mask counts only the days that meet the threshold.
    # The original took len() of a boolean-masked DataFrame, which keeps
    # every row (non-matching cells become NaN) and so counted all days.
    days_over = (daily_peak >= threshold).groupby(level='Month').sum()
    # Months absent from the file contribute 0 days.
    output[year] = days_over.reindex(months, fill_value=0).astype(int)

ax = output.plot.line()
ax.set_title('2013 - 2016 Monthly Days of Beijing PM2.5 >= 150', fontname='Verdana', fontsize=10)
# Set the legend size on this axes directly; assigning
# rcParams['legend.fontsize'] after plotting has no effect on this figure.
ax.legend(fontsize=10)
ax.set_xticks(list(months))
ax.get_figure().savefig('output/2013_2016_summary_pm150.png')
plt.show()



In [ ]:



In [ ]: