In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [2]:
# Month abbreviations used in plot titles and output file names; the plotting
# cells look them up as months_spell[month - 1].
months_spell = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sept', 'Oct', 'Nov', 'Dec',
]
# Month numbers 1..12, one per abbreviation above.
months = range(1, len(months_spell) + 1)

# Calendar years covered by the analysis (2013 through 2016 inclusive).
years = range(2013, 2016 + 1)

# Each year's input path is assembled as head + str(year) + tail.
tail = '_HourlyPM25.csv'
head = 'data/Beijing_'
In [41]:
# For every calendar month, plot the daily median PM2.5 value of each year as
# one line per year and save the figure to output/<month abbreviation>.png.
plt.style.use('ggplot')

# Read and filter each year's CSV once. The month loop below only slices these
# frames, so the files are not re-read for each of the 12 months.
yearly_data = {}
for year in years:
    data = pd.read_csv(head + str(year) + tail, encoding='cp1252', skiprows=3)
    # Drop rows whose QC flag marks the reading as missing.
    yearly_data[year] = data[data['QC Name'] != 'Missing']

for month in months:
    # Rows are days of the month (1..31); one column per year is added below.
    output = pd.DataFrame(index=range(1, 32))
    for year in years:
        data = yearly_data[year]
        # Series indexed by day; it is aligned to output's index on assignment.
        data_month = data[data['Month'] == month].groupby('Day')['Value'].median()
        column = str(year) + '-' + str(month)
        if data_month.empty:
            # No readings at all for this year/month. An all-NaN column would
            # make dropna() below discard every row, so draw a flat 0 line.
            # This keeps the 0 placeholder previously hard-coded for 2016-12,
            # but only when the data is actually absent.
            output[column] = 0
        else:
            output[column] = data_month
    # Keep only the days for which every year has a value.
    output = output.dropna()

    fig, ax = plt.subplots()
    output.plot.line(ax=ax)
    ax.legend(loc='upper left')
    ax.set_title(str(years[0]) + ' - ' + str(years[-1]) + ' PM 2.5 per US embassy: ' + months_spell[month - 1], fontsize=10)
    fig.savefig('output/' + months_spell[month - 1] + '.png')
    plt.show()
    # Release the figure so twelve of them do not accumulate in memory.
    plt.close(fig)
In [20]:
# Build a month x year table of the highest PM2.5 reading in each month
# (truncated to int, 0 when a month has no readings) and plot one line per year.
output = pd.DataFrame(0, index=months, columns=years)
for year in years:
    data = pd.read_csv(head + str(year) + tail, encoding='cp1252', skiprows=3)
    # Drop rows whose QC flag marks the reading as missing.
    data = data[data['QC Name'] != 'Missing']
    monthly_max = data.groupby('Month')['Value'].max()
    # reindex() inserts NaN for months without data; those become 0, which is
    # what the former try/except fallback produced. This replaces the `.ix`
    # indexer, which no longer exists in pandas >= 1.0.
    output[year] = monthly_max.reindex(output.index).fillna(0).astype(int)

fig, ax = plt.subplots()
output.plot.line(ax=ax)
# Set the legend size on this legend directly. Assigning
# plt.rcParams['legend.fontsize'] after plotting does not affect the legend
# already drawn and would leak into whichever cell is run next.
ax.legend(fontsize=6)
# The statistic computed above is the monthly maximum, so the title says "Max".
# NOTE(review): the title previously read "Monthly Mean"; if a mean was the
# intent, switch the aggregation to .mean() and restore that wording.
ax.set_title('2013 - 2016 Beijing PM2.5 Monthly Max - US Embassy', fontname='Verdana', fontsize=10)
ax.set_xticks(list(months))
fig.savefig('output/2013_2016_summary.png')
plt.show()
In [40]:
# Build a month x year table counting the days whose highest PM2.5 reading
# reached the threshold, then plot one line per year.
PM25_THRESHOLD = 150

output = pd.DataFrame(0, index=months, columns=years)
for year in years:
    data = pd.read_csv(head + str(year) + tail, encoding='cp1252', skiprows=3)
    # Drop rows whose QC flag marks the reading as missing.
    data = data[data['QC Name'] != 'Missing']
    # Highest reading of every (month, day) pair.
    daily_max = data.groupby(['Month', 'Day'])['Value'].max()
    # Summing the boolean mask counts only the qualifying days. Indexing a
    # DataFrame with a boolean DataFrame keeps the original shape (non-matching
    # cells become NaN), so taking len() of that result counts every day with
    # data instead of the days at or above the threshold.
    days_over = (daily_max >= PM25_THRESHOLD).groupby(level='Month').sum()
    # Months without any readings get a count of 0.
    output[year] = days_over.reindex(output.index, fill_value=0).astype(int)

fig, ax = plt.subplots()
output.plot.line(ax=ax)
# Set the legend size on this legend directly. Assigning
# plt.rcParams['legend.fontsize'] after plotting does not affect the legend
# already drawn and would leak into whichever cell is run next.
ax.legend(fontsize=10)
ax.set_title('2013 - 2016 Monthly Days of Beijing PM2.5 >= ' + str(PM25_THRESHOLD), fontname='Verdana', fontsize=10)
ax.set_xticks(list(months))
fig.savefig('output/2013_2016_summary_pm150.png')
plt.show()
In [ ]:
In [ ]: