In [9]:
# -*- coding: utf-8 -*-
%matplotlib
from optparse import OptionParser
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import FormatStrFormatter
from matplotlib.ticker import AutoMinorLocator
from matplotlib.colors import LinearSegmentedColormap
import numpy as np
import urllib
import urllib.request as request
import re
import html
import sys, os
import pickle
from datetime import datetime, timedelta
sys.path.append("C:\\Program Files\\Anaconda3\\envs\\tensorflow\\lib\\site-packages")
import tweepy
from collections import deque
urlstr = "https://uk-air.defra.gov.uk/latest/currentlevels?view=site#L"
shorturlstr = "https://goo.gl/ZpELjS"
urlWHO = "http://apps.who.int/iris/bitstream/10665/69477/1/WHO_SDE_PHE_OEH_06.02_eng.pdf"
sitename = b'Liverpool'
mgm3 = '\u03BCgm\u207B\u00B3'
O3, NO2, SO2, PM25, PM100 = "O\u2083", "NO\u2082", "SO\u2082", "PM\u2082\u2085", "PM\u2081\u2080"
guides = {O3:100, NO2:200, SO2:20, PM25:25, PM100:50} # source: http://apps.who.int/iris/bitstream/10665/69477/1/WHO_SDE_PHE_OEH_06.02_eng.pdf
meansWHO = {O3:'8h', NO2:'1h', SO2:'10m', PM25:'24h', PM100:'24h'}
meansDEFRA = {O3:'8h', NO2:'1h', SO2:'max 15m', PM25:'24h', PM100:'24h'}
with open("apikeys.bin", "rb") as f:
    consumer_key, consumer_secret, access_token, access_token_secret = pickle.load(f)
def twitterAPI():
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
return api
def tweet(status, replyto=None, imgfilename=None):
if not (status or imgfilename):
return
    urls = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', status)
print("urls = ", urls)
    # strip URL text from the status for the length check: Twitter's t.co wrapper
    # makes every URL count as a flat 23 characters
rstat = status
for u in urls:
rstat = rstat.replace(u, '')
nchars = len(rstat) + 23 * len(urls)
    if nchars > 140:
        print("Tweet too long (%d effective chars); posting will likely fail" % nchars)
#print(status)
api = twitterAPI()
if (imgfilename and os.path.isfile(imgfilename)):
try:
stat = api.update_with_media(imgfilename, status=status, in_reply_to_status_id=(replyto and replyto.id))
except Exception as e:
print(e)
stat = None
else:
try:
stat = api.update_status(status=status, in_reply_to_status_id=(replyto and replyto.id))
except Exception as e:
print(e)
stat = None
return stat
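# Note on the length arithmetic above: Twitter wraps every link in t.co, so each
# URL contributes a flat 23 characters to the 140-character limit regardless of
# its real length; e.g. a status whose non-URL text is 120 characters plus one
# URL counts as 120 + 23 = 143 and would trip the warning.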
def compose(day, clock, reading):
status = ["%s, %s (%s)" % (day, clock, mgm3)]
    skeys = sorted(reading)
for k in skeys:
status.append("%s: %.0f %s" % (k, reading[k][0], reading[k][1]))
status.append("%s" % shorturlstr)
status = '\n'.join(status)
return status
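# A minimal sketch of what compose() returns, using a made-up reading (the
# values and "(1 Low)" index strings are hypothetical, not real DEFRA data):
def _demoCompose():
    reading = {O3: (34.0, "(1 Low)"), NO2: (51.0, "(1 Low)")}
    # -> "12/02/2017, 13:00 (μgm⁻³)\nNO₂: 51 (1 Low)\nO₃: 34 (1 Low)\nhttps://goo.gl/ZpELjS"
    return compose("12/02/2017", "13:00", reading)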
def toDT(day, clock):
if clock == "24:00": # 27/01/2017 24:00 is in fact 28/01/2017 00:00
clock = "00:00"
day = (datetime.strptime(day, "%d/%m/%Y") + timedelta(hours=24)).strftime("%d/%m/%Y")
return datetime.strptime("%s %s" % (day, clock), "%d/%m/%Y %H:%M")
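# DEFRA labels midnight as "24:00"; toDT rolls it into the next day, e.g.
#   toDT("27/01/2017", "24:00") -> datetime(2017, 1, 28, 0, 0)
#   toDT("27/01/2017", "13:00") -> datetime(2017, 1, 27, 13, 0)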
def composeAboveTweet(day, clock, above, origtweetstat):
status = []
dtnow = toDT(day, clock)
for k in above:
# count hours above
#print("In composeAboveTweet", k, above[k])
lday, lclock, lvalue = above[k][0]
if lday == day and lclock == clock:
stat = []
# count hours above
dtlast = dtnow
nhours = 1
for lday, lclock, lvalue in above[k][1:]:
if lday == day and lclock == clock:
continue # skip duplicate entries
dt = toDT(lday, lclock)
if (dtlast - dt) == timedelta(hours=1):
nhours += 1
else:
break
dtlast = dt
stat.append("@lpoolcouncil @DefraUKAir @LiverpoolFoE: %s %dh above @WHO guide (%.0f%s %s-mean %s) #airpollution #liverpool" %
(k, nhours, guides[k], mgm3, meansWHO[k], urlWHO))
if meansWHO[k] != meansDEFRA[k]:
stat.append("(Note #DEFRA data is %s mean)" % meansDEFRA[k])
status.append('\n'.join(stat))
return status
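# Example: if the newest NO₂ entry in `above` matches (day, clock) and the two
# entries before it are each exactly one hour apart, nhours == 3 and the tweet
# reads "... NO₂ 3h above @WHO guide (200μgm⁻³ 1h-mean ...)".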
def scrape():
    with request.urlopen(urlstr) as f:
        r = f.read()
g = re.search(b".*<tr>.*(%s.*?)</tr>" % sitename, r, re.DOTALL)
#print(g.group(1))
# split into <td></td>
row = g.group(1)
#print("row = %s\n" % row)
# date and time
dategroups = re.search(b".*<td>(.*?)<br.*?>(.*?)</td>", row, re.DOTALL)
day = dategroups.group(1).decode("utf-8")
clock = dategroups.group(2).decode("utf-8")
# data
cols = re.findall(b"<span.*?>(.*?)</span>", row, re.DOTALL)
assert len(cols) == 5
units = [O3, NO2, SO2, PM25, PM100]
datanums = []
for v in cols:
if b' ' in v:
value = float(v[:v.index(b' ')])
else:
value = float(v[:v.index(b'&')])
        nv = v.replace(b'&nbsp;', b' ')  # normalise &nbsp; before extracting the index text
        ix = re.match(rb".*?(\(.*?\))", nv).group(1)
datanums.append((value, ix.decode("utf-8")))
reading = dict(zip(units, datanums))
return day, clock, reading
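# A sketch of the row parsing above against hypothetical markup shaped like one
# row of the DEFRA table (the real page differs in detail; illustrative only):
def _demoScrapeParse():
    row = (b"Liverpool Speke</a></td>"
           b"<td><span>34 (1 Low)</span></td><td><span>21 (1 Low)</span></td>"
           b"<td><span>3 (1 Low)</span></td><td><span>8 (1 Low)</span></td>"
           b"<td><span>15 (1 Low)</span></td>"
           b"<td>27/01/2017<br/>13:00</td>")
    g = re.search(b".*<td>(.*?)<br.*?>(.*?)</td>", row, re.DOTALL)
    cols = re.findall(b"<span.*?>(.*?)</span>", row, re.DOTALL)
    return g.group(1), g.group(2), cols  # (b'27/01/2017', b'13:00', five readings)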
def loadReadings():
    fall = "allreadings.bin"
    allreadings = deque()
    if os.path.isfile(fall):
        with open(fall, "rb") as f:
            allreadings = pickle.load(f)
    return allreadings
def pickleReadings(allreadings):
    fall = "allreadings.bin"
    with open(fall, "wb") as f:
        pickle.dump(allreadings, f)
def compareWHO(allreadings):
above = {}
for (day, clock, reading) in allreadings:
for k in guides:
if reading[k][0] > guides[k]:
if k not in above:
above[k] = []
above[k].append((day,clock, reading[k][0]))
return above
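# Example: with guides[PM25] == 25, a reading of 31.0 μgm⁻³ at 27/01/2017 13:00
# contributes {PM₂₅: [("27/01/2017", "13:00", 31.0)]} to `above`.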
def weatherTweetToDict(t):
m = re.match(".*AirTemp ([\+\-0-9.]*).*?, RH ([0-9]*?)\%, wind speed ([0-9.]*) m\/s, wind dir ([0-9.]*?) deg, Time ([0-9:]*?)UTC", t.text)
if m:
try:
d = {"temp": float(m.group(1)), "rh": int(m.group(2)), "windspeed": float(m.group(3)), "winddir": float(m.group(4)), "time": m.group(5)}
d["datetime"] = t.created_at
d["tweet"] = t
return d
except Exception as e:
print(t.text)
raise e
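# A sketch of the tweet text the regex above expects; the reading is made up
# but shaped like an @livuniwx weather tweet:
def _demoWeatherParse():
    from types import SimpleNamespace
    t = SimpleNamespace(
        text="AirTemp 5.2C, RH 76%, wind speed 3.4 m/s, wind dir 220 deg, Time 14:00UTC",
        created_at=datetime(2017, 1, 27, 14, 0))
    return weatherTweetToDict(t)  # {'temp': 5.2, 'rh': 76, 'windspeed': 3.4, 'winddir': 220.0, ...}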
def getAndPickleWeather(fn, readings):
api = twitterAPI()
oldestReading = toDT(readings[-1][0], readings[-1][1])
idlast = None
alltweets = []
while True:
        if idlast is None:
            r = api.user_timeline("@livuniwx")
        else:
            r = api.user_timeline("@livuniwx", max_id=idlast)
        # skip the last tweet here: max_id is inclusive, so it is processed
        # as the first item of the next page
        for t in r[:-1]:
d = weatherTweetToDict(t)
if d:
alltweets.append(d)
if r[-1].created_at < oldestReading:
break
idlast = r[-1].id
    with open(fn, "wb") as f:
        pickle.dump(alltweets, f)
    print("Pickled %d tweets" % len(alltweets))
def loadWeatherTweets(fn):
    with open(fn, "rb") as f:
        wt = pickle.load(f)
d0 = wt[0]["datetime"]
for t in wt[1:]:
assert t["datetime"] < d0
d0 = t["datetime"]
return wt
def testCMap():
# colourmap from green over yellow to red
cdict = {
'red' : ((0.00, 0.00, 0.00),
(0.50, 1.00, 1.00),
(1.00, 1.00, 1.00)),
'green': ((0.00, 1.00, 1.00),
(0.50, 1.00, 1.00),
(1.00, 0.00, 0.00)),
'blue' : ((0.00, 0.00, 0.00),
(0.50, 0.00, 0.00),
(1.00, 0.00, 0.00)),
}
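    # Each cdict row is (x, value_below, value_above): e.g. red ramps from 0 at
    # x=0.0 to 1 at x=0.5 and stays at 1, so the map runs green -> yellow -> red.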
cm = LinearSegmentedColormap("mymap", cdict, 256)
gradient = np.linspace(0, 1, 256)
gradient = np.vstack((gradient, gradient))
fig, axes = plt.subplots(nrows=1)
fig.subplots_adjust(top=0.95, bottom=0.01, left=0.2, right=0.99)
#axes[0].set_title(cmap_category + ' colormaps', fontsize=14)
axes.imshow(gradient, aspect='auto', cmap=cm)
pos = list(axes.get_position().bounds)
x_text = pos[0] - 0.01
y_text = pos[1] + pos[3]/2.
#fig.text(x_text, y_text, name, va='center', ha='right', fontsize=10)
axes.set_axis_off()
plt.show()
def plotLinear(readings, C):
titles = {O3: r"$O_3$", NO2: r"$NO_2$", SO2: r"$SO_2$", PM25: r"$PM_{2.5}$", PM100: r"$PM_{10}$"}
dates = [toDT(d, c) for d, c, r in readings]
d0, d1 = dates[0], dates[-1] # date range
data = [r[C][0] for d, c, r in readings] # data
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
# format x axis
ax.xaxis_date()
ax.set_xlim(d0, d1)
ax.xaxis.set_minor_formatter(mdates.DateFormatter('%Hh'))
ax.xaxis.set_minor_locator(AutoMinorLocator(2))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%a %d/%m'))
    # format y axis
    ax.yaxis.set_major_formatter(FormatStrFormatter(r'%.0f$\frac{\mu g}{m^3}$'))
ax.set_ylim(0, max(data) + 5)
# green / red background division above and below WHO guide
guide = guides[C]
    ax.fill_between([d0, d1], 0, guide, facecolor=(0.8, 1, 0.8), edgecolor="none")
    ax.fill_between([d0, d1], guide, max(data) + 5, facecolor=(1, 0.8, 0.8), edgecolor="none")
ax.scatter(dates, data)
ax.set_title(titles[C] + " for %s to %s,\nLiverpool Speke (%s)" % (d0.strftime("%d/%m/%Y"), d1.strftime("%d/%m/%Y"), urlstr), fontsize=10)
ax.tick_params(axis='both', which='both', labelsize=10)
fig.autofmt_xdate()
plt.grid(which='major')
fn = "figure_%s.png" % d1.strftime("%Y%m%d")
plt.savefig(fn, dpi=600)
return fn
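# Example: plotLinear(readings, PM25) draws the PM₂₅ series over the green
# (below guide) / red (above guide) bands and returns a filename such as
# "figure_20170128.png", derived from the last reading's date.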
def plotPolar(readings, weathertweets):
def findInWT(dt, wt):
for t in wt:
if t["datetime"] - dt < timedelta(minutes=10):
return t
assert 0
# pair pollution readings with weather data
pm25 = []
pm100 = []
windspeed = []
winddir = []
dates = []
for r in readings:
d, c, rr = r
dt = toDT(d, c)
# find dt in wt
w = findInWT(dt, weathertweets)
dates.append(dt)
pm25.append(rr[PM25][0])
pm100.append(rr[PM100][0])
windspeed.append(w["windspeed"])
winddir.append(w["winddir"])
theta = np.radians(winddir)
# colourmap from green over yellow to red
cdict = {
'red' : ((0.00, 0.00, 0.00),
(0.50, 1.00, 1.00),
(1.00, 1.00, 1.00)),
'green': ((0.00, 1.00, 1.00),
(0.50, 1.00, 1.00),
(1.00, 0.00, 0.00)),
'blue' : ((0.00, 0.00, 0.00),
(0.50, 0.00, 0.00),
(1.00, 0.00, 0.00)),
}
cm = LinearSegmentedColormap("greentored", cdict, 256)
ax = plt.subplot(111, projection='polar')
ax.scatter(theta, windspeed, c=pm25, s=100, cmap=cm, edgecolors='none')
ax.set_rmax(max(windspeed) + 1)
    ax.set_rticks(np.arange(0, max(windspeed), 1))  # fewer radial ticks
ax.set_rlabel_position(300) # get radial labels away from plotted line
ax.set_theta_zero_location("S")
ax.set_theta_direction(-1)
ax.grid(True)
# tick locations
thetaticks = np.arange(0,360,90)
ax.set_thetagrids(thetaticks, frac=1.01)
#img = plt.imread("speke.png")
#plt.imshow(img, extent=[0,10,0,10])
ax.set_title("PM25 %s to %s" % (allreadings[-1][0], allreadings[0][0]))
plt.show()
if __name__ == "__main__":
    parser = OptionParser(usage='usage: %prog [options] ')
parser.add_option("-f", "--file", dest="filename",
help="", metavar="FILE")
parser.add_option('-m', '--mode',
type='choice',
action='store',
dest='mode',
choices=['plotpollution', 'debug', 'saveweather', 'plotpollutionLinear', 'regular'],
default='regular',
help='Choose mode',)
(options, args) = parser.parse_args()
    mode = options.mode
allreadings = loadReadings()
# remove duplicate entries (could have come in while debugging)
ic = 0
while ic < len(allreadings):
r = allreadings[ic]
while allreadings.count(r) > 1:
allreadings.remove(r)
ic += 1
if mode == 'debug':
stat = tweet("TTEESSTT")
print(stat)
#tweet("In reply to: TEST3", stat)
    elif mode == 'saveweather':
        getAndPickleWeather("weathertweets.bin", allreadings)
elif mode == 'plotpollution':
weathertweets = loadWeatherTweets("weathertweets.bin")
plotPolar(allreadings, weathertweets)
elif mode == "plotpollutionLinear":
# find when we last posted an image
files = [f for f in os.listdir('.') if re.match("figure_[0-9]*.png", f)]
if files:
datelast = max([datetime.strptime(f, "figure_%Y%m%d.png") for f in files])
datelast += timedelta(hours=12)
else:
datelast = datetime.today() - timedelta(days=100)
print(datelast)
sincelastplot = (datetime.today() - datelast)
if (sincelastplot > timedelta(hours=24 * 3)):
print("plotting")
allreadings.reverse()
readings = [(d, h, r) for (d, h, r) in allreadings if toDT(d, h) >= datelast]
figure = plotLinear(readings, PM25)
d0, d1 = toDT(readings[0][0], readings[0][1]), toDT(readings[-1][0], readings[-1][1])
tweet(PM25 + "\n%s - %s" % (d0.strftime("%d/%m/%Y"), d1.strftime("%d/%m/%Y")), None, figure)
else:
lastday, lastclock, lastreading = allreadings[0]
day, clock, reading = scrape()
if ((day, clock) != (lastday, lastclock)):
status = compose(day, clock, reading)
rtweet = tweet(status)
allreadings.appendleft((day, clock, reading))
pickleReadings(allreadings)
# compare with WHO recommendations
r = compareWHO(allreadings)
if r:
stats = composeAboveTweet(day, clock, r, rtweet)
for s in stats:
tweet(s, replyto=rtweet)
else:
print("Reading already known")