Python script that fetches a random job from the Ministry of Education jobs feed and converts it into a JSON object, then merges in data from DigitalNZ.
In [167]:
import requests
import untangle
import xmltodict
import json
import random
import bs4
import dominate
from dominate.tags import *
from pydnz import Dnz
import arrow
import bs4
In [168]:
# DigitalNZ API client. The key is read from the DNZ_API_KEY environment
# variable so a real credential never has to be committed with the notebook;
# the original 'keyhere' placeholder is kept as the fallback value.
import os

dnz = Dnz(os.environ.get('DNZ_API_KEY', 'keyhere'))
In [169]:
# Download the Ministry of Education jobs RSS feed.
# A timeout stops the script hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of letting an error
# page be fed to the XML parsers further down.
jobreq = requests.get('https://jobs.minedu.govt.nz/jobtools/job_rss?o1=17584&k2=A52B3674BC046465&source=JobRSS&medium=JobRSS', timeout=30)
jobreq.raise_for_status()
In [170]:
# Decoded body of the RSS response; this is the XML text parsed below.
jobtxta = jobreq.text
In [171]:
# Parse the feed with untangle.
# NOTE(review): `obj` is not referenced again in the visible part of this
# file; the xmltodict parse is what the rest of the script uses.
obj = untangle.parse(jobtxta)
In [172]:
# Parse the feed into nested dicts.
# force_list makes 'item' a list even when the feed holds exactly one job;
# without it xmltodict returns a single dict there, and the integer indexing
# and len() calls used later would operate on the wrong thing.
dicjobz = xmltodict.parse(jobtxta, force_list=('item',))
In [173]:
# Number of entries under rss -> channel -> item in the parsed feed.
ranldicj = len(dicjobz['rss']['channel']['item'])
In [174]:
ranldicj
Out[174]:
In [175]:
# Random index into the job list.
# randrange() excludes its upper bound, so the result is always a valid
# index; randint(0, n) could return n itself and cause an IndexError.
randicz = random.randrange(ranldicj)
In [176]:
randicz
Out[176]:
In [177]:
# The feed entry selected by the random index `randicz`.
dicrs = dicjobz['rss']['channel']['item'][randicz]
In [178]:
# Title of the selected job.
dicrts = dicrs['title']
# Second name bound to the same entry object as `dicrs` (not a copy).
dicrtq = dicrs
In [179]:
# Build an Arrow timestamp for when the job was advertised.
# arrow.now() takes a *timezone* argument, so handing it the pubDate string
# never parsed the date. RSS 2.0 specifies pubDate as an RFC 822 date, which
# email.utils can parse (assumes this feed follows the spec).
# The result is the advertised instant expressed in UTC.
import email.utils

_pub_fields = email.utils.parsedate_tz(dicrtq['pubDate'])
if _pub_fields is None:
    raise ValueError('unparseable pubDate: %r' % (dicrtq['pubDate'],))
artim = arrow.get(email.utils.mktime_tz(_pub_fields))
In [180]:
# Show the advertised date. The call form with one argument prints the same
# text on Python 2 and is also valid syntax on Python 3.
print(artim.date())
In [181]:
# Show the advertised time of day. The call form with one argument prints
# the same text on Python 2 and is also valid syntax on Python 3.
print(artim.time())
In [182]:
artim.weekday()
Out[182]:
In [183]:
# Accumulator for the fields collected about the selected job.
msjobdic = {}
In [184]:
# Record the advertised date and time (as strings) and the job title.
msjobdic['date advertised'] = str(artim.date())
msjobdic['time advertised'] = str(artim.time())
msjobdic['title'] = dicrts
In [185]:
msjobdic
Out[185]:
In [186]:
# URL stored under the selected feed entry's 'link' key; fetched below.
requlink = dicrtq['link']
In [187]:
# Fetch the page the job's link points to. The timeout prevents an
# indefinite hang, and raise_for_status() stops an HTTP error page from
# being scraped as if it were the advert.
reqlinkq = requests.get(requlink, timeout=30)
reqlinkq.raise_for_status()
In [188]:
# Parse the fetched page. Naming the parser explicitly avoids the bs4
# "no parser was explicitly specified" warning and keeps the resulting tree
# the same regardless of which optional parsers are installed.
bsoup = bs4.BeautifulSoup(reqlinkq.text, 'html.parser')
In [189]:
# Every <a> element found in the parsed page.
bfina = bsoup.find_all('a')
In [190]:
#msjobdic.update({'date advertised' : str(artim.date()),
# 'time advertised' : str(artim.time()),
# 'title' : dicrts,
# '})
# Store the text of an anchor that looks like an e-mail address.
# If several anchors contain '@', the last one wins.
# The text is stored as-is: wrapping it in str() raised UnicodeEncodeError
# on Python 2 whenever the anchor text contained a non-ASCII character.
for bfin in bfina:
    if '@' in bfin.text:
        msjobdic['email'] = bfin.text
In [191]:
# Print the text of every anchor that mentions a .docx file.
# The print call form behaves the same on Python 2 for a single argument
# and is valid on Python 3.
for bfiny in bfina:
    if '.docx' in bfiny.text:
        print(bfiny.text)
In [192]:
#Search for this file and render text.
#if jpg/gif render.
In [193]:
# Store the text of an anchor whose visible text contains 'href'
# (the last match wins).
# NOTE(review): this tests the anchor's *text*, not its href attribute, so
# it will rarely match -- presumably anchor.get('href') was intended; confirm
# what should be stored before changing it.
for anchor in bfina:
    anchor_text = anchor.text
    if 'href' not in anchor_text:
        continue
    msjobdic['href'] = str(anchor_text)
In [194]:
#msjob
In [195]:
# Remember which random index was used to pick this job.
msjobdic['randnum'] = randicz
In [196]:
# Record the text of an anchor that mentions a .docx file under 'doc'.
# When several anchors match, the last one is kept; when none match, the
# key is not set.
for anchor in bfina:
    label = anchor.text
    if '.docx' not in label:
        continue
    msjobdic['doc'] = label
In [197]:
msjobdic
Out[197]:
In [198]:
# Keep the job's link alongside the other collected fields.
msjobdic['link'] = dicrtq['link']
In [199]:
msjobdic
Out[199]:
In [199]:
In [200]:
#msjobdic.update({'doc' : b
In [201]:
dicrtq['link']
Out[201]:
In [202]:
dicrts
Out[202]:
In [203]:
# Search DigitalNZ using the job title as the query text.
# The original passed `dicrs`, the whole feed-entry mapping, where a search
# string is expected; `dicrts` is that entry's title.
debsnz = dnz.search(dicrts)
In [204]:
# Number of records in the DigitalNZ search result.
randrecord = len(debsnz.records)
In [205]:
# Random index into the DigitalNZ records.
# randrange() excludes its upper bound, so a non-empty result set always
# gets a valid index (randint(0, n) could return n). max(..., 1) keeps the
# original outcome of 0 for an empty result set instead of raising here.
ranitdz = random.randrange(max(randrecord, 1))
In [206]:
ranitdz
Out[206]:
In [207]:
randicz
Out[207]:
In [208]:
#debsnz.records
In [209]:
#debrecintz = debsnz.records[ranitdz]
In [210]:
#kederz = debrecintz.keys()
In [211]:
#print debrecintz['category']
#print debrecintz['usage']
In [212]:
#for ked in kederz:
# print ked
# print debrecintz[ked]
#print ked
#print ked
#print debrecintz['category']
In [213]:
#print debrecintz['id']
In [214]:
#getiddnz = ('http://api.digitalnz.org/v3/records/' + str(debrecintz['id']) + '.json?api_key=YOUR_API_KEY')
In [215]:
#getiddnz
In [216]:
#reqidnz = requests.get(getiddnz)
In [217]:
#json.dumps(reqidnz)
In [218]:
# Empty list; nothing in the visible part of the file appends to it.
mylirq = []
In [219]:
#for reqi in reqidnz:
#print reqi
#print reqi.upper()
#reqi
In [220]:
#my_dict.pop("key", None)
In [221]:
# Number of entries under rss -> channel -> item (same expression as the
# one used for `ranldicj` earlier in the script).
dicrq = len(dicjobz['rss']['channel']['item'])
In [222]:
dicrq
Out[222]:
In [223]:
#Return a random job.
In [224]:
# Pick the job that will be rendered to the HTML page.
# randrange() excludes its upper bound, so `ranjoz` is always a valid index
# into `dicrsch`; randint(0, dicrq) could return dicrq and raise IndexError
# on the lookups that follow.
dicrsch = dicjobz['rss']['channel']['item']
ranjoz = random.randrange(dicrq)
In [225]:
# Show the chosen job's link (call form: same output on Python 2, valid on
# Python 3).
print(dicrsch[ranjoz]['link'])
In [226]:
# Show the chosen job's title (call form: same output on Python 2, valid on
# Python 3).
print(dicrsch[ranjoz]['title'])
In [227]:
#jobtype
#location
#date advertised
#jobreference
#jobtitle
#should be keys
#Currently they are inside description key
#Create new json file that fixes this.
In [228]:
#for dezsr in dicrsch[ranjoz]['description']:
## if 'JobType' in dezs#r:
# # print dezsr
In [229]:
# Build the HTML page for the job chosen by `ranjoz`.
#
# Fixes relative to the original cell:
# * `dicrs` (the whole feed entry for the *other* random pick, `randicz`)
#   was passed as the content of <p> and <a>. dominate treats a dict
#   argument as a set of attributes, not as text, so the paragraph and the
#   link had no readable content and described a different job than the
#   heading.
# * Every element now reads from the same entry, `chosen_job`.
#
# The nesting below is reconstructed: the exported source had lost its
# indentation.
chosen_job = dicrsch[ranjoz]

doc = dominate.document(title='ministry-of-education-jobs')

with doc.head:
    link(rel='stylesheet', href='style.css')
    script(type='text/javascript', src='script.js')

with doc:
    with div(cls='row'):
        h1('education-counts-jobs')
        h2(chosen_job['title'])
        # Publication date string exactly as supplied by the feed.
        p(chosen_job['pubDate'])
        # Link text is the job title; target is the job's advert page.
        p(a(chosen_job['title'], href=chosen_job['link']))
In [230]:
# Render the page and write it to index.html.
# 'xmlcharrefreplace' turns each non-ASCII character into an HTML numeric
# character reference, so macrons and similar characters still display
# correctly; the previous 'ignore' handler silently deleted them. The file
# content stays pure ASCII, so no charset declaration is needed.
docre = doc.render()
yourstring = docre.encode('ascii', 'xmlcharrefreplace').decode('ascii')
indfil = ('/home/wcmckee/minedujob/index.html')
# The with-block closes the file even if the write fails.
with open(indfil, 'w') as mkind:
    mkind.write(yourstring)
In [231]:
# Reopen the generated page in append mode; the handle is written to and
# closed by the statements that follow.
opeind = open('/home/wcmckee/minedujob/index.html', 'a')
In [232]:
# Append the chosen job's description to the page.
# * `or ''` covers a missing/empty description, which xmltodict reports as
#   None and which write() would reject.
# * Non-ASCII characters become HTML numeric character references, so the
#   write cannot fail with UnicodeEncodeError on Python 2 and the characters
#   still display.
# NOTE(review): this text lands after the document already rendered to the
# file, i.e. after its closing tags -- confirm that is intended.
opeind.write((dicrsch[ranjoz]['description'] or '').encode('ascii', 'xmlcharrefreplace').decode('ascii'))
In [233]:
# Close the append handle so the description is flushed to disk.
opeind.close()
In [233]:
In [ ]: