minedujob

Python script to fetch random job from Ministry Of Education and convert it into a json object. Merge data in from DigitalNZ


In [167]:
import requests
import untangle
import xmltodict
import json
import random
import bs4
import dominate
from dominate.tags import *
from pydnz import Dnz
import arrow
import bs4

In [168]:
dnz = Dnz('keyhere')

In [169]:
jobreq = requests.get('https://jobs.minedu.govt.nz/jobtools/job_rss?o1=17584&k2=A52B3674BC046465&source=JobRSS&medium=JobRSS')

In [170]:
jobtxta = jobreq.text

In [171]:
obj = untangle.parse(jobtxta)

In [172]:
dicjobz = xmltodict.parse(jobtxta)

In [173]:
ranldicj = len(dicjobz['rss']['channel']['item'])

In [174]:
ranldicj


Out[174]:
27

In [175]:
randicz = random.randint(0, ranldicj)

In [176]:
randicz


Out[176]:
15

In [177]:
dicrs = dicjobz['rss']['channel']['item'][randicz]

In [178]:
dicrts = dicrs['title']
dicrtq = dicrs

In [179]:
artim = arrow.now(dicrtq['pubDate'])

In [180]:
print artim.date()


2015-07-12

In [181]:
print artim.time()


09:39:49.250847

In [182]:
artim.weekday()


Out[182]:
6

In [183]:
msjobdic = dict()

In [184]:
msjobdic.update({'date advertised' : str(artim.date()), 
                'time advertised' : str(artim.time()),
                'title' : dicrts})

In [185]:
msjobdic


Out[185]:
{'date advertised': '2015-07-12',
 'time advertised': '09:39:49.250847',
 'title': u'Executive Support Officer'}

In [186]:
requlink = dicrtq['link']

In [187]:
reqlinkq = requests.get(requlink)

In [188]:
bsoup = bs4.BeautifulSoup(reqlinkq.text)

In [189]:
bfina = bsoup.findAll('a')

In [190]:
#msjobdic.update({'date advertised' : str(artim.date()), 
#                'time advertised' : str(artim.time()),
#                'title' : dicrts,
#                '})
for bfin in bfina:
    if ('@') in bfin.text:
        #print bfin.text
        msjobdic.update({('email') : str(bfin.text)})

In [191]:
for bfiny in bfina:
    if '.docx' in bfiny.text:
        print bfiny.text

In [192]:
#Search for this file and render text.
#if jpg/gif render.

In [193]:
for bfin in bfina:
    if ('href') in bfin.text:
        #print bfin.text
        msjobdic.update({('href') : str(bfin.text)})

In [194]:
#msjob

In [195]:
msjobdic.update({'randnum' : randicz})

In [196]:
for bfiny in bfina:
    if '.docx' in bfiny.text:
        msjobdic.update({'doc' : bfiny.text})

In [197]:
msjobdic


Out[197]:
{'date advertised': '2015-07-12',
 'randnum': 15,
 'time advertised': '09:39:49.250847',
 'title': u'Executive Support Officer'}

In [198]:
msjobdic.update({'link' : dicrtq['link']})

In [199]:
msjobdic


Out[199]:
{'date advertised': '2015-07-12',
 'link': u'http://jobs.minedu.govt.nz/jobtools/jncustomsearch.viewFullSingle?in_organid=17584&in_jnCounter=222576690&in_jobDate=All&in_searchbox=YES&in_summary=S#utm_source=JobRSS&utm_medium=JobRSS',
 'randnum': 15,
 'time advertised': '09:39:49.250847',
 'title': u'Executive Support Officer'}

In [199]:


In [200]:
#msjobdic.update({'doc' : b

In [201]:
dicrtq['link']


Out[201]:
u'http://jobs.minedu.govt.nz/jobtools/jncustomsearch.viewFullSingle?in_organid=17584&in_jnCounter=222576690&in_jobDate=All&in_searchbox=YES&in_summary=S#utm_source=JobRSS&utm_medium=JobRSS'

In [202]:
dicrts


Out[202]:
u'Executive Support Officer'

In [203]:
debsnz =  dnz.search(dicrs)

In [204]:
randrecord = len(debsnz.records)

In [205]:
ranitdz = random.randint(0, randrecord)

In [206]:
ranitdz


Out[206]:
0

In [207]:
randicz


Out[207]:
15

In [208]:
#debsnz.records

In [209]:
#debrecintz = debsnz.records[ranitdz]

In [210]:
#kederz = debrecintz.keys()

In [211]:
#print debrecintz['category']
#print debrecintz['usage']

In [212]:
#for ked in kederz:
#    print ked
#    print debrecintz[ked]
    #print ked
    #print ked
    #print debrecintz['category']

In [213]:
#print debrecintz['id']

In [214]:
#getiddnz = ('http://api.digitalnz.org/v3/records/' + str(debrecintz['id']) + '.json?api_key=Ph2LDuyiJmJcQm1S5myy')

In [215]:
#getiddnz

In [216]:
#reqidnz = requests.get(getiddnz)

In [217]:
#json.dumps(reqidnz)

In [218]:
mylirq = list()

In [219]:
#for reqi in reqidnz:
    #print reqi
    #print reqi.upper()
    #reqi

In [220]:
#my_dict.pop("key", None)

In [221]:
dicrq = len(dicjobz['rss']['channel']['item'])

In [222]:
dicrq


Out[222]:
27

In [223]:
#Return a random job.

In [224]:
ranjoz = random.randint(0, dicrq)

dicrsch = dicjobz['rss']['channel']['item']

In [225]:
print dicrsch[ranjoz]['link']


http://jobs.minedu.govt.nz/jobtools/jncustomsearch.viewFullSingle?in_organid=17584&in_jnCounter=222578149&in_jobDate=All&in_searchbox=YES&in_summary=S#utm_source=JobRSS&utm_medium=JobRSS

In [226]:
print dicrsch[ranjoz]['title']


Practice Advisors

In [227]:
#jobtype
#location
#date advertised
#jobreference
#jobtitle
#should be keys
#Currently they are inside description key
#Create new json file that fixes this.

In [228]:
#for dezsr in  dicrsch[ranjoz]['description']:
##    if 'JobType' in dezs#r:
# #       print dezsr

In [229]:
#docstart.title = ('ministry-of-education-jobs')
doc = dominate.document(title='ministry-of-education-jobs')

with doc.head:
    link(rel='stylesheet', href='style.css')
    script(type='text/javascript', src='script.js')

with doc:
    #with div(id='header').add(ol()):
        #for i in ['home', 'about', 'contact']:
            #li(a(i.title(), href='/%s.html' % i))

    with div(cls='row'):
        h1('education-counts-jobs')
        h2(dicrsch[ranjoz]['title'])
        p(dicrs)
        #p(dicrsch[ranjoz]['description'])
        p(a(dicrs, href= dicrsch[ranjoz]['link']))
        
        #for ked in kederz:
        #print ked
        #    p((kederz[ked]))
        #print ked
        #print ked

In [230]:
#print doc

docre = doc.render()
#s = docre.decode('ascii', 'ignore')
yourstring = docre.encode('ascii', 'ignore').decode('ascii')
indfil = ('/home/wcmckee/minedujob/index.html')
mkind = open(indfil, 'w')
mkind.write(yourstring)
mkind.close()

In [231]:
opeind = open('/home/wcmckee/minedujob/index.html', 'a')

In [232]:
opeind.write(dicrsch[ranjoz]['description'])

In [233]:
opeind.close()

In [233]:


In [ ]: