notebook.community

Edit and run



In [1]:

    
import zipfile



In [137]:

    
def hundle_file(inputfile, outputfile):
    """Flatten concatenated XML documents from a zip member to one per line.

    Opens the zip archive ``inputfile``, reads the first member listed by
    ``namelist()`` and writes it to ``outputfile`` so that every XML document
    occupies exactly one CRLF-terminated line.

    The first line of the member is discarded unconditionally (it is expected
    to be the XML declaration of the first document). Each later line that
    starts with the XML declaration marks a document boundary: it ends the
    current output line and is itself dropped. All other lines are stripped
    of surrounding whitespace and appended to the current output line with no
    separator, so text wrapped across source lines is joined without a space.

    Data is handled as bytes end to end, which keeps the function working on
    both Python 2 and Python 3 and writes the "\\r\\n" terminators verbatim.

    Args:
        inputfile: Path (or file-like object) of the zip archive to read.
        outputfile: Path of the file to create or overwrite.

    Returns:
        None.

    Raises:
        IndexError: If the archive contains no members.
    """
    xml_declaration = b'<?xml version="1.0" encoding="UTF-8"?>'
    line_break = b"\r\n"

    # Context managers guarantee all three handles are closed even if reading
    # or writing fails part-way through.
    with zipfile.ZipFile(inputfile, 'r') as archive:
        with archive.open(archive.namelist()[0]) as targetxml:
            with open(outputfile, "wb") as outputxml:
                targetxml.readline()  # skip the first document declaration

                for line in targetxml:
                    if line.startswith(xml_declaration):
                        # A new document starts: finish the current line and
                        # drop the declaration, matching the first document.
                        outputxml.write(line_break)
                        continue
                    outputxml.write(line.strip())

                outputxml.write(line_break)  # terminate the last document



In [138]:

    
%%time
# Flatten the first member of the zip archive into data/result.xml.
# hundle_file has no return statement, so `data` is bound to None here.
data = hundle_file('data/ipa150716.zip', 'data/result.xml')









    



CPU times: user 40 s, sys: 2.13 s, total: 42.1 s
Wall time: 44 s



In [139]:

    
from xml.etree import ElementTree
# Open the file written by hundle_file for reading. The handle is left open
# because the following cell calls inputfile.readline() on it.
inputfile = open('data/result.xml','r')



In [140]:

    
# Read the next line from the open handle (the first line of the file when
# this cell is run once right after opening it).
line = inputfile.readline()



In [141]:

    
# Parse that single line as an XML document; `node` is its root element.
node = ElementTree.fromstring(line)



In [146]:

    
# Show the (name, value) attribute pairs of the first direct child with this tag.
node.find("us-bibliographic-data-application").items()









    Out[146]:





[('lang', 'EN'), ('country', 'US')]



In [179]:

    
# ElementTree positional predicates are 1-based: p[1] is the first <p> under
# <abstract>. p[0] is rejected with SyntaxError on current Python 3 and
# selected the last <p> on older versions.
node.findall('./abstract/p[1]')[0].text









    Out[179]:





"The user places the foot on the foot rest to create an axils, so to prevent the rod from slipping backward, therefore, forcing the rod to pivot on itself and elevate the shovel's scoop, as the user pulls on the shovel's handle upwards."



In [ ]: