XML & Modules


In [1]:
# this block is just for the style sheet for the notebook
from IPython.core.display import HTML
def css_styling():
    """Load the notebook's custom style sheet and wrap it for display.

    Reads ``styles/custom.css`` (a path relative to the working directory)
    and returns its contents wrapped in an ``HTML`` object, so that a cell
    ending with a call to this function renders the styles.
    """
    # `with` guarantees the file handle is closed, even if read() raises
    with open("styles/custom.css", "r") as css_file:
        styles = css_file.read()
    return HTML(styles)
css_styling()


Out[1]:
XML Processing
$5.95 two of our famous Belgian Waffles with plenty of real maple syrup 650 $7.95 light Belgian waffles covered with strawberries and whipped cream 900 $8.95 light Belgian waffles covered with an assortment of fresh berries and whipped cream 900 $4.50 thick slices made from our homemade sourdough bread 600 $6.95 two eggs, bacon or sausage, toast, and our ever-popular hash browns 950

In [6]:
# import the appropriate libraries
import xml.etree.ElementTree as ET # xml processing

In [8]:
# read the XML file: parse scripts/menu.xml (path relative to the working
# directory) into an ElementTree object
tree = ET.parse('scripts/menu.xml')

In [15]:
# Show what kind of object the parse returned.
# Single-argument print(...) runs on both Python 2 and 3. The label ends in a
# tab, after which the Python 2 print statement inserted no separating space,
# so the value follows the tab directly.
print('tree element:\t%s' % (tree,))


tree element:	<xml.etree.ElementTree.ElementTree object at 0x04396070>

In [16]:
# get the root of the tree (the document's top-level element)
root = tree.getroot()

In [17]:
# Show the root element.
# Single-argument print(...) runs on both Python 2 and 3. The label ends in a
# space, so the Python 2 print statement added one more separating space
# before the value; both spaces are kept in the format string.
print('root element:\t  %s' % (root,))


root element:	  <Element 'breakfast_menu' at 0x4396470>

In [18]:
# make a list of all the <food> tags that are direct children of the root
food_tags = root.findall('food')
# single-argument print(...) runs on both Python 2 and 3; the two spaces
# after '=' match the original output
print('number of food tags =  %d' % len(food_tags))


number of food tags =  5

In [20]:
# print the <food> tags - it's not what you would expect:
# the list shows Element object reprs, not the XML text
# (parenthesized single-argument print runs on both Python 2 and 3)
print(food_tags)


[<Element 'food' at 0x43964d0>, <Element 'food' at 0x43965f0>, <Element 'food' at 0x4396690>, <Element 'food' at 0x4396750>, <Element 'food' at 0x43967d0>]

In [26]:
# access the entries in the list
first_food_item = food_tags[0]
# single-argument print(...) runs on both Python 2 and 3; the label ends in a
# tab, so the value follows it with no extra space (as in the original output)
print('the first child node is:\t%s' % (first_food_item,))


the first child node is:	<Element 'food' at 0x43964d0>

In [24]:
# here's how we can view a child node's content:
# ET.dump writes the element, including its children, as XML text to standard output
ET.dump(first_food_item)


<food name="Belgian Waffles">
		<price>$5.95</price>
		<description>two of our famous Belgian Waffles with plenty of real maple syrup</description>
		<calories>650</calories>
	</food>
	

In [27]:
# Walk from the root to the first <food> node, then to its <price> subnode.
section = 'food'
tag = 'price'
node = root.find(section)   # first child of root with the tag named by `section`
subnode = node.find(tag)    # first child of that node with the tag named by `tag`
# Single-argument print(...) calls run on both Python 2 and 3; the spacing in
# the format string reproduces the original print-statement output exactly.
print("Path to Price subnode of Food node:")
print("Root: %s  Node:  %s Subnode:  %s" % (root, node, subnode))
print('')  # blank line


Path to Price subnode of Food node:
Root: <Element 'breakfast_menu' at 0x4396470>  Node:  <Element 'food' at 0x43964d0> Subnode:  <Element 'price' at 0x4396510>


In [28]:
# Specify the path to the 'name' attribute of the 'food' node
# (`section` is the tag name set in the earlier cell)
node = root.find(section)
attribute = node.attrib['name']
# Single-argument print(...) calls run on both Python 2 and 3; the spacing in
# the format string reproduces the original print-statement output exactly.
print("Path to Name attribute of Food node:")
print("Root: %s  Node:  %s Attribute:  %s" % (root, node, attribute))
print('')  # blank line


Path to Name attribute of Food node:
Root: <Element 'breakfast_menu' at 0x4396470>  Node:  <Element 'food' at 0x43964d0> Attribute:  Belgian Waffles


In [29]:
# Find the attributes of each food node, then list each subnode's tag and text
# (single-argument print(...) calls run on both Python 2 and 3)
print("All nodes, subnodes and attributes:")
for node in root:
    # one line per child of the root: its tag and its attribute dictionary
    print("%s %s" % (node.tag, node.attrib))
    for subnode in node:
        # one line per grandchild: its tag and its text content
        print("%s %s" % (subnode.tag, subnode.text))
print('')  # blank line


All nodes, subnodes and attributes:
food {'name': 'Belgian Waffles'}
price $5.95
description two of our famous Belgian Waffles with plenty of real maple syrup
calories 650
food {'name': 'Strawberry Belgian Waffles'}
price $7.95
description light Belgian waffles covered with strawberries and whipped cream
calories 900
food {'name': 'Berry-Berry Belgian Waffles'}
price $8.95
description light Belgian waffles covered with an assortment of fresh berries and whipped cream
calories 900
food {'name': 'French Toast'}
price $4.50
description thick slices made from our homemade sourdough bread
calories 600
food {'name': 'Homestyle Breakfast'}
price $6.95
description two eggs, bacon or sausage, toast, and our ever-popular hash browns
calories 950


In [31]:
# Tag every <food> element in the tree with category="breakfast"
for node in tree.iter('food'):
    # writing into the attribute dictionary is what Element.set() does
    node.attrib['category'] = 'breakfast'

In [30]:
# you can search by name
name = 'Belgian Waffles'
# The predicate attaches directly to the tag: food[@name='...'] (no '/' between
# them) selects the <food> children whose name attribute equals `name`.
# NOTE: `name` is interpolated into the expression, so a name containing a
# single quote would produce an invalid path.
for selected_name in root.findall("./food[@name='%s']" % name):
    # report the match using the searched name, then print the description
    # associated with the selected name
    # (single-argument print(...) calls run on both Python 2 and 3)
    print("Found %s!" % name)
    print("%s : %s" % (name, selected_name.find('description').text))


Found Belgian Waffles!
Belgian Waffles : two of our famous Belgian Waffles with plenty of real maple syrup

In [32]:
# find a specific node
# and update a subnode
for node in tree.iter(tag='food'):
    # .get() returns None for a <food> without a name attribute instead of
    # raising KeyError, so such a node is simply skipped
    if node.get('name') == 'French Toast':
        subnode = node.find('price')
        if subnode is None:
            # this <food> has no <price> child, so there is nothing to update
            continue
        # single-argument print(...) calls run on both Python 2 and 3; the
        # double space after the colon matches the original output
        print("Subnode text:  %s" % subnode.text)
        subnode.text = '$6.50'
        print("Modified subnode text:  %s" % subnode.text)


Subnode text:  $4.50
Modified subnode text:  $6.50

In [33]:
# Append a new <food> entry to the root, with one child element per field
new_name = 'Three-Egg Omlette'
new_price = '$7.95'
new_description = 'three-egg omlette with your choice of meat, cheese and vegetables'
new_calories = '900'

# the name is stored as an attribute on the <food> element itself
food_node = ET.SubElement(root, 'food', name=new_name)

# children are created in the same order as the existing entries:
# price, description, calories
new_fields = (
    ('price', new_price),
    ('description', new_description),
    ('calories', new_calories),
)
for field_tag, field_text in new_fields:
    ET.SubElement(food_node, field_tag).text = field_text

In [34]:
# Write out the modified xml to outputMenu.xml (path relative to the working
# directory); the file that was parsed is not the one written to
tree.write('outputMenu.xml')
Python Modules

Python searches for packages in the following order

  • The home directory of the program
  • PYTHONPATH directories (if set)
  • Standard library directories
  • The contents of any .pth files (if present)
  • The site-packages home of third-party extensions
Home directory

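This is the directory containing the script being run (or the current working directory in an interactive session such as this notebook, shown as '' in sys.path); it is searched first, by default.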

PYTHONPATH

This is an environment variable that lists additional directories where modules are located on your system. Python uses the information stored in that variable to search for modules if they cannot be found in the home directory.

Standard Library Directories

This is searched automatically.

.pth Files

A text file with a .pth extension, added to a site directory (such as site-packages or, on some installations, the top-level Python directory), that lists one path per line for the location of the packages.

Site Packages Directory

Python adds this directory to the search path automatically; it is where 3rd party packages are placed when you install them.


In [1]:
# let's look at the system path: the list of locations Python searches,
# in order, when importing a module
import sys
sys.path


Out[1]:
['',
 'C:\\Windows\\system32\\python27.zip',
 'c:\\Python27\\DLLs',
 'c:\\Python27\\lib',
 'c:\\Python27\\lib\\plat-win',
 'c:\\Python27\\lib\\lib-tk',
 'c:\\Python27',
 'c:\\Python27\\lib\\site-packages',
 'c:\\Python27\\lib\\site-packages\\FontTools',
 'c:\\Python27\\lib\\site-packages\\itk',
 'c:\\Python27\\lib\\site-packages\\win32',
 'c:\\Python27\\lib\\site-packages\\win32\\lib',
 'c:\\Python27\\lib\\site-packages\\Pythonwin',
 'c:\\Python27\\lib\\site-packages\\wx-2.8-msw-unicode',
 'c:\\Python27\\lib\\site-packages\\IPython\\extensions']

In [2]:
# check whether the PYTHONPATH environment variable is set for this process
import os
'PYTHONPATH' in os.environ


Out[2]:
False

In [ ]: