In [1]:
# xml1.py
# Parse a small XML document embedded as a triple-quoted string, then
# read out an element's text content and an attribute value.
import xml.etree.ElementTree as ET

data = '''
<person>
<name>Chuck</name>
<phone type="intl">
+1 734 303 4456
</phone>
<email hide="yes"/>
</person>'''

# fromstring() parses the text and returns the root <person> element
tree = ET.fromstring(data)

name_text = tree.find('name').text          # text inside <name>
hide_flag = tree.find('email').get('hide')  # attribute on <email>
print('Name:', name_text)
print('Attr:', hide_flag)
In [2]:
# xml2.py
# This one is more complicated since there are more complex tags: a <users>
# list holding several <user> records, each with an attribute and children.
# FIX: the document was bound to the name `input`, shadowing the built-in
# input(); renamed to users_xml.
import xml.etree.ElementTree as ET

users_xml = '''
<stuff>
<users>
<user x="2">
<id>001</id>
<name>Chuck</name>
</user>
<user x="7">
<id>009</id>
<name>Brent</name>
</user>
</users>
</stuff>'''

stuff = ET.fromstring(users_xml)
# findall() with a path expression selects every <user> under <users>
lst = stuff.findall('users/user')
print('User count:', len(lst))
for item in lst:
    print('Name', item.find('name').text)
    print('Id', item.find('id').text)
    print('Attribute', item.get("x"))
In [3]:
# http://www.saltycrane.com/blog/2011/07/example-parsing-xml-lxml-objectify/
# Fetch an XML comment feed and total the <count> values with lxml.objectify.
import pandas as pd
from lxml import etree, objectify

URL = 'http://python-data.dr-chuck.net/comments_42.xml'
URL_real = 'http://python-data.dr-chuck.net/comments_371511.xml'

# lxml's etree.parse() accepts a URL directly, so no urllib is needed here
tree = etree.parse(URL)

# round-trip through a byte string to get an objectify tree with
# attribute-style element access
root = objectify.fromstring(etree.tostring(tree.getroot()))

# collect every <count> value and report the total
values = [int(node.count.text) for node in root.comments.comment]
print(sum(values))
In [4]:
# Doing the same thing but using urllib — which is silly, since
# etree.parse() can already read straight from a URL.
import urllib.request
import pandas as pd
from lxml import etree, objectify

URL = 'http://python-data.dr-chuck.net/comments_42.xml'
URL_real = 'http://python-data.dr-chuck.net/comments_371511.xml'

# fetch the document ourselves, then hand the file-like response to lxml
response = urllib.request.urlopen(URL_real)
tree = etree.parse(response)

# serialize the root and re-parse with objectify for attribute-style access
root = objectify.fromstring(etree.tostring(tree.getroot()))

# total the <count> value of every <comment>
values = [int(comment.count.text) for comment in root.comments.comment]
print(sum(values))
In [5]:
# Working with what Python calls a dictionary — in JSON terms, an "object".
import json

data = '''
{
"name" : "Chuck",
"phone" : {
"type" : "intl",
"number" : "+1 734 303 4456"
},
"email" : {
"hide" : "yes"
}
}'''

# json.loads() turns the JSON text into native Python objects
info = json.loads(data)

# index into the resulting (nested) dict like any Python dict
print('Name:', info["name"])
print('Hide:', info["email"]["hide"])
In [6]:
# And here we use what would be called a list (of dictionaries) in Python —
# but in JSON is an array.
# FIX: the JSON text was bound to the name `input`, shadowing the built-in
# input(); renamed to users_json.
import json

users_json = '''
[
{ "id" : "001",
"x" : "2",
"name" : "Chuck"
} ,
{ "id" : "009",
"x" : "7",
"name" : "Chuck"
}
]'''

info = json.loads(users_json)
print('User count:', len(info))
for item in info:
    print('Name', item['name'])
    print('Id', item['id'])
    print('Attribute', item['x'])
In [7]:
# Disabled reference code, kept as a string literal so it never executes:
# a Python 3 geocoding loop against the Google Maps API (the interactive
# prompt is commented out and the address hardcoded to 'Ann Arbor, MI').
'''import urllib.request, urllib.parse, urllib.error
import json
serviceurl = 'http://maps.googleapis.com/maps/api/geocode/json?'
while True:
#address = input('Enter location: ')
address = 'Ann Arbor, MI'
if len(address) < 1: break
url = serviceurl + urllib.parse.urlencode({'address': address})
print('Retrieving', url)
uh = urllib.request.urlopen(url)
data = uh.read().decode()
print('Retrieved', len(data), 'characters')
try:
js = json.loads(data)
except:
js = None
if not js or 'status' not in js or js['status'] != 'OK':
print('==== Failure To Retrieve ====')
print(data)
continue
lat = js["results"][0]["geometry"]["location"]["lat"]
lng = js["results"][0]["geometry"]["location"]["lng"]
print('lat', lat, 'lng', lng)
location = js['results'][0]['formatted_address']
print(location)
'''
Out[7]:
In [8]:
# Disabled reference code, kept as a string literal so it never executes:
# lists a Twitter account's friends via the v1.1 API. Depends on a local
# `twurl` helper module (for OAuth-signed URLs) that is not present here.
'''import urllib.request, urllib.parse, urllib.error
import twurl
import json
TWITTER_URL = 'https://api.twitter.com/1.1/friends/list.json'
while True:
print('')
acct = input('Enter Twitter Account:')
if (len(acct) < 1): break
url = twurl.augment(TWITTER_URL,
{'screen_name': acct, 'count': '5'})
print('Retrieving', url)
connection = urllib.request.urlopen(url)
data = connection.read().decode()
headers = dict(connection.getheaders())
print('Remaining', headers['x-rate-limit-remaining'])
js = json.loads(data)
print(json.dumps(js, indent=4))
for u in js['users']:
print(u['screen_name'])
s = u['status']['text']
print(' ', s[:50])
'''
Out[8]:
In [9]:
# Suggested code for inspiration (same demo as the earlier array cell).
# FIX: the JSON text was bound to the name `input`, shadowing the built-in
# input(); renamed to users_json.
import json
from urllib.request import urlopen as uReq  # not used in this cell; template for the assignment below

users_json = '''
[
{ "id" : "001",
"x" : "2",
"name" : "Chuck"
} ,
{ "id" : "009",
"x" : "7",
"name" : "Chuck"
}
]'''

info = json.loads(users_json)
print('User count:', len(info))
for item in info:
    print('Name', item['name'])
    print('Id', item['id'])
    print('Attribute', item['x'])
In [10]:
# Solving the JSON assignment: download a comment feed and sum the 'count'
# field of every comment record.
import json
from urllib.request import urlopen as uReq

sample_data = 'http://python-data.dr-chuck.net/comments_42.json'      # (Sum=2553)
actual_data = 'http://python-data.dr-chuck.net/comments_371515.json'  # (Sum ends with 77)

# FIX: open the connection in a context manager so it is closed even if
# read() raises (the original relied on a manual .close()).
with uReq(actual_data) as uClient:
    page_json = uClient.read()

# parse the data
info = json.loads(page_json)
# check if we have all the data
#print('User count:', len(info['comments']))

# collect the per-comment counts and report the total
values = [item['count'] for item in info['comments']]
print(sum(values))
In [11]:
# Suggested inspirational code — kept as a string literal so it never
# executes. NOTE: this is Python 2 syntax (print statements, raw_input,
# urllib.urlencode/urlopen); it is reference-only and would not run under
# the Python 3 kernel used by the rest of this notebook.
'''
import urllib
import json
# serviceurl = 'http://maps.googleapis.com/maps/api/geocode/json?'
serviceurl = 'http://python-data.dr-chuck.net/geojson?'
while True:
address = raw_input('Enter location: ')
if len(address) < 1 : break
url = serviceurl + urllib.urlencode({'sensor':'false', 'address': address})
print 'Retrieving', url
uh = urllib.urlopen(url)
data = uh.read()
print 'Retrieved',len(data),'characters'
try: js = json.loads(str(data))
except: js = None
if 'status' not in js or js['status'] != 'OK':
print '==== Failure To Retrieve ===='
print data
continue
print json.dumps(js, indent=4)
lat = js["results"][0]["geometry"]["location"]["lat"]
lng = js["results"][0]["geometry"]["location"]["lng"]
print 'lat',lat,'lng',lng
location = js['results'][0]['formatted_address']
print location
'''
Out[11]:
In [27]:
# Solving the geocoding assignment against the course's static endpoint.
import json
from urllib.request import urlopen as uReq
from urllib.parse import urlencode as uEncode

# The program will prompt for a location
#address = input('Enter location: ')
address = 'kansas state university'

# Contact a web service and retrieve JSON for the web service and parse
# that data. We use this API endpoint with a static subset of the Google
# geocoding data:
endpoint = 'http://python-data.dr-chuck.net/geojson?'
url = endpoint + uEncode({'sensor': 'false', 'address': address})
print('Retrieving', url)

# FIX: context manager guarantees the connection is closed even if
# read() raises (the original relied on a manual .close()).
with uReq(url) as uClient:
    data = uClient.read()
print('Retrieved', len(data), 'characters')

# ... and retrieve the first place_id from the JSON
info = json.loads(data)
place_id = info["results"][0]["place_id"]
print(place_id)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: