In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from pprint import pprint
from bs4 import BeautifulSoup
from selenium import webdriver
import re
import json
from lxml import html
In [12]:
# Example of JSON-shaped data written as a Python literal:
# a list of records, each with a "name" and a "surname".
my_json = [
    {"name": "Hrant", "surname": "Davtyan"},
    {"name": "Davit", "surname": "Abgaryan"},
]
In [13]:
# Endpoint requested below; its response is later parsed with response.json().
url = "http://api.open-notify.org/astros.json"
In [17]:
# Fetch the endpoint.
# - timeout: without it requests waits indefinitely if the server never
#   answers, which would leave the cell (and the kernel) hanging.
# - raise_for_status(): turns a 4xx/5xx answer into an exception here,
#   instead of letting a later .json() call fail on an error page.
response = requests.get(url, timeout=10)
response.raise_for_status()
# Raw body of the response, displayed as the cell output.
response.content
Out[17]:
In [18]:
# Check which Python type is used for the raw response body.
type(response.content)
Out[18]:
In [29]:
# If a website (API) returns JSON directly rather than HTML,
# requests can decode the body into Python objects for you.
# Note: this rebinds my_json, replacing the example list defined earlier.
my_json = response.json()
In [33]:
# Pretty-print the decoded response so its nesting is easy to read.
pprint(my_json)
In [41]:
# Collect the "name" of every entry under "people",
# addressing each entry by its position in the list.
[my_json["people"][idx]["name"] for idx, _ in enumerate(my_json["people"])]
Out[41]:
In [42]:
# Same result with less code: iterate over the entries themselves
# instead of over their indices.
[person["name"] for person in my_json["people"]]
Out[42]:
In [44]:
# pandas can read JSON as well; however, it loses the structure
# if the JSON document is nested, as shown below.
# Note: the URL is passed here, so pandas retrieves the document itself
# rather than reusing the response fetched above.
df = pd.read_json(url)
In [45]:
# Display the DataFrame built from the JSON document.
df
Out[45]:
In [53]:
# Save the decoded JSON to a file.
# The json.dump call must sit inside the with-block so that the file is
# open while writing and is closed automatically afterwards.
# The encoding is given explicitly so the result does not depend on the
# platform's default text encoding.
with open("my_json.json", "w", encoding="utf-8") as f:
    json.dump(my_json, f, indent=4)  # indent=4 -> human-readable layout
In [55]:
# Read the JSON document back from the file.
# json.load must run inside the with-block, while the file is still open.
# The encoding is given explicitly so reading does not depend on the
# platform's default text encoding.
with open("my_json.json", "r", encoding="utf-8") as f:
    data = json.load(f)
In [50]:
# Pretty-print what was read back from the file.
pprint(data)
In [75]:
# Creating a JSON document as a Python string (hand-written sample data).
json_str = '''
{"message": "success",
"number": 6,
"people": [{"craft": "ISS", "name": "Oleg Artemyev"},
{"craft": "ISS", "name": "Oleg Artemyev"}]}'''
In [74]:
# str() gives Python's own text representation of the object,
# which is not the same thing as serialising it with the json module.
str(my_json)
Out[74]:
In [60]:
# Show the raw JSON text exactly as it is stored in the string.
print(json_str)
In [77]:
# Loading JSON from a string: json.loads parses text,
# whereas json.load (used earlier) reads from an open file object.
new_json = json.loads(json_str)
In [80]:
# Pretty-print the objects parsed from the string.
pprint(new_json)
In [163]:
# Sample XML snippet: two sibling <person> elements, the first one carrying
# a class attribute. There is no single enclosing root element, so this is
# a fragment rather than a complete, well-formed XML document.
my_xml = """
<person class="Dilijan">
<name>Hrant</name>
<surname>Davtyan</surname>
</person>
<person>
<name>Davit</name>
<surname>Abgaryan</surname>
</person>
"""
In [83]:
# The XML above would look like this if it were converted to JSON.
# The two "person" objects are wrapped in an array: two top-level objects
# merely separated by a comma do not form a valid JSON document and would
# be rejected by json.loads.
# Note: the class="Dilijan" attribute of the first <person> is not
# represented in this JSON version.
xml_json = """
[{"person":
{"name":"Hrant",
"surname":"Davtyan"}},
{"person":
{"name":"Davit",
"surname":"Abgaryan"}}]
"""
In [124]:
# Parse the snippet with lxml's html module and inspect the resulting type.
# NOTE(review): this is the HTML parser, not a strict XML parser; presumably
# chosen because it tolerates the snippet's two top-level elements - confirm.
tree = html.document_fromstring(my_xml)
type(tree)
Out[124]:
In [128]:
# Counterpart of get_text() in BeautifulSoup: take all text of the tree,
# drop the newlines and trim surrounding whitespace.
(
    tree.text_content()
    .replace("\n", "")
    .strip()
)
Out[128]:
In [129]:
# Similar to find_all() in BeautifulSoup: the XPath expression selects every
# <name> element that is a direct child of a <person>, anywhere in the tree.
tree.xpath("//person/name")
Out[129]:
In [130]:
# Text of every <name> element found directly under a <person>.
my_names = [name_el.text_content() for name_el in tree.xpath("//person/name")]
In [131]:
# Show the extracted names.
print(my_names)
In [135]:
# Similar to select() in BeautifulSoup: use a CSS selector (here the tag
# name "person") instead of an XPath expression.
tree.cssselect("person")
Out[135]:
In [170]:
# Filter by attribute: take the first <person> whose class attribute equals
# "Dilijan" and return all text contained in it.
tree.xpath("//person[@class='Dilijan']")[0].text_content()
Out[170]:
In [171]:
# "@class" addresses the attribute itself, so this returns the class
# attribute values of the <person> elements rather than the elements.
tree.xpath("//person/@class")
Out[171]:
In [ ]: