Midterm 2 - Solution Key


In [1]:
import time
import requests
import wikipedia
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

Problem 1


In [2]:
# Problem 1: pull the table of Armenian provinces from Wikipedia and
# extract the column of provincial capitals.
article = wikipedia.page("Administrative divisions of Armenia")
html = article.html()
province_tables = pd.read_html(html)
provinces = province_tables[2]
# Last column holds the capital names; skip the first row (Yerevan, the
# capital city itself) and the last row (country total).
capitals = provinces.iloc[1:-1, -1]
print(capitals)


1         Ashtarak
2         Artashat
3          Armavir
4            Gavar
5          Hrazdan
6         Vanadzor
7           Gyumri
8            Kapan
9           Ijevan
10    Yeghegnadzor
Name: 8, dtype: object

Problem 2


In [3]:
# Problem 2: driving distance from Yerevan via the Google Directions API.
# NOTE(review): the Directions API now requires an API key (`&key=...`);
# this keyless URL worked when the exam was written — confirm before reuse.
base = "https://maps.googleapis.com/maps/api/directions/json?origin=Yerevan,%20Armenia&destination={0},%20Armenia"
def get_distance(city):
    """Return the driving distance from Yerevan to `city` in kilometres.

    Queries the Google Directions API and parses the human-readable
    distance text of the first leg of the first returned route.
    Sleeps 3 seconds after each call to stay under the rate limit.
    """
    url = base.format(city)
    response = requests.get(url).json()
    dist_text = response["routes"][0]["legs"][0]["distance"]["text"]
    # The original `float(dist_text[:-3])` assumed the text always ends in
    # " km"; the API returns "850 m" for short routes and uses thousands
    # separators ("1,234 km") for long ones.  Parse both forms.
    value, unit = dist_text.split()
    dist = float(value.replace(",", ""))
    if unit == "m":
        dist /= 1000.0  # normalise metres to kilometres
    time.sleep(3)
    return dist

In [5]:
# Problem 2 (cont.): which capital lies closest to the median distance?
distances_dict = {}
for city in capitals:
    distances_dict[city] = get_distance(city)
print(distances_dict)

median = np.median(list(distances_dict.values()))
print(median)

# Whole-kilometre deviation of each capital from the median distance.
from_median = {}
for city, dist in distances_dict.items():
    from_median[city] = int(abs(dist - median))
print(min(from_median, key=from_median.get))


{'Ashtarak': 22.3, 'Artashat': 31.9, 'Armavir': 44.6, 'Gavar': 94.4, 'Hrazdan': 54.6, 'Vanadzor': 117.0, 'Gyumri': 122.0, 'Kapan': 300.0, 'Ijevan': 133.0, 'Yeghegnadzor': 123.0}
105.7
Gavar

Problem 3


In [6]:
# Problem 3: open the Roger Ebert reviews page and trigger its lazy
# loading by scrolling to the bottom of the page 10 times.
url = "https://www.rogerebert.com/reviews"
browser = webdriver.Chrome()
browser.get(url)
# `find_element_by_tag_name` was removed in Selenium 4; the generic
# find_element(by, value) form works in both Selenium 3.x and 4.x.
some_button = browser.find_element("tag name", "html")

for i in range(10):
    some_button.send_keys(Keys.END)  # END key scrolls to the page bottom
    time.sleep(1)                    # give newly loaded reviews time to render

page = browser.page_source
# quit() also terminates the chromedriver process; close() only closes the
# window and leaves the driver process running.
browser.quit()

In [7]:
# Parse the fully scrolled page and keep the first 200 review titles.
page = BeautifulSoup(page, "html.parser")
title_links = page.select("h5.title a")
scraped_titles = [link.text for link in title_links]
first_200 = scraped_titles[:200]
print(len(first_200))


200

Problem 4


In [8]:
# Problem 4: download the IDEAS/RePEc ranking of economists.
url = "https://ideas.repec.org/top/top.person.all.html"
# read_html returns every <table> on the page; index 1 is the
# rank / author / score listing used in the next cell.
tables = pd.read_html(url)
my_table = tables[1]

In [9]:
# Median score, the row for Ian Peter Preston, and how many economists
# score strictly below the median.
median = my_table.Score.median()
print(median)

is_preston = my_table.Author.str.contains("Ian Peter Preston")
print(my_table[is_preston])

below_median = my_table[my_table.Score < median]
print(len(below_median))


1434.455
      Rank                                             Author    Score
1368  1369  Ian Peter Preston Department of Economics, Uni...  1462.33
1348

Problem 5


In [10]:
# Problem 5: collect the breaking-news ("lrahos") paragraph texts
# from shamshyan.com.
raw_html = requests.get("https://www.shamshyan.com").content
page = BeautifulSoup(raw_html, "html.parser")
lrahos = [paragraph.get_text() for paragraph in page.select("div.news p.inline")]

Problem 6


In [11]:
# Problem 6: count title-cased words across all collected news items.
lrahos_str = " ".join(lrahos)
# Note: split(" ") (not split()) is kept to match the original tokenisation,
# which can produce empty strings around repeated spaces.
lrahos_splitted = lrahos_str.split(" ")
sum(1 for word in lrahos_splitted if word.istitle())


Out[11]:
152