In [1]:
import time
from io import StringIO

import requests
import wikipedia
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
In [2]:
# Fetch the Wikipedia article and extract the province capitals.
response = wikipedia.page("Administrative divisions of Armenia")
page = response.html()
# pandas >= 2.1 deprecates passing a literal HTML string to read_html;
# wrap it in StringIO instead.
tables = pd.read_html(StringIO(page))
# NOTE(review): table index 2 and the slice below are tied to the current
# page layout (header row first, totals row last) -- confirm if the
# article changes.
my_table = tables[2]
capitals = my_table.iloc[:, -1][1:-1]  # last column holds the capital names
print(capitals)
In [3]:
base = "https://maps.googleapis.com/maps/api/directions/json?origin=Yerevan,%20Armenia&destination={0},%20Armenia"

def get_distance(city, pause=3):
    """Return the driving distance (in km) from Yerevan to `city`, Armenia.

    Queries the Google Directions API and parses the human-readable
    distance text of the first route's first leg.

    Parameters
    ----------
    city : str
        Destination city name (interpolated into the request URL).
    pause : float, optional
        Seconds to sleep after the request, to rate-limit repeated calls.

    Returns
    -------
    float
        Distance in kilometres.

    Raises
    ------
    ValueError
        If the API response contains no routes (bad city name, quota, or
        missing API key -- NOTE(review): the request URL carries no `key=`
        parameter; the Directions API normally requires one).
    """
    url = base.format(city)
    response = requests.get(url).json()
    routes = response.get("routes") or []
    if not routes:
        raise ValueError(
            "No route for {0!r}: status={1}".format(city, response.get("status"))
        )
    # dist_text looks like "123 km" or "1,234 km"; slicing off the last
    # three characters (the original approach) breaks on thousands
    # separators, so take the first whitespace-delimited token instead.
    dist_text = routes[0]["legs"][0]["distance"]["text"]
    dist = float(dist_text.split()[0].replace(",", ""))
    time.sleep(pause)  # be polite to the API between successive calls
    return dist
In [5]:
# Distance from Yerevan to every province capital.
distances_dict = {city: get_distance(city) for city in capitals}
print(distances_dict)

median = np.median(list(distances_dict.values()))
print(median)

# Absolute distance of each capital from the median. Keep the values as
# floats: truncating with int() (as the original did) can put two cities
# in the same integer bucket and make min() pick the wrong one.
from_median = {city: abs(dist - median) for city, dist in distances_dict.items()}
print(min(from_median, key=from_median.get))
In [6]:
url = "https://www.rogerebert.com/reviews"
browser = webdriver.Chrome()
try:
    browser.get(url)
    # Selenium 4 removed find_element_by_tag_name; use the By locator API.
    scroll_target = browser.find_element(By.TAG_NAME, "html")
    # Press END repeatedly so the infinite-scroll page loads more reviews
    # before we capture the HTML.
    for _ in range(10):
        scroll_target.send_keys(Keys.END)
        time.sleep(1)
    page = browser.page_source
finally:
    # quit() (unlike close()) also shuts down the chromedriver process,
    # so we don't leak a driver on every run.
    browser.quit()
In [7]:
# Parse the scraped HTML and collect the review titles
# (anchors inside <h5 class="title"> elements).
page = BeautifulSoup(page, "html.parser")
scraped_titles = []
for anchor in page.select("h5.title a"):
    scraped_titles.append(anchor.text)
first_200 = scraped_titles[:200]
print(len(first_200))
In [8]:
# Fetch the IDEAS/RePEc "top economists" page; pandas parses every
# HTML <table> it finds into a list of DataFrames.
url = "https://ideas.repec.org/top/top.person.all.html"
tables = pd.read_html(url)
# The second table on the page is the actual ranking.
my_table = tables[1]
In [9]:
# Median ranking score across all listed authors.
median = my_table["Score"].median()
print(median)

# Row(s) matching Ian Peter Preston.
preston_rows = my_table[my_table["Author"].str.contains("Ian Peter Preston")]
print(preston_rows)

# Number of authors scoring strictly below the median.
below_median = my_table[my_table["Score"] < median]
print(len(below_median))
In [10]:
# Download the shamshyan.com front page and parse it.
page = requests.get("https://www.shamshyan.com").content
page = BeautifulSoup(page, "html.parser")
# Text of each news-teaser paragraph on the page.
lrahos = [teaser.get_text() for teaser in page.select("div.news p.inline")]
In [11]:
# Count title-cased tokens across all scraped news snippets.
# Note: split(" ") (not split()) is deliberate -- it preserves the
# original tokenisation, including empty strings from doubled spaces.
lrahos_str = " ".join(lrahos)
lrahos_splitted = lrahos_str.split(" ")
sum(1 for word in lrahos_splitted if word.istitle())
Out[11]: