Movie URLs have the form www.imdb.com/title/tt2406566/ (example: Atomic Blonde)
To get the full credits (cast, writers, etc.):
look for the credits table (the HTML snippet originally noted here appears to have been stripped during export);
inside it there will be alternating <a> tags linking back to the actor names and pages
To download full source of website: view-source:http://www.imdb.com/title/tt2406566/fullcredits
In [ ]:
In [2]:
import requests
import urllib.parse as parse
from time import sleep
In [4]:
from requests import Request, Session
In [3]:
title_nums = ['tt0085244'] #The Big Chill
In [5]:
title_nums2 = ["tt0295700"] #Wrong Turn
In [7]:
def make_urls(titles=None):
    """Build full IMDb title URLs.

    Parameters
    ----------
    titles : list of str, optional
        IMDb title IDs (e.g. 'tt0085244'). Defaults to the module-level
        ``title_nums`` list when omitted, preserving the original behavior.

    Returns
    -------
    list of str
        URLs of the form 'http://www.imdb.com/title/<id>/'.
    """
    base_url = "http://www.imdb.com/title/"
    if titles is None:
        # Fall back to the notebook-level default list of title IDs.
        titles = title_nums
    return [base_url + title + '/' for title in titles]
In [6]:
def my_count(start=1000):
    """Yield consecutive integers forever, beginning at *start*.

    Equivalent to ``itertools.count(start)``; kept as a hand-rolled
    generator so the notebook needs no extra import.

    Parameters
    ----------
    start : int, optional
        First value yielded. Defaults to 1000, the original hard-coded
        starting value, so existing callers are unaffected.
    """
    n = start
    while True:
        yield n
        n += 1


# Shared counter instance used elsewhere in the notebook.
numbers = my_count()
In [42]:
def start(my_session=None):
    """Fetch each IMDb title URL and return the last response plus the URL list.

    Parameters
    ----------
    my_session : requests.Session, optional
        Session to reuse across requests. A fresh ``requests.Session`` is
        created when omitted (the original crashed with ``AttributeError``
        when called with the ``None`` default).

    Returns
    -------
    tuple
        ``(r, urls)`` where ``r`` is the last successful response, or
        ``None`` if every request failed (the original raised ``NameError``
        in that case because ``r`` was never bound), and ``urls`` is the
        list produced by ``make_urls()``.
    """
    if my_session is None:
        # requests is imported at the top of the notebook.
        my_session = requests.Session()
    urls = make_urls()
    print('Urls', urls)
    r = None  # stays None if every request raises
    for url in urls:
        try:
            r = my_session.get(url)
            print("request headers", r.request.headers)
            print("response headers", r.headers)
        except Exception as e:
            print("accessing url", url)
            print('Exception encountered at position 1:', e)
    return r, urls
In [8]:
def write_result(response, **kwargs):
    """Save *response*'s raw body, followed by its URL, into ``test1.html``.

    Suitable as a ``requests`` response hook: any extra hook keyword
    arguments are accepted and ignored.
    """
    print('writing file from..', response.url)
    filename = "test1.html"
    # Assemble the exact bytes up front: body first, then the URL.
    payload = response.content + response.url.encode('utf-8')
    with open(filename, 'wb') as f:
        f.write(payload)
    print('saved file %s' % filename)
In [23]:
import asyncio
import aiofiles
import aiohttp
base_url = 'http://stats.nba.com/stats'
HEADERS = {
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)'
}
async def get_players(player_args):
    """Fetch the league-wide player list and extend *player_args* in place.

    Each appended element is an ``(item[0], item[2])`` pair — presumably
    ``(player_id, player_name)`` per the NBA stats schema; verify against
    the endpoint — taken from the first result set of the
    ``commonallplayers`` endpoint.

    Parameters
    ----------
    player_args : list
        Mutated in place; extended with the tuples described above.
    """
    endpoint = '/commonallplayers'
    params = {'leagueid': '00', 'season': '2016-17', 'isonlycurrentseason': '1'}
    url = base_url + endpoint
    print('Getting all players...')
    async with aiohttp.ClientSession() as session:
        # Was headers=None, which defeats the purpose of the module-level
        # HEADERS user-agent; send HEADERS as get_player does.
        async with session.get(url, headers=HEADERS, params=params, timeout=20) as resp:
            data = await resp.json()
    player_args.extend(
        [(item[0], item[2]) for item in data['resultSets'][0]['rowSet']])
async def get_player(player_id, player_name):
    """Fetch one player's info and write the raw JSON text to disk.

    The output file is ``<player_name with spaces as underscores>.json``.

    Parameters
    ----------
    player_id : str or int
        NBA stats player id, sent as the ``playerid`` query parameter.
    player_name : str
        Used for logging and to build the output filename.
    """
    endpoint = '/commonplayerinfo'
    params = {'playerid': player_id}
    url = base_url + endpoint
    print('Getting player', player_name)
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=HEADERS, params=params) as resp:
            print(resp)
            data = await resp.text()
    # Bug fix: the original formatted a *set literal* into the filename,
    # producing names like "{'LeBron_James'}.json".
    filename = player_name.replace(" ", "_") + '.json'
    async with aiofiles.open(filename, 'w') as file:
        await file.write(data)
# Drive the two coroutines: first collect the (id, name) pairs for every
# player, then fetch each player's detail record concurrently via gather.
loop = asyncio.get_event_loop()
player_args = []
loop.run_until_complete(get_players(player_args))
loop.run_until_complete(
    asyncio.gather(
        *(get_player(*args) for args in player_args)
    )
)
In [19]:
!ls
In [15]:
!conda install -y aiofiles
In [16]:
!conda install -y aiohttp
In [4]:
import requests
def print_url(r, *args, **kwargs):
    """Response hook: print the response's URL; extra hook args are ignored."""
    final_url = r.url
    print(final_url)
# Demo: register print_url under the 'response' event so requests invokes it
# with the completed Response object, then issue a request and show its status.
hooks = dict(response=print_url)
r = requests.get('http://httpbin.org', hooks=dict(response=print_url))
print(r.status_code)
In [2]:
# Bug fix: the hook event name is 'response', not 'reponse' — with the
# misspelled key the hook dict is silently ignored by requests.
hooks = dict(response=print_url)
In [3]:
hooks
Out[3]:
In [ ]: