# In [1]:
import os
import glob
import asyncio
import logging
import pyppeteer
# Logging: timestamped messages on the root logger, INFO and above.
LOGGER_FORMAT = '%(asctime)s %(message)s'
logging.basicConfig(datefmt='[%H:%M:%S]', format=LOGGER_FORMAT)
log = logging.getLogger()
log.setLevel(logging.INFO)

# Search URL prefix; the query text is appended directly after 'q='.
base_url = 'https://www.google.com/search?q='

# Full keyword list, one search phrase per line. The literal starts with a
# newline, so the first (empty) entry is sliced off.
keywords = '''
levinux
pipulate
seo expert nyc
open source seo
seo software linux
seo software open source
smallest virtual machine
advantages of python programming language
'''.splitlines()[1:]

# Short list that replaces the full one above; this is the value the rest of
# the script sees.
keywords = '''
levinux
pipulate
'''.splitlines()[1:]
# Output directories used by this script. Each one is created if missing and
# emptied so every run starts from a clean slate.
locations = 'pics,csvs'.split(',')
for location in locations:
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(location, exist_ok=True)
    local_wild = './%s/*' % location
    files = glob.glob(local_wild)
    for file in files:
        # os.remove() raises on directories; leave real sub-directories alone
        # instead of aborting the whole clean-up.
        if os.path.isdir(file) and not os.path.islink(file):
            continue
        os.remove(file)
def job_runner(name, value):
    """Submit one screenshot job to the shared event loop without blocking.

    Args:
        name: Keyword for the job; forwarded to ``for_side_effects``.
        value: URL to load; forwarded to ``for_side_effects``.

    Returns:
        The ``concurrent.futures.Future`` produced by
        ``asyncio.run_coroutine_threadsafe``. Callers may ignore it (as
        before) or use it to read the job's result or exception.
    """
    # Hand the coroutine itself to run_coroutine_threadsafe. Creating a Task
    # first with ensure_future() would bind it to whatever loop is current
    # and bypass the thread-safe submission path.
    job = sometime_soon(for_side_effects(name, value))
    # The job only makes progress while the module-level `loop` is running.
    return asyncio.run_coroutine_threadsafe(job, loop)
async def sometime_soon(future):
    """Await *future* and hand back its result.

    Args:
        future: Any awaitable (coroutine, Task or Future).

    Returns:
        Whatever the awaitable produces. Exceptions raised by the awaitable
        propagate to the caller unchanged.
    """
    return await future
async def for_side_effects(name, value):
    """Load *value* in a headless browser and save a screenshot of it.

    The image is written to ``pics/<name>.png``, with spaces in *name*
    replaced by dashes.

    Args:
        name: Label used for the screenshot file name.
        value: URL to navigate to.

    Returns:
        The log message ``"Async saving <name>.png"``.
    """
    # Pass headless=False to pyppeteer.launch() to watch the browser while
    # debugging.
    browser = await pyppeteer.launch()
    try:
        page = await browser.newPage()
        # Tall viewport so the screenshot covers more than the first screen.
        await page.setViewport({'width': 1080, 'height': 2750})
        await page.goto(value)
        # NOTE(review): the matched 'h3.r' elements are fetched but not used
        # anywhere in this function -- presumably a placeholder for later
        # extraction; confirm before removing.
        elements = await page.JJ('h3.r')
        name = name.replace(' ', '-')
        pic_name = "pics/%s.png" % name
        await page.screenshot({'path': pic_name})
    finally:
        # Always shut the browser down, even if navigation or the screenshot
        # fails, so no browser process is left behind.
        await browser.close()
    msg = "Async saving %s.png" % name
    log.info(msg)
    return msg
# Event loop that job_runner() submits its jobs to.
# NOTE(review): nothing in this script starts the loop; this presumably
# relies on an already-running loop such as a notebook kernel's -- confirm.
loop = asyncio.get_event_loop()

# Build one search URL per keyword and queue a screenshot job for it.
# job_runner() only schedules the work, so this loop finishes before the
# screenshots do.
for keyword in keywords:
    keyword_plus = keyword.replace(' ', '+')
    url = '%s%s' % (base_url, keyword_plus)
    print('Sequential: %s' % url)
    job_runner(keyword, url)

print('Done sequential parent script. Stuff continues in the background.')
# In [ ]: