練習


In [1]:
import os
import hashlib
import requests
import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By

from fake_useragent import UserAgent
from PIL import Image

url = 'https://www.google.com/recaptcha/demo/recaptcha'
fu = UserAgent()

In [2]:
results = os.path.abspath('../results')
if not os.path.exists(results):
    os.makedirs(results)

try:
    driver = webdriver.Chrome()
    driver.get(url)
    driver.maximize_window()
    driver.implicitly_wait(10)
    compare_url = ''
    
    for i in range(5):
        # get image
        img_el = driver.find_element(By.XPATH, '//div[@id="recaptcha_image"]/img')
        img_url = img_el.get_attribute('src')
        img_filename = hashlib.md5(img_url.encode('utf-8')).hexdigest()
        compare_url = img_url

        headers = {'User-Agent': fu.random}
        img_resp = requests.get(img_url, stream=True, headers=headers)
        img = Image.open(img_resp.raw)
        img_filename = '{}.{}'.format(img_filename, img.format)
        img_filename = os.path.join(results, img_filename)
        img.save(img_filename)
        print('Save img - {}'.format(img_filename))
        
        # re-generate image
        btn_refresh = driver.find_element(By.XPATH, '//*[@id="recaptcha_reload_btn"]').click()
        time.sleep(2)
        

except Exception as e:
    print(e)
finally:
    driver.quit()


Save img - /home/dirl/github/Python-Crawling-Tutorial/results/ceecac6a5a9677750a69c80a87f26080.JPEG
Save img - /home/dirl/github/Python-Crawling-Tutorial/results/ceecac6a5a9677750a69c80a87f26080.JPEG
Save img - /home/dirl/github/Python-Crawling-Tutorial/results/4cce70c2cdde67af52e27920693da213.JPEG
Save img - /home/dirl/github/Python-Crawling-Tutorial/results/1682c3490f1ec9df1da4a43407f890b7.JPEG
Save img - /home/dirl/github/Python-Crawling-Tutorial/results/aa6a4d1bfa181fc53636a341562fb2ea.PNG