In [2]:
import gevent
from gevent import monkey
monkey.patch_socket()  # Replace the standard socket object with gevent's cooperative sockets
from gevent.lock import Semaphore
import time, random, string, urllib2

In [3]:
def download(url, semaphore):
    """Fetch `url` and return the response body while holding `semaphore`.

    Args:
        url: Address handed to urllib2.urlopen.
        semaphore: Context manager entered for the whole request; callers
            share one instance to cap how many downloads run at once.

    Returns:
        Whatever the response's read() returns (the full body).

    Raises:
        Any exception raised by urllib2.urlopen or read(); the semaphore is
        released and the response closed before it propagates.
    """
    with semaphore:
        response = urllib2.urlopen(url)
        try:
            return response.read()
        finally:
            # Close explicitly so the socket is released as soon as the body
            # has been read instead of whenever the object is collected.
            response.close()

In [4]:
def generate_urls(base, num):
    """Lazily yield `num` URLs of the form base + '1' ... base + str(num).

    Args:
        base: URL prefix to which the 1-based index is appended.
        num: Number of URLs to produce; zero or a negative value yields nothing.

    Yields:
        base concatenated with each index from 1 through num, in order.
    """
    # The upper bound of xrange is exclusive, so num + 1 is needed to get
    # exactly `num` URLs rather than num - 1.
    for i in xrange(1, num + 1):
        yield base + str(i)

In [5]:
def chunk_request(url_list, chunk_size):
    """Spawn one download greenlet per URL and yield each greenlet once it is ready.

    Args:
        url_list: Iterable of URLs; it is consumed completely before the
            first greenlet is yielded.
        chunk_size: Initial value of the semaphore shared by every download,
            which bounds how many of them hold it at the same time.

    Yields:
        The spawned greenlets, in the order gevent.iwait reports them; the
        downloaded body is read from the greenlet by the caller.
    """
    limiter = Semaphore(chunk_size)
    # Schedule every download up front; the shared semaphore, not the number
    # of greenlets, is what throttles the actual requests.
    greenlets = []
    for url in url_list:
        greenlets.append(gevent.spawn(download, url, limiter))
    # Hand greenlets back as gevent reports them ready.
    for finished in gevent.iwait(greenlets):
        yield finished

In [6]:
def run():
    """Download the news pages concurrently and return their combined size.

    Builds the URL sequence with generate_urls(..., 100) and fetches it
    through chunk_request with a chunk size of 10.

    Returns:
        The sum of len() over every downloaded response body.

    Raises:
        The exception a download greenlet failed with, if any.
    """
    urls = generate_urls('http://redmine.scienbizip.com/news/', 100)
    response_future = chunk_request(urls, 10)
    # get() returns the finished greenlet's result or re-raises the exception
    # it died with; reading .value would give None for a failed download and
    # make len() fail with an unrelated TypeError.
    response_size = sum(len(r.get()) for r in response_future)
    return response_size

In [7]:
# Time one execution (a single loop, a single repeat) of the concurrent download.
%timeit -n1 -r1 run()


1 loops, best of 1: 3.12 s per loop

In [8]:
start = time.time()
for u in generate_urls('http://redmine.scienbizip.com/news/', 100):
    data = urllib2.urlopen(u).read()
print time.time() - start


6.4350001812

如果要作 HTTP requests,可以透過 grequests,它結合了 requests 與 gevent。


In [9]:
import grequests

In [10]:
# 這是一個 generator,不會真正取值
req_generator = (grequests.get(u) for u in generate_urls('http://redmine.scienbizip.com/news/', 100))
# 同時10路執行,這是一個 generator,不會真正取值
resp_generator = grequests.imap(req_generator, 10)
# 這時才會真正取值
%timeit -n1 -r1 [r for r in resp_generator]


1 loops, best of 1: 4.75 s per loop

In [11]:
req_generator = (grequests.get(u) for u in generate_urls('http://redmine.scienbizip.com/news/', 100))
resp_generator = grequests.imap(req_generator, 100)
%timeit -n1 -r1 [r for r in resp_generator]


1 loops, best of 1: 3.83 s per loop

In [ ]: