In [ ]:
# Browser-like request headers for www.costcotravel.com.
# NOTE(review): getpriceasync builds its own local copy of this dict, so this
# module-level `headers` does not appear to be read by the functions visible
# in this notebook section -- confirm before removing.
headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
    'Referer':'https://www.costcotravel.com',
    'Host':'www.costcotravel.com',
}
# Form payload that getpriceasync POSTs to rentalCarSearch.act.
# ZIP, RADIUS, TIME, pickupDate and dropoffDate are not defined in this cell;
# they must already exist in the kernel when this cell runs.
# Pickup and drop-off use the same ZIP / RADIUS / TIME values.
data = {
    'rcs':'1',
    'driverAge':'25',
    'pickupZip':ZIP,
    'pickupCityLocationTypeSearch':'2',
    'dropoffZip':ZIP,
    'dropoffCityLocationTypeSearch':'2',
    'pickupCountry':'US',
    'dropoffCountry':'US',
    'pickupCityRadius':RADIUS,
    'dropoffCityRadius':RADIUS,
    'pickupAsAirport':'false',
    'dropoffAsAirport':'false',
    'pickupDate':pickupDate,
    'dropoffDate':dropoffDate,
    'pickupTime':TIME,
    'dropoffTime':TIME,
    'pickupLocationCode':ZIP,
    # The city label is hard-coded to Chicago regardless of the value of ZIP.
    'pickupLocationName':ZIP+' (CHICAGO, IL, US)',
    'pickupLocationType':'zipCode',
    'pickupLocationCityCode':'CHICAGO',
    'pickupLocationStateCode':'',
    'pickupLatitude':'',
    'pickupLongitude':'',
    'dropoffLocationCode':ZIP,
    # Same hard-coded Chicago label as the pickup location above.
    'dropoffLocationName':ZIP+' (CHICAGO, IL, US)',
    'dropoffLocationType':'zipCode',
    'dropoffLocationCityCode':'CHICAGO',
    'dropoffLocationStateCode':'',
    'dropoffLatitude':'',
    'dropoffLongitude':'',
    'fromHomePage':'true',
    'fromCarVendorMainMenu':'true',
    'carSearchInModifyFlow':'false',
    'suppressOutput':'false',
}

In [ ]:
async def getpriceasync(num):
    """Fetch the rental prices offered by the ``num``-th agency of a search.

    The coroutine walks through three HTTP requests on its own
    ``requests.Session``:

    1. GET the landing page and extract the CSRF token and session timestamp.
    2. POST the module-level ``data`` form to ``rentalCarSearch.act`` and parse
       the list of open US agencies out of the response text.
    3. POST the selected agency to ``rentalCarAgencySelection.act`` and parse
       the price matrix out of the returned HTML.

    ``requests`` is a blocking library, so every HTTP call is handed to the
    event loop's default executor; other coroutines keep running while this
    one waits on the network.

    Args:
        num: 1-based position of the agency in the search results.

    Returns:
        A tuple ``(k, v, total)`` where ``k`` is the agency id as an int,
        ``v`` is a dict with the keys ``type``, ``brand``, ``code``, ``name``,
        ``bcode`` and ``dist`` plus one entry per parsed price (a float when
        the price text is numeric, otherwise the raw string), and ``total`` is
        the number of agencies matched in the search response.

    Raises:
        RuntimeError: the CSRF token or session timestamp is missing from the
            landing page.
        IndexError: ``num`` is outside ``1..total``.
        AssertionError: the agency found at position ``num`` carries a
            different id.
    """
    import functools  # local import: the notebook's import cell is not visible here

    loop = asyncio.get_event_loop()

    def in_thread(func, *args, **kwargs):
        # run_in_executor only forwards positional arguments, hence the partial.
        return loop.run_in_executor(None, functools.partial(func, *args, **kwargs))

    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
        'Referer':'https://www.costcotravel.com',
        'Host':'www.costcotravel.com',
    }

    # The context manager closes the session's pooled connections on every
    # exit path, including errors.
    with requests.Session() as s:
        # Stage 1: landing page -> CSRF token, session timestamp and cookies.
        r3 = await in_thread(s.get, 'https://www.costcotravel.com/h=4005', headers=headers)
        cks = r3.cookies

        csrf_match = re.search(r'\("Csrf-token", "(\S{128})"\)', r3.text)
        ts_match = re.search(r'namespace.sessionTimestamp = (\d{10,20});', r3.text)
        if csrf_match is None or ts_match is None:
            raise RuntimeError(
                '{}: CSRF token or session timestamp not found in landing page'.format(num))
        csrf = csrf_match.group(1)
        ts = ts_match.group(1)

        headers.update({'X-Csrf-Token':csrf})
        headers.update({'Referer':'https://www.costcotravel.com/h=4005'})

        cks.set('Csrf-token',csrf)
        if 'SESSION_TIME_OUT_DETECTED' in cks: cks.pop('SESSION_TIME_OUT_DETECTED')
        cks.set('SESSION_TIMESTAMP',str(ts))
        await asyncio.sleep(1.0)

        # Stage 2: run the search and pull the agency records out of the response.
        r = await in_thread(
            s.post, 'https://www.costcotravel.com/rentalCarSearch.act',
            data=data, headers=headers, cookies=cks)

        res = re.findall(
            (r'{"id":"S(\d{1,3})","type":"(city|airport)","agency":"(\S{1,40})","agencyCode":"(\S{1,8})",'
             r'"title":"(.{4,50})","isOpen":true,"unAvailableMessage":"","vendorCode":"(\S{2,3})","address"'
             r'.{40,110}"country":"US","distance":"(\S{2,7})","latitude":'),
            r.text)

        # Explicit bounds check: a plain res[num-1] would silently wrap around
        # for num < 1.
        if not 1 <= num <= len(res):
            raise IndexError(
                'agency {} requested but only {} agencies were found'.format(num, len(res)))
        tp = res[num-1]
        k = int(tp[0])
        # Raised explicitly so the check survives ``python -O``.
        if num != k:
            raise AssertionError(
                'agency at position {} has unexpected id {}'.format(num, k))
        v = {'type':tp[1],'brand':tp[2],'code':tp[3],'name':tp[4],'bcode':tp[5],'dist':tp[6]}
        await asyncio.sleep(1.0)

        # Stage 3: select the agency and scrape its price matrix.
        headers.update({'Referer':'https://www.costcotravel.com/h=3001'})
        data2 = {
            'cas':'Load_Forword_Navigation_From_Agency_Results',
            'carAgenciesForVendors':'[{{"vendorId":"{}","agencyCodes":["{}"]}}]'.format(v['bcode'],v['code']),
        }
        r2 = await in_thread(
            s.post, 'https://www.costcotravel.com/rentalCarAgencySelection.act',
            data=data2, cookies=cks, headers=headers)

    prices = re.findall(r'<h3>(.{5,40})</h3></div></div></div><div style="height: 94px;" class="col col-lg col-1 col-lg-1 text-center height-item test"><a data-responsive-referrer="carMatrix" data-category-id="\S{20,50}" data-selected="(?:false|true)" data-product-id="\S{20,50}" data-price="(\S{4,30})"',r2.text)
    if len(prices) == 0:
        print("{:03d} NO DATA |".format(k),tp)
    else:
        print("{:03d} OK      |".format(k),tp)
        for pr in prices:
            try:
                v[pr[0]] = float(pr[1])
            except ValueError:
                # Non-numeric price text is kept verbatim.
                v[pr[0]] = pr[1]
    return (k,v,len(res))

def getpriceasync_googletest(num):
    """Fetch the Costco Travel landing page once and return its body text.

    Despite its name this is a plain synchronous function that performs a
    single blocking GET.

    Args:
        num: label that is only used in the printed progress line.

    Returns:
        The response body as text.
    """
    # The context manager releases the session's connections once the body
    # has been read.
    with requests.Session() as s:
        r = s.get('https://www.costcotravel.com/h=4005')
        print('{}: static test'.format(num))
        return r.text

In [1]:
def pool_map(func, args, size=10):
    """
    Schedule ``func(item)`` for every item of ``args`` with bounded concurrency.

    Each call is wrapped in a task right away, but a shared semaphore lets at
    most ``size`` of them be inside ``func`` at any moment.

    Returns the list of scheduled tasks, in the order of ``args``.
    """
    gate = asyncio.Semaphore(size)

    async def guarded(item):
        # Explicit acquire/release pair; the slot is released even if func raises.
        await gate.acquire()
        try:
            return await func(item)
        finally:
            gate.release()

    scheduled = []
    for item in args:
        scheduled.append(asyncio.ensure_future(guarded(item)))
    return scheduled

async def run_all(tasks):
    """
    Await every task in completion order and print each result.

    When a task fails with an ordinary exception, all tasks are cancelled;
    the cancellations are reported as they surface, and the most recent
    failure is re-raised once every task has been consumed.
    """
    failure = None
    for finished in asyncio.as_completed(tasks):
        try:
            outcome = await finished
            print('=== result', outcome)
        except asyncio.CancelledError as cancelled:
            print("!!! cancel", cancelled)
        except Exception as err:
            print("Exception in task, cancelling!")
            for task in tasks:
                task.cancel()
            failure = err
    if failure:
        raise failure

In [ ]:
# Drives the whole scrape on a private asyncio event loop. Concurrency is capped
# by pool_map; real overlap still depends on getpriceasync not blocking the loop.
def runparser():
    """Collect the prices of every agency returned by the search.

    Agency 1 is fetched first because its result also reports how many
    agencies exist in total; agencies ``2..total`` are then fetched through
    ``pool_map`` with at most 5 running at once.

    Returns:
        A dict mapping each agency id to the result dict produced by
        ``getpriceasync``.
    """
    resultstemp = dict()
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        print('start t1')
        num, result, numtot = loop.run_until_complete(getpriceasync(1))
        resultstemp[num] = result
        time.sleep(1)

        print('start t2')
        # Agencies are numbered 1..numtot inclusive, so the upper bound is numtot + 1.
        pool = pool_map(getpriceasync, range(2, numtot + 1), 5)
        # Skip gather entirely when agency 1 was the only one.
        res2 = loop.run_until_complete(asyncio.gather(*pool)) if pool else []
        print('res2',res2)
        # Every gathered entry is a real result; none of them may be skipped.
        for num, result, _ in res2:
            resultstemp[num] = result
    finally:
        # Close the loop even when a task raised.
        loop.close()
    return resultstemp