In [ ]:
# Browser-like request headers for www.costcotravel.com.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
    'Referer': 'https://www.costcotravel.com',
    'Host': 'www.costcotravel.com',
}

# Form fields for the rental-car search request.  Pickup and dropoff share the
# same ZIP, RADIUS and TIME; those names (and pickupDate / dropoffDate) must
# already be defined when this cell runs.
# NOTE(review): the city label and city code are hard-coded to CHICAGO even
# though ZIP is a variable -- confirm they stay in sync.
data = {
    # General search options.
    'rcs': '1',
    'driverAge': '25',
    'pickupZip': ZIP,
    'pickupCityLocationTypeSearch': '2',
    'dropoffZip': ZIP,
    'dropoffCityLocationTypeSearch': '2',
    'pickupCountry': 'US',
    'dropoffCountry': 'US',
    'pickupCityRadius': RADIUS,
    'dropoffCityRadius': RADIUS,
    'pickupAsAirport': 'false',
    'dropoffAsAirport': 'false',
    # Rental period.
    'pickupDate': pickupDate,
    'dropoffDate': dropoffDate,
    'pickupTime': TIME,
    'dropoffTime': TIME,
    # Pickup location details.
    'pickupLocationCode': ZIP,
    'pickupLocationName': ZIP + ' (CHICAGO, IL, US)',
    'pickupLocationType': 'zipCode',
    'pickupLocationCityCode': 'CHICAGO',
    'pickupLocationStateCode': '',
    'pickupLatitude': '',
    'pickupLongitude': '',
    # Dropoff location details.
    'dropoffLocationCode': ZIP,
    'dropoffLocationName': ZIP + ' (CHICAGO, IL, US)',
    'dropoffLocationType': 'zipCode',
    'dropoffLocationCityCode': 'CHICAGO',
    'dropoffLocationStateCode': '',
    'dropoffLatitude': '',
    'dropoffLongitude': '',
    # Flow flags.
    'fromHomePage': 'true',
    'fromCarVendorMainMenu': 'true',
    'carSearchInModifyFlow': 'false',
    'suppressOutput': 'false',
}
In [ ]:
# Scraping patterns.  They are raw strings so that regex escapes such as
# \S, \d and \( are not parsed as (invalid) Python string escapes.
_CSRF_TOKEN_PATTERN = r'\("Csrf-token", "(\S{128})"\)'
_SESSION_TIMESTAMP_PATTERN = r'namespace.sessionTimestamp = (\d{10,20});'
# Groups: id number, type, agency, agencyCode, title, vendorCode, distance.
_AGENCY_PATTERN = (
    r'\{"id":"S(\d{1,3})","type":"(city|airport)","agency":"(\S{1,40})","agencyCode":"(\S{1,8})",'
    r'"title":"(.{4,50})","isOpen":true,"unAvailableMessage":"","vendorCode":"(\S{2,3})","address"'
    r'.{40,110}"country":"US","distance":"(\S{2,7})","latitude":'
)
# Groups: the <h3> heading text, the data-price attribute value.
_PRICE_PATTERN = (
    r'<h3>(.{5,40})</h3></div></div></div>'
    r'<div style="height: 94px;" class="col col-lg col-1 col-lg-1 text-center height-item test">'
    r'<a data-responsive-referrer="carMatrix" data-category-id="\S{20,50}" '
    r'data-selected="(?:false|true)" data-product-id="\S{20,50}" data-price="(\S{4,30})"'
)


async def getpriceasync(num):
    """Scrape the price list of the ``num``-th agency in the search results.

    The coroutine performs three HTTP round trips on a private session:

    1. GET the landing page to obtain cookies, the CSRF token and the
       session timestamp embedded in the page.
    2. POST the module-level ``data`` form to ``rentalCarSearch.act`` and
       pick the ``num``-th agency out of the response.
    3. POST the agency selection to ``rentalCarAgencySelection.act`` and
       extract ``(heading, price)`` pairs from the returned HTML.

    The blocking ``requests`` calls are run in the event loop's default
    executor so that other coroutines keep running while this one waits on
    the network.

    Args:
        num: 1-based position of the agency in the search results.

    Returns:
        A tuple ``(agency_id, info, total)`` where ``agency_id`` equals
        ``num``, ``info`` is a dict with the keys ``type``, ``brand``,
        ``code``, ``name``, ``bcode`` and ``dist`` plus one entry per scraped
        price (a float when the price text parses, otherwise the raw
        string), and ``total`` is the number of agencies found by the search.

    Raises:
        RuntimeError: the CSRF token or session timestamp is missing from
            the landing page.
        IndexError: ``num`` is outside ``1..total``.
        ValueError: the agency found at position ``num`` carries another id.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
        'Referer': 'https://www.costcotravel.com',
        'Host': 'www.costcotravel.com',
    }
    loop = asyncio.get_event_loop()

    def in_thread(call):
        # requests is synchronous; off-load the call so the loop is not blocked.
        return loop.run_in_executor(None, call)

    # The context manager closes the session's connections on every exit path.
    with requests.Session() as session:
        # Stage 1: landing page -> cookies, CSRF token, session timestamp.
        landing = await in_thread(
            lambda: session.get('https://www.costcotravel.com/h=4005', headers=request_headers))
        cookies = landing.cookies
        csrf_match = re.search(_CSRF_TOKEN_PATTERN, landing.text)
        timestamp_match = re.search(_SESSION_TIMESTAMP_PATTERN, landing.text)
        if csrf_match is None or timestamp_match is None:
            raise RuntimeError(
                'CSRF token or session timestamp not found in the landing page')
        csrf = csrf_match.group(1)
        timestamp = timestamp_match.group(1)

        request_headers.update({'X-Csrf-Token': csrf})
        request_headers.update({'Referer': 'https://www.costcotravel.com/h=4005'})
        cookies.set('Csrf-token', csrf)
        if 'SESSION_TIME_OUT_DETECTED' in cookies:
            cookies.pop('SESSION_TIME_OUT_DETECTED')
        cookies.set('SESSION_TIMESTAMP', str(timestamp))
        await asyncio.sleep(1.0)

        # Stage 2: run the search and select the requested agency.
        search = await in_thread(
            lambda: session.post('https://www.costcotravel.com/rentalCarSearch.act',
                                 data=data, headers=request_headers, cookies=cookies))
        agencies = re.findall(_AGENCY_PATTERN, search.text)
        if not 1 <= num <= len(agencies):
            raise IndexError(
                'agency {} requested but the search returned {} agencies'.format(num, len(agencies)))
        agency = agencies[num - 1]
        agency_id = int(agency[0])
        if agency_id != num:
            raise ValueError(
                'agency at position {} has unexpected id {}'.format(num, agency_id))
        info = {
            'type': agency[1],
            'brand': agency[2],
            'code': agency[3],
            'name': agency[4],
            'bcode': agency[5],
            'dist': agency[6],
        }
        await asyncio.sleep(1.0)

        # Stage 3: select the agency and scrape its price matrix.
        request_headers.update({'Referer': 'https://www.costcotravel.com/h=3001'})
        selection = {
            'cas': 'Load_Forword_Navigation_From_Agency_Results',
            # JSON text such as [{"vendorId":"ET","agencyCodes":["E11576"]}]
            'carAgenciesForVendors': '[{{"vendorId":"{}","agencyCodes":["{}"]}}]'.format(
                info['bcode'], info['code']),
        }
        matrix = await in_thread(
            lambda: session.post('https://www.costcotravel.com/rentalCarAgencySelection.act',
                                 data=selection, cookies=cookies, headers=request_headers))
        prices = re.findall(_PRICE_PATTERN, matrix.text)

    if not prices:
        print("{:03d} NO DATA |".format(agency_id), agency)
    else:
        print("{:03d} OK |".format(agency_id), agency)
    for label, price in prices:
        try:
            info[label] = float(price)
        except ValueError:
            # Keep the raw text when the price is not a plain number.
            info[label] = price
    return (agency_id, info, len(agencies))
def getpriceasync_googletest(num):
    """Fetch the Costco Travel landing page once as a connectivity check.

    Despite the name this is a plain synchronous function.  The session is
    used as a context manager so its connections are released on return and
    on error.

    Args:
        num: Label printed alongside the ``static test`` message.

    Returns:
        The body of the landing page as text.
    """
    with requests.Session() as session:
        response = session.get('https://www.costcotravel.com/h=4005')
        print('{}: static test'.format(num))
        return response.text
In [1]:
def pool_map(func, args, size=10):
    """Schedule ``func(arg)`` for every ``arg`` with bounded concurrency.

    Each call is wrapped so that it must hold a slot of a shared semaphore
    while it runs, so at most ``size`` calls are inside ``func`` at any time.

    Args:
        func: Coroutine function taking a single argument.
        args: Iterable of arguments, one scheduled call per element.
        size: Number of semaphore slots.

    Returns:
        A list of futures, one per element of ``args`` and in the same order.
    """
    gate = asyncio.Semaphore(size)

    async def guarded(item):
        # Explicit acquire/release pair; the slot is returned even if func raises.
        await gate.acquire()
        try:
            return await func(item)
        finally:
            gate.release()

    return [asyncio.ensure_future(guarded(item)) for item in args]
async def run_all(tasks):
    """Await ``tasks`` in completion order, printing each result.

    When a task fails with an ordinary exception every task in ``tasks`` is
    cancelled; the remaining ones are still awaited and their cancellation is
    printed.  After all tasks have been consumed, the most recently captured
    exception is re-raised.

    Args:
        tasks: Collection of tasks/futures; it is iterated again to cancel
            them after a failure.

    Raises:
        Exception: The last non-cancellation exception raised by a task.
    """
    failure = None
    for finished in asyncio.as_completed(tasks):
        try:
            outcome = await finished
            print('=== result', outcome)
        except asyncio.CancelledError as cancelled:
            print("!!! cancel", cancelled)
        except Exception as err:
            print("Exception in task, cancelling!")
            for pending in tasks:
                pending.cancel()
            failure = err
    if failure:
        raise failure
In [ ]:
# Single-threaded asyncio driver.  At most 5 agency lookups are in flight at
# once; how much they actually overlap depends on getpriceasync yielding to
# the event loop while it waits on the network.
def runparser():
    """Collect the price data of every agency returned by the search.

    Agency 1 is queried first on its own because its result also reports how
    many agencies exist.  The remaining agencies (2 up to and including that
    total) are then queried through ``pool_map`` with a pool size of 5.

    Returns:
        A dict mapping agency number to the info dict produced by
        ``getpriceasync``.
    """
    collected = {}
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        print('start t1')
        num, result, numtot = loop.run_until_complete(getpriceasync(1))
        collected[num] = result
        time.sleep(1)
        print('start t2')
        # Agency ids run from 1 to numtot inclusive, so the upper bound must
        # be numtot + 1 or the last agency is never queried.
        pool = pool_map(getpriceasync, range(2, numtot + 1), 5)
        res2 = loop.run_until_complete(asyncio.gather(*pool))
        print('res2', res2)
        # Store every gathered result; each entry already carries its own
        # agency number, so no positional offset is needed.
        for num, result, _ in res2:
            collected[num] = result
    finally:
        # Release the loop even when one of the lookups raised.
        loop.close()
    return collected