In [ ]:
# Form fields submitted with the rental-car search POST (passed as ``data=``
# to the ``rentalCarSearch.act`` request). ZIP, RADIUS, TIME, pickupDate and
# dropoffDate are expected to be defined before this statement runs.
# Pick-up and drop-off are deliberately configured to the same location.
# NOTE(review): the city label/code is hard-coded to CHICAGO regardless of
# the value of ZIP -- confirm this is intended for other zip codes.
data = {
    # --- general search options ---
    'rcs': '1',
    'driverAge': '25',

    # --- zip-code based search area ---
    'pickupZip': ZIP,
    'pickupCityLocationTypeSearch': '2',
    'dropoffZip': ZIP,
    'dropoffCityLocationTypeSearch': '2',
    'pickupCountry': 'US',
    'dropoffCountry': 'US',
    'pickupCityRadius': RADIUS,
    'dropoffCityRadius': RADIUS,
    'pickupAsAirport': 'false',
    'dropoffAsAirport': 'false',

    # --- rental period ---
    'pickupDate': pickupDate,
    'dropoffDate': dropoffDate,
    'pickupTime': TIME,
    'dropoffTime': TIME,

    # --- pick-up location descriptor ---
    'pickupLocationCode': ZIP,
    'pickupLocationName': ZIP + ' (CHICAGO, IL, US)',
    'pickupLocationType': 'zipCode',
    'pickupLocationCityCode': 'CHICAGO',
    'pickupLocationStateCode': '',
    'pickupLatitude': '',
    'pickupLongitude': '',

    # --- drop-off location descriptor (mirrors pick-up) ---
    'dropoffLocationCode': ZIP,
    'dropoffLocationName': ZIP + ' (CHICAGO, IL, US)',
    'dropoffLocationType': 'zipCode',
    'dropoffLocationCityCode': 'CHICAGO',
    'dropoffLocationStateCode': '',
    'dropoffLatitude': '',
    'dropoffLongitude': '',

    # --- navigation / flow flags ---
    'fromHomePage': 'true',
    'fromCarVendorMainMenu': 'true',
    'carSearchInModifyFlow': 'false',
    'suppressOutput': 'false',
}
In [ ]:
# Patterns are raw strings so that regex escapes such as ``\S`` and ``\(`` are
# not (mis)interpreted as Python string escapes.

# Anti-CSRF token embedded in the landing page.
_CSRF_TOKEN_RE = re.compile(r'\("Csrf-token", "(\S{128})"\)')

# Session timestamp embedded in the landing page.
_SESSION_TS_RE = re.compile(r'namespace.sessionTimestamp = (\d{10,20});')

# One open agency entry in the search response. Capture groups, in order:
# 0 id, 1 type, 2 agency, 3 agencyCode, 4 title, 5 vendorCode, 6 address,
# 7 city, 8 state, 9 zip, 10 distance, 11 latitude, 12 longitude.
_AGENCY_RE = re.compile(
    r'{"id":"S(\d{1,3})","type":"(city|airport)","agency":"(\S{1,40})","agencyCode":"(\S{1,8})",'
    r'"title":"(.{4,50})","isOpen":true,"unAvailableMessage":"","vendorCode":"(\S{2,3})","address"'
    r':"(.{5,110})","city":"(.{3,30})","state":"(\S{2})","zip":"'
    r'(\d{5})","country":"US","distance":"(\S{2,7})","latitude":(\S{2,10}),"longitude":(\S{2,10}),'
)

# One cell of the price matrix: (heading text, data-price attribute).
_PRICE_RE = re.compile(
    r'<h3>(.{5,40})</h3></div></div></div>'
    r'<div style="height: 94px;" class="col col-lg col-1 col-lg-1 text-center height-item test">'
    r'<a data-responsive-referrer="carMatrix" data-category-id="\S{20,50}" '
    r'data-selected="(?:false|true)" data-product-id="\S{20,50}" data-price="(\S{4,30})"'
)


def getpriceasync(num):
    """Fetch the price table for the ``num``-th agency of the car search.

    Despite its name this function is blocking: it uses ``requests`` and
    ``time.sleep`` and is meant to be run in a worker thread.

    Steps: load the landing page to obtain cookies, the CSRF token and the
    session timestamp; POST the module-level ``data`` search form; pick the
    ``num``-th matched agency; POST the agency selection and scrape prices.

    Args:
        num: 1-based position of the agency in the search results.

    Returns:
        ``(k, v, total)`` on success, where ``k`` is the agency number, ``v``
        is a dict with the keys ``type``, ``brand``, ``code``, ``name``,
        ``bcode`` and ``dist`` plus one entry per scraped price (heading ->
        ``float``, or the raw string if it is not numeric), and ``total`` is
        the number of agencies matched in the search response.
        ``(num, None, None)`` if the agency cannot be located or the price
        request fails.

    Raises:
        RuntimeError: if the landing page lacks the CSRF token or timestamp.
        Exceptions raised by ``requests`` during the landing-page GET or the
        search POST propagate to the caller.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
        'Referer': 'https://www.costcotravel.com',
        'Host': 'www.costcotravel.com',
    }
    # The context manager releases the session's pooled connections on exit.
    with requests.Session() as s:
        r3 = s.get('https://www.costcotravel.com/h=4005', headers=headers)
        cks = r3.cookies

        token_match = _CSRF_TOKEN_RE.search(r3.text)
        stamp_match = _SESSION_TS_RE.search(r3.text)
        if token_match is None or stamp_match is None:
            raise RuntimeError(
                'CSRF token or session timestamp not found in landing page')
        csrf = token_match.group(1)
        ts = stamp_match.group(1)

        # Send the token both as a header and as a cookie, together with the
        # session timestamp cookie.
        headers.update({'X-Csrf-Token': csrf})
        headers.update({'Referer': 'https://www.costcotravel.com/h=4005'})
        cks.set('Csrf-token', csrf)
        if 'SESSION_TIME_OUT_DETECTED' in cks:
            cks.pop('SESSION_TIME_OUT_DETECTED')
        cks.set('SESSION_TIMESTAMP', ts)

        time.sleep(1.0)  # pause between requests
        r = s.post('https://www.costcotravel.com/rentalCarSearch.act',
                   data=data, headers=headers, cookies=cks)
        res = _AGENCY_RE.findall(r.text)

        try:
            tp = res[num - 1]
            k = int(tp[0])
            # Explicit check instead of ``assert`` (asserts vanish under -O):
            # the entry found at this position must carry the expected id.
            if k != num:
                return (num, None, None)
            v = {
                'type': tp[1],
                'brand': tp[2],
                'code': tp[3],
                'name': tp[4],
                'bcode': tp[5],
                # Group 10 is the distance; group 6 (used previously) is the
                # street address.
                'dist': tp[10],
            }

            time.sleep(1.0)  # pause between requests
            headers.update({'Referer': 'https://www.costcotravel.com/h=3001'})
            data2 = {
                'cas': 'Load_Forword_Navigation_From_Agency_Results',
                'carAgenciesForVendors': '[{{"vendorId":"{}","agencyCodes":["{}"]}}]'.format(v['bcode'], v['code']),
                # 'uid': '1504759332852_756.0475947513028'  (not sent)
            }
            r2 = s.post('https://www.costcotravel.com/rentalCarAgencySelection.act',
                        data=data2, cookies=cks, headers=headers)
            prices = _PRICE_RE.findall(r2.text)

            if not prices:
                print("{:03d} NO DATA |".format(k), tp)
            else:
                print("{:03d} OK |".format(k), tp)

            for pr in prices:
                try:
                    v[pr[0]] = float(pr[1])
                except ValueError:
                    # Keep non-numeric price text verbatim.
                    v[pr[0]] = pr[1]
            return (k, v, len(res))
        except Exception:
            # Best effort: report this agency as failed rather than raising.
            # ``Exception`` (not a bare except) lets KeyboardInterrupt and
            # SystemExit through.
            return (num, None, None)
In [ ]:
def runparser(threads=1):
    """Collect price data for every agency returned by the search.

    The first agency is fetched synchronously because its result also reports
    how many agencies the search matched. The remaining agencies are fetched
    through ``runtest`` on a thread pool, since ``getpriceasync`` is blocking.

    Args:
        threads: maximum number of worker threads in the pool.

    Returns:
        dict mapping agency number to its details dict. The entry for the
        first agency is ``None`` if that initial fetch failed, in which case
        no further agencies are requested.
    """
    resultstemp = dict()

    num, result, numtot = getpriceasync(1)
    resultstemp[num] = result

    # A failed first fetch reports ``None`` as the total, and a total of one
    # means the only agency has already been fetched: nothing left to do.
    if numtot is None or numtot < 2:
        return resultstemp

    time.sleep(1)

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    # Requests uses blocking io, so a single-threaded event loop would not
    # help; the work is dispatched to separate threads instead. The ``with``
    # block shuts the pool down once all work is finished.
    with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
        try:
            loop.run_until_complete(runtest(executor, resultstemp, numtot))
        finally:
            loop.close()
    return resultstemp
async def runtest(executor, resultstemp, numtot):
    """Fetch agencies 2..``numtot`` concurrently and merge them into a dict.

    Each agency is fetched by ``getpriceasync`` running on ``executor``.
    Successful results are stored in ``resultstemp`` under the agency number;
    for failed ones the agency number is printed.

    Args:
        executor: executor on which the blocking fetches are run.
        resultstemp: dict updated in place with the fetched details.
        numtot: total number of agencies (agency 1 is assumed to have been
            fetched already). ``None`` or a value below 2 means there is
            nothing to do.
    """
    # ``asyncio.wait`` rejects an empty collection, so bail out early.
    if numtot is None or numtot < 2:
        return

    loop = asyncio.get_event_loop()
    # Agency numbers are 1-based and ``numtot`` itself is valid, hence the +1.
    blocking_tasks = [
        loop.run_in_executor(executor, getpriceasync, i)
        for i in range(2, numtot + 1)
    ]
    completed, _ = await asyncio.wait(blocking_tasks)

    for task in completed:
        r = task.result()
        if r[1]:
            resultstemp[r[0]] = r[1]
        else:
            print(r[0])