# In[ ]:
# HACK: the Google API key is stored split in two, with decoy padding and the
# second half reversed, purely to dodge automated secret scanners on GitHub.
# That is obfuscation, not protection: anyone reading this file can rebuild it.
# Set the GOOGLE_MAPS_API_KEY environment variable to supply a private key;
# the embedded value is only a fallback and should be revoked and removed once
# the environment variable is in use everywhere.
import os

gapikey1 = 'asdasd_AIzaSyA8ZTz_'
gapikey2 = 'dsfsdf_o3VhApo4CoehKa6m7_cFLKKBnXt'
# Fallback: first half + reversed second half, minus the 7-character decoy
# prefix and suffix. An empty environment variable also falls back to it.
gapikey = os.environ.get('GOOGLE_MAPS_API_KEY') or (gapikey1 + gapikey2[::-1])[7:-7]
# Form fields sent with the rentalCarSearch.act POST. Pickup and drop-off share
# the same ZIP, radius and time of day; ZIP, RADIUS, TIME, pickupDate and
# dropoffDate must already be defined when this statement runs.
data = {
    # --- general search options ---
    'rcs': '1',
    'driverAge': '25',
    'pickupZip': ZIP,
    'pickupCityLocationTypeSearch': '2',
    'dropoffZip': ZIP,
    'dropoffCityLocationTypeSearch': '2',
    'pickupCountry': 'US',
    'dropoffCountry': 'US',
    'pickupCityRadius': RADIUS,
    'dropoffCityRadius': RADIUS,
    'pickupAsAirport': 'false',
    'dropoffAsAirport': 'false',
    # --- rental period ---
    'pickupDate': pickupDate,
    'dropoffDate': dropoffDate,
    'pickupTime': TIME,
    'dropoffTime': TIME,
    # --- pickup location (city is hard-coded to Chicago) ---
    'pickupLocationCode': ZIP,
    'pickupLocationName': ZIP + ' (CHICAGO, IL, US)',
    'pickupLocationType': 'zipCode',
    'pickupLocationCityCode': 'CHICAGO',
    'pickupLocationStateCode': '',
    'pickupLatitude': '',
    'pickupLongitude': '',
    # --- drop-off location (mirrors the pickup location) ---
    'dropoffLocationCode': ZIP,
    'dropoffLocationName': ZIP + ' (CHICAGO, IL, US)',
    'dropoffLocationType': 'zipCode',
    'dropoffLocationCityCode': 'CHICAGO',
    'dropoffLocationStateCode': '',
    'dropoffLatitude': '',
    'dropoffLongitude': '',
    # --- flow flags ---
    'fromHomePage': 'true',
    'fromCarVendorMainMenu': 'true',
    'carSearchInModifyFlow': 'false',
    'suppressOutput': 'false',
}
# In[ ]:
def getGDistance(origin, destination, mode='transit', departure_time=1511366822, timeout=30):
    """Query the Google Distance Matrix API for one origin/destination pair.

    Args:
        origin: Value sent as the ``origins`` query parameter.
        destination: Value sent as the ``destinations`` query parameter.
        mode: Travel mode sent as ``mode``; defaults to ``'transit'``.
        departure_time: Value sent as ``departure_time``. The default is the
            fixed Unix timestamp 1511366822 (2017-11-22 16:07:02 UTC) that
            was previously hard-coded, so existing callers get unchanged
            requests.
        timeout: Seconds passed to ``requests.get`` as ``timeout`` so a
            stalled connection raises instead of blocking forever.

    Returns:
        The decoded JSON body of the response. It is also printed.
    """
    urlparams = {
        'units': 'imperial',
        'origins': origin,
        'destinations': destination,
        'key': gapikey,
        'mode': mode,
        'departure_time': departure_time,
        'transit_routing_preference': 'fewer_transfers',
    }
    resp = requests.get(
        'https://maps.googleapis.com/maps/api/distancematrix/json',
        params=urlparams,
        timeout=timeout,
    )
    rj = resp.json()
    print(rj)
    return rj
# In[ ]:
def getpriceasync(num):
    """Fetch rental prices for the ``num``-th agency of a costcotravel.com search.

    Despite the name, this function is synchronous: it uses blocking
    ``requests`` calls and ``time.sleep``. It performs three requests in one
    session:

    1. GET ``/h=4005`` and scrape the CSRF token and session timestamp.
    2. POST the module-level ``data`` form to ``rentalCarSearch.act`` and
       scrape the agency list from the response.
    3. POST the chosen agency to ``rentalCarAgencySelection.act`` and scrape
       the price entries from the response.

    Args:
        num: 1-based position of the agency in the scraped agency list.

    Returns:
        A tuple ``(agency_id, info, total)``. ``agency_id`` is the integer
        agency id, ``info`` is a dict of the agency fields plus one entry per
        scraped price (keyed by the ``<h3>`` heading text, stored as ``float``
        when it parses and as the raw string otherwise), and ``total`` is the
        number of agencies matched in the search response.

    Raises:
        AttributeError: If the CSRF token or session timestamp is not found.
        IndexError: If ``num`` is larger than the number of matched agencies.
        AssertionError: If the agency at position ``num`` has a different id.
    """
    # Raw strings keep the regex escapes (\S, \d, \() away from Python's own
    # string-escape processing; the compiled patterns are unchanged.
    agency_pattern = (
        r'{"id":"S(\d{1,3})","type":"(city|airport)","agency":"(\S{1,40})","agencyCode":"(\S{1,8})",'
        r'"title":"(.{4,50})","isOpen":true,"unAvailableMessage":"","vendorCode":"(\S{2,3})","address"'
        r':"(.{5,110})","city":"(.{3,30})","state":"(\S{2})","zip":"'
        r'(\d{5})","country":"US","distance":"(\S{2,7})","latitude":(\S{2,10}),"longitude":(\S{2,10}),'
    )
    price_pattern = (
        r'<h3>(.{5,40})</h3></div></div></div><div style="height: 94px;" '
        r'class="col col-lg col-1 col-lg-1 text-center height-item test">'
        r'<a data-responsive-referrer="carMatrix" data-category-id="\S{20,50}" '
        r'data-selected="(?:false|true)" data-product-id="\S{20,50}" data-price="(\S{4,30})"'
    )
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
        'Referer': 'https://www.costcotravel.com',
        'Host': 'www.costcotravel.com',
    }

    # The context manager closes the session (and its pooled connections)
    # on every exit path, including exceptions.
    with requests.Session() as s:
        # Stage 1: load the page that carries the CSRF token and timestamp.
        r3 = s.get('https://www.costcotravel.com/h=4005', headers=headers)
        cks = r3.cookies
        csrf = re.search(r'\("Csrf-token", "(\S{128})"\)', r3.text).group(1)
        ts = re.search(r'namespace.sessionTimestamp = (\d{10,20});', r3.text).group(1)

        # Send the scraped token back both as a header and as a cookie.
        headers.update({
            'X-Csrf-Token': csrf,
            'Referer': 'https://www.costcotravel.com/h=4005',
        })
        cks.set('Csrf-token', csrf)
        if 'SESSION_TIME_OUT_DETECTED' in cks:
            cks.pop('SESSION_TIME_OUT_DETECTED')
        cks.set('SESSION_TIMESTAMP', ts)
        time.sleep(1.0)

        # Stage 2: run the search and pull every matching agency record.
        r = s.post('https://www.costcotravel.com/rentalCarSearch.act',
                   data=data, headers=headers, cookies=cks)
        res = re.findall(agency_pattern, r.text)
        tp = res[num - 1]
        assert num == int(tp[0]), \
            'agency at position {} has id S{}'.format(num, tp[0])
        k = int(tp[0])
        v = {
            'num': k, 'type': tp[1], 'brand': tp[2], 'code': tp[3],
            'name': tp[4], 'bcode': tp[5], 'addr': tp[6], 'city': tp[7],
            'state': tp[8], 'zip': tp[9], 'dist': tp[10], 'lat': tp[11],
            'lon': tp[12],
        }
        time.sleep(1.0)

        # Stage 3: select the agency and scrape its price entries.
        headers.update({'Referer': 'https://www.costcotravel.com/h=3001'})
        data2 = {
            'cas': 'Load_Forword_Navigation_From_Agency_Results',
            # Produces e.g. [{"vendorId":"ET","agencyCodes":["E11576"]}]
            'carAgenciesForVendors': '[{{"vendorId":"{}","agencyCodes":["{}"]}}]'.format(v['bcode'], v['code']),
        }
        r2 = s.post('https://www.costcotravel.com/rentalCarAgencySelection.act',
                    data=data2, cookies=cks, headers=headers)
        prices = re.findall(price_pattern, r2.text)

    if not prices:
        print("{:03d} ND |".format(k), tp[0], tp[2:5], tp[10:])
    else:
        print("{:03d} OK |".format(k), tp[0], tp[2:5], tp[10:])
    for pr in prices:
        try:
            v[pr[0]] = float(pr[1])
        except ValueError:
            # Not a plain number: keep the raw text.
            v[pr[0]] = pr[1]
    return (k, v, len(res))
def getpriceasync_googletest(num):
    """Fetch https://www.costcotravel.com/h=4005 once and return its body text.

    Args:
        num: Label used only in the printed progress line.

    Returns:
        The response body as text.
    """
    # Close the session once the response has been read so its pooled
    # connection is released instead of lingering until garbage collection.
    with requests.Session() as s:
        r = s.get('https://www.costcotravel.com/h=4005')
    print('{}: static test'.format(num))
    return r.text
# In[ ]:
# Deprecated
def pool_map(func, args, size=10):
    """
    Schedule ``func(item)`` for every item in ``args`` with bounded concurrency.

    ``func`` is called with one item and its result is awaited. At most
    ``size`` of those awaits run at the same time; the rest wait on a shared
    semaphore. Returns the list of scheduled futures in the order of ``args``.
    """
    limiter = asyncio.Semaphore(size)

    async def guarded(item):
        # Hold one semaphore slot for the duration of the call; the slot is
        # released when the ``async with`` block exits.
        async with limiter:
            return await func(item)

    scheduled = []
    for item in args:
        scheduled.append(asyncio.ensure_future(guarded(item)))
    return scheduled
# Deprecated
async def run_all(tasks):
    """Await every task as it completes, printing each result.

    If a task raises an ordinary exception, ``cancel()`` is called on every
    task and the exception is remembered. Cancellations are printed and
    otherwise ignored. After all tasks have been consumed, the most recently
    remembered exception (if any) is re-raised.
    """
    failure = None
    for finished in asyncio.as_completed(tasks):
        try:
            outcome = await finished
            print('=== result', outcome)
        except asyncio.CancelledError as cancelled:
            print("!!! cancel", cancelled)
        except Exception as err:
            print("Exception in task, cancelling!")
            failure = err
            for task in tasks:
                task.cancel()
    if failure:
        raise failure
# In[ ]:
def runparser(threads=1):
    """Collect price data for every agency, fetching the rest in worker threads.

    The first agency is fetched synchronously to learn how many agencies the
    search returned; the remaining ones are handed to ``runtest``, which runs
    ``getpriceasync`` in a thread pool.

    Args:
        threads: Maximum number of worker threads for the pool.

    Returns:
        A dict mapping agency id to the info dict returned by
        ``getpriceasync``.
    """
    resultstemp = dict()
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        num, result, numtot = getpriceasync(1)
        resultstemp[num] = result
        time.sleep(1)
        if numtot != 0:
            # Requests uses blocking io, so a single-threaded event loop would
            # not overlap the requests; they run in executor threads instead.
            # The ``with`` block shuts the pool down once the work is done.
            with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
                loop.run_until_complete(runtest(executor, resultstemp, numtot))
    finally:
        # Close the loop on every path, including an early failure above.
        loop.close()
    return resultstemp
async def runtest(executor, resultstemp, numtot, fetch=None):
    """Fetch agencies 2..``numtot`` in ``executor`` and store them in ``resultstemp``.

    Args:
        executor: Executor used to run the blocking fetch calls.
        resultstemp: Dict updated in place; each fetch result ``r`` is stored
            as ``resultstemp[r[0]] = r[1]``.
        numtot: Total number of agencies. Agency numbers are 1-based, so the
            last one to fetch is ``numtot`` itself.
        fetch: Blocking callable invoked as ``fetch(i)`` for each agency
            number; defaults to the module-level ``getpriceasync``.

    Raises:
        Exception: Whatever a fetch call raised, re-raised once all calls
            have finished; ``resultstemp`` is left untouched in that case.
    """
    if fetch is None:
        fetch = getpriceasync
    loop = asyncio.get_event_loop()
    # range() excludes its stop value, so use numtot + 1 to include the last
    # agency; agency 1 has already been fetched by the caller.
    blocking_tasks = [
        loop.run_in_executor(executor, fetch, i) for i in range(2, numtot + 1)
    ]
    if not blocking_tasks:
        # asyncio.wait() raises ValueError on an empty collection.
        return
    completed, _pending = await asyncio.wait(blocking_tasks)
    # Resolve every result before writing so a failed fetch cannot leave
    # resultstemp partially updated.
    results = [t.result() for t in completed]
    for r in results:
        resultstemp[r[0]] = r[1]