In [6]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import re

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")
#pattern that matches the View Catalog links and captures the auction id
detail_re = re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)")
#get list of View Catalog links
vcat = soup.findAll("a", {"href": detail_re})
with open('uauction.csv', 'w') as afile:
    for item in vcat:
        vcat_link = item.attrs['href']
        aid = detail_re.match(vcat_link).group(1)  #auction id, e.g. "337"
        afile.write('{0},{1}\n'.format(aid, vcat_link))
#the file has no header row, so name the columns explicitly
udf = pd.read_csv('uauction.csv', header=None, names=["aid", "URL"])
#set index
udf.set_index("aid", inplace=True)
#log just the auction ids
udf.drop('URL', axis='columns').to_csv('uauction_log.csv', header=False)
#rebuild the catalog URL for each auction id in the index
udf['URL'] = "https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen" + udf.index.astype(str)
udf
Out[6]:
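With the table of upcoming auction ids in hand, each catalog page can be visited in turn. A minimal sketch of that next step, assuming udf is the dataframe built above (the loop and the one-second pause are illustrative, not part of the original):
In [ ]:
import time
from urllib.request import urlopen
from bs4 import BeautifulSoup

for aid, link in udf['URL'].items():
    #aid is the index value; link is the rebuilt catalog URL
    page = BeautifulSoup(urlopen(link), "lxml")
    print(aid, page.title.text if page.title else "(no title)")
    time.sleep(1)  #illustrative pause to be polite to the server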
In [1]:
#get initial auction details - auction id, number of items
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
import pandas as pd

url = "https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen337"
html = urlopen(url)
soup = BeautifulSoup(html, "lxml")
#pull the auction id (e.g. "337") out of the URL
aid = re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)").match(url).group(1)
#get categories from web page
category = soup('table')[0].findAll('tr')[4].findAll('td')[1].text
#formatting - replace " (" with ":" and drop the closing paren
new_cat = category.replace(" (", ":").replace(")", "")
#split on dashes
category_list = new_cat.split(" - ")
#remove "Catalog" from the list
category_list.remove('Catalog')
#put the formatted "name:count" items in a dictionary
category = {}
for line in category_list:
    name, count = line.split(":")
    category[name] = count
#remove instructions if present
#del(category["1 INSTRUCTIONS"])
category.update({'AID': aid})
#write category list to file
with open(aid + '_category.csv', 'w') as file:
    for key, value in category.items():
        file.write('{0},{1}\n'.format(key, value))
item_count = category['ALL ITEMS']
df = pd.DataFrame({"AID": aid, "item_count": item_count}, index=[0])
df
Out[1]:
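For reference, the scraped category cell is assumed to be a single line like "Catalog - ALL ITEMS (274) - TOOLS (118) - OFFICE (42)" (the names and counts here are made up); the replace/split steps above turn it into a name-to-count dictionary. A quick check against a hard-coded sample:
In [ ]:
#hypothetical page text, for illustration only
sample = "Catalog - ALL ITEMS (274) - TOOLS (118) - OFFICE (42)"
parts = sample.replace(" (", ":").replace(")", "").split(" - ")
parts.remove('Catalog')
print(dict(p.split(":") for p in parts))
#{'ALL ITEMS': '274', 'TOOLS': '118', 'OFFICE': '42'}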
In [41]:
from datetime import datetime

def getdate():
    now = datetime.now()
    print(now.strftime("%A, %d %B %Y %I:%M%p"))

#auction number
auction_number = "330"
#rs catalog, e.g. https://www.maxanet.com/cgi-bin/mnprint.cgi?rosen317
cat_base_url = "https://www.maxanet.com/cgi-bin/mnprint.cgi?rosen"
#detail listing, e.g. https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen317
detail_page_base_url = "https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen"
#item bid history, e.g. https://www.maxanet.com/cgi-bin/mnhistory.cgi?rosen317/1
item_base_url = "https://www.maxanet.com/cgi-bin/mnhistory.cgi?rosen"
#event bid log, e.g. https://www.maxanet.com/cgi-bin/mnbidlog.cgi?rosen317
log_base_url = "https://www.maxanet.com/cgi-bin/mnbidlog.cgi?rosen"

getdate()
print(detail_page_base_url + auction_number)
print(cat_base_url + auction_number)
print(log_base_url + auction_number)
print(item_base_url + auction_number)
#construct an item-history URL per item:
# https://www.maxanet.com/cgi-bin/[type].cgi?rosen[auction number]/[item number]
with open(auction_number + '_URLs.csv', 'w') as file:
    for item in range(1, 275):
        file.write(item_base_url + auction_number + "/" + str(item) + "\n")
#next: get the catalog (Item, Description) rows, which look like
#<tr valign="top"><td>1.</td><td>DEWALT #967 AND (1) DEWALT #720 WITH (1) BATTERY (USED, AS IS)</td></tr>
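The four base URLs differ only in the CGI script name, so the construction can live in one helper. A sketch (make_url is a hypothetical name, not from the original code):
In [ ]:
def make_url(script, auction_number, item_number=None):
    #script is one of "mnprint", "mndetails", "mnhistory", "mnbidlog"
    url = "https://www.maxanet.com/cgi-bin/" + script + ".cgi?rosen" + auction_number
    if item_number is not None:
        url = url + "/" + str(item_number)
    return url

print(make_url("mndetails", "330"))     #detail listing
print(make_url("mnhistory", "330", 1))  #bid history for item 1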
In [19]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import re

#need to page through item list based on item count
url = "https://www.maxanet.com/cgi-bin/mnhistory.cgi?rosen330/1"
html = urlopen(url)
soup = BeautifulSoup(html, "lxml")
dtable = soup.find('table', id='DataTable').findAll('tr')
#pull "auction/item" (e.g. "330/1") out of the URL and split it
a_ID, item_num = re.compile(r"https://www\.maxanet\.com/cgi-bin/mnhistory\.cgi\?rosen(.*)").match(url).group(1).split("/")
#write one comma-separated line per table row (the header row comes through as the first non-blank line)
with open(a_ID + "_itemhistory.csv", 'w') as file:
    for record in dtable:
        file.write(",".join(data.text for data in record.findAll('td')) + "\n")
df = pd.read_csv(a_ID + '_itemhistory.csv', skip_blank_lines=True)
#format dataframe
df['a_ID'] = a_ID
df['item_num'] = item_num
df['Amount'] = df['Amount'].map("${:,.2f}".format)
df['Current'] = df['Current'].map("${:,.2f}".format)
#format date/time
df['Start_date'] = pd.to_datetime(df['Time (ET)'], format='%b-%d-%Y %I:%M%p')
df.drop('Time (ET)', axis='columns', inplace=True)
df.set_index("Start_date", inplace=True)
df.to_csv('auto_adata_out.csv', header=None, sep=',', mode='a')
df
Out[19]:
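The cell above handles a single item; paging through the whole auction means repeating the fetch-and-parse for item numbers 1 through the item count scraped earlier. A sketch, where fetch_item_rows is a hypothetical wrapper around the parsing in this cell and item_count comes from the category dictionary:
In [ ]:
from urllib.request import urlopen
from bs4 import BeautifulSoup

def fetch_item_rows(auction_number, item_number):
    #hypothetical helper: fetch one item's bid-history table rows as CSV lines
    url = "https://www.maxanet.com/cgi-bin/mnhistory.cgi?rosen" + auction_number + "/" + str(item_number)
    rows = BeautifulSoup(urlopen(url), "lxml").find('table', id='DataTable').findAll('tr')
    return [",".join(td.text for td in row.findAll('td')) for row in rows]

for item_number in range(1, int(item_count) + 1):
    for row in fetch_item_rows("330", item_number):
        print(item_number, row)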
In [5]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import re

url = "https://www.maxanet.com/cgi-bin/mnbidlog.cgi?rosen330"
html = urlopen(url)
soup = BeautifulSoup(html, "lxml")
#pull the auction id (e.g. "330") out of the URL
aID = re.compile(r"https://www\.maxanet\.com/cgi-bin/mnbidlog\.cgi\?rosen(.*)").match(url).group(1)
dtable = soup.find('table', id='DataTable')
history = []
for tr in dtable.findAll("tr"):
    trs = tr.findAll("td")
    history.append([trs[0].text, trs[1].text, trs[2].text, trs[3].text])
hdf = pd.DataFrame(data=history)
#use first row as column headers
hdf.columns = hdf.iloc[0]
#drop the header row from the dataframe
hdf = hdf[1:]
#set index
hdf.set_index("Item", inplace=True)
hdf['aID'] = aID
hdf['Amount'] = hdf['Amount'].astype(float).map("${:,.2f}".format)
hdf['Time (ET)'] = pd.to_datetime(hdf['Time (ET)'], format='%m/%d/%Y %I:%M %p')
hdf
Out[5]:
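One design note on Amount: mapping it to a "$1,234.00" display string makes later arithmetic awkward, so totals are easier to compute by undoing the formatting (or by formatting only at export time). A sketch, assuming each bid-log row is an item's current high bid:
In [ ]:
#strip the display formatting back to floats before summing
total = hdf['Amount'].str.replace('[$,]', '', regex=True).astype(float).sum()
print("${:,.2f}".format(total))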