In [6]:
from urllib.request import urlopen
from bs4 import BeautifulSoup 
from datetime import datetime, date, time
import csv
import pandas as pd
import re

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

u_auction = {}
afile = open('uauction.csv', 'w')

#get list of View Catalog links
vcat = soup.findAll("a", {"href":re.compile("https:\\/\\/www\\.maxanet\\.com\\/cgi-bin\\/mndetails\\.cgi\\?rosen(.*)")})

for item in vcat:
    vcat_link = item.attrs['href']
    #print(value)
    for record in item:
        aid = re.compile("https:\\/\\/www\\.maxanet\\.com\\/cgi-bin\\/mndetails\\.cgi\\?rosen(.*)").match(vcat_link).groups()
        key = list(aid) 
        auction_id = ('{0},{1}\n'.format(key, vcat_link))
        afile.write(auction_id)

afile.close()

udf = pd.read_csv('uauction.csv')

#name the columns
udf.columns = ["aid","URL"]
udf['aid'] = udf['aid'].str.replace("[","")
udf['aid'] = udf['aid'].str.replace("]","")
udf['aid'] = udf['aid'].str.replace("\'","")

#set index
udf.set_index("aid",inplace=True)
#drop URL column
udf.drop('URL', axis='columns', inplace=True)

udf.to_csv('uauction_log.csv',header=0)

for i in udf.itertuples():
    item = str(i)
    item = item.replace("Pandas(Index=","")
    item = item.replace(")","")
    item = item.replace("\'","")
    udf['URL'] = "https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen" + item

udf


Out[6]:
URL
aid
336 https://www.maxanet.com/cgi-bin/mndetails.cgi?...
337 https://www.maxanet.com/cgi-bin/mndetails.cgi?...
339 https://www.maxanet.com/cgi-bin/mndetails.cgi?...
338 https://www.maxanet.com/cgi-bin/mndetails.cgi?...

In [1]:
#get initial auction details - auction id, number of items

from urllib.request import urlopen
from bs4 import BeautifulSoup 
import re
import csv
import pandas as pd

url = "https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen337"    
html = urlopen(url)
soup = BeautifulSoup(html, "lxml")

aID = re.compile("https:\\/\\/www\\.maxanet\\.com\\/cgi-bin\\/mndetails\\.cgi\\?rosen(.*)").match(url).groups()

aID = str(aID)
aID1 = aID.replace(",","")
aid1 = aID1.replace("\'","")
aid2 = aid1.replace("(","")
aid = aid2.replace(")","")

#get categories from web page 
category = soup('table')[0].findAll('tr')[4].findAll('td')[1].text

#formatting - replace left paren with colon
new_cat = category.replace(" (" ,":")
#replace right paren with blank
new1_cat = new_cat.replace(")","")
#split on dashes
category_list = new1_cat.split(" - ")
#remove "catalog from the list
category_list.remove('Catalog')
#print the list to see if it is good
#for item in category_list:
    #print(item)

#set up file name and dictionary

category = {}

#put formatted items in a dictionary
for line in category_list:
    x = line.split(":")
    a=x[0]
    b=x[1]
    category[a]=b

#remove instructions    
#del(category["1 INSTRUCTIONS"])
category.update({'AID':aid})

#write category list to file
with open(aid +'_category.csv', 'w') as file:
    [file.write('{0},{1}\n'.format(key, value)) for key, value in category.items()]  

item_count = category['ALL ITEMS']
aid = category['AID']

df = pd.DataFrame({"AID":aid,"item_count":item_count}, index=["AID"])

df


Out[1]:
AID item_count
AID 337 103

In [3]:
#get initial auction details - auction id, number of items

from urllib.request import urlopen
from bs4 import BeautifulSoup 
import re
import csv
import pandas as pd

url = "https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen337"    
html = urlopen(url)
soup = BeautifulSoup(html, "lxml")


#get categories from web page 
category = soup('table')[0].findAll('tr')[4].findAll('td')[1].text

#formatting - replace left paren with colon
new_cat = category.replace(" (" ,":")
#replace right paren with blank
new1_cat = new_cat.replace(")","")
#split on dashes
category_list = new1_cat.split(" - ")
#remove "catalog from the list
category_list.remove('Catalog')
#print the list to see if it is good
#for item in category_list:
    #print(item)

#set up file name and dictionary

category = {}

#put formatted items in a dictionary
for line in category_list:
    x = line.split(":")
    a=x[0]
    b=x[1]
    category[a]=b

#remove instructions    
#del(category["1 INSTRUCTIONS"])
category.update({'AID':aid})

#write category list to file
with open(aid +'_category.csv', 'w') as file:
    [file.write('{0},{1}\n'.format(key, value)) for key, value in category.items()]  

item_count = category['ALL ITEMS']
aid = category['AID']


103

In [41]:
from datetime import datetime, date, time

def getdate():
    time = datetime.now()
    dt = time.strftime("%A, %d %B %Y %I:%M%p")
    
    print(dt)

#auction number    
auction_number = "330"
    
#rs catalog
# https://www.maxanet.com/cgi-bin/mnprint.cgi?rosen317
cat_base_url = """https://www.maxanet.com/cgi-bin/mnprint.cgi?rosen"""

#detail listing
# https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen317
detail_page_base_url = """https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen"""

# item bid history
# https://www.maxanet.com/cgi-bin/mnhistory.cgi?rosen317/1
item_base_url = """https://www.maxanet.com/cgi-bin/mnhistory.cgi?rosen""" 

# event bid log
# https://www.maxanet.com/cgi-bin/mnbidlog.cgi?rosen317
log_base_url = """https://www.maxanet.com/cgi-bin/mnbidlog.cgi?rosen"""

getdate()

print(detail_page_base_url + auction_number)
cat = print(cat_base_url + auction_number)
print(log_base_url + auction_number)
print(item_base_url + auction_number)



# construct url base on type
# https://www.maxanet.com/cgi-bin/[type].cgi?rosen[auction number]/[item number]
file = open(auction_number + '_URLs.csv', 'w')

for item in range(1, 275):
    link = item_base_url + auction_number +  "/" + str(item) + "\n"
    file.write(link)
#now let figure out how to get the auction data 

#get catalog (Item, Description)
#<tr valign="top"><td>1.</td><td>DEWALT #967 AND (1) DEWALT #720 WITH (1) BATTERY (USED, AS IS)</td></tr>



        #print(auction_id)
           
file.close()


Saturday, 11 March 2017 10:39AM
https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen330
https://www.maxanet.com/cgi-bin/mnprint.cgi?rosen330
https://www.maxanet.com/cgi-bin/mnbidlog.cgi?rosen330
https://www.maxanet.com/cgi-bin/mnhistory.cgi?rosen330

In [ ]:


In [19]:
from urllib.request import urlopen
from bs4 import BeautifulSoup 
from datetime import datetime, date, time
import csv
import pandas as pd
import re


#need to page through item list based on item count    
url = "https://www.maxanet.com/cgi-bin/mnhistory.cgi?rosen330/1"
html = urlopen(url)
soup = BeautifulSoup(html, "lxml")

    
dtable = soup.find('table',id ='DataTable').findAll('tr')
a_ID = re.compile("https:\\/\\/www\\.maxanet\\.com\\/cgi-bin\\/mnhistory\\.cgi\\?rosen(.*)").match(url).groups()


aid =str(a_ID)

a_ID,item_num = aid.split("/")

file = open(a_ID + "_itemhistory.csv", 'w') 

itemdatasaved = " "

for record in dtable:
    itemdata = " "
    for data in record.findAll('td'):
        itemdata = itemdata + "," + data.text 
    itemdatasaved = itemdatasaved + "\n" + itemdata[2:]

file.write(itemdatasaved)
file.close()
    
df = pd.read_csv(a_ID + '_itemhistory.csv',skip_blank_lines=True)
#df.set_index("Bidder", inplace=True)

#format dataframe
df['a_ID'] = a_ID 
df['item_num'] = item_num
df['a_ID'] = a_ID 
df['a_ID'] = df['a_ID'].str.replace("\('","")
df['item_num'] = df['item_num'].str.replace("\',\)","")
df['Amount'] = df['Amount'].map("${:,.2f}".format)
df['Current'] = df['Current'].map("${:,.2f}".format)
#format date/time
df['Start_date'] = pd.to_datetime(df['Time (ET)'], format='%b-%d-%Y %I:%M%p')
df.drop('Time (ET)', axis='columns', inplace=True)
#df.drop('a_ID', axis='columns', inplace=True) 
df.set_index("Start_date", inplace=True)

df.to_csv('auto_adata_out.csv',header=None,sep=',',mode='a')

df


Out[19]:
Bidder Amount Current Winning a_ID item_num
Start_date
2017-02-08 00:21:00 12192 $1.00 $1.00 12192 330 1
2017-02-08 10:48:00 2397 $1.25 $6.00 2397 330 1
2017-02-14 09:45:00 2441 $7.00 $8.00 2397 330 1
2017-02-14 09:49:00 2441 $9.00 $10.00 2397 330 1
2017-02-14 10:49:00 3074 $11.00 $11.00 3074 330 1
2017-02-14 10:52:00 2441 $12.00 $12.00 2441 330 1
2017-02-14 10:55:00 3074 $15.00 $15.00 2441 330 1
2017-02-14 10:56:00 3074 $16.01 $16.01 3074 330 1
2017-02-14 10:59:00 2441 $17.01 $17.01 2441 330 1
2017-02-14 11:00:00 3074 $18.01 $19.01 2441 330 1
2017-02-14 11:00:00 3074 $20.01 $20.01 3074 330 1
2017-02-14 11:01:00 2441 $22.01 $22.01 2441 330 1

In [5]:
from urllib.request import urlopen
from bs4 import BeautifulSoup 
from datetime import datetime, date, time
import pandas as pd
import re


url = "https://www.maxanet.com/cgi-bin/mnbidlog.cgi?rosen330"
html = urlopen(url)
soup = BeautifulSoup(html, "lxml")

aID = re.compile("https:\\/\\/www\\.maxanet\\.com\\/cgi-bin\\/mnbidlog\\.cgi\\?rosen(.*)").match(url).groups()
dtable = soup.find('table',id ='DataTable')

history = []
for tr in dtable.findAll("tr"):
    trs = tr.findAll("td")
    record = []
    record.append(trs[0].text)
    record.append(trs[1].text)
    record.append(trs[2].text)
    record.append(trs[3].text)
    history.append(record)

hdf = pd.DataFrame(data=history)
#use first row as column headers
hdf.columns = hdf.iloc[0]

#drop the header row from the dataframe
hdf = hdf[1:]

#set index
hdf.set_index("Item",inplace=True)

#remove the remaining \n
hdf['aID'] = str(aID)
hdf['aID'] = hdf['aID'].str.replace("\(\'","")
hdf['aID'] = hdf['aID'].str.replace("\',\)","")
hdf['Amount'] = hdf['Amount'].astype(float)
hdf['Amount'] = hdf['Amount'].map("${:,.2f}".format)
hdf['Time (ET)'] = pd.to_datetime(hdf['Time (ET)'], format='%m/%d/%Y %I:%M %p')
#df["TimeReviewed"] = pd.to_datetime(df["TimeReviewed"])
hdf


Out[5]:
Bidder Amount Time (ET) aID
Item
274 2952 $3.00 2017-02-14 15:32:00 330
273 2952 $7.00 2017-02-14 15:32:00 330
270 13639 $170.00 2017-02-14 15:29:00 330
268 4998 $4,500.00 2017-02-14 15:25:00 330
265 4194 $120.00 2017-02-14 15:25:00 330
266 3625 $90.00 2017-02-14 15:25:00 330
270 10190 $160.00 2017-02-14 15:25:00 330
265 3625 $100.00 2017-02-14 15:24:00 330
265 8233 $85.00 2017-02-14 15:24:00 330
266 8233 $75.00 2017-02-14 15:24:00 330
266 4194 $61.00 2017-02-14 15:23:00 330
265 4194 $75.00 2017-02-14 15:23:00 330
265 8233 $60.00 2017-02-14 15:20:00 330
271 5416 $75.00 2017-02-14 15:20:00 330
271 5416 $66.50 2017-02-14 15:20:00 330
271 5416 $56.50 2017-02-14 15:19:00 330
263 9416 $326.00 2017-02-14 15:19:00 330
260 13631 $210.00 2017-02-14 15:19:00 330
262 9416 $250.00 2017-02-14 15:19:00 330
263 4132 $250.00 2017-02-14 15:19:00 330
260 13631 $170.00 2017-02-14 15:19:00 330
262 9416 $180.00 2017-02-14 15:19:00 330
261 13636 $95.00 2017-02-14 15:19:00 330
262 4132 $150.00 2017-02-14 15:18:00 330
269 5416 $46.00 2017-02-14 15:18:00 330
259 5416 $46.00 2017-02-14 15:18:00 330
259 7628 $44.00 2017-02-14 15:16:00 330
254 13604 $18.00 2017-02-14 15:12:00 330
262 9416 $140.00 2017-02-14 15:11:00 330
253 13604 $25.00 2017-02-14 15:10:00 330
252 13604 $13.00 2017-02-14 15:10:00 330
246 8233 $160.00 2017-02-14 15:08:00 330
250 2441 $66.25 2017-02-14 15:08:00 330
246 4142 $150.00 2017-02-14 15:07:00 330
246 4142 $130.77 2017-02-14 15:06:00 330
246 8233 $120.77 2017-02-14 15:05:00 330
245 4142 $140.00 2017-02-14 15:05:00 330
245 8233 $130.00 2017-02-14 15:04:00 330
259 13279 $35.88 2017-02-14 15:04:00 330
246 4142 $110.77 2017-02-14 15:03:00 330
245 4142 $120.00 2017-02-14 15:03:00 330
248 13634 $44.20 2017-02-14 15:01:00 330
246 13639 $100.77 2017-02-14 15:00:00 330
241 5416 $40.56 2017-02-14 15:00:00 330
245 8233 $81.00 2017-02-14 15:00:00 330
250 2441 $56.25 2017-02-14 14:59:00 330
241 2010 $36.56 2017-02-14 14:58:00 330
273 13467 $6.00 2017-02-14 14:56:00 330
273 13467 $2.00 2017-02-14 14:56:00 330
245 13639 $71.00 2017-02-14 14:56:00 330

In [17]:
from urllib.request import urlopen
from bs4 import BeautifulSoup 
from datetime import datetime, date, time
import csv
import pandas as pd
import re


#need to page through item list based on item count    
url = "https://www.maxanet.com/cgi-bin/mnhistory.cgi?rosen330/1"
html = urlopen(url)
soup = BeautifulSoup(html, "lxml")

    
dtable = soup.find('table',id ='DataTable').findAll('tr')
a_ID = re.compile("https:\\/\\/www\\.maxanet\\.com\\/cgi-bin\\/mnhistory\\.cgi\\?rosen(.*)").match(url).groups()


aid =str(a_ID)

a_ID,item_num = aid.split("/")

file = open( a_ID + "_itemhistory.csv", 'w') 

itemdatasaved = " "

for record in dtable:
    itemdata = " "
    for data in record.findAll('td'):
        itemdata = itemdata + "," + data.text 
    itemdatasaved = itemdatasaved + "\n" + itemdata[2:]

file.write(itemdatasaved)
file.close()
    
df = pd.read_csv(a_ID +'_itemhistory.csv',skip_blank_lines=True)
#df.set_index("Bidder", inplace=True)

#format dataframe
df['a_ID'] = a_ID 
df['item_num'] = item_num
df['a_ID'] = a_ID 
df['a_ID'] = df['a_ID'].str.replace("\('","")
df['item_num'] = df['item_num'].str.replace("\',\)","")
df['Amount'] = df['Amount'].map("${:,.2f}".format)
df['Current'] = df['Current'].map("${:,.2f}".format)
#format date/time
df['Start_date'] = pd.to_datetime(df['Time (ET)'], format='%b-%d-%Y %I:%M%p')
df.drop('Time (ET)', axis='columns', inplace=True)
#df.drop('a_ID', axis='columns', inplace=True) 
df.set_index("Start_date", inplace=True)

aid = a_ID.replace("\(\'","")
df.to_csv(aid +'_data_out.csv',header=None,sep=',',mode='a')

df


Out[17]:
Bidder Amount Current Winning a_ID item_num
Start_date
2017-02-08 00:21:00 12192 $1.00 $1.00 12192 330 1
2017-02-08 10:48:00 2397 $1.25 $6.00 2397 330 1
2017-02-14 09:45:00 2441 $7.00 $8.00 2397 330 1
2017-02-14 09:49:00 2441 $9.00 $10.00 2397 330 1
2017-02-14 10:49:00 3074 $11.00 $11.00 3074 330 1
2017-02-14 10:52:00 2441 $12.00 $12.00 2441 330 1
2017-02-14 10:55:00 3074 $15.00 $15.00 2441 330 1
2017-02-14 10:56:00 3074 $16.01 $16.01 3074 330 1
2017-02-14 10:59:00 2441 $17.01 $17.01 2441 330 1
2017-02-14 11:00:00 3074 $18.01 $19.01 2441 330 1
2017-02-14 11:00:00 3074 $20.01 $20.01 3074 330 1
2017-02-14 11:01:00 2441 $22.01 $22.01 2441 330 1

In [ ]:


In [ ]: