In [6]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import re

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")
#pattern that matches the View Catalog links and captures the auction id
detail_re = re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)")
#get list of View Catalog links
vcat = soup.findAll("a", {"href": detail_re})
with open('uauction.csv', 'w') as afile:
    for item in vcat:
        vcat_link = item.attrs['href']
        aid = detail_re.match(vcat_link).group(1)  #auction id, e.g. "337"
        afile.write('{0},{1}\n'.format(aid, vcat_link))
#the file has no header row, so name the columns explicitly
udf = pd.read_csv('uauction.csv', header=None, names=["aid", "URL"])
#set index
udf.set_index("aid", inplace=True)
#log just the auction ids
udf.drop('URL', axis='columns').to_csv('uauction_log.csv', header=False)
#rebuild the catalog URL for each auction id in the index
udf['URL'] = "https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen" + udf.index.astype(str)
udf
Out[6]:
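With the table of upcoming auction ids in hand, each catalog page can be visited in turn. A minimal sketch of that next step, assuming udf is the dataframe built above (the loop and the one-second pause are illustrative, not part of the original):
In [ ]:
import time
from urllib.request import urlopen
from bs4 import BeautifulSoup

for aid, link in udf['URL'].items():
    #aid is the index value; link is the rebuilt catalog URL
    page = BeautifulSoup(urlopen(link), "lxml")
    print(aid, page.title.text if page.title else "(no title)")
    time.sleep(1)  #illustrative pause to be polite to the server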
In [1]:
#get initial auction details - auction id, number of items
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
import pandas as pd

url = "https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen337"
html = urlopen(url)
soup = BeautifulSoup(html, "lxml")
#pull the auction id (e.g. "337") out of the URL
aid = re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)").match(url).group(1)
#get categories from web page
category = soup('table')[0].findAll('tr')[4].findAll('td')[1].text
#formatting - replace " (" with ":" and drop the closing paren
new_cat = category.replace(" (", ":").replace(")", "")
#split on dashes
category_list = new_cat.split(" - ")
#remove "Catalog" from the list
category_list.remove('Catalog')
#put the formatted "name:count" items in a dictionary
category = {}
for line in category_list:
    name, count = line.split(":")
    category[name] = count
#remove instructions if present
#del(category["1 INSTRUCTIONS"])
category.update({'AID': aid})
#write category list to file
with open(aid + '_category.csv', 'w') as file:
    for key, value in category.items():
        file.write('{0},{1}\n'.format(key, value))
item_count = category['ALL ITEMS']
df = pd.DataFrame({"AID": aid, "item_count": item_count}, index=[0])
df
Out[1]:
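For reference, the scraped category cell is assumed to be a single line like "Catalog - ALL ITEMS (274) - TOOLS (118) - OFFICE (42)" (the names and counts here are made up); the replace/split steps above turn it into a name-to-count dictionary. A quick check against a hard-coded sample:
In [ ]:
#hypothetical page text, for illustration only
sample = "Catalog - ALL ITEMS (274) - TOOLS (118) - OFFICE (42)"
parts = sample.replace(" (", ":").replace(")", "").split(" - ")
parts.remove('Catalog')
print(dict(p.split(":") for p in parts))
#{'ALL ITEMS': '274', 'TOOLS': '118', 'OFFICE': '42'}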
In [41]:
from datetime import datetime

def getdate():
    now = datetime.now()
    print(now.strftime("%A, %d %B %Y %I:%M%p"))

#auction number
auction_number = "330"
#rs catalog, e.g. https://www.maxanet.com/cgi-bin/mnprint.cgi?rosen317
cat_base_url = "https://www.maxanet.com/cgi-bin/mnprint.cgi?rosen"
#detail listing, e.g. https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen317
detail_page_base_url = "https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen"
#item bid history, e.g. https://www.maxanet.com/cgi-bin/mnhistory.cgi?rosen317/1
item_base_url = "https://www.maxanet.com/cgi-bin/mnhistory.cgi?rosen"
#event bid log, e.g. https://www.maxanet.com/cgi-bin/mnbidlog.cgi?rosen317
log_base_url = "https://www.maxanet.com/cgi-bin/mnbidlog.cgi?rosen"

getdate()
print(detail_page_base_url + auction_number)
print(cat_base_url + auction_number)
print(log_base_url + auction_number)
print(item_base_url + auction_number)
#construct an item-history URL per item:
# https://www.maxanet.com/cgi-bin/[type].cgi?rosen[auction number]/[item number]
with open(auction_number + '_URLs.csv', 'w') as file:
    for item in range(1, 275):
        file.write(item_base_url + auction_number + "/" + str(item) + "\n")
#next: get the catalog (Item, Description) rows, which look like
#<tr valign="top"><td>1.</td><td>DEWALT #967 AND (1) DEWALT #720 WITH (1) BATTERY (USED, AS IS)</td></tr>
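The four base URLs differ only in the CGI script name, so the construction can live in one helper. A sketch (make_url is a hypothetical name, not from the original code):
In [ ]:
def make_url(script, auction_number, item_number=None):
    #script is one of "mnprint", "mndetails", "mnhistory", "mnbidlog"
    url = "https://www.maxanet.com/cgi-bin/" + script + ".cgi?rosen" + auction_number
    if item_number is not None:
        url = url + "/" + str(item_number)
    return url

print(make_url("mndetails", "330"))     #detail listing
print(make_url("mnhistory", "330", 1))  #bid history for item 1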
In [19]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import re

#need to page through item list based on item count
url = "https://www.maxanet.com/cgi-bin/mnhistory.cgi?rosen330/1"
html = urlopen(url)
soup = BeautifulSoup(html, "lxml")
dtable = soup.find('table', id='DataTable').findAll('tr')
#pull "auction/item" (e.g. "330/1") out of the URL and split it
a_ID, item_num = re.compile(r"https://www\.maxanet\.com/cgi-bin/mnhistory\.cgi\?rosen(.*)").match(url).group(1).split("/")
#write one comma-separated line per table row (the header row comes through as the first non-blank line)
with open(a_ID + "_itemhistory.csv", 'w') as file:
    for record in dtable:
        file.write(",".join(data.text for data in record.findAll('td')) + "\n")
df = pd.read_csv(a_ID + '_itemhistory.csv', skip_blank_lines=True)
#format dataframe
df['a_ID'] = a_ID
df['item_num'] = item_num
df['Amount'] = df['Amount'].map("${:,.2f}".format)
df['Current'] = df['Current'].map("${:,.2f}".format)
#format date/time
df['Start_date'] = pd.to_datetime(df['Time (ET)'], format='%b-%d-%Y %I:%M%p')
df.drop('Time (ET)', axis='columns', inplace=True)
df.set_index("Start_date", inplace=True)
df.to_csv('auto_adata_out.csv', header=None, sep=',', mode='a')
df
Out[19]:
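The cell above handles a single item; paging through the whole auction means repeating the fetch-and-parse for item numbers 1 through the item count scraped earlier. A sketch, where fetch_item_rows is a hypothetical wrapper around the parsing in this cell and item_count comes from the category dictionary:
In [ ]:
from urllib.request import urlopen
from bs4 import BeautifulSoup

def fetch_item_rows(auction_number, item_number):
    #hypothetical helper: fetch one item's bid-history table rows as CSV lines
    url = "https://www.maxanet.com/cgi-bin/mnhistory.cgi?rosen" + auction_number + "/" + str(item_number)
    rows = BeautifulSoup(urlopen(url), "lxml").find('table', id='DataTable').findAll('tr')
    return [",".join(td.text for td in row.findAll('td')) for row in rows]

for item_number in range(1, int(item_count) + 1):
    for row in fetch_item_rows("330", item_number):
        print(item_number, row)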
In [5]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import re

url = "https://www.maxanet.com/cgi-bin/mnbidlog.cgi?rosen330"
html = urlopen(url)
soup = BeautifulSoup(html, "lxml")
#pull the auction id (e.g. "330") out of the URL
aID = re.compile(r"https://www\.maxanet\.com/cgi-bin/mnbidlog\.cgi\?rosen(.*)").match(url).group(1)
dtable = soup.find('table', id='DataTable')
history = []
for tr in dtable.findAll("tr"):
    trs = tr.findAll("td")
    history.append([trs[0].text, trs[1].text, trs[2].text, trs[3].text])
hdf = pd.DataFrame(data=history)
#use first row as column headers
hdf.columns = hdf.iloc[0]
#drop the header row from the dataframe
hdf = hdf[1:]
#set index
hdf.set_index("Item", inplace=True)
hdf['aID'] = aID
hdf['Amount'] = hdf['Amount'].astype(float).map("${:,.2f}".format)
hdf['Time (ET)'] = pd.to_datetime(hdf['Time (ET)'], format='%m/%d/%Y %I:%M %p')
hdf
Out[5]:
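One design note on Amount: mapping it to a "$1,234.00" display string makes later arithmetic awkward, so totals are easier to compute by undoing the formatting (or by formatting only at export time). A sketch, assuming each bid-log row is an item's current high bid:
In [ ]:
#strip the display formatting back to floats before summing
total = hdf['Amount'].str.replace('[$,]', '', regex=True).astype(float).sum()
print("${:,.2f}".format(total))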