In [13]:
def get_u_auction():
    from urllib.request import urlopen
    from bs4 import BeautifulSoup
    import pandas as pd

    html = urlopen("http://www.rosensystems.com/upcoming-auctions")
    soup = BeautifulSoup(html, "lxml")
    table = soup.table
    records = []
    for tr in table.findAll("tr"):
        tds = tr.findAll("td")
        record = []
        # the View Catalog link carries the auction id as a suffix of the URL
        ua_link = tds[3].a["href"]
        record.append(ua_link.replace("https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen", ""))
        record.append(tds[1].h4.text)
        record.append(tds[2].h5.text)
        record.append(tds[2].find("span", {"class": "date-display-single"}).text)
        records.append(record)
    df = pd.DataFrame(data=records)
    # name the columns
    df.columns = ["aid", "ua_name", "ua_type", "ua_sdate"]
    # set the auction id as the index
    df.set_index("aid", inplace=True)
    # remove the remaining \n
    df['ua_type'] = df['ua_type'].str.replace('\n', "")
    # remove the time from the date
    df['ua_sdate'] = df['ua_sdate'].str.replace("- 10:00am", "")
    return df

get_u_auction()
Out[13]:
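As a usage sketch, the function above can feed pandas' to_csv directly; the output file name here is an assumed example, not something from the site:
In [ ]:
# a minimal usage sketch: persist the scraped listings table
# (the file name upcoming_auctions.csv is an assumed example)
df = get_u_auction()
df.to_csv("upcoming_auctions.csv")
df.head()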
In [65]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re

htmla = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(htmla, "lxml")

# get the list of View Catalog links; the auction id is the text after "rosen"
aid_re = re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)")
vcat = soup.findAll("a", {"href": aid_re})

with open('uauction.csv', 'w') as afile:
    for item in vcat:
        vcat_link = item.attrs['href']
        aid = aid_re.match(vcat_link).group(1)
        auction_id = '{0},{1}\n'.format(aid, vcat_link)
        afile.write(auction_id)
        print(auction_id)
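The manual '{0},{1}\n' formatting above would break if a field ever contained a comma; a sketch of the same loop with csv.writer, which handles quoting:
In [ ]:
# same output as the cell above, but csv.writer handles quoting/escaping
import csv
with open('uauction.csv', 'w', newline='') as afile:
    writer = csv.writer(afile)
    for item in vcat:
        vcat_link = item.attrs['href']
        writer.writerow([aid_re.match(vcat_link).group(1), vcat_link])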
In [64]:
#write the following fields to a table:
#aid, link, name, address1, address2, city, state, zip, type, map link
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

# pair each auction id with its catalog link and name
aid_re = re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)")
vcat = soup.findAll("a", {"href": aid_re})
for item in vcat:
    vcat_link = item.attrs['href']
    aid = aid_re.match(vcat_link).group(1)
    # the auction name is the h4 in the same table row as the link
    name = item.find_parent("tr").h4.text
    print('{0},{1},{2}'.format(aid, vcat_link, name))
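The field list in the comment at the top of that cell can be collected one dict per row; a sketch assuming each tr carries the same spans the later cells query (streetAddress, locality, and so on), with the map link omitted:
In [ ]:
# a sketch of one record per table row, using the span classes queried
# elsewhere in this notebook; rows without a catalog link are skipped
def row_to_record(tr):
    def span_text(key, value):
        tag = tr.find("span", {key: value})
        return tag.text.strip() if tag else None
    link = tr.find_all("td")[3].a["href"]
    return {
        "aid": aid_re.match(link).group(1),
        "link": link,
        "name": tr.h4.text.strip(),
        "address1": span_text("itemprop", "streetAddress"),
        "address2": span_text("class", "additional"),
        "city": span_text("class", "locality"),
        "state": span_text("class", "addressRegion"),
        "zip": span_text("class", "postal-code"),
        "type": tr.h5.text.strip() if tr.h5 else None,
    }

records = [row_to_record(tr) for tr in soup.find("table").find_all("tr")
           if len(tr.find_all("td")) > 3 and tr.find_all("td")[3].a]
records[:1]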
In [66]:
from urllib.request import urlopen
from bs4 import BeautifulSoup

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

def get_upcoming_auction():
    # gather the per-auction fields from the listings page
    a_names = soup.find_all('h4')
    a_address1 = soup.find_all("span", {"itemprop": "streetAddress"})
    a_address2 = soup.find_all("span", {"class": "additional"})
    a_city = soup.find_all("span", {"class": "locality"})
    a_region = soup.find_all("span", {"class": "addressRegion"})
    a_postalCode = soup.find_all("span", {"class": "postal-code"})
    a_startDate = soup.find_all("span", {"class": "date-display-single"})
    a_mndetails_link = soup.find('table').find_all('td')[3].a
    a_type = soup.find_all('h5')
    # for now, just print the names
    for name in a_names:
        print(name.text)

get_upcoming_auction()
In [ ]:
import re
# get the list of View Catalog links (soup comes from the cell above)
vcat = soup.findAll("a", {"href": re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)")})
for item in vcat:
    vcat_link = item.attrs['href']
    print(vcat_link)
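The regex is one way to pull the id; the standard library can read it off the query string instead. A sketch with urllib.parse, assuming the id is always the query text after the "rosen" prefix:
In [ ]:
# alternative id extraction with urllib.parse instead of a regex
from urllib.parse import urlparse
for item in vcat:
    query = urlparse(item.attrs['href']).query  # e.g. "rosen1234"
    print(query.replace("rosen", "", 1))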
In [81]:
from urllib.request import urlopen
from bs4 import BeautifulSoup

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

# print the text of the first row of the listings table
dtable = soup.find("table").find_all("tr")
for records in dtable[0:1]:
    print(records.text)
In [82]:
# collect the auction ids into a list (vcat and re come from the cells above)
a_id = []
for item in vcat:
    vcat_link = item.attrs['href']
    aid = re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)").match(vcat_link).group(1)
    a_id.append(aid)
print(a_id)
In [8]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

# get the auction ids from the View Catalog links
aid_re = re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)")
vcat = soup.findAll("a", {"href": aid_re})
for item in vcat:
    vcat_link = item.attrs['href']
    auction_id = aid_re.match(vcat_link).group(1)
    print(auction_id)

# get the upcoming auction rows and print their text
dtable = soup.find("table").find_all("tr")
name = soup.find_all('h4')
atype = soup.find_all('h5')
startDate = soup.find_all("span", {"class": "date-display-single"})
for i in dtable:
    auctionName = i.text
    print(auctionName)
#get_upcoming_auction()
In [5]:
from urllib.request import urlopen
from bs4 import BeautifulSoup

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

# get the upcoming auction names
def get_upcoming_auction():
    name = soup.find_all('h4')
    for i in name:
        auctionName = i.text
        print(auctionName)

get_upcoming_auction()
In [83]:
def get_auction_type():
    # the "adr" div holds the auction type/address block
    for i in soup.find_all('div', {"id": "adr"}):
        print(i.text)

get_auction_type()
In [108]:
startDate = soup.find_all("span", {"class": "date-display-single"})
for i in startDate:
    print(i.text)
In [88]:
startDate = soup.find_all("span", {"class": "date-display-single"})
for i in startDate:
    print(i)
In [16]:
datef = soup.find_all("span", {"class": "date-display-single"})
In [14]:
name = soup.find_all('h4')
atype = soup.find_all('h5')  # 'atype' avoids shadowing the built-in type
address1 = soup.find_all("span", {"itemprop": "streetAddress"})
address2 = soup.find_all("span", {"class": "additional"})
city = soup.find_all("span", {"class": "locality"})
region = soup.find_all("span", {"class": "addressRegion"})
postalCode = soup.find_all("span", {"class": "postal-code"})
startDate = soup.find_all("span", {"class": "date-display-single"})
link = soup.find('table').find_all('td')[3].a

def getUpcomingDetails():
    from urllib.request import urlopen
    from bs4 import BeautifulSoup
    html = urlopen("http://www.rosensystems.com/upcoming-auctions")
    soup = BeautifulSoup(html, "lxml")
    # print the View Catalog link of the first listing
    print(link.attrs['href'])

getUpcomingDetails()
In [11]:
mndetails_link = soup.find('table').find_all('td')[3].a
print(mndetails_link.attrs['href'])
In [ ]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

# get the View Catalog links, then pull the auction id out of each href
aid_re = re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)")
vcat = soup.findAll("a", {"href": aid_re})
for item in vcat:
    vcat_link = item.attrs['href']
    print(vcat_link)
    aid = aid_re.match(vcat_link).group(1)
    print(aid)
In [23]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

# pair each View Catalog link with its auction name; assuming each listing
# row has one h4 and one catalog link, the two lists line up
vcat = soup.findAll("a", {"href": re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)")})
names = soup.find_all('h4')
for i in names:
    print(i.text)
for link_tag, name_tag in zip(vcat, names):
    print(link_tag.attrs['href'], name_tag.text)
In [2]:
from urllib.request import urlopen
from bs4 import BeautifulSoup

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

# get the upcoming auction names
dtable = soup.find("table").find_all("tr")

def get_upcoming_auction():
    for i in soup.find_all('h4'):
        print(i.text)

get_upcoming_auction()
In [ ]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

dtable = soup.find("table").find_all("tr")
name = soup.find_all('h4')
atype = soup.find_all('h5')
startDate = soup.find_all("span", {"class": "date-display-single"})
aid_re = re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)")
vcat = soup.findAll("a", {"href": aid_re})

# print the ids, names, dates, and types in turn
for item in vcat:
    vcat_link = item.attrs['href']
    auction_id = aid_re.match(vcat_link).group(1)
    print(auction_id)
for aname in name:
    print(aname.text)
for adate in startDate:
    print(adate.text)
for t in atype:
    print(t.text)
In [ ]:
# soup, vcat, and re come from the cells above
table1 = soup.find('table')
vcount = len(vcat)
tds = table1.find_all('td')
for item in vcat:
    vcat_link = item.attrs['href']
    auction_id = re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)").match(vcat_link).group(1)
    print(auction_id)
In [ ]:
import csv
from urllib.request import urlopen
from bs4 import BeautifulSoup

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

# the main listings table is currently the first table on the page
ltable = soup.find("table")
rows = ltable.find_all("tr")
csvFile = open("editors.csv", 'w', newline='')
writer = csv.writer(csvFile)
try:
    for row in rows:
        csvRow = []
        for cell in row.findAll(['td', 'h5']):
            csvRow.append(cell.get_text())
        writer.writerow(csvRow)
finally:
    csvFile.close()
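For a flat dump of the listings table, pandas can also parse the page directly; a sketch with pandas.read_html (it needs lxml installed and returns one DataFrame per table on the page):
In [ ]:
# a sketch of the same table dump via pandas.read_html
import pandas as pd
tables = pd.read_html("http://www.rosensystems.com/upcoming-auctions")
tables[0].to_csv("editors.csv", index=False)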
In [ ]:
import csv
from urllib.request import urlopen
from bs4 import BeautifulSoup

records = []

# grab the second table from the HTML and record each row, replacing the
# anchor in the ninth column with its bare href
def my_parse(html):
    soup = BeautifulSoup(html, "lxml")
    table2 = soup.find_all('table')[1]
    for tr in table2.find_all('tr')[2:]:
        tds = tr.find_all('td')
        url = tds[8].a.get('href')
        tds[8].a.replace_with(url)
        records.append([elem.text for elem in tds])

# read the exported HTML file(s) into memory
for index in range(39):
    # NOTE: the path below has no {} placeholder, so every pass reads the same file
    url = "file:///C:/projects/HTML/Export.htm".format(index)
    response = urlopen(url)
    try:
        html = response.read()
    finally:
        response.close()
    my_parse(html)

# write the CSV file
with open('listing.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(records)
In [197]:
vcount = len(vcat)
Out[197]: