In [13]:
def get_u_auction():
    from urllib.request import urlopen
    from bs4 import BeautifulSoup
    import pandas as pd

    html = urlopen("http://www.rosensystems.com/upcoming-auctions")
    soup = BeautifulSoup(html, "lxml")

    #each <tr> in the first table is one upcoming auction
    table = soup.table
    records = []
    for tr in table.find_all("tr"):
        tds = tr.find_all("td")
        record = []
        #the auction id is whatever follows "rosen" in the View Catalog link
        ua_link = tds[3].a["href"]
        record.append(ua_link.replace("https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen", ""))
        record.append(tds[1].h4.text)   #auction name
        record.append(tds[2].h5.text)   #auction type
        record.append(tds[2].find("span", {"class": "date-display-single"}).text)   #start date
        records.append(record)

    df = pd.DataFrame(data=records)

    #name the columns
    df.columns = ["aid","ua_name","ua_type","ua_sdate"]
    #set index
    df.set_index("aid",inplace=True)
    #remove the remaining \n
    df['ua_type'] = df['ua_type'].str.replace('\n',"")
    #remove time from date
    df['ua_sdate'] = df['ua_sdate'].str.replace("- 10:00am","")
    return df

df = get_u_auction()
df


Out[13]:
ua_name ua_type ua_sdate
aid
325 Late Model Automotive Repair Shop Online Only January 12, 2017
326 Assets formerly of Megas Production Online Only January 17, 2017
331 January Auction eXchange Online Only January 19, 2017
328 Frisco International Online Only January 26, 2017
329 FBC Enterprises, LLC. dba Custom Graphic Servi... Online Only January 31, 2017
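
The literal "- 10:00am" replace leaves a trailing space and keeps the dates as strings; if real dates are wanted downstream, pd.to_datetime parses this format directly. A minimal sketch, assuming the df produced above:

In [ ]:
import pandas as pd

#hedged sketch: strip the leftover trailing space, then convert the
#"January 12, 2017" strings to datetime64
df['ua_sdate'] = pd.to_datetime(df['ua_sdate'].str.strip(), format="%B %d, %Y")
df.dtypes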

In [15]:
from urllib.request import urlopen
from bs4 import BeautifulSoup 
from datetime import datetime, date, time
import csv
import pandas as pd
import re

htmla = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(htmla, "lxml")

#get list of View Catalog links; the auction id is the digits after "rosen"
aid_pattern = re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)")
vcat = soup.find_all("a", {"href": aid_pattern})

with open('uauction.csv', 'w') as afile:
    for item in vcat:
        vcat_link = item.attrs['href']
        key = list(aid_pattern.match(vcat_link).groups())
        auction_id = '{0},{1}\n'.format(key, vcat_link)
        afile.write(auction_id)
        print(auction_id)


['325'],https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen325

['326'],https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen326

['331'],https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen331

['328'],https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen328

['329'],https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen329
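
Hand-formatting the rows puts the list repr (['325']) into the file verbatim; csv.writer quotes and delimits fields properly instead. A sketch under the same vcat and aid_pattern assumptions as the cell above:

In [ ]:
import csv

#hedged sketch: let csv.writer handle delimiters and quoting
with open('uauction.csv', 'w', newline='') as afile:
    writer = csv.writer(afile)
    for item in vcat:
        link = item.attrs['href']
        writer.writerow([aid_pattern.match(link).group(1), link])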


In [64]:
#write the following to a table

#aid
#link
#name
#address1
#address2
#city
#state
#zip
#type
#map link

from urllib.request import urlopen
from bs4 import BeautifulSoup 
from datetime import datetime, date, time
import csv
import pandas as pd
import re

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

#get list of View Catalog links
aid_pattern = re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)")
vcat = soup.find_all("a", {"href": aid_pattern})

for item in vcat:
    vcat_link = item.attrs['href']
    aid = aid_pattern.match(vcat_link).groups()
    #note: previous_sibling of the first <h4> is the whitespace text node
    #in front of it, so the name field prints blank for every row below
    name = soup.h4.previous_sibling
    auction_id = '{0},{1},{2}\n'.format(aid, vcat_link, name)
    print(auction_id)


('20',),https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen20,


('6',),https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen6,


('327',),https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen327,


('325',),https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen325,


('326',),https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen326,


('328',),https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen328,


('329',),https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen329,
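
Because previous_sibling lands on that whitespace node, every name above prints blank. Navigating relative to each link pulls the heading that actually belongs to it; a sketch, assuming each listing's <h4> and its View Catalog link share a table row:

In [ ]:
#hedged sketch: find the <h4> inside the same <tr> as each catalog link
for item in vcat:
    row = item.find_parent("tr")
    name = row.find("h4").get_text(strip=True) if row and row.find("h4") else ""
    print(aid_pattern.match(item.attrs['href']).group(1), name)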



In [66]:
from urllib.request import urlopen
from bs4 import BeautifulSoup 

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

def get_upcoming_auction():
    #stage the fields needed for the listing table
    a_name = soup.find('h4')
    a_address1 = soup.find_all("span", {"itemprop": "streetAddress"})
    a_address2 = soup.find_all("span", {"class": "additional"})
    a_city = soup.find_all("span", {"class": "locality"})
    a_region = soup.find_all("span", {"class": "addressRegion"})
    a_postalCode = soup.find_all("span", {"class": "postal-code"})
    a_startDate = soup.find_all("span", {"class": "date-display-single"})
    a_mndetails_link = soup.find('table').find_all('td')[3].a
    a_type = soup.find_all('h5')

    #only the first auction name is printed for now
    print(a_name.text)

get_upcoming_auction()


Quick Sale PolyWRX, LLC. formerly JMC Killion Laboratories

In [ ]:
import re

#get list of View Catalog links
vcat = soup.find_all("a", {"href": re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)")})
for item in vcat:
    vcat_link = item.attrs['href']
    print(vcat_link)

In [81]:
from urllib.request import urlopen
from bs4 import BeautifulSoup 
from datetime import datetime, date, time
import csv
import webbrowser
import pandas as pd

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

#dump the raw text of the first listing row to see what needs cleaning
dtable = soup.find("table").find_all("tr")
for record in dtable[0:1]:
    print(record.text)


Quick Sale - Impact Frac, LLC. BK Case #16-33612

10954 Exhibition Rd 

79014

      Canadian      
              ,                     TX

See map: Google Maps 

Online Only
January 5, 2017 - 10:00am 

View CatalogRegister Now 
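
The raw .text dump keeps all of the layout whitespace. bs4's stripped_strings generator yields each text fragment already trimmed, which collapses a row to one readable line. A sketch against the same first row:

In [ ]:
#hedged sketch: join the trimmed text fragments of the row
for record in dtable[0:1]:
    print(" | ".join(record.stripped_strings))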


In [82]:
a_id = []
for item in vcat:
    vcat_link = item.attrs['href']
    #groups() returns a one-element tuple holding the id string
    aid = re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)").match(vcat_link).groups()
    a_id.append(aid)
print(a_id)


[('20',), ('6',), ('327',), ('325',), ('326',)]
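
Each entry is the one-element tuple from groups(); flattening to plain ids takes one pass. A sketch over the a_id list above:

In [ ]:
#hedged sketch: unpack each one-element tuple into a flat list of ints
ids = [int(t[0]) for t in a_id]
print(ids)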

In [8]:
from urllib.request import urlopen
from bs4 import BeautifulSoup 
from datetime import datetime, date, time
import csv
import webbrowser
import pandas as pd
import re

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

aid_pattern = re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)")
vcat = soup.find_all("a", {"href": aid_pattern})

for item in vcat:
    vcat_link = item.attrs['href']
    aid = aid_pattern.match(vcat_link).groups()
    auction_id = list(aid)
    print(auction_id)

#get the upcoming auction names
dtable = soup.find("table").find_all("tr")
name = soup.find_all('h4')
atype = soup.find_all('h5')
startDate = soup.find_all("span", {"class": "date-display-single"})

for i in dtable:
    auctionName = i.text
#this trailing print repeats the last id, hence the duplicate in the output
print(auction_id)


['20']
['6']
['327']
['325']
['326']
['326']

In [5]:
from urllib.request import urlopen
from bs4 import BeautifulSoup 
from datetime import datetime, date, time
import csv
import webbrowser
import pandas as pd

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

#get the upcoming auction names

def get_upcoming_auction():
    name = soup.find_all('h4')
    for i in name:
        auctionName = i.text
        print(auctionName)
get_upcoming_auction()


Quick Sale PolyWRX, LLC. formerly JMC Killion Laboratories
Quick Sale - Impact Frac, LLC. BK Case #16-33612
Extrusion Dies at PolyWrx
Late Model Automotive Repair Shop
Assets formerly of Megas Production
January Auction eXchange
Frisco International

In [83]:
def get_auction_address():
    #the address markup lives in <div id="adr"> blocks, not the type (h5)
    adr = soup.find_all('div', {"id": "adr"})
    for i in adr:
        print(i.text)

get_auction_address()

In [108]:
startDate = soup.find_all("span",{"class":{"date-display-single"}})

for i in startDate:
    print(i.text)


December 15, 2016 - 10:00am
December 20, 2016 - 10:00am
December 20, 2016 - 1:00pm
January 12, 2017 - 10:00am
January 17, 2017 - 10:00am
January 19, 2017 - 10:00am

In [88]:
startDate = soup.find_all("span",{"class":{"date-display-single"}})

for i in startDate:
    print(i)


<span class="date-display-single" content="2017-01-04T10:00:00-06:00" datatype="xsd:dateTime" property="dc:date">January 4, 2017 - 10:00am</span>
<span class="date-display-single" content="2017-01-05T10:00:00-06:00" datatype="xsd:dateTime" property="dc:date">January 5, 2017 - 10:00am</span>
<span class="date-display-single" content="2017-01-11T10:00:00-06:00" datatype="xsd:dateTime" property="dc:date">January 11, 2017 - 10:00am</span>
<span class="date-display-single" content="2017-01-12T10:00:00-06:00" datatype="xsd:dateTime" property="dc:date">January 12, 2017 - 10:00am</span>
<span class="date-display-single" content="2017-01-17T10:00:00-06:00" datatype="xsd:dateTime" property="dc:date">January 17, 2017 - 10:00am</span>
<span class="date-display-single" content="2017-01-19T10:00:00-06:00" datatype="xsd:dateTime" property="dc:date">January 19, 2017 - 10:00am</span>
<span class="date-display-single" content="2017-01-26T10:00:00-06:00" datatype="xsd:dateTime" property="dc:date">January 26, 2017 - 10:00am</span>

In [16]:
datef = soup.find_all("span", {"class": "date-display-single"})

In [14]:
name = soup.find_all('h4')
atype = soup.find_all('h5')
address1 = soup.find_all("span", {"itemprop": "streetAddress"})
address2 = soup.find_all("span", {"class": "additional"})
city = soup.find_all("span", {"class": "locality"})
region = soup.find_all("span", {"class": "addressRegion"})
postalCode = soup.find_all("span", {"class": "postal-code"})
startDate = soup.find_all("span", {"class": "date-display-single"})
link = soup.find('table').find_all('td')[3].a

def get_upcoming_details():
    #print the View Catalog link from the first listing row
    print(link.attrs['href'])

get_upcoming_details()


https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen322

In [11]:
mndetails_link = soup.find('table').find_all('td')[3].a

print(mndetails_link.attrs['href'])


https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen322

In [ ]:
from urllib.request import urlopen
from bs4 import BeautifulSoup 
from datetime import datetime, date, time
import csv
import webbrowser
import pandas as pd
import re

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

#get list of View Catalog links
aid_pattern = re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)")
vcat = soup.find_all("a", {"href": aid_pattern})
for item in vcat:
    vcat_link = item.attrs['href']
    print(vcat_link)

#pull the auction id back out of each link
for item in vcat:
    aid = aid_pattern.match(item.attrs['href']).groups()
    print(aid)
#formatting to-do: strip the tuple parens from aid before writing it out

In [23]:
from urllib.request import urlopen
from bs4 import BeautifulSoup 
from datetime import datetime, date, time
import csv
import webbrowser
import pandas as pd
import re

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

#get list of View Catalog links
vcat = soup.find_all("a", {"href": re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)")})
name = soup.find_all('h4')
for i in name:
    auctionName = i.text
    print(auctionName)
    #note: the inner loop reads `item`, a leftover variable from an earlier
    #cell, instead of its own loop variable, so every name below is paired
    #with the same link
    for i in vcat:
        vcat_link = item.attrs['href']
    print(vcat_link, auctionName)


Quick Sale PolyWRX, LLC. formerly JMC Killion Laboratories
https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen329 Quick Sale PolyWRX, LLC. formerly JMC Killion Laboratories
Quick Sale - Impact Frac, LLC. BK Case #16-33612
https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen329 Quick Sale - Impact Frac, LLC. BK Case #16-33612
Extrusion Dies at PolyWrx
https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen329 Extrusion Dies at PolyWrx
Late Model Automotive Repair Shop
https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen329 Late Model Automotive Repair Shop
Assets formerly of Megas Production
https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen329 Assets formerly of Megas Production
January Auction eXchange
https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen329 January Auction eXchange
Frisco International
https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen329 Frisco International
FBC Enterprises, LLC. dba Custom Graphic Services BK CASE #16-44404
https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen329 FBC Enterprises, LLC. dba Custom Graphic Services BK CASE #16-44404
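
The fix is to scope both lookups to the same <tr> rather than walking two page-wide lists whose lengths can differ (some listings have no catalog link yet). A sketch:

In [ ]:
#hedged sketch: read the name and link out of the same row, skipping
#rows that lack a View Catalog link
for tr in soup.find("table").find_all("tr"):
    a = tr.find("a", href=re.compile(r"mndetails\.cgi\?rosen"))
    h4 = tr.find("h4")
    if a and h4:
        print(a["href"], h4.get_text(strip=True))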

In [2]:
from urllib.request import urlopen
from bs4 import BeautifulSoup 
from datetime import datetime, date, time
import csv
import webbrowser
import pandas as pd

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

#get the upcoming auction names
dtable = soup.find("table").find_all("tr")

def get_upcoming_auction():
    
    for i in soup.find_all('h4'):
        print(i.text)
        
get_upcoming_auction()


Quick Sale PolyWRX, LLC. formerly JMC Killion Laboratories
Quick Sale - Impact Frac, LLC. BK Case #16-33612
Extrusion Dies at PolyWrx
Late Model Automotive Repair Shop
Assets formerly of Megas Production
January Auction eXchange
Frisco International

In [ ]:
from urllib.request import urlopen
from bs4 import BeautifulSoup 
from datetime import datetime, date, time
import csv
import webbrowser
import pandas as pd
import re

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

dtable = soup.find("table").find_all("tr")
name = soup.find_all('h4')
atype = soup.find_all('h5')
startDate = soup.find_all("span", {"class": "date-display-single"})
aid_pattern = re.compile(r"https://www\.maxanet\.com/cgi-bin/mndetails\.cgi\?rosen(.*)")
vcat = soup.find_all("a", {"href": aid_pattern})

for item in vcat:
    vcat_link = item.attrs['href']
    auction_id = list(aid_pattern.match(vcat_link).groups())
    print(auction_id)

for aname in name:
    print(aname.text)

for adate in startDate:
    print(adate.text)

for t in atype:
    print(t.text)

In [ ]:
table1 = soup.find('table')
vcount = len(vcat)

tds = table1.find_all('td')

for item in vcat:
    vcat_link = item.attrs['href']
    auction_id = list(aid_pattern.match(vcat_link).groups())
    print(auction_id)

In [ ]:
import csv
from urllib.request import urlopen
from bs4 import BeautifulSoup

html = urlopen("http://www.rosensystems.com/upcoming-auctions")
soup = BeautifulSoup(html, "lxml")

#the auction listings are currently in the first table on the page
ltable = soup.find("table")
rows = ltable.find_all("tr")

csvFile = open("editors.csv", 'wt', newline='')
writer = csv.writer(csvFile)
try:
    for row in rows:
        csvRow = []
        for cell in row.find_all(['td','h5']):
            csvRow.append(cell.get_text())
        writer.writerow(csvRow)
finally:
    csvFile.close()
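
For a quick flat dump, pandas can read the table directly as well; a sketch, assuming lxml is available (read_html returns one DataFrame per <table> on the page; the output filename here is just illustrative):

In [ ]:
import pandas as pd

#hedged sketch: parse every table on the page and save the first,
#which holds the auction listings
tables = pd.read_html("http://www.rosensystems.com/upcoming-auctions")
tables[0].to_csv("auctions_pd.csv", index=False)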

In [ ]:
import csv
from urllib.request import urlopen
from bs4 import BeautifulSoup

records = []

#grab the second table from a saved HTML export
def my_parse(html):
    soup = BeautifulSoup(html, "lxml")
    table2 = soup.find_all('table')[1]
    for tr in table2.find_all('tr')[2:]:
        tds = tr.find_all('td')
        url = tds[8].a.get('href')
        #swap the anchor for its href so the cell text becomes the URL
        tds[8].a.replace_with(url)
        records.append([elem.text for elem in tds])

#read the saved HTML file into memory
response = urlopen("file:///C:/projects/HTML/Export.htm")
try:
    html = response.read()
finally:
    response.close()
my_parse(html)

#write the CSV file
with open('listing.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(records)

In [197]:
vcount = len(vcat)


Out[197]:
5