In [7]:
from urllib.request import urlopen
from bs4 import BeautifulSoup 
from datetime import datetime, date, time
import csv
import pandas as pd


def getdate():
    time = datetime.now()
    dt = time.strftime("%A, %d %B %Y %I:%M%p")
    
    print(dt)
    
    
html = urlopen("https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen316")
soup = BeautifulSoup(html, "lxml")

#get categories from web page 
category = soup('table')[0].findAll('tr')[4].findAll('td')[1].text

#formatting - replace left paren with colon
new_cat = category.replace(" (" ,":")
#replace right paren with blank
new1_cat = new_cat.replace(")","")
#split on dashes
category_list = new1_cat.split(" - ")
#remove "catalog from the list
category_list.remove('Catalog')
#print the list to see if it is good
for item in category_list:
    print(item)

#set up file name and dictionary
file = "category.txt"
cat_file = open(file, 'w')
category = {}


#put formatted items in a dictionary
for line in category_list:
    x = line.split(":")
    a=x[0]
    b=x[1]
    category[a]=b
    
#remove instructions    
del(category["1 INSTRUCTIONS"])

#write category list to file
with open('category.csv', 'w') as file:
    [file.write('{0},{1}\n'.format(key, value)) for key, value in category.items()]  
    
#write the list in the database (table = a_categories, fields = auctionId,categoryName, itemCount)


ALL ITEMS:179
1 INSTRUCTIONS:1
AUTO RAMP:1
BAND SAW:1
BENCH PRESS:1
CARPET STINGER:3
CONTAINER:2
DOLLY:1
ELECTRONICS:2
FLOOR INVENTORY:116
FORKLIFT:2
FURNITURE:9
HOPPER:2
JOB BOX:1
LADDER:4
LAWN MOWER:1
MITER SAW:1
OFFICE EQUIPMENT:12
PALLET GRABBER:1
PALLET JACK:3
POLY TANKS:1
POWER WASHER:1
RADIAL ARM SAW:1
SCISSOR LIFT:1
SHELVING:2
WAREHOUSE EQUIPMENT:8
WINDOW SHUTTERS:1

In [2]:
getdate()


Sunday, 11 December 2016 07:54PM

In [11]:
cat = pd.read_csv('category.csv')
cat.head()


Out[11]:
FURNITURE 9
0 JOB BOX 1
1 POWER WASHER 1
2 PALLET GRABBER 1
3 MITER SAW 1
4 HOPPER 2

In [ ]:
#add column names and set index