In [7]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
from datetime import datetime, date, time
import csv
import pandas as pd
def getdate():
    #print the current date and time in a readable format
    now = datetime.now()
    dt = now.strftime("%A, %d %B %Y %I:%M%p")
    print(dt)
html = urlopen("https://www.maxanet.com/cgi-bin/mndetails.cgi?rosen316")
soup = BeautifulSoup(html, "lxml")
#get categories from web page
category = soup('table')[0].findAll('tr')[4].findAll('td')[1].text
#formatting - replace left paren with colon
new_cat = category.replace(" (", ":")
#replace right paren with blank
new1_cat = new_cat.replace(")", "")
#split on dashes
category_list = new1_cat.split(" - ")
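#e.g. an illustrative raw value (not the live page content):
#  "Catalog - 1 INSTRUCTIONS (1) - FURNITURE (12)"
#would become, after the replacements and the split:
#  ['Catalog', '1 INSTRUCTIONS:1', 'FURNITURE:12']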
#remove "Catalog" from the list
category_list.remove('Catalog')
#print the list to see if it is good
for item in category_list:
    print(item)
#set up the output file name and the category dictionary
out_file = "category.csv"
category = {}
#put the formatted items in a dictionary (category name -> item count)
for line in category_list:
    name, count = line.split(":")
    category[name] = count
#remove the instructions entry
del category["1 INSTRUCTIONS"]
#write the category list to the CSV file
with open(out_file, 'w') as f:
    for key, value in category.items():
        f.write('{0},{1}\n'.format(key, value))
#write the list in the database (table = a_categories, fields = auctionId,categoryName, itemCount)
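A sketch of the database step described in the last comment, assuming a local SQLite database; the table and field names (a_categories, auctionId, categoryName, itemCount) come from the comment, while the database file name and the auction id value (taken from the URL) are assumptions.
In [ ]:
import sqlite3
auction_id = "rosen316"  # assumed: the auction id that appears in the URL
conn = sqlite3.connect("auctions.db")  # assumed database file name
cur = conn.cursor()
cur.execute("""CREATE TABLE IF NOT EXISTS a_categories (
    auctionId TEXT,
    categoryName TEXT,
    itemCount INTEGER)""")
cur.executemany(
    "INSERT INTO a_categories (auctionId, categoryName, itemCount) VALUES (?, ?, ?)",
    [(auction_id, name, int(count)) for name, count in category.items()])
conn.commit()
conn.close()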
In [2]:
getdate()
In [11]:
cat = pd.read_csv('category.csv')
cat.head()
Out[11]:
In [ ]:
#add column names and set index
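# a possible way to finish this step (a sketch, not the original code):
# re-read the CSV without a header row, name the columns, and index by category name;
# the column names below are assumptions based on the database fields noted earlier
cat = pd.read_csv('category.csv', header=None, names=['categoryName', 'itemCount'])
cat = cat.set_index('categoryName')
cat.head()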