In [1]:
#extract service id, name&url(if available)
from BeautifulSoup import BeautifulSoup
service_inf = open('/home/weizhou/Projects/scripts/output/service', 'r')
# CR team services have link info. we have to consider that
cr_service_inf = open('/home/weizhou/Projects/scripts/input/service/cr', 'r')
HOME_URL = u"https://docs.engineering.redhat.com"
FILES = (
service_inf,
cr_service_inf,
)
service_dict = dict()
for item in service_inf:
item = item.strip()
for element in item.split('|')[1].split(' , '):
service_dict[element] = item.split('|')[0]
cr_service_link_dict = dict()
#cr service process
cr_content = cr_service_inf.read()
cr_soup = BeautifulSoup(cr_content)
for cr_table in cr_soup.findAll('div', 'table-wrap'):
trs = cr_table.table.tbody.findAll('tr')
#skip first 2lines
trs = trs[2:]
for tr in trs:
if tr.contents[0].string:
if tr.contents[0].string == u' ':
continue
service_name = tr.contents[1].a.contents[0].strip()
service_link = tr.contents[1].a['href']
if service_link[:5] != u'https':
service_link = HOME_URL + service_link
elif tr.contents[0].strong:
service_name = tr.contents[1].strong.a.contents[0].strip()
service_link = tr.contents[1].strong.a['href']
if service_link[:5] != u'https':
service_link = HOME_URL + service_link
elif tr.contents[0].p:
service_name = tr.contents[1].p.a.contents[0].strip()
service_link = tr.contents[1].p.a['href']
if service_link[:5] != u'https':
service_link = HOME_URL + service_link
else:
continue
cr_service_link_dict[service_name] = service_link
#Clean up services
service_id_dict = dict()
for key, value in service_dict.iteritems():
if service_id_dict.get(value, None) and service_id_dict[value][1] != u'NA':
continue
service_id_dict[value] = [key, cr_service_link_dict.get(key, u'NA')]
service = list()
for key, value in service_id_dict.iteritems():
service.append([key, value[0], value[1]])
for F in FILES:
F.close()
In [2]:
#extract team id, name&url(if available)
team = [['1', 'Engineering Operations - CORE', 'CR', '680', 'https://people.engineering.redhat.com/orgchart?next_page=group&id=680' ],
['2', 'Engineering Operations - LAB', 'LB', '679', 'https://people.engineering.redhat.com/orgchart?next_page=group&id=679'],
['3', 'Engineering Operations - FLS', 'FS', '678', 'https://people.engineering.redhat.com/orgchart?next_page=group&id=678'],
['4', 'Engineering Operations - Cloud Architect', 'CA', '683', 'https://people.engineering.redhat.com/orgchart?next_page=group&id=683'],
['5', 'Engineering Operations - Solution Architect', 'SA', '682', 'https://people.engineering.redhat.com/orgchart?next_page=group&id=682'],
['6', 'Engineering Operations - Systems Engineering', 'SE', '681', 'https://people.engineering.redhat.com/orgchart?next_page=group&id=681'],
]
In [3]:
#extract task id, name, team id, category id, spent_time&details
#Warning: here will ingore some missing-info data
task_inf = open('/home/weizhou/Projects/scripts/output/task', 'r')
task = list()
task_list = task_inf.read().decode('utf8').split(u'\n')[1:]
for task_line in task_list:
task_content_list = task_line.split(u'|')
if task_content_list[6] == u'NA':
task_desc_link = u'NA'
elif task_content_list[6][:5] != u'https':
task_desc_link = u'[link to confluence page](%s%s)' % (HOME_URL, task_content_list[6])
else:
task_desc_link = u'[link to confluence page](%s)' % task_content_list[6]
task.append([task_content_list[1],
task_content_list[2],
task_content_list[3],
task_content_list[4],
task_content_list[5],
task_desc_link
])
task_inf.close()
In [4]:
#import data into DB
from mysql import connector
import time
now = time.strftime("%Y-%m-%d %H:%M:%S")
try:
conn = connector.Connect(host='localhost',user='root',passwd='',db='SC',port=3306, charset="utf8")
cur = conn.cursor()
#insert some superusers
cur.execute('DELETE FROM auth_user;')
#id, pw, is_super, username, is_staff, is_active
superusers = [['1', '!', '1', 'weizhou', '1', '1', now, now],
['2', '!', '1', 'hlin', '1', '1', now, now],
['3', '!', '1', 'qduanmu', '1', '1', now, now],
['4', '!', '1', 'qiyang', '1', '1', now, now],
['5', '!', '1', 'ryang', '1', '1', now, now],
['6', '!', '1', 'vgrinco', '1', '1', now, now],
['7', '!', '1', 'jskeoch', '1', '1', now, now],
['8', '!', '1', 'jnicolet', '1', '1', now, now],
['9', '!', '1', 'llim', '1', '1', now, now],
['10', '!', '1', 'swei', '1', '1', now, now],
]
user_query = "INSERT INTO auth_user (id, password, is_superuser, username, is_staff, is_active, last_login, date_joined)\
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
cur.executemany(user_query, superusers)
cur.execute('DELETE FROM catalog_task;')
cur.execute('DELETE FROM catalog_team;')
cur.execute('DELETE FROM catalog_category;')
#cur.commit()
#insert category
for item in service:
item.extend([now, now, 1])
catalog_query = "INSERT INTO catalog_category (id, name, description, created_on, updated_on, available)\
VALUES (%s, %s, %s, %s, %s, %s)"
cur.executemany(catalog_query, service)
#insert team
for item in team:
item.extend([now, now, 1])
team_query = "INSERT INTO catalog_team (id, name, abbr, gid, description, created_on, updated_on, available) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
cur.executemany(team_query, team)
#insert task
for item in task:
item.extend([now, now, 1, 1])
task_query = "INSERT INTO catalog_task (id, category_id, team_id, spend_time, name, details, created_on, updated_on, available, latest_version) \
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
cur.executemany(task_query, task)
conn.commit()
cur.close()
conn.close()
except connector.errors ,e:
print "Mysql Error %d: %s" % (e.args[0], e.args[1])