In [1]:
#extract service id, name&url(if available)
from BeautifulSoup import BeautifulSoup

service_inf = open('/home/weizhou/Projects/scripts/output/service', 'r')
# CR team services have link info. we have to consider that
cr_service_inf = open('/home/weizhou/Projects/scripts/input/service/cr', 'r')
HOME_URL = u"https://docs.engineering.redhat.com"

FILES = (
         service_inf,
         cr_service_inf,
         )

service_dict = dict()
for item in service_inf:
    item = item.strip()
    for element in item.split('|')[1].split(' , '):
        service_dict[element] = item.split('|')[0]

cr_service_link_dict = dict()
#cr service process
cr_content = cr_service_inf.read()
cr_soup = BeautifulSoup(cr_content)
for cr_table in cr_soup.findAll('div', 'table-wrap'):
    trs = cr_table.table.tbody.findAll('tr')
    #skip first 2lines
    trs = trs[2:]
    for tr in trs:
        if tr.contents[0].string:
            if tr.contents[0].string == u' ':
                continue
            service_name = tr.contents[1].a.contents[0].strip()
            service_link = tr.contents[1].a['href']
            if service_link[:5] != u'https':
                service_link = HOME_URL + service_link
        elif tr.contents[0].strong:
            service_name = tr.contents[1].strong.a.contents[0].strip()
            service_link = tr.contents[1].strong.a['href']
            if service_link[:5] != u'https':
                service_link = HOME_URL + service_link
        elif tr.contents[0].p:
            service_name = tr.contents[1].p.a.contents[0].strip()
            service_link = tr.contents[1].p.a['href']
            if service_link[:5] != u'https':
                service_link = HOME_URL + service_link
        else:
            continue

        cr_service_link_dict[service_name] = service_link


#Clean up services
service_id_dict = dict()
for key, value in service_dict.iteritems():
    if service_id_dict.get(value, None) and service_id_dict[value][1] != u'NA':
        continue
    service_id_dict[value] = [key, cr_service_link_dict.get(key, u'NA')]

service = list()
for key, value in service_id_dict.iteritems():
    service.append([key, value[0], value[1]])

for F in FILES:
    F.close()

In [2]:
#extract team id, name&url(if available)
team = [['1', 'Engineering Operations - CORE', 'CR', '680', 'https://people.engineering.redhat.com/orgchart?next_page=group&id=680' ],
        ['2', 'Engineering Operations - LAB', 'LB', '679', 'https://people.engineering.redhat.com/orgchart?next_page=group&id=679'],
        ['3', 'Engineering Operations - FLS', 'FS', '678', 'https://people.engineering.redhat.com/orgchart?next_page=group&id=678'],
        ['4', 'Engineering Operations - Cloud Architect', 'CA', '683', 'https://people.engineering.redhat.com/orgchart?next_page=group&id=683'],
        ['5', 'Engineering Operations - Solution Architect', 'SA', '682', 'https://people.engineering.redhat.com/orgchart?next_page=group&id=682'],
        ['6', 'Engineering Operations - Systems Engineering', 'SE', '681', 'https://people.engineering.redhat.com/orgchart?next_page=group&id=681'],
        ]

In [3]:
#extract task id, name, team id, category id, spent_time&details
#Warning: here will ingore some missing-info data
task_inf = open('/home/weizhou/Projects/scripts/output/task', 'r')

task = list()
task_list = task_inf.read().decode('utf8').split(u'\n')[1:]
for task_line in task_list:
    task_content_list = task_line.split(u'|')
    if task_content_list[6] == u'NA':
        task_desc_link = u'NA'
    elif task_content_list[6][:5] != u'https':
        task_desc_link = u'[link to confluence page](%s%s)' % (HOME_URL, task_content_list[6])
    else:
        task_desc_link = u'[link to confluence page](%s)' % task_content_list[6]
    task.append([task_content_list[1], 
                 task_content_list[2], 
                 task_content_list[3],
                 task_content_list[4],
                 task_content_list[5],
                 task_desc_link
                 ])

task_inf.close()

In [4]:
#import data into DB
from mysql import connector
import time

now = time.strftime("%Y-%m-%d %H:%M:%S")
try:
    conn = connector.Connect(host='localhost',user='root',passwd='',db='SC',port=3306, charset="utf8")
    cur = conn.cursor()

    #insert some superusers
    cur.execute('DELETE FROM auth_user;')
                #id, pw, is_super, username, is_staff, is_active
    superusers = [['1', '!', '1', 'weizhou', '1', '1', now, now],
                  ['2', '!', '1', 'hlin', '1', '1', now, now],
                  ['3', '!', '1', 'qduanmu', '1', '1', now, now],
                  ['4', '!', '1', 'qiyang', '1', '1', now, now],
                  ['5', '!', '1', 'ryang', '1', '1', now, now],
                  ['6', '!', '1', 'vgrinco', '1', '1', now, now],
                  ['7', '!', '1', 'jskeoch', '1', '1', now, now],
                  ['8', '!', '1', 'jnicolet', '1', '1', now, now],
                  ['9', '!', '1', 'llim', '1', '1', now, now],
                  ['10', '!', '1', 'swei', '1', '1', now, now],
                  ]
    user_query = "INSERT INTO auth_user (id, password, is_superuser, username, is_staff, is_active, last_login, date_joined)\
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
    cur.executemany(user_query, superusers)
    
    cur.execute('DELETE FROM catalog_task;')
    cur.execute('DELETE FROM catalog_team;')
    cur.execute('DELETE FROM catalog_category;')
    #cur.commit()

    #insert category
    for item in service:
        item.extend([now, now, 1])
    
    catalog_query = "INSERT INTO catalog_category (id, name, description, created_on, updated_on, available)\
                    VALUES (%s, %s, %s, %s, %s, %s)"    
    cur.executemany(catalog_query, service)
    
    #insert team
    for item in team:
        item.extend([now, now, 1])
    team_query = "INSERT INTO catalog_team (id, name, abbr, gid, description, created_on, updated_on, available) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
    cur.executemany(team_query, team)
    
    #insert task
    for item in task:
        item.extend([now, now, 1, 1])
    task_query = "INSERT INTO catalog_task (id, category_id, team_id, spend_time, name, details, created_on, updated_on, available, latest_version) \
                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    cur.executemany(task_query, task)
    
    conn.commit()
    cur.close()
    conn.close()
except connector.errors ,e:
     print "Mysql Error %d: %s" % (e.args[0], e.args[1])