In [54]:
# Prerequisite: run the NLTK Stemmer program BEFORE this one.
# This notebook reads the table that NLTK Stemmer creates.

In [288]:
# connect to database
import getpass
import os

import pymysql

# Credentials are read from the environment so that no secret is stored in the
# notebook. Host and user fall back to the values this notebook has always
# used; the password has no fallback and is prompted for interactively when
# WORDS_DB_PASSWORD is not set.
# NOTE(review): the password that used to be embedded in this cell has been
# exposed wherever the notebook was shared and should be rotated.
_db_password = os.environ.get('WORDS_DB_PASSWORD')
if _db_password is None:
    _db_password = getpass.getpass('MySQL password: ')

connection = pymysql.connect(host=os.environ.get('WORDS_DB_HOST', '140.116.112.164'),
                             user=os.environ.get('WORDS_DB_USER', 'iim_project'),
                             password=_db_password,
                             db='words_pos',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)

# Do not keep the secret around in the kernel namespace.
del _db_password

# DictCursor: fetched rows are addressed by column name (see the counting cell).
cursor = connection.cursor()


Out[288]:
0

In [352]:
# Make words_pos the active schema for the queries that follow.
cursor.execute('use words_pos')
# Base name of the file being processed; the source (`_POS`) and output
# (`_Freq`) table names are derived from it, and it is stored as `owner`
# on every inserted row.
fileRoot = 'ES2002a.D.words'

In [353]:
# Name of the source table to read: fileRoot with a `_POS` suffix.
fileTableName = fileRoot + '_POS'

In [354]:
# Load stemmed words from the database: select the lowerCasedStemmedWord
# column of the table named by fileTableName.
# NOTE(review): the table name is passed as a query argument, not formatted in
# as an identifier; pymysql presumably escapes it as a quoted string literal,
# so the name inside the backticks may include literal single quotes. The
# query only works if the table was created with the same convention —
# confirm before changing how the name is substituted.
cursor.execute("select lowerCasedStemmedWord from `%s`", fileTableName)


Out[354]:
1213

In [355]:
# Pull every row of the preceding select into memory; each row is indexed by
# column name ('lowerCasedStemmedWord') in the counting cell.
stemmedWords = cursor.fetchall()

In [356]:
# Tally how many times each stemmed word occurs in the fetched rows.
countDic = {}
for aWord in stemmedWords:
    wordStr = aWord['lowerCasedStemmedWord']
    # dict.get supplies 0 the first time a word is seen, so one statement
    # covers both the "new word" and the "seen before" case.
    countDic[wordStr] = countDic.get(wordStr, 0) + 1

In [357]:
# create new table to store frequency
# Template for the per-file frequency table. The single `%s` placeholder is the
# table name, supplied when the statement is executed. Columns: `id`
# (auto-increment primary key), `stemmedWord` char(16), `frequency` int(11),
# `owner` char(16).
# NOTE(review): stemmedWord and owner are char(16); values longer than 16
# characters would not fit — confirm this limit is intended.
sql_create = "create table `%s` (id int(11) NOT NULL AUTO_INCREMENT, stemmedWord char(16), frequency int(11), owner char (16), PRIMARY KEY(id)) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin AUTO_INCREMENT=1"
# Parenthesised form prints the same text on Python 2 and also parses on Python 3.
print(sql_create)


create table `%s` (id int(11) NOT NULL AUTO_INCREMENT, stemmedWord char(16), frequency int(11), owner char (16), PRIMARY KEY(id)) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin AUTO_INCREMENT=1

In [358]:
# Name of the output table: fileRoot with a `_Freq` suffix.
frequencyTableName = fileRoot + '_Freq'
# Parenthesised form prints the same text on Python 2 and also parses on Python 3.
print(frequencyTableName)


ES2002a.D.words_Freq

In [359]:
# Switch to the words_freq schema, create the frequency table from the
# sql_create template, and commit.
# NOTE(review): frequencyTableName is passed as a query argument rather than
# formatted in as an identifier; pymysql presumably escapes it as a quoted
# string literal, so the table created inside the backticks may carry literal
# single quotes in its name — confirm. The insert cell passes the name the
# same way, so the two stay consistent with each other.
# NOTE(review): sql_create has no IF NOT EXISTS, so re-running this cell for
# the same fileRoot is expected to fail — confirm.
cursor.execute('use words_freq')
cursor.execute(sql_create, frequencyTableName)
connection.commit()

In [360]:
# save data to table
# Insert template with four placeholders, in order: table name, stemmedWord,
# frequency, owner.
sql_insert = "insert into `%s` (stemmedWord, frequency, owner) values (%s, %s, %s)"
# Parenthesised form prints the same text on Python 2 and also parses on Python 3.
print(sql_insert)


insert into `%s` (stemmedWord, frequency, owner) values (%s, %s, %s)

In [361]:
# Write one row per distinct stemmed word: (stemmedWord, frequency, owner),
# with fileRoot recorded as the owner.
# The whole batch is committed only if every insert succeeds; on any error the
# pending rows are rolled back so a re-run does not stack duplicates on top of
# a partial batch left open on this long-lived connection.
# NOTE(review): the table name is passed as a query argument (same convention
# as the create-table cell); keep the two in sync if that is ever changed.
# Rows are inserted one at a time on purpose: the table-name placeholder
# likely does not fit the query shape executemany() rewrites for bulk inserts.
try:
    # .items() works on both Python 2 and Python 3 (.iteritems() is 2-only).
    for key, value in countDic.items():
        cursor.execute(sql_insert, (frequencyTableName, key, value, fileRoot))
except Exception:
    connection.rollback()
    raise
else:
    connection.commit()

In [ ]:


In [ ]: