In [54]:
# This program can ONLY be run after NLTK Stemmer has been executed,
# because it reads the table that NLTK Stemmer creates.
In [288]:
# connect to database
import getpass
import os

import pymysql

# Credentials are not stored in the notebook. Host and user keep their previous
# values as defaults and can be overridden through the environment; the password
# has no default and is prompted for when WORDS_DB_PASSWORD is not set.
db_password = os.environ.get('WORDS_DB_PASSWORD')
if db_password is None:
    db_password = getpass.getpass('MySQL password: ')

connection = pymysql.connect(host=os.environ.get('WORDS_DB_HOST', '140.116.112.164'),
                             user=os.environ.get('WORDS_DB_USER', 'iim_project'),
                             password=db_password,
                             db='words_pos',
                             charset='utf8mb4',
                             # DictCursor returns each row as a dict keyed by column name.
                             cursorclass=pymysql.cursors.DictCursor)
cursor = connection.cursor()
Out[288]:
In [352]:
# Make words_pos the default database for this connection.
cursor.execute('use words_pos')
# Base name from which the table names used in this notebook are built; it is
# also stored as the owner of every inserted frequency row.
fileRoot = 'ES2002a.D.words'
In [353]:
# Table to read from: the file root with the '_POS' suffix appended.
fileTableName = ''.join([fileRoot, '_POS'])
In [354]:
# load stemmedWords from database
# The table name is passed as a query parameter and substituted into the
# backtick-quoted identifier.
# NOTE(review): pymysql escapes string parameters as quoted SQL literals, so the
# identifier presumably ends up containing the single quotes as well -- confirm
# this matches the name of the table created by NLTK Stemmer.
cursor.execute("select lowerCasedStemmedWord from `%s`", fileTableName)
Out[354]:
In [355]:
# Fetch every row of the result; with the DictCursor configured on the
# connection, each row is a dict keyed by column name.
stemmedWords = cursor.fetchall()
In [356]:
# Tally how many times each stemmed word occurs in the fetched rows.
countDic = {}
for aWord in stemmedWords:
    wordStr = aWord['lowerCasedStemmedWord']
    # A word not seen before counts from 0, so its first occurrence is stored as 1.
    countDic[wordStr] = countDic.get(wordStr, 0) + 1
In [357]:
# create new table to store frequency
# The single %s placeholder is the table name, supplied when the statement is
# executed. The statement has no IF NOT EXISTS clause, so executing it again
# for a table that already exists raises an error.
sql_create = (
    "create table `%s` ("
    "id int(11) NOT NULL AUTO_INCREMENT, "
    "stemmedWord char(16), "
    "frequency int(11), "
    "owner char (16), "
    "PRIMARY KEY(id)"
    ") ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin AUTO_INCREMENT=1"
)
# print() with a single argument behaves the same on Python 2 and Python 3.
print(sql_create)
In [358]:
# Table that will hold the counts: the file root with the '_Freq' suffix appended.
frequencyTableName = fileRoot + '_Freq'
# print() with a single argument behaves the same on Python 2 and Python 3.
print(frequencyTableName)
In [359]:
# Switch the connection's default database to words_freq so that the new table
# is created there rather than in words_pos.
cursor.execute('use words_freq')
# Create the frequency table; the table name is supplied as the query parameter.
cursor.execute(sql_create, frequencyTableName)
connection.commit()
In [360]:
# save data to table
# Placeholders, in order: table name, stemmedWord, frequency, owner.
sql_insert = "insert into `%s` (stemmedWord, frequency, owner) values (%s, %s, %s)"
# print() with a single argument behaves the same on Python 2 and Python 3.
print(sql_insert)
In [361]:
# Insert one row per distinct stemmed word, storing the file root as the owner.
# items() exists on both Python 2 and Python 3, whereas iteritems() is Python 2 only.
for key, value in countDic.items():
    cursor.execute(sql_insert, (frequencyTableName, key, value, fileRoot))
# NOTE(review): the commit is issued once after the loop, so all rows are saved
# in a single transaction -- confirm this matches the intended placement.
connection.commit()
In [ ]:
In [ ]: