In [47]:
# Read a text file from the wc_input folder and write its word counts to the wc_output folder.
#file = open('/Users/jigdelkuyee/workspace/insight/code_challenge/wc_input/first.txt', 'r')
import collections
import json
import ast
# Input and output files (absolute paths on the author's machine).
# InputFile: the text file that main() passes to readFile().
InputFile = '/Users/jigdelkuyee/workspace/insight/code_challenge/wc_input/first.txt'
# wordCountOutputFile: the file that main() passes to writeFile() for the word counts.
wordCountOutputFile = '/Users/jigdelkuyee/workspace/insight/code_challenge/wc_output/wc_result.txt'
# medianOutputFile: not referenced by any code visible in this file.
# NOTE(review): presumably reserved for a median computation -- confirm or remove.
medianOutputFile = '/Users/jigdelkuyee/workspace/insight/code_challenge/wc_output/med_result.txt'
def readFile(filename):
    """Count case-insensitive word frequencies in a text file.

    The file is read in full, split on whitespace, and every token is
    lower-cased before counting.

    Args:
        filename: Path of the text file to read.

    Returns:
        A collections.OrderedDict mapping each lower-cased word to its
        number of occurrences, with keys in ascending string order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Open the path we were given rather than the module-level InputFile,
    # so the function works for any caller-supplied file.
    with open(filename) as f_in:
        wordList = f_in.read().split()

    # TODO: strip punctuation / wildcard characters from the tokens here.

    # Only the `collections` module is imported at file level, so Counter
    # must be referenced through it (a bare `Counter` is a NameError).
    wordsFreq = collections.Counter(word.lower() for word in wordList)

    # Sort by word so the output is written alphabetically.
    wordsFreqSorted = collections.OrderedDict(
        sorted(wordsFreq.items(), key=lambda t: t[0])
    )
    return wordsFreqSorted
def writeFile(dict, filename):
    """Write each key/value pair of a mapping to a text file, one per line.

    Each line has the form "<key> <value> " followed by a newline, in the
    mapping's own iteration order. An existing file is overwritten.

    Args:
        dict: Mapping to write out (for example, word -> count). The
            parameter name shadows the builtin but is kept so existing
            keyword callers continue to work.
        filename: Path of the output file.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    # Text mode is required: the formatted lines are str, and writing str
    # to a file opened with 'wb' raises TypeError on Python 3.
    with open(filename, 'w') as f_out:
        for k, v in dict.items():
            f_out.write("{} {} \n".format(k, v))
#with open(wordCountOutputFile, 'w') as f:
#json.dump(wordsFreqSorted, f)
#with open(wordCountOutputFile, 'wb') as f:
# pickle.dump(wordsFreqSorted, f)
#Write to a text file and save it in wc_output folder
#with open(wordCountOutputFile, 'wb') as f:
# data = csv.writer(f)
# data.writerows(wordsFreqSorted)
def main():
    """Count the words in InputFile and save the result to wordCountOutputFile."""
    # Build the sorted word-frequency mapping, then write it to the wc_result file.
    writeFile(readFile(InputFile), wordCountOutputFile)


# Script entry point: run main() only when this file is executed directly.
if __name__ == '__main__':
    main()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: