In [ ]:
from langdetect import DetectorFactory, detect

# langdetect's algorithm is randomised; pinning the seed makes every detect()
# call in this notebook return the same answer from run to run.
DetectorFactory.seed = 0

# Quick sanity check on one English and one German sentence.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(detect("War doesn't show who's right, just who's left."))
print(detect("Ein, zwei, drei, vier"))
In [ ]:
import unicodecsv

# Predicted language code for every sample, in the order the rows appear in
# the input file. Later cells compare this list against the truth labels.
result = []

# unicodecsv expects a binary-mode file and decodes each cell itself.
with open('7languages.txt', 'rb') as input_file:
    for row in unicodecsv.reader(input_file):
        if not row:
            # A blank line parses to an empty row; there is no text to detect.
            continue
        lang = detect(row[0])
        # langdetect reports Chinese as 'zh-cn' / 'zh-tw'. Keep only the base
        # language code so predictions are comparable with plain labels such
        # as 'zh'. str() yields a native string on both Python 2 and 3
        # (language codes are pure ASCII), so it compares equal to the labels.
        result.append(str(lang.split('-')[0]))
        # Show the sample next to the raw code the detector returned.
        print(u"{0} | {1}".format(row[0], lang))
In [ ]:
# Create the truth set: the expected language code for each sample.
# The labels are compared with the predictions position by position, so they
# must be listed in the same order as the rows of the input file.
# NOTE(review): there are six labels here but the input file is named
# '7languages.txt' -- confirm the label count matches the number of rows.
truth = [
    'ko',
    'de',
    'zh',
    'ar',
    'es',
    'ja',
]

# Show expected and predicted labels for a quick visual comparison.
print(truth)
print(result)
In [ ]:
# Display the confusion matrix of expected vs. predicted language codes.
import nltk

# First argument is the reference (truth) list, second is the predictions.
confusion = nltk.ConfusionMatrix(truth, result)
print(confusion)
In [ ]: