In [8]:
# Notebook setup: switch to the project directory, then import dependencies.
import os
# NOTE(review): hard-coded, machine-specific absolute path. The relative file
# names used in later cells ("Tags.json", 'Address.csv', 'hk_add_train.csv')
# are resolved against this directory once the chdir has run.
os.chdir(r"C:\Users\Xiaozhou\Documents\Address")
# Presumably a project-local module located via the working directory set
# above, which would explain why this import follows the chdir — TODO confirm.
import febrl as fe
import pandas as pd
import json
import random as rn
In [2]:
# Load the tag lookup table from Tags.json. The context manager closes the
# file handle as soon as parsing finishes instead of leaving it open until
# garbage collection.
with open("Tags.json") as tags_file:
    tags_dict = json.load(tags_file)
# Substitution pairs passed to the standardiser through load_reg_subs(),
# kept in their original order.
reg_subs = (
    (";", " , "),
    (",", " , "),
    (" ", " "),
)

# Tagging rules passed to the standardiser through load_reg_tags(): each entry
# pairs a pattern with a two-letter tag, except for the final element.
reg_tags = (
    ("^,$", "CM"),
    # NOTE(review): the "." is unescaped, so if these are applied as regular
    # expressions it matches any character after "no", not only a literal dot.
    ("^no.$", "NO"),
    ("^[0-9]+(rd|st|th)$", "TH"),
    ("^[0-9]+-[0-9]+[A-Za-z]*$", "RN"),
    ("^[0-9]$", "II"),
    ("^[0-9]+$", "NU"),
    # A plain string rather than a (pattern, tag) pair: parentheses without a
    # trailing comma do not create a tuple. Presumably the fallback tag —
    # TODO confirm against febrl's load_reg_tags().
    "UN",
)
In [3]:
# Create the standardiser and hand it the three lookup resources, in the same
# order as before: tags, then substitutions, then tagging rules.
a = fe.standardiser()
for load_resource, resource in (
    (a.load_tags, tags_dict),
    (a.load_reg_subs, reg_subs),
    (a.load_reg_tags, reg_tags),
):
    load_resource(resource)
In [4]:
# Read the address table into a DataFrame; a later cell samples rows from its
# 'AddressEng' column.
df = pd.read_csv("Address.csv")
In [19]:
# Preview the first ten rows; as the cell's last expression it is rendered
# by the notebook.
df.head(n=10)
Out[19]:
In [6]:
# Read the training data from hk_add_train.csv with febrl's reader.
data=fe.read_train_data('hk_add_train.csv')
# Train the standardiser created earlier on that data.
# NOTE(review): the meaning of the positional arguments is not visible here —
# 'asdf' looks like a placeholder name and 'laplace' presumably selects a
# smoothing method; confirm against febrl's train() signature.
a.train('asdf', data, 'laplace')
In [18]:
# Spot-check the trained standardiser on one randomly chosen address.
# The row position is drawn from the table's actual length rather than a
# hard-coded 65000, so the draw cannot overrun a shorter file (KeyError) and
# does not silently skip the tail of a longer one.
x = rn.randrange(len(df))
# Positional lookup to match the positional draw; with the default index that
# read_csv produces this selects the same row as label lookup did.
text = df['AddressEng'].iloc[x]
print(x)
print(text)
print(a.tag_str(text))
# Kept as the cell's last expression so the notebook displays the result.
a.std_str(text)
Out[18]: