In [1]:
# Probe for an installed deep_disfluency package. If it is missing, add
# "../" (relative to the working directory) to the import path so the
# import below can resolve against the source tree instead.
try:
    import deep_disfluency  # noqa: F401 -- imported only to test availability
except ImportError:
    import sys
    print("no installed deep_disfluency package, pathing to source")
    sys.path.append("../")
from deep_disfluency.tagger.deep_tagger import DeepDisfluencyTagger


no installed deep_disfluency package, pathing to source

In [2]:
# Build the tagger from three pieces of information: the experiment config
# CSV, the config number (row) to use, and the directory holding the saved
# model for that config.
MESSAGE = (
    "1. Disfluency tagging on pre-segmented utterances\n"
    "tags repair structure incrementally and other edit terms <e/>\n"
    "(Hough and Schlangen Interspeech 2015 with an RNN)\n"
)
print(MESSAGE)
disf = DeepDisfluencyTagger(
    config_file="../deep_disfluency/experiments/experiment_configs.csv",
    config_number=21,
    saved_model_dir="../deep_disfluency/experiments/021/epoch_40",
)


1. Disfluency tagging on pre-segmented utterances
tags repair structure incrementally and other edit terms <e/>
(Hough and Schlangen Interspeech 2015 with an RNN)

Initializing Tagger
Processing args from config number 21 ...
Intializing model from args...
Using the cpu
Warning: not using GPU, might be a bit slow
	Adjust Theano config file ($HOME/.theanorc)
loading tag to index maps...
Initializing model of type elman ...
Loading saved weights from ../deep_disfluency/experiments/021/epoch_40
No POS tagger specified,loading default CRF switchboard one
Not using timing data
Loading decoder...
loading swbd_disf1_021 Markov model
No timing model given
Markov Model ready mode:
constraint only

In [3]:
# Feed the utterance to the tagger one word at a time.
# Each call prints the incremental diff, which can also revise the tags of
# earlier words (see the step for "loves" in the output).
# Set diff_only to False if you want the whole utterance's tags each time.
with_pos = False  # True: pass the POS tags listed below to the tagger
utterance = [
    ("john", "NNP"),
    ("likes", "VBP"),
    ("uh", "UH"),
    ("loves", "VBP"),
    ("mary", "NNP"),
]
print("tagging...")
for word, pos_tag in utterance:
    if with_pos:
        # POS supplied by the caller
        print(disf.tag_new_word(word, pos=pos_tag))
    else:
        # the internal POS tagger tags the words incrementally
        print(disf.tag_new_word(word))
print("final tags:")
for (word, _), tag in zip(utterance, disf.output_tags):
    # "%s \t%s" reproduces the spacing of the statement form
    # `print word, "\t", tag` (word, space, tab, tag).
    print("%s \t%s" % (word, tag))
disf.reset()  # resets the whole tagger for new utterance


tagging...
['<f/>']
['<f/>']
['<e/>']
['<rms id="3"/>', '<i id="3"/><e/>', '<rps id="3"/><rpnsub id="3"/>']
['<f/>']
final tags:
john 	<f/>
likes 	<rms id="3"/>
uh 	<i id="3"/><e/>
loves 	<rps id="3"/><rpnsub id="3"/>
mary 	<f/>

In [4]:
# More complex set-up: config 35 with timing data enabled.
# The banner below is user-facing, so the "semgenter" misspelling is
# corrected to "segmenter".
print("\n" + "*" * 30)
MESSAGE = """2. Joint disfluency tagger and utterance segmenter
Simple disf tags <e/>, <i/> and repair onsets <rps
LSTM simple from Hough and Schlangen EACL 2017"""
print(MESSAGE)
disf = DeepDisfluencyTagger(
    config_file="../deep_disfluency/experiments/experiment_configs.csv",
    config_number=35,
    saved_model_dir="../deep_disfluency/experiments/035/epoch_6",
    # tag_new_word is then called with a `timing` value for every word
    use_timing_data=True,
)


******************************
2. Joint disfluency tagger and utterance semgenter
Simple disf tags <e/>, <i/> and repair onsets <rps
LSTM simple from Hough and Schlangen EACL 2017
Initializing Tagger
Processing args from config number 35 ...
Intializing model from args...
Using the cpu
Warning: not using GPU, might be a bit slow
	Adjust Theano config file ($HOME/.theanorc)
loading tag to index maps...
Initializing model of type lstm ...
Loading saved weights from ../deep_disfluency/experiments/035/epoch_6
No POS tagger specified,loading default CRF switchboard one
No timer specified, using default switchboard one
Loading decoder...
loading swbd_disf1_uttseg_simple_033 Markov model
Markov Model ready mode:
constraint only
/home/julian/.local/lib/python2.7/site-packages/sklearn/base.py:311: UserWarning: Trying to unpickle estimator LogisticRegression from version 0.18.1 when using version 0.19.1. This might lead to breaking code or invalid results. Use at your own risk.
  UserWarning)
/home/julian/.local/lib/python2.7/site-packages/sklearn/base.py:311: UserWarning: Trying to unpickle estimator StandardScaler from version 0.18.1 when using version 0.19.1. This might lead to breaking code or invalid results. Use at your own risk.
  UserWarning)

In [5]:
# Tag a short dialogue word by word, passing POS tag and timing explicitly.
# Each row is (word, POS tag, timing value handed to tag_new_word).
# The final word carries a larger timing value (2.0) than the rest (0.3).
timed_words = [
    ("john", "NNP", 0.3),
    ("likes", "VBP", 0.3),
    ("uh", "UH", 0.3),
    ("loves", "VBP", 0.3),
    ("mary", "NNP", 0.3),
    ("yeah", "UH", 2.0),
]
print("tagging...")
for word, pos_tag, timing in timed_words:
    print(disf.tag_new_word(word, pos=pos_tag, timing=timing))
print("final tags:")
for (word, _, _), tag in zip(timed_words, disf.output_tags):
    # "%s \t%s" reproduces the spacing of the statement form
    # `print word, "\t", tag` (word, space, tab, tag).
    print("%s \t%s" % (word, tag))
disf.reset()  # resets the whole tagger for next dialogue or turn


tagging...
['<f/><tc/>']
['<f/><cc/>']
['<e/><i/><cc/>']
['<rps id="3"/><cc/>']
['<f/><cc/>']
['<f/><ct/>', '<f/><tc/>']
final tags:
john 	<f/><tc/>
likes 	<f/><cc/>
uh 	<e/><i/><cc/>
loves 	<rps id="3"/><cc/>
mary 	<f/><ct/>
yeah 	<f/><tc/>

In [6]:
# Third set-up: config 36 with timing data enabled.
# The banner previously carried stray trailing double-quote characters on
# its first two lines (printed verbatim) and misspelled "segmenter"; both
# are corrected here.
print("\n" + "*" * 30)
MESSAGE = """3. Joint disfluency tagger and utterance segmenter
Full complex tag set with disfluency structure
LSTM complex from Hough and Schlangen EACL 2017"""
print(MESSAGE)
disf = DeepDisfluencyTagger(
    config_file="../deep_disfluency/experiments/experiment_configs.csv",
    config_number=36,
    saved_model_dir="../deep_disfluency/experiments/036/epoch_15",
    # tag_new_word is then called with a `timing` value for every word
    use_timing_data=True,
)


******************************
3. Joint disfluency tagger and utterance semgenter"
Full complex tag set with disfluency structure"
LSTM complex from Hough and Schlangen EACL 2017
Initializing Tagger
Processing args from config number 36 ...
Intializing model from args...
Using the cpu
Warning: not using GPU, might be a bit slow
	Adjust Theano config file ($HOME/.theanorc)
loading tag to index maps...
Initializing model of type lstm ...
Loading saved weights from ../deep_disfluency/experiments/036/epoch_15
No POS tagger specified,loading default CRF switchboard one
No timer specified, using default switchboard one
Loading decoder...
loading swbd_disf1_uttseg_034 Markov model
Markov Model ready mode:
constraint only

In [7]:
# Tag a short dialogue word by word, passing POS tag and timing explicitly.
# Each row is (word, POS tag, timing value handed to tag_new_word).
# The final word carries a larger timing value (2.0) than the rest (0.3).
timed_words = [
    ("i", "PRP", 0.3),
    ("uh", "UH", 0.3),
    ("i", "PRP", 0.3),
    ("love", "VBP", 0.3),
    ("mary", "NNP", 0.3),
    ("yeah", "UH", 2.0),
]
print("tagging...")
for word, pos_tag, timing in timed_words:
    print(disf.tag_new_word(word, pos=pos_tag, timing=timing))
print("final tags:")
for (word, _, _), tag in zip(timed_words, disf.output_tags):
    # "%s \t%s" reproduces the spacing of the statement form
    # `print word, "\t", tag` (word, space, tab, tag).
    print("%s \t%s" % (word, tag))
disf.reset()  # resets the whole tagger


tagging...
['<f/><tc/>']
['<e/><cc/>']
['<rms id="2"/><rms id="1"/><tc/>', '<rm id="2"/><rps id="1"/><rpnsub id="1"/><cc/>', '<rps id="2"/><rpnsub id="2"/><cc/>']
['<f/><cc/>']
['<f/><cc/>']
['<f/><ct/>', '<f/><tt/>']
final tags:
i 	<rms id="2"/><rms id="1"/><tc/>
uh 	<rm id="2"/><rps id="1"/><rpnsub id="1"/><cc/>
i 	<rps id="2"/><rpnsub id="2"/><cc/>
love 	<f/><cc/>
mary 	<f/><ct/>
yeah 	<f/><tt/>

In [ ]:


In [ ]: