In [ ]:
from polyglotdb import CorpusContext
The first step in generating a CSV file is to create a query that selects just the annotations of interest to our study.
In this case, we want all stressed syllables (defined here as having a stress value equal to '1') that are at the beginning of words that are at the end of utterances.
In [ ]:
# Preview query: stressed, word-initial syllables in utterance-final words.
# Only the first 10 rows are fetched so the output can be inspected before
# running a full export.
with CorpusContext('pg_tutorial') as c:
    # Columns to report for each matching syllable, each with an explicit
    # output name.
    preview_columns = [
        c.syllable.label.column_name('syllable'),
        c.syllable.duration.column_name('syllable_duration'),
        c.syllable.word.label.column_name('word'),
        c.syllable.word.begin.column_name('word_begin'),
        c.syllable.word.end.column_name('word_end'),
        c.syllable.word.num_syllables.column_name('word_num_syllables'),
        c.syllable.word.stress_pattern.column_name('word_stress_pattern'),
        c.syllable.word.utterance.speech_rate.column_name('utterance_speech_rate'),
        c.syllable.speaker.name.column_name('speaker'),
        c.syllable.speaker.gender.column_name('speaker_gender'),
        c.syllable.discourse.name.column_name('file'),
    ]

    q = (
        c.query_graph(c.syllable)
        # Stressed syllables only (stress value '1').
        .filter(c.syllable.stress == '1')
        # The syllable starts where its word starts (word-initial).
        .filter(c.syllable.begin == c.syllable.word.begin)
        # The word ends where its utterance ends (utterance-final).
        .filter(c.syllable.word.end == c.syllable.word.utterance.end)
        .columns(*preview_columns)
        .limit(10)
    )

    results = q.all()
    print(results)
With the above, we extract information of interest about the syllable, the word it is in, the utterance it is in, the speaker, and the sound file (called a discourse in PolyglotDB's API). We also limit the results to 10 and print them all.
In [ ]:
# Destination of the exported CSV file; adjust this path for your own machine.
export_path = '/mnt/e/pg_tutorial.csv'

# Export query: the same selection as the preview query (stressed,
# word-initial syllables in utterance-final words), but without a row limit
# and written to a CSV file instead of printed.
with CorpusContext('pg_tutorial') as c:
    q = c.query_graph(c.syllable)
    # Compare against the string '1', exactly as the preview query does.
    # Filtering on the integer 1 is a different comparison and would
    # presumably not match string-valued stress, so the export would not
    # contain the rows shown in the preview.
    q = q.filter(c.syllable.stress == '1')
    # The syllable starts where its word starts (word-initial).
    q = q.filter(c.syllable.begin == c.syllable.word.begin)
    # The word ends where its utterance ends (utterance-final).
    q = q.filter(c.syllable.word.end == c.syllable.word.utterance.end)
    # One CSV column per requested property, named via column_name().
    q = q.columns(
        c.syllable.label.column_name('syllable'),
        c.syllable.duration.column_name('syllable_duration'),
        c.syllable.word.label.column_name('word'),
        c.syllable.word.begin.column_name('word_begin'),
        c.syllable.word.end.column_name('word_end'),
        c.syllable.word.num_syllables.column_name('word_num_syllables'),
        c.syllable.word.stress_pattern.column_name('word_stress_pattern'),
        c.syllable.word.utterance.speech_rate.column_name('utterance_speech_rate'),
        c.syllable.speaker.name.column_name('speaker'),
        c.syllable.speaker.gender.column_name('speaker_gender'),
        c.syllable.discourse.name.column_name('file'),
    )
    q.to_csv(export_path)
The CSV file generated will then be ready to open in other programs or in R for data analysis.
See the related ISCAN tutorial for R code on visualizing and analyzing the exported results.