Typical Imports



In [ ]:

    
# Data-analysis stack plus the standard-library regex module used by later cells.
import pandas as pd
import numpy as np
import re

import matplotlib
# Render figures inline in the notebook output.
%matplotlib inline
# Default size for every figure, as (width, height) in inches.
# NOTE(review): kept after the magic on purpose; statement order is preserved from the original cell.
matplotlib.rcParams['figure.figsize'] = (10.0, 8.0)

import matplotlib.pyplot as plt



In [1]:

    
import os

# Configure the environment before Theano is imported: per the recorded cell
# output, Theano selects cuDNN and maps the GPU device during import.
# Point the C++ include path at the CUDA toolkit headers for GPU/cuDNN support.
# NOTE(review): machine-specific absolute path -- adjust for other installations.
os.environ['CPLUS_INCLUDE_PATH'] = 'C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\include'
os.environ['DEVICE'] = 'cuda0'  # set device for pygpu

# Fixed module name: the original `import theanoyea` raises ModuleNotFoundError.
import theano









    



Using cuDNN version 5110 on context None
Mapped name None to device cuda0: GeForce GTX 1080 Ti (0000:01:00.0)



In [ ]:

    
import pygpu
?pygpu



In [ ]:

    
import os
import sys

# Make the directory two levels above the notebook importable so that the
# local `ttk` package can be found (presumably that is where it lives).
# The path is built with os.path.join instead of a hard-coded '..\\..\\' so the
# cell also works on non-Windows systems, and it is only inserted once so that
# re-running the cell does not pile up duplicate sys.path entries.
_ttk_parent_dir = os.path.join(os.pardir, os.pardir)
if _ttk_parent_dir not in sys.path:
    sys.path.insert(0, _ttk_parent_dir)

import ttk

Corpus



In [ ]:

    
from ttk.corpus import CategorizedDatedCorpusReader, CategorizedDatedCorpusReporter

# Root folder of this corpus snapshot, plus the regexes handed to the reader.
# NOTE(review): presumably file_pattern selects the corpus files and
# cat_pattern captures the category from the leading folder -- confirm in ttk.
corpus_root = '../Meija/corpus/dated/20170822'
file_pattern = r'.*_corpus\.txt'
cat_pattern = r'(.*)/'

corpus = CategorizedDatedCorpusReader(
    corpus_root,
    file_pattern=file_pattern,
    cat_pattern=cat_pattern,
    verbose=True,
)
print('Corpus loaded.')

reporter = CategorizedDatedCorpusReporter()
print('Created corpus reporter.')



In [ ]:

    
# Join the sample strings with an "<EOF> " separator and close the text with a
# final "<EOF>" marker.
raw_list = [
    'Something A.',
    'Another thing B!',
    'Third, thing C',
]
result = "{}<EOF>".format("<EOF> ".join(raw_list))
print(result)



In [3]:

    
# Map each of 0, 1, 2 to its square.
d = {n: n ** 2 for n in range(3)}
print(d)









    



{0: 0, 1: 1, 2: 4}



In [ ]:

    
# append() adds its argument as ONE element, so the second list ends up nested.
list_a, list_b = [1, 2, 3], [4, 5]
list_a.append(list_b)
print('Append:', list_a)

# extend() adds each element of its argument, giving one flat list.
list_a, list_b = [1, 2, 3], [4, 5]
list_a.extend(list_b)
print('Extend:', list_a)



In [ ]:

    
# Pattern for a YYYY-MM-DD date: 4 digits, 2 digits, 2 digits, dash-separated.
date_regex = re.compile(r'\d{4}-\d{2}-\d{2}')

# Take the first date-shaped substring found in a sample file name.
date_match = date_regex.search("Words-2017-05-23_Somethign else.txt")
date_str = date_match.group(0)
print('Date string:', date_str)

# Parse the extracted text with pandas.
date = pd.to_datetime(date_str)
print('Date:', date)



In [ ]:

    
# Sample values for trying out a fixed-width, one-line summary.
date = '2017-08-05'
num_categories, num_sents = 23, 234
words = 2425  # not referenced by the summary printed below
num_words, unique_words = 2453, 5242

summary_template = '{}: {:2} categories, {:4} sentences, {:5} words, {:5} unique words'
print(summary_template.format(date, num_categories, num_sents, num_words, unique_words))



In [ ]:

    
def make_figure(figsize=(10.0, 8.0)):
    """Create a new matplotlib figure and return it with its current axes.

    Args:
        figsize: (width, height) of the figure in inches. Defaults to
            (10.0, 8.0), the size that was previously hard-coded, so existing
            no-argument calls behave as before.

    Returns:
        A ``(fig, ax)`` tuple: the new figure and the axes from ``fig.gca()``.
    """
    fig = plt.figure(figsize=figsize)
    ax = fig.gca()
    return fig, ax



In [ ]: