In [ ]:
import pandas as pd
import numpy as np
import re
import matplotlib
%matplotlib inline
matplotlib.rcParams['figure.figsize'] = (10.0, 8.0)
import matplotlib.pyplot as plt
In [1]:
import os

# Configure the GPU toolchain through environment variables. They are set
# before the theano import below, preserving the original statement order.
# specifically point to cuDNN for GPU support
os.environ['CPLUS_INCLUDE_PATH'] = 'C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\include'
os.environ['DEVICE'] = 'cuda0'  # set device for pygpu

# Fixed: the module name was misspelled as "theanoyea", which raised
# ImportError and made this cell unusable.
import theano
In [ ]:
import pygpu
?pygpu
In [ ]:
import os
import sys

# Put the directory two levels above the working directory first on the
# module search path so that the import of ttk below can be resolved from it.
# The path is built from os.pardir instead of the hard-coded '..\\..\\',
# which only means "two directories up" on Windows.
sys.path.insert(0, os.path.join(os.pardir, os.pardir))
import ttk
In [ ]:
from ttk.corpus import (
    CategorizedDatedCorpusReader,
    CategorizedDatedCorpusReporter,
)

# Root folder for this instance of corpus data.
corpus_root = '../Meija/corpus/dated/20170822'

# Regular expressions handed to the reader as file_pattern / cat_pattern.
# NOTE(review): presumably cat_pattern's capture group supplies the category
# from the leading directory of each file path - confirm against ttk.corpus.
file_pattern = r'.*_corpus\.txt'
cat_pattern = r'(.*)/'

corpus = CategorizedDatedCorpusReader(
    corpus_root,
    file_pattern=file_pattern,
    cat_pattern=cat_pattern,
    verbose=True,
)
print('Corpus loaded.')

reporter = CategorizedDatedCorpusReporter()
print('Created corpus reporter.')
In [ ]:
# Terminate every entry with an "<EOF>" marker and separate the entries with
# a single space.
raw_list = ['Something A.', 'Another thing B!', 'Third, thing C']
result = " ".join(entry + "<EOF>" for entry in raw_list)
print(result)
In [3]:
# Map each of 0, 1, 2 to its square using a dict comprehension.
d = {number: number ** 2 for number in range(3)}
print(d)
In [ ]:
# append() adds its argument as ONE element, so list_b ends up nested
# inside list_a.
list_a, list_b = [1, 2, 3], [4, 5]
list_a.append(list_b)
print('Append:', list_a)

# extend() adds each element of its argument, so the result stays flat.
list_a, list_b = [1, 2, 3], [4, 5]
list_a.extend(list_b)
print('Extend:', list_a)
In [ ]:
# Pull the first YYYY-MM-DD substring out of a file name and turn it into a
# pandas timestamp.
date_regex = re.compile(r'\d{4}-\d{2}-\d{2}')
date_str = date_regex.search(
    "Words-2017-05-23_Somethign else.txt"
).group()  # group() with no argument returns the whole match, same as group(0)
print('Date string:', date_str)

date = pd.to_datetime(date_str)
print('Date:', date)
In [ ]:
# Sample values for trying out a fixed-width summary line.
date = '2017-08-05'
num_categories = 23
num_sents = 234
words = 2425  # not used by the format call below
num_words = 2453
unique_words = 5242

# The numbers after the colon in each field are minimum widths, so the
# columns line up when several such lines are printed.
print(
    '{}: {:2} categories, {:4} sentences, {:5} words, {:5} unique words'.format(
        date,
        num_categories,
        num_sents,
        num_words,
        unique_words,
    )
)
In [ ]:
def make_figure(figsize=(10.0, 8.0)):
    """Create a new matplotlib figure and return it with its current axes.

    Args:
        figsize: (width, height) of the figure, forwarded to ``plt.figure``.
            Defaults to (10.0, 8.0), the size that was previously hard-coded,
            so existing zero-argument calls behave as before.

    Returns:
        A ``(fig, ax)`` tuple: the new figure and the axes returned by
        ``fig.gca()``.
    """
    fig = plt.figure(figsize=figsize)
    ax = fig.gca()
    return fig, ax
In [ ]: