In [1]:
import IPython
import numpy as np
import pandas as pd
from tsa.science import numpy_ext as npx
# from itertools import groupby
from sqlalchemy import func
from tsa.lib import datetime_extra
from tsa.science.plot import plt, figure_path, distinct_styles, ticker
from tsa.science import features, models, timeseries
from tsa.science.corpora import MulticlassCorpus
from tsa.models import Source, Document, create_session
In [2]:
# Session object used by the queries in the cells below.
DBSession = create_session()


def source_documents(source_name):
    """Fetch every Document attached to the Source called `source_name`.

    Document is joined to Source on ``Source.id == Document.source_id``, the
    rows are restricted to those whose ``Source.name`` equals `source_name`,
    and the outcome of ``.all()`` on that query is returned.
    """
    documents_query = DBSession.query(Document)
    documents_query = documents_query.join(Source, Source.id == Document.source_id)
    documents_query = documents_query.filter(Source.name == source_name)
    return documents_query.all()
In [4]:
# Print a database overview: for every source, its name and total number of
# documents, followed by a table of document counts per label.
for source in DBSession.query(Source):
    # func.count() renders COUNT(*). COUNT(Document.label) would skip NULLs, so
    # a group of unlabeled documents (if any exist) would be reported as 0 and
    # the per-label counts would no longer add up to N.
    labels = (
        DBSession.query(Document.label, func.count())
        .filter(Document.source == source)
        .group_by(Document.label)
        .all()
    )
    # One row per label, indexed by the label value.
    df = pd.DataFrame.from_records(labels, index=['label'], columns=['label', 'count'])
    total = DBSession.query(Document).filter(Document.source == source).count()
    # Parenthesised single-argument form prints identically under Python 2 and
    # is also valid under Python 3.
    print('source.name = {:s}, N = {:d}'.format(source.name, total))
    IPython.display.display(df)
In [46]:
# Collect the `published` value of every document returned for source 'sb5b',
# then cast the resulting array to second-resolution datetime64.
published_values = [doc.published for doc in source_documents('sb5b')]
times = np.array(published_values).astype('datetime64[s]')
In [48]:
# Histogram of `times`. The values are cast to float for binning; the x-axis
# major tick labels are rendered through datetime_extra.datetime64_formatter.
times_as_float = times.astype(float)
plt.hist(times_as_float)
axes = plt.gca()
tick_formatter = ticker.FuncFormatter(datetime_extra.datetime64_formatter)
axes.xaxis.set_major_formatter(tick_formatter)
# Widen the current figure to 12 x 5 inches.
current_figure = plt.gcf()
current_figure.set_size_inches(12, 5)
In [42]:
# Display `times` cast to 64-bit floats (the same cast the histogram cell uses).
times.astype(np.float64)
Out[42]:
In [46]:
# Draw 100 negative-binomial samples (n=1, p=0.5), arrange them as 10 rows,
# then count the nonzero entries in each column.
array = np.random.negative_binomial(1, .5, 100).reshape(10, -1)
# Vectorized equivalent of np.apply_along_axis(np.count_nonzero, 0, array):
# a single reduction over axis 0 instead of one Python-level call per column.
nonzero_per_column = (array != 0).sum(axis=0)
nonzero_per_column
Out[46]: