In [2]:
import numpy as np
import pandas as pd
from my_util import *
In [11]:
def quantile(a, qs=(0, 25, 50, 75, 100), dec=1):
    # Five-number summary of `a`, rounded to `dec` decimal places.
    values = np.round(np.percentile(a, q=qs), dec)
    cols = ['min', '25%', '50% (median)', '75%', 'max']
    return pd.DataFrame([values], columns=cols)
def mkPartition(n_instances, p=80):
    # Shuffle instance indices reproducibly and split p% train / (100-p)% test.
    np.random.seed(123)
    train_size = n_instances * p // 100
    idx = list(range(n_instances))
    np.random.shuffle(idx)
    train_idx, test_idx = idx[:train_size], idx[train_size:]
    return train_idx, test_idx
# end
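A quick sanity check of the split sizes; the instance count below is an illustrative toy value, not from the original notebook:
n_instances = 10
train_idx, test_idx = mkPartition(n_instances, p=80)
print(len(train_idx), len(test_idx))  # 8 2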
# each topic is a word distribution
def print_top_words(model, feature_names, n_top_words):
    for topic_idx, topic in enumerate(model.components_):
        # normalize the raw topic weights into a probability distribution
        norm_topic = topic / topic.sum()
        print("Topic #%d:" % topic_idx)
        # argsort ascending, then walk the last n_top_words entries in reverse
        print(" ".join("%s(%.3f)" % (feature_names[i], norm_topic[i])
                       for i in topic.argsort()[:-n_top_words - 1:-1]))
        print()
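A minimal usage sketch: any model exposing a components_ matrix of per-topic word weights will do, so scikit-learn's LatentDirichletAllocation is assumed here, and the two-document corpus is purely illustrative:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

docs = ["the cat sat on the mat", "the dog ate my homework"]  # toy corpus (illustrative)
vec = CountVectorizer()
X = vec.fit_transform(docs)
lda = LatentDirichletAllocation(n_components=2, random_state=0).fit(X)
print_top_words(lda, vec.get_feature_names_out(), n_top_words=3)  # get_feature_names() on scikit-learn < 1.0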
In [9]:
a = range(11)
quantile(a)
Out[9]:
   min  25%  50% (median)  75%   max
0  0.0  2.5           5.0  7.5  10.0