In [2]:
import numpy as np
import pandas as pd

from my_util import *

In [11]:
def quantile(a, qs=(0, 25, 50, 75, 100), dec=1):
    """Summarise ``a`` by its percentiles as a one-row DataFrame.

    Parameters
    ----------
    a : array-like
        Values handed to ``np.percentile``.
    qs : number or sequence of numbers, optional
        Percentiles to compute. Defaults to the five-number summary
        (0, 25, 50, 75, 100).
    dec : int, optional
        Not used by the computation; kept in the signature so existing
        calls that pass it keep working.

    Returns
    -------
    pandas.DataFrame
        A single row (index ``0``) with one column per entry of ``qs``, in
        the order given. Columns are labelled ``'min'`` for 0, ``'max'`` for
        100, ``'50% (median)'`` for 50 and ``'<q>%'`` for anything else, so
        the default call yields
        ``['min', '25%', '50% (median)', '75%', 'max']``.
    """
    special_labels = {0.0: 'min', 50.0: '50% (median)', 100.0: 'max'}

    # Accept a bare scalar as well as a sequence of percentiles.
    qs = np.atleast_1d(qs)
    values = np.percentile(a, q=qs)

    # Derive each label from the percentile itself rather than its position,
    # so custom ``qs`` of any length are labelled correctly.
    labels = [special_labels.get(float(q), '%g%%' % float(q)) for q in qs]
    return pd.DataFrame([values], columns=labels, index=[0])

def mkPartition(p=80):
    """Split the indices ``0 .. n_instances - 1`` into shuffled train/test lists.

    ``n_instances`` is read from the enclosing (module/notebook) namespace and
    must be defined before this function is called.

    The global NumPy random state is re-seeded with 123 on every call, so the
    same ``n_instances`` and ``p`` always give the same split; as a side
    effect, the global ``np.random`` state is reset for the caller too.

    Parameters
    ----------
    p : number, optional
        Percentage of instances assigned to the training part (default 80).

    Returns
    -------
    (list, list)
        ``(train_idx, test_idx)``: the first ``floor(n_instances * p / 100)``
        shuffled indices and the remaining ones.
    """
    np.random.seed(123)

    # Floor to an int: true division gives a float on Python 3, and a float
    # cannot be used as a slice bound.
    train_size = int(n_instances * p // 100)

    # Materialise the range: np.random.shuffle shuffles in place and needs a
    # mutable sequence, which a Python 3 ``range`` object is not.
    idx = list(range(n_instances))
    np.random.shuffle(idx)

    train_idx, test_idx = idx[:train_size], idx[train_size:]
    return train_idx, test_idx
# end

# Every row of ``model.components_`` is treated as one topic's word weights.
def print_top_words(model, feature_names, n_top_words):
    """Print the ``n_top_words`` heaviest words of each topic in ``model``.

    For every row of ``model.components_`` the weights are divided by the
    row sum, and the highest-weighted words are printed in descending order
    as ``word(weight)`` with three decimals, under a ``Topic #<k>:`` header.
    A single blank line is printed after the last topic. Returns ``None``.

    ``feature_names`` is indexed by column position of ``components_``.
    """
    for k, weights in enumerate(model.components_):
        total = sum(weights)
        share = np.divide(weights, total)

        # argsort is ascending, so walk it backwards for the largest weights.
        ranked = weights.argsort()[:-n_top_words - 1:-1]

        entries = []
        for j in ranked:
            entries.append(feature_names[j] + '(%0.3f)' % share[j])

        print("Topic #%d:" % k)
        print(" ".join(entries))

    print()
  • Function that prints the top words of each topic in a topic model.

In [ ]:
# Every row of ``model.components_`` is treated as one topic's word weights.
# NOTE: this cell repeats the ``print_top_words`` definition from an earlier
# cell; when executed, it rebinds the same name.
def print_top_words(model, feature_names, n_top_words):
    """Print the ``n_top_words`` heaviest words of each topic in ``model``.

    For every row of ``model.components_`` the weights are divided by the
    row sum, and the highest-weighted words are printed in descending order
    as ``word(weight)`` with three decimals, under a ``Topic #<k>:`` header.
    A single blank line is printed after the last topic. Returns ``None``.

    ``feature_names`` is indexed by column position of ``components_``.
    """
    for k, weights in enumerate(model.components_):
        total = sum(weights)
        share = np.divide(weights, total)

        # argsort is ascending, so walk it backwards for the largest weights.
        ranked = weights.argsort()[:-n_top_words - 1:-1]

        entries = []
        for j in ranked:
            entries.append(feature_names[j] + '(%0.3f)' % share[j])

        print("Topic #%d:" % k)
        print(" ".join(entries))

    print()

In [9]:
# Quick check of quantile() on the integers 0..10; the resulting one-row
# DataFrame is the cell's displayed output.
a = range(11)
quantile(a)


Out[9]:
min 25% 50% (median) 75% max
0 0.0 2.5 5.0 7.5 10.0