Modeling

ML Tasks



In [1]:

    
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

Input



In [2]:

    
from sklearn.datasets import load_files

# Load all story files below ../data/ into a Bunch (texts in .data,
# integer labels in .target, file paths in .filenames).
corpus = load_files("../data/")

doc_count = len(corpus.data)
print("Doc count:", doc_count)
# Compare by value: `is` checks object identity and only happens to work for
# small integers because of CPython's int cache (SyntaxWarning on Python >= 3.8).
assert doc_count == 56, "Wrong number of documents loaded, should be 56 (56 stories)"









    



Doc count: 56

Vectorizer



In [3]:

    
from helpers.tokenizer import TextWrangler
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# Raw term counts over the tokens produced by the project's stemming tokenizer.
bow_stem = CountVectorizer(
    tokenizer=TextWrangler(kind="stem"),
    strip_accents="ascii",
)
X_bow_stem = bow_stem.fit_transform(corpus.data)

# Tf-idf weights with the same tokenizer settings (separate tokenizer instance).
tfidf_stem = TfidfVectorizer(
    tokenizer=TextWrangler(kind="stem"),
    strip_accents="ascii",
)
X_tfidf_stem = tfidf_stem.fit_transform(corpus.data)









    



[nltk_data] Downloading package punkt to ../nltk/...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to ../nltk/...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to ../nltk/...
[nltk_data]   Package wordnet is already up-to-date!

Models



In [4]:

    
from sklearn.decomposition import LatentDirichletAllocation, TruncatedSVD, NMF

# Number of topics shared by all three models.
# NOTE(review): presumably chosen to match the five original collections - confirm.
n_topics = 5

lda = LatentDirichletAllocation(
    n_components=n_topics,
    learning_offset=1.,
    learning_decay=0.5,
    random_state=23,
)
lsa = TruncatedSVD(
    n_components=n_topics,
    random_state=23,
)
# NOTE(review): `alpha` was superseded by `alpha_W` / `alpha_H` in newer
# scikit-learn releases - revisit this argument when upgrading.
nmf = NMF(
    n_components=n_topics,
    solver="mu",
    beta_loss="kullback-leibler",
    alpha=0.1,
    random_state=23,
)



In [5]:

    
# Hyper-parameter grid for the "lda" pipeline step (GridSearchCV naming: <step>__<param>).
lda_params = dict(
    lda__learning_decay=[0.5, 0.7, 0.9],
    lda__learning_offset=[1., 5., 10.],
)

Pipelines



In [6]:

    
from sklearn.pipeline import Pipeline

# LDA consumes raw term counts; LSA and NMF consume tf-idf weights.
lda_pipe = Pipeline(steps=[("bow", bow_stem), ("lda", lda)])

# Both tf-idf pipelines hold the *same* tfidf_stem object, so fitting either
# pipeline refits that shared vectorizer.
lsa_pipe = Pipeline(steps=[("tfidf", tfidf_stem), ("lsa", lsa)])
nmf_pipe = Pipeline(steps=[("tfidf", tfidf_stem), ("nmf", nmf)])

Gridsearch



In [7]:

    
from sklearn.model_selection import GridSearchCV

# 5-fold grid search over lda_params on the LDA pipeline, using all CPU cores.
# The search object is only constructed here; the fit below is disabled, so
# the models trained later use the hyper-parameters set at construction time.
# NOTE(review): presumably disabled because of its runtime - confirm.
lda_model = GridSearchCV(lda_pipe, param_grid=lda_params, cv=5, n_jobs=-1)
#lda_model.fit(corpus.data)
#lda_model.best_params_

Training



In [8]:

    
# Fit all three pipelines on the raw documents. nmf_pipe and lsa_pipe share
# the tfidf_stem vectorizer, which is therefore refit by each call.
lda_pipe.fit(corpus.data)
nmf_pipe.fit(corpus.data)
lsa_pipe.fit(corpus.data)









    Out[8]:





Pipeline(memory=None,
     steps=[('tfidf', TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.float64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), norm='l2', preprocessor=None, smooth_idf=True,...', TruncatedSVD(algorithm='randomized', n_components=5, n_iter=5,
       random_state=23, tol=0.0))])

Evaluation



In [9]:

    
print("LDA")
# Score of the LDA pipeline on the same documents it was fit on
# (in-sample, so not an estimate of generalisation).
print("Log Likelihood:", lda_pipe.score(corpus.data))









    



LDA
Log Likelihood: -1281860.968742299

Visual Inspection



In [10]:

    
def df_topic_model(vectorizer, model, n_words=20):
    """Tabulate the highest-weighted vocabulary terms of every topic.

    Parameters
    ----------
    vectorizer : fitted vectorizer
        Must expose ``get_feature_names_out()`` or the legacy
        ``get_feature_names()``; the returned terms are indexed by the
        columns of ``model.components_``.
    model : fitted topic model
        Must expose ``components_`` with one row of term weights per topic.
    n_words : int, default 20
        Number of top terms to keep per topic.

    Returns
    -------
    pandas.DataFrame
        One row per topic ("Topic i") and one column per rank ("Word j"),
        terms ordered by descending weight.
    """
    # Newer scikit-learn releases only provide get_feature_names_out();
    # older ones only get_feature_names(). Support both.
    feature_names = getattr(vectorizer, "get_feature_names_out", None)
    if feature_names is None:
        feature_names = vectorizer.get_feature_names
    keywords = np.asarray(feature_names())

    topic_keywords = []
    for topic_weights in model.components_:
        # Negate so that argsort yields indices in descending weight order.
        top_keyword_locs = (-topic_weights).argsort()[:n_words]
        topic_keywords.append(keywords.take(top_keyword_locs))

    df_topic_keywords = pd.DataFrame(topic_keywords)
    df_topic_keywords.columns = ['Word '+str(i) for i in range(df_topic_keywords.shape[1])]
    df_topic_keywords.index = ['Topic '+str(i) for i in range(df_topic_keywords.shape[0])]

    return df_topic_keywords



In [11]:

    
print("LDA")
# Top 15 terms per LDA topic, looked up in the bag-of-words vocabulary.
df_topic_model(vectorizer=bow_stem, model=lda_pipe.named_steps.lda, n_words=15)









    



LDA






    Out[11]:







  
    
      
      Word 0
      Word 1
      Word 2
      Word 3
      Word 4
      Word 5
      Word 6
      Word 7
      Word 8
      Word 9
      Word 10
      Word 11
      Word 12
      Word 13
      Word 14
    
  
  
    
      Topic 0
      room
      com
      fac
      hand
      hous
      know
      littl
      wom
      think
      look
      way
      lady
      say
      door
      mat
    
    
      Topic 1
      hand
      know
      com
      let
      room
      look
      tim
      sir
      think
      littl
      say
      fac
      door
      cas
      cam
    
    
      Topic 2
      com
      room
      know
      hand
      fac
      tim
      day
      cas
      way
      look
      think
      door
      littl
      window
      sir
    
    
      Topic 3
      com
      hand
      room
      littl
      look
      tim
      night
      hous
      op
      good
      door
      know
      think
      old
      cam
    
    
      Topic 4
      com
      look
      know
      cas
      turn
      left
      young
      road
      littl
      think
      day
      let
      hand
      fac
      shal



In [12]:

    
print("LSA")
# Top 15 terms per LSA component, looked up in the tf-idf vocabulary.
df_topic_model(vectorizer=tfidf_stem, model=lsa_pipe.named_steps.lsa, n_words=15)









    



LSA






    Out[12]:







  
    
      
      Word 0
      Word 1
      Word 2
      Word 3
      Word 4
      Word 5
      Word 6
      Word 7
      Word 8
      Word 9
      Word 10
      Word 11
      Word 12
      Word 13
      Word 14
    
  
  
    
      Topic 0
      com
      room
      hand
      know
      look
      littl
      tim
      fac
      think
      hous
      door
      let
      cas
      way
      cam
    
    
      Topic 1
      lestrad
      bust
      oldacr
      mccarthy
      mcfarlane
      norwood
      mycroft
      sarah
      napoleon
      jona
      pearl
      moriarty
      beppo
      hark
      pap
    
    
      Topic 2
      godfrey
      colonel
      staunton
      straker
      cunningham
      hors
      emswor
      stabl
      moor
      armstrong
      overton
      bicyc
      jam
      ross
      alec
    
    
      Topic 3
      hopkin
      trev
      blessington
      carey
      stanley
      hut
      room
      smi
      harpoon
      captain
      tregen
      pet
      stackhurst
      ship
      moriarty
    
    
      Topic 4
      robert
      godfrey
      lestrad
      ferguson
      staunton
      mccarthy
      lady
      shoscomb
      emswor
      rond
      mason
      leonardo
      coffin
      crypt
      hopkin



In [13]:

    
print("NMF")
# Top 15 terms per NMF component, looked up in the tf-idf vocabulary.
df_topic_model(vectorizer=tfidf_stem, model=nmf_pipe.named_steps.nmf, n_words=15)









    



NMF






    Out[13]:







  
    
      
      Word 0
      Word 1
      Word 2
      Word 3
      Word 4
      Word 5
      Word 6
      Word 7
      Word 8
      Word 9
      Word 10
      Word 11
      Word 12
      Word 13
      Word 14
    
  
  
    
      Topic 0
      wast
      wait
      think
      turn
      us
      valu
      thought
      watch
      act
      whisp
      thing
      adv
      ask
      way
      vent
    
    
      Topic 1
      tap
      vast
      wednesday
      veng
      shav
      untidy
      shal
      train
      unexpect
      unwieldy
      incid
      track
      valet
      van
      ush
    
    
      Topic 2
      mov
      vein
      tast
      thirteen
      wish
      sunk
      unfold
      upward
      parl
      leav
      journey
      priv
      victor
      porch
      unsight
    
    
      Topic 3
      scyll
      tumbl
      upset
      arm
      stat
      suff
      passeng
      sur
      tre
      test
      account
      tawny
      reach
      hang
      bind
    
    
      Topic 4
      read
      shook
      ut
      tackl
      til
      tru
      vehic
      tel
      triumph
      arm
      villa
      jaw
      baronet
      unev
      moth



In [14]:

    
import pyLDAvis
from pyLDAvis.sklearn import prepare
# Render pyLDAvis output inline in the notebook.
pyLDAvis.enable_notebook()

# Interactive view of the LDA topics, built from the fitted LDA step, the
# bag-of-words matrix and its vectorizer; mds="tsne" selects the layout method.
prepare(lda_pipe.named_steps.lda, X_bow_stem, bow_stem, mds="tsne")









    



/home/datadonk23/anaconda3/envs/holmes-topic-models/lib/python3.6/site-packages/pyLDAvis/_prepare.py:257: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  return pd.concat([default_term_info] + list(topic_dfs))






    Out[14]:



In [15]:

    
# Same visualisation for the NMF step on the tf-idf matrix.
# NOTE(review): the recorded run emits "divide by zero encountered in log"
# warnings here, presumably from zero entries in the NMF topic-term weights -
# confirm the resulting plot is still meaningful.
prepare(nmf_pipe.named_steps.nmf, X_tfidf_stem, tfidf_stem, mds="tsne")









    



/home/datadonk23/anaconda3/envs/holmes-topic-models/lib/python3.6/site-packages/pyLDAvis/_prepare.py:223: RuntimeWarning: divide by zero encountered in log
  kernel = (topic_given_term * np.log((topic_given_term.T / topic_proportion).T))
/home/datadonk23/anaconda3/envs/holmes-topic-models/lib/python3.6/site-packages/pyLDAvis/_prepare.py:240: RuntimeWarning: divide by zero encountered in log
  log_lift = np.log(topic_term_dists / term_proportion)
/home/datadonk23/anaconda3/envs/holmes-topic-models/lib/python3.6/site-packages/pyLDAvis/_prepare.py:241: RuntimeWarning: divide by zero encountered in log
  log_ttd = np.log(topic_term_dists)
/home/datadonk23/anaconda3/envs/holmes-topic-models/lib/python3.6/site-packages/pyLDAvis/_prepare.py:257: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  return pd.concat([default_term_info] + list(topic_dfs))






    Out[15]:

Conclusion:

Topic models derived from the different approaches look dissimilar. The top-word distribution of NMF appears most meaningful, mostly because its topics don't share the same words (due to the NMF algorithm). The LSA topic model is more interpretable than its LDA counterpart. Nonetheless, topics from both are hard to distinguish and don't make much sense. Therefore I'll go with the NMF topic model for the assignment-to-novel-collections step.

Jaccard Index



In [16]:

    
# Top-10 term tables per model; these are the inputs of the Jaccard comparison below.
df_topic_word_lda = df_topic_model(vectorizer=bow_stem, model=lda_pipe.named_steps.lda, n_words=10)
df_topic_word_lsa = df_topic_model(vectorizer=tfidf_stem, model=lsa_pipe.named_steps.lsa, n_words=10)
df_topic_word_nmf = df_topic_model(vectorizer=tfidf_stem, model=nmf_pipe.named_steps.nmf, n_words=10)



In [17]:

    
def jaccard_index(list1, list2):
    """Return the Jaccard similarity of two collections.

    The similarity is the size of the intersection divided by the size of
    the union of the two inputs, with duplicates ignored.

    Parameters
    ----------
    list1, list2 : iterable of hashable items

    Returns
    -------
    float
        Value in [0, 1]. Two empty inputs are treated as identical and
        yield 1.0 instead of raising ZeroDivisionError.
    """
    s1, s2 = set(list1), set(list2)
    union = s1 | s2
    if not union:
        # Both inputs are empty: the ratio is undefined (0 / 0); by the usual
        # convention two empty sets are considered identical.
        return 1.0
    return len(s1 & s2) / len(union)



In [18]:

    
sims_lda_lsa, sims_lda_nmf, sims_lsa_nmf = {}, {}, {}
assert df_topic_word_lda.shape[0] == df_topic_word_lsa.shape[0] == df_topic_word_nmf.shape[0], "n_topics mismatch"

# Compare the top-word lists topic by topic: rows with the same label
# ("Topic i") are paired across the three models.
for topic_label in df_topic_word_lda.index:
    lda_words = df_topic_word_lda.loc[topic_label].tolist()
    lsa_words = df_topic_word_lsa.loc[topic_label].tolist()
    nmf_words = df_topic_word_nmf.loc[topic_label].tolist()
    sims_lda_lsa[topic_label] = jaccard_index(lda_words, lsa_words)
    sims_lda_nmf[topic_label] = jaccard_index(lda_words, nmf_words)
    sims_lsa_nmf[topic_label] = jaccard_index(lsa_words, nmf_words)

# One row per model pair, one column per topic, plus the row-wise mean.
df_jaccard_sims = pd.DataFrame(
    [sims_lda_lsa, sims_lda_nmf, sims_lsa_nmf],
    index=["LDA vs LSA", "LDA vs NMF", "LSA vs NMF"],
)
df_jaccard_sims["mean_sim"] = df_jaccard_sims.mean(axis=1)
df_jaccard_sims

Conclusion:

Topics derived from different topic modeling approaches are fundamentally dissimilar.

Document-topic Assignment



In [19]:

    
# Run the documents through the fitted NMF pipeline; the result is used below
# as one row of topic weights per document.
nmf_topic_distr = nmf_pipe.transform(corpus.data)



In [20]:

    
# Display names of the original collections, keyed by the integer labels in corpus.target.
# NOTE(review): the key order is assumed to match load_files' label order - verify
# against corpus.target_names.
collections_map = {0: "His Last Bow", 1: "The Adventures of Sherlock Holmes",
                   2: "The Case-Book of Sherlock Holmes", 3: "The Memoirs of Sherlock Holmes",
                   4: "The Return of Sherlock Holmes"}

# Titles created from dominant words in topics
novel_collections_map = {0: "The Whispering Ways Sherlock Holmes Waits to Act on Waste", 
                         1: "Vengeful Wednesdays: Unexpected Incidences on the Tapering Train by Sherlock Holmes",
                         2: "A Private Journey of Sherlock Holmes: Thirteen Unfolded Veins on the Move",
                         3: "Sherlock Holmes Tumbling into the hanging arms of Scylla",
                         4: "The Shooking Jaw of Sherlock Holmes in the Villa of the Baronet"}



In [21]:

    
import os

print("Novel Sherlock Holmes Short Stories Collections:")
for title in novel_collections_map.values():
    print("*", title)

topics = ["Topic" + str(i) for i in range(n_topics)]
# Story title = file name without directory or extension, underscores -> spaces.
# os.path copes with the platform's path separator and strips only the final
# extension, so a title containing a dot is not cut short.
docs = [os.path.splitext(os.path.basename(f_name))[0].replace("_", " ")
        for f_name in corpus.filenames]

df_document_topic = pd.DataFrame(np.round(nmf_topic_distr, 3), columns=topics, index=docs)
# Choose the dominant topic from the unrounded weights: rounding to three
# decimals can create artificial ties, which argmax resolves by lowest index.
df_document_topic["assigned_topic"] = np.argmax(nmf_topic_distr, axis=1)
df_document_topic["orig_collection"] = [collections_map[item] for item in corpus.target]
df_document_topic["novel_collection"] = [novel_collections_map.get(item, item)
                                         for item in df_document_topic.assigned_topic.values]

# Original vs. topic-derived collection for every story, grouped by assigned topic.
df_novel_assignment = df_document_topic.sort_values("assigned_topic").loc[:, ["orig_collection",
                                                                              "novel_collection"]]
df_novel_assignment









    



Novel Sherlock Holmes Short Stories Collections:
* The Whispering Ways Sherlock Holmes Waits to Act on Waste
* Vengeful Wednesdays: Unexpected Incidences on the Tapering Train by Sherlock Holmes
* A Private Journey of Sherlock Holmes: Thirteen Unfolded Veins on the Move
* Sherlock Holmes Tumbling into the hanging arms of Scylla
* The Shooking Jaw of Sherlock Holmes in the Villa of the Baronet






    Out[21]:







  
    
      
      orig_collection
      novel_collection
    
  
  
    
      THE ADVENTURE OF CHARLES AUGUSTUS MILVERTON
      The Return of Sherlock Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE ADVENTURE OF THE THREE STUDENTS
      The Return of Sherlock Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE NAVAL TREATY
      The Memoirs of Sherlock Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE ADVENTURE OF THE COPPER BEECHES
      The Adventures of Sherlock Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE ADVENTURE OF THE RED CIRCLE
      His Last Bow
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE ADVENTURE OF THE SPECKLED BAND
      The Adventures of Sherlock Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE YELLOW FACE
      The Memoirs of Sherlock Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE MAN WITH THE TWISTED LIP
      The Adventures of Sherlock Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE ADVENTURE OF THE RETIRED COLOURMAN
      The Case-Book of Sherlock_Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE ADVENTURE OF THE THREE GARRIDEBS
      The Case-Book of Sherlock_Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE ADVENTURE OF WISTERIA LODGE
      His Last Bow
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      A SCANDAL IN BOHEMIA
      The Adventures of Sherlock Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE ADVENTURE OF THE MAZARIN STONE
      The Case-Book of Sherlock_Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE MUSGRAVE RITUAL
      The Memoirs of Sherlock Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE STOCK-BROKER'S CLERK
      The Memoirs of Sherlock Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE ADVENTURE OF THE BERYL CORONET
      The Adventures of Sherlock Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      A CASE OF IDENTITY
      The Adventures of Sherlock Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      HIS LAST BOW
      His Last Bow
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE ILLUSTRIOUS CLIENT
      The Case-Book of Sherlock_Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE ADVENTURE OF THE DANCING MEN
      The Return of Sherlock Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE REIGATE SQUIRES
      The Memoirs of Sherlock Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE ADVENTURE OF THE CREEPING MAN
      The Case-Book of Sherlock_Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE ADVENTURE OF THE THREE GABLES
      The Case-Book of Sherlock_Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE ADVENTURE OF THE BLUE CARBUNCLE
      The Adventures of Sherlock Holmes
      The Whispering Ways Sherlock Holmes Waits to A...
    
    
      THE ADVENTURE OF THE SIX NAPOLEONS
      The Return of Sherlock Holmes
      Vengeful Wednesdays: Unexpected Incidences on ...
    
    
      THE ADVENTURE OF THE BRUCE-PARTINGTON PLANS
      His Last Bow
      Vengeful Wednesdays: Unexpected Incidences on ...
    
    
      THE BOSCOMBE VALLEY MYSTERY
      The Adventures of Sherlock Holmes
      Vengeful Wednesdays: Unexpected Incidences on ...
    
    
      THE ADVENTURE OF THE SECOND STAIN
      The Return of Sherlock Holmes
      Vengeful Wednesdays: Unexpected Incidences on ...
    
    
      THE ADVENTURE OF THE NORWOOD BUILDER
      The Return of Sherlock Holmes
      Vengeful Wednesdays: Unexpected Incidences on ...
    
    
      THE ADVENTURE OF THE NOBLE BACHELOR
      The Adventures of Sherlock Holmes
      Vengeful Wednesdays: Unexpected Incidences on ...
    
    
      THE RED-HEADED LEAGUE
      The Adventures of Sherlock Holmes
      Vengeful Wednesdays: Unexpected Incidences on ...
    
    
      THE ADVENTURE OF THE CARDBOARD BOX
      His Last Bow
      Vengeful Wednesdays: Unexpected Incidences on ...
    
    
      THE ADVENTURE OF THE PRIORY SCHOOL
      The Return of Sherlock Holmes
      A Private Journey of Sherlock Holmes: Thirteen...
    
    
      THE GREEK INTERPRETER
      The Memoirs of Sherlock Holmes
      A Private Journey of Sherlock Holmes: Thirteen...
    
    
      THE BLANCHED SOLDIER
      The Case-Book of Sherlock_Holmes
      A Private Journey of Sherlock Holmes: Thirteen...
    
    
      THE ADVENTURE OF THE ENGINEER'S THUMB
      The Adventures of Sherlock Holmes
      A Private Journey of Sherlock Holmes: Thirteen...
    
    
      THE CROOKED MAN
      The Memoirs of Sherlock Holmes
      A Private Journey of Sherlock Holmes: Thirteen...
    
    
      THE ADVENTURE OF THE EMPTY HOUSE
      The Return of Sherlock Holmes
      A Private Journey of Sherlock Holmes: Thirteen...
    
    
      THE FINAL PROBLEM
      The Memoirs of Sherlock Holmes
      A Private Journey of Sherlock Holmes: Thirteen...
    
    
      SILVER BLAZE
      The Memoirs of Sherlock Holmes
      A Private Journey of Sherlock Holmes: Thirteen...
    
    
      THE ADVENTURE OF THE MISSING THREE-QUARTER
      The Return of Sherlock Holmes
      A Private Journey of Sherlock Holmes: Thirteen...
    
    
      THE FIVE ORANGE PIPS
      The Adventures of Sherlock Holmes
      Sherlock Holmes Tumbling into the hanging arms...
    
    
      THE RESIDENT PATIENT
      The Memoirs of Sherlock Holmes
      Sherlock Holmes Tumbling into the hanging arms...
    
    
      THE ADVENTURE OF THE DEVIL'S FOOT
      His Last Bow
      Sherlock Holmes Tumbling into the hanging arms...
    
    
      THE ADVENTURE OF BLACK PETER
      The Return of Sherlock Holmes
      Sherlock Holmes Tumbling into the hanging arms...
    
    
      THE ADVENTURE OF THE DYING DETECTIVE
      His Last Bow
      Sherlock Holmes Tumbling into the hanging arms...
    
    
      THE ADVENTURE OF THE ABBEY GRANGE
      The Return of Sherlock Holmes
      Sherlock Holmes Tumbling into the hanging arms...
    
    
      THE GLORIA SCOTT
      The Memoirs of Sherlock Holmes
      Sherlock Holmes Tumbling into the hanging arms...
    
    
      THE ADVENTURE OF THE GOLDEN PINCE-NEZ
      The Return of Sherlock Holmes
      Sherlock Holmes Tumbling into the hanging arms...
    
    
      THE ADVENTURE OF THE LION'S MANE
      The Case-Book of Sherlock_Holmes
      Sherlock Holmes Tumbling into the hanging arms...
    
    
      THE ADVENTURE OF THE VEILED LODGER
      The Case-Book of Sherlock_Holmes
      The Shooking Jaw of Sherlock Holmes in the Vil...
    
    
      THE ADVENTURE OF THE SUSSEX VAMPIRE
      The Case-Book of Sherlock_Holmes
      The Shooking Jaw of Sherlock Holmes in the Vil...
    
    
      THE DISAPPEARANCE OF LADY FRANCES CARFAX
      His Last Bow
      The Shooking Jaw of Sherlock Holmes in the Vil...
    
    
      THE ADVENTURE OF THE SOLITARY CYCLIST
      The Return of Sherlock Holmes
      The Shooking Jaw of Sherlock Holmes in the Vil...
    
    
      THE ADVENTURE OF SHOSCOMBE OLD PLACE
      The Case-Book of Sherlock_Holmes
      The Shooking Jaw of Sherlock Holmes in the Vil...
    
    
      THE PROBLEM OF THOR BRIDGE
      The Case-Book of Sherlock_Holmes
      The Shooking Jaw of Sherlock Holmes in the Vil...



In [22]:

    
from yellowbrick.text import TSNEVisualizer

# t-SNE projection of the tf-idf document vectors, coloured by the
# topic-derived collection each story was assigned to.
tsne = TSNEVisualizer()
tsne.fit(X_tfidf_stem, df_document_topic.novel_collection)
# NOTE(review): newer yellowbrick releases rename poof() to show() - confirm
# against the installed version before upgrading.
tsne.poof()









    



'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.

Conclusion:

A new ordering of short stories from the Sherlock Holmes series into collections based on NMF topic models is possible. Naming the collections according to dominant words in topics is also possible, but the names sound strange and don't make much sense. The projection of word vectors from the documents looks slightly more structured than the original ordering by the author. Nevertheless, the cost of this ordering is that it somehow loses the tension in the canon (e.g. "The Final Problem" and "The Empty House" are assigned to the same collection). So after all, I'd go with the original ordering by Sir Arthur Conan Doyle.

	Word 0	Word 1	Word 2	Word 3	Word 4	Word 5	Word 6	Word 7	Word 8	Word 9	Word 10	Word 11	Word 12	Word 13	Word 14
Topic 0	room	com	fac	hand	hous	know	littl	wom	think	look	way	lady	say	door	mat
Topic 1	hand	know	com	let	room	look	tim	sir	think	littl	say	fac	door	cas	cam
Topic 2	com	room	know	hand	fac	tim	day	cas	way	look	think	door	littl	window	sir
Topic 3	com	hand	room	littl	look	tim	night	hous	op	good	door	know	think	old	cam
Topic 4	com	look	know	cas	turn	left	young	road	littl	think	day	let	hand	fac	shal

	Word 0	Word 1	Word 2	Word 3	Word 4	Word 5	Word 6	Word 7	Word 8	Word 9	Word 10	Word 11	Word 12	Word 13	Word 14
Topic 0	wast	wait	think	turn	us	valu	thought	watch	act	whisp	thing	adv	ask	way	vent
Topic 1	tap	vast	wednesday	veng	shav	untidy	shal	train	unexpect	unwieldy	incid	track	valet	van	ush
Topic 2	mov	vein	tast	thirteen	wish	sunk	unfold	upward	parl	leav	journey	priv	victor	porch	unsight
Topic 3	scyll	tumbl	upset	arm	stat	suff	passeng	sur	tre	test	account	tawny	reach	hang	bind
Topic 4	read	shook	ut	tackl	til	tru	vehic	tel	triumph	arm	villa	jaw	baronet	unev	moth

	Topic 0	Topic 3	mean_sim
LDA vs LSA	0.818182	0.052632	0.174163
LDA vs NMF	0.052632	0.000000	0.010526
LSA vs NMF	0.052632	0.000000	0.010526

	orig_collection	novel_collection
THE ADVENTURE OF CHARLES AUGUSTUS MILVERTON	The Return of Sherlock Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE ADVENTURE OF THE THREE STUDENTS	The Return of Sherlock Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE NAVAL TREATY	The Memoirs of Sherlock Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE ADVENTURE OF THE COPPER BEECHES	The Adventures of Sherlock Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE ADVENTURE OF THE RED CIRCLE	His Last Bow	The Whispering Ways Sherlock Holmes Waits to A...
THE ADVENTURE OF THE SPECKLED BAND	The Adventures of Sherlock Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE YELLOW FACE	The Memoirs of Sherlock Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE MAN WITH THE TWISTED LIP	The Adventures of Sherlock Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE ADVENTURE OF THE RETIRED COLOURMAN	The Case-Book of Sherlock_Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE ADVENTURE OF THE THREE GARRIDEBS	The Case-Book of Sherlock_Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE ADVENTURE OF WISTERIA LODGE	His Last Bow	The Whispering Ways Sherlock Holmes Waits to A...
A SCANDAL IN BOHEMIA	The Adventures of Sherlock Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE ADVENTURE OF THE MAZARIN STONE	The Case-Book of Sherlock_Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE MUSGRAVE RITUAL	The Memoirs of Sherlock Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE STOCK-BROKER'S CLERK	The Memoirs of Sherlock Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE ADVENTURE OF THE BERYL CORONET	The Adventures of Sherlock Holmes	The Whispering Ways Sherlock Holmes Waits to A...
A CASE OF IDENTITY	The Adventures of Sherlock Holmes	The Whispering Ways Sherlock Holmes Waits to A...
HIS LAST BOW	His Last Bow	The Whispering Ways Sherlock Holmes Waits to A...
THE ILLUSTRIOUS CLIENT	The Case-Book of Sherlock_Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE ADVENTURE OF THE DANCING MEN	The Return of Sherlock Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE REIGATE SQUIRES	The Memoirs of Sherlock Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE ADVENTURE OF THE CREEPING MAN	The Case-Book of Sherlock_Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE ADVENTURE OF THE THREE GABLES	The Case-Book of Sherlock_Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE ADVENTURE OF THE BLUE CARBUNCLE	The Adventures of Sherlock Holmes	The Whispering Ways Sherlock Holmes Waits to A...
THE ADVENTURE OF THE SIX NAPOLEONS	The Return of Sherlock Holmes	Vengeful Wednesdays: Unexpected Incidences on ...
THE ADVENTURE OF THE BRUCE-PARTINGTON PLANS	His Last Bow	Vengeful Wednesdays: Unexpected Incidences on ...
THE BOSCOMBE VALLEY MYSTERY	The Adventures of Sherlock Holmes	Vengeful Wednesdays: Unexpected Incidences on ...
THE ADVENTURE OF THE SECOND STAIN	The Return of Sherlock Holmes	Vengeful Wednesdays: Unexpected Incidences on ...
THE ADVENTURE OF THE NORWOOD BUILDER	The Return of Sherlock Holmes	Vengeful Wednesdays: Unexpected Incidences on ...
THE ADVENTURE OF THE NOBLE BACHELOR	The Adventures of Sherlock Holmes	Vengeful Wednesdays: Unexpected Incidences on ...
THE RED-HEADED LEAGUE	The Adventures of Sherlock Holmes	Vengeful Wednesdays: Unexpected Incidences on ...
THE ADVENTURE OF THE CARDBOARD BOX	His Last Bow	Vengeful Wednesdays: Unexpected Incidences on ...
THE ADVENTURE OF THE PRIORY SCHOOL	The Return of Sherlock Holmes	A Private Journey of Sherlock Holmes: Thirteen...
THE GREEK INTERPRETER	The Memoirs of Sherlock Holmes	A Private Journey of Sherlock Holmes: Thirteen...
THE BLANCHED SOLDIER	The Case-Book of Sherlock_Holmes	A Private Journey of Sherlock Holmes: Thirteen...
THE ADVENTURE OF THE ENGINEER'S THUMB	The Adventures of Sherlock Holmes	A Private Journey of Sherlock Holmes: Thirteen...
THE CROOKED MAN	The Memoirs of Sherlock Holmes	A Private Journey of Sherlock Holmes: Thirteen...
THE ADVENTURE OF THE EMPTY HOUSE	The Return of Sherlock Holmes	A Private Journey of Sherlock Holmes: Thirteen...
THE FINAL PROBLEM	The Memoirs of Sherlock Holmes	A Private Journey of Sherlock Holmes: Thirteen...
SILVER BLAZE	The Memoirs of Sherlock Holmes	A Private Journey of Sherlock Holmes: Thirteen...
THE ADVENTURE OF THE MISSING THREE-QUARTER	The Return of Sherlock Holmes	A Private Journey of Sherlock Holmes: Thirteen...
THE FIVE ORANGE PIPS	The Adventures of Sherlock Holmes	Sherlock Holmes Tumbling into the hanging arms...
THE RESIDENT PATIENT	The Memoirs of Sherlock Holmes	Sherlock Holmes Tumbling into the hanging arms...
THE ADVENTURE OF THE DEVIL'S FOOT	His Last Bow	Sherlock Holmes Tumbling into the hanging arms...
THE ADVENTURE OF BLACK PETER	The Return of Sherlock Holmes	Sherlock Holmes Tumbling into the hanging arms...
THE ADVENTURE OF THE DYING DETECTIVE	His Last Bow	Sherlock Holmes Tumbling into the hanging arms...
THE ADVENTURE OF THE ABBEY GRANGE	The Return of Sherlock Holmes	Sherlock Holmes Tumbling into the hanging arms...
THE GLORIA SCOTT	The Memoirs of Sherlock Holmes	Sherlock Holmes Tumbling into the hanging arms...
THE ADVENTURE OF THE GOLDEN PINCE-NEZ	The Return of Sherlock Holmes	Sherlock Holmes Tumbling into the hanging arms...
THE ADVENTURE OF THE LION'S MANE	The Case-Book of Sherlock_Holmes	Sherlock Holmes Tumbling into the hanging arms...
THE ADVENTURE OF THE VEILED LODGER	The Case-Book of Sherlock_Holmes	The Shooking Jaw of Sherlock Holmes in the Vil...
THE ADVENTURE OF THE SUSSEX VAMPIRE	The Case-Book of Sherlock_Holmes	The Shooking Jaw of Sherlock Holmes in the Vil...
THE DISAPPEARANCE OF LADY FRANCES CARFAX	His Last Bow	The Shooking Jaw of Sherlock Holmes in the Vil...
THE ADVENTURE OF THE SOLITARY CYCLIST	The Return of Sherlock Holmes	The Shooking Jaw of Sherlock Holmes in the Vil...
THE ADVENTURE OF SHOSCOMBE OLD PLACE	The Case-Book of Sherlock_Holmes	The Shooking Jaw of Sherlock Holmes in the Vil...
THE PROBLEM OF THOR BRIDGE	The Case-Book of Sherlock_Holmes	The Shooking Jaw of Sherlock Holmes in the Vil...