In [3]:
"""word cloud"""
from __future__ import print_function
warned_of_error = False
def create_cloud(oname, words, maxsize=120, fontname='Lobster'):
'''Creates a word cloud (when pytagcloud is installed)
Parameters
----------
oname : output filename
words : list of (value,str)
maxsize : int, optional
Size of maximum word. The best setting for this parameter will often
require some manual tuning for each input.
fontname : str, optional
Font to use.
'''
try:
from pytagcloud import create_tag_image, make_tags
except ImportError:
if not warned_of_error:
print("Could not import pytagcloud. Skipping cloud generation")
return
# gensim returns a weight between 0 and 1 for each word, while pytagcloud
# expects an integer word count. So, we multiply by a large number and
# round. For a visualization this is an adequate approximation.
# We also need to flip the order as gensim returns (value, word), whilst
# pytagcloud expects (word, value):
words = [(w,int(v*10000)) for v,w in words]
tags = make_tags(words, maxsize=maxsize)
create_tag_image(tags, oname, size=(1800, 1200), fontname=fontname)
In [ ]:
"""Modeling the whole of wikipedia"""