In [1]:
import random
import palettable
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.plotly as py
from plotly.graph_objs import *
In [2]:
# Read the topic results CSV; header=None makes pandas treat the first line as
# data rather than as column names.
df = pd.read_csv(
    '../../results/pd_topics_5.csv',
    header=None,
)
In [3]:
# The frame was loaded without a header row, so label its two columns here.
df.columns = [
    'tokenized_description',
    'topic',
]
In [4]:
# Keep only the rows whose tokenized description is not the literal string '[]'.
df = df.loc[df['tokenized_description'] != '[]']
Count the number of purchase orders per topic.
In [5]:
# One row per topic with the number of rows assigned to it, as the two columns
# 'topic' and 'count'.
topics = (
    df.groupby('topic')['topic']
    .count()
    .to_frame(name='count')
    .reset_index()
)
In [6]:
# Read the topic definitions CSV (no header row). Note that this rebinds `df`,
# replacing the frame loaded earlier.
df = pd.read_csv(
    '../../results/topics_definitions_5.csv',
    header=None,
)
In [7]:
# The definitions frame was loaded without a header row; label its two columns.
df.columns = [
    'topic',
    'words',
]
Keep only the first 10 words of each topic definition.
In [8]:
# Split each definition on whitespace, keep the first 10 tokens and join them
# back into a single comma-separated string.
df['words'] = df['words'].apply(
    lambda definition: ', '.join(definition.split()[:10])
)
In [9]:
# Join the per-topic counts (left) with the topic definitions (right) on the
# shared 'topic' column.
df = topics.merge(df, on='topic')
In [10]:
# Build one colour list from two ColorBrewer qualitative palettes
# (Accent_8 followed by Dark2_6), as hex colour strings.
palette = []
palette.extend(palettable.colorbrewer.qualitative.Accent_8.hex_colors)
palette.extend(palettable.colorbrewer.qualitative.Dark2_6.hex_colors)

# Fixed seed so the shuffled colour order is the same on every run.
random.seed(18675309)
random.shuffle(palette)
In [11]:
# Bar chart with one bar per topic: x is the topic, y is the purchase-order
# count, and each bar carries the topic's words as its text.
data = Data([
    Bar(
        x=df['topic'].tolist(),
        y=df['count'].tolist(),
        text=df['words'].tolist(),
        textfont=Font(
            family='Times new roman'
        ),
        marker=Marker(
            # A list of colours is applied per bar, in order.
            color=palette,
            # A bar's outline is a property of its marker (marker.line); a bar
            # trace has no top-level `line` attribute, so the white border must
            # be nested here to take effect.
            line=Line(
                color='white',
                width=1.0
            )
        )
    )
])

# Figure-level settings: title, global font, and no legend (single trace).
layout = Layout(
    title='Purchase Orders per Topic',
    font=Font(
        family='Times new roman'
    ),
    showlegend=False
)

fig = Figure(data=data, layout=layout)
In [12]:
# Send the figure to Plotly under the name 'topic5' and embed it in the
# notebook; world_readable=True is passed so the plot is publicly viewable.
py.iplot(
    fig,
    filename='topic5',
    world_readable=True,
)
Out[12]:
In [13]:
# Disabled cell: the seaborn bar plot of `topics` below is wrapped in a
# triple-quoted string, so none of it is executed; the cell merely evaluates to
# that string. NOTE(review): looks like a static alternative to the Plotly
# chart -- either restore it as real code or remove the cell.
"""%matplotlib inline
with sns.axes_style("darkgrid"):
# plt.subplots(figsize=(8, 6))
plt.rc('savefig', dpi=100)
sns.barplot(x='topic', y='count', data=topics,
palette=palettable.colorbrewer.qualitative.Set3_10.mpl_colors)"""
Out[13]:
In [ ]: