In [56]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

df = pd.read_excel("https://github.com/chris1610/pbpython/blob/master/data/sample-salesv3.xlsx?raw=true")
df.head(n=10)


Out[56]:
account number name sku quantity unit price ext price date
0 740150 Barton LLC B1-20000 39 86.69 3380.91 2014-01-01 07:21:51
1 714466 Trantow-Barrows S2-77896 -1 63.16 -63.16 2014-01-01 10:00:47
2 218895 Kulas Inc B1-69924 23 90.70 2086.10 2014-01-01 13:24:58
3 307599 Kassulke, Ondricka and Metz S1-65481 41 21.05 863.05 2014-01-01 15:05:22
4 412290 Jerde-Hilpert S2-34077 6 83.21 499.26 2014-01-01 23:26:55
5 714466 Trantow-Barrows S2-77896 17 87.63 1489.71 2014-01-02 10:07:15
6 218895 Kulas Inc B1-65551 2 31.10 62.20 2014-01-02 10:57:23
7 729833 Koepp Ltd S1-30248 8 33.25 266.00 2014-01-03 06:32:11
8 714466 Trantow-Barrows S1-50961 22 84.09 1849.98 2014-01-03 11:29:02
9 737550 Fritsch, Russel and Anderson S2-82423 14 81.92 1146.88 2014-01-03 19:07:37

In [57]:
top_10 = (df.groupby('name')['ext price', 'quantity'].agg({'ext price': 'sum', 'quantity': 'count'})
          .sort_values(by='ext price', ascending=False))[:10].reset_index()
top_10.rename(columns={'name': 'Name', 'ext price': 'Sales', 'quantity': 'Purchases'}, inplace=True)

In [58]:
top_10


Out[58]:
Name Sales Purchases
0 Kulas Inc 137351.96 94
1 White-Trantow 135841.99 86
2 Trantow-Barrows 123381.38 94
3 Jerde-Hilpert 112591.43 89
4 Fritsch, Russel and Anderson 112214.71 81
5 Barton LLC 109438.50 82
6 Will LLC 104437.60 74
7 Koepp Ltd 103660.54 82
8 Frami, Hills and Schmidt 103569.59 72
9 Keeling LLC 100934.30 74

In [59]:
plt.style.available


Out[59]:
[u'seaborn-darkgrid',
 u'seaborn-notebook',
 u'classic',
 u'seaborn-ticks',
 u'grayscale',
 u'bmh',
 u'seaborn-talk',
 u'dark_background',
 u'ggplot',
 u'fivethirtyeight',
 u'seaborn-bright',
 u'seaborn-colorblind',
 u'seaborn-deep',
 u'seaborn-whitegrid',
 u'seaborn',
 u'seaborn-poster',
 u'seaborn-muted',
 u'seaborn-paper',
 u'seaborn-white',
 u'seaborn-pastel',
 u'seaborn-dark',
 u'seaborn-dark-palette']

In [73]:
plt.style.use("ggplot")

In [74]:
def currency(x, pos):
    'The two args are the value and tick position'
    if x >= 1000000:
        return '${:1.1f}M'.format(x*1e-6)
    return '${:1.0f}K'.format(x*1e-3)

In [75]:
fig, ax = plt.subplots(figsize=(5, 6))
top_10.plot(kind='barh', y="Sales", x="Name", ax=ax)
ax.set_xlim([-10000, 140000])
ax.set(title="2014 Revenue", xlabel="Total Revenue", ylabel="Customer")
formatter = FuncFormatter(currency)
ax.xaxis.set_major_formatter(formatter)
ax.legend().set_visible(False)
# plt.show()


Out[75]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f33e97b2c50>
Out[75]:
(-10000, 140000)
Out[75]:
[<matplotlib.text.Text at 0x7f33e93ad490>,
 <matplotlib.text.Text at 0x7f33e9d46450>,
 <matplotlib.text.Text at 0x7f33e9193750>]

In [76]:
# Create the figure and the axes
fig, ax = plt.subplots()

# Plot the data and get the averaged
top_10.plot(kind='barh', y="Sales", x="Name", ax=ax)
avg = top_10['Sales'].mean()

# Set limits and labels
ax.set_xlim([-10000, 140000])
ax.set(title='2014 Revenue', xlabel='Total Revenue', ylabel='Customer')

# Add a line for the average
ax.axvline(x=avg, color='b', label='Average', linestyle='--', linewidth=1)

# Annotate the new customers
for cust in [3, 5, 8]:
    ax.text(115000, cust, "New Customer")

# Format the currency
formatter = FuncFormatter(currency)
ax.xaxis.set_major_formatter(formatter)

# Hide the legend
ax.legend().set_visible(False)
# plt.show()


Out[76]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f33e9236710>
Out[76]:
(-10000, 140000)
Out[76]:
[<matplotlib.text.Text at 0x7f33e907ded0>,
 <matplotlib.text.Text at 0x7f33e9071510>,
 <matplotlib.text.Text at 0x7f33e9030090>]
Out[76]:
<matplotlib.lines.Line2D at 0x7f33e901ced0>
Out[76]:
<matplotlib.text.Text at 0x7f33e8fd8810>
Out[76]:
<matplotlib.text.Text at 0x7f33e8fd8950>
Out[76]:
<matplotlib.text.Text at 0x7f33e8fd8e10>

In [77]:
# Get the figure and the axes
fig, (ax0, ax1) = plt.subplots(nrows=1,ncols=2, sharey=True, figsize=(7, 4))
top_10.plot(kind='barh', y="Sales", x="Name", ax=ax0)
ax0.set_xlim([0, 140000])
ax0.set_xticks(np.arange(0, 150000, 50000))
formatter = FuncFormatter(currency)
ax0.xaxis.set_major_formatter(formatter)
ax0.set(title='Revenue', xlabel='Total Revenue', ylabel='Customers')

# Plot the average as a vertical line
avg = top_10['Sales'].mean()
ax0.axvline(x=avg, color='b', label='Average', linestyle='--', linewidth=1)

# Repeat for the unit plot
top_10.plot(kind='barh', y="Purchases", x="Name", ax=ax1)
avg = top_10['Purchases'].mean()
ax1.set(title='Units', xlabel='Total Units')
ax1.axvline(x=avg, color='b', label='Average', linestyle='--', linewidth=1)

# Title the figure
fig.suptitle('2014 Sales Analysis', fontsize=14, fontweight='bold');

# Hide the legends
ax1.legend().set_visible(False)
ax0.legend().set_visible(False)


Out[77]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f33e8f9de10>
Out[77]:
(0, 140000)
Out[77]:
[<matplotlib.axis.XTick at 0x7f33e8fa1b50>,
 <matplotlib.axis.XTick at 0x7f33e8f9d850>,
 <matplotlib.axis.XTick at 0x7f33e8cb3610>]
Out[77]:
[<matplotlib.text.Text at 0x7f33e8df71d0>,
 <matplotlib.text.Text at 0x7f33e8fa1750>,
 <matplotlib.text.Text at 0x7f33e8e019d0>]
Out[77]:
<matplotlib.lines.Line2D at 0x7f33e8e10710>
Out[77]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f33e8dda950>
Out[77]:
[<matplotlib.text.Text at 0x7f33e8de7910>,
 <matplotlib.text.Text at 0x7f33e8d29ed0>]
Out[77]:
<matplotlib.lines.Line2D at 0x7f33e8f9d710>
Out[77]:
<matplotlib.text.Text at 0x7f33e8c983d0>

In [45]:
fig.canvas.get_supported_filetypes()


Out[45]:
{u'eps': u'Encapsulated Postscript',
 u'jpeg': u'Joint Photographic Experts Group',
 u'jpg': u'Joint Photographic Experts Group',
 u'pdf': u'Portable Document Format',
 u'pgf': u'PGF code for LaTeX',
 u'png': u'Portable Network Graphics',
 u'ps': u'Postscript',
 u'raw': u'Raw RGBA bitmap',
 u'rgba': u'Raw RGBA bitmap',
 u'svg': u'Scalable Vector Graphics',
 u'svgz': u'Scalable Vector Graphics',
 u'tif': u'Tagged Image File Format',
 u'tiff': u'Tagged Image File Format'}