In [2]:
#this cell was taken and adjusted from http://nbviewer.ipython.org/github/cs109/content/blob/master/HW3_solutions.ipynb
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from scipy.stats import pearsonr
# set some nicer defaults for matplotlib
from matplotlib import rcParams
# ColorBrewer2 "Dark2" qualitative colour table (RGB triples, components in 0..1)
dark2_colors = [(0.10588235294117647, 0.6196078431372549, 0.4666666666666667),
                (0.8509803921568627, 0.37254901960784315, 0.00784313725490196),
                (0.4588235294117647, 0.4392156862745098, 0.7019607843137254),
                (0.9058823529411765, 0.1607843137254902, 0.5411764705882353),
                (0.4, 0.6509803921568628, 0.11764705882352941),
                (0.9019607843137255, 0.6705882352941176, 0.00784313725490196),
                (0.6509803921568628, 0.4627450980392157, 0.11372549019607843)]

rcParams['figure.figsize'] = (12, 6.5)
rcParams['figure.dpi'] = 150
# 'axes.color_cycle' was deprecated in matplotlib 1.5 and removed in 2.0 in
# favour of 'axes.prop_cycle'. Try the current key first and fall back to the
# legacy key so the cell runs on old and new matplotlib releases alike.
try:
    rcParams['axes.prop_cycle'] = plt.cycler('color', dark2_colors)
except (AttributeError, KeyError):
    # AttributeError: pyplot has no cycler(); KeyError: rc key unknown.
    rcParams['axes.color_cycle'] = dark2_colors
rcParams['lines.linewidth'] = 2
rcParams['axes.facecolor'] = 'white'
rcParams['font.size'] = 18
rcParams['patch.edgecolor'] = 'white'
rcParams['patch.facecolor'] = dark2_colors[0]
rcParams['font.family'] = 'StixGeneral'
def remove_border(axes=None, top=False, right=False, left=True, bottom=True):
    """
    Minimize chartjunk by stripping out unnecessary plot borders and axis ticks.

    The top/right/left/bottom keywords toggle whether the corresponding plot
    border (spine) is drawn; ticks are shown only on the borders that are drawn.

    Parameters
    ----------
    axes : matplotlib Axes, optional
        Axes to modify. Defaults to the current axes (``plt.gca()``).
    top, right, left, bottom : bool
        Whether the respective border and its ticks stay visible.
    """
    ax = axes if axes is not None else plt.gca()

    for side, visible in (('top', top), ('right', right),
                          ('left', left), ('bottom', bottom)):
        ax.spines[side].set_visible(visible)

    # Turn off all ticks first ...
    ax.yaxis.set_ticks_position('none')
    ax.xaxis.set_ticks_position('none')

    # ... then re-enable them on the visible borders. tick_top()/tick_bottom()
    # (and tick_left()/tick_right()) each move the ticks to ONE side, so
    # calling both in sequence would leave only the last one active; use
    # 'both' when both opposite borders are requested.
    if top and bottom:
        ax.xaxis.set_ticks_position('both')
    elif top:
        ax.xaxis.tick_top()
    elif bottom:
        ax.xaxis.tick_bottom()

    if left and right:
        ax.yaxis.set_ticks_position('both')
    elif left:
        ax.yaxis.tick_left()
    elif right:
        ax.yaxis.tick_right()
In [3]:
# Product data over time: simulated sold quantity, unit price and revenue.
SEED = 42              # fixed seed so "Restart & Run All" reproduces the same data
ANZAHL_TAGE = 48 * 30  # number of records; later cells sum blocks of 30 into one month

random.seed(SEED)
# Quantities ~ N(1000, 40) rounded to whole units, prices ~ N(100, 5).
menge_vk = [round(random.normalvariate(1000, 40)) for _ in range(ANZAHL_TAGE)]
preis_prod = [random.normalvariate(100, 5) for _ in range(ANZAHL_TAGE)]
# Revenue per record = quantity * price.
umsatz = [menge * preis for menge, preis in zip(menge_vk, preis_prod)]
sales_df = pd.DataFrame({"Menge": menge_vk, "Preis": preis_prod, "Umsatz": umsatz})
In [4]:
# Histogram of the sold quantities
mengen_ax = sales_df['Menge'].hist(bins=20)
remove_border(mengen_ax)
# Drop the default grid, then draw only horizontal white grid lines.
mengen_ax.grid(False)
mengen_ax.grid(axis='y', color='white', linestyle='-')
mengen_ax.set_title('Verteilung der verkauften Mengen')
mengen_ax.set_ylabel('Anzahl')
mengen_ax.set_xlabel('Menge')
plt.show()
In [14]:
# Histogram of the prices (second Dark2 colour)
preis_ax = sales_df['Preis'].hist(bins=20, color=dark2_colors[1])
remove_border(preis_ax)
# Drop the default grid, then draw only horizontal white grid lines.
preis_ax.grid(False)
preis_ax.grid(axis='y', color='white', linestyle='-')
preis_ax.set_title('Verteilung der Preise')
preis_ax.set_ylabel('Anzahl')
preis_ax.set_xlabel('Preis')
plt.show()
In [15]:
# Histogram of the revenue (third Dark2 colour, finer binning)
umsatz_hist_ax = sales_df['Umsatz'].hist(bins=50, color=dark2_colors[2])
remove_border(umsatz_hist_ax)
# Drop the default grid, then draw only horizontal white grid lines.
umsatz_hist_ax.grid(False)
umsatz_hist_ax.grid(axis='y', color='white', linestyle='-')
umsatz_hist_ax.set_title('Verteilung des Umsatzes')
umsatz_hist_ax.set_ylabel('Anzahl')
umsatz_hist_ax.set_xlabel('Umsatz')
plt.show()
In [16]:
# Aggregate monthly revenue: sum every block of 30 consecutive records.
# (Series.iterkv() has been removed from pandas and the keys were never used,
# so work on the plain values instead.)
tage_pro_monat = 30
umsatz_werte = sales_df['Umsatz'].tolist()
# Only complete blocks count; a trailing partial block is dropped.
anzahl_monate = len(umsatz_werte) // tage_pro_monat
monthly_sales = [
    sum(umsatz_werte[monat * tage_pro_monat:(monat + 1) * tage_pro_monat])
    for monat in range(anzahl_monate)
]
In [17]:
# Line chart: development of the monthly revenue
monats_ax = plt.gca()
monats_ax.plot(monthly_sales, color=dark2_colors[2])
remove_border(monats_ax)
monats_ax.set_title('Entwicklung des monatlichen Umsatzes')
monats_ax.set_ylabel('Umsatz')
monats_ax.set_xlabel('Monat nr.')
plt.show()
In [9]:
# Analysis Part 1
# Original note: "a Gleichverteilung is assumed" - literally "uniform
# distribution". NOTE(review): possibly "identically distributed" was meant;
# confirm the intended assumption.
# Single-argument print(...) behaves identically on Python 2 and Python 3.
print("Durchschnittswerte")
print(sales_df.mean())
print("-------------")
print("")
print("Standardabweichungen")
print(sales_df.std())
In [10]:
# Error graphs: deviation of every observation from its column mean.
# Vectorised subtraction computes each mean once instead of once per row.
mengenfehler = sales_df['Menge'] - sales_df['Menge'].mean()
preisfehler = sales_df['Preis'] - sales_df['Preis'].mean()
umsatzfehler = sales_df['Umsatz'] - sales_df['Umsatz'].mean()

# Three histograms side by side on a shared y axis.
f, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True)
for achse, fehler, linker_rand in ((ax1, mengenfehler, True),
                                   (ax2, preisfehler, False),
                                   (ax3, umsatzfehler, False)):
    achse.hist(fehler)
    # Drop the default grid, then draw only horizontal white grid lines.
    achse.grid(False)
    achse.grid(axis='y', color='white', linestyle='-')
    achse.spines['top'].set_visible(False)
    achse.spines['right'].set_visible(False)
    # Only the left-most panel keeps its left border (the y axis is shared).
    if not linker_rand:
        achse.spines['left'].set_visible(False)
plt.show()
In [18]:
# Bootstrapping to find how safe a stock of 1100 units is
def bootstrap_mengensicherheit(nsample=1000, show_1100=True):
    """Bootstrap sold-quantity predictions from the observed deviations.

    Draws ``nsample`` deviations with replacement from the module-level
    ``mengenfehler`` and adds each one to the mean of ``sales_df['Menge']``.

    Parameters
    ----------
    nsample : int
        Number of bootstrap draws.
    show_1100 : bool
        If true, print the percentage of predictions below 1100 and return
        ``None``. Otherwise return the predictions.

    Returns
    -------
    numpy.ndarray or None
        The ``nsample`` predictions when ``show_1100`` is false, else ``None``.
    """
    schaetzung = sales_df['Menge'].mean()
    # Sample from a plain list: random.choice() on a Series performs a label
    # lookup, which only works while the index happens to be 0..n-1.
    fehlerwerte = mengenfehler.tolist()
    error_samples = [random.choice(fehlerwerte) for _ in range(nsample)]
    # Explicit array so the addition does not depend on the type of the mean.
    predictions = schaetzung + np.array(error_samples)
    if show_1100:
        count_below = int((predictions < 1100).sum())
        anteil = count_below / float(nsample) * 100
        # Single-argument print(...) works on both Python 2 and Python 3.
        print("Wahrscheinlichkeit, dass weniger als 1.100 Einheiten verkauft wird: %s %%" % (anteil,))
        return None
    return predictions
# Run with the defaults (1000 draws): prints the share of bootstrapped
# predictions that fall below 1100 units.
bootstrap_mengensicherheit()
In [12]:
# Find the quantity at a given confidence level (95% by default)
def bootstrap_get_confidence(chance=95):
    """Return the ``chance``-th percentile of the bootstrapped predictions.

    The predictions come from ``bootstrap_mengensicherheit(show_1100=False)``;
    ``chance`` is passed to ``np.percentile`` as the percentile to compute.
    """
    vorhersagen = np.array(bootstrap_mengensicherheit(show_1100=False))
    return np.percentile(vorhersagen, chance)
In [13]:
# 95th percentile of the bootstrapped quantity predictions
bootstrap_get_confidence(chance=95)
Out[13]: