In [2]:
# -*- coding: UTF-8 -*-
from __future__ import division
import numpy as np
import pandas as pd
import sys
from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline

In [3]:
def _price_stats(entries):
    """Summarise one price-history series.

    Parameters
    ----------
    entries : iterable of mappings
        Each entry must expose its price under the ``'price'`` key.

    Returns
    -------
    (count, mean, variance)
        ``count`` is the number of entries. ``mean`` and ``variance`` are
        computed with ``np.mean`` / ``np.var`` and are ``None`` when the
        series is empty.
    """
    prices = [entry['price'] for entry in entries]
    if not prices:
        return 0, None, None
    return len(prices), np.mean(prices), np.var(prices)


# The .npy files hold a pickled dict inside a 0-d object array; `[()]` unwraps it.
# allow_pickle=True is required on numpy >= 1.16.3, where the default became False.
# NOTE(review): unpickling executes arbitrary code -- only load files you trust.
skus = np.load("mobiles_skus.npy", allow_pickle=True)[()]['skus']

# Mean / variance of each non-empty series (SKUs with an empty series are skipped,
# so these lists can be shorter than `skus`).
lowests_means = []
lowests_vars = []

average_means = []
average_vars = []

# Length of every series, including empty ones (one entry per SKU).
lowlengths = []
avglengths = []

for sku in skus:
    sku_id = sku['id']

    price_history = np.load('price_history/{}.npy'.format(sku_id),
                            allow_pickle=True)[()]['history']

    # Same bookkeeping for the 'lowest' and the 'average' price series.
    for series_key, lengths, means, variances in (
            ('lowest', lowlengths, lowests_means, lowests_vars),
            ('average', avglengths, average_means, average_vars)):
        count, mean, variance = _price_stats(price_history[series_key])
        lengths.append(count)
        if count > 0:
            means.append(mean)
            variances.append(variance)

In [4]:
# Turn the per-SKU history lengths into numpy arrays (needed for boolean masking below).
lowlengths, avglengths = np.array(lowlengths), np.array(avglengths)

In [5]:
# Turn the per-SKU price variances into numpy arrays (needed for boolean masking below).
lowests_vars, average_vars = (
    np.array(values) for values in (lowests_vars, average_vars)
)

In [6]:
# Number of variance values collected for the 'lowest' price series.
lowests_vars.shape[0]


Out[6]:
843

In [7]:
# Number of variance values collected for the 'average' price series.
average_vars.shape[0]


Out[7]:
834

In [8]:
# Fraction of 'average' price series whose variance is exactly zero.
# Relies on true division (`from __future__ import division` on Python 2).
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(np.count_nonzero(average_vars == 0) / len(average_vars))


0.109112709832

In [9]:
# Fraction of 'lowest' price series whose variance is exactly zero.
# Relies on true division (`from __future__ import division` on Python 2).
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(np.count_nonzero(lowests_vars == 0) / len(lowests_vars))


0.0664294187426

In [10]:
# Histogram of the 'lowest' price variances: one bin per value, y-axis limited to [0, 50].
fig, ax = plt.subplots()
ax.set_ylim([0, 50])
ax.hist(lowests_vars, bins=len(lowests_vars))
ax.set_xlabel('variance of price history of minimum price')
ax.set_ylabel('frequency')
ax.set_title('histogram of variance of minimum price')
plt.show()



In [11]:
# Histogram of the 'average' price variances: one bin per value, y-axis limited to [0, 50].
fig, ax = plt.subplots()
ax.set_ylim([0, 50])
ax.hist(average_vars, bins=len(average_vars))
ax.set_xlabel('variance of price history of average price')
ax.set_ylabel('frequency')
ax.set_title('histogram of variance of average price')
plt.show()


Length analysis


In [12]:
# Peek at the first 20 'lowest' price-history lengths.
lowlengths[0:20]


Out[12]:
array([213, 213, 215, 306, 210, 213,  59, 340, 122, 219, 213, 247, 206,
       198, 221, 163, 114, 214, 179, 275])

In [13]:
# Histogram of 'lowest' price-history lengths: one bin per value, y-axis limited to [0, 100].
fig, ax = plt.subplots()
ax.hist(lowlengths, bins=len(lowlengths))
ax.set_xlabel('length of price history for minimum price')
ax.set_ylabel('frequency')
ax.set_title('histogram of lengths of min price')
ax.set_ylim([0, 100])
plt.show()



In [16]:
# Shape of the 'lowest' length array.
np.shape(lowlengths)


Out[16]:
(843,)

In [24]:
# Distribution of 'lowest' price-history lengths with an x tick every 20 entries.
# Axis labels and a title are added so the figure stands alone, like the
# histogram cells elsewhere in this notebook.
fig, ax = plt.subplots(figsize=(16, 7))
sns.distplot(lowlengths, ax=ax)
# Same tick positions as range(max(lowlengths))[::20]: 0, 20, 40, ... below the maximum.
ax.set_xticks(np.arange(0, lowlengths.max(), 20))
ax.set_xlabel('length of price history for minimum price')
ax.set_ylabel('density')
ax.set_title('distribution of lengths of min price')
plt.show()



In [15]:
# Bar chart with one bar per distinct 'lowest' history length.
# The values are passed explicitly as `x`, the first positional parameter in the original call.
sns.countplot(x=lowlengths)


Out[15]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f1a84b3fc50>

In [42]:
# Number of SKUs whose 'lowest' price history has exactly 213 entries.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(np.count_nonzero(lowlengths == 213))


117

In [34]:
# Histogram of 'average' price-history lengths: one bin per value, y-axis limited to [0, 60].
fig, ax = plt.subplots()
ax.hist(avglengths, bins=len(avglengths))
ax.set_xlabel('length of price history for average price')
ax.set_ylabel('frequency')
ax.set_title('histogram of lengths of average price')
ax.set_ylim([0, 60])
plt.show()



In [14]:
# Longest 'lowest' price history.
lowlengths.max()


Out[14]:
684

In [15]:
# Fraction of SKUs whose 'lowest' price history has the maximum observed length.
# Relies on true division (`from __future__ import division` on Python 2).
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(np.count_nonzero(lowlengths == lowlengths.max()) / len(lowlengths))


0.0011862396204

In [16]:
# Longest 'average' price history.
avglengths.max()


Out[16]:
187

In [17]:
# Fraction of SKUs whose 'average' price history has the maximum observed length.
# Relies on true division (`from __future__ import division` on Python 2).
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(np.count_nonzero(avglengths == avglengths.max()) / len(avglengths))


0.067615658363

In [ ]: