In [2]:
# -*- coding: UTF-8 -*-
from __future__ import division
import numpy as np
import pandas as pd
import sys
from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline
In [3]:
# Load the SKU catalogue and, for every SKU, summarise its price history.
#
# The .npy files are indexed with string keys after unwrapping, so they hold
# pickled Python objects rather than plain numeric arrays. NumPy >= 1.16.3
# refuses to unpickle by default, hence the explicit allow_pickle=True.
# SECURITY: unpickling can execute arbitrary code -- only load files you trust.
skus = np.load("mobiles_skus.npy", allow_pickle=True).item()['skus']

# Mean / variance lists only receive an entry for SKUs with a non-empty
# history, so they can be shorter than (and are not index-aligned with)
# the length lists, which receive one entry per SKU.
lowests_means = []
lowests_vars = []
average_means = []
average_vars = []
lowlengths = []
avglengths = []

for sku in skus:
    sku_id = sku['id']
    price_history = np.load(
        'price_history/{}.npy'.format(sku_id), allow_pickle=True
    ).item()['history']

    # --- lowest-price series ---
    low_history = [obj['price'] for obj in price_history['lowest']]
    len_low_hist = len(low_history)
    lowlengths.append(len_low_hist)
    # Skip empty histories: mean/variance of an empty sequence is nan.
    if len_low_hist > 0:
        lowests_means.append(np.mean(low_history))
        lowests_vars.append(np.var(low_history))

    # --- average-price series ---
    avg_history = [obj['price'] for obj in price_history['average']]
    len_avg_hist = len(avg_history)
    avglengths.append(len_avg_hist)
    if len_avg_hist > 0:
        average_means.append(np.mean(avg_history))
        average_vars.append(np.var(avg_history))
In [4]:
# Turn the per-SKU history lengths into NumPy arrays so that later cells can
# filter them with boolean masks.
lowlengths, avglengths = np.array(lowlengths), np.array(avglengths)
In [5]:
# Turn the variance lists into NumPy arrays so that later cells can filter
# them with boolean masks (e.g. `== 0`).
lowests_vars, average_vars = np.array(lowests_vars), np.array(average_vars)
In [6]:
# How many lowest-price variances were collected (one per SKU whose
# lowest-price history was non-empty).
n_lowest_variances = len(lowests_vars)
n_lowest_variances
Out[6]:
In [7]:
# How many average-price variances were collected (one per SKU whose
# average-price history was non-empty).
n_average_variances = len(average_vars)
n_average_variances
Out[7]:
In [8]:
# Fraction of the collected average-price variances that are exactly zero.
# Written as a single-argument print(...) so the cell runs unchanged on both
# Python 2 and Python 3; true division under Python 2 comes from the
# notebook's `from __future__ import division`.
zero_variance_count = len(average_vars[average_vars == 0])
print(zero_variance_count / len(average_vars))
In [9]:
# Fraction of the collected lowest-price variances that are exactly zero.
# Written as a single-argument print(...) so the cell runs unchanged on both
# Python 2 and Python 3; true division under Python 2 comes from the
# notebook's `from __future__ import division`.
zero_variance_count = len(lowests_vars[lowests_vars == 0])
print(zero_variance_count / len(lowests_vars))
In [10]:
# Histogram of the lowest-price variances, using one bin per sample.
# The y-axis range is fixed to [0, 50] before the bars are drawn.
fig, ax = plt.subplots()
ax.set_ylim([0, 50])
ax.hist(lowests_vars, bins=len(lowests_vars))
ax.set_xlabel('variance of price history of minimum price')
ax.set_ylabel('frequency')
ax.set_title('histogram of variance of minimum price')
plt.show()
In [11]:
# Histogram of the average-price variances, using one bin per sample.
# The y-axis range is fixed to [0, 50] before the bars are drawn.
fig, ax = plt.subplots()
ax.set_ylim([0, 50])
ax.hist(average_vars, bins=len(average_vars))
ax.set_xlabel('variance of price history of average price')
ax.set_ylabel('frequency')
ax.set_title('histogram of variance of average price')
plt.show()
In [12]:
# Peek at the lowest-price history lengths of the first 20 SKUs.
first_low_lengths = lowlengths[:20]
first_low_lengths
Out[12]:
In [13]:
# Histogram of lowest-price history lengths, using one bin per SKU.
# The y-axis range is fixed to [0, 100] after the bars are drawn.
fig, ax = plt.subplots()
ax.hist(lowlengths, bins=len(lowlengths))
ax.set_xlabel('length of price history for minimum price')
ax.set_ylabel('frequency')
ax.set_title('histogram of lengths of min price')
ax.set_ylim([0, 100])
plt.show()
In [16]:
# Shape of the lowest-price history-length array (one entry per SKU).
lowlengths_shape = lowlengths.shape
lowlengths_shape
Out[16]:
In [24]:
# Distribution plot of lowest-price history lengths on a wide canvas,
# with an x tick every 20 units starting from 0.
canvas_size = (16, 7)
plt.figure(figsize=canvas_size)
sns.distplot(lowlengths)
tick_positions = range(0, max(lowlengths), 20)
plt.xticks(tick_positions)
plt.show()
In [15]:
# Count plot of lowest-price history lengths; the returned Axes is the
# cell's displayed value.
length_count_axes = sns.countplot(lowlengths)
length_count_axes
Out[15]:
In [42]:
# Number of SKUs whose lowest-price history has exactly `target_length` entries.
# NOTE(review): 213 is hard-coded; presumably a value read off the plots
# above -- confirm before reusing on a different data set.
# Written as a single-argument print(...) so the cell runs unchanged on both
# Python 2 and Python 3.
target_length = 213
print(len(lowlengths[lowlengths == target_length]))
In [34]:
# Histogram of average-price history lengths, using one bin per SKU.
# The y-axis range is fixed to [0, 60] after the bars are drawn.
fig, ax = plt.subplots()
ax.hist(avglengths, bins=len(avglengths))
ax.set_xlabel('length of price history for average price')
ax.set_ylabel('frequency')
ax.set_title('histogram of lengths of average price')
ax.set_ylim([0, 60])
plt.show()
In [14]:
# Longest lowest-price history across all SKUs.
longest_low_history = max(lowlengths)
longest_low_history
Out[14]:
In [15]:
# Fraction of SKUs whose lowest-price history is as long as the longest one.
# Written as a single-argument print(...) so the cell runs unchanged on both
# Python 2 and Python 3; true division under Python 2 comes from the
# notebook's `from __future__ import division`.
longest_low_history = max(lowlengths)
at_max_count = len(lowlengths[lowlengths == longest_low_history])
print(at_max_count / len(lowlengths))
In [16]:
# Longest average-price history across all SKUs.
longest_avg_history = max(avglengths)
longest_avg_history
Out[16]:
In [17]:
# Fraction of SKUs whose average-price history is as long as the longest one.
# Written as a single-argument print(...) so the cell runs unchanged on both
# Python 2 and Python 3; true division under Python 2 comes from the
# notebook's `from __future__ import division`.
longest_avg_history = max(avglengths)
at_max_count = len(avglengths[avglengths == longest_avg_history])
print(at_max_count / len(avglengths))
In [ ]: