In [1]:
import pandas as pd
import numpy as np
import os
import math
import graphlab
import graphlab as gl
import graphlab.aggregate as agg
In [2]:
'''钢炮'''
# Workstation configuration (the label above is presumably the author's
# nickname for that machine).
# The data directory defaults to the original workstation location but can be
# overridden with the BIMBO_DATA_DIR environment variable, so the notebook is
# not tied to one user's home directory.
# os.path.join(..., '') guarantees the trailing separator that the
# `path + '<name>.csv'` concatenations throughout this notebook rely on.
path = os.path.join(os.environ.get('BIMBO_DATA_DIR') or '/home/zongyi/bimbo_data/', '')
# Training data as a GraphLab SFrame.
sf = gl.SFrame.read_csv(path + 'train.csv', verbose=False)
# town = gl.SFrame.read_csv(path + 'town_state.csv', verbose=False)
In [125]:
'''MAC'''
# Mac configuration: same load as the workstation cell, different default
# directory. BIMBO_DATA_DIR, when set, takes precedence over the hard-coded
# location so the cell also runs on machines without this user directory.
# os.path.join(..., '') guarantees the trailing separator that the
# `path + '<name>.csv'` concatenations throughout this notebook rely on.
path = os.path.join(
    os.environ.get('BIMBO_DATA_DIR') or '/Users/zonemercy/jupyter_notebook/bimbo_data/',
    '')
# Training data as a GraphLab SFrame.
sf = gl.SFrame.read_csv(path + 'train.csv', verbose=False)
# town = gl.SFrame.read_csv(path + 'town_state.csv', verbose=False)
In [3]:
# Build the agency -> town lookup with a per-town row count.
# `town` is not created by any earlier cell (the loader lines in the data
# loading cells are commented out), so it is loaded here; this keeps the cell
# runnable on a fresh kernel and safe to re-run.
# NOTE(review): assumes town_state.csv provides the 'Agencia_ID' and 'Town'
# columns used below -- confirm against the data file.
town = gl.SFrame.read_csv(path + 'town_state.csv', verbose=False)
# t_c: number of rows per Town (one per agency, if each Agencia_ID appears
# once in the file -- TODO confirm).
agencies_per_town = town.groupby(key_columns=['Town'],
                                 operations={'t_c': agg.COUNT('Agencia_ID')})
town = town.join(agencies_per_town, on='Town', how='left')
# Keep only the columns used downstream (list form is the documented way to
# select several SFrame columns).
town = town[['Agencia_ID', 'Town', 't_c']]
del agencies_per_town
In [ ]:
# Attach the town lookup columns to every transaction row, matching on agency.
# Left join: rows of `sf` without a matching agency are kept.
sf = sf.join(town, how='left', on='Agencia_ID')
In [8]:
# Preview the first rows of the town lookup table.
print(town.head())
In [9]:
# Per-town row count of the transaction frame, stored as 'tcc'.
# NOTE(review): agg.COUNT counts rows per group, not distinct Cliente_ID
# values -- confirm that is the intended measure.
tcc = sf.groupby(key_columns=['Town'],
                 operations={'tcc': agg.COUNT('Cliente_ID')})
# Scale the count down by 100 and cast to int to get a coarse bucket.
tcc['tcc'] = (tcc['tcc'] / 100).astype(int)
# Number of distinct bucket values.
print(len(tcc['tcc'].unique()))
# Display the towns ordered by bucket (pass ascending=False for the reverse).
tcc.sort('tcc')
Out[9]:
In [10]:
# Add the per-town 'tcc' bucket to the town lookup table.
# Alternative kept from the original (join onto the transaction frame instead):
# sf = sf.join(tcc,on='Town',how='left')
town = town.join(tcc, how='left', on='Town')
In [11]:
# Inspect the town table after the 'tcc' join.
print(town)
In [12]:
# log(demand + 1) of the target column, stored under the (misspelled, but
# referenced below) column name 'Demada_log'.
demand = sf['Demanda_uni_equil']
sf['Demada_log'] = demand.apply(lambda units: math.log(units + 1))
del demand
# tp_sum: sum of the log-demand for every (Town, Producto_ID) pair.
tp_sum = sf.groupby(key_columns=['Town', 'Producto_ID'],
                    operations={'tp_sum': agg.SUM('Demada_log')})
In [13]:
# Left-join the (Town, Producto_ID) aggregate onto the town table by Town.
# NOTE(review): tp_sum is keyed by Town *and* Producto_ID, so matching on Town
# alone can yield several output rows per town row -- confirm this is intended.
town = town.join(tp_sum, how='left', on=['Town'])
In [14]:
# Attach the per-(Town, Producto_ID) log-demand sum to each transaction row.
sf = sf.join(tp_sum, how='left', on=['Town', 'Producto_ID'])
In [19]:
# Inspect the town table after the 'tp_sum' join.
print(town)
In [18]:
from sklearn import preprocessing

# Fit a label encoder on the Town column (fit() returns the encoder itself),
# show the learned classes, then replace Town by its integer code.
le = preprocessing.LabelEncoder().fit(town['Town'])
print(le.classes_)
town['Town'] = le.transform(town['Town'])
In [ ]:
In [5]:
'''n_t'''
# Restrict the training frame to week 8 (Semana == 8).
sf = sf[sf['Semana'] == 8]
# Drop the sales / returns / demand columns, presumably so the remaining
# schema matches test.csv for the append below -- TODO confirm.
for dropped in ['Venta_uni_hoy', 'Venta_hoy', 'Dev_uni_proxima',
                'Dev_proxima', 'Demanda_uni_equil']:
    del sf[dropped]
del dropped
# Deliberately left in place by the original (commented-out deletions):
# del sf['Demada_log']
# del sf['id']

# Load the test set, drop its 'id' column and stack it under the week-8 rows.
test = gl.SFrame.read_csv(path + 'test.csv', verbose=False)
del test['id']
sf = sf.append(test)
In [6]:
# Inspect the combined week-8 + test frame.
print(sf)
In [7]:
from sklearn import preprocessing

# Read the town table previously written to towns.csv and replace the Town
# column by integer codes from a freshly fitted label encoder.
town = gl.SFrame.read_csv(path + 'towns.csv', verbose=False)
le = preprocessing.LabelEncoder().fit(town['Town'])
town['Town'] = le.transform(town['Town'])
# Two-column agency -> town lookup used for the join onto `sf`.
town1 = town[['Town', 'Agencia_ID']]
In [8]:
# Inspect the agency -> encoded-town lookup.
print(town1)
In [9]:
# Add the encoded Town to each row of the week-8 + test frame, by agency.
sf = sf.join(town1, how='left', on='Agencia_ID')
In [10]:
# Step 1: number of rows for every (Semana, Town) pair.
rows_per_week_town = sf.groupby(key_columns=['Semana', 'Town'],
                                operations={'n_t': agg.COUNT('Town')})
# Step 2: average those weekly counts per Town -> one 'n_t' value per town.
n_t = rows_per_week_town.groupby(key_columns=['Town'],
                                 operations={'n_t': agg.MEAN('n_t')})
del rows_per_week_town
In [ ]:
# Display the per-town mean weekly row count computed above.
n_t
In [ ]:
# Persist the per-town 'n_t' feature as CSV next to the input data.
n_t.save(path + 'town8.csv', format='csv')
In [ ]:
In [ ]:
In [13]:
# Write the town table out as CSV.
# NOTE(review): this is the same file name that the reload cell reads
# ('towns.csv'), so saving here overwrites that input with the current
# contents of `town` -- confirm this is intended.
town.save(path + 'towns.csv', format='csv')
In [ ]:
In [ ]:
In [38]:
from IPython.core.pylabtools import figsize
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
sns.set_style('darkgrid', {'grid.color': '.8','grid.linestyle': u'--'})
%matplotlib inline
# figsize(16, 9)
# fig, axs = plt.subplots(3, 3)
# fig.subplots_adjust(hspace = .3) #, wspace=.1)
# data = np.arange(3, 10)
# for ax, d in zip(axs.ravel(), data):
x = sf1['tp_sum']
y = sf1['Demanda_uni_equil']
plot = plt.scatter(x, y, alpha=.2)
del x
del y
del plot