In [2]:
import os
import math
import pandas as pd
import numpy as np
from scipy import stats
# print stats.hmean([ -50.2 , 100.5 ])

In [52]:
# Directory holding the submission CSVs. The trailing slash matters because
# other cells build file names with `path + name`.
path = '/home/zongyi/bimbo_data/submission/'
# List the available files. The loop variable is deliberately not called `f`
# so it cannot clobber the blending helper `f` defined in other cells, and
# print(...) with a single argument behaves the same on Python 2 and Python 3.
for filename in os.listdir(path):
    print(filename)


w9_f14_3xn500_c0.7_mean_45482.csv
w9_n1000_45133.csv
w9_n500_45423.csv
w8_f14_n1000_c0.7_45736.csv
ensemble-kele-0442.csv
w9_full1200_47233.csv
w8_f15_n1000_c0.7_mean_46121.csv
w8_f15_n1000_c0.7_48181.csv
w9_n500_45282.csv
w9_f25_n1500_c0.5_46191.csv
w9_full1.csv
w8_n500_45918.csv
w9_f14_2n1000_c0.7_mean_4539.csv
w9_f17_1500_49xx.csv
ensemble_full_04387.csv
w9_full_47401.csv
w8_f14_n1200_c0.7.3_47317.csv
w8_n500_45986.csv
w8_n1000_48381.csv
ensemble-452-474-45034.csv
w9_f17_n500_c0.6_48153.csv
w8_f26_n1000_c0.5_46928.csv

In [69]:
# Load the five submissions that are blended below; the order of the file
# names fixes which frame becomes df1 .. df5.
df1, df2, df3, df4, df5 = [
    pd.read_csv(path + csv_name)
    for csv_name in (
        'w9_n1000_45133.csv',
        'w9_f25_n1500_c0.5_46191.csv',
        'w9_full1200_47233.csv',
        'w9_f14_2n1000_c0.7_mean_4539.csv',
        'w9_f14_3xn500_c0.7_mean_45482.csv',
    )
]

In [70]:
# Give each submission's prediction column a unique name (d1 .. d5) so the
# frames can be merged on 'id' without column clashes. 'id' keeps its name.
df1 = df1.rename(columns={'Demanda_uni_equil': 'd1'})
df2 = df2.rename(columns={'Demanda_uni_equil': 'd2'})
df3 = df3.rename(columns={'Demanda_uni_equil': 'd3'})
df4 = df4.rename(columns={'Demanda_uni_equil': 'd4'})
df5 = df5.rename(columns={'Demanda_uni_equil': 'd5'})

In [71]:
# Left-join d2 .. d5 onto df1 by 'id', one frame at a time, so `df` ends up
# with one row per df1 row and the columns d1 .. d5.
df = (
    df1
    .merge(df2, how='left', on=['id'])
    .merge(df3, how='left', on=['id'])
    .merge(df4, how='left', on=['id'])
    .merge(df5, how='left', on=['id'])
)

In [74]:
# Pairwise Pearson correlation between the five prediction columns. Each line
# is whatever stats.pearsonr returns, printed in the order
# (d1,d2), (d1,d3), (d1,d4), (d1,d5), (d2,d3), ..., (d4,d5).
prediction_cols = ['d1', 'd2', 'd3', 'd4', 'd5']
for pos, left_col in enumerate(prediction_cols):
    for right_col in prediction_cols[pos + 1:]:
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print(stats.pearsonr(df[left_col], df[right_col]))


(0.90859839944163689, 0.0)
(0.92805488735048192, 0.0)
(0.88624683036048535, 0.0)
(0.87027126832752355, 0.0)
(0.92529220805822276, 0.0)
(0.92288602605536907, 0.0)
(0.90392357835496917, 0.0)
(0.91427252279179827, 0.0)
(0.89463221754723543, 0.0)
(0.93983579944164275, 0.0)

In [77]:
# Show the first three rows of the merged frame.
df.head(3)


Out[77]:
d1 id d2 d3 d4 d5 Demanda_uni_equil
0 3.32189 4721633 3.43315 3.22524 3.24967 3.23493 3.315742
1 2.08302 6035419 2.29651 2.08906 2.01552 1.89152 2.093581
2 1.62305 3462602 1.70056 1.65728 1.52238 1.61837 1.627146

In [ ]:
def f(x):
    """Blend d1, d2 and d3 with nested fixed weights.

    ``x`` is anything indexable by the keys 'd1', 'd2' and 'd3'.
    Returns 0.3 * (0.7 * d1 + 0.3 * d2) + 0.7 * d3.
    """
    inner_blend = 0.7 * x['d1'] + 0.3 * x['d2']
    return 0.3 * inner_blend + 0.7 * x['d3']
# `f` only uses `*` and `+` on the d1/d2/d3 entries, so it can be applied to
# whole columns at once. This yields the same values as the row-wise
# .apply(f, axis=1) without calling Python once per row.
df['Demanda_uni_equil'] = f(df)

In [84]:
# Blend of d1 .. d5: half the weight on d1, a quarter on a 55/45 mix of
# d4 and d5, and a quarter on a 65/35 mix of d2 and d3.
mix_d4_d5 = 0.55*df['d4'] + 0.45*df['d5']
mix_d2_d3 = 0.65*df['d2'] + 0.35*df['d3']
df['Demanda_uni_equil'] = 0.5*df['d1'] + 0.25*mix_d4_d5 + 0.25*mix_d2_d3

In [ ]:


In [ ]:


In [78]:
# Directory holding the submission CSVs. The trailing slash matters because
# other cells build file names with `path + name`.
path = '/home/zongyi/bimbo_data/submission/'
# List the available files. The loop variable is deliberately not called `f`
# so it cannot clobber the blending helper `f` defined in other cells, and
# print(...) with a single argument behaves the same on Python 2 and Python 3.
for filename in os.listdir(path):
    print(filename)


w9_f14_3xn500_c0.7_mean_45482.csv
w9_n1000_45133.csv
w9_n500_45423.csv
w8_f14_n1000_c0.7_45736.csv
ensemble-kele-0442.csv
w9_full1200_47233.csv
w8_f15_n1000_c0.7_mean_46121.csv
w8_f15_n1000_c0.7_48181.csv
w9_n500_45282.csv
w9_f25_n1500_c0.5_46191.csv
w9_full1.csv
w8_n500_45918.csv
w9_f14_2n1000_c0.7_mean_4539.csv
w9_f17_1500_49xx.csv
ensemble_full_04387.csv
w9_full_47401.csv
w8_f14_n1200_c0.7.3_47317.csv
w8_n500_45986.csv
w8_n1000_48381.csv
ensemble-452-474-45034.csv
w9_f17_n500_c0.6_48153.csv
w8_f26_n1000_c0.5_46928.csv

In [81]:
# Load three more submissions (the 'w8_*' files); the order of the file names
# fixes which frame becomes df6, df7 and df8.
df6, df7, df8 = [
    pd.read_csv(path + csv_name)
    for csv_name in (
        'w8_f14_n1000_c0.7_45736.csv',
        'w8_f15_n1000_c0.7_mean_46121.csv',
        'w8_f26_n1000_c0.5_46928.csv',
    )
]

In [82]:
# Rename each prediction column to a unique name (d6 .. d8) ahead of the
# merge on 'id'. 'id' keeps its name.
df6 = df6.rename(columns={'Demanda_uni_equil': 'd6'})
df7 = df7.rename(columns={'Demanda_uni_equil': 'd7'})
df8 = df8.rename(columns={'Demanda_uni_equil': 'd8'})
# Left-join d6 .. d8 onto the running frame by 'id'.
# `df` is both input and output here, so re-running the cell used to merge
# onto a frame that already had d6/d7/d8. That produced suffixed duplicates
# (d6_x, d6_y, ...) and broke later df['d6'] lookups. Dropping any stale
# copies first makes the cell safe to re-run and is a no-op on the first run.
df = df.drop(['d6', 'd7', 'd8'], axis=1, errors='ignore')
df = (
    df
    .merge(df6, how='left', on=['id'])
    .merge(df7, how='left', on=['id'])
    .merge(df8, how='left', on=['id'])
)

In [83]:
# Pairwise Pearson correlation between d6, d7 and d8, printed in the order
# (d6,d7), (d6,d8), (d7,d8). Each line is whatever stats.pearsonr returns.
w8_cols = ['d6', 'd7', 'd8']
for pos, left_col in enumerate(w8_cols):
    for right_col in w8_cols[pos + 1:]:
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print(stats.pearsonr(df[left_col], df[right_col]))


(0.87384838745332227, 0.0)
(0.90843548653792394, 0.0)
(0.93853149179902695, 0.0)

In [85]:
# Second blend, stored in its own column: 60% d6, 20% d7, 20% d8.
blend_d6_d8 = 0.6*df['d6'] + 0.2*df['d7'] + 0.2*df['d8']
df['Demanda_uni_equil1'] = blend_d6_d8

In [90]:
# Show the first three rows of the merged frame.
df.head(3)


Out[90]:
d1 id d2 d3 d4 d5 Demanda_uni_equil d6 d7 d8 Demanda_uni_equil1 d10
0 3.32189 4721633 3.43315 3.22524 3.24967 3.23493 3.554216 3.75740 3.58134 3.59559 3.689826 3.615020
1 2.08302 6035419 2.29651 2.08906 2.01552 1.89152 2.055526 1.94513 1.81329 2.16537 1.962810 2.067495
2 1.62305 3462602 1.70056 1.65728 1.52238 1.61837 1.631257 1.71653 1.69398 1.75552 1.719818 1.610435

In [ ]:


In [87]:
# Load one more submission and expose its predictions as column 'd10'.
df10 = pd.read_csv(path+'ensemble-kele-0442.csv')
df10 = df10.rename(columns={'Demanda_uni_equil': 'd10'})
# `df` is both input and output of the merge, so re-running this cell used to
# create d10_x / d10_y and break later df['d10'] lookups. Dropping a stale
# 'd10' first makes the cell safe to re-run and is a no-op on the first run.
df = df.drop(['d10'], axis=1, errors='ignore')
df = pd.merge(df, df10, how='left', on=['id'])

In [89]:
# Final blend: 40% on a 60/40 mix of the two blend columns computed earlier,
# plus 60% on d10.
# NOTE: this reads and then overwrites 'Demanda_uni_equil'. Running the cell a
# second time without recomputing that column first compounds the weights.
mix_of_blends = 0.6*df['Demanda_uni_equil'] + 0.4*df['Demanda_uni_equil1']
df['Demanda_uni_equil'] = 0.4*mix_of_blends + 0.6*df['d10']

In [ ]:


In [18]:
# result['d1'] = result['d1'].apply(lambda x: max(x,0.000000001))    
# result['d2'] = result['d2'].apply(lambda x: max(x,0.000000001))
# result['d3'] = result['d3'].apply(lambda x: max(x,0.000000001))

In [19]:
def f(x):
    """Return the harmonic mean of the values in ``x`` via ``stats.hmean``.

    NOTE(review): a commented-out cell nearby clamps d1..d3 to a tiny positive
    value, which suggests the inputs are expected to be strictly positive.
    Confirm before relying on this with zero or negative predictions.
    """
    harmonic_mean = stats.hmean(x)
    return harmonic_mean
# Row-wise harmonic mean of d1, d2 and d3, written to the submission column.
# NOTE(review): `result` is not created in any cell visible here; confirm
# where it comes from before running.
hmean_inputs = result[['d1', 'd2', 'd3']]
result['Demanda_uni_equil'] = hmean_inputs.apply(f, axis=1)

In [11]:
def f(x):
    """Blend d1, d2 and d3 in log1p space and map back with expm1.

    ``x`` is anything indexable by 'd1', 'd2' and 'd3' that yields scalars.
    The log1p values are averaged with weights 0.25 / 0.5 / 0.25, and the
    result is clamped so it is never below 0.
    """
    log_blend = (
        0.25 * math.log1p(x['d1'])
        + 0.5 * math.log1p(x['d2'])
        + 0.25 * math.log1p(x['d3'])
    )
    blended = math.expm1(log_blend)
    return max(blended, 0)
# Apply the log-space blend row by row. `f` calls scalar math functions, so
# it has to be invoked once per row.
# NOTE(review): `result` is not created in any cell visible here; confirm
# where it comes from before running.
log_blend_inputs = result[['d1', 'd2', 'd3']]
result['Demanda_uni_equil'] = log_blend_inputs.apply(f, axis=1)

In [9]:
def f(x):
    """Weighted sum of d1, d2 and d3 with weights 0.1 / 0.6 / 0.3.

    ``x`` is anything indexable by the keys 'd1', 'd2' and 'd3'.
    """
    part_d1 = 0.1 * x['d1']
    part_d2 = 0.6 * x['d2']
    part_d3 = 0.3 * x['d3']
    return part_d1 + part_d2 + part_d3
# The `f` defined just above only uses `*` and `+` on the d1/d2/d3 entries, so
# it can be applied to whole columns at once. This yields the same values as
# the row-wise .apply(f, axis=1) without calling Python once per row.
# NOTE(review): `result` is not created in any cell visible here; confirm
# where it comes from before running.
result['Demanda_uni_equil'] = f(result)

In [11]:
def f(x):
    """Weighted sum of d2 and d3 with weights 0.6 / 0.4.

    ``x`` is anything indexable by the keys 'd2' and 'd3'.
    """
    part_d2 = 0.6 * x['d2']
    part_d3 = 0.4 * x['d3']
    return part_d2 + part_d3
# The `f` defined just above only uses `*` and `+` on the d2/d3 entries, so it
# can be applied to whole columns at once. This yields the same values as the
# row-wise .apply(f, axis=1) without calling Python once per row.
# NOTE(review): `result` is not created in any cell visible here; confirm
# where it comes from before running.
result['Demanda_uni_equil'] = f(result)

TODO: try applying log1p first, then hmean, then expm1? (原文: 先 log1p 再 hmean 再 expm1?)


In [ ]:


In [91]:
# Write the submission file: only the id and the blended prediction, without
# the DataFrame index.
submission_columns = ['id', 'Demanda_uni_equil']
sub = df[submission_columns]
sub.to_csv(path + 'ensemble_fulll.csv', index=False)