In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline
In [14]:
# Result tables, addressed as results[model][treatment].
# Every CSV is ';'-separated, takes its column names from the second line of
# the file (header=1) and is indexed by the pair of columns at positions 0 and
# 7, which yields a two-level index.
results = {
    model_name: {
        treatment: pd.read_csv(
            csv_path,
            sep=";",
            skip_blank_lines=True,
            header=1,
            index_col=[0, 7],
            skipinitialspace=True,
        )
        for treatment, csv_path in csv_by_treatment.items()
    }
    for model_name, csv_by_treatment in {
        'DM': {
            'c_stop': "Arquivos/D2V-DM-CSV/D2V-DM_CBOW_cstop.csv",
            # Disabled inputs (uncomment to load them as well):
            # 's_stop': "Arquivos/D2V-DM-CSV/D2V-DM_CBOW_sstop.csv",
            # 'steeming': "Arquivos/D2V-DM-CSV/D2V-DM_CBOW_stemming.csv",
        },
        # Disabled model (uncomment to load it as well):
        # 'DBOW': {
        #     'c_stop': "Arquivos/D2V-DBOW-CSV/D2V-DBOW_SKIPGRAM_cstop.csv",
        #     's_stop': "Arquivos/D2V-DBOW-CSV/D2V-DBOW_SKIPGRAM_sstop.csv",
        #     'steeming': "Arquivos/D2V-DBOW-CSV/D2V_DBOW_SKIPGRAM_stemming.csv",
        # },
    }.items()
}
In [ ]:
# Overall
# Flatten `results` into a long-format table `r`: one row per
# (model, treatment, neighbor count, dataset) carrying the values of the
# 'Skewness Good ' and 'Kurtosis Good ' columns.
s = {'Dataset' : [],
     'Modelo' : [],
     'Tratamento' : [],
     'Skewness' : [],
     'Kurtosis' : [],
     'Vizinhos' : [],}
for model in results.keys():
    for method in results[model].keys():
        table = results[model][method]
        for v in [3, 7, 11]:
            # The second index level is matched against the neighbor count
            # ('Vizinhos'); the first level is stored as the dataset name.
            idx = table.index.get_level_values(1) == v
            subset = table.loc[idx]
            # Repeat the constant columns once per matching row instead of
            # assuming a fixed 11 rows, so every list keeps the same length
            # and pd.DataFrame(s) cannot fail on a different row count.
            n_rows = len(subset)
            s['Modelo'].extend([model] * n_rows)
            s['Tratamento'].extend([method] * n_rows)
            s['Skewness'].extend(subset[u'Skewness Good '].values)
            s['Kurtosis'].extend(subset[u'Kurtosis Good '].values)
            s['Vizinhos'].extend([v] * n_rows)
            s['Dataset'].extend(subset.index.get_level_values(0).values)
r = pd.DataFrame(s)
In [ ]:
# Box plots of skewness per model, coloured by treatment, with one facet
# column per neighbor count.
# NOTE(review): seaborn 0.9 renamed factorplot to catplot; switch the call if
# the environment is upgraded.
g = sns.factorplot(
    x="Modelo",
    y="Skewness",
    hue="Tratamento",
    col="Vizinhos",
    data=r,
    kind="box",
)
# Log scale is requested on the first facet's axes only.
g.fig.axes[0].set_yscale('log')
sns.despine(left=True)
In [ ]:
# Box plots of kurtosis per model, coloured by treatment, with one facet
# column per neighbor count.
# NOTE(review): seaborn 0.9 renamed factorplot to catplot; switch the call if
# the environment is upgraded.
g = sns.factorplot(
    x="Modelo",
    y="Kurtosis",
    hue="Tratamento",
    col="Vizinhos",
    data=r,
    kind="box",
)
# Log scale is requested on the first facet's axes only.
g.fig.axes[0].set_yscale('log')
In [ ]:
# Flatten `results` into a long-format table `r` in which every
# (model, treatment, neighbor count, dataset) appears twice: once with the
# "Good" statistics (labelled 'Overall' in the 'Tipo' column) and once with
# the "Bad" statistics (labelled 'Bad').
# NOTE(review): the 'Good' columns are labelled 'Overall' here — confirm that
# this naming is intended.
s = {'Modelo' : [],
     'Tratamento' : [],
     'Skewness' : [],
     'Kurtosis' : [],
     'Tipo' : [],
     'Vizinhos' : [],
     'Dataset' : []}
for model in results.keys():
    for method in results[model].keys():
        table = results[model][method]
        for v in [3, 7, 11]:
            # The second index level is matched against the neighbor count
            # ('Vizinhos'); the first level is stored as the dataset name.
            idx = table.index.get_level_values(1) == v
            subset = table.loc[idx]
            # Derive the repeat counts from the data instead of assuming
            # 11 rows per neighbor count, so all lists stay the same length.
            n_rows = len(subset)
            datasets = subset.index.get_level_values(0).values
            s['Modelo'].extend([model] * (2 * n_rows))
            s['Tratamento'].extend([method] * (2 * n_rows))
            s['Vizinhos'].extend([v] * (2 * n_rows))
            # "Good" rows are appended first, then "Bad", for every column.
            for tipo, skew_col, kurt_col in (
                ('Overall', u'Skewness Good ', u'Kurtosis Good '),
                ('Bad', u'Skewness Bad ', u'Kurtosis Bad '),
            ):
                s['Skewness'].extend(subset[skew_col].values)
                s['Kurtosis'].extend(subset[kurt_col].values)
                s['Tipo'].extend([tipo] * n_rows)
                s['Dataset'].extend(datasets)
r = pd.DataFrame(s)
In [ ]:
In [ ]:
# Kurtosis per dataset, coloured by 'Tipo', on a grid with one row per
# treatment and one column per neighbor count.
# NOTE(review): seaborn 0.9 renamed factorplot to catplot; switch the call if
# the environment is upgraded.
g = sns.factorplot(
    x="Dataset",
    y="Kurtosis",
    hue="Tipo",
    row="Tratamento",
    col="Vizinhos",
    data=r,
)
# Log scale is requested on the first facet's axes only.
g.fig.axes[0].set_yscale('log')
# Tilt the dataset names on the x axis.
g.set_xticklabels(rotation=70)
In [ ]:
# Scratch check: number of values collected under 'SkewnessB'.
# NOTE(review): the `s` built by the table-building cell above this one has no
# 'SkewnessB' key (only the cell that follows creates it), so this raises
# KeyError under Restart & Run All. Move it below that cell or delete it.
len(s['SkewnessB'])
In [15]:
# Overall
s = {'Modelo' : [],
'Tratamento' : [],
'SkewnessG' : [],
'SkewnessB' : [],
'Kurtosis' : [],
'Vizinhos' : [],}
for model in results.keys():
for method in results[model].keys():
for v in [3, 7, 11]:
idx = results[model][method].index.get_level_values(1)==v
skewg = results[model][method].iloc[idx][u'Skewness Good ']
skewb = results[model][method].iloc[idx][u'Skewness Bad ']
kurt = results[model][method].iloc[idx][u'Kurtosis Good ']
s['Modelo'].extend([model]*11)
s['Tratamento'].extend([method]*11)
s['SkewnessG'].extend(np.array(skewg))
s['SkewnessB'].extend(np.array(skewb))
s['Kurtosis'].extend(np.array(kurt))
s['Vizinhos'].extend([v] * 11)
r = pd.DataFrame(s)
In [18]:
from sklearn.preprocessing import normalize
# normalize() expects a 2-D (n_samples, n_features) array. Passing the flat
# list relied on an implicit 1-D -> (1, n) promotion that scikit-learn
# deprecated in 0.17 and turned into a ValueError in 0.19, so reshape
# explicitly to a single sample; the result is the same on old versions.
normalize(np.asarray(s['SkewnessG']).reshape(1, -1)).shape
Out[18]:
In [22]:
import os

import plotly.plotly
from plotly.tools import FigureFactory as FF
from sklearn.preprocessing import normalize
import numpy as np

# Credentials must not live in the notebook: read them from the environment.
# set_credentials_file() persists them to plotly's credentials file, so when
# the variables are not set the previously stored credentials are used.
# NOTE(review): the API key that used to be hard-coded here is exposed in the
# notebook history and should be regenerated.
plotly_username = os.environ.get('PLOTLY_USERNAME')
plotly_api_key = os.environ.get('PLOTLY_API_KEY')
if plotly_username and plotly_api_key:
    plotly.tools.set_credentials_file(username=plotly_username, api_key=plotly_api_key)

# One sample per group: skewness collected from the "Good" and "Bad" columns.
x1 = np.array(s['SkewnessG'])
x2 = np.array(s['SkewnessB'])

hist_data = [x1, x2]
group_labels = ['Good', 'Bad']
colors = ['#3A4750', '#F64E8B']

# Create distplot with curve_type set to 'normal'
fig = FF.create_distplot(hist_data, group_labels, bin_size=.5, curve_type='normal', colors=colors)

# Add title
fig['layout'].update(title='Distplot with Normal Distribution')

# Plot!
plotly.plotly.iplot(fig, filename='Distplot with Normal Curve', validate=False)
Out[22]:
In [ ]: