In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

%matplotlib inline

In [14]:
results = {
    
    'DM' : {
        'c_stop'   : pd.read_csv("Arquivos/D2V-DM-CSV/D2V-DM_CBOW_cstop.csv",sep=";",skip_blank_lines=True,header=1,index_col=[0,7],skipinitialspace=True), 
        #'s_stop'   : pd.read_csv("Arquivos/D2V-DM-CSV/D2V-DM_CBOW_sstop.csv",sep=";",skip_blank_lines=True,header=1,index_col=[0,7],skipinitialspace=True),
        #'steeming' : pd.read_csv("Arquivos/D2V-DM-CSV/D2V-DM_CBOW_stemming.csv",sep=";",skip_blank_lines=True,header=1,index_col=[0,7],skipinitialspace=True)
    },
    
    #'DBOW' : {
     #   'c_stop'   : pd.read_csv("Arquivos/D2V-DBOW-CSV/D2V-DBOW_SKIPGRAM_cstop.csv",sep=";",skip_blank_lines=True,header=1,index_col=[0,7],skipinitialspace=True), 
        #'s_stop'   : pd.read_csv("Arquivos/D2V-DBOW-CSV/D2V-DBOW_SKIPGRAM_sstop.csv",sep=";",skip_blank_lines=True,header=1,index_col=[0,7],skipinitialspace=True),
        #'steeming' : pd.read_csv("Arquivos/D2V-DBOW-CSV/D2V_DBOW_SKIPGRAM_stemming.csv",sep=";",skip_blank_lines=True,header=1,index_col=[0,7],skipinitialspace=True)
#    }
}

Overral Hubness


In [ ]:
# Overall

s = {'Dataset'    : [],
     'Modelo'     : [],
     'Tratamento' : [],
     'Skewness'   : [],
     'Kurtosis'   : [],
     'Vizinhos'   : [],}


for model in results.keys():
    for method in results[model].keys():
        for v in [3, 7, 11]:
            idx = results[model][method].index.get_level_values(1)==v
            skew = results[model][method].iloc[idx][u'Skewness Good  ']
            kurt = results[model][method].iloc[idx][u'Kurtosis Good ']
            s['Modelo'].extend([model]*11)
            s['Tratamento'].extend([method]*11)            
            s['Skewness'].extend(np.array(skew))            
            s['Kurtosis'].extend(np.array(kurt))
            s['Vizinhos'].extend([v] * 11)
            s['Dataset'].extend(np.array(results[model][method].index.get_level_values(0)[idx]))
        
r = pd.DataFrame(s)

In [ ]:
g=sns.factorplot(x="Modelo",y="Skewness", hue="Tratamento",col="Vizinhos", data=r,kind="box")
g.fig.get_axes()[0].set_yscale('log')
sns.despine(left=True)

In [ ]:
g=sns.factorplot(x="Modelo",y="Kurtosis",hue="Tratamento",col="Vizinhos",data=r,kind="box")
g.fig.get_axes()[0].set_yscale('log')

In [ ]:
s = {'Modelo'     : [],
     'Tratamento' : [],
     'Skewness'   : [],
     'Kurtosis'   : [],
     'Tipo'       : [],
     'Vizinhos'   : [],
     'Dataset'    : []}


for model in results.keys():
    for method in results[model].keys():
        for v in [3, 7, 11]:
            idx = results[model][method].index.get_level_values(1)==v
            s['Modelo'].extend([model]*22)
            s['Tratamento'].extend([method]*22)
            s['Vizinhos'].extend([v] * 22)
            s['Skewness'].extend(np.array(results[model][method].iloc[idx][u'Skewness Good  ']))
            s['Kurtosis'].extend(np.array(results[model][method].iloc[idx][u'Kurtosis Good ']))
            s['Tipo'].extend(['Overall']*11)
            s['Skewness'].extend(np.array(results[model][method].iloc[idx][u'Skewness Bad ']))
            s['Kurtosis'].extend(np.array(results[model][method].iloc[idx][u'Kurtosis Bad ']))
            s['Tipo'].extend(['Bad']*11)
            s['Dataset'].extend(np.array(results[model][method].index.get_level_values(0)[idx]))
            s['Dataset'].extend(np.array(results[model][method].index.get_level_values(0)[idx]))
        
r = pd.DataFrame(s)

In [ ]:


In [ ]:
g=sns.factorplot(x="Dataset",y="Kurtosis",hue="Tipo",row="Tratamento",col="Vizinhos",data=r)
g.fig.get_axes()[0].set_yscale('log')
g.set_xticklabels(rotation=70)

In [ ]:
len(s['SkewnessB'])

In [15]:
# Overall

s = {'Modelo'     : [],
     'Tratamento' : [],
     'SkewnessG'  : [],
     'SkewnessB'  : [],
     'Kurtosis'   : [],
     'Vizinhos'   : [],}


for model in results.keys():
    for method in results[model].keys():
        for v in [3, 7, 11]:
            idx = results[model][method].index.get_level_values(1)==v
            skewg = results[model][method].iloc[idx][u'Skewness Good  ']
            skewb = results[model][method].iloc[idx][u'Skewness Bad ']
            kurt = results[model][method].iloc[idx][u'Kurtosis Good ']
            s['Modelo'].extend([model]*11)
            s['Tratamento'].extend([method]*11)            
            s['SkewnessG'].extend(np.array(skewg))            
            s['SkewnessB'].extend(np.array(skewb))
            s['Kurtosis'].extend(np.array(kurt))
            s['Vizinhos'].extend([v] * 11)
        
r = pd.DataFrame(s)

In [18]:
from sklearn.preprocessing import normalize

normalize(s['SkewnessG']).shape


/usr/local/lib/python2.7/dist-packages/sklearn/utils/validation.py:394: DeprecationWarning:

Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.

Out[18]:
(1, 33)

In [22]:
import plotly.plotly
from plotly.tools import FigureFactory as FF
plotly.tools.set_credentials_file(username='kadnoise', api_key='3sg9bvrwy6')

from sklearn.preprocessing import normalize
import numpy as np

x1 = np.array(s['SkewnessG'])
x2 = np.array(s['SkewnessB'])

#for n in range(66):
#    if n<33:
#        x1.append(s['Skewness'][n])
#    else:
#        x2.append(s['Skewness'][n])
    
hist_data = [x1, x2]

group_labels = ['Good', 'Bad']

colors = ['#3A4750', '#F64E8B']

# Create distplot with curve_type set to 'normal'
fig = FF.create_distplot(hist_data, group_labels, bin_size=.5, curve_type='normal', colors=colors)

# Add title
fig['layout'].update(title='Distplot with Normal Distribution')

# Plot!
plotly.plotly.iplot(fig, filename='Distplot with Normal Curve', validate=False)


/usr/local/lib/python2.7/dist-packages/requests/packages/urllib3/util/ssl_.py:122: InsecurePlatformWarning:

A true SSLContext object is not available. This prevents urllib3 from configuring SSL appropriately and may cause certain SSL connections to fail. You can upgrade to a newer version of Python to solve this. For more information, see https://urllib3.readthedocs.org/en/latest/security.html#insecureplatformwarning.

Out[22]:

In [ ]: