In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from collections import Counter
Download Dataset: avaliacoes_usuario.csv
In [2]:
# Load the user-ratings CSV (path is relative to the notebook's working directory).
movies = pd.read_csv(filepath_or_buffer='datasets/avaliacoes_usuario.csv')
In [3]:
# Preview the first five rows (five is the default for head()).
movies.head()
Out[3]:
In [4]:
# Count how many rows carry each distinct value of the 'Gostou' column.
Counter(movies['Gostou'])
Out[4]:
In [5]:
# Share of rows marked as liked, computed from the data instead of from counts
# copied by hand out of the Counter output (which go stale if the CSV changes).
# NOTE(review): assumes 'Gostou' encodes "liked" as 1 — TODO confirm.
print('Gostou {}'.format((movies['Gostou'] == 1).mean()))
In [6]:
# Share of rows marked as not liked, computed from the data instead of from
# counts copied by hand out of the Counter output.
# NOTE(review): assumes 'Gostou' encodes "not liked" as 0 — TODO confirm.
print('Não gostou {}'.format((movies['Gostou'] == 0).mean()))
In [7]:
# Split the frame by column position: columns 1..15 are the features
# (column 0 is left out), everything from column 16 onward is the label frame.
caract = movies.loc[:, movies.columns[1:16]]
gostos = movies.loc[:, movies.columns[16:]]
In [8]:
# Split features and labels into train/test partitions using the default
# test size (25% of the rows).
# - stratify=gostos keeps the class ratio of the labels the same in both
#   partitions, which is what the proportion checks further down expect.
# - random_state pins the shuffle so Restart & Run All reproduces the same
#   split (and therefore the same accuracies).
treino, teste, treino_labels, teste_labels = train_test_split(
    caract, gostos, stratify=gostos, random_state=42
)
In [9]:
# Check which container type train_test_split returned for the training features.
type(treino)
Out[9]:
In [10]:
# Tally the 'Gostou' values that ended up in the training labels.
Counter(treino_labels['Gostou'])
Out[10]:
In [11]:
# Tally the 'Gostou' values that ended up in the test labels.
Counter(teste_labels['Gostou'])
Out[11]:
In [12]:
# Share of liked rows in the training labels, computed from the current split.
# The previous hard-coded counts came from one particular random split and no
# longer match after the split is re-run.
# NOTE(review): assumes treino_labels is still a DataFrame here and that
# 'Gostou' encodes "liked" as 1 — TODO confirm.
print('Gostou % {} do treino_label (deve manter a proporção +- 80%)'.format((treino_labels['Gostou'] == 1).mean()))
In [13]:
# Share of liked rows in the test labels, computed from the current split.
# The previous hard-coded counts came from one particular random split and no
# longer match after the split is re-run.
# NOTE(review): assumes teste_labels is still a DataFrame here and that
# 'Gostou' encodes "liked" as 1 — TODO confirm.
print('Gostou % {} do teste_label (deve manter a proporção +- 80%)'.format((teste_labels['Gostou'] == 1).mean()))
In [14]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.naive_bayes import MultinomialNB,GaussianNB
In [15]:
# Inspect the type of the training features before they are converted to NumPy.
type(treino)
Out[15]:
In [16]:
# Show the first row of the training features (positional slice).
treino.iloc[:1]
Out[16]:
In [17]:
# Convert the feature frames to plain NumPy arrays.
# np.array on a DataFrame already yields a 2-D (rows, columns) array, so the
# previous reshape with a hard-coded column count of 15 was redundant and would
# break as soon as the feature column slice changed.
treino = np.array(treino)
teste = np.array(teste)
In [18]:
# Re-check the type of the training features after the np.array conversion.
type(treino)
Out[18]:
In [19]:
# Show the first row of the converted training array.
treino[0]
Out[19]:
In [20]:
# Inspect the container type of the training labels before they are flattened.
type(treino_labels)
Out[20]:
In [21]:
# Preview the first five training labels (five is the default for head()).
treino_labels.head()
Out[21]:
In [22]:
# Flatten the label frames into 1-D NumPy arrays.
treino_labels = treino_labels.to_numpy().ravel()
teste_labels = teste_labels.to_numpy().ravel()
In [23]:
# Re-check the type of the training labels after .values.ravel().
type(treino_labels)
Out[23]:
In [24]:
# Show the first five flattened training labels.
treino_labels[:5]
Out[24]:
In [25]:
# Shape of the flattened training labels.
treino_labels.shape
Out[25]:
In [26]:
# Create a logistic-regression classifier with default hyperparameters and
# train it on the training split; the fit call is the cell's displayed result.
modelo = LogisticRegression()
modelo.fit(treino, treino_labels)
Out[26]:
In [27]:
# Predict labels for the held-out test features; the bare name on the last
# line displays the predictions.
previsoes = modelo.predict(teste)
previsoes
Out[27]:
In [28]:
# Shape of the true test labels, to compare against the predictions' shape.
teste_labels.shape
Out[28]:
In [29]:
# Shape of the logistic-regression predictions.
previsoes.shape
Out[29]:
In [30]:
# Accuracy of the logistic-regression predictions against the true test labels.
acuracia = accuracy_score(y_true=teste_labels, y_pred=previsoes)
acuracia
Out[30]:
In [31]:
# Hand-written feature vector for one extra movie: 15 values, one per feature
# column. It is wrapped in a list so predict receives a single-row 2-D input.
# NOTE(review): the meaning of each position depends on the CSV column order,
# which is not visible here — confirm against the dataset.
zootopia = [0,0,0,0,0,0,0,1,1,1,1,0,1,110,27.74456356]
modelo.predict([zootopia])
Out[31]:
In [32]:
# Multinomial naive Bayes classifier with default settings.
modelo_NB = MultinomialNB()
In [33]:
# Train the multinomial NB model on the same training split.
modelo_NB.fit(treino,treino_labels)
Out[33]:
In [34]:
# Predict labels for the test features with the multinomial NB model.
previsoes_NB = modelo_NB.predict(teste)
In [35]:
# Accuracy of the multinomial NB predictions against the true test labels
# (this overwrites the previous value of `acuracia`).
acuracia = accuracy_score(y_true=teste_labels, y_pred=previsoes_NB)
acuracia
Out[35]:
In [36]:
# Same hand-written 15-value feature vector as used for the logistic
# regression, now classified with the multinomial NB model.
zootopia = [0,0,0,0,0,0,0,1,1,1,1,0,1,110,27.74456356]
modelo_NB.predict([zootopia])
Out[36]:
In [37]:
# Gaussian naive Bayes classifier with default settings.
modelo_GNB = GaussianNB()
In [38]:
# Train the Gaussian NB model on the same training split.
modelo_GNB.fit(treino,treino_labels)
Out[38]:
In [39]:
# Predict labels for the test features with the Gaussian NB model.
previsoes_GNB = modelo_GNB.predict(teste)
In [40]:
# Accuracy of the Gaussian NB predictions against the true test labels
# (this overwrites the previous value of `acuracia`).
acuracia = accuracy_score(y_true=teste_labels, y_pred=previsoes_GNB)
acuracia
Out[40]:
In [41]:
# Same hand-written 15-value feature vector as before, now classified with
# the Gaussian NB model.
zootopia = [0,0,0,0,0,0,0,1,1,1,1,0,1,110,27.74456356]
modelo_GNB.predict([zootopia])
Out[41]: