In [7]:
# -*- coding: utf-8 -*-
import json
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
# Directory holding the processed input files, resolved against the current
# working directory. The components are passed separately so os.path.join
# inserts the platform's own separator instead of a hard-coded '/'.
DATA_DIR = os.path.join(os.getcwd(), 'data', 'processed')
In [10]:
def load_data(file_path):
    """Read a JSON file and return its decoded contents.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for that file.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file content is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, encoding='utf-8') as f:
        return json.load(f)
def reduce_annotation(items):
    """Collapse each item's annotations into a single majority-vote label.

    Every entry of ``items['labels']`` is iterated as a collection of
    annotations, and each annotation's ``'quality'`` value is read. An entry
    is labelled ``'0'`` only when ``'0'`` votes strictly outnumber ``'1'``
    votes; in every other case (including ties and entries with no
    annotations) it is labelled ``'1'``.

    The share of each label is printed. When there are no labels at all the
    shares are reported as 0.0 instead of raising ``ZeroDivisionError``.

    Args:
        items: Mapping with a ``'labels'`` key as described above.

    Returns:
        The same ``items`` object. Note that it is modified in place:
        ``items['labels']`` is replaced by the list of reduced labels.
    """
    labels = []
    for annotations in items['labels']:
        qualities = [annotation['quality'] for annotation in annotations]
        # Strict '>' means a tie falls through to '1'.
        label = '0' if qualities.count('0') > qualities.count('1') else '1'
        labels.append(label)
    items['labels'] = labels

    # Guard the ratio computation so an empty label list does not divide by zero.
    total = len(labels)
    ratio_0 = labels.count('0') / total if total else 0.0
    ratio_1 = labels.count('1') / total if total else 0.0
    print('Label Percentage:')
    print(' 0: {}'.format(ratio_0))
    print(' 1: {}'.format(ratio_1))
    return items
def predict(X, th=10):
    """Label each value in ``X`` by comparing it against a threshold.

    Args:
        X: Iterable of values comparable with ``th``.
        th: Threshold; values greater than or equal to it map to ``'1'``.

    Returns:
        A list of ``'1'`` / ``'0'`` strings, one per element of ``X``, in
        the same order.
    """
    predictions = []
    for like in X:
        if like >= th:
            predictions.append('1')
        else:
            predictions.append('0')
    return predictions
In [40]:
# Fixed seed so the train/test split, and therefore the reported metrics,
# are the same on every Restart & Run All.
SEED = 42

items = load_data(os.path.join(DATA_DIR, 'likes.json'))
# Wrap every value in its own list so the estimator receives one row with a
# single feature per sample.
items['data'] = [[like] for like in items['data']]
items = reduce_annotation(items)
X_train, X_test, y_train, y_test = train_test_split(
    items['data'], items['labels'], test_size=0.4, random_state=SEED)
lr = LogisticRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
# Evaluation
print('Accuracy: {}'.format(accuracy_score(y_test, y_pred)))
print(classification_report(y_test, y_pred))
In [ ]: