Analysis01: from the key


In [18]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

from utils import load_buzz, select, write_result
from containers import Questions, Users, Categories

In [19]:
import pickle


questions = pickle.load(open('questions01.pkl', 'rb'))
users = pickle.load(open('users01.pkl', 'rb'))
categories = pickle.load(open('categories01.pkl', 'rb'))

In [24]:
import csv

origin_key = csv.DictReader(open('../data/key.csv'))
key = {int(item['id']): float(item['position']) for item in origin_key}

origin_guess = csv.DictReader(open('../data/0.7guess_adj.csv'))
guess = {int(item['id']): float(item['position']) for item in origin_guess}

test_set = load_buzz()['test']

In [25]:
stat = test_set.copy()

In [26]:
key[25844]


Out[26]:
-222.0

In [27]:
guess[25844]


Out[27]:
68.86399251560573

In [34]:
-68.86399251560573-222


Out[34]:
-290.86399251560573

In [29]:
import math

In [33]:
-290.86399251560573**2


Out[33]:
-84601.86214211835

In [37]:



Out[37]:
78

In [ ]:


In [62]:
q_lengths = [len(questions[key]['question'].split()) for key in questions]
plt.hist(q_lengths, bins=50, align='left', facecolor='g', alpha=0.50)
plt.xlabel("Question length")
plt.ylabel("Count")
plt.title("Question lengths")
plt.grid(True)
plt.show()



In [64]:
guess_pos = [guess[key] for key in guess]
plt.hist(guess_pos, bins=50, align='left', facecolor='y', alpha=0.50)
plt.xlabel("Pos")
plt.ylabel("Count")
plt.title("Positions of guess")
plt.grid(True)
plt.show()



In [78]:
key_pos = [key[id] for id in key]
plt.hist(key_pos, bins=100, align='left', facecolor='b', alpha=0.50)
plt.xlabel("Pos")
plt.ylabel("Count")
plt.title("Positions of key")
plt.grid(True)
plt.show()



In [75]:
train = load_buzz()['train']
key_pos = [train[id]['position'] for id in train]
plt.hist(key_pos, bins=100, align='left', facecolor='b', alpha=0.50)
plt.xlabel("Pos")
plt.ylabel("Count")
plt.title("Positions of train set")
plt.grid(True)
plt.show()



In [73]:
plt.hist(guess_pos, bins=range(-300, 200), align='left', rwidth=50, facecolor='b', alpha=0.30)
plt.hist(key_pos, bins=range(-300, 200), align='left', rwidth=50, facecolor='g', alpha=0.30)
plt.xlabel("Pos")
plt.ylabel("Count")
plt.title("Positions of key")
plt.grid(True)
plt.show()



In [76]:
questions[81]


Out[76]:
{'acc_ratio_qid': 0.8888888888888888,
 'answer': 'eugene delacroix',
 'ave_pos_qid': 62.814814814814817,
 'cat_qid': '5',
 'category': 'Fine Arts',
 'group': 'test',
 'ne_count': 19,
 'ne_mean': 48.68421052631579,
 'ne_median': 43.0,
 'ne_mod': 34,
 'ne_nor_mean': 0.6241565452091767,
 'ne_tags': [['ORGANIZATION', 'Chapelle', 6],
  ['PERSON', 'Anges', 8],
  ['ORGANIZATION', 'Heliodorus', 15],
  ['PERSON', 'Rumored', 22],
  ['GPE', 'Talleyrand', 28],
  ['ORGANIZATION', 'Paris', 34],
  ['CD', '1822', 37],
  ['PERSON', 'Dante', 39],
  ['GPE', 'Virgil', 41],
  ['GPE', 'Hell', 43],
  ['PERSON', 'Africa', 50],
  ['GPE', 'Algiers', 60],
  ['ORGANIZATION', 'Massacre', 66],
  ['GSP', 'Chios', 68],
  ['GPE', 'Sardanopolus', 75],
  ['ORGANIZATION', 'FTP', 78],
  ['GPE', 'French', 83],
  ['ORGANIZATION', 'Romantic', 84],
  ['PERSON', 'Liberty', 88]],
 'pos_token': {0: '',
  1: 'artists',
  2: 'decorations',
  5: 'chapelle',
  6: 'des',
  7: 'anges',
  10: 'sulpice',
  11: 'included',
  13: 'heliodorus',
  14: 'expelled',
  17: 'temple',
  18: 'rumored',
  22: 'son',
  24: 'talleyrand',
  26: 'debuted',
  29: 'paris',
  30: 'salon',
  32: '1822',
  34: 'dante',
  36: 'virgil',
  38: 'hell',
  41: 'trip',
  43: 'northern',
  44: 'africa',
  45: 'provided',
  46: 'inspiration',
  48: 'exotic',
  49: 'subjects',
  50: 'like',
  52: 'women',
  54: 'algiers',
  55: 'creator',
  58: 'massacre',
  60: 'chios',
  63: 'death',
  65: 'sardanopolus',
  66: 'ftp',
  70: 'this_french',
  71: 'romantic',
  72: 'painter',
  74: 'liberty',
  75: 'leading',
  77: 'people'},
 'q_acc_ratio_cat': 0.7560975609756098,
 'q_ave_pos_cat': 43.640185830429736,
 'question': 'This artist\'s decorations for the Chapelle des Anges of S. Sulpice included his "Heliodorus Expelled from the Temple". Rumored to be the son of Talleyrand, he debuted at the Paris Salon of 1822 with Dante and Virgil in Hell, while a trip to northern Africa provided inspiration for exotic subjects like The Women of Algiers. Creator of "The Massacre at Chios" and "The Death of Sardanopolus", FTP, who was this French Romantic painter of "Liberty Leading the People"?'}

In [79]:
from sklearn.preprocessing import normalize

In [81]:
normalize([-0.1, -0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])


Out[81]:
array([[-0.05923489, -0.11846978,  0.17770466,  0.23693955,  0.29617444,
         0.35540933,  0.41464421,  0.4738791 ,  0.53311399]])

In [ ]: