In [1]:
#!/usr/bin/env python
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pdb
import csv
from dataset import *
from collections import Counter
import numpy as np
import operator
import matplotlib.pyplot as pl
import pandas as pd
from bs4 import BeautifulSoup
import re
from nltk.corpus import stopwords
from gensim.models import word2vec
from names_to_arrays import *
from sknn.mlp import Classifier, Layer
In [2]:
# Streets -> 10-D vectors: train a word2vec model on the street table.
d = data(
    "../../data/List_of_Streets_and_Intersections.csv",
    categorical=["streetname", "from_st", "to_st"],
)
params_model = dict(window=5, min_count=1, workers=5)

# Each table row is flattened into one space-joined string and handed to
# review_to_words (from names_to_arrays; presumably returns the row's token
# list, which is what Word2Vec expects per sentence -- TODO confirm).
new_data = [
    review_to_words(
        " ".join(str(cell) for cell in d.df.loc[row_label,]),
        keepcaracters="[^0-9A-Z]",
        stops=ABREVIATIONS,
    )
    for row_label in d.df.index
]

model = word2vec.Word2Vec(new_data, size=10, **params_model)
In [2]:
"""
training_set=data("../../data/train.csv",
categorical=["PdDistrict","Resolution","Address","Dates","DayOfWeek"],
measure= ["X","Y"],
hidden_cluster=["Category"],
header = 0,
index=None)
"""
# Load train.csv with no categorical columns, "X" and "Y" as the measure
# columns and "Category" as the hidden-cluster (label) column.
train_options = dict(
    categorical=[],
    measure=["X", "Y"],
    hidden_cluster=["Category"],
    header=0,
    index=None,
)
training_set = data("../../data/train.csv", **train_options)

# standardize_table is defined in dataset.py; presumably it rescales the
# measure columns -- TODO confirm.
training_set.standardize_table()
In [3]:
# Ask the dataset wrapper to turn the "Category" label column into a numerical
# encoding. The return value is not used, so this is assumed to update
# training_set itself (behaviour lives in dataset.py -- TODO confirm).
training_set.convert_cluster_membership_to_numerical(["Category"])
In [62]:
# Build the feature matrix X and label array Y from a small slice of the
# training table so that fitting stays quick.
# NOTE(review): the slice starts at 1, so row 0 is skipped and 999 rows are
# kept -- confirm this is intended rather than [:1000].
training_rows = slice(1, 1000)
X = training_set.df[training_set.features].values[training_rows]
Y = training_set.df[training_set.hidden_cluster].values[training_rows]
In [68]:
# Size the output layer by the number of distinct label values present in Y.
output_dimensionality = len(np.unique(Y))

# Earlier configuration, kept for reference only (never executed):
#   Linear(units=2) -> Tanh(units=output_dimensionality)
#   -> Softmax(units=output_dimensionality), learning_rate=0.02, n_iter=10

# NOTE(review): sknn's platform documentation says this import only takes
# effect if theano has not been imported yet; sknn.mlp is imported in the
# first cell, so this may be a no-op -- confirm, and if GPU execution is
# wanted move it ahead of the sknn.mlp import.
from sknn.platform import gpu32

# Current model: one Maxout hidden layer feeding a Softmax output layer.
nn = Classifier(
    layers=[
        Layer("Maxout", units=2, pieces=2),
        Layer("Softmax", units=output_dimensionality),
    ],
    learning_rate=0.001,
    n_iter=25)
In [69]:
#from sknn.platform import gpu32
# Train the network on the X/Y subset. Whatever fit() returns is kept as `z`
# and used for prediction in the evaluation cell (presumably the fitted
# classifier itself, following the scikit-learn convention -- TODO confirm).
z = nn.fit(X,Y)
In [71]:
# Count how many of the evaluated rows the fitted model misclassifies.
# NOTE(review): rows 1..9999 include the rows used for training (1..999), so
# this count is not a pure held-out error -- confirm that is intended.
eval_rows = slice(1, 10000)
h = z.predict(training_set.df[training_set.features].values[eval_rows])
y = training_set.df[training_set.hidden_cluster].values[eval_rows]

# The label selection is 2-D (one column per hidden-cluster name); keep the
# first column as a flat vector, exactly as np.transpose(y)[0] did.
y = y[:, 0]

# Flatten the predictions as well so the comparison is element-wise even if
# predict() returns a column vector, then count the mismatches in one pass.
s = int(np.sum(np.ravel(h) != y))

# Function-call form of print works on both Python 2 and Python 3.
print(s)
In [72]:
In [ ]: