In [63]:
#!/usr/bin/env python
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pdb
import csv
from dataset import *
from collections import Counter
import numpy as np
import operator
import matplotlib.pyplot as pl
import pandas as pd      
from bs4 import BeautifulSoup 
import re
from nltk.corpus import stopwords
from gensim.models import word2vec
from  names_to_arrays import *
# English stop words from the NLTK corpus, collected into a set.
ENGLISH_STOP_WORDS = set(stopwords.words("english"))
# Location of the abbreviations list (read below with no header row).
ABREVIATIONS_FILE = "../../data/abreviations"
# Lower-cased entries of the file's first column, used as a stop list later on.
# `DataFrame.from_csv` was deprecated in pandas 0.21 and removed in pandas 1.0;
# `pd.read_csv` is the supported reader and takes the same arguments here.
ABREVIATIONS = set(map(str.lower, pd.read_csv(ABREVIATIONS_FILE, header=None, index_col=False)[0]))

In [3]:
# Load the streets/intersections CSV through the project's `data` wrapper,
# passing the three street-name columns as `categorical`.
d = data(
    "../../data/List_of_Streets_and_Intersections.csv",
    categorical=["streetname", "from_st", "to_st"],
)
# One cleaned entry per row of d.df: every cell is stringified, the cells are
# joined with single spaces, and the result goes through review_to_words with
# ABREVIATIONS as the stop list.
new_data = [
    review_to_words(
        " ".join(map(str, list(d.df.loc[row_label,]))),
        keepcaracters="[^0-9A-Z]",
        stops=ABREVIATIONS,
    )
    for row_label in d.df.index
]

In [15]:
# Sweep candidate sizes 1..49 with fixed word2vec settings.
# NOTE(review): `ans` is indexed as ans[0], ans[1], ans[2] further down; what each
# entry holds is defined by search_best_model, which is not visible here.
candidate_sizes = range(1, 50, 1)
word2vec_params = {"window": 5, "min_count": 1, "workers": 5}
ans = search_best_model(new_data, sizes=candidate_sizes, params_model=word2vec_params)


WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance
WARNING:gensim.models.word2vec:consider setting layer size to a multiple of 4 for greater performance

In [21]:
# Shared x axis: the same 1..49 range that was handed to search_best_model.
size_axis = range(1, 50, 1)
# Draw the three result series on one set of axes (same order and colors as before).
plt.plot(size_axis, ans[2], color="green")
plt.plot(size_axis, ans[0], color="blue")
plt.plot(size_axis, ans[1], color='red')


Out[21]:
[<matplotlib.lines.Line2D at 0x10e134690>]

In [65]:
# Feed the integers 0..6 to periodic_to_linear with an intrinsic period of 1/7,
# then plot the first returned component against the second.
time_steps = np.arange(7)
p = periodic_to_linear(time_steps, 1. / 7)
plt.plot(p[0], p[1])


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-65-6bd40078871d> in <module>()
----> 1 p = periodic_to_linear(range(7), 1./7)
      2 plt.plot(p[0],p[1])

/Users/peigniersergio/Desktop/SFPD/scripts/suppositoire/names_to_arrays.py in periodic_to_linear(time, intrinsec_period)
     99 
    100 def periodic_to_linear(time, intrinsec_period):
--> 101         time = np.array(time)
    102 	ans = [np.cos(time * 2 * np.pi * intrinsec_period) , 
    103 	np.sin(time * 2 * np.pi * intrinsec_period)]

TypeError: can't multiply sequence by non-int of type 'float'

In [ ]: