In [1]:
from sklearn import metrics
from sklearn.cross_validation import train_test_split
from tensorflow.contrib import learn
from tqdm import tqdm
from context import *
from util.dfmgmt import initSet, wrangle
In [2]:
# Load the working data set through the project helper.
df = initSet()
# Keep only the rows whose 'decade' value is not 2010.
# NOTE(review): presumably excluded because that decade is incomplete -- confirm.
df = df[df['decade'] != 2010]
In [3]:
# Column lists handed to wrangle(): 'decade' and 'year' sit in removeList
# together with the target column.
# NOTE(review): how wrangle() treats dropList vs. removeList (and the final
# True flag) is defined in util.dfmgmt -- confirm there.
dropList = ['most_used_term']
removeList = ['decade', 'year', 'charted']
target = 'charted' # column the classifier is trained to predict
# wrangle() hands back the processed frame and the list of feature columns.
df, features = wrangle(df, dropList, removeList, True)
# Preview the first rows of the processed frame.
df.head()
Out[3]:
In [4]:
# Pull the model inputs and the labels out of the frame as NumPy arrays.
# `.values` is used instead of `as_matrix()`, which pandas deprecated in 0.23
# and removed in 1.0; `.values` returns the same array on old and new versions.
X = df[features].values
y = df[target].values
In [5]:
# Hold out 20% of the rows as a test set. This is a single train/test split,
# not k-fold cross-validation; random_state is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42
)
In [6]:
def gridSearchTF(X_train, X_test, y_train, y_test, units, steps=200, batch_size=64,
                 n_classes=None):
    """Fit a DNN classifier for one hidden-layer layout and return its test accuracy.

    Parameters
    ----------
    X_train, X_test
        Feature matrices used for fitting and for scoring, respectively.
    y_train, y_test
        Label vectors matching the matrices above.
    units : str or iterable
        Hidden-layer sizes, either as a comma-separated string such as
        ``"64,32,48"`` or as an iterable of values convertible to ``int``.
    steps : int
        Number of training steps passed to ``fit``.
    batch_size : int
        Mini-batch size passed to ``fit``.
    n_classes : int, optional
        Number of target classes. When omitted it is inferred from
        ``y_train`` as ``max(label) + 1`` (at least 2), which assumes
        zero-based integer labels -- TODO confirm for this data set.

    Returns
    -------
    The accuracy of the fitted model on ``(X_test, y_test)`` as computed by
    ``metrics.accuracy_score``.
    """
    # Accept the comma-separated form produced by the random search helpers.
    if hasattr(units, 'split'):
        units = units.split(',')
    # Layer widths must be integers; split() yields strings.
    hidden_units = [int(u) for u in units]

    # The number of output classes is a property of the labels, not of the
    # network depth, so it must not be derived from len(units).
    if n_classes is None:
        n_classes = max(2, int(max(y_train)) + 1)

    # Instantiate model
    clf = learn.DNNClassifier(hidden_units=hidden_units, n_classes=n_classes)
    # Train model
    clf.fit(X_train, y_train, steps=steps, batch_size=batch_size)
    # Score model on the held-out data
    predictions = clf.predict(X_test)
    return metrics.accuracy_score(y_test, predictions)
In [7]:
from random import sample, shuffle
def randomList(nClasses):
    """Return a comma-separated string of random integers.

    Parameters
    ----------
    nClasses : int
        How many integers to draw. Zero (or a negative value) yields an
        empty string.

    Returns
    -------
    str
        ``nClasses`` integers, each drawn uniformly from 30..100 inclusive,
        joined by commas with no trailing comma (for example ``"47,92,63"``).
    """
    # Local import: the cell-level import only brings in sample and shuffle.
    from random import randint

    # One direct draw per value; `range` works on both Python 2 and 3.
    return ','.join(str(randint(30, 100)) for _ in range(nClasses))
def randomizer(nClasses, nSearches):
    """Generate a shuffled list of random comma-separated integer strings.

    Parameters
    ----------
    nClasses : int
        Number of integers in each generated string (forwarded to
        ``randomList``).
    nSearches : int
        Number of strings to generate.

    Returns
    -------
    list of str
        ``nSearches`` strings in shuffled order. Entries are drawn
        independently, so the same string may appear more than once.
    """
    # `range` keeps this runnable on both Python 2 and Python 3.
    hiddenUnits = [randomList(nClasses) for _ in range(nSearches)]
    shuffle(hiddenUnits)
    return hiddenUnits
In [8]:
# Draw 10 random layouts of 3 values each and record the score returned by
# gridSearchTF for every layout, keyed by its comma-separated string.
params = dict.fromkeys(randomizer(3, 10))
for units in tqdm(params):
    params[units] = gridSearchTF(X_train, X_test, y_train, y_test, units)
In [9]:
# Display the score recorded for each hidden-unit layout.
params
Out[9]:
In [ ]: