In [1]:
    
from sklearn import metrics
from sklearn.cross_validation import train_test_split
from tensorflow.contrib import learn
from tqdm import tqdm
from context import *
from util.dfmgmt import initSet, wrangle
    
In [2]:
    
# Load the data set, then keep only the rows whose decade is not 2010.
df = initSet()
df = df.loc[df['decade'] != 2010]
    
In [3]:
    
# Column that the classifier will be trained to predict.
target = 'charted'

# Column lists handed to wrangle(). How wrangle() treats each list is defined
# in util.dfmgmt -- NOTE(review): presumably dropList columns are discarded and
# removeList columns (year, decade and the target) are excluded from the
# returned feature names; confirm against wrangle().
dropList = ['most_used_term']
removeList = ['decade', 'year', 'charted']

df, features = wrangle(df, dropList, removeList, True)

# Preview the first rows of the wrangled frame.
df.head()
    
    Out[3]:
In [4]:
    
# Extract the feature matrix and the label vector as NumPy arrays.
# `.values` replaces `DataFrame.as_matrix()`, which is deprecated and was
# removed in pandas 1.0; `.values` is available in old and new pandas alike.
X = df[features].values
y = df[target].values
    
In [5]:
    
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
In [6]:
    
def gridSearchTF(X_train, X_test, y_train, y_test, units, steps=200, batch_size=64,
                 n_classes=None):
    """Train one DNNClassifier configuration and return its test accuracy.

    Args:
        X_train: training features passed to ``fit``.
        X_test: held-out features passed to ``predict``.
        y_train: training labels passed to ``fit``.
        y_test: held-out labels the predictions are scored against.
        units: comma-separated hidden-layer widths, e.g. ``'40,50,60'``
            (one hidden layer per entry).
        steps: number of training steps passed to ``fit``.
        batch_size: batch size passed to ``fit``.
        n_classes: number of label classes. When ``None`` (the default) it is
            inferred from ``y_train`` as ``max(label) + 1`` (never less than
            2), which assumes labels are integer class indices starting at 0.

    Returns:
        The value of ``metrics.accuracy_score`` for the test predictions.

    Raises:
        ValueError: if an entry of ``units`` is not an integer literal.
    """
    # Layer widths must be integers, not the raw substrings of `units`.
    hidden_units = [int(width) for width in units.split(',')]

    # The class count comes from the labels; the number of hidden layers is
    # unrelated to it.
    if n_classes is None:
        n_classes = max(2, int(max(y_train)) + 1)

    # Instantiate model
    clf = learn.DNNClassifier(hidden_units=hidden_units, n_classes=n_classes)
    # Train model
    clf.fit(X_train, y_train, steps=steps, batch_size=batch_size)
    # Score model; list() also accepts a predict() that yields its results
    # lazily instead of returning an array.
    predictions = list(clf.predict(X_test))
    return metrics.accuracy_score(y_test, predictions)
    
In [7]:
    
from random import sample, shuffle
def randomList(nClasses, low=30, high=100):
    """Build a random comma-separated list of integers.

    Args:
        nClasses: how many integers to draw (the caller uses one integer per
            hidden layer).
        low: smallest value that can be drawn (inclusive).
        high: largest value that can be drawn (inclusive).

    Returns:
        A string such as ``'42,87,63'``; the empty string when ``nClasses``
        is 0.
    """
    # range() instead of xrange() keeps this runnable on Python 2 and 3.
    candidates = range(low, high + 1)
    # Each entry is an independent draw, so values may repeat.
    return ','.join(str(sample(candidates, 1)[0]) for _ in range(nClasses))
def randomizer(nClasses, nSearches):
    """Generate ``nSearches`` random hidden-unit specifications.

    Args:
        nClasses: number of integers in each specification (forwarded to
            ``randomList``).
        nSearches: how many specifications to generate.

    Returns:
        A shuffled list of ``nSearches`` strings as produced by
        ``randomList``; duplicates are possible.
    """
    # range() instead of xrange() keeps this runnable on Python 2 and 3.
    hiddenUnits = [randomList(nClasses) for _ in range(nSearches)]
    shuffle(hiddenUnits)
    return hiddenUnits
    
In [8]:
    
# Draw 10 random 3-layer configurations (identical strings collapse into a
# single key), then replace each placeholder None with the accuracy that
# configuration reaches on the held-out split.
params = dict.fromkeys(randomizer(3, 10))
for units in tqdm(params):
    params[units] = gridSearchTF(X_train, X_test, y_train, y_test, units=units)
    
    
In [9]:
    
# Display the mapping from each hidden-unit configuration to its accuracy score.
params
    
    Out[9]:
In [ ]: