In [1]:
import numpy as np

k = 9

def convert_base5(n):
    return {"0": "N", "1": "A", "2": "C", "3": "T", "4": "G"}.get(n,"N")

def convert_nt(c):
    return {"N": 0, "A": 1, "C": 2, "T": 3, "G": 4}.get(c, 0)

def convert_nt_complement(c):
    return {"N": 0, "A": 3, "C": 4, "T": 1, "G": 2}.get(c, 0)

def convert_kmer_to_int(kmer):
    return int(''.join(str(x) for x in (map(convert_nt, kmer))), 5)

def kmer_processor(seq,offset):
    return list(map(convert_kmer_to_int, get_kmers(k)(seq[offset:])))

def partition(n, step, coll):
    for i in range(0, len(coll), step):
        if (i+n > len(coll)):
            break #  raise StopIteration...
        yield coll[i:i+n]

def get_kmers(k):
    return lambda sequence: partition(k, k, sequence)

def get_kmers_from_seq(sequence):
    kmers_from_seq = list()

    kp = functools.partial(kmer_processor, sequence)
    
    for i in map(kp, range(0,k)):
        kmers_from_seq.append(i)

    rev = sequence[::-1]
    kpr = functools.partial(kmer_processor, rev)
    
    for i in map(kpr, range(0,k)):
        kmers_from_seq.append(i)
            
#    for i in range(0,k):
#        kmers_from_seq.append(kmer_processor(sequence,i))
#    for i in range(0,k):
#        kmers_from_seq.append(kmer_processor(rev, i))
    return kmers_from_seq

In [2]:
with open("Glove/vocab.txt", 'r') as f:
    words = [x.rstrip().split(' ')[0] for x in f.readlines()]

with open("Glove/vectors.txt", 'r') as f:
    vectors = {}
    for line in f:
        vals = line.rstrip().split(' ')
        vectors[vals[0]] = [float(x) for x in vals[1:]]

vocab_size = len(words)
vocab = {w: idx for idx, w in enumerate(words)}
ivocab = {idx: w for idx, w in enumerate(words)}

vector_dim = len(vectors[ivocab[0]])
W = np.zeros((vocab_size, vector_dim))
for word, v in vectors.items():
    if word == '<unk>':
        continue
    W[vocab[word], :] = v

# normalize each word vector to unit variance
W_norm = np.zeros(W.shape)
d = (np.sum(W ** 2, 1) ** (0.5))
W_norm = (W.T / d).T

def convert_to_kmer(kmer):
    return ''.join(map(convert_base5, str(np.base_repr(kmer, 5))))

In [3]:
vocab['1008064']


Out[3]:
0

In [ ]:


In [ ]:


In [4]:
def distance(W, vocab, ivocab, input_term):
    for idx, term in enumerate(input_term.split(' ')):
        if term in vocab:
            print('Word: %s  Position in vocabulary: %i' % (convert_to_kmer(int(term)), vocab[term]))
            if idx == 0:
                vec_result = np.copy(W[vocab[term], :])
            else:
                vec_result += W[vocab[term], :] 
        else:
            print('Word: %s  Out of dictionary!\n' % (convert_to_kmer(int(term))))
            return
    
    vec_norm = np.zeros(vec_result.shape)
    d = (np.sum(vec_result ** 2,) ** (0.5))
    vec_norm = (vec_result.T / d).T

    dist = np.dot(W, vec_norm.T)

    for term in input_term.split(' '):
        index = vocab[term]
        dist[index] = -np.Inf

    a = np.argsort(-dist)[:100]

    print("\n                               Word       Cosine distance\n")
    print("---------------------------------------------------------\n")
    for x in a:
        print("%35s\t\t%f\n" % (convert_to_kmer(int(ivocab[x])), dist[x]))

In [5]:
distance(W_norm, vocab, ivocab, str(convert_kmer_to_int("ATGACGATC")))


Word: ATGACGATC  Position in vocabulary: 16524

                               Word       Cosine distance

---------------------------------------------------------

                          GCGGCGATC		0.321689

                          CGGATCTCG		0.301249

                          TGATATTTG		0.298273

                          TCGACGATG		0.296905

                          AGCACGATC		0.294950

                          ATGATCGCC		0.291062

                          TATTCCTTG		0.283953

                          GCGATCTCG		0.280744

                          AGCGAACTG		0.280360

                          GCAAGCGCC		0.280148

                          TACGCNNNN		0.273024

                          TCGCCGATC		0.271661

                          GCGATCGCC		0.270788

                          CTGACGCTG		0.270154

                          ACATCGGTC		0.267977

                          CCGAGCGTG		0.267093

                          CCGTCGATC		0.266004

                          CCGATCCCG		0.265412

                          CTGTCGATC		0.265294

                          CGGTTGCGG		0.264336

                          ACGTAGTCG		0.264066

                          ATCCTGATT		0.263745

                          CCGGTGACG		0.263546

                          CCGGTGATC		0.260371

                          TTCGACACG		0.260114

                          ACGAAGGCG		0.259242

                          GCGATGATC		0.258910

                          GTGTTCAAC		0.254153

                          TGGTCGATA		0.252265

                          AATACTCTG		0.252192

                          TCGACCGCC		0.251661

                          AGCGCCATG		0.251451

                          ATGTTGGCC		0.250913

                          ACGAAGCGC		0.250768

                          CCAAGTTCC		0.250355

                          GCCCCCTCT		0.250168

                          TTGATGCCG		0.250042

                          GTCGTCTTC		0.247122

                          ATCAGCGCC		0.246919

                          TTGCCGACG		0.245994

                          GCCGTCATG		0.245988

                          TGGGTCGTG		0.245839

                          ATCGCGGCG		0.245816

                          GAGGCGACG		0.244959

                          TCGTCGACG		0.244772

                          CCGATCCAG		0.244695

                          GTGAAGGCG		0.243684

                          ACGCCGACG		0.242642

                          TCGATCGCC		0.242169

                          AATCATCTC		0.241508

                          GCGAGGCTG		0.241098

                          ACGACGAAG		0.241073

                          GCGATCTGC		0.241031

                          AGGATCAGC		0.240995

                          CCGCCGAGC		0.238985

                          TGCAGGATG		0.238883

                          CCCGCCTTC		0.237990

                          AGAATCACG		0.236862

                          TTTATGCTT		0.235939

                          CCCTTGATG		0.235911

                          GAGACGACC		0.235184

                          ACCGCATTC		0.234477

                          TCGCCGATG		0.233412

                          TCGAGCAGC		0.233153

                          AGGACGGTG		0.232392

                          GGCCCGTAG		0.232218

                          GGCATCATG		0.231430

                          TAGATACCA		0.231242

                          CCGATCCCC		0.230162

                          TTCTCGCCC		0.230014

                          ATCAGCACG		0.229643

                          TCGGAACCG		0.229494

                          GCGATGGCA		0.229354

                          ATCTGGCGG		0.228955

                          AGGATACCT		0.228740

                          AAGGGGAGC		0.228528

                          TCGATCGAG		0.227877

                          ACGATGATG		0.227805

                          CCGGTTTTC		0.227459

                          TCGAAGGCG		0.227316

                          TCATGTTCG		0.227158

                          AAGGGTGAC		0.226832

                          TGGTTGAAG		0.226713

                          ATCGCCTCC		0.226600

                          ACCTGGAAG		0.226356

                          ATGCGGAAG		0.226309

                          GCCTCGTCG		0.225796

                          TCGATCGAC		0.225600

                          CACGACATG		0.225557

                          GCCGCGATG		0.225314

                          GGCGCGATC		0.225286

                          AGCTATTAC		0.225038

                          GAAATAACG		0.224890

                          CCCATGACG		0.224645

                          GGGTTGATC		0.224633

                          TTGACGGCA		0.224590

                          TCGATCCTC		0.224558

                          TCGTAATTC		0.224546

                          ATCACCGGC		0.224476

                          AAGGCGAAG		0.224455


In [6]:
# np.save("glove_embeddings.np", W_norm)

In [7]:
convert_to_kmer(1008064)


Out[7]:
'CCGCCGCCG'

In [8]:
# GCGGCGATC

In [9]:
## Tensorflow Model

# Vector length is 256

# Input is 15 kmers (can be altered)

In [10]:
import tensorflow as tf
import functools
from functools import partial
import os.path
import Bio
from Bio import SeqIO
import random
from random import shuffle
import ntpath
import pickle
import sys


C:\ProgramData\Anaconda3\lib\site-packages\h5py\__init__.py:34: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters

In [11]:
embedding_dim = 256

# sess = tf.Session()
sess = tf.InteractiveSession()

# Weights = tf.Variable(tf.constant(0.0, shape=[vocab_size, embedding_dim]), trainable=False, name="Weights")

# embedding_placeholder = tf.placeholder(tf.float32, [vocab_size, embedding_dim])
# embedding_init = Weights.assign(embedding_placeholder)

# sess.run(embedding_init, feed_dict={embedding_placeholder: W_norm})

In [12]:
def load_fasta(filename):
    # tf.summary.text("File", tf.as_string(filename))
    data = dict()
    file_base_name = ntpath.basename(filename)
    picklefilename = file_base_name + ".picklepickle"
    if os.path.isfile(picklefilename):
        print("Loading from pickle: " + filename)
        data = pickle.load(open(picklefilename, "rb"))
    else:
        print("File not found, generating new sequence: " + picklefilename)
        for seq_record in SeqIO.parse(filename, "fasta"):
            data.update({seq_record.id:
                         get_kmers_from_seq(seq_record.seq.upper())})
        pickle.dump(data, open(picklefilename, "wb"))
    sys.stdout.flush()
    return(data)

In [52]:
def gen_training_data_generator(input_data, window_size, repdict):
    for k in input_data.keys():
        for kdata in input_data[k]:
            for i in range(window_size + 1, len(kdata) - window_size):
                kentry = list()
                for x in range(i - window_size - 1, i + window_size):
                    kentry.append(vocab[str(kdata[x])])
                yield(kentry, [repdict[k]])


def get_categories(directory):
    data = list()
    files = os.listdir(directory)
    for filename in files:
        for seq_record in SeqIO.parse(directory + "/" + filename, "fasta"):
            data.append(seq_record.id)
    data = sorted(list(set(data)))
    return(data)

replicons_list = get_categories("training-files/")

def kmer_generator(directory, window_size):
    files = [directory + "/" + f for f in os.listdir(directory)]
    random.shuffle(files)
    
    replicons_list = get_categories("training-files/")
    repdict = dict()
    a = 0
    for i in replicons_list:
        repdict[i] = a
        a += 1
    
    for f in files:
        yield from gen_training_data_generator(load_fasta(f), window_size, repdict)


def input_fn():
    kmer_gen = functools.partial(kmer_generator, "training-files/", 7)

    ds = tf.data.Dataset.from_generator(kmer_gen, 
                                        (tf.float32,
                                         tf.int64),
                                        (tf.TensorShape([15]),
                                         tf.TensorShape(None)))
                                        
#    # Numbers reduced to run on my desktop
#    ds = ds.repeat(5)
#    ds = ds.prefetch(5000) # Each batch is only 2048, so prefetch 5000
#    ds = ds.shuffle(buffer_size=1000000) # Large buffer size for better randomization
#    ds = ds.batch(2048) # Reduced from 5000 so it runs quicker
    
#    ds = ds.repeat(1)
#    ds = ds.prefetch(2)
#    ds = ds.shuffle(buffer_size=500)
    ds = ds.batch(20)
    
    def add_labels(arr, lab):
        return({"kmers": arr}, lab)
    
    ds = ds.map(add_labels)
    iterator = ds.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels

def init():
    return W

replicons_fc = tf.feature_column.categorical_column_with_vocabulary_list(
    key='label',
    vocabulary_list=replicons_list)
    
kmers_fc = tf.feature_column.numeric_column(key="kmers", shape=15, dtype=tf.int64)
# kmers_dict = tf.feature_column.categorical_column_with_vocabulary_list(
#     key="kmers",
#     shape=15,
#     vocabulary_list=vocab.keys())
# kmers_fc_embed = tf.feature_column.embedding_column(
#     categorical_column=kmers_dict, 
#     dimension=256,
#     initializer=init,
#     trainable=False)

In [ ]:


In [41]:
kmer_gen = functools.partial(kmer_generator, "training-files/", 7)
next(kmer_gen())


Loading from pickle: training-files//WSM419.final.fasta
Out[41]:
([21851,
  103912,
  5944,
  155395,
  164903,
  18374,
  1951,
  234160,
  137275,
  79405,
  36675,
  153357,
  82637,
  25877,
  125731],
 [5])

In [15]:
# sess = tf.InteractiveSession()
v = input_fn()
#v[0]["kmers"].eval()
a = tf.feature_column.input_layer(v[0], [kmers_fc])
a = tf.Print(a, [a], message="This is a: ")
a.eval()


File not found, generating new sequence: SM11.final.fasta.picklepickle
Out[15]:
array([[ 85790., 166485.,  43233., 101577., 154642., 188957.,  77993.,
        126535., 107128.,  13017., 121746.,   1037., 102026., 128846.,
        149349.]], dtype=float32)

In [16]:
#v = input_fn()
# v[0]["kmers"]
a = tf.feature_column.input_layer(v[0], kmers_fc)
a = tf.cast(a, tf.int64)
a.eval()


Out[16]:
array([[166485,  43233, 101577, 154642, 188957,  77993, 126535, 107128,
         13017, 121746,   1037, 102026, 128846, 149349,  95425]],
      dtype=int64)

In [17]:
# ks = tf.feature_column.input_layer(v[0], [kmers_fc])

embedding_dim = 256

Weights = tf.Variable(tf.constant(0.0, shape=[vocab_size, embedding_dim]), trainable=False, name="Weights")

embedding_placeholder = tf.placeholder(tf.float32, [vocab_size, embedding_dim])
embedding_init = Weights.assign(embedding_placeholder)

sess.run(embedding_init, feed_dict={embedding_placeholder: W_norm})

a = tf.feature_column.input_layer(v[0], kmers_fc)
a = tf.cast(a, tf.int64)
words = tf.nn.embedding_lookup(Weights, a)

b = tf.Print(words, [words])

b.eval()


Out[17]:
array([[[-0.01377905, -0.0105249 , -0.04465437, ...,  0.00205964,
         -0.05210606,  0.00690691],
        [ 0.10313533,  0.02833033, -0.04824345, ..., -0.01655637,
         -0.07461011,  0.07710814],
        [ 0.07772083,  0.00162455, -0.06415386, ..., -0.02984827,
         -0.04164281,  0.05927063],
        ...,
        [ 0.00598741,  0.00918654,  0.02112901, ..., -0.00552926,
          0.07925026, -0.00039346],
        [ 0.01271917,  0.00480851,  0.04154439, ..., -0.01210083,
         -0.02720928, -0.05585257],
        [-0.01477062, -0.01350971,  0.11504258, ..., -0.01094713,
          0.12314176,  0.01675047]]], dtype=float32)

In [25]:
tf.shape(b).eval()


Out[25]:
array([  1,  15, 256])

In [ ]:
vectors[5]

In [89]:
def cnn_model_fn(features, labels, mode):
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        tf.logging.info("my_model_fn: PREDICT, {}".format(mode))
    elif mode == tf.estimator.ModeKeys.EVAL:
        tf.logging.info("my_model_fn: EVAL, {}".format(mode))
    elif mode == tf.estimator.ModeKeys.TRAIN:
        tf.logging.info("my_model_fn: TRAIN, {}".format(mode))

    Weights = tf.Variable(tf.constant(0.0, shape=[vocab_size, embedding_dim]), trainable=False, name="Weights")
    embedding_placeholder = tf.placeholder(tf.float32, [vocab_size, embedding_dim])
    embedding_init = Weights.assign(embedding_placeholder)
    
    def init_fn(scaffold, sess):
        sess.run(Weights.initializer, {Weights.initial_value: W})
    scaffold = tf.train.Scaffold(init_fn=init_fn)
    
    inputs = tf.feature_column.input_layer(features, [kmers_fc])
    input_i64 = tf.cast(inputs, tf.int64)
    embedded_kmers = tf.nn.embedding_lookup(Weights, input_i64)
    
    # input_layer = tf.reshape(embedded_kmers, [-1, 15, 256 ,1])
    input_layer = tf.reshape(embedded_kmers, [-1, 3840])
    input_layer = tf.cast(input_layer, tf.float32)
    
#    conv1 = tf.layers.conv2d(inputs = input_layer,
#                             filters=32,
#                             kernel_size=[-1,2,256],
#                             strides=3,
#                             padding="same",
#                             name="Conv1",
#                             activation=None)
    
#    avg_pool1 = tf.layers.average_pooling2d(conv1, 
#                                            pool_size=[-1,4,32], 
#                                            strides=[-1,2,16],
#                                            padding="same",
#                                            name="AvgPooling_1")
    
    # 29 is number of replicons
    
#    print(tf.shape(avg_pool1))
    
    #logits = tf.layers.dense(units=len(replicons_list), inputs=avg_pool1)
    # inputs=avg_pool1, units=len(replicons_list))
    
    h1 = tf.layers.Dense(1000, activation=tf.nn.relu)(input_layer)
    h2 = tf.layers.Dense(500, activation=tf.nn.relu)(h1)
    logits = tf.layers.Dense(29)(h2)
    
    
    predictions = {
        "class_ids": tf.argmax(input=logits, axis=1)
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,
                                                  logits = logits)
    
    accuracy = tf.metrics.accuracy(labels, predictions['class_ids'])

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode,
            loss=loss,
            eval_metric_ops={'my_accuracy': accuracy})

    # If mode is not PREDICT nor EVAL, then we must be in TRAIN
    assert mode == tf.estimator.ModeKeys.TRAIN, "TRAIN is only ModeKey left"    
    
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
    train_op = optimizer.minimize(
        loss=loss,
        global_step=tf.train.get_global_step())
    
    tf.summary.scalar('my_accuracy', accuracy[1])

    
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
   
    # Add evaluation metrics (for EVAL mode)
#    eval_metric_ops = {
#            "accuracy": tf.metrics.accuracy(
#                    labels=labels, predictions=predictions["classes"])}
    
#    return tf.estimator.EstimatorSpec(
#            mode=mode, 
#            loss=loss, 
#            eval_metric_ops=eval_metric_ops)

In [90]:
len(replicons_list)


Out[90]:
29

In [ ]:
classifier = tf.estimator.Estimator(
    model_fn=cnn_model_fn,
    model_dir="classifier_glove_cnn4.2",
    config=tf.contrib.learn.RunConfig(
        save_checkpoints_steps=10,
        save_checkpoints_secs=None,
        save_summary_steps=5))

classifier.train(input_fn=input_fn, steps=10000)
classifier.evaluate(input_fn=input_fn, steps=1000)


INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001E45F8C66D8>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': None, '_save_summary_steps': 5, '_save_checkpoints_secs': None, '_log_step_count_steps': 100, '_session_config': None, '_save_checkpoints_steps': 10, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': 'classifier_glove_cnn4.2'}
INFO:tensorflow:my_model_fn: TRAIN, train
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from classifier_glove_cnn4.2\model.ckpt-300
Loading from pickle: training-files//WSM419.final.fasta
INFO:tensorflow:Saving checkpoints for 301 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:loss = 3.2793221, step = 301
INFO:tensorflow:Saving checkpoints for 311 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 321 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 331 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 341 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 351 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 361 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 371 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 381 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 391 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 401 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:global_step/sec: 3.1264
INFO:tensorflow:loss = 3.1837664, step = 401 (31.989 sec)
INFO:tensorflow:Saving checkpoints for 411 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 421 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 431 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 441 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 451 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 461 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 471 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 481 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 491 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 501 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:global_step/sec: 3.15402
INFO:tensorflow:loss = 3.088994, step = 501 (31.703 sec)
INFO:tensorflow:Saving checkpoints for 511 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 521 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 531 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 541 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 551 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 561 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 571 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 581 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 591 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 601 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:global_step/sec: 3.14901
INFO:tensorflow:loss = 2.9950721, step = 601 (31.756 sec)
INFO:tensorflow:Saving checkpoints for 611 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 621 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 631 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 641 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 651 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 661 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 671 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 681 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 691 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 701 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:global_step/sec: 3.0425
INFO:tensorflow:loss = 2.9020698, step = 701 (32.868 sec)
INFO:tensorflow:Saving checkpoints for 711 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 721 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 731 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 741 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 751 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 761 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 771 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 781 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 791 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 801 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:global_step/sec: 3.05828
INFO:tensorflow:loss = 2.810063, step = 801 (32.698 sec)
INFO:tensorflow:Saving checkpoints for 811 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 821 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 831 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 841 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 851 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 861 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 871 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 881 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 891 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 901 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:global_step/sec: 3.0978
INFO:tensorflow:loss = 2.7191284, step = 901 (32.281 sec)
INFO:tensorflow:Saving checkpoints for 911 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 921 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 931 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 941 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 951 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 961 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 971 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 981 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 991 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1001 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:global_step/sec: 3.02382
INFO:tensorflow:loss = 2.6293461, step = 1001 (33.071 sec)
INFO:tensorflow:Saving checkpoints for 1011 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1021 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1031 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1041 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1051 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1061 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1071 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1081 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1091 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1101 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:global_step/sec: 3.10521
INFO:tensorflow:loss = 2.5408006, step = 1101 (32.204 sec)
INFO:tensorflow:Saving checkpoints for 1111 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1121 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1131 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1141 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1151 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1161 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1171 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1181 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1191 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1201 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:global_step/sec: 3.06097
INFO:tensorflow:loss = 2.453577, step = 1201 (32.669 sec)
INFO:tensorflow:Saving checkpoints for 1211 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1221 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1231 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1241 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1251 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1261 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1271 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1281 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1291 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1301 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:global_step/sec: 3.02978
INFO:tensorflow:loss = 2.3677616, step = 1301 (33.006 sec)
INFO:tensorflow:Saving checkpoints for 1311 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1321 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1331 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1341 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1351 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1361 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1371 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1381 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1391 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1401 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:global_step/sec: 2.98364
INFO:tensorflow:loss = 2.283443, step = 1401 (33.516 sec)
INFO:tensorflow:Saving checkpoints for 1411 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1421 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1431 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1441 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1451 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1461 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1471 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1481 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1491 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1501 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:global_step/sec: 2.99374
INFO:tensorflow:loss = 2.2007077, step = 1501 (33.403 sec)
INFO:tensorflow:Saving checkpoints for 1511 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1521 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1531 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1541 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1551 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1561 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1571 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1581 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1591 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1601 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:global_step/sec: 3.10296
INFO:tensorflow:loss = 2.1196437, step = 1601 (32.227 sec)
INFO:tensorflow:Saving checkpoints for 1611 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1621 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1631 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1641 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1651 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1661 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1671 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1681 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1691 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1701 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:global_step/sec: 2.93645
INFO:tensorflow:loss = 2.0403342, step = 1701 (34.055 sec)
INFO:tensorflow:Saving checkpoints for 1711 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1721 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1731 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1741 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1751 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1761 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1771 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1781 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1791 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1801 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:global_step/sec: 2.87626
INFO:tensorflow:loss = 1.9628611, step = 1801 (34.767 sec)
INFO:tensorflow:Saving checkpoints for 1811 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1821 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1831 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1841 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1851 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1861 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1871 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1881 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1891 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1901 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:global_step/sec: 2.87842
INFO:tensorflow:loss = 1.8873008, step = 1901 (34.741 sec)
INFO:tensorflow:Saving checkpoints for 1911 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1921 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1931 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1941 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1951 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1961 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1971 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1981 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 1991 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 2001 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:global_step/sec: 3.03024
INFO:tensorflow:loss = 1.8137262, step = 2001 (33.001 sec)
INFO:tensorflow:Saving checkpoints for 2011 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 2021 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 2031 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 2041 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 2051 into classifier_glove_cnn4.2\model.ckpt.
INFO:tensorflow:Saving checkpoints for 2061 into classifier_glove_cnn4.2\model.ckpt.

In [ ]:
# Ignore below for now...

In [18]:
def main(unused_argv):
    classifier = tf.estimator.Estimator(
            model_fn=cnn_model_fn,
            model_dir="classifier_glove_cnn4",
            config=tf.contrib.learn.RunConfig(
                    save_checkpoints_steps=10,
                    save_checkpoints_secs=None,
                    save_summary_steps=5))
    
    classifier.train(input_fn=input_fn, steps=10)
    
    # eval_results = classifier.evaluate(input_fn=my_input_fn, steps=10)
    # print(eval_results)

In [ ]:
if __name__ == "__main__":
  tf.app.run()


INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000022ECEACC9E8>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': None, '_save_summary_steps': 5, '_save_checkpoints_secs': None, '_log_step_count_steps': 100, '_session_config': None, '_save_checkpoints_steps': 10, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': 'classifier_glove_cnn4'}
INFO:tensorflow:Create CheckpointSaverHook.

In [ ]:


In [32]:
# Using pre-made models starts down here....

In [28]:
kmers_fc = tf.feature_column.numeric_column(key="kmers", shape=15, dtype=tf.int64)

kmers_dict = tf.feature_column.categorical_column_with_vocabulary_list(
    key="kmers",
    vocabulary_list=list(map(int, vocab.keys())))

kmers_dict_1 = tf.feature_column.categorical_column_with_vocabulary_list(
    key="kmer_1",
    vocabulary_list=list(map(int, vocab.keys())))

kmers_dict_2 = tf.feature_column.categorical_column_with_vocabulary_list(
    key="kmer_2",
    vocabulary_list=list(map(int, vocab.keys())))

kmers_dict_3 = tf.feature_column.categorical_column_with_vocabulary_list(
    key="kmer_3",
    vocabulary_list=list(map(int, vocab.keys())))

kmers_dict_4 = tf.feature_column.categorical_column_with_vocabulary_list(
    key="kmer_4",
    vocabulary_list=list(map(int, vocab.keys())))

kmers_dict_5 = tf.feature_column.categorical_column_with_vocabulary_list(
    key="kmer_5",
    vocabulary_list=list(map(int, vocab.keys())))

kmers_dict_6 = tf.feature_column.categorical_column_with_vocabulary_list(
    key="kmer_6",
    vocabulary_list=list(map(int, vocab.keys())))

kmers_dict_7 = tf.feature_column.categorical_column_with_vocabulary_list(
    key="kmer_7",
    vocabulary_list=list(map(int, vocab.keys())))

kmers_dict_8 = tf.feature_column.categorical_column_with_vocabulary_list(
    key="kmer_8",
    vocabulary_list=list(map(int, vocab.keys())))

kmers_dict_9 = tf.feature_column.categorical_column_with_vocabulary_list(
    key="kmer_9",
    vocabulary_list=list(map(int, vocab.keys())))

kmers_dict_10 = tf.feature_column.categorical_column_with_vocabulary_list(
    key="kmer_10",
    vocabulary_list=list(map(int, vocab.keys())))

kmers_dict_11 = tf.feature_column.categorical_column_with_vocabulary_list(
    key="kmer_11",
    vocabulary_list=list(map(int, vocab.keys())))

kmers_dict_12 = tf.feature_column.categorical_column_with_vocabulary_list(
    key="kmer_12",
    vocabulary_list=list(map(int, vocab.keys())))

kmers_dict_13 = tf.feature_column.categorical_column_with_vocabulary_list(
    key="kmer_13",
    vocabulary_list=list(map(int, vocab.keys())))

kmers_dict_14 = tf.feature_column.categorical_column_with_vocabulary_list(
    key="kmer_14",
    vocabulary_list=list(map(int, vocab.keys())))

kmers_dict_15 = tf.feature_column.categorical_column_with_vocabulary_list(
    key="kmer_15",
    vocabulary_list=list(map(int, vocab.keys())))

kmers_fc_embed_1 = tf.feature_column.embedding_column(
    categorical_column=kmers_dict_1, 
    dimension=256,
    initializer=init,
    trainable=False)

kmers_fc_embed_2 = tf.feature_column.embedding_column(
    categorical_column=kmers_dict_2, 
    dimension=256,
    initializer=init,
    trainable=False)

kmers_fc_embed_3 = tf.feature_column.embedding_column(
    categorical_column=kmers_dict_3, 
    dimension=256,
    initializer=init,
    trainable=False)

kmers_fc_embed_4 = tf.feature_column.embedding_column(
    categorical_column=kmers_dict_4, 
    dimension=256,
    initializer=init,
    trainable=False)

kmers_fc_embed_5 = tf.feature_column.embedding_column(
    categorical_column=kmers_dict_5, 
    dimension=256,
    initializer=init,
    trainable=False)

kmers_fc_embed_6 = tf.feature_column.embedding_column(
    categorical_column=kmers_dict_6, 
    dimension=256,
    initializer=init,
    trainable=False)

kmers_fc_embed_7 = tf.feature_column.embedding_column(
    categorical_column=kmers_dict_7, 
    dimension=256,
    initializer=init,
    trainable=False)

kmers_fc_embed_8 = tf.feature_column.embedding_column(
    categorical_column=kmers_dict_8, 
    dimension=256,
    initializer=init,
    trainable=False)

kmers_fc_embed_9 = tf.feature_column.embedding_column(
    categorical_column=kmers_dict_9, 
    dimension=256,
    initializer=init,
    trainable=False)

kmers_fc_embed_10 = tf.feature_column.embedding_column(
    categorical_column=kmers_dict_10, 
    dimension=256,
    initializer=init,
    trainable=False)

kmers_fc_embed_11 = tf.feature_column.embedding_column(
    categorical_column=kmers_dict_11, 
    dimension=256,
    initializer=init,
    trainable=False)

kmers_fc_embed_12 = tf.feature_column.embedding_column(
    categorical_column=kmers_dict_12, 
    dimension=256,
    initializer=init,
    trainable=False)

kmers_fc_embed_13 = tf.feature_column.embedding_column(
    categorical_column=kmers_dict_13, 
    dimension=256,
    initializer=init,
    trainable=False)

kmers_fc_embed_14 = tf.feature_column.embedding_column(
    categorical_column=kmers_dict_14, 
    dimension=256,
    initializer=init,
    trainable=False)

kmers_fc_embed_15 = tf.feature_column.embedding_column(
    categorical_column=kmers_dict_15, 
    dimension=256,
    initializer=init,
    trainable=False)

In [29]:
#kmers_dict

In [30]:
columns = [kmers_fc_embed_1, kmers_fc_embed_2, kmers_fc_embed_3, kmers_fc_embed_4, kmers_fc_embed_5,
           kmers_fc_embed_6, kmers_fc_embed_7, kmers_fc_embed_8, kmers_fc_embed_9, kmers_fc_embed_10,
           kmers_fc_embed_11, kmers_fc_embed_12, kmers_fc_embed_13, kmers_fc_embed_14, kmers_fc_embed_15]

In [31]:
estimator = tf.estimator.DNNClassifier(feature_columns=columns,
                                      hidden_units=[1024,512,256])


INFO:tensorflow:Using default config.
WARNING:tensorflow:Using temporary folder as model directory: C:\Users\Joey\AppData\Local\Temp\tmp8nt_njqc
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\Joey\\AppData\\Local\\Temp\\tmp8nt_njqc', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001E49D4C9160>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}

In [32]:
estimator.train(input_fn=input_fn_new, steps=10)


INFO:tensorflow:Create CheckpointSaverHook.
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-32-72ad0ac385d3> in <module>()
----> 1 estimator.train(input_fn=input_fn_new, steps=10)

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
    300 
    301     saving_listeners = _check_listeners_type(saving_listeners)
--> 302     loss = self._train_model(input_fn, hooks, saving_listeners)
    303     logging.info('Loss for final step: %s.', loss)
    304     return self

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py in _train_model(self, input_fn, hooks, saving_listeners)
    778           save_summaries_steps=self._config.save_summary_steps,
    779           config=self._session_config,
--> 780           log_step_count_steps=self._config.log_step_count_steps) as mon_sess:
    781         loss = None
    782         while not mon_sess.should_stop():

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py in MonitoredTrainingSession(master, is_chief, checkpoint_dir, scaffold, hooks, chief_only_hooks, save_checkpoint_secs, save_summaries_steps, save_summaries_secs, config, stop_grace_period_secs, log_step_count_steps)
    366     all_hooks.extend(hooks)
    367   return MonitoredSession(session_creator=session_creator, hooks=all_hooks,
--> 368                           stop_grace_period_secs=stop_grace_period_secs)
    369 
    370 

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py in __init__(self, session_creator, hooks, stop_grace_period_secs)
    671     super(MonitoredSession, self).__init__(
    672         session_creator, hooks, should_recover=True,
--> 673         stop_grace_period_secs=stop_grace_period_secs)
    674 
    675 

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py in __init__(self, session_creator, hooks, should_recover, stop_grace_period_secs)
    484     self._hooks = hooks or []
    485     for h in self._hooks:
--> 486       h.begin()
    487     # Create the session.
    488     self._coordinated_creator = self._CoordinatedSessionCreator(

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\training\basic_session_run_hooks.py in begin(self)
    416 
    417   def begin(self):
--> 418     self._summary_writer = SummaryWriterCache.get(self._checkpoint_dir)
    419     self._global_step_tensor = training_util._get_or_create_global_step_read()  # pylint: disable=protected-access
    420     if self._global_step_tensor is None:

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\summary\writer\writer_cache.py in get(logdir)
     59       if logdir not in FileWriterCache._cache:
     60         FileWriterCache._cache[logdir] = FileWriter(
---> 61             logdir, graph=ops.get_default_graph())
     62       return FileWriterCache._cache[logdir]

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\summary\writer\writer.py in __init__(self, logdir, graph, max_queue, flush_secs, graph_def, filename_suffix)
    335     event_writer = EventFileWriter(logdir, max_queue, flush_secs,
    336                                    filename_suffix)
--> 337     super(FileWriter, self).__init__(event_writer, graph, graph_def)
    338 
    339   def __enter__(self):

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\summary\writer\writer.py in __init__(self, event_writer, graph, graph_def)
     78     if graph is not None or graph_def is not None:
     79       # Calling it with both graph and graph_def for backward compatibility.
---> 80       self.add_graph(graph=graph, graph_def=graph_def)
     81       # Also export the meta_graph_def in this case.
     82       # graph may itself be a graph_def due to positional arguments

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\summary\writer\writer.py in add_graph(self, graph, global_step, graph_def)
    188 
    189       # Serialize the graph with additional info.
--> 190       true_graph_def = graph.as_graph_def(add_shapes=True)
    191       self._write_plugin_assets(graph)
    192     elif (isinstance(graph, graph_pb2.GraphDef) or

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py in as_graph_def(self, from_version, add_shapes)
   2771     """
   2772     # pylint: enable=line-too-long
-> 2773     result, _ = self._as_graph_def(from_version, add_shapes)
   2774     return result
   2775 

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py in _as_graph_def(self, from_version, add_shapes)
   2731           bytesize += op.node_def.ByteSize()
   2732           if bytesize >= (1 << 31) or bytesize < 0:
-> 2733             raise ValueError("GraphDef cannot be larger than 2GB.")
   2734       if self._functions:
   2735         for f in self._functions.values():

ValueError: GraphDef cannot be larger than 2GB.

In [ ]:


In [ ]:


In [74]:



Out[74]:
{'kmers': <tf.Tensor 'IteratorGetNext_7:0' shape=(?, 15) dtype=float32>}

In [23]:
def input_fn_new():
    kmer_gen = functools.partial(kmer_generator, "training-files/", 7)

    ds = tf.data.Dataset.from_generator(kmer_gen, 
                                        (tf.int64,
                                         tf.int64),
                                        (tf.TensorShape(15),
                                         tf.TensorShape(None)))
                                        
#    # Numbers reduced to run on my desktop
#    ds = ds.repeat(5)
#    ds = ds.prefetch(5000) # Each batch is only 2048, so prefetch 5000
#    ds = ds.shuffle(buffer_size=1000000) # Large buffer size for better randomization
#    ds = ds.batch(2048) # Reduced from 5000 so it runs quicker
    
#    ds = ds.repeat(1)
#    ds = ds.prefetch(2)
#    ds = ds.shuffle(buffer_size=500)
    ds = ds.batch(1)
    
    def add_labels(arr, lab):
        return({"kmer_1": arr[0],
                "kmer_2": arr[1],
                "kmer_3": arr[2],
                "kmer_4": arr[3],
                "kmer_5": arr[4],
                "kmer_6": arr[5],
                "kmer_7": arr[6],
                "kmer_8": arr[7],
                "kmer_9": arr[8],
                "kmer_10": arr[9],
                "kmer_11": arr[10],
                "kmer_12": arr[11],
                "kmer_13": arr[12],
                "kmer_14": arr[13],
                "kmer_15": arr[14]}, lab)
    
    ds = ds.map(add_labels)
    iterator = ds.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels

In [24]:
def add_labels(arr, lab):
        return({"kmer_1": arr[0],
                "kmer_2": arr[1],
                "kmer_3": arr[2],
                "kmer_4": arr[3],
                "kmer_5": arr[4],
                "kmer_6": arr[5],
                "kmer_7": arr[6],
                "kmer_8": arr[7],
                "kmer_9": arr[8],
                "kmer_10": arr[9],
                "kmer_11": arr[10],
                "kmer_12": arr[11],
                "kmer_13": arr[12],
                "kmer_14": arr[13],
                "kmer_15": arr[14]}, lab)
    
    ds = tf.data.Dataset.from_generator(kmer_gen, 
                                        (tf.int64,
                                         tf.int64),
                                        (tf.TensorShape([15]),
                                         tf.TensorShape(None)))
    
    ds = ds.map(add_labels)

In [25]:
iterator = ds.make_one_shot_iterator()

In [26]:
batch_features, batch_labels = iterator.get_next()

In [27]:
def init(shape=None,
         dtype=None,
         partition_info=None):
    return W

In [99]:



Out[99]:
array([15], dtype=int64)

In [ ]:
# Previous classifier

def cnn_model_fn(features, labels, mode):
    """Model for CNN"""
    
    Weights = tf.Variable(tf.constant(0.0, shape=[vocab_size, embedding_dim]), trainable=False, name="Weights")
    
    embedding_placeholder = tf.placeholder(tf.float32, [vocab_size, embedding_dim])
    embedding_init = Weights.assign(embedding_placeholder)
    
    def init_fn(scaffold, sess):
        sess.run(Weights.initializer, {Weights.initial_value: W})
    scaffold = tf.train.Scaffold(init_fn=init_fn)

    inputs = tf.feature_column.input_layer(features, [kmers_fc])
    
    input_i64 = tf.cast(inputs, tf.int64)
    
    embedded_kmers = tf.nn.embedding_lookup(Weights, input_i64)
    
    input_layer = tf.reshape(embedded_kmers, [-1, 15, 256, 1])
    
    input_layer = tf.cast(input_layer, tf.float32)
    
    conv1 = tf.layers.conv2d(inputs = input_layer,
                             filters=32,
                             kernel_size=[2,256],
                             strides=3,
                             padding="same",
                             name="Conv1",
                             activation=None)
    
    avg_pool1 = tf.layers.average_pooling2d(conv1, 
                                            pool_size=[4,32], 
                                            strides=[2,16],
                                            padding="same",
                                            name="AvgPooling_1")
    
    logits = tf.layers.dense(inputs=avg_pool1, units=len(replicons_list))
    
    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")}
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32),
                              depth=len(replicons_list))
    
    correct_prediction = tf.equal(tf.argmax(logits, 1), labels)
    
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))
    
    tf.summary.scalar("Accuracy", accuracy)
    
    # labels = tf.squeeze(labels, 1)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,
                                                  logits = logits
                                                 )
    
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
                loss=loss,
                global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
   
    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
            "accuracy": tf.metrics.accuracy(
                    labels=labels, predictions=predictions["classes"])}
    
    return tf.estimator.EstimatorSpec(
            mode=mode, 
            loss=loss, 
            eval_metric_ops=eval_metric_ops)