In [1]:
"""Example code for TensorFlow Wide & Deep Tutorial using TF.Learn API."""
# Future statements may only be preceded by the docstring, comments and blank
# lines. IPython tolerates other placements because it compiles a cell one
# statement at a time, but the same code exported as a plain .py script would
# fail with "from __future__ imports must occur at the beginning of the file".
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import warnings

# Silence every Python warning for the rest of the session. The original note
# attributed the noise to seaborn; seaborn is not imported in the visible cells.
warnings.filterwarnings("ignore")

import tempfile
from six.moves import urllib

import pandas as pd
import tensorflow as tf

# Command-line style settings registered through TensorFlow's flags module.
# NOTE(review): none of the visible cells reads FLAGS; paths and step counts
# further down are hard-coded instead -- confirm whether these are still needed.
flags = tf.app.flags
FLAGS = flags.FLAGS

# Output location and model family.
flags.DEFINE_string("model_dir", "", "Base directory for output models.")
flags.DEFINE_string(
    "model_type", "wide_n_deep",
    "Valid model types: {'wide', 'deep', 'wide_n_deep'}.")

# Training length.
flags.DEFINE_integer("train_steps", 200, "Number of training steps.")

# Data locations.
flags.DEFINE_string("train_data", "../data", "Path to the training data.")
flags.DEFINE_string("test_data", "../data", "Path to the test data.")

In [2]:
# "Position", "HistCTR" , "IsUserLoggedOn" , "reg_ads" \
# , "highlighted_ads" ,"context_ads", "user_non_clicks_no",  "day_of_week" , "hour" ,\
# "AdTitleSZ" , "SearchQuerySZ",  "weight",  "IsClick",  "SearchParamsNum", "SearchAdSimpleRatio"

In [3]:
# Columns that are dropped from the loaded CSV frames.
donts = [
    "user_click_prob",
    "SearchCategoryLevel",
    "Price",
    "user_clicks_no",
    "hour",
    "SearchRussian",
    "SearchParamsSZ",
    "SearchAdPartialRatio",
]

# The label plus every feature listed in CONTINUOUS_COLUMNS.
COLUMNS = [
    "Position",
    "HistCTR",
    "IsUserLoggedOn",
    "reg_ads",
    "highlighted_ads",
    "context_ads",
    "user_non_clicks_no",
    "day_of_week",
    "AdTitleSZ",
    "SearchQuerySZ",
    "weight",
    "IsClick",
    "SearchParamsNum",
    "SearchAdSimpleRatio",
]

# Binary target column.
LABEL_COLUMN = "IsClick"

# CATEGORICAL_COLUMNS is not defined: every feature is treated as numeric.
CONTINUOUS_COLUMNS = [
    "Position",
    "user_non_clicks_no",
    "IsUserLoggedOn",
    "day_of_week",
    "weight",
    "HistCTR",
    "reg_ads",
    "highlighted_ads",
    "context_ads",
    "AdTitleSZ",
    "SearchQuerySZ",
    "SearchParamsNum",
    "SearchAdSimpleRatio",
]

In [4]:
# Locations of the training and validation CSV files.
SOURCE_TRAIN_ALL = "../data/train4.csv"
SOURCE_TEST = "../data/valid4.csv"

# Training frame: the first 200000 rows, without the columns listed in `donts`.
df = pd.read_csv(SOURCE_TRAIN_ALL, sep=",", nrows=200000).drop(donts, axis=1)

# Evaluation frame: the first 10000 rows, with the same columns removed.
df_test = pd.read_csv(SOURCE_TEST, sep=",", nrows=10000).drop(donts, axis=1)

In [5]:
#labels = df["IsClick"]
#df = df.drop(["IsClick"], axis=1)

In [6]:
def input_fn(df):
  """Turn a DataFrame into the (features, label) pair an estimator consumes.

  Args:
    df: pandas DataFrame holding every column named in CONTINUOUS_COLUMNS as
      well as the LABEL_COLUMN column.

  Returns:
    A tuple (feature_cols, label). feature_cols is a dict mapping each name in
    CONTINUOUS_COLUMNS to a tf.constant built from that column's values; label
    is a tf.constant built from the LABEL_COLUMN values.
  """
  # Exactly one constant tensor per continuous column. An earlier version also
  # built an identical, never-used dict, which doubled the constants created.
  feature_cols = {k: tf.constant(df[k].values) for k in CONTINUOUS_COLUMNS}
  # No categorical columns are defined in this notebook; if they are added,
  # their tensors would be merged into feature_cols at this point.
  # Converts the label column into a constant Tensor.
  label = tf.constant(df[LABEL_COLUMN].values)
  return feature_cols, label

def train_input_fn():
  """Zero-argument input function for training; feeds `df` to input_fn."""
  training_inputs = input_fn(df)
  return training_inputs

def eval_input_fn():
  """Zero-argument input function for evaluation; feeds `df_test` to input_fn."""
  evaluation_inputs = input_fn(df_test)
  return evaluation_inputs

In [7]:
# Fresh temporary directory for checkpoints and summaries; the estimator
# constructed later in the notebook is pointed at the same directory.
model_dir = tempfile.mkdtemp()

# LinearClassifier expects FeatureColumn instances rather than bare column
# names, so each name is wrapped in a real-valued column before being passed.
m = tf.contrib.learn.LinearClassifier(
  feature_columns=[
    tf.contrib.layers.real_valued_column(column_name)
    for column_name in (
      "Position", "HistCTR", "IsUserLoggedOn", "reg_ads",
      "highlighted_ads", "context_ads", "user_non_clicks_no", "day_of_week",
      "AdTitleSZ", "SearchQuerySZ", "weight", "SearchParamsNum",
      "SearchAdSimpleRatio")
  ],
  model_dir=model_dir)


INFO:tensorflow:Using default config.
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_environment': 'local', 'keep_checkpoint_every_n_hours': 10000, 'tf_random_seed': None, '_evaluation_master': '', 'save_summary_steps': 100, '_is_chief': True, 'keep_checkpoint_max': 5, 'save_checkpoints_steps': None, '_task_id': 0, '_master': '', '_task_type': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff06b2e6748>, 'tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
}
INFO:tensorflow:Using config: {'save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_environment': 'local', 'keep_checkpoint_every_n_hours': 10000, 'tf_random_seed': None, '_evaluation_master': '', 'save_summary_steps': 100, '_is_chief': True, 'keep_checkpoint_max': 5, 'save_checkpoints_steps': None, '_task_id': 0, '_master': '', '_task_type': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff06b2e6748>, 'tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
}

In [8]:
# Names of the thirteen feature columns (everything in COLUMNS but the label).
ff = [
    "Position",
    "HistCTR",
    "IsUserLoggedOn",
    "reg_ads",
    "highlighted_ads",
    "context_ads",
    "user_non_clicks_no",
    "day_of_week",
    "AdTitleSZ",
    "SearchQuerySZ",
    "weight",
    "SearchParamsNum",
    "SearchAdSimpleRatio",
]

In [9]:
# One real-valued feature column per numeric input; each variable carries the
# same name as the DataFrame column it reads.
_real_valued = tf.contrib.layers.real_valued_column

Position = _real_valued("Position")
HistCTR = _real_valued("HistCTR")
context_ads = _real_valued("context_ads")
weight = _real_valued("weight")
IsUserLoggedOn = _real_valued("IsUserLoggedOn")
reg_ads = _real_valued("reg_ads")
highlighted_ads = _real_valued("highlighted_ads")
user_non_clicks_no = _real_valued("user_non_clicks_no")
day_of_week = _real_valued("day_of_week")
SearchQuerySZ = _real_valued("SearchQuerySZ")
SearchParamsNum = _real_valued("SearchParamsNum")
SearchAdSimpleRatio = _real_valued("SearchAdSimpleRatio")
AdTitleSZ = _real_valued("AdTitleSZ")

In [ ]:
# Binary linear classifier over the thirteen real-valued columns, optimised
# with FTRL (learning rate 0.1, L1 and L2 strengths of 1.0) and writing its
# checkpoints to model_dir.
# NOTE(review): "weight" is passed both as an input feature and as
# weight_column_name -- confirm that using it in both roles is intended.
aa = tf.contrib.learn.LinearClassifier(
    feature_columns=[
        AdTitleSZ,
        Position,
        HistCTR,
        context_ads,
        weight,
        IsUserLoggedOn,
        reg_ads,
        highlighted_ads,
        user_non_clicks_no,
        day_of_week,
        SearchQuerySZ,
        SearchParamsNum,
        SearchAdSimpleRatio,
    ],
    model_dir=model_dir,
    n_classes=2,
    weight_column_name="weight",
    optimizer=tf.train.FtrlOptimizer(
        learning_rate=0.1,
        l1_regularization_strength=1.0,
        l2_regularization_strength=1.0),
    gradient_clip_norm=None,
    enable_centered_bias=False,
    _joint_weight=False,
    config=None,
    feature_engineering_fn=None)


INFO:tensorflow:Using default config.
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_environment': 'local', 'keep_checkpoint_every_n_hours': 10000, 'tf_random_seed': None, '_evaluation_master': '', 'save_summary_steps': 100, '_is_chief': True, 'keep_checkpoint_max': 5, 'save_checkpoints_steps': None, '_task_id': 0, '_master': '', '_task_type': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff065a54b38>, 'tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
}
INFO:tensorflow:Using config: {'save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_environment': 'local', 'keep_checkpoint_every_n_hours': 10000, 'tf_random_seed': None, '_evaluation_master': '', 'save_summary_steps': 100, '_is_chief': True, 'keep_checkpoint_max': 5, 'save_checkpoints_steps': None, '_task_id': 0, '_master': '', '_task_type': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff065a54b38>, 'tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
}

In [ ]:
# Train the classifier for 200 steps on the tensors produced by train_input_fn.
aa.fit(
    input_fn=train_input_fn,
    steps=200)


WARNING:tensorflow:From /home/mazen/anaconda3/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/linear.py:446 in fit.: calling BaseEstimator.fit (from tensorflow.contrib.learn.python.learn.estimators.estimator) with x is deprecated and will be removed after 2016-12-01.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
WARNING:tensorflow:From /home/mazen/anaconda3/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/linear.py:446 in fit.: calling BaseEstimator.fit (from tensorflow.contrib.learn.python.learn.estimators.estimator) with x is deprecated and will be removed after 2016-12-01.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
WARNING:tensorflow:From /home/mazen/anaconda3/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/linear.py:446 in fit.: calling BaseEstimator.fit (from tensorflow.contrib.learn.python.learn.estimators.estimator) with y is deprecated and will be removed after 2016-12-01.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
WARNING:tensorflow:From /home/mazen/anaconda3/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/linear.py:446 in fit.: calling BaseEstimator.fit (from tensorflow.contrib.learn.python.learn.estimators.estimator) with y is deprecated and will be removed after 2016-12-01.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
WARNING:tensorflow:From /home/mazen/anaconda3/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/linear.py:446 in fit.: calling BaseEstimator.fit (from tensorflow.contrib.learn.python.learn.estimators.estimator) with batch_size is deprecated and will be removed after 2016-12-01.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
WARNING:tensorflow:From /home/mazen/anaconda3/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/linear.py:446 in fit.: calling BaseEstimator.fit (from tensorflow.contrib.learn.python.learn.estimators.estimator) with batch_size is deprecated and will be removed after 2016-12-01.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Create CheckpointSaverHook.

In [ ]:
# Run a single evaluation step on the tensors produced by eval_input_fn and
# keep the returned metrics for display.
results = aa.evaluate(
    input_fn=eval_input_fn,
    steps=1)

In [ ]:
# Show every evaluation metric, one per line, ordered by metric name.
for key, value in sorted(results.items()):
    print(key, value)

In [ ]:


In [ ]:


In [ ]: