In [1]:
"""Example code for TensorFlow Wide & Deep Tutorial using TF.Learn API."""
# Future statements must precede every other statement; only the docstring,
# comments and blank lines may come before them. Keeping them first lets this
# cell also run when the notebook is exported as a plain Python module.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# The filter is installed before the remaining imports so that warnings raised
# while importing them are suppressed as well. (The original note blamed
# seaborn, which this cell does not import.)
import warnings
warnings.filterwarnings("ignore")

import tempfile

from six.moves import urllib
import pandas as pd
import tensorflow as tf
# Flag definitions: each call takes (name, default value, help text).
# NOTE(review): none of the cells shown here read FLAGS; the paths and step
# counts used below are hard-coded instead -- confirm whether these are needed.
flags = tf.app.flags
FLAGS = flags.FLAGS

# Model output location and estimator variant.
flags.DEFINE_string("model_dir", "", "Base directory for output models.")
flags.DEFINE_string(
    "model_type",
    "wide_n_deep",
    "Valid model types: {'wide', 'deep', 'wide_n_deep'}.",
)

# Training length.
flags.DEFINE_integer("train_steps", 200, "Number of training steps.")

# Input data locations.
flags.DEFINE_string("train_data", "../data", "Path to the training data.")
flags.DEFINE_string("test_data", "../data", "Path to the test data.")
In [2]:
# "Position", "HistCTR" , "IsUserLoggedOn" , "reg_ads" \
# , "highlighted_ads" ,"context_ads", "user_non_clicks_no", "day_of_week" , "hour" ,\
# "AdTitleSZ" , "SearchQuerySZ", "weight", "IsClick", "SearchParamsNum", "SearchAdSimpleRatio"
In [3]:
# Columns removed from the raw frames right after loading.
donts = [
    "user_click_prob",
    "SearchCategoryLevel",
    "Price",
    "user_clicks_no",
    "hour",
    "SearchRussian",
    "SearchParamsSZ",
    "SearchAdPartialRatio",
]

# Columns kept for modelling, label included.
COLUMNS = [
    "Position",
    "HistCTR",
    "IsUserLoggedOn",
    "reg_ads",
    "highlighted_ads",
    "context_ads",
    "user_non_clicks_no",
    "day_of_week",
    "AdTitleSZ",
    "SearchQuerySZ",
    "weight",
    "IsClick",
    "SearchParamsNum",
    "SearchAdSimpleRatio",
]

# Name of the target column.
LABEL_COLUMN = "IsClick"

# No CATEGORICAL_COLUMNS list is defined; every feature is fed as a
# continuous (real-valued) column.
CONTINUOUS_COLUMNS = [
    "Position",
    "user_non_clicks_no",
    "IsUserLoggedOn",
    "day_of_week",
    "weight",
    "HistCTR",
    "reg_ads",
    "highlighted_ads",
    "context_ads",
    "AdTitleSZ",
    "SearchQuerySZ",
    "SearchParamsNum",
    "SearchAdSimpleRatio",
]
In [4]:
# Training frame: the first 200000 rows of the training CSV, with the
# excluded columns (donts) dropped straight away.
SOURCE_TRAIN_ALL = "../data/train4.csv"
df = pd.read_csv(SOURCE_TRAIN_ALL, sep=",", nrows=200000).drop(donts, axis=1)

# Evaluation frame: the first 10000 rows of the validation CSV, treated the
# same way.
SOURCE_TEST = "../data/valid4.csv"
df_test = pd.read_csv(SOURCE_TEST, sep=",", nrows=10000).drop(donts, axis=1)
In [5]:
#labels = df["IsClick"]
#df = df.drop(["IsClick"], axis=1)
In [6]:
def input_fn(df):
    """Turn a DataFrame into the (features, label) pair used by the estimator.

    Args:
        df: pandas DataFrame containing every column named in
            CONTINUOUS_COLUMNS as well as LABEL_COLUMN.

    Returns:
        A tuple (feature_cols, label). feature_cols is a dict mapping each
        name in CONTINUOUS_COLUMNS to a tf.constant built from that column's
        values; label is a tf.constant built from the LABEL_COLUMN values.

    Raises:
        KeyError: if df lacks one of the required columns.
    """
    # One constant tensor per continuous column. The previous version built
    # this dict twice (an unused `con_cols` copy), which embedded every
    # column in the graph a second time for no benefit.
    feature_cols = {
        name: tf.constant(df[name].values) for name in CONTINUOUS_COLUMNS
    }
    # No categorical features are used here; the SparseTensor-based branch
    # that would be merged into feature_cols is not part of this notebook.

    # Converts the label column into a constant Tensor.
    label = tf.constant(df[LABEL_COLUMN].values)
    return feature_cols, label
def train_input_fn():
    """Return the (features, label) pair built from the training frame `df`."""
    features, label = input_fn(df)
    return features, label
def eval_input_fn():
    """Return the (features, label) pair built from the test frame `df_test`."""
    features, label = input_fn(df_test)
    return features, label
In [7]:
# Fresh temporary directory for checkpoints and summaries.
model_dir = tempfile.mkdtemp()

# Baseline linear classifier over all thirteen numeric inputs.
# feature_columns must hold FeatureColumn objects, not bare column names, so
# each name is wrapped in a real-valued column here. Building them inline
# keeps this cell runnable on its own in a top-to-bottom execution.
m = tf.contrib.learn.LinearClassifier(
    feature_columns=[
        tf.contrib.layers.real_valued_column(column_name)
        for column_name in (
            "Position",
            "HistCTR",
            "IsUserLoggedOn",
            "reg_ads",
            "highlighted_ads",
            "context_ads",
            "user_non_clicks_no",
            "day_of_week",
            "AdTitleSZ",
            "SearchQuerySZ",
            "weight",
            "SearchParamsNum",
            "SearchAdSimpleRatio",
        )
    ],
    model_dir=model_dir)
In [8]:
# Names of the thirteen input columns (label excluded).
ff = [
    "Position",
    "HistCTR",
    "IsUserLoggedOn",
    "reg_ads",
    "highlighted_ads",
    "context_ads",
    "user_non_clicks_no",
    "day_of_week",
    "AdTitleSZ",
    "SearchQuerySZ",
    "weight",
    "SearchParamsNum",
    "SearchAdSimpleRatio",
]
In [9]:
# One real-valued feature column per numeric input; each variable is named
# after the DataFrame column it reads.
_real_valued = tf.contrib.layers.real_valued_column

# Ad / impression level inputs.
Position = _real_valued("Position")
HistCTR = _real_valued("HistCTR")
AdTitleSZ = _real_valued("AdTitleSZ")
reg_ads = _real_valued("reg_ads")
highlighted_ads = _real_valued("highlighted_ads")
context_ads = _real_valued("context_ads")

# Search level inputs.
SearchQuerySZ = _real_valued("SearchQuerySZ")
SearchParamsNum = _real_valued("SearchParamsNum")
SearchAdSimpleRatio = _real_valued("SearchAdSimpleRatio")

# User / time level inputs.
IsUserLoggedOn = _real_valued("IsUserLoggedOn")
user_non_clicks_no = _real_valued("user_non_clicks_no")
day_of_week = _real_valued("day_of_week")

# Column that is also named as the weight column of the classifier below.
weight = _real_valued("weight")
In [ ]:
# Binary linear classifier trained with FTRL (L1 and L2 regularisation).
# NOTE(review): "weight" is supplied both as an input feature column and as
# weight_column_name -- confirm that using it as a model input is intended.
aa = tf.contrib.learn.LinearClassifier(
    feature_columns=[
        AdTitleSZ,
        Position,
        HistCTR,
        context_ads,
        weight,
        IsUserLoggedOn,
        reg_ads,
        highlighted_ads,
        user_non_clicks_no,
        day_of_week,
        SearchQuerySZ,
        SearchParamsNum,
        SearchAdSimpleRatio,
    ],
    model_dir=model_dir,
    n_classes=2,
    weight_column_name="weight",
    optimizer=tf.train.FtrlOptimizer(
        learning_rate=0.1,
        l1_regularization_strength=1.0,
        l2_regularization_strength=1.0,
    ),
    gradient_clip_norm=None,
    enable_centered_bias=False,
    _joint_weight=False,
    config=None,
    feature_engineering_fn=None,
)
In [ ]:
# Train for 200 steps on the tensors produced by train_input_fn.
aa.fit(steps=200, input_fn=train_input_fn)
In [ ]:
# Run a single evaluation step on the tensors produced by eval_input_fn.
results = aa.evaluate(steps=1, input_fn=eval_input_fn)
In [ ]:
# Report every evaluation metric, ordered by metric name.
for metric_name, metric_value in sorted(results.items()):
    print(metric_name, metric_value)
In [ ]:
In [ ]:
In [ ]: