In [ ]:
# Ensure TensorFlow 1.13.1 is installed: if `pip3 freeze` does not already
# list it, fall back to installing it (this notebook targets the TF 1.x API).
!pip3 freeze | grep tensorflow==1.13.1 || pip3 install tensorflow==1.13.1
In [2]:
import tensorflow as tf
# TF 1.x: opt in to eager execution so ops evaluate immediately instead of
# building a graph. Must be called right after import, before any TF op runs.
tf.enable_eager_execution()
# Silence INFO/WARNING log noise from TF 1.x; only errors are shown.
tf.logging.set_verbosity(tf.logging.ERROR)
In [3]:
# Toy Features Dictionary
# Toy input data: five homes, each with a square footage and a categorical
# house type ('townhouse' is deliberately outside the vocab used later).
features = dict(
    sq_footage=[1000, 2000, 3000, 4000, 5000],
    house_type=["house", "house", "apt", "apt", "townhouse"],
)
In [4]:
# One numeric column plus a one-hot (indicator) encoding of house_type
# built from an explicit two-word vocabulary.
house_type_col = tf.feature_column.categorical_column_with_vocabulary_list(
    'house_type', ['house', 'apt'])
feat_cols = [
    tf.feature_column.numeric_column('sq_footage'),
    tf.feature_column.indicator_column(house_type_col),
]
In [5]:
# Materialize the raw features through the column specs into a dense tensor.
tf.feature_column.input_layer(features,feat_cols)
Out[5]:
In [7]:
# Same columns, but out-of-vocabulary categories ('townhouse') now map to
# vocabulary index 0 ('house') via default_value=0 instead of being dropped.
house_type_col = tf.feature_column.categorical_column_with_vocabulary_list(
    'house_type', ['house', 'apt'], default_value=0)
feat_cols = [
    tf.feature_column.numeric_column('sq_footage'),
    tf.feature_column.indicator_column(house_type_col),
]
tf.feature_column.input_layer(features, feat_cols)
Out[7]:
In [8]:
# Variant: give out-of-vocabulary categories their own extra bucket
# (num_oov_buckets=1), so 'townhouse' gets a third one-hot slot rather than
# sharing an index with a vocabulary word.
house_type_col = tf.feature_column.categorical_column_with_vocabulary_list(
    'house_type', ['house', 'apt'], num_oov_buckets=1)
feat_cols = [
    tf.feature_column.numeric_column('sq_footage'),
    tf.feature_column.indicator_column(house_type_col),
]
tf.feature_column.input_layer(features, feat_cols)
Out[8]:
Assume we didn't have a vocabulary list available. Modify the feature column to one-hot encode house type based on a hash function.
What is the minimum hash bucket size to ensure no collisions? 5 is the minimum. With a hash bucket size of 2, all categories collide. With a size of 3 or 4, 'house' and 'townhouse' collide.
In [14]:
# No vocabulary available: one-hot encode house_type by hashing each string
# into one of 5 buckets (per the note above, 5 is the smallest bucket count
# with no collisions for these three categories).
hashed_house_type = tf.feature_column.categorical_column_with_hash_bucket(
    'house_type', hash_bucket_size=5)
feat_cols = [
    tf.feature_column.numeric_column('sq_footage'),
    tf.feature_column.indicator_column(hashed_house_type),
]
tf.feature_column.input_layer(features, feat_cols)
Out[14]: