In [6]:
from featureforge.feature import input_schema, output_schema
@input_schema({"body": str})
@output_schema(int, lambda length: length >= 0)
def body_length(message):
    """Return the length of the message's ``body`` string.

    The decorators declare the contract: the input must carry a ``body``
    entry of type ``str`` and the result must be a non-negative ``int``.
    """
    body_text = message["body"]
    return len(body_text)
In [7]:
# A record whose "body" is a string, matching the str type that the
# input schema of body_length declares.
record = {'body': 'this is an example'}
body_length(record)
Out[7]:
In [8]:
# Deliberately invalid input: "body" is an int, while the input schema of
# body_length declares it as str. The rejection is caught and printed so the
# demonstration does not abort a "Restart & Run All".
# NOTE(review): featureforge is expected to signal the schema violation with
# an exception derived from ValueError -- confirm against the installed version.
record = {'body': 555}
try:
    body_length(record)
except ValueError as validation_error:
    print(validation_error)
In [17]:
%%writefile english-badwords.txt
bad
word
In [29]:
from featureforge.feature import Feature, soft_schema
from schema import Schema
class SubjectHasBadWord(Feature):
    """Boolean feature: does the message subject contain a listed bad word?

    The word list is read once, at construction time, from a text file that
    holds one word per line. Evaluation splits the subject on whitespace and
    checks whether any resulting token is in that list (exact, case-sensitive
    match).
    """
    input_schema = soft_schema(subject=str)
    # The above is equivalent to
    # input_schema = Schema({"subject": str, str: Optional(object)})
    output_schema = Schema(bool)

    def __init__(self, bad_words_filename):
        """Load the bad-word list from ``bad_words_filename``.

        Each line of the file is treated as one word. Surrounding whitespace
        (including the line terminator) is removed and blank lines are
        skipped.
        """
        # readlines() keeps the trailing "\n" on every entry, so the raw lines
        # can never equal the tokens produced by str.split() in _evaluate.
        # Strip them, and use a context manager so the file handle is closed.
        with open(bad_words_filename) as bad_words_file:
            stripped_lines = (line.strip() for line in bad_words_file)
            self.bad_words = set(word for word in stripped_lines if word)

    def _evaluate(self, message):
        """Return True when any whitespace-separated subject token is a bad word."""
        subject_words = set(message["subject"].split())
        return bool(subject_words & self.bad_words)


# Feature instance backed by the word list stored in english-badwords.txt.
has_bad_word_english = SubjectHasBadWord("english-badwords.txt")
In [30]:
# This subject contains the tokens "bad" and "word".
record = {'subject': 'this is a bad word'}
has_bad_word_english(record)
Out[30]:
In [32]:
# This subject contains neither "bad" nor "word".
record = {'subject': 'this is another one'}
has_bad_word_english(record)
Out[32]: