featureforge


In [6]:
from featureforge.feature import input_schema, output_schema

@input_schema({"body": str})
@output_schema(int, lambda i: i >= 0)
def body_length(message):
    """Return the length of the ``"body"`` entry of ``message``.

    The decorators declare the expected input (a mapping whose ``"body"``
    value is a ``str``) and the expected output (a non-negative ``int``).
    """
    body = message["body"]
    return len(body)

In [7]:
# Sample record with a string body, built in a single literal.
record = {'body': 'this is an example'}
body_length(record)


Out[7]:
18

In [8]:
# A non-string body is passed on purpose to show how the direct call reacts.
record = {'body': 555}
body_length(record)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-8-20cbaa6cff4b> in <module>()
      1 record = {}
      2 record['body'] = 555
----> 3 body_length(record)

<ipython-input-6-bb77d45f5a28> in body_length(message)
      4 @output_schema(int, lambda i: i >= 0)
      5 def body_length(message):
----> 6     return len(message["body"])

TypeError: object of type 'int' has no len()

In [17]:
%%writefile english-badwords.txt
bad
word


Writing english-badwords.txt

In [29]:
from featureforge.feature import Feature, soft_schema
from schema import Schema

class SubjectHasBadWord(Feature):
    """Boolean feature: does the subject contain any word from a word list?

    The word list is read once, at construction time, from a text file with
    one word per line. Evaluation splits the subject on whitespace and looks
    for an exact (case-sensitive) match between any token and any listed word.
    """

    input_schema = soft_schema(subject=str)
    # The above is equivalent to
    #   input_schema = Schema({"subject": str, str: Optional(object)})
    output_schema = Schema(bool)

    def __init__(self, bad_words_filename):
        """Load the set of bad words from ``bad_words_filename``.

        Args:
            bad_words_filename: Path to a text file with one word per line.

        Raises:
            OSError: If the file cannot be opened.
        """
        # The ``with`` block closes the file handle deterministically.
        with open(bad_words_filename) as words_file:
            # Strip each line: the raw lines keep their terminator ("bad\n"),
            # which can never equal a token produced by ``str.split()``.
            stripped_lines = (line.strip() for line in words_file)
            # Drop blank lines so the empty string is never a "bad word".
            self.bad_words = {word for word in stripped_lines if word}

    def _evaluate(self, message):
        """Return True if any whitespace-separated subject token is a bad word.

        Args:
            message: Mapping with a string under the ``"subject"`` key.
        """
        subject_words = set(message["subject"].split())
        return bool(subject_words & self.bad_words)

# Feature instance backed by the word-list file named below.
has_bad_word_english = SubjectHasBadWord(
    bad_words_filename="english-badwords.txt",
)

In [30]:
# Subject whose tokens include entries from the word list.
record = {'subject': 'this is a bad word'}
has_bad_word_english(record)


Out[30]:
True

In [32]:
# Subject with no token from the word list.
record = {'subject': 'this is another one'}
has_bad_word_english(record)


Out[32]:
False