In [1]:
import yara
import re
import io
import bsonsearch

In [2]:
# YARA rule text consumed by the YARA benchmark cell below.
# The rule declares three case-insensitive ("nocase") strings and its
# condition is satisfied when the input contains both "hello" and "world",
# or contains "dominant rule" on its own.
yara_source = '''
rule example
{
    strings:
        $a = "hello" nocase
        $b = "world" nocase
        $c = "dominant rule" nocase
    condition:
        ($a and $b) or $c
}
'''

In [3]:
# Sample documents for the benchmarks; both are expected to match.
# doc1 has "hello" and "world" in mixed case -> the ($a and $b) branch.
doc1 = {
    "msg": "hELLO there WOrld",
}
# doc2 has neither "hello" nor "world", only "dominant rule" -> the $c branch.
doc2 = {
    "msg": "not hw but has a dominant rule string in it",
}

In [4]:
%%timeit
with bsonsearch.bsoncompare() as bc:
    doc1_id = bc.generate_doc(doc1)
    doc2_id = bc.generate_doc(doc2)
    regx_spec = {"$or":[{"msg": re.compile(r".*dominant rule.*", re.IGNORECASE)},
                        {"$and":[{"msg": re.compile(r".*hello.*", re.IGNORECASE)},
                                 {"msg": re.compile(r".*world.*", re.IGNORECASE)}]}]}
    regx_matcher = bc.generate_matcher(regx_spec)
    result1 = bc.match_doc(regx_matcher, doc1_id)
    result2 = bc.match_doc(regx_matcher, doc2_id)
    assert (result1 and result2)


100 loops, best of 3: 8.28 ms per loop

In [5]:
%%timeit
with bsonsearch.bsoncompare() as bc:
    doc1_id = bc.generate_doc(doc1)
    doc2_id = bc.generate_doc(doc2)
    yara_spec = {"msg": bsonsearch.YARA_COMPILE_STR(yara_source)}
    yara_matcher = bc.generate_matcher(yara_spec)
    result1 = bc.match_doc(yara_matcher, doc1_id)
    result2 =  bc.match_doc(yara_matcher, doc2_id)
    assert (result1 and result2)


100 loops, best of 3: 9.85 ms per loop

In [ ]: