Let's look at a basic usage example for Haystack.

First we have a program which allocates structures on the HEAP:


In [ ]:
!cat ../test/src/test-ctypes3.c


/*
    Simple test of multiple simple structures allocations.
*/

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <dlfcn.h>
#include <stdbool.h>

/*
 * Record that the example searches for in the memory dump.
 * test3() fills val1/val1b and val2/val2b with matching marker values and
 * makes `me` point back at the record itself.
 * The field order is the in-memory layout, so it must not be changed.
 */
struct test3
{
  unsigned int val1;
  unsigned int val2;
  unsigned int * me;
  unsigned int val2b;
  unsigned int val1b;
};

/*
 * Second record type allocated by the example.
 * test1() sets val1 to a marker value and makes both ptr1 and ptr2 point
 * back at the node itself.
 */
struct Node {
  unsigned int val1;
  void * ptr1;
  void * ptr2;
};

/*
 * Allocate one struct test3 on the heap, fill it with marker values and
 * print its address on stdout.
 *
 * val1/val1b are set to 0xdeadbeef, val2/val2b to 0x10101010, and `me`
 * points back at the record itself.
 * The record is never freed here (presumably so that it is still present
 * when the process memory is dumped).
 *
 * Returns 0 on success, 1 if the allocation failed.
 */
int test3(){
  struct test3 * t3;
  t3 = (struct test3 *) malloc(sizeof(struct test3));
  // do not write through a NULL pointer if the allocation failed
  if (t3 == NULL) {
    perror("malloc");
    return 1;
  }
  t3->val1 = 0xdeadbeef;
  t3->val1b = 0xdeadbeef;
  t3->val2 = 0x10101010;
  t3->val2b = 0x10101010;
  t3->me = (unsigned int *) t3;
  // %p expects a void pointer
  printf("o: test3 %p\n", (void *) t3);

  return 0;
}

/*
 * Allocate one struct Node on the heap, fill it and print its address on
 * stdout.
 *
 * val1 is set to 0xdeadbeef and both ptr1 and ptr2 point back at the node
 * itself.
 * The node is never freed here (presumably so that it is still present
 * when the process memory is dumped).
 *
 * Returns 0 on success, 1 if the allocation failed.
 */
int test1(){
  struct Node * node;
  node = (struct Node *) malloc(sizeof(struct Node));
  // do not write through a NULL pointer if the allocation failed
  if (node == NULL) {
    perror("malloc");
    return 1;
  }
  node->val1 = 0xdeadbeef;
  node->ptr1 = node;
  node->ptr2 = node;
  // %p expects a void pointer
  printf("o: test1 %p\n", (void *) node);

  return 0;
}


/*
 * Allocate three struct Node and three struct test3 records, alternating
 * between the two types, then print the process id and block.
 * The order of the calls determines the order of the addresses printed on
 * stdout.
 */
int main(){

  // TEST
  // alternate Node / test3 allocations, three of each
  test1();
  test3();
  test1();
  test3();
  test1();
  test3();

  printf("pid %u\n",getpid());
  // make sure the addresses and the pid are written out before blocking
  fflush(stdout);
  // sleep() takes an unsigned int, so -1 becomes the largest possible delay:
  // the process stays alive (presumably so that its memory can be dumped)
  sleep(-1);

  return 0;
}


Then, using ctypeslib's clang2py tool, we generate Python ctypes records for the same structures.


In [ ]:
!clang2py ../test/src/test-ctypes3.c #-o ../test/src/ctypes3_gen64.py


find_library("clang-3.7") libclang-3.7.so.1
find_library("clang") None
# -*- coding: utf-8 -*-
#
# TARGET arch is: []
# WORD_SIZE is: 8
# POINTER_SIZE is: 8
# LONGDOUBLE_SIZE is: 16
#
import ctypes


# If the local pointer size is the same as the target's (8 bytes), keep the
# ctypes pointer function.
if ctypes.sizeof(ctypes.c_void_p) == 8:
    POINTER_T = ctypes.POINTER
else:
    # required to access _ctypes
    import _ctypes
    # Emulate a pointer class using the appropriate c_int32/c_int64 type.
    # The new class should have:
    # ['__module__', 'from_param', '_type_', '__dict__', '__weakref__', '__doc__']
    # but there must be one single class for each base type,
    # so that if A == B, POINTER_T(A) == POINTER_T(B)
    ctypes._pointer_t_type_cache = {}
    def POINTER_T(pointee):
        """Return the emulated 8-byte pointer class for ``pointee``.

        ``pointee`` is the pointed-to ctypes type, or None for a void
        pointer. The returned class is an integer-like ctypes type that
        only holds the address; it is cached per pointee class name, so
        repeated calls with the same type return the same class.
        """
        # a pointer should have the same length as the target's 8-byte word
        fake_ptr_base_type = ctypes.c_uint64
        # specific case for c_void_p
        if pointee is None: # VOID pointer type. c_void_p.
            pointee = type(None) # ctypes.c_void_p # ctypes.c_ulong
            clsname = 'c_void'
        else:
            clsname = pointee.__name__
        if clsname in ctypes._pointer_t_type_cache:
            return ctypes._pointer_t_type_cache[clsname]
        # make template
        class _T(_ctypes._SimpleCData,):
            # Take the format code from the 8-byte integer type instead of
            # hard-coding 'L': this branch only runs when the local pointer
            # size is not 8 bytes, and on such hosts 'L' (unsigned long) is
            # typically 4 bytes, which would give the wrong field size.
            _type_ = fake_ptr_base_type._type_
            _subtype_ = pointee
            def _sub_addr_(self):
                # the stored integer is the pointed-to address
                return self.value
            def __repr__(self):
                return '%s(%d)'%(clsname, self.value)
            def contents(self):
                raise TypeError('This is not a ctypes pointer.')
            def __init__(self, **args):
                raise TypeError('This is not a ctypes pointer. It is not instanciable.')
        _class = type('LP_%d_%s'%(8, clsname), (_T,),{})
        ctypes._pointer_t_type_cache[clsname] = _class
        return _class

# There is no native 128-bit integer here: both the signed and the unsigned
# spelling are modelled as the same 16-byte array.
c_int128 = ctypes.c_ubyte * 16
c_uint128 = c_int128
# C `void` is represented by None.
void = None
# Use the native long double when it is 16 bytes wide, otherwise fall back
# to an opaque 16-byte array of the same size.
c_long_double_t = (
    ctypes.c_longdouble
    if ctypes.sizeof(ctypes.c_longdouble) == 16
    else ctypes.c_ubyte * 16
)



class struct_test3(ctypes.Structure):
    """ctypes record generated for the C ``struct test3``.

    The field order mirrors the C declaration: two 32-bit unsigned values,
    a pointer to an unsigned int, then two more 32-bit unsigned values.
    """
    # NOTE(review): "source:False" presumably records that the C struct was
    # not declared packed -- confirm against ctypeslib.
    _pack_ = True # source:False
    # the order of the entries is the memory layout; do not reorder
    _fields_ = [
    ('val1', ctypes.c_uint32),
    ('val2', ctypes.c_uint32),
    ('me', POINTER_T(ctypes.c_uint32)),
    ('val2b', ctypes.c_uint32),
    ('val1b', ctypes.c_uint32),
     ]

class struct_Node(ctypes.Structure):
    """ctypes record generated for the C ``struct Node``.

    One 32-bit unsigned value, 4 explicit padding bytes, then two void
    pointers.
    """
    # NOTE(review): "source:False" presumably records that the C struct was
    # not declared packed -- confirm against ctypeslib.
    _pack_ = True # source:False
    # the order of the entries is the memory layout; do not reorder
    _fields_ = [
    ('val1', ctypes.c_uint32),
    # explicit filler after val1; presumably reproduces the alignment gap the
    # C compiler leaves before the 8-byte pointers
    ('PADDING_0', ctypes.c_ubyte * 4),
    ('ptr1', POINTER_T(None)),
    ('ptr2', POINTER_T(None)),
     ]

__all__ = \
    ['struct_Node', 'struct_test3']

We have run the compiled C program, and dumped its memory mappings to files:


In [4]:
!ls -hsw 1 ../test/src/test-ctypes3.64.dump/
print ''
!cat ../test/src/test-ctypes3.64.stdout


total 2,3M
4,0K 0x0000000000400000-0x0000000000401000
4,0K 0x0000000000600000-0x0000000000601000
132K 0x000000000072c000-0x000000000074d000
1,8M 0x00007fb7519db000-0x00007fb751b9b000
 16K 0x00007fb751d9b000-0x00007fb751d9f000
8,0K 0x00007fb751d9f000-0x00007fb751da1000
 16K 0x00007fb751da1000-0x00007fb751da5000
 12K 0x00007fb751da5000-0x00007fb751da8000
4,0K 0x00007fb751fa7000-0x00007fb751fa8000
4,0K 0x00007fb751fa8000-0x00007fb751fa9000
144K 0x00007fb751fa9000-0x00007fb751fcd000
 12K 0x00007fb7521a2000-0x00007fb7521a5000
 12K 0x00007fb7521c9000-0x00007fb7521cc000
4,0K 0x00007fb7521cc000-0x00007fb7521cd000
4,0K 0x00007fb7521cd000-0x00007fb7521ce000
4,0K 0x00007fb7521ce000-0x00007fb7521cf000
132K 0x00007ffcf8c6e000-0x00007ffcf8c8f000
4,0K mappings

o: test1 0x72c010
o: test3 0x72c030
o: test1 0x72c050
o: test3 0x72c070
o: test1 0x72c090
o: test3 0x72c0b0
pid 5931

Let's load that memory dump in haystack.


In [5]:
# haystack itself, plus the loader used to open a memory dump
import haystack
from haystack import dump_loader

# location of the dump of the test program's memory mappings
memdumpname = '../test/src/test-ctypes3.64.dump'
memory_handler = dump_loader.load(memdumpname)
print(memory_handler)


<MemoryHandler for /home/other/Compil/python-haystack/test/src/test-ctypes3.64.dump with 17 mappings>

Now, the first feature of haystack is to search for a specific structure in memory.


In [6]:
# we need to add our test path to the env, so that the generated ctypes
# module can be found
import sys
sys.path.append('../test/src/')
py_modulename = 'ctypes3_gen64'

# load this module with haystack, through the model of the memory handler;
# the module name is given once, in py_modulename, instead of being repeated
my_model = memory_handler.get_model()
test3 = my_model.import_module(py_modulename)
# list the names made available by the loaded module
print(test3.__dict__.keys())


['struct_Node', 'c_int128', '__all__', '__builtins__', '__file__', 'c_uint128', 'struct_test3', '__package__', 'ctypes', 'void', 'POINTER_T', '__name__', 'c_long_double_t', '__doc__']

Now we can search for the structure in memory.


In [7]:
# the record type to look for: the generated ctypes class for struct test3
py_class = test3.struct_test3
# search without any constraints on the field values;
# each result pairs a ctypes record with the address it was found at
results = haystack.search_record(memory_handler, py_class)
print results


[(<ctypes3_gen64.struct_test3 object at 0x7f5cea101e60>, 7520272), (<ctypes3_gen64.struct_test3 object at 0x7f5cea101ef0>, 7520304L), (<ctypes3_gen64.struct_test3 object at 0x7f5cea106050>, 7520336L), (<ctypes3_gen64.struct_test3 object at 0x7f5cea106170>, 7520368L), (<ctypes3_gen64.struct_test3 object at 0x7f5cea106290>, 7520400L), (<ctypes3_gen64.struct_test3 object at 0x7f5cea1063b0>, 7520432L)]

The output is a list of (ctypes record, memory address) tuples. We can also get a better formatted string output:


In [8]:
# render the search results as readable text, one block per record
out = haystack.output_to_string(memory_handler, results)
print out


[# --------------- 0x72c010 
{ # <struct_test3 at 0x72c010>
"val1": 3735928559L, # c_uint
"val2": 0L, # c_uint
"me": 3735928559, # c_uint 0xdeadbeefL,
"val2b": 7520272L, # c_uint
"val1b": 0L, # c_uint
}# --------------- 0x72c030 
{ # <struct_test3 at 0x72c030>
"val1": 3735928559L, # c_uint
"val2": 269488144L, # c_uint
"me": 3735928559, # c_uint 0xdeadbeefL,
"val2b": 269488144L, # c_uint
"val1b": 3735928559L, # c_uint
}# --------------- 0x72c050 
{ # <struct_test3 at 0x72c050>
"val1": 3735928559L, # c_uint
"val2": 0L, # c_uint
"me": 3735928559, # c_uint 0xdeadbeefL,
"val2b": 7520336L, # c_uint
"val1b": 0L, # c_uint
}# --------------- 0x72c070 
{ # <struct_test3 at 0x72c070>
"val1": 3735928559L, # c_uint
"val2": 269488144L, # c_uint
"me": 3735928559, # c_uint 0xdeadbeefL,
"val2b": 269488144L, # c_uint
"val1b": 3735928559L, # c_uint
}# --------------- 0x72c090 
{ # <struct_test3 at 0x72c090>
"val1": 3735928559L, # c_uint
"val2": 0L, # c_uint
"me": 3735928559, # c_uint 0xdeadbeefL,
"val2b": 7520400L, # c_uint
"val1b": 0L, # c_uint
}# --------------- 0x72c0b0 
{ # <struct_test3 at 0x72c0b0>
"val1": 3735928559L, # c_uint
"val2": 269488144L, # c_uint
"me": 3735928559, # c_uint 0xdeadbeefL,
"val2b": 269488144L, # c_uint
"val1b": 3735928559L, # c_uint
}]

We can even translate these records into dynamically generated plain old Python objects.


In [9]:
# convert the ctypes records to python objects; `out` now holds a list of
# (python object, address) pairs instead of the string from the previous cell
out = haystack.output_to_python(memory_handler, results)
print out


[(<haystack.model.ctypes3_gen64.struct_test3_py object at 0x7f5cea0dbd90>, 7520272), (<haystack.model.ctypes3_gen64.struct_test3_py object at 0x7f5cea0dbd10>, 7520304L), (<haystack.model.ctypes3_gen64.struct_test3_py object at 0x7f5cea0e3d90>, 7520336L), (<haystack.model.ctypes3_gen64.struct_test3_py object at 0x7f5cea0e3f50>, 7520368L), (<haystack.model.ctypes3_gen64.struct_test3_py object at 0x7f5cea0e3f90>, 7520400L), (<haystack.model.ctypes3_gen64.struct_test3_py object at 0x7f5cea0e3fd0>, 7520432L)]

In [10]:
# each entry pairs a python record with the address it was found at
for record, address in out:
    print("@0x%x val1: 0x%x , val1b: 0x%x " % (address, record.val1, record.val1b))


@0x72c010 val1: 0xdeadbeef , val1b: 0x0 
@0x72c030 val1: 0xdeadbeef , val1b: 0xdeadbeef 
@0x72c050 val1: 0xdeadbeef , val1b: 0x0 
@0x72c070 val1: 0xdeadbeef , val1b: 0xdeadbeef 
@0x72c090 val1: 0xdeadbeef , val1b: 0x0 
@0x72c0b0 val1: 0xdeadbeef , val1b: 0xdeadbeef 

There should only be 3 struct_test3 records. The true instances are at 0x72c030, 0x72c070 and 0x72c0b0.

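Uh-oh, there are a few too many chunks in memory that could pass for a struct_test3. The three extra hits (the ones with val1b: 0x0) are actually the struct Node allocations made by test1(): without any constraint on the field values, their bytes are also a plausible struct_test3.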

Let's apply more constraints to the search


In [11]:
!cat ../test/src/ctypes3.constraints


[struct_test3]
val1: [0xdeadbeef]
val1b: [0xdeadbeef]
val2: [0x10101010]
val2b: [0x10101010]


[struct_Node]
val1: [0xdeadbeef]
ptr2: [NotNull]

Here we say that the val1 and val1b fields should both be 0xdeadbeef, and that the val2 and val2b fields should both be 0x10101010. Let's apply these constraints to the search.


In [12]:
# read the per-field constraints from the constraints file
from haystack import constraints
handler = constraints.ConstraintsConfigHandler()
my_constraints = handler.read('../test/src/ctypes3.constraints')

# now use them
# same search as before, with the constraints passed as an extra argument
results = haystack.search_record(memory_handler, py_class, my_constraints)
print haystack.output_to_string(memory_handler, results)


[# --------------- 0x72c030 
{ # <struct_test3 at 0x72c030>
"val1": 3735928559L, # c_uint
"val2": 269488144L, # c_uint
"me": 3735928559, # c_uint 0xdeadbeefL,
"val2b": 269488144L, # c_uint
"val1b": 3735928559L, # c_uint
}# --------------- 0x72c070 
{ # <struct_test3 at 0x72c070>
"val1": 3735928559L, # c_uint
"val2": 269488144L, # c_uint
"me": 3735928559, # c_uint 0xdeadbeefL,
"val2b": 269488144L, # c_uint
"val1b": 3735928559L, # c_uint
}# --------------- 0x72c0b0 
{ # <struct_test3 at 0x72c0b0>
"val1": 3735928559L, # c_uint
"val2": 269488144L, # c_uint
"me": 3735928559, # c_uint 0xdeadbeefL,
"val2b": 269488144L, # c_uint
"val1b": 3735928559L, # c_uint
}]

In [13]:
# each result pairs a ctypes record with the address it was found at
for record, address in results:
    print("@0x%x val1: 0x%x , val1b: 0x%x " % (address, record.val1, record.val1b))


@0x72c030 val1: 0xdeadbeef , val1b: 0xdeadbeef 
@0x72c070 val1: 0xdeadbeef , val1b: 0xdeadbeef 
@0x72c0b0 val1: 0xdeadbeef , val1b: 0xdeadbeef 

The constraints did reduce the number of results, and haystack only returns allocated chunks of memory that match these constraints.


In [ ]: