In [2]:
import os, sys, re
import asm_utils
import pymongo as pm
import multiprocessing
from joblib import delayed, Parallel
import src.utils.utils as utils
In [3]:
samples = asm_utils.get_collection()
Aaron and Andres,
The four common call convention keywords that are used in the IDA .asm files are:
__stdcall
__cdecl
__fastcall
__thiscall
the general format of the function comments looks like:
; <TYPE> <CONVENTION> <NAME>(<ARG0>, <ARG1>, ...)
as an example:
; void __stdcall memcpy(char *dst, char *src, int n)
Another possible thing that might be helpful is the DLL files that are imported. These are listed on lines that look like the following:
;
; Imports from <NAME>.dll
;
as an example:
;
; Imports from KERNEL32.dll
;
these would give a list of dll's that are used by the samples. It might be as simple as using any line that contains ".dll" or ".DLL".
In [3]:
calls = {'__stdcall', '__cdecl', '__fastcall', '__thiscall'}
In [6]:
test_comments = samples.find_one({'class' : '5'})['ida_comments']
In [7]:
dlls = [line for line in test_comments if '.dll' in line.lower()]
In [8]:
dlls = [line.split()[-1] for line in dlls]
dlls
Out[8]:
['"TAPI32.DLL"',
'ADVAPI32.dll',
'KERNEL32.dll',
'USER32.dll',
'ole32.dll',
'ADVAPI32.dll',
'KERNEL32.dll',
'USER32.dll',
'ole32.dll']
In [4]:
def get_dlls(comments):
""" Returns a lowercase set of DLLs accessed by the file does not preserve order.
Remove set command to preserve redundency"""
dlls = [line for line in comments if '.dll' in line.lower()]
return list(set(line.split()[-1].lower() for line in dlls))
In [5]:
get_dlls(test_comments)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-5-2d0c96ed5186> in <module>()
----> 1 get_dlls(test_comments)
NameError: name 'test_comments' is not defined
In [11]:
fcalls = [line for line in test_comments for word in line.split() if word in calls]
In [5]:
def get_calls(comments):
call_dict = {}
calls = {'__stdcall', '__cdecl', '__fastcall', '__thiscall'}
fcalls = [line for line in comments for word in line.split() if word in calls]
call_dict['calls'] = fcalls
call_dict['total_calls'] = len(fcalls)
call_dict['stdcall'] = [line for line in fcalls if '__stdcall' in line.split()]
call_dict['cdecl'] = [line for line in fcalls if '__cdecl' in line.split()]
call_dict['fastcall'] = [line for line in fcalls if '__fastcall' in line.split()]
call_dict['thiscall'] = [line for line in fcalls if '__thiscall' in line.split()]
call_dict['stdcall_count'] = len(call_dict['stdcall'])
call_dict['cdecl_count'] = len(call_dict['cdecl'])
call_dict['fastcall_count'] = len(call_dict['fastcall'])
call_dict['thiscall_count'] = len(call_dict['thiscall'])
return call_dict
In [13]:
get_calls(test_comments)
Out[13]:
{'calls': ['int __cdecl sub_401000(SIZE_T dwBytes)',
'int __cdecl sub_401018(LPVOID lpMem, size_t)',
'int __stdcall sub_401042(HWND hWnd, int, int, int)',
'int __cdecl sub_401941(void *)',
'int __stdcall WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nShowCmd)',
'int __cdecl sub_402000(void *,void *)',
'int __stdcall sub_40201C(LPCSTR lpProcName, int)',
'BOOL __stdcall GetTokenInformation(HANDLE TokenHandle, TOKEN_INFORMATION_CLASS TokenInformationClass,LPVOID TokenInformation, DWORD TokenInformationLength, PDWORD ReturnLength)',
'BOOL __stdcall InitializeSecurityDescriptor(PSECURITY_DESCRIPTOR pSecurityDescriptor,DWORD dwRevision)',
'LSTATUS __stdcall RegCloseKey(HKEY hKey)',
'LSTATUS __stdcall RegCreateKeyExA(HKEY hKey, LPCSTR lpSubKey,DWORD Reserved,LPSTR lpClass, DWORD dwOptions,REGSAM samDesired, const LPSECURITY_ATTRIBUTES lpSecurityAttributes, PHKEY phkResult, LPDWORD lpdwDisposition)',
'LSTATUS __stdcall RegEnumValueA(HKEY hKey, DWORD dwIndex, LPSTR lpValueName, LPDWORD lpcchValueName, LPDWORD lpReserved, LPDWORD lpType, LPBYTE lpData, LPDWORD lpcbData)',
'LSTATUS __stdcall RegQueryValueExA(HKEY hKey,LPCSTR lpValueName, LPDWORD lpReserved,LPDWORDlpType,LPBYTE lpData, LPDWORD lpcbData)',
'LSTATUS __stdcall RegOpenKeyA(HKEY hKey, LPCSTR lpSubKey, PHKEY phkResult)',
'BOOL __stdcall AllocateAndInitializeSid(PSID_IDENTIFIER_AUTHORITY pIdentifierAuthority, BYTE nSubAuthorityCount, DWORD nSubAuthority0, DWORD nSubAuthority1, DWORD nSubAuthority2, DWORD nSubAuthority3, DWORD nSubAuthority4, DWORD nSubAuthority5, DWORD nSubAuthority6, DWORD nSubAuthority7, PSID*pSid)',
'BOOL __stdcall GetHandleInformation(HANDLE hObject, LPDWORD lpdwFlags)',
'BOOL __stdcall GetFileTime(HANDLE hFile, LPFILETIME lpCreationTime, LPFILETIME lpLastAccessTime, LPFILETIME lpLastWriteTime)',
'BOOL __stdcall GetComputerNameA(LPSTRlpBuffer, LPDWORD nSize)',
'UINT __stdcall GetACP()',
'BOOL __stdcall FreeLibrary(HMODULE hLibModule)',
'HMODULE __stdcall LoadLibraryA(LPCSTRlpLibFileName)',
'BOOL __stdcall SetEnvironmentVariableA(LPCSTRlpName,LPCSTR lpValue)',
'BOOL __stdcall FreeEnvironmentStringsA(LPCH)',
'LPCH __stdcall GetEnvironmentStrings()',
'BOOL __stdcall SetTimeZoneInformation(const TIME_ZONE_INFORMATION *lpTimeZoneInformation)',
'void __stdcall InitializeCriticalSection(LPCRITICAL_SECTION lpCriticalSection)',
'void __stdcall DeleteCriticalSection(LPCRITICAL_SECTION lpCriticalSection)',
'void __stdcall LeaveCriticalSection(LPCRITICAL_SECTION lpCriticalSection)',
'void __stdcall EnterCriticalSection(LPCRITICAL_SECTION lpCriticalSection)',
'LONG __stdcall InterlockedCompareExchange(volatile LONG *Destination,LONG Exchange, LONG Comperand)',
'LONG __stdcall InterlockedIncrement(volatile LONG *lpAddend)',
'BOOL __stdcall GlobalUnlock(HGLOBAL hMem)',
'LPVOID __stdcall GlobalLock(HGLOBAL hMem)',
'HGLOBAL __stdcall GlobalReAlloc(HGLOBAL hMem,SIZE_T dwBytes,UINT uFlags)',
'void __stdcall ExitThread(DWORD dwExitCode)',
'void __stdcall ExitProcess(UINT uExitCode)',
'HANDLE __stdcall CreateThread(LPSECURITY_ATTRIBUTES lpThreadAttributes, SIZE_T dwStackSize, LPTHREAD_START_ROUTINE lpStartAddress, LPVOID lpParameter, DWORD dwCreationFlags,LPDWORDlpThreadId)',
'ATOM __stdcall GlobalFindAtomA(LPCSTRlpString)',
'ATOM __stdcall GlobalDeleteAtom(ATOM nAtom)',
'ATOM __stdcall GlobalAddAtomA(LPCSTR lpString)',
'ATOM __stdcall DeleteAtom(ATOM nAtom)',
'ATOM __stdcall AddAtomA(LPCSTR lpString)',
'UINT __stdcall GetWindowsDirectoryA(LPSTR lpBuffer, UINT uSize)',
'BOOL __stdcall DeleteFileA(LPCSTR lpFileName)',
'BOOL __stdcall FlushFileBuffers(HANDLE hFile)',
'BOOL __stdcall WriteFile(HANDLE hFile, LPCVOID lpBuffer, DWORD nNumberOfBytesToWrite,LPDWORDlpNumberOfBytesWritten,LPOVERLAPPED lpOverlapped)',
'BOOL __stdcall ReadFile(HANDLE hFile,LPVOID lpBuffer, DWORD nNumberOfBytesToRead, LPDWORD lpNumberOfBytesRead, LPOVERLAPPED lpOverlapped)',
'HANDLE __stdcall CreateFileA(LPCSTR lpFileName, DWORDdwDesiredAccess, DWORD dwShareMode, LPSECURITY_ATTRIBUTES lpSecurityAttributes,DWORD dwCreationDisposition, DWORD dwFlagsAndAttributes, HANDLEhTemplateFile)',
'BOOL __stdcall CopyFileA(LPCSTR lpExistingFileName, LPCSTR lpNewFileName, BOOL bFailIfExists)',
'BOOL __stdcall SetThreadPriority(HANDLE hThread, int nPriority)',
'HANDLE __stdcall CreateEventA(LPSECURITY_ATTRIBUTES lpEventAttributes, BOOL bManualReset, BOOL bInitialState,LPCSTR lpName)',
'BOOL __stdcall ResetEvent(HANDLE hEvent)',
'HANDLE __stdcall OpenEventA(DWORD dwDesiredAccess, BOOL bInheritHandle, LPCSTR lpName)',
'BOOL __stdcall SetEvent(HANDLE hEvent)',
'BOOL __stdcall MoveFileA(LPCSTR lpExistingFileName, LPCSTR lpNewFileName)',
'LPVOID __stdcall HeapReAlloc(HANDLE hHeap, DWORD dwFlags, LPVOID lpMem, SIZE_T dwBytes)',
'BOOL __stdcall ReleaseMutex(HANDLE hMutex)',
'HANDLE __stdcall CreateMutexA(LPSECURITY_ATTRIBUTES lpMutexAttributes, BOOL bInitialOwner, LPCSTR lpName)',
'HANDLE __stdcall OpenMutexA(DWORD dwDesiredAccess, BOOL bInheritHandle, LPCSTR lpName)',
'BOOL __stdcall TerminateThread(HANDLEhThread, DWORD dwExitCode)',
'LPVOID __stdcall TlsGetValue(DWORD dwTlsIndex)',
'BOOL __stdcall TlsSetValue(DWORD dwTlsIndex, LPVOID lpTlsValue)',
'BOOL __stdcall TlsFree(DWORD dwTlsIndex)',
'BOOL __stdcall GetSystemTimeAdjustment(PDWORDlpTimeAdjustment, PDWORD lpTimeIncrement, PBOOLlpTimeAdjustmentDisabled)',
'BOOL __stdcall GetStringTypeA(LCID Locale, DWORD dwInfoType, LPCSTR lpSrcStr,int cchSrc, LPWORD lpCharType)',
'LPVOID __stdcall VirtualAlloc(LPVOID lpAddress, SIZE_T dwSize, DWORD flAllocationType, DWORD flProtect)',
'UINT __stdcall GetOEMCP()',
'BOOL __stdcall GetCPInfo(UINTCodePage, LPCPINFO lpCPInfo)',
'void __stdcall RtlUnwind(PVOID TargetFrame, PVOID TargetIp, PEXCEPTION_RECORDExceptionRecord, PVOID ReturnValue)',
'BOOL __stdcall VirtualFree(LPVOID lpAddress, SIZE_T dwSize, DWORD dwFreeType)',
'HANDLE __stdcall HeapCreate(DWORD flOptions, SIZE_T dwInitialSize, SIZE_T dwMaximumSize)',
'BOOL __stdcall HeapDestroy(HANDLE hHeap)',
'HANDLE __stdcall GetStdHandle(DWORD nStdHandle)',
'UINT __stdcall SetHandleCount(UINT uNumber)',
'BOOL __stdcall FreeEnvironmentStringsW(LPWCH)',
'LONG __stdcall UnhandledExceptionFilter(struct _EXCEPTION_POINTERS *ExceptionInfo)',
'void __stdcall GetLocalTime(LPSYSTEMTIME lpSystemTime)',
'void __stdcall GetStartupInfoA(LPSTARTUPINFOAlpStartupInfo)',
'void __stdcall GetSystemInfo(LPSYSTEM_INFO lpSystemInfo)',
'BOOL __stdcall GetVersionExA(LPOSVERSIONINFOAlpVersionInformation)',
'BOOL __stdcall IsBadCodePtr(FARPROC lpfn)',
'void __stdcall OutputDebugStringA(LPCSTR lpOutputString)',
'BOOL __stdcall HeapFree(HANDLE hHeap,DWORD dwFlags, LPVOID lpMem)',
'HANDLE __stdcall GetProcessHeap()',
'BOOL __stdcall CloseHandle(HANDLE hObject)',
'LPVOID __stdcall HeapAlloc(HANDLE hHeap, DWORD dwFlags, SIZE_T dwBytes)',
'HANDLE __stdcall GetCurrentProcess()',
'BOOL __stdcall TerminateProcess(HANDLE hProcess, UINTuExitCode)',
'HMODULE __stdcall GetModuleHandleA(LPCSTR lpModuleName)',
'FARPROC __stdcall GetProcAddress(HMODULE hModule, LPCSTR lpProcName)',
'BOOL __stdcall GetStringTypeW(DWORD dwInfoType, LPCWSTR lpSrcStr, intcchSrc,LPWORD lpCharType)',
'BOOL __stdcall PeekMessageA(LPMSG lpMsg, HWNDhWnd, UINT wMsgFilterMin, UINT wMsgFilterMax, UINT wRemoveMsg)',
'LRESULT __stdcall DispatchMessageA(const MSG *lpMsg)',
'BOOL __stdcall TranslateMessage(constMSG *lpMsg)',
'BOOL __stdcall GetMessageA(LPMSG lpMsg, HWND hWnd, UINT wMsgFilterMin, UINT wMsgFilterMax)',
'BOOL __stdcall UpdateWindow(HWND hWnd)',
'BOOL __stdcall ShowWindow(HWND hWnd, int nCmdShow)',
'HWND __stdcall GetParent(HWNDhWnd)',
'BOOL __stdcall KillTimer(HWNDhWnd, UINT_PTR uIDEvent)',
'BOOL __stdcall PostMessageA(HWND hWnd, UINT Msg, WPARAM wParam, LPARAM lParam)',
'void __stdcall PostQuitMessage(int nExitCode)',
'UINT_PTR __stdcall SetTimer(HWND hWnd, UINT_PTR nIDEvent, UINT uElapse, TIMERPROC lpTimerFunc)',
'HCURSOR __stdcall LoadCursorA(HINSTANCE hInstance, LPCSTR lpCursorName)',
'ATOM __stdcall RegisterClassA(const WNDCLASSA*lpWndClass)',
'HWND __stdcall CreateWindowExA(DWORD dwExStyle, LPCSTR lpClassName, LPCSTR lpWindowName, DWORD dwStyle, int X, int Y,int nWidth, intnHeight, HWND hWndParent, HMENUhMenu, HINSTANCE hInstance, LPVOID lpParam)',
'void __stdcall CoUninitialize()',
'HRESULT __stdcall CoInitialize(LPVOIDpvReserved)',
'HRESULT __stdcall OleRun(LPUNKNOWN pUnknown)',
'HRESULT __stdcall OleSetClipboard(LPDATAOBJECT pDataObj)',
'HRESULT __stdcall OleSaveToStream(LPPERSISTSTREAM pPStm, LPSTREAM pStm)',
'HRESULT __stdcall CoGetMalloc(DWORD dwMemContext, LPMALLOC *ppMalloc)'],
'cdecl': ['int __cdecl sub_401000(SIZE_T dwBytes)',
'int __cdecl sub_401018(LPVOID lpMem, size_t)',
'int __cdecl sub_401941(void *)',
'int __cdecl sub_402000(void *,void *)'],
'cdecl_count': 4,
'fastcall': [],
'fastcall_count': 0,
'stdcall': ['int __stdcall sub_401042(HWND hWnd, int, int, int)',
'int __stdcall WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nShowCmd)',
'int __stdcall sub_40201C(LPCSTR lpProcName, int)',
'BOOL __stdcall GetTokenInformation(HANDLE TokenHandle, TOKEN_INFORMATION_CLASS TokenInformationClass,LPVOID TokenInformation, DWORD TokenInformationLength, PDWORD ReturnLength)',
'BOOL __stdcall InitializeSecurityDescriptor(PSECURITY_DESCRIPTOR pSecurityDescriptor,DWORD dwRevision)',
'LSTATUS __stdcall RegCloseKey(HKEY hKey)',
'LSTATUS __stdcall RegCreateKeyExA(HKEY hKey, LPCSTR lpSubKey,DWORD Reserved,LPSTR lpClass, DWORD dwOptions,REGSAM samDesired, const LPSECURITY_ATTRIBUTES lpSecurityAttributes, PHKEY phkResult, LPDWORD lpdwDisposition)',
'LSTATUS __stdcall RegEnumValueA(HKEY hKey, DWORD dwIndex, LPSTR lpValueName, LPDWORD lpcchValueName, LPDWORD lpReserved, LPDWORD lpType, LPBYTE lpData, LPDWORD lpcbData)',
'LSTATUS __stdcall RegQueryValueExA(HKEY hKey,LPCSTR lpValueName, LPDWORD lpReserved,LPDWORDlpType,LPBYTE lpData, LPDWORD lpcbData)',
'LSTATUS __stdcall RegOpenKeyA(HKEY hKey, LPCSTR lpSubKey, PHKEY phkResult)',
'BOOL __stdcall AllocateAndInitializeSid(PSID_IDENTIFIER_AUTHORITY pIdentifierAuthority, BYTE nSubAuthorityCount, DWORD nSubAuthority0, DWORD nSubAuthority1, DWORD nSubAuthority2, DWORD nSubAuthority3, DWORD nSubAuthority4, DWORD nSubAuthority5, DWORD nSubAuthority6, DWORD nSubAuthority7, PSID*pSid)',
'BOOL __stdcall GetHandleInformation(HANDLE hObject, LPDWORD lpdwFlags)',
'BOOL __stdcall GetFileTime(HANDLE hFile, LPFILETIME lpCreationTime, LPFILETIME lpLastAccessTime, LPFILETIME lpLastWriteTime)',
'BOOL __stdcall GetComputerNameA(LPSTRlpBuffer, LPDWORD nSize)',
'UINT __stdcall GetACP()',
'BOOL __stdcall FreeLibrary(HMODULE hLibModule)',
'HMODULE __stdcall LoadLibraryA(LPCSTRlpLibFileName)',
'BOOL __stdcall SetEnvironmentVariableA(LPCSTRlpName,LPCSTR lpValue)',
'BOOL __stdcall FreeEnvironmentStringsA(LPCH)',
'LPCH __stdcall GetEnvironmentStrings()',
'BOOL __stdcall SetTimeZoneInformation(const TIME_ZONE_INFORMATION *lpTimeZoneInformation)',
'void __stdcall InitializeCriticalSection(LPCRITICAL_SECTION lpCriticalSection)',
'void __stdcall DeleteCriticalSection(LPCRITICAL_SECTION lpCriticalSection)',
'void __stdcall LeaveCriticalSection(LPCRITICAL_SECTION lpCriticalSection)',
'void __stdcall EnterCriticalSection(LPCRITICAL_SECTION lpCriticalSection)',
'LONG __stdcall InterlockedCompareExchange(volatile LONG *Destination,LONG Exchange, LONG Comperand)',
'LONG __stdcall InterlockedIncrement(volatile LONG *lpAddend)',
'BOOL __stdcall GlobalUnlock(HGLOBAL hMem)',
'LPVOID __stdcall GlobalLock(HGLOBAL hMem)',
'HGLOBAL __stdcall GlobalReAlloc(HGLOBAL hMem,SIZE_T dwBytes,UINT uFlags)',
'void __stdcall ExitThread(DWORD dwExitCode)',
'void __stdcall ExitProcess(UINT uExitCode)',
'HANDLE __stdcall CreateThread(LPSECURITY_ATTRIBUTES lpThreadAttributes, SIZE_T dwStackSize, LPTHREAD_START_ROUTINE lpStartAddress, LPVOID lpParameter, DWORD dwCreationFlags,LPDWORDlpThreadId)',
'ATOM __stdcall GlobalFindAtomA(LPCSTRlpString)',
'ATOM __stdcall GlobalDeleteAtom(ATOM nAtom)',
'ATOM __stdcall GlobalAddAtomA(LPCSTR lpString)',
'ATOM __stdcall DeleteAtom(ATOM nAtom)',
'ATOM __stdcall AddAtomA(LPCSTR lpString)',
'UINT __stdcall GetWindowsDirectoryA(LPSTR lpBuffer, UINT uSize)',
'BOOL __stdcall DeleteFileA(LPCSTR lpFileName)',
'BOOL __stdcall FlushFileBuffers(HANDLE hFile)',
'BOOL __stdcall WriteFile(HANDLE hFile, LPCVOID lpBuffer, DWORD nNumberOfBytesToWrite,LPDWORDlpNumberOfBytesWritten,LPOVERLAPPED lpOverlapped)',
'BOOL __stdcall ReadFile(HANDLE hFile,LPVOID lpBuffer, DWORD nNumberOfBytesToRead, LPDWORD lpNumberOfBytesRead, LPOVERLAPPED lpOverlapped)',
'HANDLE __stdcall CreateFileA(LPCSTR lpFileName, DWORDdwDesiredAccess, DWORD dwShareMode, LPSECURITY_ATTRIBUTES lpSecurityAttributes,DWORD dwCreationDisposition, DWORD dwFlagsAndAttributes, HANDLEhTemplateFile)',
'BOOL __stdcall CopyFileA(LPCSTR lpExistingFileName, LPCSTR lpNewFileName, BOOL bFailIfExists)',
'BOOL __stdcall SetThreadPriority(HANDLE hThread, int nPriority)',
'HANDLE __stdcall CreateEventA(LPSECURITY_ATTRIBUTES lpEventAttributes, BOOL bManualReset, BOOL bInitialState,LPCSTR lpName)',
'BOOL __stdcall ResetEvent(HANDLE hEvent)',
'HANDLE __stdcall OpenEventA(DWORD dwDesiredAccess, BOOL bInheritHandle, LPCSTR lpName)',
'BOOL __stdcall SetEvent(HANDLE hEvent)',
'BOOL __stdcall MoveFileA(LPCSTR lpExistingFileName, LPCSTR lpNewFileName)',
'LPVOID __stdcall HeapReAlloc(HANDLE hHeap, DWORD dwFlags, LPVOID lpMem, SIZE_T dwBytes)',
'BOOL __stdcall ReleaseMutex(HANDLE hMutex)',
'HANDLE __stdcall CreateMutexA(LPSECURITY_ATTRIBUTES lpMutexAttributes, BOOL bInitialOwner, LPCSTR lpName)',
'HANDLE __stdcall OpenMutexA(DWORD dwDesiredAccess, BOOL bInheritHandle, LPCSTR lpName)',
'BOOL __stdcall TerminateThread(HANDLEhThread, DWORD dwExitCode)',
'LPVOID __stdcall TlsGetValue(DWORD dwTlsIndex)',
'BOOL __stdcall TlsSetValue(DWORD dwTlsIndex, LPVOID lpTlsValue)',
'BOOL __stdcall TlsFree(DWORD dwTlsIndex)',
'BOOL __stdcall GetSystemTimeAdjustment(PDWORDlpTimeAdjustment, PDWORD lpTimeIncrement, PBOOLlpTimeAdjustmentDisabled)',
'BOOL __stdcall GetStringTypeA(LCID Locale, DWORD dwInfoType, LPCSTR lpSrcStr,int cchSrc, LPWORD lpCharType)',
'LPVOID __stdcall VirtualAlloc(LPVOID lpAddress, SIZE_T dwSize, DWORD flAllocationType, DWORD flProtect)',
'UINT __stdcall GetOEMCP()',
'BOOL __stdcall GetCPInfo(UINTCodePage, LPCPINFO lpCPInfo)',
'void __stdcall RtlUnwind(PVOID TargetFrame, PVOID TargetIp, PEXCEPTION_RECORDExceptionRecord, PVOID ReturnValue)',
'BOOL __stdcall VirtualFree(LPVOID lpAddress, SIZE_T dwSize, DWORD dwFreeType)',
'HANDLE __stdcall HeapCreate(DWORD flOptions, SIZE_T dwInitialSize, SIZE_T dwMaximumSize)',
'BOOL __stdcall HeapDestroy(HANDLE hHeap)',
'HANDLE __stdcall GetStdHandle(DWORD nStdHandle)',
'UINT __stdcall SetHandleCount(UINT uNumber)',
'BOOL __stdcall FreeEnvironmentStringsW(LPWCH)',
'LONG __stdcall UnhandledExceptionFilter(struct _EXCEPTION_POINTERS *ExceptionInfo)',
'void __stdcall GetLocalTime(LPSYSTEMTIME lpSystemTime)',
'void __stdcall GetStartupInfoA(LPSTARTUPINFOAlpStartupInfo)',
'void __stdcall GetSystemInfo(LPSYSTEM_INFO lpSystemInfo)',
'BOOL __stdcall GetVersionExA(LPOSVERSIONINFOAlpVersionInformation)',
'BOOL __stdcall IsBadCodePtr(FARPROC lpfn)',
'void __stdcall OutputDebugStringA(LPCSTR lpOutputString)',
'BOOL __stdcall HeapFree(HANDLE hHeap,DWORD dwFlags, LPVOID lpMem)',
'HANDLE __stdcall GetProcessHeap()',
'BOOL __stdcall CloseHandle(HANDLE hObject)',
'LPVOID __stdcall HeapAlloc(HANDLE hHeap, DWORD dwFlags, SIZE_T dwBytes)',
'HANDLE __stdcall GetCurrentProcess()',
'BOOL __stdcall TerminateProcess(HANDLE hProcess, UINTuExitCode)',
'HMODULE __stdcall GetModuleHandleA(LPCSTR lpModuleName)',
'FARPROC __stdcall GetProcAddress(HMODULE hModule, LPCSTR lpProcName)',
'BOOL __stdcall GetStringTypeW(DWORD dwInfoType, LPCWSTR lpSrcStr, intcchSrc,LPWORD lpCharType)',
'BOOL __stdcall PeekMessageA(LPMSG lpMsg, HWNDhWnd, UINT wMsgFilterMin, UINT wMsgFilterMax, UINT wRemoveMsg)',
'LRESULT __stdcall DispatchMessageA(const MSG *lpMsg)',
'BOOL __stdcall TranslateMessage(constMSG *lpMsg)',
'BOOL __stdcall GetMessageA(LPMSG lpMsg, HWND hWnd, UINT wMsgFilterMin, UINT wMsgFilterMax)',
'BOOL __stdcall UpdateWindow(HWND hWnd)',
'BOOL __stdcall ShowWindow(HWND hWnd, int nCmdShow)',
'HWND __stdcall GetParent(HWNDhWnd)',
'BOOL __stdcall KillTimer(HWNDhWnd, UINT_PTR uIDEvent)',
'BOOL __stdcall PostMessageA(HWND hWnd, UINT Msg, WPARAM wParam, LPARAM lParam)',
'void __stdcall PostQuitMessage(int nExitCode)',
'UINT_PTR __stdcall SetTimer(HWND hWnd, UINT_PTR nIDEvent, UINT uElapse, TIMERPROC lpTimerFunc)',
'HCURSOR __stdcall LoadCursorA(HINSTANCE hInstance, LPCSTR lpCursorName)',
'ATOM __stdcall RegisterClassA(const WNDCLASSA*lpWndClass)',
'HWND __stdcall CreateWindowExA(DWORD dwExStyle, LPCSTR lpClassName, LPCSTR lpWindowName, DWORD dwStyle, int X, int Y,int nWidth, intnHeight, HWND hWndParent, HMENUhMenu, HINSTANCE hInstance, LPVOID lpParam)',
'void __stdcall CoUninitialize()',
'HRESULT __stdcall CoInitialize(LPVOIDpvReserved)',
'HRESULT __stdcall OleRun(LPUNKNOWN pUnknown)',
'HRESULT __stdcall OleSetClipboard(LPDATAOBJECT pDataObj)',
'HRESULT __stdcall OleSaveToStream(LPPERSISTSTREAM pPStm, LPSTREAM pStm)',
'HRESULT __stdcall CoGetMalloc(DWORD dwMemContext, LPMALLOC *ppMalloc)'],
'stdcall_count': 107,
'thiscall': [],
'thiscall_count': 0,
'total_calls': 111}
In [60]:
In [6]:
def expert_comment_maker(document):
document['dlls'] = get_dlls(document['ida_comments'])
document['calls'] = get_calls(document['ida_comments'])
In [9]:
test_doc = samples.find_one({'class' : '5'})
In [10]:
test_doc.keys()
Out[10]:
dict_keys(['id', 'calls', 'ida_comments', '_id', 'asm_info', 'class', 'dlls'])
In [17]:
expert_comment_maker(test_doc)
In [18]:
test_doc.keys()
Out[18]:
dict_keys(['asm_info', 'class', 'dlls', 'calls', 'ida_comments', '_id', 'id'])
In [125]:
In [11]:
expert_needed = samples.find({'dlls':[]})
In [12]:
expert_needed.count()
Out[12]:
375
In [10]:
def _expert_comment_maker(doc):
print('extracting expert comments from %s' % doc['id'])
expert_comment_maker(doc)
samples.save(doc)
In [18]:
samples = asm_utils.get_collection()
expert_needed = samples.find({'dlls':[]})
In [19]:
num_cores = multiprocessing.cpu_count()
print('Running code on %d processors' % num_cores)
Parallel(n_jobs=num_cores)(
delayed(_expert_comment_maker)(doc) for doc in expert_needed)
Running code on 4 processors
extracting expert comments from 60vgAOVtBRdxTMisJw5X
extracting expert comments from FkfjQ8g7dPxXi9nL3IRM
extracting expert comments from a9oIzfw03ED4lTBCt52Y
extracting expert comments from CkzJnxomRNpMBfeK8TDg
extracting expert comments from jERVLnaTwhHFrZbvNfCyextracting expert comments from jzf91HDNIbFMPaKEmZvLextracting expert comments from 58kxhXouHzFd4g3rmInBextracting expert comments from KivjcOQFy2PmDhodWJxC
extracting expert comments from 6tfw0xSL2FNHOCJBdlaA
extracting expert comments from fRLS3aKkijp4GH0Ds6Pv
extracting expert comments from KfQ58FVTkB9sb1i4u7zH
extracting expert comments from LEnDGzVIjHaqmrPMJyR0
extracting expert comments from DALwrgpdQcolMWVBzb1t
extracting expert comments from 30htxi8FRcmfUkInewlS
extracting expert comments from IidxQvXrlBkWPZAfcqKT
extracting expert comments from d0iHC6ANYGon7myPFzBe
extracting expert comments from 6d0uJ9rYK1FcjRimvVNt
extracting expert comments from LH5pzdDSPOtgIaBC1jWo
extracting expert comments from 32r6nRhN1UCwtFTfy8ZG
extracting expert comments from da3XhOZzQEbKVtLgMYWv
extracting expert comments from KCxI1ZA3oiEqc8Xe4MkO
extracting expert comments from 6GRw1oqBOEjgbYI093UA
extracting expert comments from cMDuGY0R5UbhJNtkHnq9
extracting expert comments from fqtA64LyDpER8nvrTslF
extracting expert comments from eDyLCi1EumNSnI5dqa3G
extracting expert comments from EWjvFU5GXeSaHzxBh0f9
extracting expert comments from 3Bsqe5aIEWKZvOjXmox9
extracting expert comments from 3HxWhP8eUNovuLEiS0VB
extracting expert comments from 3r0swJ67FWm5HXDnjaIy
extracting expert comments from 7LCKOhHDaXRbcFwxdnz5
extracting expert comments from bRoM4EpjukSGXwAlZ6zn
extracting expert comments from f4BqH0J1xLkYKI3TEQWm
extracting expert comments from 6PG57gUmE2rLebCBTMno
extracting expert comments from 8odIrLKhPvcDWNCVasUl
extracting expert comments from i4f81CyIkZtEprWaOVRS
extracting expert comments from dClSn1heupsZWENJzmxQ
extracting expert comments from GgHz1ZSPklNmUJao9xhtextracting expert comments from ErlsfYCZaD0tuLjSo8GMextracting expert comments from 5dqijn0zcEPgONTLFRoIextracting expert comments from b28VuAYhaez0lDPtc6kT
extracting expert comments from bv0C5hAytloHIWaUG6r7extracting expert comments from 5JLHiDhkzyYPdTeuMqXbextracting expert comments from 4j5ZXQI3ghezluq1snEpextracting expert comments from ckaHuew6QN1dhb07q2V3
extracting expert comments from 3OX0rbks7LQmeDcKx8C4extracting expert comments from 1x2u5Ws7tzFRAgyqoJBVextracting expert comments from 7D92cJaF5QIdpA1OEhStextracting expert comments from 8EQSs7XKIkVNHL0Z41au
extracting expert comments from 8vJiQURcq15ZtmEdHOIpextracting expert comments from idDSEvTsVqZhI7MlKnGuextracting expert comments from CqRphLNQa7KiTx8sdYgZextracting expert comments from E9AvZIuDkLT36dK8bPj4
extracting expert comments from iWXubQ6nP23yTYH4lJxkextracting expert comments from ew25jRNd81oLryHW4ZVIextracting expert comments from hcnS2PgZufMNXtQ5GAz9extracting expert comments from A7QialhFt2kLjBGXw5qI
extracting expert comments from 5HbKUA1oyJDeLwhM26Fq
extracting expert comments from CQ9ATzbP3j2On810GX4dextracting expert comments from BSJOwazTVHixmLDZEuUnextracting expert comments from egnhwcvsVjWONCU8t2Ju
extracting expert comments from 0JOb8TyN6VBGCrjAkzfP
extracting expert comments from gYd81epFW6QfuBxAvILn
extracting expert comments from bfpQVKGOUXwl4NaZAvhxextracting expert comments from F58JACMhQPSYrdxqBW2p
extracting expert comments from cd2W8FxsrJDnOHZGEiet
extracting expert comments from IeSLbp1rGWqQAvPX3T2j
extracting expert comments from hS1lyzcapZusPmKUv34W
extracting expert comments from DZFtMKSlYEdX9a3L8jf4
extracting expert comments from aneBJbW8khZgVxqGIKEU
extracting expert comments from AlNimKRVGWnbq7X64jfs
extracting expert comments from C1owXSDmBaq6hHzyGQJ3
extracting expert comments from 73cMCHeUwStZjy2VoQRmextracting expert comments from ComqpXwFH8zgJdIrEMyaextracting expert comments from dYAObu2FDVoXmvTt5UjGextracting expert comments from IGE0k5g4oyYmOKiAZ1ut
extracting expert comments from bG4TtEiph3JdyZCNsoz9
extracting expert comments from BV5FwZi6DPO4UcgoG9h1
extracting expert comments from GTXj5Lg2ZF4tJAsPBCYcextracting expert comments from k2W1nfmMKqwIPxsl6ireextracting expert comments from Ft0dsjHpwqcWoXK6ST7nextracting expert comments from 7Ebd8M0gDHBRIjFvr6SQ
extracting expert comments from b6ZsxrcU9oTlziSwDGVIextracting expert comments from 3ekS6OIvjTVhQsgrqiHoextracting expert comments from 5JfdLYbqX8oFmD1WTg97extracting expert comments from k2uRGpoUItiWrTga9ysQ
extracting expert comments from eGMTAm0csZLI9QbJpCUYextracting expert comments from 8wFGat9S6sIJCNmd20zqextracting expert comments from 8G30bR6M9dmVXziN1Fxsextracting expert comments from 7KHscjvztoka0QpqYFxb
extracting expert comments from KOR9MFiqcBYTZ3UksX4lextracting expert comments from 9UvoPgMf0tnp6KA7NiCu
extracting expert comments from a63qScpsxBFJtAXjd1nV
extracting expert comments from AgSXb01HILY3mVwfnesd
extracting expert comments from c0h5bDU2PjwARy1W7uSi
extracting expert comments from COz1qc0Ds7RHkdbQBwvU
extracting expert comments from bsdFLWce8w4kI72U6hS0
extracting expert comments from 6VNXsCD0lueo1nw3KO5Mextracting expert comments from eGwk8W6m4NIzsAaHvfx3
extracting expert comments from g937V6FUYlT0PybHMEin
extracting expert comments from drBa6Xi5wAsectZYNb2n
extracting expert comments from GLrQbRUluV0qzahkDEms
extracting expert comments from HpMdj30s7EDPkiFLWvYyextracting expert comments from Fw2DKBfl05J4xkoArtU3
extracting expert comments from banLTf3rYzPjOMK1osqd
extracting expert comments from i1OyRUG9mCvIZYD6olNT
extracting expert comments from ireBgWXwZGyvjU9aR7P6extracting expert comments from Bv7fxdPDSZKmAXGp8baQ
extracting expert comments from jBXul1FAgEcKxO9ZYCDUextracting expert comments from IVk2Scu69wdmQ7tEO8rY
extracting expert comments from FNAVfQgh4a6k3lUY5ZcK
extracting expert comments from K9kNdLS83clsP0wDFObZextracting expert comments from hJw2Om4LxnIBFKo5feua
extracting expert comments from drTJEXDQkP82ueAgIitZextracting expert comments from l5zY6vNZbOKF07uSn2Am
extracting expert comments from iKQcpTlzFkgrb7RJyjDs
extracting expert comments from IwgVeC9qRbUkfE6Q3HTh
extracting expert comments from 51rb6a438sZKqOgzwYljextracting expert comments from 7Ounj5D9ZaJBSbK0iodY
extracting expert comments from 8DxQhtuOKPRZAjGy6Y3E
extracting expert comments from 5GvLiZoCkwMyFfBIXY6P
extracting expert comments from 9CVbzaWsLvcHjPqZu2Q3extracting expert comments from 9yEvV8KfDm3hcUZPqBe0
extracting expert comments from 9itfPnyplHZhTb8IMYq3
extracting expert comments from 0Q4ALVSRnlHUBjyOb1sw
extracting expert comments from 6tV2UrOqmFoGua380Rhd
extracting expert comments from 1eOaAY4fpV38LIdhxl95
extracting expert comments from 3mSnzZXGkwKY4a7yp2bJ
extracting expert comments from fIQ7X2ysMOH3qG6kKwibextracting expert comments from 9vSJ1dDnwbxl4Fm0By8hextracting expert comments from 6WbENDkcC750euPGqApQextracting expert comments from 1FacC02JPfxSdXeD7MEw
extracting expert comments from Aci3M7yVmTuk2SDgNrRwextracting expert comments from lIROYxmjTw8r2A3HiQayextracting expert comments from H2toBzikE7AxGYNnZMXqextracting expert comments from 7eJywSFZtf9D18RxdTUM
extracting expert comments from GJ1tcdBHRUOMILq3soiZextracting expert comments from dJjQbDGsra5i6XzkmWfFextracting expert comments from lLvracbzqACpwsUtnuHhextracting expert comments from lkqEXK4NrYSseRTt0Gb3
extracting expert comments from LOqA6FX02GWguYrI1Zbeextracting expert comments from J8pdGbHyWis9ToawOIMAextracting expert comments from gHPbaK69RwpGA7nFtuvU
extracting expert comments from dQ5kFDwP4ByqU3tnLTZf
extracting expert comments from JV6Qew1hoIUYF7uiCz4L
extracting expert comments from fqKj2wJk7rsb1yHFmoQd
extracting expert comments from 5UXfQC6q2ERzr8BM0InPextracting expert comments from D0FvmACsaLoJViOk3ybX
extracting expert comments from HA2qnVEF4t31ceLkiTbK
extracting expert comments from hStvL36JYZfqk1XWr2dN
extracting expert comments from j3thgSImoFBZAPsrpaqT
extracting expert comments from jwOH9vMbCimpZGrQdYPq
extracting expert comments from JN6fQoCvEeImVXjk5usc
extracting expert comments from H5QR3qYhCvUI6px1oaEV
extracting expert comments from ifUhxpyIT6wVsHcGj7JL
extracting expert comments from KNgWvxRr1p8qEfdlsLYM
extracting expert comments from kbL7DnYzATVpahrefqxo
extracting expert comments from iYRmKWHhed3jLEnZ1tTG
extracting expert comments from j7z5IJT2XyD6lGASxcei
extracting expert comments from HaTioeY3kbvJW2LXtOwF
extracting expert comments from G54U96frYb2CLIOSpMyX
extracting expert comments from Fbdqw108uxIcZ6rDMgmY
extracting expert comments from 2VhINMP4oKmsnjZkY38U
extracting expert comments from 1AXEyUDVt42T6ZFx8COo
extracting expert comments from ILTHy6snFQiapuxbXS7R
extracting expert comments from hd13cetC4NLr7TpAYxui
extracting expert comments from 6cYUST29aReytkJO4zXi
extracting expert comments from 0rgi8OfjvwKbA57IXVcE
extracting expert comments from 8aIkyv7jJRcWpYOD9Ub1
extracting expert comments from 2icwOdb5xCshnIvMXfy4
extracting expert comments from 6HdwDkgvEnNh9cqlyfR7
extracting expert comments from 5QvCJULkphcYGge78RNs
extracting expert comments from 0qPGt4cRVk9NoiJgubf2
extracting expert comments from 508Uk9z1gvCtucARFWG3
extracting expert comments from Bj1A0a6Ec4MIUQzfsTg7extracting expert comments from 7OZ8auLijd26EwXcIGHo
extracting expert comments from 6FnbGwkoRmj0ltYiPpZE
extracting expert comments from 7MOSHZDbnxzfhyAmqjYl
extracting expert comments from AmPVn2SGUxg8IT3BDda9extracting expert comments from 8NOJxRZ65rjy1H0pKWsA
extracting expert comments from EdT3lQiGHbzmrX2L1NgD
Out[19]:
[None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None]
extracting expert comments from bnYykKarSTdp3qszeJiw
extracting expert comments from ekRZWI15vJCVP3iBYjldextracting expert comments from CEfkRZXa14zAWG9Duqvcextracting expert comments from eHFQBbSs0P1uT6Ljr9x7extracting expert comments from ezpQ9ksPIVhAarCwSg70
extracting expert comments from FfmsbQcv6V3rGdDLBNXMextracting expert comments from ePCwhpaOtzJHT4QMBsXZextracting expert comments from Erlut8aMiL1F5YsjO4GSextracting expert comments from f25CLhEsZYBuOwvoTjz0
extracting expert comments from hHnpTaPYU4StmeC6yNxrextracting expert comments from FtBHiVSc1PAaxelsC7ygextracting expert comments from eoYx592qRyF10HBdaJ8Lextracting expert comments from fpiZ6no01V8gydTe4UFw
extracting expert comments from j9fqwK78kFxmBWclQh2Nextracting expert comments from iClgu3d9HaVLfXhUzkIGextracting expert comments from gRBocU81iGy75lemTdsaextracting expert comments from iJpgS9hMDuFrCTItyokl
extracting expert comments from kS8dLUGCRbajYvJZyDPeextracting expert comments from J9BpcFqysLwr0YHeNt2oextracting expert comments from j3tdbX6HDPxA7ZkBUpvGextracting expert comments from Ju4nWgOCHXBlyZmTo51e
extracting expert comments from LejcaXxAEgC31WhlswQ2extracting expert comments from kAQCMqUNsZDoribmvw84extracting expert comments from JzHoWdVa3CrB2qOR1NbLextracting expert comments from l06yuHvrqdzTo8CQeAO5
extracting expert comments from 6Ij39ZnpNmcdKOUe0BvY
extracting expert comments from KsaoU1ZWTfqSFYbOxPg8extracting expert comments from kCyuYhjLRZU7XF52NVzoextracting expert comments from kdzj2p0qHin4fMBCaPF5
extracting expert comments from E4eSC8pqYdrkWfBOtFv5
extracting expert comments from 2LR1cT43f809ZG6KUd5l
extracting expert comments from Ki1pYrmszFSjuWUvHIqkextracting expert comments from LfVY3tPrRZHGQ2SdJm5x
extracting expert comments from kjn4yXdo0iLCmZ9GugOV
extracting expert comments from ARIuhH95Kori4B8wmQLM
extracting expert comments from B4wMxFdt30yYp2ourNRI
extracting expert comments from Dri7s2veJT4hj0R3aHkf
extracting expert comments from HuqJS8CAp24F1aWeLtIoextracting expert comments from kRUx3TuoJSgp0sqDzNGXextracting expert comments from CJR6XzFHGUfeDMsh8xQk
extracting expert comments from FU1p8GJSVXAry3e2HMwn
extracting expert comments from JbzyqeI9ThRA4LWQN823
extracting expert comments from JeR146WPmlVnxKvQAESbextracting expert comments from Dzu1Kt9QEPrBUp3iRqAO
extracting expert comments from ldNfaCpceLnGUE0rPzqF
extracting expert comments from Li5YIMaubo4TWtEV7gxs
extracting expert comments from 0Iv6U2hbcP1xeBitW5Ooextracting expert comments from kJ1a9c6NoCv4DqA7Hh8R
extracting expert comments from ErYMOiwT8h3yqvfkm9Pu
extracting expert comments from aud9U7Xni4qS0cTrmjOR
extracting expert comments from CkqxXreDEdPLm6QHnMiOextracting expert comments from 8KNqArVDfusEobR4Y0lO
extracting expert comments from bzmIweixPWfjVQXhO3r4
extracting expert comments from 9CpcVP4NlA0n2hU7DzTY
extracting expert comments from DR3eyfHBIJgK08krA1CTextracting expert comments from ErbRWGIYgktQ1mf5Hw2u
extracting expert comments from CPELmM8GYegcxRIQH4VA
extracting expert comments from 5YMeDkHjclrCPd8OuymR
extracting expert comments from CRYTnotb5J4qHfviU2DS
extracting expert comments from G2wlN4BuxnLUHiXQeSZRextracting expert comments from 9LKBIVulpHtRfP2OaTAZ
extracting expert comments from gC2GsRX3ewpQyvjO0Hb4
extracting expert comments from gacV7j1fSJ2ObNlykwEq
extracting expert comments from J1rAVnKEwlsFqkG9xYNhextracting expert comments from A4VuQZbt2oGyOkUcvS7W
extracting expert comments from j9xpRuC3HiYyoDlQ6JFO
extracting expert comments from LOP6HaJKXpkic5dyuVnT
extracting expert comments from AiTGE0dPvU4JH92LYoXDextracting expert comments from D2TGLaizwXHf3YjNJ1pv
extracting expert comments from dj7GfVSTmo3Hkv46Wiah
extracting expert comments from imjGPLrpKkesMR764noI
extracting expert comments from 4aMbnHlA9es8vKmOFR2Z
extracting expert comments from jOZMUJF5waGhqLQbWsYz
extracting expert comments from DzCt290diLU3eKIExNpvextracting expert comments from KD2jHtg0IGAX46qF8PU5extracting expert comments from DSWecTduKPtZNQoVR8qhextracting expert comments from 6EnhedlGIKp14bXusfwo
extracting expert comments from 9MKSaiLY4AtGDZfvRQ1Eextracting expert comments from EKmgShQsf6a9vY0znNlUextracting expert comments from DQJfl0CT3gvt8pbMqHyXextracting expert comments from 50fz3EgBdYQKMjvinR9w
extracting expert comments from 4ZBJzEqnW52fFUw0PG3vextracting expert comments from cDRA2GdrqXYfoNaUMJg7extracting expert comments from e7JZ2kUbKHzhfpMRctqE
extracting expert comments from FD0QUxGhmurO9Y6bITAj
extracting expert comments from GBpWNc6frYvmq57wyxgX
extracting expert comments from BVSv2EQDiNfR4OxL60l5extracting expert comments from B5YUlVtdxFgkTM7mN9Shextracting expert comments from hdJnQNkT6b4yDeVwaR3F
extracting expert comments from fSn3lMtv0pJWIKLzNxeG
extracting expert comments from IY9HnrhqPomv3L6bAzeXextracting expert comments from gq9cv7lPXyzBDsY8SfVeextracting expert comments from cM2qiXmpHhuag3r1RUIeextracting expert comments from cquHZ28hReUoI4aJGKin
extracting expert comments from JAB5De4OMltSf6IKHo3Qextracting expert comments from j2gBF09pxTcfIPy4kwJHextracting expert comments from D6WLAbdEhBvgXrRMJkaoextracting expert comments from FLjUoM3ahgeXQuRmAS8O
extracting expert comments from Gbs4cQpR5JWdFhBIVKHl
extracting expert comments from jV9q0Ae8pCmL7iGRDv6oextracting expert comments from LivEmF2ytpsDexIVWuYRextracting expert comments from EpBVNQnoMzYaCbe96OZxextracting expert comments from gPRfarzs4W7Notj0liHM
extracting expert comments from GHyIqK3kbD7SvBftzaYo
extracting expert comments from eGc1liEILN0hzWaOsnSHextracting expert comments from LCrZhlaY3MnDA1GKwtvT
extracting expert comments from EcRZsJXubnGd8eSImAWa
extracting expert comments from ICeS9QhkFyus07p8PRWB
extracting expert comments from LoWgaidpb2IUM5ACcSGOextracting expert comments from GsAVzyD4hUnebMNvJFxc
extracting expert comments from LNAnljesrEDma5oUfVZI
extracting expert comments from g7sDFNXIqYmCMkWOpPrAextracting expert comments from lBuYeZUbtHQVsh7Tn2NJextracting expert comments from IUSuf2hBaRG8yrNJqDHz
extracting expert comments from Jpvuw1BibgPOt0SnKD9Qextracting expert comments from jEZkrOP7GayNmiXdget5extracting expert comments from j2wLH4fBoVF7rJK0ks5t
extracting expert comments from ifJCRz1vL0eu4ljIZ3p8
extracting expert comments from 15sGnFeEvMIgpQ8acbUu
extracting expert comments from lcqv9STBAX20houtQnR3
extracting expert comments from jrg4n6P8cIGoSBXyWAtpextracting expert comments from lS0IVqXeJrN6Dzi9Pap1
extracting expert comments from g7SQeJHR0IGrqBzfumlC
extracting expert comments from 65cjJpPCUQiLDRyXfWd4
In [28]:
col = asm_utils.get_collection(collection='test_samples')
In [29]:
Out[29]:
Collection(Database(MongoClient('afruizc-office.cs.unm.edu', 27017), 'malware'), 'test_samples')
In [34]:
nodll = col.find({'dlls': []})
In [ ]:
In [47]:
Out[47]:
{'_id': ObjectId('55152473127d277af4f58864'),
'asm_info': {'num_instr': 369,
'num_uniq_instr': 45,
'seq': ['in',
'in',
'in',
'in',
'in',
'pusha',
'call',
'pop',
'add',
'jmp',
'inc',
'push',
'nop',
'mov',
'jmp',
'jmp',
'jmp',
'jmp',
'jmp',
'jmp',
'mov',
'popa',
'add',
'jmp',
'push',
'jmp',
'push',
'clc',
'sal',
'aaa',
'movsb',
'or',
'push',
'mov',
'jmp',
'lds',
'dec',
'sub',
'daa',
'aam',
'sub',
'jmp',
'pop',
'jmp',
'add',
'push',
'push',
'push',
'jmp',
'push',
'pop',
'jmp',
'xor',
'xor',
'pop',
'push',
'push',
'pop',
'jmp',
'push',
'jmp',
'mov',
'add',
'pop',
'jmp',
'fimul',
'pop',
'push',
'jmp',
'xchg',
'cmp',
'mov',
'jmp',
'sub',
'jmp',
'sub',
'pop',
'pop',
'push',
'push',
'mov',
'jmp',
'add',
'jmp',
'add',
'jmp',
'test',
'cmp',
'dec',
'pop',
'push',
'jmp',
'push',
'pop',
'jmp',
'sub',
'jmp',
'pop',
'xor',
'push',
'push',
'lea',
'mov',
'not',
'test',
'neg',
'dec',
'cld',
'pop',
'pop',
'cmp',
'jz',
'push',
'push',
'pop',
'jmp',
'add',
'jmp',
'pop',
'push',
'mov',
'jmp',
'push',
'mov',
'jmp',
'dec',
'sub',
'pop',
'jmp',
'pop',
'mov',
'push',
'jmp',
'mov',
'jmp',
'sub',
'jmp',
'pop',
'add',
'jmp',
'mov',
'push',
'jmp',
'mov',
'jmp',
'sub',
'jmp',
'xchg',
'pop',
'push',
'jmp',
'push',
'mov',
'jmp',
'add',
'jmp',
'pop',
'jmp',
'add',
'add',
'push',
'push',
'jmp',
'mov',
'jmp',
'xor',
'pop',
'push',
'jmp',
'mov',
'jmp',
'sub',
'jmp',
'sti',
'sbb',
'xlat',
'les',
'pop',
'add',
'jmp',
'jmp',
'pop',
'push',
'mov',
'jmp',
'push',
'mov',
'and',
'inc',
'push',
'nop',
'mov',
'xor',
'jmp',
'xchg',
'push',
'push',
'xor',
'xor',
'inc',
'mov',
'neg',
'cld',
'pop',
'pop',
'pop',
'push',
'push',
'adc',
'mov',
'mov',
'mov',
'sub',
'mov',
'cld',
'pop',
'pop',
'xor',
'push',
'jmp',
'push',
'mov',
'jmp',
'fdiv',
'push',
'xchg',
'test',
'xor',
'add',
'jmp',
'pop',
'jmp',
'mov',
'add',
'push',
'push',
'pop',
'jmp',
'fstp',
'push',
'mov',
'jmp',
'wait',
'cmp',
'sub',
'pop',
'jmp',
'pop',
'xor',
'push',
'jmp',
'push',
'pop',
'jmp',
'sub',
'jmp',
'pop',
'add',
'jmp',
'push',
'rol',
'jmp',
'jmp',
'jmp',
'jmp',
'jmp',
'inc',
'jmp',
'jmp',
'jmp',
'jmp',
'jmp',
'mov',
'jmp',
'lea',
'nop',
'mov',
'jmp',
'dec',
'push',
'jmp',
'dec',
'xchg',
'push',
'mov',
'jmp',
'dec',
'add',
'add',
'jmp',
'mov',
'pop',
'jmp',
'push',
'inc',
'add',
'push',
'mov',
'jmp',
'push',
'mov',
'jmp',
'adc',
'pop',
'aas',
'or',
'push',
'clc',
'sal',
'aaa',
'sub',
'pop',
'jmp',
'movsb',
'or',
'push',
'das',
'cmp',
'pop',
'mov',
'add',
'jmp',
'jge',
'push',
'jmp',
'push',
'pop',
'jmp',
'sub',
'jmp',
'mov',
'pop',
'add',
'jmp',
'jmp',
'jmp',
'jmp',
'jmp',
'and',
'inc',
'pop',
'push',
'push',
'xchg',
'mov',
'cmp',
'mov',
'push',
'pop',
'neg',
'repne',
'cld',
'pop',
'pop',
'push',
'push',
'push',
'test',
'mov',
'mov',
'mov',
'test',
'mov',
'inc',
'xor',
'test',
'cld',
'pop',
'pop'],
'uniq_instr': ['test',
'neg',
'wait',
'sal',
'jz',
'nop',
'sbb',
'call',
'les',
'add',
'or',
'inc',
'in',
'push',
'dec',
'adc',
'fimul',
'fdiv',
'popa',
'repne',
'movsb',
'lea',
'sub',
'mov',
'jmp',
'xor',
'xchg',
'cmp',
'aam',
'cld',
'daa',
'fstp',
'clc',
'xlat',
'pop',
'sti',
'lds',
'das',
'pusha',
'not',
'aaa',
'and',
'aas',
'rol',
'jge']},
'id': 'PALqcwCgBHQDo7F9ZbeY',
'ida_comments': ['++',
'| This filehas been generated by The Interactive Disassembler (IDA) |',
'| Copyright(c) 2013 Hexrays.com> |',
'| License info: |',
'| Microsoft |',
'++',
'[00001000 BYTES: COLLAPSED SEGMENT HEADER. PRESS KEYPAD CTRL-"+" TO EXPAND]',
'Format: Portable executable for 80386(PE)',
'Imagebase: 10000000',
'Section 1. (virtualaddress00001000)',
'Virtual size : 00014000 (81920.)',
'Section size in file : 00006000 (24576.)',
'Offset to raw data for section: 00000400',
'Flags E0000020: Text Executable Readable Writable',
'Alignment : default',
'Segment type: Pure code',
'Segment permissions: Read/Write/Execute',
'org 10001000h',
'DATA XREF: HEADER:1000013C\x18o',
'HEADER:10000214\x18o',
'Section 2. (virtual address 00015000)',
'Virtual size: 0000E000 ( 57344.)',
'Section size in file: 0000D400 ( 54272.)',
'Offset to rawdata for section: 00006400',
'FlagsC0000040: Data ReadableWritable',
'Alignment: default',
'Segment type:Pure data',
'Segment permissions: Read/Write',
'org 10015000h',
'HEADER:1000023C\x18o',
'Section 3. (virtualaddress00023000)',
'Virtual size : 00001000 ( 4096.)',
'Section size in file : 00000400 ( 1024.)',
'Offset to raw data for section: 00013800',
'Flags C0000040: Data Readable Writable',
'Alignment : default',
'Segment type: Pure data',
'Segment permissions: Read/Write',
'org 10023000h',
'DATA XREF: HEADER:10000264\x18o',
'Section 4. (virtual address 00024000)',
'Virtual size: 00001000 ( 4096.)',
'Section size in file: 00000200 (512.)',
'Offset to rawdata for section: 00013C00',
'Flags40000040: Data Readable',
'Alignment: default',
'Imports from KERNEL32.dll',
'Segment type:Externs',
'_idata',
'DWORD__stdcall GetFileSize(HANDLE hFile, LPDWORD lpFileSizeHigh)',
'DATA XREF: HEADER:1000028C\x18o',
'BOOL __stdcall UnmapViewOfFile(LPCVOID lpBaseAddress)',
'BOOL __stdcall WriteFile(HANDLE hFile, LPCVOID lpBuffer, DWORD nNumberOfBytesToWrite,LPDWORDlpNumberOfBytesWritten,LPOVERLAPPED lpOverlapped)',
'LPSTR__stdcall lstrcatA(LPSTR lpString1, LPCSTR lpString2)',
'int __stdcalllstrlenA(LPCSTRlpString)',
'void __stdcall ExitProcess(UINT uExitCode)',
'DATA XREF: HEADER:10000190\x18o',
'Section 5. (virtual address 00025000)',
'Virtual size: 00001000 ( 4096.)',
'Section size in file: 00000800 ( 2048.)',
'Offset to rawdata for section: 00013E00',
'Flags60000020: Text Executable Readable',
'Alignment: default',
'Segment type:Pure code',
'Segment permissions: Read/Execute',
'org 10025000h',
'BOOL __stdcall DllEntryPoint(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved)',
'DATA XREF: HEADER:100002B4\x18o',
'DATA XREF: DllEntryPoint:loc_1002516C\x19r',
'DllEntryPoint:loc_10025229\x19r',
'CODE XREF: DllEntryPoint:loc_1002503C\x19j',
'CODE XREF: DllEntryPoint+A\x18j',
'CODE XREF: DllEntryPoint:loc_10025053\x19j',
'CODE XREF: DllEntryPoint:loc_10025023\x18j',
'CODE XREF: DllEntryPoint:loc_10025072\x19j',
'CODE XREF: DllEntryPoint:loc_10025044\x18j',
'CODE XREF: DllEntryPoint:loc_10025055\x18j',
'CODE XREF: DllEntryPoint+89\x18j',
'CODE XREF: DllEntryPoint+A0\x18j',
'CODE XREF: DllEntryPoint+B4\x18j',
'CODE XREF: DllEntryPoint+CB\x18j',
'CODE XREF: DllEntryPoint+D2\x18j',
'CODE XREF: DllEntryPoint+EA\x18j',
'CODE XREF: DllEntryPoint+F9\x18j',
'CODE XREF: DllEntryPoint+116\x18j',
'CODE XREF: DllEntryPoint+11E\x18j',
'CODE XREF: DllEntryPoint+129\x18j',
'CODE XREF: DllEntryPoint+140\x18j',
'CODE XREF: DllEntryPoint+150\x18j',
'CODE XREF: DllEntryPoint+156\x18j',
'CODE XREF: DllEntryPoint+167\x18j',
'CODE XREF: DllEntryPoint+16E\x18j',
'CODE XREF: DllEntryPoint+175\x18j',
'CODE XREF: DllEntryPoint+186\x18j',
'CODE XREF: DllEntryPoint+199\x18j',
'CODE XREF: DllEntryPoint+1A7\x18j',
'CODE XREF: DllEntryPoint+1D7\x18j',
'CODE XREF: DllEntryPoint+1E9\x18j',
'CODE XREF: DllEntryPoint+1FD\x18j',
'CODE XREF: DllEntryPoint+214\x18j',
'CODE XREF: DllEntryPoint+22C\x18j',
'CODE XREF: DllEntryPoint+23C\x18j',
'CODE XREF: DllEntryPoint+245\x18j',
'CODE XREF: DllEntryPoint+256\x18j',
'CODE XREF: DllEntryPoint+1CA\x18j',
'DllEntryPoint+268\x18j',
'CODE XREF: DllEntryPoint+278\x18j',
'CODE XREF: DllEntryPoint+287\x18j',
'CODE XREF: DllEntryPoint+298\x18j',
'CODE XREF: DllEntryPoint+2B3\x18j',
'CODE XREF: DllEntryPoint+2BE\x18j',
'CODE XREF: DllEntryPoint+2CB\x18j',
'CODE XREF: DllEntryPoint+2D3\x18j',
'CODE XREF: DllEntryPoint+2F1\x18j',
'CODE XREF: DllEntryPoint+2FF\x18j',
'CODE XREF: DllEntryPoint+314\x18j',
'CODE XREF: DllEntryPoint+322\x18j',
'CODE XREF: DllEntryPoint+32B\x18j',
'CODE XREF: DllEntryPoint+342\x18j',
'CODE XREF: DllEntryPoint+35D\x18j',
'CODE XREF: DllEntryPoint+374\x18j',
'CODE XREF: DllEntryPoint+348\x18j',
'CODE XREF: DllEntryPoint+3B5\x18j',
'CODE XREF: DllEntryPoint+3BB\x18j',
'CODE XREF: DllEntryPoint+3CC\x18j',
'CODE XREF: DllEntryPoint+3D7\x18j',
'CODE XREF: DllEntryPoint+3EB\x18j',
'CODE XREF: DllEntryPoint+3F9\x18j',
'CODE XREF: DllEntryPoint+404\x18j',
'CODE XREF: DllEntryPoint+520\x19j',
'CODE XREF: DllEntryPoint+40D\x18j',
'CODE XREF: DllEntryPoint+427\x18j',
'CODE XREF: DllEntryPoint+430\x18j',
'CODE XREF: DllEntryPoint+43F\x18j',
'CODE XREF: DllEntryPoint:loc_10025477\x19j',
'CODE XREF: DllEntryPoint+449\x18j',
'CODE XREF: DllEntryPoint:loc_10025487\x19j',
'CODE XREF: DllEntryPoint:loc_10025452\x18j',
'CODE XREF: DllEntryPoint:loc_1002547F\x18j',
'CODE XREF: DllEntryPoint:loc_100254D6\x19j',
'CODE XREF: DllEntryPoint+4AB\x18j',
'CODE XREF: DllEntryPoint:loc_100254E9\x19j',
'CODE XREF: DllEntryPoint:loc_100254B0\x18j',
'CODE XREF: DllEntryPoint:loc_1002550C\x19j',
'CODE XREF: DllEntryPoint:loc_100254DD\x18j',
'CODE XREF: DllEntryPoint:loc_100254F2\x18j',
'CODE XREF: DllEntryPoint+59C\x19j',
'CODE XREF: DllEntryPoint+52B\x18j',
'CODE XREF: DllEntryPoint+53C\x18j',
'CODE XREF: DllEntryPoint+551\x18j',
'CODE XREF: DllEntryPoint+567\x18j',
'CODE XREF: DllEntryPoint+582\x18j',
'CODE XREF: DllEntryPoint+595\x18j',
'CODE XREF: DllEntryPoint+5AB\x18j',
'CODE XREF: DllEntryPoint+5C3\x18j',
'CODE XREF: DllEntryPoint+5D0\x18j',
'CODE XREF: DllEntryPoint+5E4\x18j',
'CODE XREF: DllEntryPoint+5F8\x18j',
'CODE XREF: DllEntryPoint:loc_1002561C\x19j',
'CODE XREF: DllEntryPoint+605\x18j',
'CODE XREF: DllEntryPoint:loc_1002563C\x19j',
'CODE XREF: DllEntryPoint+5C8\x18j',
'DllEntryPoint:loc_1002560A\x18j',
'CODE XREF: DllEntryPoint:loc_10025623\x18j',
'sp-analysis failed']}
In [ ]:
In [ ]:
In [ ]:
In [35]:
nodll.count()
Out[35]:
0
In [ ]:
In [31]:
nodll
Out[31]:
0
In [ ]:
In [ ]:
In [11]:
def main():
samples = asm_utils.get_collection(collection='test_samples')
expert_needed = samples.find()
print(expert_needed.count())
print(samples)
num_cores = multiprocessing.cpu_count()
print('Running code on %d processors' % num_cores)
Parallel(n_jobs=num_cores)(
delayed(_expert_comment_maker)(doc) for doc in expert_needed)
In [ ]:
In [ ]:
Content source: afruizc/microsoft_malware_challenge
Similar notebooks: