Import Libraries


In [1]:
from pyspark.sql import SparkSession

Create SparkSession


In [2]:
# Build the notebook's SparkSession against the standalone Spark master.
# getOrCreate() hands back an already-running session when one exists,
# so re-running this cell does not spawn a second application.
spark = (
    SparkSession.builder
    .appName("HELK Reader")
    .master("spark://helk-spark-master:7077")
    .enableHiveSupport()
    .getOrCreate()
)

Verify Spark Variable


In [3]:
# Bare expression: the notebook renders the session summary (version, master,
# app name) as this cell's output, confirming the session was created.
spark


Out[3]:

SparkSession - hive

SparkContext

Spark UI

Version
v2.4.3
Master
spark://helk-spark-master:7077
AppName
HELK Reader

Initialize Elasticsearch DataFrame Reader


In [4]:
# Connector settings shared by every Elasticsearch read in this notebook.
# NOTE(review): "inferSchema" does not look like an elasticsearch-hadoop
# setting and is presumably ignored by the connector -- confirm before removing.
# NOTE(review): a user name is supplied without "es.net.http.auth.pass";
# confirm whether the cluster actually enforces authentication.
es_options = {
    "inferSchema": "true",
    # Treat the "tags" field as an array when mapping documents to rows.
    "es.read.field.as.array.include": "tags",
    "es.nodes": "helk-elasticsearch:9200",
    "es.net.http.auth.user": "elastic",
}

# Reusable reader: nothing is loaded until .load(<index>) is called on it.
es_reader = spark.read.format("org.elasticsearch.spark.sql").options(**es_options)

Load Data from Elasticsearch: Sysmon Index


In [5]:
# Wildcard resource covering the Sysmon indices.
# NOTE(review): the trailing "/" is kept exactly as written -- presumably the
# empty type part of an "index/type" resource; confirm before changing it.
sysmon_index_pattern = "logs-endpoint-winevent-sysmon-*/"
sysmon_df = es_reader.load(sysmon_index_pattern)

In [6]:
# Keep only the Sysmon rows whose action field is "processcreate".
is_process_create = sysmon_df["action"] == "processcreate"
processcreate_df = sysmon_df.where(is_process_create)

In [10]:
# Columns needed to inspect parent/child process relationships.
process_columns = [
    "process_guid",
    "process_parent_name",
    "process_parent_command_line",
    "process_name",
    "process_command_line",
    "action",
    "@timestamp",
]

# Narrow the frame to those columns. The name is rebound on purpose because
# later cells read `processcreate_df`; re-running this cell is harmless since
# the selected columns are still present after the first projection.
processcreate_df = processcreate_df.select(*process_columns)

Show Sysmon Spark DataFrame


In [11]:
# Print the first 10 rows as a text table; long cell values are truncated.
processcreate_df.show(n=10, truncate=True)


+--------------------+-------------------+---------------------------+--------------------+--------------------+-------------+--------------------+
|        process_guid|process_parent_name|process_parent_command_line|        process_name|process_command_line|       action|          @timestamp|
+--------------------+-------------------+---------------------------+--------------------+--------------------+-------------+--------------------+
|aa6b4a20-7cde-5ce...|        svchost.exe|       c:\windows\system...|        wmiprvse.exe|c:\windows\system...|processcreate|2019-05-18 21:45:...|
|aa6b4a20-7cde-5ce...|       wmiprvse.exe|       c:\windows\system...|      powershell.exe|c:\windows\system...|processcreate|2019-05-18 21:45:...|
|aa6b4a20-7cdf-5ce...|     powershell.exe|       c:\windows\system...|         conhost.exe|\??\c:\windows\sy...|processcreate|2019-05-18 21:45:...|
|aa6b4a20-7ce7-5ce...|       winlogon.exe|               winlogon.exe|         logonui.exe|"logonui.exe" /fl...|processcreate|2019-05-18 21:45:...|
|aa6b4a20-7ce9-5ce...|        svchost.exe|       c:\windows\system...|       taskhostw.exe|taskhostw.exe key...|processcreate|2019-05-18 21:45:...|
|aa6b4a20-7cd9-5ce...|        svchost.exe|       c:\windows\system...|backgroundtaskhos...|"c:\windows\syste...|processcreate|2019-05-18 21:44:...|
|aa6b4a20-7cda-5ce...|        svchost.exe|       c:\windows\system...|   runtimebroker.exe|c:\windows\system...|processcreate|2019-05-18 21:44:...|
|aa6b4a20-7cea-5ce...|        svchost.exe|       c:\windows\system...|         dllhost.exe|c:\windows\system...|processcreate|2019-05-18 21:45:...|
|aa6b4a20-7cea-5ce...|        svchost.exe|       c:\windows\system...|       taskhostw.exe|       taskhostw.exe|processcreate|2019-05-18 21:45:...|
|aa6b4a20-7cec-5ce...|        svchost.exe|       c:\windows\system...|         dllhost.exe|c:\windows\system...|processcreate|2019-05-18 21:45:...|
+--------------------+-------------------+---------------------------+--------------------+--------------------+-------------+--------------------+
only showing top 10 rows


In [ ]:
$