Skip to content

Setting up Presto for Investigations

Overview

Presto is an open-source application that allows for querying data stored in data storage platforms such as HDFS and S3 using SQL. You can use Presto to query Sophos Linux Sensor (SLS) investigations data directly from HDFS.

Requirements

Hive Configuration

The first step is to configure the Presto cluster to connect to a Hive metadata store. You can do this using the hive connector: https://prestodb.io/docs/current/connector/hive.html. Additionally, the catalog config must have hive.recursive-directories=true set. This allows for data written in hive partitioned folders by SLS to be scanned by Presto.

Create Tables

The second step is to create a database. This can be done in the Presto CLI with:

CREATE SCHEMA hive.presto;

And then set the default database:

USE hive.presto;

With the database created, the next step is to add the tables. This can be done by entering the following DDL statements in the Presto CLI:

Note

hadoop-nn is the default namenode and should be replaced with the namenode of the HDFS cluster that contains the files to be queried.

ALERT table

-- Alerts emitted by SLS; external Parquet data under /c8/alerts on HDFS.
CREATE TABLE hive.presto.alerts (
    unix_nano_timestamp BIGINT,         -- event time, nanoseconds since Unix epoch
    alert_id            VARCHAR,
    sensor_id           VARCHAR,
    priority            VARCHAR,
    process_uuid        VARCHAR,
    incident_id         VARCHAR,
    policy_type         VARCHAR,
    strategy_name       VARCHAR,
    audit_group_id      VARCHAR,
    messages            ARRAY(VARCHAR)  -- one or more human-readable alert messages
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/alerts',
    format = 'PARQUET'
)

CONNECTIONS table

-- Outbound connection events recorded by SLS; external Parquet data under /c8/connections.
CREATE TABLE hive.presto.connections (
    unix_nano_timestamp BIGINT,   -- event time, nanoseconds since Unix epoch
    process_uuid        VARCHAR,
    process_pid         INTEGER,
    monotime_nanos      BIGINT,   -- monotonic clock reading at event time
    dst_addr            VARCHAR,
    dst_port            INTEGER,
    incident_id         VARCHAR,
    success             BOOLEAN,  -- whether the connection attempt succeeded
    audit_group_id      VARCHAR
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/connections',
    format = 'PARQUET'
)

CONTAINER_EVENTS table

-- Container lifecycle events from SLS; external Parquet data under /c8/container_events.
CREATE TABLE hive.presto.container_events (
    container_id        VARCHAR,
    unix_nano_timestamp BIGINT,   -- event time, nanoseconds since Unix epoch
    incident_id         VARCHAR,
    sensor_id           VARCHAR,
    process_uuid        VARCHAR,
    event_type          INTEGER,  -- numeric code; see event_type_name for the label
    container_name      VARCHAR,
    image_id            VARCHAR,
    image_name          VARCHAR,
    pod_name            VARCHAR,
    namespace           VARCHAR,
    event_type_name     VARCHAR,
    audit_group_id      VARCHAR
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/container_events',
    format = 'PARQUET'
)

FILE_EVENTS table

-- File-system events recorded by SLS; external Parquet data under /c8/file_events.
CREATE TABLE hive.presto.file_events (
    unix_nano_timestamp BIGINT,   -- event time, nanoseconds since Unix epoch
    sensor_id           VARCHAR,
    process_uuid        VARCHAR,
    pid                 INTEGER,
    container_id        VARCHAR,
    path                VARCHAR,
    source_path         VARCHAR,
    event_type          INTEGER,  -- numeric code; see event_type_name for the label
    incident_id         VARCHAR,
    event_type_name     VARCHAR,
    audit_group_id      VARCHAR
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/file_events',
    format = 'PARQUET'
)

LOST_RECORDS table

-- Records the sensor dropped (with reason and count); external Parquet data under /c8/lost_records.
CREATE TABLE hive.presto.lost_records (
    unix_nano_timestamp BIGINT,   -- event time, nanoseconds since Unix epoch
    event_type          INTEGER,  -- numeric code; see event_type_name for the label
    action_type         INTEGER,  -- numeric code; see action_type_name for the label
    reason              VARCHAR,
    lost_count          BIGINT,   -- number of records lost in this batch
    event_type_name     VARCHAR,
    action_type_name    VARCHAR,
    sensor_id           VARCHAR
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/lost_records',
    format = 'PARQUET'
)

PROCESS_EVENTS table

-- Process lifecycle events from SLS; external Parquet data under /c8/process_events.
-- NOTE: GROUP is a reserved keyword in Presto SQL, so the "group" column must be
-- written as a quoted identifier or the statement fails to parse. Quoting does not
-- change the column name: unquoted identifiers fold to lowercase, so "group" and
-- group refer to the same column.
CREATE TABLE hive.presto.process_events (
    unix_nano_timestamp BIGINT,         -- event time, nanoseconds since Unix epoch
    event_type          INTEGER,        -- numeric code; see event_type_name for the label
    process_uuid        VARCHAR,
    sensor_id           VARCHAR,
    pid                 INTEGER,
    container_id        VARCHAR,
    parent_process_uuid VARCHAR,
    parent_pid          INTEGER,
    gid                 INTEGER,
    "group"             VARCHAR,        -- real group name; quoted: GROUP is reserved
    uid                 INTEGER,
    username            VARCHAR,
    euid                INTEGER,
    effective_username  VARCHAR,
    fsuid               INTEGER,
    fs_username         VARCHAR,
    fsgid               INTEGER,
    fs_group            VARCHAR,
    login_uid           INTEGER,
    login_username      VARCHAR,
    login_gid           INTEGER,
    login_group         VARCHAR,
    path                VARCHAR,
    arguments           ARRAY(VARCHAR), -- argv of the process
    incident_id         VARCHAR,
    child_pid           INTEGER,
    child_process_uuid  VARCHAR,
    egid                INTEGER,
    effective_group     VARCHAR,
    tid                 INTEGER,
    return_value        INTEGER,
    event_type_name     VARCHAR,
    audit_group_id      VARCHAR
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/process_events',
    format = 'PARQUET'
)

SENSORS table

-- One row per sensor heartbeat/registration; external Parquet data under /c8/sensors.
CREATE TABLE hive.presto.sensors (
    unix_nano_timestamp BIGINT,  -- event time, nanoseconds since Unix epoch
    sensor_id           VARCHAR,
    sensor_pid          INTEGER,
    hostname            VARCHAR
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/sensors',
    format = 'PARQUET'
)

SENSOR_METADATA table

-- Arbitrary key/value metadata per sensor; external Parquet data under /c8/sensor_metadata.
CREATE TABLE hive.presto.sensor_metadata (
    sensor_id VARCHAR,
    key       VARCHAR,  -- metadata attribute name
    value     VARCHAR   -- metadata attribute value
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/sensor_metadata',
    format = 'PARQUET'
)

SHELL_COMMANDS table

-- Shell commands captured by SLS; external Parquet data under /c8/shell_commands.
-- NOTE: GROUP is a reserved keyword in Presto SQL, so the "group" column must be
-- written as a quoted identifier or the statement fails to parse. Quoting does not
-- change the column name: unquoted identifiers fold to lowercase, so "group" and
-- group refer to the same column.
CREATE TABLE hive.presto.shell_commands (
    unix_nano_timestamp  BIGINT,         -- event time, nanoseconds since Unix epoch
    program_filename     VARCHAR,
    program_arguments    ARRAY(VARCHAR), -- argv of the invoked program
    process_uuid         VARCHAR,
    process_pid          INTEGER,
    shell_process_uuid   VARCHAR,
    shell_process_pid    INTEGER,
    exec_event_id        VARCHAR,
    monotime_nanos       BIGINT,         -- monotonic clock reading at event time
    container_id         VARCHAR,
    sensor_id            VARCHAR,
    uid                  INTEGER,
    username             VARCHAR,
    gid                  INTEGER,
    "group"              VARCHAR,        -- real group name; quoted: GROUP is reserved
    euid                 INTEGER,
    effective_username   VARCHAR,
    egid                 INTEGER,
    effective_group      VARCHAR,
    suid                 INTEGER,
    saved_username       VARCHAR,
    sgid                 INTEGER,
    saved_group          VARCHAR,
    fsuid                INTEGER,
    file_system_username VARCHAR,
    fsgid                INTEGER,
    file_system_group    VARCHAR,
    incident_id          VARCHAR,
    audit_group_id       VARCHAR
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/shell_commands',
    format = 'PARQUET'
)

TTY_DATA table

-- Raw terminal I/O captured per shell session; external Parquet data under /c8/tty_data.
CREATE TABLE hive.presto.tty_data (
    unix_nano_timestamp BIGINT,   -- event time, nanoseconds since Unix epoch
    data                VARCHAR,  -- terminal bytes for this chunk
    is_input            BOOLEAN,  -- TRUE for keystrokes, FALSE for terminal output
    monotime_nanos      BIGINT,   -- monotonic clock reading at event time
    shell_process_uuid  VARCHAR,
    incident_id         VARCHAR,
    audit_group_id      VARCHAR
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/tty_data',
    format = 'PARQUET'
)