mvt/mvt/common/module.py

254 lines
8.3 KiB
Python
Raw Normal View History

2021-07-16 06:05:01 +00:00
# Mobile Verification Toolkit (MVT)
2023-09-09 15:55:27 +00:00
# Copyright (c) 2021-2023 The MVT Authors.
# Use of this software is governed by the MVT License 1.1 that can be found at
# https://license.mvt.re/1.1/
2021-07-16 06:05:01 +00:00
import csv
2022-06-17 20:30:46 +00:00
import logging
2021-07-30 09:40:09 +00:00
import os
import re
2023-03-24 18:02:02 +00:00
from typing import Any, Dict, List, Optional, Union
2021-07-30 09:40:09 +00:00
2021-07-16 06:05:01 +00:00
import simplejson as json
2023-06-29 11:22:43 +00:00
from .utils import exec_or_profile
2021-07-30 09:40:09 +00:00
class DatabaseNotFoundError(Exception):
    """Signal that a database needed by a module could not be found."""
2021-11-19 14:27:51 +00:00
class DatabaseCorruptedError(Exception):
    """Signal that a module's database appears to be corrupted."""
2021-11-19 14:27:51 +00:00
class InsufficientPrivileges(Exception):
    """Signal that a module lacks the privileges it needs to run."""
2021-11-19 14:27:51 +00:00
2022-08-12 14:20:16 +00:00
class MVTModule:
    """This class provides a base for all extraction modules.

    The base implementations of :meth:`run`, :meth:`check_indicators` and
    :meth:`serialize` raise ``NotImplementedError``; subclasses override the
    ones they support.
    """

    enabled = True
    # Optional explicit slug; when unset, get_slug() derives one from the
    # class name.
    slug: Optional[str] = None

    def __init__(
        self,
        file_path: Optional[str] = None,
        target_path: Optional[str] = None,
        results_path: Optional[str] = None,
        module_options: Optional[Dict[str, Any]] = None,
        log: logging.Logger = logging.getLogger(__name__),
        results: Union[List[Dict[str, Any]], Dict[str, Any], None] = None,
    ) -> None:
        """Initialize module.

        :param file_path: Path to the module's database file, if there is any
        :type file_path: str
        :param target_path: Path to the target folder (backup or filesystem
                            dump)
        :type target_path: str
        :param results_path: Folder where results will be stored
        :type results_path: str
        :param module_options: Options for the module; an empty dict is used
                               when none are provided
        :type module_options: dict
        :param log: Handle to logger
        :param results: Provided list of results entries; an empty list is
                        used when none are provided
        :type results: list

        """
        self.file_path = file_path
        self.target_path = target_path
        self.results_path = results_path
        self.module_options = module_options if module_options else {}

        self.log = log
        self.indicators = None
        self.results = results if results else []
        self.detected: List[Dict[str, Any]] = []
        self.timeline: List[Dict[str, str]] = []
        self.timeline_detected: List[Dict[str, str]] = []

    @classmethod
    def from_json(cls, json_path: str, log: logging.Logger):
        """Create a module instance from results stored in a JSON file.

        :param json_path: Path to the JSON file to load the results from
        :param log: Handle to logger
        :returns: A new instance of this class holding the loaded results

        """
        with open(json_path, "r", encoding="utf-8") as handle:
            results = json.load(handle)

        if log:
            log.info('Loaded %d results from "%s"', len(results), json_path)

        return cls(results=results, log=log)

    def get_slug(self) -> str:
        """Use the module's class name to retrieve a slug.

        :returns: ``self.slug`` when it is set, otherwise the class name
                  converted from CamelCase to snake_case

        """
        if self.slug:
            return self.slug

        sub = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", self.__class__.__name__)
        return re.sub("([a-z0-9])([A-Z])", r"\1_\2", sub).lower()

    def check_indicators(self) -> None:
        """Check the results of this module against a provided list of
        indicators.

        :raises NotImplementedError: Always, in this base class

        """
        raise NotImplementedError

    def save_to_json(self) -> None:
        """Save the collected results to a json file.

        Nothing is written when no results path was configured. Non-empty
        results go to ``<slug>.json`` and non-empty detections go to
        ``<slug>_detected.json``. A failure to serialize either of them is
        logged as an error instead of being raised.
        """
        if not self.results_path:
            return

        name = self.get_slug()
        outputs = (
            (self.results, f"{name}.json"),
            (self.detected, f"{name}_detected.json"),
        )

        for data, file_name in outputs:
            if not data:
                continue

            json_path = os.path.join(self.results_path, file_name)
            with open(json_path, "w", encoding="utf-8") as handle:
                try:
                    json.dump(data, handle, indent=4, default=str)
                except Exception as exc:
                    # Detections get the same best-effort handling as the
                    # results, so one bad record does not abort the caller.
                    self.log.error(
                        "Unable to store results of module %s to file %s: %s",
                        self.__class__.__name__,
                        file_name,
                        exc,
                    )

    def serialize(self, record: dict) -> Union[dict, list, None]:
        """Convert a result record into timeline entries.

        :param record: A single entry from the results or detections
        :returns: One entry, a list of entries, or a falsy value to have
                  :meth:`to_timeline` skip the record
        :raises NotImplementedError: Always, in this base class

        """
        raise NotImplementedError

    @staticmethod
    def _deduplicate_timeline(timeline: list) -> list:
        """Serialize entry as JSON to deduplicate repeated entries

        Entries are compared by their JSON form with sorted keys, and the
        first occurrence of each one is kept in its original position, so
        the output order is the same on every run.

        :param timeline: List of entries from timeline to deduplicate
        :returns: New list without repeated entries

        """
        # A dict keeps insertion order, unlike a set of strings whose
        # iteration order changes between interpreter runs.
        unique = dict.fromkeys(
            json.dumps(record, sort_keys=True) for record in timeline
        )
        return [json.loads(serialized) for serialized in unique]

    def to_timeline(self) -> None:
        """Convert results into a timeline.

        Results are serialized into ``self.timeline`` and detections into
        ``self.timeline_detected``; both lists are then de-duplicated.
        """
        pairs = (
            (self.results, self.timeline),
            (self.detected, self.timeline_detected),
        )

        for entries, target in pairs:
            for entry in entries:
                record = self.serialize(entry)
                if not record:
                    continue

                # serialize() may return several timeline entries at once.
                if isinstance(record, list):
                    target.extend(record)
                else:
                    target.append(record)

        # De-duplicate timeline entries.
        self.timeline = self._deduplicate_timeline(self.timeline)
        self.timeline_detected = self._deduplicate_timeline(self.timeline_detected)

    def run(self) -> None:
        """Run the main module procedure.

        :raises NotImplementedError: Always, in this base class

        """
        raise NotImplementedError
2023-03-24 18:02:02 +00:00
def run_module(module: MVTModule) -> None:
    """Run a module and post-process its results.

    The module's ``run()`` is executed first. If it raises, the error is
    logged and nothing else happens. Otherwise the indicators are checked,
    the timeline is built and the results are saved to JSON.

    :param module: Module instance to run

    """
    name = module.__class__.__name__
    module.log.info("Running module %s...", name)

    # Every handler returns, so the code after this block only runs when
    # run() completed without raising.
    try:
        exec_or_profile("module.run()", globals(), locals())
    except NotImplementedError:
        module.log.exception(
            "The run() procedure of module %s was not implemented yet!", name
        )
        return
    except InsufficientPrivileges as exc:
        module.log.info("Insufficient privileges for module %s: %s", name, exc)
        return
    except DatabaseNotFoundError as exc:
        module.log.info(
            "There might be no data to extract by module %s: %s", name, exc
        )
        return
    except DatabaseCorruptedError as exc:
        module.log.error(
            "The %s module database seems to be corrupted: %s", name, exc
        )
        return
    except Exception as exc:
        module.log.exception(
            "Error in running extraction from module %s: %s", name, exc
        )
        return

    try:
        exec_or_profile("module.check_indicators()", globals(), locals())
    except NotImplementedError:
        module.log.info(
            "The %s module does not support checking for indicators", name
        )
    except Exception as exc:
        module.log.exception(
            "Error when checking indicators from module %s: %s", name, exc
        )
    else:
        if module.indicators and not module.detected:
            module.log.info("The %s module produced no detections!", name)

    try:
        module.to_timeline()
    except NotImplementedError:
        # The module does not implement serialize(); no timeline is built.
        pass
    except Exception as exc:
        module.log.exception(
            "Error when serializing data from module %s: %s", name, exc
        )

    module.save_to_json()
2022-06-22 14:53:29 +00:00
def save_timeline(timeline: list, timeline_path: str) -> None:
    """Save the timeline in a csv file.

    The file is opened in append mode. A header row is written on every
    call, followed by the events sorted by their ``timestamp`` value.
    Events whose timestamp is ``None`` or missing sort first.

    :param timeline: List of records to order and store
    :param timeline_path: Path to the csv file to store the timeline to

    """

    def _timestamp_key(event: dict) -> str:
        # Use .get() like the row writer below, so an event without a
        # "timestamp" key sorts as empty instead of raising KeyError.
        timestamp = event.get("timestamp")
        return timestamp if timestamp is not None else ""

    # newline="" lets the csv writer control line endings itself; without it
    # every row ends in "\r\r\n" on platforms that translate "\n".
    with open(timeline_path, "a+", encoding="utf-8", newline="") as handle:
        csvoutput = csv.writer(
            handle, delimiter=",", quotechar='"', quoting=csv.QUOTE_ALL, escapechar="\\"
        )
        csvoutput.writerow(["UTC Timestamp", "Plugin", "Event", "Description"])

        for event in sorted(timeline, key=_timestamp_key):
            csvoutput.writerow(
                [
                    event.get("timestamp"),
                    event.get("module"),
                    event.get("event"),
                    event.get("data"),
                ]
            )