mvt/mvt/ios/modules/mixed/webkit_session_resource_log.py

# Mobile Verification Toolkit (MVT)
# Copyright (c) 2021-2023 The MVT Authors.
# Use of this software is governed by the MVT License 1.1 that can be found at
#   https://license.mvt.re/1.1/

import logging
import os
import plistlib
from typing import Optional

from mvt.common.utils import convert_datetime_to_iso

from ..base import IOSExtraction

WEBKIT_SESSION_RESOURCE_LOG_BACKUP_IDS = [
    "a500ee38053454a02e990957be8a251935e28d3f",
]
WEBKIT_SESSION_RESOURCE_LOG_BACKUP_RELPATH = "Library/WebKit/WebsiteData/ResourceLoadStatistics/full_browsing_session_resourceLog.plist"  # pylint: disable=line-too-long
WEBKIT_SESSION_RESOURCE_LOG_ROOT_PATHS = [
    "private/var/mobile/Containers/Data/Application/*/SystemData/com.apple.SafariViewService/Library/WebKit/WebsiteData/full_browsing_session_resourceLog.plist",  # pylint: disable=line-too-long
    "private/var/mobile/Containers/Data/Application/*/Library/WebKit/WebsiteData/ResourceLoadStatistics/full_browsing_session_resourceLog.plist",  # pylint: disable=line-too-long
    "private/var/mobile/Library/WebClips/*/Storage/full_browsing_session_resourceLog.plist",  # pylint: disable=line-too-long
]


class WebkitSessionResourceLog(IOSExtraction):
    """This module extracts records from WebKit browsing session
    resource logs, and checks them against any provided list of
    suspicious domains.


    """

    def __init__(
        self,
        file_path: Optional[str] = None,
        target_path: Optional[str] = None,
        results_path: Optional[str] = None,
        module_options: Optional[dict] = None,
        log: logging.Logger = logging.getLogger(__name__),
        results: Optional[list] = None,
    ) -> None:
        super().__init__(
            file_path=file_path,
            target_path=target_path,
            results_path=results_path,
            module_options=module_options,
            log=log,
            results=results,
        )

        self.results = {} if not results else results

    @staticmethod
    def _extract_domains(entries):
        if not entries:
            return []

        domains = []
        for entry in entries:
            if "origin" in entry:
                domains.append(entry["origin"])
            if "domain" in entry:
                domains.append(entry["domain"])

        return domains

    def check_indicators(self) -> None:
        if not self.indicators:
            return

        for _, entries in self.results.items():
            for entry in entries:
                source_domains = self._extract_domains(entry["redirect_source"])
                destination_domains = self._extract_domains(
                    entry["redirect_destination"]
                )

                # TODO: Currently not used.
                # subframe_origins = self._extract_domains(
                #    entry["subframe_under_origin"])
                # subresource_domains = self._extract_domains(
                #    entry["subresource_under_origin"])

                all_origins = set(
                    [entry["origin"]] + source_domains + destination_domains
                )

                ioc = self.indicators.check_domains(all_origins)
                if ioc:
                    entry["matched_indicator"] = ioc
                    self.detected.append(entry)

                    redirect_path = ""
                    if len(source_domains) > 0:
                        redirect_path += "SOURCE: "
                        for idx, item in enumerate(source_domains):
                            source_domains[idx] = f'"{item}"'

                        redirect_path += ", ".join(source_domains)
                        redirect_path += " -> "

                    redirect_path += f"ORIGIN: \"{entry['origin']}\""

                    if len(destination_domains) > 0:
                        redirect_path += " -> "
                        redirect_path += "DESTINATION: "
                        for idx, item in enumerate(destination_domains):
                            destination_domains[idx] = f'"{item}"'

                        redirect_path += ", ".join(destination_domains)

                    self.log.warning(
                        "Found HTTP redirect between suspicious domains: %s",
                        redirect_path,
                    )

    def _extract_browsing_stats(self, log_path):
        items = []

        with open(log_path, "rb") as handle:
            file_plist = plistlib.load(handle)

        if "browsingStatistics" not in file_plist:
            return items

        browsing_stats = file_plist["browsingStatistics"]

        for item in browsing_stats:
            items.append(
                {
                    "origin": item.get("PrevalentResourceOrigin", ""),
                    "redirect_source": item.get("topFrameUniqueRedirectsFrom", ""),
                    "redirect_destination": item.get("topFrameUniqueRedirectsTo", ""),
                    "subframe_under_origin": item.get(
                        "subframeUnderTopFrameOrigins", ""
                    ),
                    "subresource_under_origin": item.get(
                        "subresourceUnderTopFrameOrigins", ""
                    ),
                    "user_interaction": item.get("hadUserInteraction"),
                    "most_recent_interaction": convert_datetime_to_iso(
                        item["mostRecentUserInteraction"]
                    ),
                    "last_seen": convert_datetime_to_iso(item["lastSeen"]),
                }
            )

        return items

    def run(self) -> None:
        if self.is_backup:
            for log_file in self._get_backup_files_from_manifest(
                relative_path=WEBKIT_SESSION_RESOURCE_LOG_BACKUP_RELPATH
            ):
                log_path = self._get_backup_file_from_id(log_file["file_id"])

                if not log_path:
                    continue

                self.log.info(
                    "Found Safari browsing session resource log at path: %s", log_path
                )
                self.results[log_path] = self._extract_browsing_stats(log_path)
        elif self.is_fs_dump:
            for log_path in self._get_fs_files_from_patterns(
                WEBKIT_SESSION_RESOURCE_LOG_ROOT_PATHS
            ):
                self.log.info(
                    "Found Safari browsing session resource log at path: %s", log_path
                )
                key = os.path.relpath(log_path, self.target_path)
                self.results[key] = self._extract_browsing_stats(log_path)

        self.log.info(
            "Extracted records from %d Safari browsing session resource logs",
            len(self.results),
        )
First commit 2021-07-16 06:05:01 +00:00			`# Mobile Verification Toolkit (MVT)`
Updated copyright notice 2023-09-09 15:55:27 +00:00			`# Copyright (c) 2021-2023 The MVT Authors.`
More explicit copyright and licensing notes 2021-08-01 19:11:08 +00:00			`# Use of this software is governed by the MVT License 1.1 that can be found at`
			`# https://license.mvt.re/1.1/`
First commit 2021-07-16 06:05:01 +00:00
Starting to add type hints 2022-06-17 20:30:46 +00:00			`import logging`
Sorted imports 2021-07-30 09:40:09 +00:00			`import os`
Got rid of biplist, using standard plistlib 2021-08-14 16:50:11 +00:00			`import plistlib`
Improved type hints and code style enforcement 2022-08-16 11:39:55 +00:00			`from typing import Optional`
First commit 2021-07-16 06:05:01 +00:00
Continuing enforcement of line length and simplifying date conversions 2022-08-13 00:14:24 +00:00			`from mvt.common.utils import convert_datetime_to_iso`
First commit 2021-07-16 06:05:01 +00:00
Restructured modules folders 2021-08-15 11:14:18 +00:00			`from ..base import IOSExtraction`
First commit 2021-07-16 06:05:01 +00:00
			`WEBKIT_SESSION_RESOURCE_LOG_BACKUP_IDS = [`
			`"a500ee38053454a02e990957be8a251935e28d3f",`
			`]`
Added additional missing space in inline comment 2022-08-16 16:26:34 +00:00			`WEBKIT_SESSION_RESOURCE_LOG_BACKUP_RELPATH = "Library/WebKit/WebsiteData/ResourceLoadStatistics/full_browsing_session_resourceLog.plist" # pylint: disable=line-too-long`
First commit 2021-07-16 06:05:01 +00:00			`WEBKIT_SESSION_RESOURCE_LOG_ROOT_PATHS = [`
Added additional missing space in inline comment 2022-08-16 16:26:34 +00:00			`"private/var/mobile/Containers/Data/Application/*/SystemData/com.apple.SafariViewService/Library/WebKit/WebsiteData/full_browsing_session_resourceLog.plist", # pylint: disable=line-too-long`
			`"private/var/mobile/Containers/Data/Application/*/Library/WebKit/WebsiteData/ResourceLoadStatistics/full_browsing_session_resourceLog.plist", # pylint: disable=line-too-long`
			`"private/var/mobile/Library/WebClips/*/Storage/full_browsing_session_resourceLog.plist", # pylint: disable=line-too-long`
First commit 2021-07-16 06:05:01 +00:00			`]`

Standardized code with flake8 2021-11-19 14:27:51 +00:00
First commit 2021-07-16 06:05:01 +00:00			`class WebkitSessionResourceLog(IOSExtraction):`
			`"""This module extracts records from WebKit browsing session`
			`resource logs, and checks them against any provided list of`
Standardizing reST docstrings 2021-09-10 13:18:13 +00:00			`suspicious domains.`


			`"""`
First commit 2021-07-16 06:05:01 +00:00
Improved type hints and code style enforcement 2022-08-16 11:39:55 +00:00			`def __init__(`
			`self,`
Changed default for Optional[str] 2022-08-17 13:52:17 +00:00			`file_path: Optional[str] = None,`
			`target_path: Optional[str] = None,`
			`results_path: Optional[str] = None,`
Move --fast flag from being a top-level MVT module parameter to an option in a new module_options parameter 2023-07-17 16:29:43 +00:00			`module_options: Optional[dict] = None,`
Improved type hints and code style enforcement 2022-08-16 11:39:55 +00:00			`log: logging.Logger = logging.getLogger(__name__),`
Linted code using isort + autoflake + black, fixed wrong use of Optional[bool] 2023-06-01 21:40:26 +00:00			`results: Optional[list] = None,`
Improved type hints and code style enforcement 2022-08-16 11:39:55 +00:00			`) -> None:`
Linted code using isort + autoflake + black, fixed wrong use of Optional[bool] 2023-06-01 21:40:26 +00:00			`super().__init__(`
			`file_path=file_path,`
			`target_path=target_path,`
			`results_path=results_path,`
Move --fast flag from being a top-level MVT module parameter to an option in a new module_options parameter 2023-07-17 16:29:43 +00:00			`module_options=module_options,`
Linted code using isort + autoflake + black, fixed wrong use of Optional[bool] 2023-06-01 21:40:26 +00:00			`log=log,`
			`results=results,`
			`)`
First commit 2021-07-16 06:05:01 +00:00
Added check-iocs command to mvt-android 2022-01-27 17:23:19 +00:00			`self.results = {} if not results else results`
Fixed WebkitSessionResourceLog module, still needs testing 2021-08-15 18:00:29 +00:00
First commit 2021-07-16 06:05:01 +00:00			`@staticmethod`
			`def _extract_domains(entries):`
			`if not entries:`
			`return []`

			`domains = []`
			`for entry in entries:`
			`if "origin" in entry:`
			`domains.append(entry["origin"])`
			`if "domain" in entry:`
			`domains.append(entry["domain"])`

			`return domains`

Starting to add type hints 2022-06-17 20:30:46 +00:00			`def check_indicators(self) -> None:`
Checking for valid indicators before continuing (closes: #35) 2021-07-23 16:04:41 +00:00			`if not self.indicators:`
			`return`

Started linting the code 2022-08-12 14:20:16 +00:00			`for _, entries in self.results.items():`
First commit 2021-07-16 06:05:01 +00:00			`for entry in entries:`
			`source_domains = self._extract_domains(entry["redirect_source"])`
Improved type hints and code style enforcement 2022-08-16 11:39:55 +00:00			`destination_domains = self._extract_domains(`
Linted code using isort + autoflake + black, fixed wrong use of Optional[bool] 2023-06-01 21:40:26 +00:00			`entry["redirect_destination"]`
			`)`
First commit 2021-07-16 06:05:01 +00:00
			`# TODO: Currently not used.`
Improved type hints and code style enforcement 2022-08-16 11:39:55 +00:00			`# subframe_origins = self._extract_domains(`
			`# entry["subframe_under_origin"])`
			`# subresource_domains = self._extract_domains(`
			`# entry["subresource_under_origin"])`
First commit 2021-07-16 06:05:01 +00:00
Improved type hints and code style enforcement 2022-08-16 11:39:55 +00:00			`all_origins = set(`
Linted code using isort + autoflake + black, fixed wrong use of Optional[bool] 2023-06-01 21:40:26 +00:00			`[entry["origin"]] + source_domains + destination_domains`
Improved type hints and code style enforcement 2022-08-16 11:39:55 +00:00			`)`
First commit 2021-07-16 06:05:01 +00:00
Adding indicator to matched results 2022-01-23 14:01:49 +00:00			`ioc = self.indicators.check_domains(all_origins)`
			`if ioc:`
			`entry["matched_indicator"] = ioc`
First commit 2021-07-16 06:05:01 +00:00			`self.detected.append(entry)`

			`redirect_path = ""`
			`if len(source_domains) > 0:`
			`redirect_path += "SOURCE: "`
			`for idx, item in enumerate(source_domains):`
Linted code using isort + autoflake + black, fixed wrong use of Optional[bool] 2023-06-01 21:40:26 +00:00			`source_domains[idx] = f'"{item}"'`
First commit 2021-07-16 06:05:01 +00:00
			`redirect_path += ", ".join(source_domains)`
			`redirect_path += " -> "`

			`redirect_path += f"ORIGIN: \"{entry['origin']}\""`

			`if len(destination_domains) > 0:`
			`redirect_path += " -> "`
			`redirect_path += "DESTINATION: "`
			`for idx, item in enumerate(destination_domains):`
Linted code using isort + autoflake + black, fixed wrong use of Optional[bool] 2023-06-01 21:40:26 +00:00			`destination_domains[idx] = f'"{item}"'`
First commit 2021-07-16 06:05:01 +00:00
			`redirect_path += ", ".join(destination_domains)`

Linted code using isort + autoflake + black, fixed wrong use of Optional[bool] 2023-06-01 21:40:26 +00:00			`self.log.warning(`
			`"Found HTTP redirect between suspicious domains: %s",`
			`redirect_path,`
			`)`
First commit 2021-07-16 06:05:01 +00:00
Overhaul of mvt-ios modules 2021-08-16 08:50:35 +00:00			`def _extract_browsing_stats(self, log_path):`
			`items = []`

			`with open(log_path, "rb") as handle:`
			`file_plist = plistlib.load(handle)`

			`if "browsingStatistics" not in file_plist:`
			`return items`

			`browsing_stats = file_plist["browsingStatistics"]`

			`for item in browsing_stats:`
Linted code using isort + autoflake + black, fixed wrong use of Optional[bool] 2023-06-01 21:40:26 +00:00			`items.append(`
			`{`
			`"origin": item.get("PrevalentResourceOrigin", ""),`
			`"redirect_source": item.get("topFrameUniqueRedirectsFrom", ""),`
			`"redirect_destination": item.get("topFrameUniqueRedirectsTo", ""),`
			`"subframe_under_origin": item.get(`
			`"subframeUnderTopFrameOrigins", ""`
			`),`
			`"subresource_under_origin": item.get(`
			`"subresourceUnderTopFrameOrigins", ""`
			`),`
			`"user_interaction": item.get("hadUserInteraction"),`
			`"most_recent_interaction": convert_datetime_to_iso(`
			`item["mostRecentUserInteraction"]`
			`),`
			`"last_seen": convert_datetime_to_iso(item["lastSeen"]),`
			`}`
			`)`
Overhaul of mvt-ios modules 2021-08-16 08:50:35 +00:00
			`return items`

Starting to add type hints 2022-06-17 20:30:46 +00:00			`def run(self) -> None:`
Fixed WebkitSessionResourceLog module, still needs testing 2021-08-15 18:00:29 +00:00			`if self.is_backup:`
Improved type hints and code style enforcement 2022-08-16 11:39:55 +00:00			`for log_file in self._get_backup_files_from_manifest(`
Linted code using isort + autoflake + black, fixed wrong use of Optional[bool] 2023-06-01 21:40:26 +00:00			`relative_path=WEBKIT_SESSION_RESOURCE_LOG_BACKUP_RELPATH`
			`):`
Fixes a bug in retrieving the backup file path in webkit session resource logs 2021-09-01 19:49:23 +00:00			`log_path = self._get_backup_file_from_id(log_file["file_id"])`
Improved type hints and code style enforcement 2022-08-16 11:39:55 +00:00
Fixes a bug in retrieving the backup file path in webkit session resource logs 2021-09-01 19:49:23 +00:00			`if not log_path:`
			`continue`
Conformed browerstate plugin to others with similar structure 2022-08-08 14:44:54 +00:00
Linted code using isort + autoflake + black, fixed wrong use of Optional[bool] 2023-06-01 21:40:26 +00:00			`self.log.info(`
			`"Found Safari browsing session resource log at path: %s", log_path`
			`)`
Overhaul of mvt-ios modules 2021-08-16 08:50:35 +00:00			`self.results[log_path] = self._extract_browsing_stats(log_path)`
			`elif self.is_fs_dump:`
Improved type hints and code style enforcement 2022-08-16 11:39:55 +00:00			`for log_path in self._get_fs_files_from_patterns(`
Linted code using isort + autoflake + black, fixed wrong use of Optional[bool] 2023-06-01 21:40:26 +00:00			`WEBKIT_SESSION_RESOURCE_LOG_ROOT_PATHS`
			`):`
			`self.log.info(`
			`"Found Safari browsing session resource log at path: %s", log_path`
			`)`
Refactoring cli commands for iOS too 2022-06-16 13:18:50 +00:00			`key = os.path.relpath(log_path, self.target_path)`
Overhaul of mvt-ios modules 2021-08-16 08:50:35 +00:00			`self.results[key] = self._extract_browsing_stats(log_path)`

Linted code using isort + autoflake + black, fixed wrong use of Optional[bool] 2023-06-01 21:40:26 +00:00			`self.log.info(`
			`"Extracted records from %d Safari browsing session resource logs",`
			`len(self.results),`
			`)`