mvt/mvt/common/utils.py

197 lines
5.7 KiB
Python
Raw Normal View History

2021-07-16 06:05:01 +00:00
# Mobile Verification Toolkit (MVT)
2023-02-08 19:18:16 +00:00
# Copyright (c) 2021-2023 Claudio Guarnieri.
# Use of this software is governed by the MVT License 1.1 that can be found at
# https://license.mvt.re/1.1/
2021-07-16 06:05:01 +00:00
import datetime
import hashlib
import os
2021-07-30 09:40:09 +00:00
import re
2023-03-24 18:02:02 +00:00
from typing import Any, Iterator, Union
2021-07-30 09:40:09 +00:00
2021-07-16 06:05:01 +00:00
def convert_chrometime_to_datetime(timestamp: int) -> datetime.datetime:
    """Convert a Chrome timestamp into a naive datetime.

    The timestamp is interpreted as a number of microseconds elapsed since
    1601-01-01 00:00:00.

    :param timestamp: Chrome timestamp, in microseconds.
    :type timestamp: int
    :returns: The datetime obtained by adding the offset to 1601-01-01.
    :rtype: datetime.datetime

    """
    chrome_epoch = datetime.datetime(1601, 1, 1)
    return chrome_epoch + datetime.timedelta(microseconds=timestamp)
2022-08-16 14:02:17 +00:00
def convert_datetime_to_iso(date_time: datetime.datetime) -> str:
    """Render a datetime as an ISO-like string.

    :param date_time: The datetime to format.
    :type date_time: datetime.datetime
    :returns: String in YYYY-mm-dd HH:MM:SS.ffffff format, or an empty
        string if formatting raises (for example when None is passed).
    :rtype: str

    """
    iso_format = "%Y-%m-%d %H:%M:%S.%f"
    try:
        formatted = date_time.strftime(iso_format)
    except Exception:
        # Deliberately best-effort: any failure yields an empty string
        # rather than propagating to the caller.
        return ""
    return formatted
def convert_unix_to_utc_datetime(
    timestamp: Union[int, float, str]
) -> datetime.datetime:
    """Convert a Unix epoch timestamp to a naive UTC datetime.

    :param timestamp: Epoch timestamp to convert. Anything accepted by
        float() works (int, float or numeric string).
    :type timestamp: int, float or str
    :returns: Naive datetime (tzinfo is None) expressing the time in UTC.
    :rtype: datetime.datetime
    :raises ValueError: If the timestamp cannot be parsed as a float.

    """
    # datetime.utcfromtimestamp() is deprecated since Python 3.12. Build a
    # timezone-aware UTC datetime instead and drop the tzinfo so callers
    # keep receiving the same naive value as before.
    aware = datetime.datetime.fromtimestamp(float(timestamp),
                                            tz=datetime.timezone.utc)
    return aware.replace(tzinfo=None)
2022-08-13 12:04:10 +00:00
2023-03-24 18:02:02 +00:00
def convert_unix_to_iso(timestamp: Union[int, float, str]) -> str:
    """Convert a Unix epoch timestamp to an ISO-like string.

    :param timestamp: Epoch timestamp to convert.
    :type timestamp: int
    :returns: ISO datetime string in YYYY-mm-dd HH:MM:SS.ms format, or an
        empty string if the conversion raises.
    :rtype: str

    """
    try:
        utc_moment = convert_unix_to_utc_datetime(timestamp)
        return convert_datetime_to_iso(utc_moment)
    except Exception:
        # Invalid or unparsable timestamps are reported as an empty string.
        return ""
2022-08-13 12:04:10 +00:00
def convert_mactime_to_datetime(timestamp: Union[int, float],
                                from_2001: bool = True):
    """Convert a Mac Standard Time timestamp to a datetime.

    :param timestamp: MacTime timestamp (either int or float).
    :type timestamp: int
    :param from_2001: Whether the timestamp counts from 2001-01-01, in which
        case 978307200 seconds are added to obtain a Unix epoch value
        (Default value = True)
    :type from_2001: bool
    :returns: The value returned by convert_unix_to_utc_datetime, or None
        when the timestamp is falsy or the conversion raises.

    """
    if not timestamp:
        return None

    # Some sources (for example the SMS messages database) store 18-digit
    # integers; only the first nine digits are kept in that case.
    if isinstance(timestamp, int):
        digits = str(timestamp)
        if len(digits) == 18:
            timestamp = int(digits[:9])

    # MacTime counts from 2001-01-01.
    unix_seconds = timestamp + 978307200 if from_2001 else timestamp

    # TODO: This is rather ugly. Happens sometimes with invalid timestamps.
    try:
        return convert_unix_to_utc_datetime(unix_seconds)
    except Exception:
        return None
def convert_mactime_to_iso(timestamp: Union[int, float],
                           from_2001: bool = True) -> str:
    """Convert a Mac Standard Time timestamp to an ISO-like string.

    Chains convert_mactime_to_datetime and convert_datetime_to_iso.

    :param timestamp: MacTime timestamp (either int or float).
    :type timestamp: int or float
    :param from_2001: Whether the timestamp counts from 2001-01-01
        (Default value = True)
    :type from_2001: bool
    :returns: ISO timestamp string in YYYY-mm-dd HH:MM:SS.ms format.
    :rtype: str

    """
    converted = convert_mactime_to_datetime(timestamp, from_2001)
    return convert_datetime_to_iso(converted)
2021-07-16 06:05:01 +00:00
2021-11-19 14:27:51 +00:00
2022-06-22 14:53:29 +00:00
def check_for_links(text: str) -> list:
    """Find HTTP(S) links in a piece of text.

    :param text: Any provided text.
    :type text: str
    :returns: List of matched URL strings, in order of appearance; empty
        when nothing matches.
    :rtype: list

    """
    # A URL is the scheme followed by every character up to the next
    # whitespace; the scheme is matched case-insensitively.
    url_pattern = re.compile(r"(?P<url>https?://[^\s]+)", re.IGNORECASE)
    return url_pattern.findall(text)
2021-07-16 06:05:01 +00:00
2021-11-19 14:27:51 +00:00
2021-07-16 06:05:01 +00:00
# Note: taken from here:
# https://stackoverflow.com/questions/57014259/json-dumps-on-dictionary-with-bytes-for-keys
2023-03-24 18:02:02 +00:00
def keys_bytes_to_string(obj: Any) -> Any:
    """Recursively decode bytes dictionary keys into strings.

    :param obj: Object to convert. Dictionaries have their bytes keys
        decoded; tuples, lists and sets are returned as lists with every
        item converted; any other value is returned unchanged.
    :returns: The converted object.

    """
    if isinstance(obj, dict):
        # Nested values go through the same function, which covers nested
        # dictionaries and sequences and leaves other values untouched.
        return {
            (key.decode() if isinstance(key, bytes) else key):
                keys_bytes_to_string(item)
            for key, item in obj.items()
        }

    if isinstance(obj, (tuple, list, set)):
        return [keys_bytes_to_string(item) for item in obj]

    return obj
def get_sha256_from_file_path(file_path: str) -> str:
    """Calculate the SHA256 hash of a file from a file path.

    The file is read in 4096-byte chunks so it is never loaded into memory
    as a whole.

    :param file_path: Path to the file to hash
    :returns: The SHA256 hash as a hexadecimal string
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as handle:
        while True:
            chunk = handle.read(4096)
            if chunk == b"":
                break
            digest.update(chunk)

    return digest.hexdigest()
def generate_hashes_from_path(path: str, log) -> Iterator[dict]:
    """
    Generates hashes of all files at the given path.

    A file path yields a single entry; a folder is walked recursively and
    yields one entry per file that could be read. Any other path yields
    nothing.

    :param path: Path of the given folder or file
    :param log: Logger used to report files that could not be hashed
    :returns: generator of dict {"file_path", "sha256"}
    """
    if os.path.isfile(path):
        single_hash = get_sha256_from_file_path(path)
        yield {"file_path": path, "sha256": single_hash}
        return

    if not os.path.isdir(path):
        return

    for folder, _, file_names in os.walk(path):
        for file_name in file_names:
            file_path = os.path.join(folder, file_name)
            try:
                digest = get_sha256_from_file_path(file_path)
            except FileNotFoundError:
                log.error("Failed to hash the file %s: might be a symlink",
                          file_path)
            except PermissionError:
                log.error("Failed to hash the file %s: permission denied",
                          file_path)
            else:
                yield {"file_path": file_path, "sha256": digest}