#!/usr/bin/env python3
#
# © 2024 AO Kaspersky Lab. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Kaspersky locations datasheet module."""
from pathlib import Path
import json
# Mapping of M49 region names to STIX 2.1 region codes vocabulary values.
M49_REGIONS = {
    # Africa
    "Africa": "africa",
    "Eastern Africa": "eastern-africa",
    "Middle Africa": "middle-africa",
    "Northern Africa": "northern-africa",
    "Southern Africa": "southern-africa",
    "Western Africa": "western-africa",
    # Americas
    "Americas": "americas",
    "Caribbean": "caribbean",
    "Central America": "central-america",
    "Latin America and the Caribbean": "latin-america-caribbean",
    "Northern America": "northern-america",
    "South America": "south-america",
    # Asia
    "Asia": "asia",
    "Central Asia": "central-asia",
    "Eastern Asia": "eastern-asia",
    "Southern Asia": "southern-asia",
    "South-eastern Asia": "south-eastern-asia",
    "Western Asia": "western-asia",
    # Europe
    "Europe": "europe",
    "Eastern Europe": "eastern-europe",
    "Northern Europe": "northern-europe",
    "Southern Europe": "southern-europe",
    "Western Europe": "western-europe",
    # Oceania
    "Oceania": "oceania",
    "Australia and New Zealand": "australia-new-zealand",
    "Melanesia": "melanesia",
    "Micronesia": "micronesia",
    "Polynesia": "polynesia",
}
def to_stix_region(m49_region: str) -> str:
    """Convert an M49 region name to its STIX 2.1 region code.

    Args:
        m49_region: Region name; it must match a key of ``M49_REGIONS``
            exactly (the lookup is case-sensitive).

    Returns:
        The region code stored in ``M49_REGIONS`` for that name.

    Raises:
        RuntimeError: If the name is not a key of ``M49_REGIONS``.
    """
    if m49_region not in M49_REGIONS:
        raise RuntimeError(f"unknown M49 region {m49_region}")
    return M49_REGIONS[m49_region]
def read_datasheet(filename):
    """Load a JSON datasheet located next to this module.

    The file name is joined to the directory that contains this source file
    and the parsed JSON document is returned unchanged.

    Raises RuntimeError when the resulting path is not an existing file.
    """
    module_dir = Path(__file__).resolve().parent
    datasheet_path = module_dir / filename
    if datasheet_path.is_file():
        with datasheet_path.open("r", encoding="UTF-8") as stream:
            return json.load(stream)
    raise RuntimeError(f"datasheet file '{datasheet_path}' not found")
def raise_datasheet_error(filename, message, record=None):
    """Report a malformed datasheet by raising RuntimeError.

    The error text names the datasheet file and the problem. When a record is
    supplied, its JSON dump is appended so the offending entry can be found.
    This function never returns normally.
    """
    details = f"datasheet file '{filename}' is broken: '{message}'"
    if record is None:
        raise RuntimeError(details)
    dumped = json.dumps(record)
    raise RuntimeError(f"'{details}' in record '{dumped}'")
def validate_datasheet_record(filename, record):
    """Check that a datasheet record has every required field, correctly typed.

    ``labels`` must be a list of strings; every other required field must be
    a string. Fields are checked in a fixed order and the first problem found
    is reported through ``raise_datasheet_error``.
    """
    required = (
        "code",
        "title",
        "pattern",
        "labels",
        "region",
        "inter_region",
        "sub_region",
    )
    for name in required:
        if name not in record:
            raise_datasheet_error(
                filename=filename,
                message=f"field '{name}' not found",
                record=record,
            )
        content = record[name]
        if name != "labels":
            # Plain text field.
            if not isinstance(content, str):
                raise_datasheet_error(
                    filename=filename,
                    message=f"field '{name}' expected to be a string",
                    record=record,
                )
            continue
        # "labels" is the only list-valued field.
        if not isinstance(content, list) or any(
            not isinstance(item, str) for item in content
        ):
            raise_datasheet_error(
                filename=filename,
                message=f"field '{name}' expected to be a list of strings",
                record=record,
            )
def initialize_registry():
    """Build the locations registry from the ``datasheet.json`` datasheet.

    Every record is validated and then stored under its upper-cased country
    code. Country codes, titles and upper-cased patterns must each be unique
    across the datasheet. The sub-region is taken from ``inter_region`` when
    that field is non-empty and from ``sub_region`` otherwise.

    Returns a dict that maps each country code to a dict of country and
    region details.
    """
    source = "datasheet.json"
    registry = {}
    seen_titles = set()
    seen_patterns = set()

    for entry in read_datasheet(filename=source):
        validate_datasheet_record(filename=source, record=entry)

        code = entry["code"].upper()
        if code in registry:
            raise_datasheet_error(
                filename=source,
                message=f"country code '{code}' found more than once",
            )

        title = entry["title"]
        if title in seen_titles:
            raise_datasheet_error(
                filename=source,
                message=f"country title '{title}' found more than once",
            )

        pattern = entry["pattern"].upper()
        if pattern in seen_patterns:
            raise_datasheet_error(
                filename=source,
                message=f"country pattern '{pattern}' found more than once",
            )

        labels = [label.upper() for label in entry["labels"]]

        region = entry["region"]
        region_code = to_stix_region(region)

        # Both fields were validated as strings above, so an empty
        # "inter_region" simply falls through to "sub_region".
        sub_region = entry["inter_region"] or entry["sub_region"]
        if not sub_region:
            raise_datasheet_error(
                filename=source,
                message="neither 'sub_region' field nor 'inter_region' field found",
                record=entry,
            )
        sub_region_code = to_stix_region(sub_region)

        seen_titles.add(title)
        seen_patterns.add(pattern)
        registry[code] = {
            "country_code": code,
            "country_title": title,
            "country_pattern": pattern,
            "country_labels": labels,
            "region_title": region,
            "region_code": region_code,
            "sub_region_title": sub_region,
            "sub_region_code": sub_region_code,
        }

    return registry
# Known locations registry, built once when this module is imported.
REGISTRY = initialize_registry()