Commit

Introducing gulmc: full Monte Carlo loss engine (OasisLMF#1137)
* [mcgul] first implementation of full MC gul

* [modelpy] montecarlo implementation in modelpy

* stop tracking mcgul

* [modelpy] fixes

* simplify algorithm

* remove unused imports

* use numba, process last areaperil id, cleanup

* [modelpy] add docstrings

* [modelpy] function namechange

* [modelpy] add TODOs not to forget

* [gulpy] bugfix in calling read_getmodel_data

* [gulpy] drafting monte carlo implementation

* [mcgul] Add major modelpy and gulpy rewrite as one tool

* [mcgul] do not sample haz if no haz uncertainty

* [mcgul] cleanup

* [mcgul] good working implementation

* [mcgul] perfectly reproduces effective damageability

* [mcgul] further simplification

* [mcgul] wip

* [mcgul] compute haz cdf in map_areaperil_ids_in_footprint

* [gulmc] update cli

* [getmodel] reverting full mc modifications

* [gul] reverting mc modifications

* [getmodel] reverting mc modifications

* [getmodel] Reverting unused mc modifications

* [gul] updating docstring

* [getmodel] update docstring

* [gulmc] dynamic buff_size

* [gulmc] imports cleanup

* [gulmc] cleanup

* [gulmc] dynamic buff size

* [gulmc] compute effective damageability

* [gulmc] effective damageability with numba

* [gulpy] minor bugfix

* [gulmc] bugfix: use 4 as item size in int32_mv

* [gulmc] minor cleanup

* [gulmc] fix conflicts with stashed edits

* [gulmc] cleanup

* [gulmc] remove unused imports

* [modelpy] remove one blank line

* [gulmc] add effective_damageability optional arg

* [gulmc] bugfix effective damageability

* [gulmc] add tests

* [gulmc] add tests for effective damageability

* [gulmc] move gulpy tests to separate module

* [tests] add test_model_1 to the tests assets

* [tests] use typing.Tuple for type hints

* [gulmc] better tests, tiv set to float64

* [gulmc] log info about effective_damageability

* [gulmc] cleaning up, adding docs (WIP).

* [gulmc] adding documentation and docstrings

* [gulmc] bugfix

* [gulmc] adding docs

* [gulmc] add docs

* [gulmc] rewrite complex outputs as tuples

* [gulmc] add final docs

* [gulmc] remove unused import

* [gulmc] Improve --debug flag

* [gulmc] raise ValueError if alloc_rule>0 when debug is 1 or 2

* [gulmc] cleanup

* [flake8] fix error code in ignore config

* [requirements] testing unpinning virtualenv

* [requirements] testing unpinning virtualenv

* [requirements] fixing package clash

* [gulmc] test ValueError if alloc_rule is invalid

* [gulmc] improve tests

* [gulmc] remove unnecessary binary files

* [requirements] removing unnecessary virtualenv

* [CI] specify pip-compile resolver for py 3.7

* [CI] fix bug in CI

* [CI] bugfix

* [gulmc] update following review comments

* [gulmc] implement fixes following review

* [gulmc] bugfix in logging
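The commit log above traces the core design: where getmodel/gulpy collapse hazard and damage uncertainty into a single effective damageability cdf, gulmc samples in two stages, drawing a hazard intensity bin from the hazard cdf and then a damage bin from the vulnerability cdf conditional on that intensity (and skipping the hazard draw when there is no hazard uncertainty, per the "do not sample haz" entry). A minimal sketch of the idea with toy arrays, not the actual gulmc data structures:

import numpy as np

rng = np.random.default_rng(42)

# hypothetical hazard cdf over 3 intensity bins (cumulative probabilities)
haz_cdf = np.array([0.5, 0.8, 1.0])

# hypothetical vulnerability cdfs: one row of damage-bin cdf per intensity bin
vuln_cdf = np.array([[0.7, 0.9, 1.0],
                     [0.4, 0.8, 1.0],
                     [0.1, 0.5, 1.0]])

def sample_damage_bin(rng, haz_cdf, vuln_cdf):
    # stage 1: sample the hazard intensity bin (skipped if haz_cdf is degenerate)
    haz_bin = np.searchsorted(haz_cdf, rng.uniform(), side='right')
    # stage 2: sample the damage bin conditional on the sampled intensity
    damage_bin = np.searchsorted(vuln_cdf[haz_bin], rng.uniform(), side='right')
    return haz_bin, damage_bin

samples = [sample_damage_bin(rng, haz_cdf, vuln_cdf) for _ in range(10)]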
mtazzari authored Dec 8, 2022
1 parent a0fa532 commit 2ccb138
Showing 133 changed files with 5,818 additions and 38 deletions.
1 change: 0 additions & 1 deletion .github/workflows/unittest.yml
@@ -59,7 +59,6 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install pip-tools
- name: Set pip resolver flag
if: matrix.cfg.python-version == 3.7
run: |
6 changes: 6 additions & 0 deletions .gitignore
@@ -126,6 +126,12 @@ tests/model_execution/tmp_fc_kparse_output/

!tests/pytools/data_layer/oasis_files/meta_data/correlations.bin

# do not ignore tests assets
!tests/assets/test_model_1/*.bin
!tests/assets/test_model_1/input/*.bin
!tests/assets/test_model_1/static/*.bin
!tests/assets/test_model_1/expected/*.bin

# pycharm
.idea/
data/
7 changes: 7 additions & 0 deletions oasislmf/pytools/common.py
@@ -0,0 +1,7 @@
"""
This file defines quantities reused across the pytools stack.
"""

# streams
PIPE_CAPACITY = 65536 # bytes
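65536 bytes is the default capacity of a Linux pipe, so readers and writers across the pytools stack size their stream buffers in multiples of it. A hedged sketch of the typical consumption pattern (illustrative helper, not part of this module):

import sys

from oasislmf.pytools.common import PIPE_CAPACITY

def read_chunks(stream, buff_size=PIPE_CAPACITY):
    # yield raw chunks of at most buff_size bytes until EOF
    while True:
        chunk = stream.read(buff_size)
        if not chunk:
            break
        yield chunk

# usage, e.g.: for chunk in read_chunks(sys.stdin.buffer): ...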
1 change: 1 addition & 0 deletions oasislmf/pytools/getmodel/common.py
@@ -16,6 +16,7 @@
parquetfootprint_meta_filename = "footprint_parquet_meta.json"

areaperil_int = np.dtype(os.environ.get('AREAPERIL_TYPE', 'u4'))
nb_areaperil_int = nb.from_dtype(areaperil_int)
oasis_float = np.dtype(os.environ.get('OASIS_FLOAT', 'f4'))

FootprintHeader = nb.from_dtype(np.dtype([('num_intensity_bins', np.int32),
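nb.from_dtype registers the environment-configurable areaperil dtype with numba, so jitted code and typed containers can be keyed by it. A small illustration of the pattern (toy mapping, assuming the default 'u4' dtype; similar in spirit to the areaperil-to-vulnerability index mappings built in manager.py):

import numba as nb
import numpy as np
from numba.typed import Dict

areaperil_int = np.dtype('u4')
nb_areaperil_int = nb.from_dtype(areaperil_int)

# a typed dict keyed by areaperil id
areaperil_to_idx = Dict.empty(key_type=nb_areaperil_int, value_type=nb.types.int64)
areaperil_to_idx[areaperil_int.type(154)] = 0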
47 changes: 28 additions & 19 deletions oasislmf/pytools/getmodel/manager.py
@@ -13,17 +13,18 @@
from contextlib import ExitStack

import numba as nb
from numba.typed import Dict
import numpy as np
import pyarrow.parquet as pq
from numba.typed import Dict
from oasislmf.pytools.common import PIPE_CAPACITY

from oasislmf.pytools.data_layer.footprint_layer import FootprintLayerClient
from .common import areaperil_int, oasis_float, Index_type, Keys
from .footprint import Footprint
from oasislmf.pytools.getmodel.common import areaperil_int, oasis_float, Index_type, Keys
from oasislmf.pytools.getmodel.footprint import Footprint

logger = logging.getLogger(__name__)

buff_size = 65536
buff_size = PIPE_CAPACITY

oasis_int_dtype = np.dtype('i4')
oasis_int = np.int32
@@ -317,7 +318,8 @@ def get_vulns(static_path, vuln_dict, num_intensity_bins, ignore_file_type=set()
vuln_array = load_vulns_bin_idx(vulns_bin, vulns_idx_bin, vuln_dict,
num_damage_bins, num_intensity_bins)
else:
vulns_bin = np.memmap(os.path.join(static_path, "vulnerability.bin"), dtype=Vulnerability, offset=4, mode='r')
vulns_bin = np.memmap(os.path.join(static_path, "vulnerability.bin"),
dtype=Vulnerability, offset=4, mode='r')
vuln_array = load_vulns_bin(vulns_bin, vuln_dict, num_damage_bins, num_intensity_bins)

elif "vulnerability.csv" in input_files and "csv" not in ignore_file_type:
@@ -362,7 +364,7 @@ def get_damage_bins(static_path, ignore_file_type=set()):
return np.fromfile(os.path.join(static_path, "damage_bin_dict.bin"), dtype=damagebindictionary)
elif "damage_bin_dict.csv" in input_files and 'csv' not in ignore_file_type:
logger.debug(f"loading {os.path.join(static_path, 'damage_bin_dict.csv')}")
return np.genfromtxt(os.path.join(static_path, "damage_bin_dict.csv"), dtype=damagebindictionaryCsv)
return np.genfromtxt(os.path.join(static_path, "damage_bin_dict.csv"), dtype=damagebindictionaryCsv, skip_header=1, delimiter=',')
else:
raise FileNotFoundError(f'damage_bin_dict file not found at {static_path}')

def damage_bin_prob(p, intensities_min, intensities_max, vulns, intensities):
"""
Calculate the probability of an event happening and then causing damage.
Note: vulns is a 1-d array containing 1 damage bin of the damage probability distribution as a
function of hazard intensity.
Args:
p: (float) the probability to be updated
intensities_min: (int) intensity minimum
intensities_max: (int) intensity maximum
vulns: (List[float]) PLEASE FILL IN
intensities: (List[float]) list of all the intensities
intensities_min: (int) minimum intensity bin id
intensities_max: (int) maximum intensity bin id
vulns: (List[float]) slice of damage probability distribution given hazard intensity
intensities: (List[float]) intensity probability distribution
Returns: (float) the updated probability
"""
@@ -402,9 +406,9 @@ def do_result(vulns_id, vuln_array, mean_damage_bins,
mean_damage_bins: (List[float]) the mean of each damage bin (len(mean_damage_bins) == num_damage_bins)
int32_mv: (numpy.ndarray[int32]) int32 view of the memoryview buffer where results are written
num_damage_bins: (int) number of damage bins in the data
intensities_min: (int) intensity minimum
intensities_max: (int) intensity maximum
intensities: (List[float]) list of all the intensities
intensities_min: (int) minimum intensity bin id
intensities_max: (int) maximum intensity bin id
intensities: (List[float]) intensity probability distribution
event_id: (int) the event ID that concerns the result being calculated
areaperil_id: (List[int]) the areaperil ID that concerns the result being calculated
vuln_i: (int) the index concerning the vulnerability inside the vuln_array
@@ -540,6 +544,7 @@ def run(run_dir, file_in, file_out, ignore_file_type, data_server, peril_filter)
file_out: (Optional[str]) the path to the output directory
ignore_file_type: set(str) file extension to ignore when loading
data_server: (bool) if set to True runs the data server
peril_filter (list[int]): list of perils to include in the computation (if None, all perils will be included).
Returns: None
"""
@@ -573,7 +578,8 @@ def run(run_dir, file_in, file_out, ignore_file_type, data_server, peril_filter)
if peril_filter:
keys_df = pd.read_csv(os.path.join(input_path, 'keys.csv'), dtype=Keys)
valid_area_peril_id = keys_df.loc[keys_df['PerilID'].isin(peril_filter), 'AreaPerilID'].to_numpy()
logger.debug(f'Peril specific run: ({peril_filter}), {len(valid_area_peril_id)} AreaPerilID included out of {len(keys_df)}')
logger.debug(
f'Peril specific run: ({peril_filter}), {len(valid_area_peril_id)} AreaPerilID included out of {len(keys_df)}')
else:
valid_area_peril_id = None
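The filter keeps only the AreaPerilIDs whose PerilID appears in the requested list; a toy illustration of the same pandas idiom (hypothetical peril codes):

import pandas as pd

keys_df = pd.DataFrame({'PerilID': ['WTC', 'WSS', 'WTC'],
                        'AreaPerilID': [101, 102, 103]})
peril_filter = ['WTC']

valid_area_peril_id = keys_df.loc[
    keys_df['PerilID'].isin(peril_filter), 'AreaPerilID'].to_numpy()
# array([101, 103])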

@@ -599,9 +605,7 @@ def run(run_dir, file_in, file_out, ignore_file_type, data_server, peril_filter)

# event_id, areaperil_id, vulnerability_id, num_result, [oasis_float] * num_result
max_result_relative_size = 1 + areaperil_int_relative_size + 1 + 1 + num_damage_bins * results_relative_size

mv = memoryview(bytearray(buff_size))

int32_mv = np.ndarray(buff_size // np.int32().itemsize, buffer=mv, dtype=np.int32)

# header
@@ -613,13 +617,18 @@ def run(run_dir, file_in, file_out, ignore_file_type, data_server, peril_filter)
if len_read == 0:
break

# get the next event_id from the input stream
event_id = event_ids[0]

if data_server:
event_footprint = FootprintLayerClient.get_event(event_ids[0])
event_footprint = FootprintLayerClient.get_event(event_id)
else:
event_footprint = footprint_obj.get_event(event_ids[0])
event_footprint = footprint_obj.get_event(event_id)

if event_footprint is not None:
for cursor_bytes in doCdf(event_ids[0],
# compute effective damageability probability distribution
# stream out: event_id, areaperil_id, number of damage bins, effective damageability cdf bins (bin_mean and prob_to)
for cursor_bytes in doCdf(event_id,
num_intensity_bins, event_footprint,
areaperil_to_vulns_idx_dict, areaperil_to_vulns_idx_array, areaperil_to_vulns,
vuln_array, vulns_id, num_damage_bins, mean_damage_bins,
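The cursor-based writes in doCdf go through an int32 view over a shared bytearray; a simplified sketch of that buffer pattern (field layout abbreviated — the real stream also carries the cdf bins and accounts for wider areaperil dtypes via areaperil_int_relative_size):

import numpy as np

buff_size = 65536
mv = memoryview(bytearray(buff_size))
int32_mv = np.ndarray(buff_size // np.int32().itemsize, buffer=mv, dtype=np.int32)

cursor = 0
int32_mv[cursor] = 1      # event_id
cursor += 1
int32_mv[cursor] = 154    # areaperil_id (assuming a 4-byte areaperil dtype)
cursor += 1
int32_mv[cursor] = 10     # number of damage bins that follow
cursor += 1

cursor_bytes = cursor * np.int32().itemsize
# the first cursor_bytes of mv are ready for the output stream,
# e.g. stream_out.write(mv[:cursor_bytes])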
14 changes: 10 additions & 4 deletions oasislmf/pytools/gul/common.py
@@ -11,15 +11,20 @@
# probably need to set this dynamically depending on the stream type
gul_header = np.int32(1 | 2 << 24).tobytes()

PIPE_CAPACITY = 65536 # bytes
GETMODEL_STREAM_BUFF_SIZE = 2 * PIPE_CAPACITY

items_data_type = nb.from_dtype(np.dtype([('item_id', np.int32),
('damagecdf_i', np.int32),
('rng_index', np.int32)
]))

coverage_type = nb.from_dtype(np.dtype([('tiv', np.float),
items_MC_data_type = nb.from_dtype(np.dtype([('item_id', np.int32),
('vulnerability_id', np.int32),
('hazcdf_i', np.int32),
('rng_index', np.int32),
('eff_vuln_cdf_i', np.int32),
('eff_vuln_cdf_Ndamage_bins', np.int32)
]))

coverage_type = nb.from_dtype(np.dtype([('tiv', np.float64),
('max_items', np.int32),
('start_items', np.int32),
('cur_items', np.int32)
@@ -38,6 +43,7 @@

ITEM_MAP_KEY_TYPE = nb.types.Tuple((nb.from_dtype(areaperil_int), nb.types.int32))
ITEM_MAP_VALUE_TYPE = nb.types.UniTuple(nb.types.int32, 3)
ITEM_MAP_KEY_TYPE_internal = nb.types.Tuple((nb.from_dtype(areaperil_int), nb.types.int64))

# compute the relative size of oasis_float and areaperil_int vs int32
oasis_float_to_int32_size = oasis_float.itemsize // np.int32().itemsize
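ITEM_MAP_KEY_TYPE keys the item map by an (areaperil_id, vulnerability_id) pair; a small sketch of populating such a typed dict (toy values — the three int32 fields of the value tuple stand in for the real per-item indices built in the managers):

import numba as nb
import numpy as np
from numba.typed import Dict

areaperil_int = np.dtype('u4')
ITEM_MAP_KEY_TYPE = nb.types.Tuple((nb.from_dtype(areaperil_int), nb.types.int32))
ITEM_MAP_VALUE_TYPE = nb.types.UniTuple(nb.types.int32, 3)

item_map = Dict.empty(key_type=ITEM_MAP_KEY_TYPE, value_type=ITEM_MAP_VALUE_TYPE)
key = (np.uint32(154), np.int32(7))  # (areaperil_id, vulnerability_id)
item_map[key] = (np.int32(1), np.int32(2), np.int32(3))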
8 changes: 5 additions & 3 deletions oasislmf/pytools/gul/io.py
@@ -7,11 +7,12 @@
from numba import njit
from numba.typed import Dict, List
from numba.types import int32 as nb_int32, int64 as nb_int64, int8 as nb_int8
from oasislmf.pytools.common import PIPE_CAPACITY

from oasislmf.pytools.getmodel.common import oasis_float, areaperil_int
from oasislmf.pytools.gul.common import (
ProbMean, damagecdfrec_stream, oasis_float_to_int32_size, areaperil_int_to_int32_size,
items_data_type, ProbMean_size, NP_BASE_ARRAY_SIZE, GETMODEL_STREAM_BUFF_SIZE
items_data_type, ProbMean_size, NP_BASE_ARRAY_SIZE
)
from oasislmf.pytools.gul.random import generate_hash

@@ -40,7 +41,7 @@ def gen_valid_area_peril(valid_area_peril_id):
return valid_area_peril_dict


def read_getmodel_stream(stream_in, item_map, coverages, compute, seeds, valid_area_peril_id=None, buff_size=GETMODEL_STREAM_BUFF_SIZE, ):
def read_getmodel_stream(stream_in, item_map, coverages, compute, seeds, valid_area_peril_id=None, buff_size=PIPE_CAPACITY):
"""Read the getmodel output stream yielding data event by event.
Args:
@@ -50,7 +51,8 @@ def read_getmodel_stream(stream_in, item_map, coverages, compute, seeds, valid_a
coverages (numpy.ndarray[coverage_type]): array with coverage data.
compute (numpy.array[int]): list of coverages to be computed.
seeds (numpy.array[int]): the random seeds for each coverage_id.
buff_size (int): size in bytes of the read buffer (see note). Default is GETMODEL_STREAM_BUFF_SIZE.
buff_size (int): size in bytes of the read buffer (see note).
valid_area_peril_id (list[int]): list of valid areaperil_ids.
Raises:
ValueError: If the stream type is not 1.
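The stream-type check mentioned in the Raises section inspects the 4-byte header before any event data is read; a hedged sketch of that guard (bit layout assumed for illustration, not an authoritative spec of the Oasis stream header):

import numpy as np

def check_stream_type(stream_in):
    # read the stream header and verify the getmodel stream type
    header = np.frombuffer(stream_in.read(4), dtype=np.int32)[0]
    stream_type = int(header) >> 24
    if stream_type != 1:
        raise ValueError(f"invalid stream type: {stream_type}")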
15 changes: 5 additions & 10 deletions oasislmf/pytools/gul/manager.py
@@ -11,6 +11,7 @@
import numpy as np
from numba import njit
from numba.typed import Dict, List
from oasislmf.pytools.common import PIPE_CAPACITY

from oasislmf.pytools.data_layer.conversions.correlations import CorrelationsData

@@ -19,22 +20,19 @@
from oasislmf.pytools.getmodel.common import oasis_float, Keys, Correlation

from oasislmf.pytools.gul.common import (
MEAN_IDX, PIPE_CAPACITY, STD_DEV_IDX, TIV_IDX, CHANCE_OF_LOSS_IDX, MAX_LOSS_IDX, NUM_IDX,
MEAN_IDX, STD_DEV_IDX, TIV_IDX, CHANCE_OF_LOSS_IDX, MAX_LOSS_IDX, NUM_IDX,
ITEM_MAP_KEY_TYPE, ITEM_MAP_VALUE_TYPE,
gulSampleslevelRec_size, gulSampleslevelHeader_size, coverage_type, gul_header,
)
from oasislmf.pytools.gul.core import split_tiv_classic, split_tiv_multiplicative, get_gul, setmaxloss, compute_mean_loss
from oasislmf.pytools.gul.io import (
write_negative_sidx, write_sample_header,
write_sample_rec, read_getmodel_stream,
)

from oasislmf.pytools.gul.random import (
get_random_generator, compute_norm_cdf_lookup,
compute_norm_inv_cdf_lookup, get_corr_rval, generate_correlated_hash_vector
)

from oasislmf.pytools.gul.random import get_random_generator
from oasislmf.pytools.gul.core import split_tiv_classic, split_tiv_multiplicative, get_gul, setmaxloss, compute_mean_loss
from oasislmf.pytools.gul.utils import append_to_dict_value, binary_search


@@ -52,7 +50,6 @@ def get_coverages(input_path, ignore_file_type=set()):
numpy.array[oasis_float]: array with the coverage values for each coverage_id.
"""
input_files = set(os.listdir(input_path))
# TODO: store default filenames (e.g., coverages.bin) in a parameters file

if "coverages.bin" in input_files and "bin" not in ignore_file_type:
coverages_fname = os.path.join(input_path, 'coverages.bin')
@@ -150,9 +147,7 @@ def run(run_dir, ignore_file_type, sample_size, loss_threshold, alloc_rule, debu
"""
logger.info("starting gulpy")

# TODO: store static_path in a paraparameters file
static_path = os.path.join(run_dir, 'static')
# TODO: store input_path in a paraparameters file
input_path = os.path.join(run_dir, 'input')
ignore_file_type = set(ignore_file_type)

@@ -295,7 +290,7 @@ def run(run_dir, ignore_file_type, sample_size, loss_threshold, alloc_rule, debu
last_processed_coverage_ids_idx = 0

# adjust buff size so that the buffer fits the longest coverage
buff_size = PIPE_CAPACITY
buff_size = PIPE_CAPACITY * 2
max_bytes_per_coverage = np.max(coverages['cur_items']) * max_bytes_per_item
while buff_size < max_bytes_per_coverage:
buff_size *= 2
@@ -337,7 +332,7 @@ def compute_event_losses(event_id, coverages, coverage_ids, items_data,
Args:
event_id (int32): event id.
coverages (numpy.array[oasis_float]): array with the coverage values for each coverage_id.
coverage_ids (numpy.array[: array of **uniques** coverage ids used in this event.
coverage_ids (numpy.array[int]): array of unique coverage ids used in this event.
items_data (numpy.array[items_data_type]): items-related data.
last_processed_coverage_ids_idx (int): index of the last coverage_id stored in `coverage_ids` that was fully processed
and printed to the output stream.
24 changes: 23 additions & 1 deletion oasislmf/pytools/gul/random.py
@@ -16,10 +16,14 @@
EVENT_ID_HASH_CODE = np.int64(1943272559)
HASH_MOD_CODE = np.int64(2147483648)

HAZ_GROUP_ID_HASH_CODE = np.int64(1343271947)
HAZ_EVENT_ID_HASH_CODE = np.int64(1743274343)
HAZ_HASH_MOD_CODE = np.int64(2157483709)


@njit(cache=True, fastmath=True)
def generate_hash(group_id, event_id, base_seed=0):
"""Generate hash for a given `group_id`, `event_id` pair.
"""Generate hash for a given `group_id`, `event_id` pair for the vulnerability pdf.
Args:
group_id (int): group id.
@@ -35,6 +39,24 @@ def generate_hash(group_id, event_id, base_seed=0):
return hash


@njit(cache=True, fastmath=True)
def generate_hash_haz(group_id, event_id, base_seed=0):
"""Generate hash for a given `group_id`, `event_id` pair for the hazard pdf.
Args:
group_id (int): group id.
event_id (int): event id.
base_seed (int, optional): base random seed. Defaults to 0.
Returns:
int64: hash
"""
hash = (base_seed + (group_id * HAZ_GROUP_ID_HASH_CODE) % HAZ_HASH_MOD_CODE +
(event_id * HAZ_EVENT_ID_HASH_CODE) % HAZ_HASH_MOD_CODE) % HAZ_HASH_MOD_CODE

return hash


def get_random_generator(random_generator):
"""Get the random generator function.
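The two hash functions above use distinct multiplicative constants and moduli, so the vulnerability and hazard samplers get decorrelated seed streams for the same (group_id, event_id) pair. A brief usage sketch (assuming this commit's module layout):

import numpy as np

from oasislmf.pytools.gul.random import generate_hash, generate_hash_haz

group_id, event_id = 7, 1

rng_vuln = np.random.default_rng(generate_hash(group_id, event_id))
rng_haz = np.random.default_rng(generate_hash_haz(group_id, event_id))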
5 changes: 5 additions & 0 deletions oasislmf/pytools/gulmc/__init__.py
@@ -0,0 +1,5 @@
import logging
from logging import NullHandler

logger = logging.getLogger(__name__)
logger.addHandler(NullHandler())
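Attaching a NullHandler follows the standard convention for library packages: gulmc stays silent unless the host application configures logging itself, e.g.:

import logging

logging.basicConfig(level=logging.INFO)
logging.getLogger('oasislmf.pytools.gulmc').setLevel(logging.DEBUG)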