diff --git a/.coveragerc b/.coveragerc index 9aabb0e68..fd87b56d3 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,8 +1,5 @@ [run] omit = db.py - auto.py */tests/* [report] -exclude_lines = - def get_required_resources diff --git a/.flake8 b/.flake8 index 3d2dd9047..54d60194d 100644 --- a/.flake8 +++ b/.flake8 @@ -1,4 +1,4 @@ [flake8] max-line-length = 85 ignore = E203, E241, E701, W503 -exclude = db.py,auto.py,flycheck*,Hazus_Earthquake_IM.py,Hazus_Earthquake_Story.py,export_DB.py \ No newline at end of file +exclude = flycheck* \ No newline at end of file diff --git a/.pylintrc b/.pylintrc index 75c9aedcf..26b7a9985 100644 --- a/.pylintrc +++ b/.pylintrc @@ -6,7 +6,7 @@ init-hook='import sys; sys.path.append("."); sys.path.append("../"); sys.path.ap # Files or directories to be skipped. They should be base names, not # paths. -ignore=auto.py,db.py,flycheck_*.py +ignore=flycheck_* # Add files or directories matching the regex patterns to the ignore-list. The # regex matches against paths and can be in Posix or Windows format. @@ -143,7 +143,7 @@ logging-format-style=old [MISCELLANEOUS] # List of note tags to take in consideration, separated by a comma. -notes=FIXME,XXX,TODO +notes=FIXME,XXX,TODO,todo,debug # Regular expression of note tags to take in consideration. #notes-rgx= @@ -152,7 +152,7 @@ notes=FIXME,XXX,TODO [SIMILARITIES] # Minimum lines number of a similarity. -min-similarity-lines=6 +min-similarity-lines=8 # Ignore comments when computing similarities. ignore-comments=yes @@ -335,6 +335,16 @@ docstring-min-length=-1 # List of decorators that define properties, such as abc.abstractproperty. property-classes=abc.abstractproperty +# +# Docstring parameter documentation: +# https://pylint.pycqa.org/en/1.7/technical_reference/extensions.html +# + +accept-no-raise-doc = no +accept-no-param-doc = no +accept-no-return-doc = no +accept-no-yields-doc = no + [TYPECHECK] diff --git a/pelicun/assessment.py b/pelicun/assessment.py index f3c8dba37..3c30201f3 100644 --- a/pelicun/assessment.py +++ b/pelicun/assessment.py @@ -68,7 +68,7 @@ class Assessment: ... damage: DamageModel ... - bldg_repair: BldgRepairModel + repair: RepairModel ... stories: int Number of stories. @@ -146,18 +146,18 @@ def damage(self): return self.damage @property - def bldg_repair(self): + def repair(self): """ - Return an BldgRepairModel object that manages the repair information. + Return a RepairModel object that manages the repair information. """ # pylint: disable = access-member-before-definition - if hasattr(self, '_bldg_repair'): - return self._bldg_repair + if hasattr(self, '_repair'): + return self._repair - self._bldg_repair = model.BldgRepairModel(self) - return self.bldg_repair + self._repair = model.RepairModel(self) + return self.repair def get_default_data(self, data_name): """ diff --git a/pelicun/auto.py b/pelicun/auto.py index d1fadc03b..e7a1ea864 100644 --- a/pelicun/auto.py +++ b/pelicun/auto.py @@ -50,29 +50,33 @@ import sys import importlib -import json from pathlib import Path from . import base -def auto_populate(config, auto_script_path, **kwargs): + +def auto_populate( + config, auto_script_path, **kwargs # pylint: disable=unused-argument +): """ - Automatically prepares the DL configuration for a Pelicun calculation. + Automatically populates the DL configuration for a Pelicun + calculation. Parameters ---------- config: dict - Configuration dictionary with a GeneralInformation key that holds - another dictionary with attributes of the asset of interest. 
+ Configuration dictionary with a GeneralInformation key that + holds another dictionary with attributes of the asset of + interest. auto_script_path: string - Path pointing to a python script with the auto-population rules. - Built-in scripts can be referenced using the PelicunDefault/XY format - where XY is the name of the script. + Path pointing to a python script with the auto-population + rules. Built-in scripts can be referenced using the + PelicunDefault/XY format where XY is the name of the script. """ # try to get the AIM attributes AIM = config.get('GeneralInformation', None) - if AIM == None: + if AIM is None: raise ValueError( "No Asset Information provided for the auto-population routine." ) @@ -80,11 +84,12 @@ def auto_populate(config, auto_script_path, **kwargs): # replace default keyword with actual path in auto_script location if 'PelicunDefault/' in auto_script_path: auto_script_path = auto_script_path.replace( - 'PelicunDefault/', f'{base.pelicun_path}/resources/auto/') + 'PelicunDefault/', f'{base.pelicun_path}/resources/auto/' + ) # load the auto population module ASP = Path(auto_script_path).resolve() - sys.path.insert(0, str(ASP.parent)+'/') + sys.path.insert(0, str(ASP.parent) + '/') auto_script = importlib.__import__(ASP.name[:-3], globals(), locals(), [], 0) auto_populate_ext = auto_script.auto_populate @@ -97,4 +102,3 @@ def auto_populate(config, auto_script_path, **kwargs): # return the extended config data and the component quantities return config, CMP - diff --git a/pelicun/db.py b/pelicun/db.py index 999cdde81..86a18bf52 100644 --- a/pelicun/db.py +++ b/pelicun/db.py @@ -45,23 +45,23 @@ .. autosummary:: create_FEMA_P58_fragility_db - create_FEMA_P58_bldg_repair_db + create_FEMA_P58_repair_db create_FEMA_P58_bldg_injury_db create_FEMA_P58_bldg_redtag_db create_Hazus_EQ_fragility_db - create_Hazus_EQ_bldg_repair_db + create_Hazus_EQ_repair_db create_Hazus_EQ_bldg_injury_db """ import re import json +from pathlib import Path +from copy import deepcopy import numpy as np from scipy.stats import norm import pandas as pd -from pathlib import Path -from copy import deepcopy from . import base from .uq import fit_distribution_to_percentiles @@ -69,6 +69,10 @@ idx = base.idx +# pylint: disable=too-many-statements +# pylint: disable=too-many-locals + + def parse_DS_Hierarchy(DSH): """ Parses the FEMA P58 DS hierarchy into a set of arrays. 
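A minimal usage sketch of the parse_DS_Hierarchy helper shown above. It assumes pelicun is importable and that the FEMA P58 DS_Hierarchy strings take the Seq(...)/MutEx(...)/Simul(...) form implied by the slicing logic in the next hunk; the expected outputs are inferred from that logic, not verified against the source workbook.

    # Illustrative only -- assumed input format and expected outputs.
    from pelicun.db import parse_DS_Hierarchy

    # Sequential damage states are returned as plain 'DSx' labels.
    print(parse_DS_Hierarchy("Seq(DS1,DS2)"))
    # expected: ['DS1', 'DS2']

    # Mutually exclusive / simultaneous groups come back as nested lists
    # tagged with the group type.
    print(parse_DS_Hierarchy("Seq(DS1,MutEx(DS2,DS3))"))
    # expected: ['DS1', ['MutEx', 'DS2', 'DS3']]
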
@@ -84,18 +88,20 @@ def parse_DS_Hierarchy(DSH): DSH = DSH[4:] elif DSH[:5] in {'MutEx', 'Simul'}: closing_pos = DSH.find(')') - subDSH = DSH[:closing_pos + 1] - DSH = DSH[closing_pos + 2:] + subDSH = DSH[: closing_pos + 1] + DSH = DSH[closing_pos + 2 :] DS_setup.append([subDSH[:5]] + subDSH[6:-1].split(',')) return DS_setup -def create_FEMA_P58_fragility_db(source_file, - meta_file='', - target_data_file='damage_DB_FEMA_P58_2nd.csv', - target_meta_file='damage_DB_FEMA_P58_2nd.json'): +def create_FEMA_P58_fragility_db( + source_file, + meta_file='', + target_data_file='damage_DB_FEMA_P58_2nd.csv', + target_meta_file='damage_DB_FEMA_P58_2nd.json', +): """ Create a fragility parameter database based on the FEMA P58 data @@ -123,13 +129,18 @@ def create_FEMA_P58_fragility_db(source_file, """ # parse the source file - df = pd.read_excel(source_file, sheet_name='Summary', header=2, index_col=1, - true_values=["YES", "Yes", "yes"], - false_values=["NO", "No", "no"]) + df = pd.read_excel( + source_file, + sheet_name='Summary', + header=2, + index_col=1, + true_values=["YES", "Yes", "yes"], + false_values=["NO", "No", "no"], + ) # parse the extra metadata file if Path(meta_file).is_file(): - with open(meta_file, 'r') as f: + with open(meta_file, 'r', encoding='utf-8') as f: frag_meta = json.load(f) else: frag_meta = {} @@ -230,10 +241,10 @@ def create_FEMA_P58_fragility_db(source_file, "LS4-Family", "LS4-Theta_0", "LS4-Theta_1", - "LS4-DamageStateWeights" + "LS4-DamageStateWeights", ], index=df_db_source.index, - dtype=float + dtype=float, ) # initialize the dictionary that stores the fragility metadata @@ -241,7 +252,6 @@ def create_FEMA_P58_fragility_db(source_file, # add the general information to the meta dict if "_GeneralInformation" in frag_meta.keys(): - frag_meta = frag_meta["_GeneralInformation"] # remove the decision variable part from the general info @@ -249,9 +259,6 @@ def create_FEMA_P58_fragility_db(source_file, meta_dict.update({"_GeneralInformation": frag_meta}) - - - # conversion dictionary for demand types convert_demand_type = { 'Story Drift Ratio': "Peak Interstory Drift Ratio", @@ -259,7 +266,7 @@ def create_FEMA_P58_fragility_db(source_file, 'Effective Drift': "Peak Effective Drift Ratio", 'Link Beam Chord Rotation': "Peak Link Beam Chord Rotation", 'Peak Floor Acceleration': "Peak Floor Acceleration", - 'Peak Floor Velocity': "Peak Floor Velocity" + 'Peak Floor Velocity': "Peak Floor Velocity", } # conversion dictionary for demand unit names @@ -267,14 +274,13 @@ def create_FEMA_P58_fragility_db(source_file, 'Unit less': 'unitless', 'Radians': 'rad', 'g': 'g', - 'meter/sec': 'mps' + 'meter/sec': 'mps', } # for each component... # (this approach is not efficient, but easy to follow which was considered # more important than efficiency.) 
for cmp in df_db_source.itertuples(): - # create a dotted component index ID = cmp.Index.split('.') cmpID = f'{ID[0][0]}.{ID[0][1:3]}.{ID[0][3:5]}.{ID[1]}' @@ -286,14 +292,16 @@ def create_FEMA_P58_fragility_db(source_file, incomplete = False # store demand specifications - df_db.loc[cmp.Index, 'Demand-Type'] = ( - convert_demand_type[cmp.Demand_Parameter_value]) - df_db.loc[cmp.Index, 'Demand-Unit'] = ( - convert_demand_unit[cmp.Demand_Parameter_unit]) - df_db.loc[cmp.Index, 'Demand-Offset'] = ( - int(cmp.Demand_Location_use_floor_above_YesNo)) - df_db.loc[cmp.Index, 'Demand-Directional'] = ( - int(cmp.Directional)) + df_db.loc[cmp.Index, 'Demand-Type'] = convert_demand_type[ + cmp.Demand_Parameter_value + ] + df_db.loc[cmp.Index, 'Demand-Unit'] = convert_demand_unit[ + cmp.Demand_Parameter_unit + ] + df_db.loc[cmp.Index, 'Demand-Offset'] = int( + cmp.Demand_Location_use_floor_above_YesNo + ) + df_db.loc[cmp.Index, 'Demand-Directional'] = int(cmp.Directional) # parse the damage state hierarchy DS_setup = parse_DS_Hierarchy(cmp.DS_Hierarchy) @@ -312,13 +320,20 @@ def create_FEMA_P58_fragility_db(source_file, # the additional fields are added to the description if they exist if cmp_meta['Construction_Quality'] != 'Not Specified': - comments += f'\nConstruction Quality: ' \ - f'{cmp_meta["Construction_Quality"]}' + comments += ( + f'\nConstruction Quality: {cmp_meta["Construction_Quality"]}' + ) if cmp_meta['Seismic_Installation_Conditions'] not in [ - 'Not Specified', 'Not applicable', 'Unknown', 'Any']: - comments += f'\nSeismic Installation Conditions: ' \ - f'{cmp_meta["Seismic_Installation_Conditions"]}' + 'Not Specified', + 'Not applicable', + 'Unknown', + 'Any', + ]: + comments += ( + f'\nSeismic Installation Conditions: ' + f'{cmp_meta["Seismic_Installation_Conditions"]}' + ) if cmp_meta['Comments__Notes'] != 'None': comments += f'\nNotes: {cmp_meta["Comments__Notes"]}' @@ -334,12 +349,11 @@ def create_FEMA_P58_fragility_db(source_file, "Comments": comments, "SuggestedComponentBlockSize": ' '.join(block_size), "RoundUpToIntegerQuantity": cmp_meta['Round_to_Integer_Unit'], - "LimitStates": {} + "LimitStates": {}, } # now look at each Limit State for LS_i, LS_contents in enumerate(DS_setup): - LS_i = LS_i + 1 LS_contents = np.atleast_1d(LS_contents) @@ -347,49 +361,58 @@ def create_FEMA_P58_fragility_db(source_file, # start with the special cases with multiple DSs in an LS if LS_contents[0] in {'MutEx', 'Simul'}: - # collect the fragility data for the member DSs median_demands = [] dispersions = [] weights = [] for ds in LS_contents[1:]: - median_demands.append( - getattr(cmp, f"DS_{ds[2]}_Median_Demand")) + median_demands.append(getattr(cmp, f"DS_{ds[2]}_Median_Demand")) dispersions.append( - getattr(cmp, f"DS_{ds[2]}_Total_Dispersion_Beta")) + getattr(cmp, f"DS_{ds[2]}_Total_Dispersion_Beta") + ) weights.append(getattr(cmp, f"DS_{ds[2]}_Probability")) # make sure the specified distribution parameters are appropriate - if ((np.unique(median_demands).size != 1) or ( - np.unique(dispersions).size != 1)): - raise ValueError(f"Incorrect mutually exclusive DS " - f"definition in component {cmp.Index} at " - f"Limit State {LS_i}") + if (np.unique(median_demands).size != 1) or ( + np.unique(dispersions).size != 1 + ): + raise ValueError( + f"Incorrect mutually exclusive DS " + f"definition in component {cmp.Index} at " + f"Limit State {LS_i}" + ) if LS_contents[0] == 'MutEx': - # in mutually exclusive cases, make sure the specified DS # weights sum up to one np.testing.assert_allclose( - 
np.sum(np.array(weights, dtype=float)), 1.0, + np.sum(np.array(weights, dtype=float)), + 1.0, err_msg=f"Mutually exclusive Damage State weights do " - f"not sum to 1.0 in component {cmp.Index} at " - f"Limit State {LS_i}") + f"not sum to 1.0 in component {cmp.Index} at " + f"Limit State {LS_i}", + ) # and save all DS metadata under this Limit State for ds in LS_contents[1:]: ds_id = ds[2] repair_action = cmp_meta[f"DS_{ds_id}_Repair_Description"] - if pd.isna(repair_action) == True: + if pd.isna(repair_action): repair_action = "" - ls_meta.update({f"DS{ds_id}": { - "Description": cmp_meta[f"DS_{ds_id}_Description"], - "RepairAction": repair_action - }}) + ls_meta.update( + { + f"DS{ds_id}": { + "Description": cmp_meta[ + f"DS_{ds_id}_Description" + ], + "RepairAction": repair_action, + } + } + ) else: # in simultaneous cases, convert simultaneous weights into @@ -402,10 +425,16 @@ def create_FEMA_P58_fragility_db(source_file, for ds_id in range(1, ds_count + 1): ds_map = format(ds_id, f'0{sim_ds_count}b') - sim_weights.append(np.product( - [weights[ds_i] - if ds_map[-ds_i - 1] == '1' else 1.0-weights[ds_i] - for ds_i in range(sim_ds_count)])) + sim_weights.append( + np.product( + [ + weights[ds_i] + if ds_map[-ds_i - 1] == '1' + else 1.0 - weights[ds_i] + for ds_i in range(sim_ds_count) + ] + ) + ) # save ds metadata - we need to be clever here # the original metadata is saved for the pure cases @@ -414,30 +443,41 @@ def create_FEMA_P58_fragility_db(source_file, # combination of pure DSs they represent if ds_map.count('1') == 1: - ds_pure_id = ds_map[::-1].find('1') + 1 - repair_action = cmp_meta[f"DS_{ds_pure_id}_Repair_Description"] - if pd.isna(repair_action) == True: + repair_action = cmp_meta[ + f"DS_{ds_pure_id}_Repair_Description" + ] + if pd.isna(repair_action): repair_action = "" - ls_meta.update({f"DS{ds_id}": { - "Description": f"Pure DS{ds_pure_id}. " + - cmp_meta[f"DS_{ds_pure_id}_Description"], - "RepairAction": repair_action - }}) + ls_meta.update( + { + f"DS{ds_id}": { + "Description": f"Pure DS{ds_pure_id}. " + + cmp_meta[f"DS_{ds_pure_id}_Description"], + "RepairAction": repair_action, + } + } + ) else: - - ds_combo = [f'DS{_.start() + 1}' - for _ in re.finditer('1', ds_map[::-1])] - - ls_meta.update({f"DS{ds_id}": { - "Description": 'Combination of ' + - ' & '.join(ds_combo), - "RepairAction": 'Combination of pure DS repair ' - 'actions.' - }}) + ds_combo = [ + f'DS{_.start() + 1}' + for _ in re.finditer('1', ds_map[::-1]) + ] + + ls_meta.update( + { + f"DS{ds_id}": { + "Description": 'Combination of ' + + ' & '.join(ds_combo), + "RepairAction": ( + 'Combination of pure DS repair actions.' 
+ ), + } + } + ) # adjust weights to respect the assumption that at least # one DS will occur (i.e., the case with all DSs returning @@ -454,7 +494,6 @@ def create_FEMA_P58_fragility_db(source_file, # then look at the sequential DS cases elif LS_contents[0].startswith('DS'): - # this is straightforward, store the data in the table and dict ds_id = LS_contents[0][2] @@ -462,13 +501,17 @@ def create_FEMA_P58_fragility_db(source_file, theta_1 = getattr(cmp, f"DS_{ds_id}_Total_Dispersion_Beta") repair_action = cmp_meta[f"DS_{ds_id}_Repair_Description"] - if pd.isna(repair_action) == True: + if pd.isna(repair_action): repair_action = "" - ls_meta.update({f"DS{ds_id}": { - "Description": cmp_meta[f"DS_{ds_id}_Description"], - "RepairAction": repair_action - }}) + ls_meta.update( + { + f"DS{ds_id}": { + "Description": cmp_meta[f"DS_{ds_id}_Description"], + "RepairAction": repair_action, + } + } + ) # FEMA P58 assumes lognormal distribution for every fragility df_db.loc[cmp.Index, f'LS{LS_i}-Family'] = 'lognormal' @@ -515,11 +558,12 @@ def create_FEMA_P58_fragility_db(source_file, print("Successfully parsed and saved the fragility data from FEMA P58") -def create_FEMA_P58_bldg_repair_db( - source_file, - meta_file='', - target_data_file='loss_repair_DB_FEMA_P58_2nd.csv', - target_meta_file='loss_repair_DB_FEMA_P58_2nd.json'): +def create_FEMA_P58_repair_db( + source_file, + meta_file='', + target_data_file='loss_repair_DB_FEMA_P58_2nd.csv', + target_meta_file='loss_repair_DB_FEMA_P58_2nd.json', +): """ Create a repair consequence parameter database based on the FEMA P58 data @@ -543,12 +587,16 @@ def create_FEMA_P58_bldg_repair_db( # parse the source file df = pd.concat( - [pd.read_excel(source_file, sheet_name=sheet, header=2, index_col=1) - for sheet in ('Summary', 'Cost Summary', 'Env Summary')], axis=1) + [ + pd.read_excel(source_file, sheet_name=sheet, header=2, index_col=1) + for sheet in ('Summary', 'Cost Summary', 'Env Summary') + ], + axis=1, + ) # parse the extra metadata file if Path(meta_file).is_file(): - with open(meta_file, 'r') as f: + with open(meta_file, 'r', encoding='utf-8') as f: frag_meta = json.load(f) else: frag_meta = {} @@ -574,7 +622,7 @@ def create_FEMA_P58_bldg_repair_db( f"Lower Qty Cutoff, DS{DS_i}", f"Upper Qty Cutoff, DS{DS_i}", f"CV / Dispersion, DS{DS_i}", - + # -------------------------- f"Best Fit, DS{DS_i}.1", f"Lower Qty Mean, DS{DS_i}.1", f"Upper Qty Mean, DS{DS_i}.1", @@ -582,7 +630,7 @@ def create_FEMA_P58_bldg_repair_db( f"Upper Qty Cutoff, DS{DS_i}.1", f"CV / Dispersion, DS{DS_i}.2", f"DS {DS_i}, Long Lead Time", - + # -------------------------- f'Repair Cost, p10, DS{DS_i}', f'Repair Cost, p50, DS{DS_i}', f'Repair Cost, p90, DS{DS_i}', @@ -591,14 +639,14 @@ def create_FEMA_P58_bldg_repair_db( f'Time, p90, DS{DS_i}', f'Mean Value, DS{DS_i}', f'Mean Value, DS{DS_i}.1', - + # -------------------------- # Columns added for the Environmental loss f"DS{DS_i} Best Fit", f"DS{DS_i} CV or Beta", - + # -------------------------- f"DS{DS_i} Best Fit.1", f"DS{DS_i} CV or Beta.1", - + # -------------------------- f"DS{DS_i} Embodied Carbon (kg CO2eq)", f"DS{DS_i} Embodied Energy (MJ)", ] @@ -667,32 +715,23 @@ def create_FEMA_P58_bldg_repair_db( DVs = ['Cost', 'Time', 'Carbon', 'Energy'] df_MI = pd.MultiIndex.from_product([comps, DVs], names=['ID', 'DV']) - df_db = pd.DataFrame( - columns=out_cols, - index=df_MI, - dtype=float - ) + df_db = pd.DataFrame(columns=out_cols, index=df_MI, dtype=float) # initialize the dictionary that stores the loss metadata meta_dict = 
{} # add the general information to the meta dict if "_GeneralInformation" in frag_meta.keys(): - frag_meta = frag_meta["_GeneralInformation"] meta_dict.update({"_GeneralInformation": frag_meta}) - convert_family = { - 'LogNormal': 'lognormal', - 'Normal': 'normal' - } + convert_family = {'LogNormal': 'lognormal', 'Normal': 'normal'} # for each component... # (this approach is not efficient, but easy to follow which was considered # more important than efficiency.) for cmp in df_db_source.itertuples(): - ID = cmp.Index.split('.') cmpID = f'{ID[0][0]}.{ID[0][1:3]}.{ID[0][3:5]}.{ID[1]}' @@ -707,8 +746,9 @@ def create_FEMA_P58_bldg_repair_db( # store units - df_db.loc[cmp.Index, 'Quantity-Unit'] = ( - ' '.join(cmp.Fragility_Unit_of_Measure.split(' ')[::-1]).strip()) + df_db.loc[cmp.Index, 'Quantity-Unit'] = ' '.join( + cmp.Fragility_Unit_of_Measure.split(' ')[::-1] + ).strip() df_db.loc[(cmp.Index, 'Cost'), 'DV-Unit'] = "USD_2011" df_db.loc[(cmp.Index, 'Time'), 'DV-Unit'] = "worker_day" df_db.loc[(cmp.Index, 'Carbon'), 'DV-Unit'] = "kg" @@ -727,13 +767,20 @@ def create_FEMA_P58_bldg_repair_db( # the additional fields are added to the description if they exist if cmp_meta['Construction_Quality'] != 'Not Specified': - comments += f'\nConstruction Quality: ' \ - f'{cmp_meta["Construction_Quality"]}' + comments += ( + f'\nConstruction Quality: ' f'{cmp_meta["Construction_Quality"]}' + ) if cmp_meta['Seismic_Installation_Conditions'] not in [ - 'Not Specified', 'Not applicable', 'Unknown', 'Any']: - comments += f'\nSeismic Installation Conditions: ' \ - f'{cmp_meta["Seismic_Installation_Conditions"]}' + 'Not Specified', + 'Not applicable', + 'Unknown', + 'Any', + ]: + comments += ( + f'\nSeismic Installation Conditions: ' + f'{cmp_meta["Seismic_Installation_Conditions"]}' + ) if cmp_meta['Comments__Notes'] != 'None': comments += f'\nNotes: {cmp_meta["Comments__Notes"]}' @@ -750,12 +797,11 @@ def create_FEMA_P58_bldg_repair_db( "SuggestedComponentBlockSize": ' '.join(block_size), "RoundUpToIntegerQuantity": cmp_meta['Round_to_Integer_Unit'], "ControllingDemand": "Damage Quantity", - "DamageStates": {} + "DamageStates": {}, } # Handle components with simultaneous damage states separately if 'Simul' in cmp.DS_Hierarchy: - # Note that we are assuming that all damage states are triggered by # a single limit state in these components. 
# This assumption holds for the second edition of FEMA P58, but it @@ -768,53 +814,76 @@ def create_FEMA_P58_bldg_repair_db( # get the p10, p50, and p90 estimates for all damage states for DS_i in range(1, 6): - if not pd.isna(getattr(cmp, f'Repair_Cost_p10_DS{DS_i}')): - - cost_est.update({f'DS{DS_i}': np.array([ - getattr(cmp, f'Repair_Cost_p10_DS{DS_i}'), - getattr(cmp, f'Repair_Cost_p50_DS{DS_i}'), - getattr(cmp, f'Repair_Cost_p90_DS{DS_i}'), - getattr(cmp, f'Lower_Qty_Mean_DS{DS_i}'), - getattr(cmp, f'Upper_Qty_Mean_DS{DS_i}') - ])}) - - time_est.update({f'DS{DS_i}': np.array([ - getattr(cmp, f'Time_p10_DS{DS_i}'), - getattr(cmp, f'Time_p50_DS{DS_i}'), - getattr(cmp, f'Time_p90_DS{DS_i}'), - getattr(cmp, f'Lower_Qty_Mean_DS{DS_i}_1'), - getattr(cmp, f'Upper_Qty_Mean_DS{DS_i}_1'), - int(getattr(cmp, f'DS_{DS_i}_Long_Lead_Time') == 'YES') - ])}) + cost_est.update( + { + f'DS{DS_i}': np.array( + [ + getattr(cmp, f'Repair_Cost_p10_DS{DS_i}'), + getattr(cmp, f'Repair_Cost_p50_DS{DS_i}'), + getattr(cmp, f'Repair_Cost_p90_DS{DS_i}'), + getattr(cmp, f'Lower_Qty_Mean_DS{DS_i}'), + getattr(cmp, f'Upper_Qty_Mean_DS{DS_i}'), + ] + ) + } + ) + + time_est.update( + { + f'DS{DS_i}': np.array( + [ + getattr(cmp, f'Time_p10_DS{DS_i}'), + getattr(cmp, f'Time_p50_DS{DS_i}'), + getattr(cmp, f'Time_p90_DS{DS_i}'), + getattr(cmp, f'Lower_Qty_Mean_DS{DS_i}_1'), + getattr(cmp, f'Upper_Qty_Mean_DS{DS_i}_1'), + int( + getattr(cmp, f'DS_{DS_i}_Long_Lead_Time') + == 'YES' + ), + ] + ) + } + ) if not pd.isna(getattr(cmp, f'DS{DS_i}_Embodied_Carbon_kg_CO2eq')): - theta_0, theta_1, family = [ getattr(cmp, f'DS{DS_i}_Embodied_Carbon_kg_CO2eq'), getattr(cmp, f'DS{DS_i}_CV_or_Beta'), - getattr(cmp, f'DS{DS_i}_Best_Fit') + getattr(cmp, f'DS{DS_i}_Best_Fit'), ] if family == 'Normal': - p10, p50, p90 = norm.ppf([0.1, 0.5, 0.9], loc=theta_0, scale=theta_0 * theta_1) + p10, p50, p90 = norm.ppf( + [0.1, 0.5, 0.9], loc=theta_0, scale=theta_0 * theta_1 + ) elif family == 'LogNormal': - p10, p50, p90 = np.exp(norm.ppf([0.1, 0.5, 0.9], loc=np.log(theta_0), scale=theta_1)) + p10, p50, p90 = np.exp( + norm.ppf( + [0.1, 0.5, 0.9], loc=np.log(theta_0), scale=theta_1 + ) + ) carbon_est.update({f'DS{DS_i}': np.array([p10, p50, p90])}) if not pd.isna(getattr(cmp, f'DS{DS_i}_Embodied_Energy_MJ')): - theta_0, theta_1, family = [ getattr(cmp, f'DS{DS_i}_Embodied_Energy_MJ'), getattr(cmp, f'DS{DS_i}_CV_or_Beta_1'), - getattr(cmp, f'DS{DS_i}_Best_Fit_1') + getattr(cmp, f'DS{DS_i}_Best_Fit_1'), ] if family == 'Normal': - p10, p50, p90 = norm.ppf([0.1, 0.5, 0.9], loc=theta_0, scale=theta_0 * theta_1) + p10, p50, p90 = norm.ppf( + [0.1, 0.5, 0.9], loc=theta_0, scale=theta_0 * theta_1 + ) elif family == 'LogNormal': - p10, p50, p90 = np.exp(norm.ppf([0.1, 0.5, 0.9], loc=np.log(theta_0), scale=theta_1)) + p10, p50, p90 = np.exp( + norm.ppf( + [0.1, 0.5, 0.9], loc=np.log(theta_0), scale=theta_1 + ) + ) energy_est.update({f'DS{DS_i}': np.array([p10, p50, p90])}) @@ -825,47 +894,78 @@ def create_FEMA_P58_bldg_repair_db( for DS_i in range(1, ds_count + 1): ds_map = format(DS_i, f'0{sim_ds_count}b') - cost_vals = np.sum([cost_est[f'DS{ds_i + 1}'] - if ds_map[-ds_i - 1] == '1' else np.zeros(5) - for ds_i in range(sim_ds_count)], - axis=0) - - time_vals = np.sum([time_est[f'DS{ds_i + 1}'] - if ds_map[-ds_i - 1] == '1' else np.zeros(6) - for ds_i in range(sim_ds_count)], - axis=0) - - carbon_vals = np.sum([carbon_est[f'DS{ds_i + 1}'] - if ds_map[-ds_i - 1] == '1' else np.zeros(3) - for ds_i in range(sim_ds_count)], - axis=0) - - energy_vals = 
np.sum([energy_est[f'DS{ds_i + 1}'] - if ds_map[-ds_i - 1] == '1' else np.zeros(3) - for ds_i in range(sim_ds_count)], - axis=0) + cost_vals = np.sum( + [ + cost_est[f'DS{ds_i + 1}'] + if ds_map[-ds_i - 1] == '1' + else np.zeros(5) + for ds_i in range(sim_ds_count) + ], + axis=0, + ) + + time_vals = np.sum( + [ + time_est[f'DS{ds_i + 1}'] + if ds_map[-ds_i - 1] == '1' + else np.zeros(6) + for ds_i in range(sim_ds_count) + ], + axis=0, + ) + + carbon_vals = np.sum( + [ + carbon_est[f'DS{ds_i + 1}'] + if ds_map[-ds_i - 1] == '1' + else np.zeros(3) + for ds_i in range(sim_ds_count) + ], + axis=0, + ) + + energy_vals = np.sum( + [ + energy_est[f'DS{ds_i + 1}'] + if ds_map[-ds_i - 1] == '1' + else np.zeros(3) + for ds_i in range(sim_ds_count) + ], + axis=0, + ) # fit a distribution family_hat, theta_hat = fit_distribution_to_percentiles( - cost_vals[:3], [0.1, 0.5, 0.9], ['normal', 'lognormal']) + cost_vals[:3], [0.1, 0.5, 0.9], ['normal', 'lognormal'] + ) cost_theta = theta_hat if family_hat == 'normal': cost_theta[1] = cost_theta[1] / cost_theta[0] - time_theta = [time_vals[1], - np.sqrt(cost_theta[1] ** 2.0 + 0.25 ** 2.0)] + time_theta = [ + time_vals[1], + np.sqrt(cost_theta[1] ** 2.0 + 0.25**2.0), + ] # fit distributions to environmental impact consequences - family_hat_carbon, theta_hat_carbon = fit_distribution_to_percentiles( - carbon_vals[:3], [0.1, 0.5, 0.9], ['normal', 'lognormal']) + ( + family_hat_carbon, + theta_hat_carbon, + ) = fit_distribution_to_percentiles( + carbon_vals[:3], [0.1, 0.5, 0.9], ['normal', 'lognormal'] + ) carbon_theta = theta_hat_carbon if family_hat_carbon == 'normal': carbon_theta[1] = carbon_theta[1] / carbon_theta[0] - family_hat_energy, theta_hat_energy = fit_distribution_to_percentiles( - energy_vals[:3], [0.1, 0.5, 0.9], ['normal', 'lognormal']) + ( + family_hat_energy, + theta_hat_energy, + ) = fit_distribution_to_percentiles( + energy_vals[:3], [0.1, 0.5, 0.9], ['normal', 'lognormal'] + ) energy_theta = theta_hat_energy if family_hat_energy == 'normal': @@ -885,154 +985,193 @@ def create_FEMA_P58_bldg_repair_db( df_db.loc[(cmp.Index, 'Cost'), f'DS{DS_i}-Theta_0'] = ( f"{cost_vals[3]:g},{cost_vals[4]:g}|" - f"{cost_qnt_low:g},{cost_qnt_up:g}") + f"{cost_qnt_low:g},{cost_qnt_up:g}" + ) - df_db.loc[(cmp.Index, 'Cost'), - f'DS{DS_i}-Theta_1'] = f"{cost_theta[1]:g}" + df_db.loc[ + (cmp.Index, 'Cost'), f'DS{DS_i}-Theta_1' + ] = f"{cost_theta[1]:g}" df_db.loc[(cmp.Index, 'Time'), f'DS{DS_i}-Family'] = family_hat df_db.loc[(cmp.Index, 'Time'), f'DS{DS_i}-Theta_0'] = ( f"{time_vals[3]:g},{time_vals[4]:g}|" - f"{time_qnt_low:g},{time_qnt_up:g}") - - df_db.loc[(cmp.Index, 'Time'), - f'DS{DS_i}-Theta_1'] = f"{time_theta[1]:g}" + f"{time_qnt_low:g},{time_qnt_up:g}" + ) - df_db.loc[(cmp.Index, 'Time'), - f'DS{DS_i}-LongLeadTime'] = int(time_vals[5] > 0) + df_db.loc[ + (cmp.Index, 'Time'), f'DS{DS_i}-Theta_1' + ] = f"{time_theta[1]:g}" + df_db.loc[(cmp.Index, 'Time'), f'DS{DS_i}-LongLeadTime'] = int( + time_vals[5] > 0 + ) - df_db.loc[(cmp.Index, 'Carbon'), f'DS{DS_i}-Family'] = family_hat_carbon + df_db.loc[ + (cmp.Index, 'Carbon'), f'DS{DS_i}-Family' + ] = family_hat_carbon - df_db.loc[(cmp.Index, 'Carbon'), f'DS{DS_i}-Theta_0'] = f"{carbon_theta[0]:g}" + df_db.loc[ + (cmp.Index, 'Carbon'), f'DS{DS_i}-Theta_0' + ] = f"{carbon_theta[0]:g}" - df_db.loc[(cmp.Index, 'Carbon'), - f'DS{DS_i}-Theta_1'] = f"{carbon_theta[1]:g}" + df_db.loc[ + (cmp.Index, 'Carbon'), f'DS{DS_i}-Theta_1' + ] = f"{carbon_theta[1]:g}" - df_db.loc[(cmp.Index, 'Energy'), f'DS{DS_i}-Family'] = 
family_hat_energy + df_db.loc[ + (cmp.Index, 'Energy'), f'DS{DS_i}-Family' + ] = family_hat_energy - df_db.loc[(cmp.Index, 'Energy'), f'DS{DS_i}-Theta_0'] = f"{energy_theta[0]:g}" + df_db.loc[ + (cmp.Index, 'Energy'), f'DS{DS_i}-Theta_0' + ] = f"{energy_theta[0]:g}" - df_db.loc[(cmp.Index, 'Energy'), - f'DS{DS_i}-Theta_1'] = f"{energy_theta[1]:g}" + df_db.loc[ + (cmp.Index, 'Energy'), f'DS{DS_i}-Theta_1' + ] = f"{energy_theta[1]:g}" if ds_map.count('1') == 1: - ds_pure_id = ds_map[::-1].find('1') + 1 repair_action = cmp_meta[f"DS_{ds_pure_id}_Repair_Description"] - if pd.isna(repair_action) == True: + if pd.isna(repair_action): repair_action = "" - meta_data['DamageStates'].update({f"DS{DS_i}": { - "Description": f"Pure DS{ds_pure_id}. " + - cmp_meta[f"DS_{ds_pure_id}_Description"], - "RepairAction": repair_action - }}) + meta_data['DamageStates'].update( + { + f"DS{DS_i}": { + "Description": f"Pure DS{ds_pure_id}. " + + cmp_meta[f"DS_{ds_pure_id}_Description"], + "RepairAction": repair_action, + } + } + ) else: + ds_combo = [ + f'DS{_.start() + 1}' for _ in re.finditer('1', ds_map[::-1]) + ] - ds_combo = [f'DS{_.start() + 1}' - for _ in re.finditer('1', ds_map[::-1])] - - meta_data['DamageStates'].update({f"DS{DS_i}": { - "Description": 'Combination of ' + - ' & '.join(ds_combo), - "RepairAction": 'Combination of pure DS repair ' - 'actions.' - }}) + meta_data['DamageStates'].update( + { + f"DS{DS_i}": { + "Description": 'Combination of ' + + ' & '.join(ds_combo), + "RepairAction": 'Combination of pure DS repair ' + 'actions.', + } + } + ) # for every other component... else: # now look at each Damage State for DS_i in range(1, 6): - # cost if not pd.isna(getattr(cmp, f'Best_Fit_DS{DS_i}')): - df_db.loc[(cmp.Index, 'Cost'), f'DS{DS_i}-Family'] = ( - convert_family[getattr(cmp, f'Best_Fit_DS{DS_i}')]) + df_db.loc[ + (cmp.Index, 'Cost'), f'DS{DS_i}-Family' + ] = convert_family[getattr(cmp, f'Best_Fit_DS{DS_i}')] if not pd.isna(getattr(cmp, f'Lower_Qty_Mean_DS{DS_i}')): - theta_0_low = getattr(cmp, f'Lower_Qty_Mean_DS{DS_i}') theta_0_up = getattr(cmp, f'Upper_Qty_Mean_DS{DS_i}') qnt_low = getattr(cmp, f'Lower_Qty_Cutoff_DS{DS_i}') qnt_up = getattr(cmp, f'Upper_Qty_Cutoff_DS{DS_i}') - if theta_0_low == 0. 
and theta_0_up == 0.: - df_db.loc[(cmp.Index, 'Cost'), - f'DS{DS_i}-Family'] = np.nan + if theta_0_low == 0.0 and theta_0_up == 0.0: + df_db.loc[ + (cmp.Index, 'Cost'), f'DS{DS_i}-Family' + ] = np.nan else: df_db.loc[(cmp.Index, 'Cost'), f'DS{DS_i}-Theta_0'] = ( f"{theta_0_low:g},{theta_0_up:g}|" - f"{qnt_low:g},{qnt_up:g}") + f"{qnt_low:g},{qnt_up:g}" + ) - df_db.loc[(cmp.Index, 'Cost'), f'DS{DS_i}-Theta_1'] = ( - f"{getattr(cmp, f'CV__Dispersion_DS{DS_i}'):g}") + df_db.loc[ + (cmp.Index, 'Cost'), f'DS{DS_i}-Theta_1' + ] = f"{getattr(cmp, f'CV__Dispersion_DS{DS_i}'):g}" else: incomplete_cost = True repair_action = cmp_meta[f"DS_{DS_i}_Repair_Description"] - if pd.isna(repair_action) == True: + if pd.isna(repair_action): repair_action = "" - meta_data['DamageStates'].update({ - f"DS{DS_i}": { - "Description": cmp_meta[f"DS_{DS_i}_Description"], - "RepairAction": repair_action}}) + meta_data['DamageStates'].update( + { + f"DS{DS_i}": { + "Description": cmp_meta[f"DS_{DS_i}_Description"], + "RepairAction": repair_action, + } + } + ) # time if not pd.isna(getattr(cmp, f'Best_Fit_DS{DS_i}_1')): - - df_db.loc[(cmp.Index, 'Time'), f'DS{DS_i}-Family'] = ( - convert_family[getattr(cmp, f'Best_Fit_DS{DS_i}_1')]) + df_db.loc[ + (cmp.Index, 'Time'), f'DS{DS_i}-Family' + ] = convert_family[getattr(cmp, f'Best_Fit_DS{DS_i}_1')] if not pd.isna(getattr(cmp, f'Lower_Qty_Mean_DS{DS_i}_1')): - theta_0_low = getattr(cmp, f'Lower_Qty_Mean_DS{DS_i}_1') theta_0_up = getattr(cmp, f'Upper_Qty_Mean_DS{DS_i}_1') qnt_low = getattr(cmp, f'Lower_Qty_Cutoff_DS{DS_i}_1') qnt_up = getattr(cmp, f'Upper_Qty_Cutoff_DS{DS_i}_1') - if theta_0_low == 0. and theta_0_up == 0.: - df_db.loc[(cmp.Index, 'Time'), - f'DS{DS_i}-Family'] = np.nan + if theta_0_low == 0.0 and theta_0_up == 0.0: + df_db.loc[ + (cmp.Index, 'Time'), f'DS{DS_i}-Family' + ] = np.nan else: df_db.loc[(cmp.Index, 'Time'), f'DS{DS_i}-Theta_0'] = ( f"{theta_0_low:g},{theta_0_up:g}|" - f"{qnt_low:g},{qnt_up:g}") + f"{qnt_low:g},{qnt_up:g}" + ) - df_db.loc[(cmp.Index, 'Time'), f'DS{DS_i}-Theta_1'] = ( - f"{getattr(cmp, f'CV__Dispersion_DS{DS_i}_2'):g}") + df_db.loc[ + (cmp.Index, 'Time'), f'DS{DS_i}-Theta_1' + ] = f"{getattr(cmp, f'CV__Dispersion_DS{DS_i}_2'):g}" - df_db.loc[(cmp.Index, 'Time'), f'DS{DS_i}-LongLeadTime'] = ( - int(getattr(cmp, f'DS_{DS_i}_Long_Lead_Time') == 'YES')) + df_db.loc[ + (cmp.Index, 'Time'), f'DS{DS_i}-LongLeadTime' + ] = int(getattr(cmp, f'DS_{DS_i}_Long_Lead_Time') == 'YES') else: incomplete_time = True # Carbon if not pd.isna(getattr(cmp, f'DS{DS_i}_Best_Fit')): - df_db.loc[(cmp.Index, 'Carbon'), f'DS{DS_i}-Family'] = ( - convert_family[getattr(cmp, f'DS{DS_i}_Best_Fit')]) + df_db.loc[ + (cmp.Index, 'Carbon'), f'DS{DS_i}-Family' + ] = convert_family[getattr(cmp, f'DS{DS_i}_Best_Fit')] - df_db.loc[(cmp.Index, 'Carbon'), f'DS{DS_i}-Theta_0'] = getattr(cmp, - f'DS{DS_i}_Embodied_Carbon_kg_CO2eq') + df_db.loc[(cmp.Index, 'Carbon'), f'DS{DS_i}-Theta_0'] = getattr( + cmp, f'DS{DS_i}_Embodied_Carbon_kg_CO2eq' + ) - df_db.loc[(cmp.Index, 'Carbon'), f'DS{DS_i}-Theta_1'] = getattr(cmp, f'DS{DS_i}_CV_or_Beta') + df_db.loc[(cmp.Index, 'Carbon'), f'DS{DS_i}-Theta_1'] = getattr( + cmp, f'DS{DS_i}_CV_or_Beta' + ) # Energy if not pd.isna(getattr(cmp, f'DS{DS_i}_Best_Fit_1')): - df_db.loc[(cmp.Index, 'Energy'), f'DS{DS_i}-Family'] = ( - convert_family[getattr(cmp, f'DS{DS_i}_Best_Fit_1')]) + df_db.loc[ + (cmp.Index, 'Energy'), f'DS{DS_i}-Family' + ] = convert_family[getattr(cmp, f'DS{DS_i}_Best_Fit_1')] - df_db.loc[(cmp.Index, 'Energy'), 
f'DS{DS_i}-Theta_0'] = getattr(cmp, f'DS{DS_i}_Embodied_Energy_MJ') + df_db.loc[(cmp.Index, 'Energy'), f'DS{DS_i}-Theta_0'] = getattr( + cmp, f'DS{DS_i}_Embodied_Energy_MJ' + ) - df_db.loc[(cmp.Index, 'Energy'), f'DS{DS_i}-Theta_1'] = getattr(cmp, f'DS{DS_i}_CV_or_Beta_1') + df_db.loc[(cmp.Index, 'Energy'), f'DS{DS_i}-Theta_1'] = getattr( + cmp, f'DS{DS_i}_CV_or_Beta_1' + ) df_db.loc[(cmp.Index, 'Cost'), 'Incomplete'] = int(incomplete_cost) df_db.loc[(cmp.Index, 'Time'), 'Incomplete'] = int(incomplete_time) @@ -1043,14 +1182,14 @@ def create_FEMA_P58_bldg_repair_db( # assign the Index column as the new ID df_db.index = pd.MultiIndex.from_arrays( - [df_db['Index'].values, df_db.index.get_level_values(1)]) + [df_db['Index'].values, df_db.index.get_level_values(1)] + ) df_db.drop('Index', axis=1, inplace=True) # review the database and drop rows with no information cmp_to_drop = [] for cmp in df_db.index: - empty = True for DS_i in range(1, 6): @@ -1081,14 +1220,16 @@ def create_FEMA_P58_bldg_repair_db( with open(target_meta_file, 'w+', encoding='utf-8') as f: json.dump(meta_dict, f, indent=2) - print("Successfully parsed and saved the repair consequence data from FEMA " - "P58") + print( + "Successfully parsed and saved the repair consequence data from FEMA P58" + ) def create_FEMA_P58_bldg_injury_db( - source_file, - target_data_file='bldg_injury_DB_FEMA_P58_2nd.csv', - target_meta_file='bldg_injury_DB_FEMA_P58_2nd.json'): + source_file, + target_data_file='bldg_injury_DB_FEMA_P58_2nd.csv', + target_meta_file='bldg_injury_DB_FEMA_P58_2nd.json', +): """ Create an injury consequence parameter database based on the FEMA P58 data @@ -1109,9 +1250,14 @@ def create_FEMA_P58_bldg_injury_db( """ # parse the source file - df = pd.read_excel(source_file, sheet_name='Summary', header=2, index_col=1, - true_values=["YES", "Yes", "yes"], - false_values=["NO", "No", "no"]) + df = pd.read_excel( + source_file, + sheet_name='Summary', + header=2, + index_col=1, + true_values=["YES", "Yes", "yes"], + false_values=["NO", "No", "no"], + ) # remove empty rows and columns df.dropna(axis=0, how='all', inplace=True) @@ -1124,7 +1270,6 @@ def create_FEMA_P58_bldg_injury_db( ] for DS_i in range(1, 6): cols_to_db += [ - f'DS {DS_i}, Potential non-collapse casualty?', f'DS {DS_i} - Casualty Affected Area', f'DS {DS_i} Serious Injury Rate - Median', @@ -1194,11 +1339,7 @@ def create_FEMA_P58_bldg_injury_db( DVs = ['S1', 'S2'] df_MI = pd.MultiIndex.from_product([comps, DVs], names=['ID', 'Severity']) - df_db = pd.DataFrame( - columns=out_cols, - index=df_MI, - dtype=float - ) + df_db = pd.DataFrame(columns=out_cols, index=df_MI, dtype=float) # initialize the dictionary that stores the loss metadata meta_dict = {} @@ -1207,7 +1348,6 @@ def create_FEMA_P58_bldg_injury_db( # (this approach is not efficient, but easy to follow which was considered # more important than efficiency.) 
for cmp in df_db_source.itertuples(): - ID = cmp.Index.split('.') cmpID = f'{ID[0][0]}.{ID[0][1:3]}.{ID[0][3:5]}.{ID[1]}' @@ -1220,8 +1360,9 @@ def create_FEMA_P58_bldg_injury_db( # store units - df_db.loc[cmp.Index, 'Quantity-Unit'] = ( - ' '.join(cmp.Fragility_Unit_of_Measure.split(' ')[::-1]).strip()) + df_db.loc[cmp.Index, 'Quantity-Unit'] = ' '.join( + cmp.Fragility_Unit_of_Measure.split(' ')[::-1] + ).strip() df_db.loc[(cmp.Index, 'S1'), 'DV-Unit'] = "persons" df_db.loc[(cmp.Index, 'S2'), 'DV-Unit'] = "persons" @@ -1238,13 +1379,20 @@ def create_FEMA_P58_bldg_injury_db( # the additional fields are added to the description if they exist if cmp_meta['Construction_Quality'] != 'Not Specified': - comments += f'\nConstruction Quality: ' \ - f'{cmp_meta["Construction_Quality"]}' + comments += ( + f'\nConstruction Quality: ' f'{cmp_meta["Construction_Quality"]}' + ) if cmp_meta['Seismic_Installation_Conditions'] not in [ - 'Not Specified', 'Not applicable', 'Unknown', 'Any']: - comments += f'\nSeismic Installation Conditions: ' \ - f'{cmp_meta["Seismic_Installation_Conditions"]}' + 'Not Specified', + 'Not applicable', + 'Unknown', + 'Any', + ]: + comments += ( + f'\nSeismic Installation Conditions: ' + f'{cmp_meta["Seismic_Installation_Conditions"]}' + ) if cmp_meta['Comments__Notes'] != 'None': comments += f'\nNotes: {cmp_meta["Comments__Notes"]}' @@ -1261,12 +1409,11 @@ def create_FEMA_P58_bldg_injury_db( "SuggestedComponentBlockSize": ' '.join(block_size), "RoundUpToIntegerQuantity": cmp_meta['Round_to_Integer_Unit'], "ControllingDemand": "Damage Quantity", - "DamageStates": {} + "DamageStates": {}, } # Handle components with simultaneous damage states separately if 'Simul' in cmp.DS_Hierarchy: - # Note that we are assuming that all damage states are triggered by # a single limit state in these components. 
# This assumption holds for the second edition of FEMA P58, but it @@ -1277,19 +1424,41 @@ def create_FEMA_P58_bldg_injury_db( # get the p10, p50, and p90 estimates for all damage states for DS_i in range(1, 6): - casualty_model = getattr( - cmp, f'DS_{DS_i}_Potential_non_collapse_casualty') + cmp, f'DS_{DS_i}_Potential_non_collapse_casualty' + ) if casualty_model is True: - - inj_data.update({f'DS{DS_i}': np.array([ - getattr(cmp, f'DS_{DS_i}___Casualty_Affected_Area'), - getattr(cmp, f'DS_{DS_i}_Serious_Injury_Rate___Median'), - getattr(cmp, f'DS_{DS_i}_Serious_Injury_Rate___Dispersion'), - getattr(cmp, f'DS_{DS_i}_Loss_of_Life_Rate___Median'), - getattr(cmp, f'DS_{DS_i}_Loss_of_Life_Rate___Dispersion') - ])}) + inj_data.update( + { + f'DS{DS_i}': np.array( + [ + getattr( + cmp, f'DS_{DS_i}___Casualty_Affected_Area' + ), + getattr( + cmp, + f'DS_{DS_i}_Serious_Injury_Rate' + f'___Median', + ), + getattr( + cmp, + f'DS_{DS_i}_Serious_Injury_Rate' + f'___Dispersion', + ), + getattr( + cmp, + f'DS_{DS_i}_Loss_of_Life_Rate' f'___Median', + ), + getattr( + cmp, + f'DS_{DS_i}_Loss_of_Life_Rate' + f'___Dispersion', + ), + ] + ) + } + ) ds_tot += 1 elif casualty_model is False: @@ -1317,10 +1486,8 @@ def create_FEMA_P58_bldg_injury_db( ds_map = format(DS_i, f'0{sim_ds_count}b') if ds_map[-ds_trig] == '1': - # store the consequence data for severity in ('S1', 'S2'): - A_affected = inj_data[0] if severity == 'S1': @@ -1331,93 +1498,112 @@ def create_FEMA_P58_bldg_injury_db( theta_1 = inj_data[4] if theta_0 != 0.0: + df_db.loc[ + (cmp.Index, severity), f'DS{DS_i}-Family' + ] = 'lognormal' - df_db.loc[(cmp.Index, severity), - f'DS{DS_i}-Family'] = 'lognormal' + df_db.loc[ + (cmp.Index, severity), f'DS{DS_i}-Theta_0' + ] = theta_0 - df_db.loc[(cmp.Index, severity), - f'DS{DS_i}-Theta_0'] = theta_0 + df_db.loc[ + (cmp.Index, severity), f'DS{DS_i}-Theta_1' + ] = theta_1 - df_db.loc[(cmp.Index, severity), - f'DS{DS_i}-Theta_1'] = theta_1 - - df_db.loc[(cmp.Index, severity), - f'DS{DS_i}-AffectedArea'] = A_affected + df_db.loc[ + (cmp.Index, severity), f'DS{DS_i}-AffectedArea' + ] = A_affected # store the metadata if ds_map.count('1') == 1: - ds_pure_id = ds_map[::-1].find('1') + 1 - meta_data['DamageStates'].update({f"DS{DS_i}": { - "Description": f"Pure DS{ds_pure_id}. " + - cmp_meta[ - f"DS_{ds_pure_id}_Description"] - }}) + meta_data['DamageStates'].update( + { + f"DS{DS_i}": { + "Description": f"Pure DS{ds_pure_id}. " + + cmp_meta[f"DS_{ds_pure_id}_Description"] + } + } + ) else: + ds_combo = [ + f'DS{_.start() + 1}' for _ in re.finditer('1', ds_map[::-1]) + ] - ds_combo = [f'DS{_.start() + 1}' - for _ in re.finditer('1', ds_map[::-1])] - - meta_data['DamageStates'].update({f"DS{DS_i}": { - "Description": 'Combination of ' + - ' & '.join(ds_combo) - }}) + meta_data['DamageStates'].update( + { + f"DS{DS_i}": { + "Description": 'Combination of ' + + ' & '.join(ds_combo) + } + } + ) # for every other component... 
else: # now look at each Damage State for DS_i in range(1, 6): - casualty_flag = getattr( - cmp, f'DS_{DS_i}_Potential_non_collapse_casualty') + cmp, f'DS_{DS_i}_Potential_non_collapse_casualty' + ) if casualty_flag is True: - - A_affected = getattr(cmp, - f'DS_{DS_i}___Casualty_Affected_Area') + A_affected = getattr(cmp, f'DS_{DS_i}___Casualty_Affected_Area') for severity in ('S1', 'S2'): - if severity == 'S1': - theta_0 = getattr(cmp, f'DS_{DS_i}_Serious_Injury_' - f'Rate___Median') - theta_1 = getattr(cmp, f'DS_{DS_i}_Serious_Injury_' - f'Rate___Dispersion') + theta_0 = getattr( + cmp, f'DS_{DS_i}_Serious_Injury_' f'Rate___Median' + ) + theta_1 = getattr( + cmp, + f'DS_{DS_i}_Serious_Injury_' f'Rate___Dispersion', + ) elif severity == 'S2': - theta_0 = getattr(cmp, f'DS_{DS_i}_Loss_of_Life_' - f'Rate___Median') - theta_1 = getattr(cmp, f'DS_{DS_i}_Loss_of_Life_' - f'Rate___Dispersion') + theta_0 = getattr( + cmp, f'DS_{DS_i}_Loss_of_Life_' f'Rate___Median' + ) + theta_1 = getattr( + cmp, f'DS_{DS_i}_Loss_of_Life_' f'Rate___Dispersion' + ) if theta_0 != 0.0: - - df_db.loc[(cmp.Index, severity), - f'DS{DS_i}-Family'] = 'lognormal' - - df_db.loc[(cmp.Index, severity), - f'DS{DS_i}-Theta_0'] = theta_0 - - df_db.loc[(cmp.Index, severity), - f'DS{DS_i}-Theta_1'] = theta_1 - - df_db.loc[(cmp.Index, severity), - f'DS{DS_i}-AffectedArea'] = A_affected - - if (pd.isna(theta_0) or pd.isna( - theta_1) or pd.isna(A_affected)): - + df_db.loc[ + (cmp.Index, severity), f'DS{DS_i}-Family' + ] = 'lognormal' + + df_db.loc[ + (cmp.Index, severity), f'DS{DS_i}-Theta_0' + ] = theta_0 + + df_db.loc[ + (cmp.Index, severity), f'DS{DS_i}-Theta_1' + ] = theta_1 + + df_db.loc[ + (cmp.Index, severity), f'DS{DS_i}-AffectedArea' + ] = A_affected + + if ( + pd.isna(theta_0) + or pd.isna(theta_1) + or pd.isna(A_affected) + ): if severity == 'S1': incomplete_S1 = True else: incomplete_S2 = True if ~np.isnan(casualty_flag): - - meta_data['DamageStates'].update({ - f"DS{DS_i}": {"Description": - cmp_meta[f"DS_{DS_i}_Description"]}}) + meta_data['DamageStates'].update( + { + f"DS{DS_i}": { + "Description": cmp_meta[f"DS_{DS_i}_Description"] + } + } + ) df_db.loc[(cmp.Index, 'S1'), 'Incomplete'] = int(incomplete_S1) df_db.loc[(cmp.Index, 'S2'), 'Incomplete'] = int(incomplete_S2) @@ -1427,14 +1613,14 @@ def create_FEMA_P58_bldg_injury_db( # assign the Index column as the new ID df_db.index = pd.MultiIndex.from_arrays( - [df_db['Index'].values, df_db.index.get_level_values(1)]) + [df_db['Index'].values, df_db.index.get_level_values(1)] + ) df_db.drop('Index', axis=1, inplace=True) # review the database and drop rows with no information cmp_to_drop = [] for cmp in df_db.index: - empty = True for DS_i in range(1, 16): @@ -1471,14 +1657,16 @@ def create_FEMA_P58_bldg_injury_db( with open(target_meta_file, 'w+', encoding='utf-8') as f: json.dump(meta_dict, f, indent=2) - print("Successfully parsed and saved the injury consequence data from FEMA " - "P58") + print( + "Successfully parsed and saved the injury consequence data from FEMA P58" + ) def create_FEMA_P58_bldg_redtag_db( - source_file, - target_data_file='bldg_redtag_DB_FEMA_P58_2nd.csv', - target_meta_file='bldg_redtag_DB_FEMA_P58_2nd.json'): + source_file, + target_data_file='bldg_redtag_DB_FEMA_P58_2nd.csv', + target_meta_file='bldg_redtag_DB_FEMA_P58_2nd.json', +): """ Create an red tag consequence parameter database based on the FEMA P58 data @@ -1499,9 +1687,14 @@ def create_FEMA_P58_bldg_redtag_db( """ # parse the source file - df = pd.read_excel(source_file, 
sheet_name='Summary', header=2, index_col=1, - true_values=["YES", "Yes", "yes"], - false_values=["NO", "No", "no"]) + df = pd.read_excel( + source_file, + sheet_name='Summary', + header=2, + index_col=1, + true_values=["YES", "Yes", "yes"], + false_values=["NO", "No", "no"], + ) # take another pass with booleans because the first does not always work for true_str in ("YES", "Yes", "yes"): @@ -1522,7 +1715,7 @@ def create_FEMA_P58_bldg_redtag_db( cols_to_db += [ f'DS {DS_i}, Unsafe Placard Trigger Flag', f'DS {DS_i}, Unsafe Placard Damage Median', - f'DS {DS_i}, Unsafe Placard Damage Dispersion' + f'DS {DS_i}, Unsafe Placard Damage Dispersion', ] # filter the columns that we need for the metadata @@ -1572,20 +1765,12 @@ def create_FEMA_P58_bldg_redtag_db( "Incomplete", ] for DS_i in range(1, 6): - out_cols += [ - f"DS{DS_i}-Family", - f"DS{DS_i}-Theta_0", - f"DS{DS_i}-Theta_1" - ] + out_cols += [f"DS{DS_i}-Family", f"DS{DS_i}-Theta_0", f"DS{DS_i}-Theta_1"] # create the database index comps = df_db_source.index.values - df_db = pd.DataFrame( - columns=out_cols, - index=comps, - dtype=float - ) + df_db = pd.DataFrame(columns=out_cols, index=comps, dtype=float) # initialize the dictionary that stores the loss metadata meta_dict = {} @@ -1594,7 +1779,6 @@ def create_FEMA_P58_bldg_redtag_db( # (this approach is not efficient, but easy to follow which was considered # more important than efficiency.) for cmp in df_db_source.itertuples(): - ID = cmp.Index.split('.') cmpID = f'{ID[0][0]}.{ID[0][1:3]}.{ID[0][3:5]}.{ID[1]}' @@ -1617,13 +1801,20 @@ def create_FEMA_P58_bldg_redtag_db( # the additional fields are added to the description if they exist if cmp_meta['Construction_Quality'] != 'Not Specified': - comments += f'\nConstruction Quality: ' \ - f'{cmp_meta["Construction_Quality"]}' + comments += ( + f'\nConstruction Quality: ' f'{cmp_meta["Construction_Quality"]}' + ) if cmp_meta['Seismic_Installation_Conditions'] not in [ - 'Not Specified', 'Not applicable', 'Unknown', 'Any']: - comments += f'\nSeismic Installation Conditions: ' \ - f'{cmp_meta["Seismic_Installation_Conditions"]}' + 'Not Specified', + 'Not applicable', + 'Unknown', + 'Any', + ]: + comments += ( + f'\nSeismic Installation Conditions: ' + f'{cmp_meta["Seismic_Installation_Conditions"]}' + ) if cmp_meta['Comments__Notes'] != 'None': comments += f'\nNotes: {cmp_meta["Comments__Notes"]}' @@ -1640,12 +1831,11 @@ def create_FEMA_P58_bldg_redtag_db( "SuggestedComponentBlockSize": ' '.join(block_size), "RoundUpToIntegerQuantity": cmp_meta['Round_to_Integer_Unit'], "ControllingDemand": "Damage Quantity", - "DamageStates": {} + "DamageStates": {}, } # Handle components with simultaneous damage states separately if 'Simul' in cmp.DS_Hierarchy: - pass # Note that we are assuming that components with simultaneous # damage states do not have damage that would trigger a red tag. 
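The unsafe-placard parameters read in the red-tag hunk below (a median and a dispersion) follow the same lognormal convention used for the percentile conversions earlier in this file. A standalone sketch of that convention using scipy only; the numeric values and the interpretation of q as a damaged-quantity measure are assumptions for illustration, not pelicun code.

    # Standalone illustration of the lognormal (median, dispersion) convention
    # stored as DS{i}-Theta_0 / DS{i}-Theta_1 in these database builders.
    # Example values only -- not taken from the FEMA P58 data.
    import numpy as np
    from scipy.stats import norm

    theta_0 = 0.5  # assumed median of the placard-triggering damage quantity
    theta_1 = 0.3  # assumed logarithmic standard deviation (dispersion)

    # percentiles of the lognormal, mirroring the np.exp(norm.ppf(...)) pattern
    # used for the environmental-impact columns above
    p10, p50, p90 = np.exp(
        norm.ppf([0.1, 0.5, 0.9], loc=np.log(theta_0), scale=theta_1)
    )

    # probability that an assumed damage quantity q exceeds the placard threshold
    q = 0.6
    p_placard = norm.cdf(np.log(q / theta_0) / theta_1)

    print(f"p10={p10:.3f}  p50={p50:.3f}  p90={p90:.3f}  P(placard)={p_placard:.3f}")
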
@@ -1656,34 +1846,34 @@ def create_FEMA_P58_bldg_redtag_db( else: # now look at each Damage State for DS_i in range(1, 6): - - redtag_flag = getattr( - cmp, f'DS_{DS_i}_Unsafe_Placard_Trigger_Flag') + redtag_flag = getattr(cmp, f'DS_{DS_i}_Unsafe_Placard_Trigger_Flag') if redtag_flag is True: - - theta_0 = getattr(cmp, f'DS_{DS_i}_Unsafe_Placard_Damage_' - f'Median') - theta_1 = getattr(cmp, f'DS_{DS_i}_Unsafe_Placard_Damage_' - f'Dispersion') + theta_0 = getattr( + cmp, f'DS_{DS_i}_Unsafe_Placard_Damage_' f'Median' + ) + theta_1 = getattr( + cmp, f'DS_{DS_i}_Unsafe_Placard_Damage_' f'Dispersion' + ) if theta_0 != 0.0: - df_db.loc[cmp.Index, f'DS{DS_i}-Family'] = 'lognormal' df_db.loc[cmp.Index, f'DS{DS_i}-Theta_0'] = theta_0 df_db.loc[cmp.Index, f'DS{DS_i}-Theta_1'] = theta_1 - if (pd.isna(theta_0) or pd.isna(theta_1)): - + if pd.isna(theta_0) or pd.isna(theta_1): incomplete = True if ~np.isnan(redtag_flag): - - meta_data['DamageStates'].update({ - f"DS{DS_i}": {"Description": - cmp_meta[f"DS_{DS_i}_Description"]}}) + meta_data['DamageStates'].update( + { + f"DS{DS_i}": { + "Description": cmp_meta[f"DS_{DS_i}_Description"] + } + } + ) df_db.loc[cmp.Index, 'Incomplete'] = int(incomplete) @@ -1696,7 +1886,6 @@ def create_FEMA_P58_bldg_redtag_db( # review the database and drop rows with no information cmp_to_drop = [] for cmp in df_db.index: - empty = True for DS_i in range(1, 6): @@ -1731,15 +1920,18 @@ def create_FEMA_P58_bldg_redtag_db( with open(target_meta_file, 'w+', encoding='utf-8') as f: json.dump(meta_dict, f, indent=2) - print("Successfully parsed and saved the red tag consequence data from FEMA " - "P58") + print( + "Successfully parsed and saved the red tag consequence data from FEMA P58" + ) -def create_Hazus_EQ_fragility_db(source_file, - meta_file='', - target_data_file='damage_DB_Hazus_EQ_bldg.csv', - target_meta_file='damage_DB_Hazus_EQ_bldg.json', - resolution='building'): +def create_Hazus_EQ_fragility_db( + source_file, + meta_file='', + target_data_file='damage_DB_Hazus_EQ_bldg.csv', + target_meta_file='damage_DB_Hazus_EQ_bldg.json', + resolution='building', +): """ Create a database file based on the HAZUS EQ Technical Manual @@ -1759,9 +1951,9 @@ def create_Hazus_EQ_fragility_db(source_file, target_meta_file: string Path where the fragility metadata should be saved. A json file is expected. - resoltuion: string - If building, the function produces the conventional Hazus - fragilities. If story, the function produces story-level + resolution: string + If building, the function produces the conventional Hazus + fragilities. If story, the function produces story-level fragilities. 
""" @@ -1777,23 +1969,25 @@ def create_Hazus_EQ_fragility_db(source_file, # parse the extra metadata file if Path(meta_file).is_file(): - with open(meta_file, 'r') as f: + with open(meta_file, 'r', encoding='utf-8') as f: frag_meta = json.load(f) else: frag_meta = {} # prepare lists of labels for various building features design_levels = list( - raw_data['Structural_Fragility_Groups']['EDP_limits'].keys()) + raw_data['Structural_Fragility_Groups']['EDP_limits'].keys() + ) building_types = list( - raw_data['Structural_Fragility_Groups']['P_collapse'].keys()) + raw_data['Structural_Fragility_Groups']['P_collapse'].keys() + ) convert_design_level = { 'High_code': 'HC', 'Moderate_code': 'MC', 'Low_code': 'LC', - 'Pre_code': 'PC' + 'Pre_code': 'PC', } # initialize the fragility table @@ -1820,10 +2014,10 @@ def create_Hazus_EQ_fragility_db(source_file, "LS4-Family", "LS4-Theta_0", "LS4-Theta_1", - "LS4-DamageStateWeights" + "LS4-DamageStateWeights", ], index=np.arange(len(building_types) * len(design_levels) * 5), - dtype=float + dtype=float, ) # initialize the dictionary that stores the fragility metadata @@ -1831,14 +2025,12 @@ def create_Hazus_EQ_fragility_db(source_file, # add the general information to the meta dict if "_GeneralInformation" in frag_meta.keys(): - GI = frag_meta["_GeneralInformation"] # remove the decision variable part from the general info GI.pop("DecisionVariables", None) for key, item in deepcopy(GI).items(): - if key == 'ComponentGroups_Damage': GI.update({'ComponentGroups': item}) @@ -1855,9 +2047,8 @@ def create_Hazus_EQ_fragility_db(source_file, for bt in building_types: for dl in design_levels: if bt in S_data['EDP_limits'][dl].keys(): - # add a dot in bt between structure and height labels, if needed - if ((len(bt)>2) and (bt[-1] in ['L','M','H'])): + if (len(bt) > 2) and (bt[-1] in {'L', 'M', 'H'}): bt_exp = f'{bt[:-1]}.{bt[-1]}' st = bt[:-1] hc = bt[-1] @@ -1868,9 +2059,9 @@ def create_Hazus_EQ_fragility_db(source_file, # story-level fragilities are based only on the low rise archetypes if resolution == 'story': - if hc in ['M', 'H']: + if hc in {'M', 'H'}: continue - elif hc == 'L': + if hc == 'L': bt_exp = st # create the component id @@ -1887,68 +2078,102 @@ def create_Hazus_EQ_fragility_db(source_file, df_db.loc[counter, 'Demand-Offset'] = 0 # add metadata - if hc != None: + if hc is not None: cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['STR']['Description']+", "+ - frag_meta['Meta']['StructuralSystems'][st]['Description']+", "+ - frag_meta['Meta']['HeightClasses'][hc]['Description'] + ", "+ - frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Description'] - ), + frag_meta['Meta']['Collections']['STR']['Description'] + + ", " + + frag_meta['Meta']['StructuralSystems'][st][ + 'Description' + ] + + ", " + + frag_meta['Meta']['HeightClasses'][hc]['Description'] + + ", " + + frag_meta['Meta']['DesignLevels'][ + convert_design_level[dl] + ]['Description'] + ), "Comments": ( - frag_meta['Meta']['Collections']['STR']['Comment']+"\n"+ - frag_meta['Meta']['StructuralSystems'][st]['Comment']+"\n"+ - frag_meta['Meta']['HeightClasses'][hc]['Comment'] + "\n"+ - frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Comment'] - ), + frag_meta['Meta']['Collections']['STR']['Comment'] + + "\n" + + frag_meta['Meta']['StructuralSystems'][st]['Comment'] + + "\n" + + frag_meta['Meta']['HeightClasses'][hc]['Comment'] + + "\n" + + frag_meta['Meta']['DesignLevels'][ + convert_design_level[dl] + ]['Comment'] + ), "SuggestedComponentBlockSize": 
"1 EA", "RoundUpToIntegerQuantity": "True", - "LimitStates": {} + "LimitStates": {}, } else: cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['STR']['Description']+", "+ - frag_meta['Meta']['StructuralSystems'][st]['Description']+", "+ - frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Description'] - ), + frag_meta['Meta']['Collections']['STR']['Description'] + + ", " + + frag_meta['Meta']['StructuralSystems'][st][ + 'Description' + ] + + ", " + + frag_meta['Meta']['DesignLevels'][ + convert_design_level[dl] + ]['Description'] + ), "Comments": ( - frag_meta['Meta']['Collections']['STR']['Comment']+"\n"+ - frag_meta['Meta']['StructuralSystems'][st]['Comment']+"\n"+ - frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Comment'] - ), + frag_meta['Meta']['Collections']['STR']['Comment'] + + "\n" + + frag_meta['Meta']['StructuralSystems'][st]['Comment'] + + "\n" + + frag_meta['Meta']['DesignLevels'][ + convert_design_level[dl] + ]['Comment'] + ), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "LimitStates": {} + "LimitStates": {}, } # store the Limit State parameters ds_meta = frag_meta['Meta']['StructuralSystems'][st]['DamageStates'] for LS_i in range(1, 5): - df_db.loc[counter, f'LS{LS_i}-Family'] = 'lognormal' - df_db.loc[counter, f'LS{LS_i}-Theta_0'] = \ - S_data['EDP_limits'][dl][bt][LS_i - 1] - df_db.loc[counter, f'LS{LS_i}-Theta_1'] = \ - S_data['Fragility_beta'][dl] + df_db.loc[counter, f'LS{LS_i}-Theta_0'] = S_data['EDP_limits'][ + dl + ][bt][LS_i - 1] + df_db.loc[counter, f'LS{LS_i}-Theta_1'] = S_data[ + 'Fragility_beta' + ][dl] if LS_i == 4: p_coll = S_data['P_collapse'][bt] - df_db.loc[counter, f'LS{LS_i}-DamageStateWeights'] = ( - f'{1.0 - p_coll} | {p_coll}') - - cmp_meta["LimitStates"].update({"LS4": { - "DS4": {"Description": ds_meta['DS4']}, - "DS5": {"Description": ds_meta['DS5']} - }}) + df_db.loc[ + counter, f'LS{LS_i}-DamageStateWeights' + ] = f'{1.0 - p_coll} | {p_coll}' + + cmp_meta["LimitStates"].update( + { + "LS4": { + "DS4": {"Description": ds_meta['DS4']}, + "DS5": {"Description": ds_meta['DS5']}, + } + } + ) else: - cmp_meta["LimitStates"].update({f"LS{LS_i}": { - f"DS{LS_i}": {"Description": ds_meta[f"DS{LS_i}"]} - }}) + cmp_meta["LimitStates"].update( + { + f"LS{LS_i}": { + f"DS{LS_i}": { + "Description": ds_meta[f"DS{LS_i}"] + } + } + } + ) # store metadata - meta_dict.update({cmp_id:cmp_meta}) + meta_dict.update({cmp_id: cmp_meta}) counter += 1 @@ -1973,24 +2198,23 @@ def create_Hazus_EQ_fragility_db(source_file, "Comments": frag_meta['Meta']['Collections']['NSD']['Comment'], "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "LimitStates": {} + "LimitStates": {}, } # store the Limit State parameters ds_meta = frag_meta['Meta']['Collections']['NSD']['DamageStates'] for LS_i in range(1, 5): df_db.loc[counter, f'LS{LS_i}-Family'] = 'lognormal' - df_db.loc[counter, f'LS{LS_i}-Theta_0'] = NSD_data['EDP_limits'][ - LS_i - 1] + df_db.loc[counter, f'LS{LS_i}-Theta_0'] = NSD_data['EDP_limits'][LS_i - 1] df_db.loc[counter, f'LS{LS_i}-Theta_1'] = NSD_data['Fragility_beta'] # add limit state metadata - cmp_meta["LimitStates"].update({f"LS{LS_i}": - {f"DS{LS_i}": {"Description": ds_meta[f"DS{LS_i}"]} - }}) + cmp_meta["LimitStates"].update( + {f"LS{LS_i}": {f"DS{LS_i}": {"Description": ds_meta[f"DS{LS_i}"]}}} + ) # store metadata - meta_dict.update({'NSD':cmp_meta}) + meta_dict.update({'NSD': cmp_meta}) counter += 1 @@ -1998,7 +2222,6 @@ def 
create_Hazus_EQ_fragility_db(source_file, NSA_data = raw_data['NonStructural_Acceleration_Sensitive_Fragility_Groups'] for dl in design_levels: - # create the component id cmp_id = f'NSA.{convert_design_level[dl]}' df_db.loc[counter, 'ID'] = cmp_id @@ -2011,33 +2234,40 @@ def create_Hazus_EQ_fragility_db(source_file, # add metadata cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['NSA']['Description']+", "+ - frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Description'] - ), + frag_meta['Meta']['Collections']['NSA']['Description'] + + ", " + + frag_meta['Meta']['DesignLevels'][convert_design_level[dl]][ + 'Description' + ] + ), "Comments": ( - frag_meta['Meta']['Collections']['NSA']['Comment']+"\n"+ - frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Comment'] - ), + frag_meta['Meta']['Collections']['NSA']['Comment'] + + "\n" + + frag_meta['Meta']['DesignLevels'][convert_design_level[dl]][ + 'Comment' + ] + ), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "LimitStates": {} + "LimitStates": {}, } # store the Limit State parameters ds_meta = frag_meta['Meta']['Collections']['NSA']['DamageStates'] for LS_i in range(1, 5): df_db.loc[counter, f'LS{LS_i}-Family'] = 'lognormal' - df_db.loc[counter, f'LS{LS_i}-Theta_0'] = \ - NSA_data['EDP_limits'][dl][LS_i - 1] + df_db.loc[counter, f'LS{LS_i}-Theta_0'] = NSA_data['EDP_limits'][dl][ + LS_i - 1 + ] df_db.loc[counter, f'LS{LS_i}-Theta_1'] = NSA_data['Fragility_beta'] # add limit state metadata - cmp_meta["LimitStates"].update({f"LS{LS_i}": - {f"DS{LS_i}": {"Description": ds_meta[f"DS{LS_i}"]} - }}) + cmp_meta["LimitStates"].update( + {f"LS{LS_i}": {f"DS{LS_i}": {"Description": ds_meta[f"DS{LS_i}"]}}} + ) # store metadata - meta_dict.update({cmp_id:cmp_meta}) + meta_dict.update({cmp_id: cmp_meta}) counter += 1 @@ -2048,9 +2278,8 @@ def create_Hazus_EQ_fragility_db(source_file, for bt in building_types: for dl in design_levels: if bt in LF_data['EDP_limits'][dl].keys(): - # add a dot in bt between structure and height labels, if needed - if ((len(bt)>2) and (bt[-1] in ['L','M','H'])): + if (len(bt) > 2) and (bt[-1] in {'L', 'M', 'H'}): bt_exp = f'{bt[:-1]}.{bt[-1]}' st = bt[:-1] hc = bt[-1] @@ -2069,68 +2298,110 @@ def create_Hazus_EQ_fragility_db(source_file, df_db.loc[counter, 'Demand-Offset'] = 0 # add metadata - if hc != None: + if hc is not None: cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['LF']['Description']+", "+ - frag_meta['Meta']['StructuralSystems'][st]['Description']+", "+ - frag_meta['Meta']['HeightClasses'][hc]['Description'] + ", "+ - frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Description'] - ), + frag_meta['Meta']['Collections']['LF']['Description'] + + ", " + + frag_meta['Meta']['StructuralSystems'][st][ + 'Description' + ] + + ", " + + frag_meta['Meta']['HeightClasses'][hc][ + 'Description' + ] + + ", " + + frag_meta['Meta']['DesignLevels'][ + convert_design_level[dl] + ]['Description'] + ), "Comments": ( - frag_meta['Meta']['Collections']['LF']['Comment']+"\n"+ - frag_meta['Meta']['StructuralSystems'][st]['Comment']+"\n"+ - frag_meta['Meta']['HeightClasses'][hc]['Comment'] + "\n"+ - frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Comment'] - ), + frag_meta['Meta']['Collections']['LF']['Comment'] + + "\n" + + frag_meta['Meta']['StructuralSystems'][st][ + 'Comment' + ] + + "\n" + + frag_meta['Meta']['HeightClasses'][hc]['Comment'] + + "\n" + + frag_meta['Meta']['DesignLevels'][ + convert_design_level[dl] + 
]['Comment'] + ), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "LimitStates": {} + "LimitStates": {}, } else: cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['LF']['Description']+", "+ - frag_meta['Meta']['StructuralSystems'][st]['Description']+", "+ - frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Description'] - ), + frag_meta['Meta']['Collections']['LF']['Description'] + + ", " + + frag_meta['Meta']['StructuralSystems'][st][ + 'Description' + ] + + ", " + + frag_meta['Meta']['DesignLevels'][ + convert_design_level[dl] + ]['Description'] + ), "Comments": ( - frag_meta['Meta']['Collections']['LF']['Comment']+"\n"+ - frag_meta['Meta']['StructuralSystems'][st]['Comment']+"\n"+ - frag_meta['Meta']['DesignLevels'][convert_design_level[dl]]['Comment'] - ), + frag_meta['Meta']['Collections']['LF']['Comment'] + + "\n" + + frag_meta['Meta']['StructuralSystems'][st][ + 'Comment' + ] + + "\n" + + frag_meta['Meta']['DesignLevels'][ + convert_design_level[dl] + ]['Comment'] + ), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "LimitStates": {} + "LimitStates": {}, } # store the Limit State parameters - ds_meta = frag_meta['Meta']['StructuralSystems'][st]['DamageStates'] + ds_meta = frag_meta['Meta']['StructuralSystems'][st][ + 'DamageStates' + ] for LS_i in range(1, 5): - df_db.loc[counter, f'LS{LS_i}-Family'] = 'lognormal' - df_db.loc[counter, f'LS{LS_i}-Theta_0'] = \ - LF_data['EDP_limits'][dl][bt][LS_i - 1] - df_db.loc[counter, f'LS{LS_i}-Theta_1'] = \ - LF_data['Fragility_beta'][dl] + df_db.loc[counter, f'LS{LS_i}-Theta_0'] = LF_data[ + 'EDP_limits' + ][dl][bt][LS_i - 1] + df_db.loc[counter, f'LS{LS_i}-Theta_1'] = LF_data[ + 'Fragility_beta' + ][dl] if LS_i == 4: p_coll = LF_data['P_collapse'][bt] - df_db.loc[counter, f'LS{LS_i}-DamageStateWeights'] = ( - f'{1.0 - p_coll} | {p_coll}') - - cmp_meta["LimitStates"].update({"LS4": { - "DS4": {"Description": ds_meta['DS4']}, - "DS5": {"Description": ds_meta['DS5']} - }}) + df_db.loc[ + counter, f'LS{LS_i}-DamageStateWeights' + ] = f'{1.0 - p_coll} | {p_coll}' + + cmp_meta["LimitStates"].update( + { + "LS4": { + "DS4": {"Description": ds_meta['DS4']}, + "DS5": {"Description": ds_meta['DS5']}, + } + } + ) else: - cmp_meta["LimitStates"].update({f"LS{LS_i}": { - f"DS{LS_i}": {"Description": ds_meta[f"DS{LS_i}"]} - }}) + cmp_meta["LimitStates"].update( + { + f"LS{LS_i}": { + f"DS{LS_i}": { + "Description": ds_meta[f"DS{LS_i}"] + } + } + } + ) # store metadata - meta_dict.update({cmp_id:cmp_meta}) + meta_dict.update({cmp_id: cmp_meta}) counter += 1 @@ -2151,36 +2422,41 @@ def create_Hazus_EQ_fragility_db(source_file, # add metadata cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['GF']['Description']+ - f", {direction} Direction, {f_depth} Foundation" - ), - "Comments": ( - frag_meta['Meta']['Collections']['GF']['Comment'] - ), + frag_meta['Meta']['Collections']['GF']['Description'] + + f", {direction} Direction, {f_depth} Foundation" + ), + "Comments": (frag_meta['Meta']['Collections']['GF']['Comment']), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "LimitStates": {} + "LimitStates": {}, } # store the Limit State parameters ds_meta = frag_meta['Meta']['Collections']['GF']['DamageStates'] df_db.loc[counter, 'LS1-Family'] = 'lognormal' - df_db.loc[counter, 'LS1-Theta_0'] = \ - GF_data['EDP_limits'][direction][f_depth] - df_db.loc[counter, 'LS1-Theta_1'] = \ - GF_data['Fragility_beta'][direction][f_depth] + 
df_db.loc[counter, 'LS1-Theta_0'] = GF_data['EDP_limits'][direction][ + f_depth + ] + df_db.loc[counter, 'LS1-Theta_1'] = GF_data['Fragility_beta'][direction][ + f_depth + ] p_complete = GF_data['P_Complete'] - df_db.loc[counter, 'LS1-DamageStateWeights'] = ( - f'{1.0 - p_complete} | {p_complete}') - - cmp_meta["LimitStates"].update({"LS1": { - "DS1": {"Description": ds_meta['DS1']}, - "DS2": {"Description": ds_meta['DS2']} - }}) + df_db.loc[ + counter, 'LS1-DamageStateWeights' + ] = f'{1.0 - p_complete} | {p_complete}' + + cmp_meta["LimitStates"].update( + { + "LS1": { + "DS1": {"Description": ds_meta['DS1']}, + "DS2": {"Description": ds_meta['DS2']}, + } + } + ) # store metadata - meta_dict.update({cmp_id:cmp_meta}) + meta_dict.update({cmp_id: cmp_meta}) counter += 1 @@ -2203,17 +2479,19 @@ def create_Hazus_EQ_fragility_db(source_file, df_db.to_csv(target_data_file) # save the metadata - with open(target_meta_file, 'w+') as f: + with open(target_meta_file, 'w+', encoding='utf-8') as f: json.dump(meta_dict, f, indent=2) print("Successfully parsed and saved the fragility data from Hazus EQ") -def create_Hazus_EQ_bldg_repair_db(source_file, - meta_file='', - target_data_file='loss_repair_DB_Hazus_EQ_bldg.csv', - target_meta_file='loss_repair_DB_Hazus_EQ_bldg.json', - resolution='building'): +def create_Hazus_EQ_repair_db( + source_file, + meta_file='', + target_data_file='loss_repair_DB_Hazus_EQ_bldg.csv', + target_meta_file='loss_repair_DB_Hazus_EQ_bldg.json', + resolution='building', +): """ Create a database file based on the HAZUS EQ Technical Manual @@ -2233,9 +2511,9 @@ def create_Hazus_EQ_bldg_repair_db(source_file, target_meta_file: string Path where the repair DB metadata should be saved. A json file is expected. - resoltuion: string - If building, the function produces the conventional Hazus - fragilities. If story, the function produces story-level + resolution: string + If building, the function produces the conventional Hazus + fragilities. If story, the function produces story-level fragilities. 
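An illustrative usage sketch (editor's addition, not part of the patch): the renamed create_Hazus_EQ_repair_db keeps the keyword interface of the old create_Hazus_EQ_bldg_repair_db, so callers only need to update the function name, and the same resolution switch also applies to create_Hazus_EQ_fragility_db. All file paths below are hypothetical placeholders.

    from pelicun import db

    # building-level (conventional Hazus) databases
    db.create_Hazus_EQ_fragility_db(
        source_file='Hazus_EQ_source.json',               # hypothetical path
        meta_file='Hazus_EQ_metadata.json',               # hypothetical path
        target_data_file='damage_DB_Hazus_EQ_bldg.csv',   # hypothetical path
        target_meta_file='damage_DB_Hazus_EQ_bldg.json',  # hypothetical path
        resolution='building',
    )

    # repair consequences; pass resolution='story' for story-level results
    db.create_Hazus_EQ_repair_db(
        source_file='Hazus_EQ_source.json',               # hypothetical path
        meta_file='Hazus_EQ_metadata.json',               # hypothetical path
        resolution='building',
    )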
""" @@ -2251,14 +2529,13 @@ def create_Hazus_EQ_bldg_repair_db(source_file, # parse the extra metadata file if Path(meta_file).is_file(): - with open(meta_file, 'r') as f: + with open(meta_file, 'r', encoding='utf-8') as f: frag_meta = json.load(f) else: frag_meta = {} # prepare lists of labels for various building features - occupancies = list( - raw_data['Structural_Fragility_Groups']['Repair_cost'].keys()) + occupancies = list(raw_data['Structural_Fragility_Groups']['Repair_cost'].keys()) # initialize the output loss table # define the columns @@ -2274,27 +2551,24 @@ def create_Hazus_EQ_bldg_repair_db(source_file, # create the MultiIndex cmp_types = ['STR', 'NSD', 'NSA', 'LF'] - comps = [f'{cmp_type}.{occ_type}' - for cmp_type in cmp_types for occ_type in occupancies] + comps = [ + f'{cmp_type}.{occ_type}' + for cmp_type in cmp_types + for occ_type in occupancies + ] DVs = ['Cost', 'Time'] df_MI = pd.MultiIndex.from_product([comps, DVs], names=['ID', 'DV']) - df_db = pd.DataFrame( - columns=out_cols, - index=df_MI, - dtype=float - ) + df_db = pd.DataFrame(columns=out_cols, index=df_MI, dtype=float) # initialize the dictionary that stores the loss metadata meta_dict = {} # add the general information to the meta dict if "_GeneralInformation" in frag_meta.keys(): - GI = frag_meta["_GeneralInformation"] for key, item in deepcopy(GI).items(): - if key == 'ComponentGroups_Loss_Repair': GI.update({'ComponentGroups': item}) @@ -2307,30 +2581,31 @@ def create_Hazus_EQ_bldg_repair_db(source_file, S_data = raw_data['Structural_Fragility_Groups'] for occ_type in occupancies: - # create the component id cmp_id = f'STR.{occ_type}' cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['STR']['Description']+", "+ - frag_meta['Meta']['OccupancyTypes'][occ_type]['Description'] - ), + frag_meta['Meta']['Collections']['STR']['Description'] + + ", " + + frag_meta['Meta']['OccupancyTypes'][occ_type]['Description'] + ), "Comments": ( - frag_meta['Meta']['Collections']['STR']['Comment']+"\n"+ - frag_meta['Meta']['OccupancyTypes'][occ_type]['Comment'] - ), + frag_meta['Meta']['Collections']['STR']['Comment'] + + "\n" + + frag_meta['Meta']['OccupancyTypes'][occ_type]['Comment'] + ), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "DamageStates": {} + "DamageStates": {}, } # store the consequence values for each Damage State ds_meta = frag_meta['Meta']['Collections']['STR']['DamageStates'] for DS_i in range(1, 6): - - cmp_meta["DamageStates"].update({f"DS{DS_i}": - {"Description": ds_meta[f"DS{DS_i}"]}}) + cmp_meta["DamageStates"].update( + {f"DS{DS_i}": {"Description": ds_meta[f"DS{DS_i}"]}} + ) # DS4 and DS5 have identical repair consequences if DS_i == 5: @@ -2338,142 +2613,145 @@ def create_Hazus_EQ_bldg_repair_db(source_file, else: ds_i = DS_i - df_db.loc[ - (cmp_id, 'Cost'), - f'DS{DS_i}-Theta_0'] = S_data['Repair_cost'][occ_type][ds_i-1] + df_db.loc[(cmp_id, 'Cost'), f'DS{DS_i}-Theta_0'] = S_data['Repair_cost'][ + occ_type + ][ds_i - 1] - df_db.loc[ - (cmp_id, 'Time'), - f'DS{DS_i}-Theta_0'] = S_data['Repair_time'][occ_type][ds_i-1] + df_db.loc[(cmp_id, 'Time'), f'DS{DS_i}-Theta_0'] = S_data['Repair_time'][ + occ_type + ][ds_i - 1] # store metadata - meta_dict.update({cmp_id:cmp_meta}) + meta_dict.update({cmp_id: cmp_meta}) # Second, the non-structural drift sensitive one NSD_data = raw_data['NonStructural_Drift_Sensitive_Fragility_Groups'] for occ_type in occupancies: - # create the component id cmp_id = f'NSD.{occ_type}' cmp_meta = { "Description": ( - 
frag_meta['Meta']['Collections']['NSD']['Description']+", "+ - frag_meta['Meta']['OccupancyTypes'][occ_type]['Description'] - ), + frag_meta['Meta']['Collections']['NSD']['Description'] + + ", " + + frag_meta['Meta']['OccupancyTypes'][occ_type]['Description'] + ), "Comments": ( - frag_meta['Meta']['Collections']['NSD']['Comment']+"\n"+ - frag_meta['Meta']['OccupancyTypes'][occ_type]['Comment'] - ), + frag_meta['Meta']['Collections']['NSD']['Comment'] + + "\n" + + frag_meta['Meta']['OccupancyTypes'][occ_type]['Comment'] + ), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "DamageStates": {} + "DamageStates": {}, } # store the consequence values for each Damage State ds_meta = frag_meta['Meta']['Collections']['NSD']['DamageStates'] for DS_i in range(1, 5): + cmp_meta["DamageStates"].update( + {f"DS{DS_i}": {"Description": ds_meta[f"DS{DS_i}"]}} + ) - cmp_meta["DamageStates"].update({f"DS{DS_i}": - {"Description": ds_meta[f"DS{DS_i}"]}}) - - df_db.loc[ - (cmp_id, 'Cost'), - f'DS{DS_i}-Theta_0'] = NSD_data['Repair_cost'][occ_type][DS_i-1] + df_db.loc[(cmp_id, 'Cost'), f'DS{DS_i}-Theta_0'] = NSD_data[ + 'Repair_cost' + ][occ_type][DS_i - 1] # store metadata - meta_dict.update({cmp_id:cmp_meta}) + meta_dict.update({cmp_id: cmp_meta}) # Third, the non-structural acceleration sensitive fragilities NSA_data = raw_data['NonStructural_Acceleration_Sensitive_Fragility_Groups'] for occ_type in occupancies: - # create the component id cmp_id = f'NSA.{occ_type}' cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['NSA']['Description']+", "+ - frag_meta['Meta']['OccupancyTypes'][occ_type]['Description'] - ), + frag_meta['Meta']['Collections']['NSA']['Description'] + + ", " + + frag_meta['Meta']['OccupancyTypes'][occ_type]['Description'] + ), "Comments": ( - frag_meta['Meta']['Collections']['NSA']['Comment']+"\n"+ - frag_meta['Meta']['OccupancyTypes'][occ_type]['Comment'] - ), + frag_meta['Meta']['Collections']['NSA']['Comment'] + + "\n" + + frag_meta['Meta']['OccupancyTypes'][occ_type]['Comment'] + ), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "DamageStates": {} + "DamageStates": {}, } # store the consequence values for each Damage State ds_meta = frag_meta['Meta']['Collections']['NSA']['DamageStates'] for DS_i in range(1, 5): + cmp_meta["DamageStates"].update( + {f"DS{DS_i}": {"Description": ds_meta[f"DS{DS_i}"]}} + ) - cmp_meta["DamageStates"].update({f"DS{DS_i}": - {"Description": ds_meta[f"DS{DS_i}"]}}) - - df_db.loc[ - (cmp_id, 'Cost'), - f'DS{DS_i}-Theta_0'] = NSA_data['Repair_cost'][occ_type][DS_i-1] + df_db.loc[(cmp_id, 'Cost'), f'DS{DS_i}-Theta_0'] = NSA_data[ + 'Repair_cost' + ][occ_type][DS_i - 1] # store metadata - meta_dict.update({cmp_id:cmp_meta}) + meta_dict.update({cmp_id: cmp_meta}) # Fourth, the lifeline facilities - only at the building-level resolution if resolution == 'building': LF_data = raw_data['Lifeline_Facilities'] for occ_type in occupancies: - # create the component id cmp_id = f'LF.{occ_type}' cmp_meta = { "Description": ( - frag_meta['Meta']['Collections']['LF']['Description']+", "+ - frag_meta['Meta']['OccupancyTypes'][occ_type]['Description'] - ), + frag_meta['Meta']['Collections']['LF']['Description'] + + ", " + + frag_meta['Meta']['OccupancyTypes'][occ_type]['Description'] + ), "Comments": ( - frag_meta['Meta']['Collections']['LF']['Comment']+"\n"+ - frag_meta['Meta']['OccupancyTypes'][occ_type]['Comment'] - ), + frag_meta['Meta']['Collections']['LF']['Comment'] + + "\n" + + 
frag_meta['Meta']['OccupancyTypes'][occ_type]['Comment'] + ), "SuggestedComponentBlockSize": "1 EA", "RoundUpToIntegerQuantity": "True", - "DamageStates": {} + "DamageStates": {}, } # store the consequence values for each Damage State ds_meta = frag_meta['Meta']['Collections']['LF']['DamageStates'] for DS_i in range(1, 6): - # DS4 and DS5 have identical repair consequences if DS_i == 5: ds_i = 4 else: ds_i = DS_i - cmp_meta["DamageStates"].update({f"DS{DS_i}": - {"Description": ds_meta[f"DS{DS_i}"]}}) + cmp_meta["DamageStates"].update( + {f"DS{DS_i}": {"Description": ds_meta[f"DS{DS_i}"]}} + ) - df_db.loc[ - (cmp_id, 'Cost'), - f'DS{DS_i}-Theta_0'] = LF_data['Repair_cost'][occ_type][ds_i - 1] + df_db.loc[(cmp_id, 'Cost'), f'DS{DS_i}-Theta_0'] = LF_data[ + 'Repair_cost' + ][occ_type][ds_i - 1] - df_db.loc[ - (cmp_id, 'Time'), - f'DS{DS_i}-Theta_0'] = LF_data['Repair_time'][occ_type][ds_i - 1] + df_db.loc[(cmp_id, 'Time'), f'DS{DS_i}-Theta_0'] = LF_data[ + 'Repair_time' + ][occ_type][ds_i - 1] # store metadata - meta_dict.update({cmp_id:cmp_meta}) + meta_dict.update({cmp_id: cmp_meta}) # remove empty rows (from the end) df_db.dropna(how='all', inplace=True) # All Hazus components have complete fragility info, df_db['Incomplete'] = 0 - #df_db.loc[:, 'Incomplete'] = 0 + # df_db.loc[:, 'Incomplete'] = 0 # The damage quantity unit is the same for all consequence values df_db.loc[:, 'Quantity-Unit'] = "1 EA" @@ -2495,16 +2773,19 @@ def create_Hazus_EQ_bldg_repair_db(source_file, df_db.to_csv(target_data_file) # save the metadata - later - with open(target_meta_file, 'w+') as f: + with open(target_meta_file, 'w+', encoding='utf-8') as f: json.dump(meta_dict, f, indent=2) - print("Successfully parsed and saved the repair consequence data from Hazus " - "EQ") + print( + "Successfully parsed and saved the repair consequence data from Hazus EQ" + ) -def create_Hazus_EQ_bldg_injury_db(source_file, - target_data_file='bldg_injury_DB_Hazus_EQ.csv', - target_meta_file='bldg_injury_DB_Hazus_EQ.json'): +def create_Hazus_EQ_bldg_injury_db( + source_file, + target_data_file='bldg_injury_DB_Hazus_EQ.csv', + target_meta_file='bldg_injury_DB_Hazus_EQ.json', +): """ Create a database file based on the HAZUS EQ Technical Manual @@ -2529,16 +2810,17 @@ def create_Hazus_EQ_bldg_injury_db(source_file, with open(source_file, 'r', encoding='utf-8') as f: raw_data = json.load(f) - # parse the extra metadata file - if Path(meta_file).is_file(): - with open(meta_file, 'r') as f: - frag_meta = json.load(f) - else: - frag_meta = {} + # # parse the extra metadata file + # if Path(meta_file).is_file(): + # with open(meta_file, 'r') as f: + # frag_meta = json.load(f) + # else: + # frag_meta = {} # prepare lists of labels for various building features building_types = list( - raw_data['Structural_Fragility_Groups']['P_collapse'].keys()) + raw_data['Structural_Fragility_Groups']['P_collapse'].keys() + ) # initialize the output loss table # define the columns @@ -2554,28 +2836,21 @@ def create_Hazus_EQ_bldg_injury_db(source_file, # create the MultiIndex cmp_types = ['STR', 'LF'] - comps = [f'{cmp_type}.{bt}' - for cmp_type in cmp_types for bt in building_types] + comps = [f'{cmp_type}.{bt}' for cmp_type in cmp_types for bt in building_types] DVs = ['S1', 'S2', 'S3', 'S4'] df_MI = pd.MultiIndex.from_product([comps, DVs], names=['ID', 'DV']) - df_db = pd.DataFrame( - columns=out_cols, - index=df_MI, - dtype=float - ) + df_db = pd.DataFrame(columns=out_cols, index=df_MI, dtype=float) # First, prepare the structural damage 
consequences S_data = raw_data['Structural_Fragility_Groups'] for bt in building_types: - # create the component id cmp_id = f'STR.{bt}' # store the consequence values for each Damage State for DS_i in range(1, 6): - # DS5 is stored under 'collapse' if DS_i == 5: ds_i = 'Collapse' @@ -2584,20 +2859,18 @@ def create_Hazus_EQ_bldg_injury_db(source_file, for S_i in range(1, 5): s_label = f'S{S_i}' - df_db.loc[(cmp_id, s_label), f'DS{DS_i}-Theta_0'] = ( - S_data['Injury_rates'][ds_i][bt][S_i-1]) + df_db.loc[(cmp_id, s_label), f'DS{DS_i}-Theta_0'] = S_data[ + 'Injury_rates' + ][ds_i][bt][S_i - 1] # Second, the lifeline facilities - LF_data = raw_data['Lifeline_Facilities'] for bt in building_types: - # create the component id cmp_id = f'STR.{bt}' # store the consequence values for each Damage State for DS_i in range(1, 6): - # DS5 is stored under 'collapse' if DS_i == 5: ds_i = 'Collapse' @@ -2606,8 +2879,9 @@ def create_Hazus_EQ_bldg_injury_db(source_file, for S_i in range(1, 5): s_label = f'S{S_i}' - df_db.loc[(cmp_id, s_label), f'DS{DS_i}-Theta_0'] = ( - S_data['Injury_rates'][ds_i][bt][S_i - 1]) + df_db.loc[(cmp_id, s_label), f'DS{DS_i}-Theta_0'] = S_data[ + 'Injury_rates' + ][ds_i][bt][S_i - 1] # remove empty rows df_db.dropna(how='all', inplace=True) diff --git a/pelicun/file_io.py b/pelicun/file_io.py index 0e0e8fc3e..be3063ae8 100644 --- a/pelicun/file_io.py +++ b/pelicun/file_io.py @@ -242,6 +242,64 @@ def save_to_csv(data, filepath, units=None, unit_conversion_factors=None, return None +def substitute_default_path(data_paths): + """ + Substitutes the default directory path in a list of data paths + with a specified path. + + This function iterates over a list of data paths and replaces + occurrences of the 'PelicunDefault/' substring with the path + specified by `base.pelicun_path` concatenated with + '/resources/SimCenterDBDL/'. This operation is performed to update + paths that are using a default location to a user-defined location + within the pelicun framework. The updated list of paths is then + returned. + + Parameters + ---------- + data_paths : list of str + A list containing the paths to data files. These paths may + include a placeholder directory 'PelicunDefault/' that needs + to be substituted with the actual path specified in + `base.pelicun_path`. + + Returns + ------- + list of str + The list with updated paths where 'PelicunDefault/' has been + replaced with the specified path in `base.pelicun_path` + concatenated with '/resources/SimCenterDBDL/'. + + Notes + ----- + - The function assumes that `base.pelicun_path` is properly + initialized and points to the correct directory where resources + are located. + - If a path in the input list does not contain 'PelicunDefault/', + it is added to the output list unchanged. 
+ + Example + ------- + >>> data_paths = ['PelicunDefault/data/file1.txt', + 'data/file2.txt'] + >>> substitute_default_path(data_paths) + ['{base.pelicun_path}/resources/SimCenterDBDL/data/file1.txt', + 'data/file2.txt'] + + """ + updated_paths = [] + for data_path in data_paths: + if 'PelicunDefault/' in data_path: + path = data_path.replace( + 'PelicunDefault/', + f'{base.pelicun_path}/resources/SimCenterDBDL/', + ) + updated_paths.append(path) + else: + updated_paths.append(data_path) + return updated_paths + + def load_data( data_source, unit_conversion_factors, @@ -323,20 +381,11 @@ def load_data( if log: log.msg('Converting units...', prepend_timestamp=False) - # todo lambda - def get_conversion_factor(unit): - """ - Utility function to be used in `map`, handling the case - where unit is NaN and otherwise pulling values from the - `unit_conversion_factors` dictionary. - """ - return ( - 1.00 - if pd.isna(unit) - else unit_conversion_factors.get(unit, 1.00) - ) - - conversion_factors = units.map(get_conversion_factor) + conversion_factors = units.map( + lambda unit: 1.00 + if pd.isna(unit) + else unit_conversion_factors.get(unit, 1.00) + ) if orientation == 1: data.loc[:, numeric_elements] = data.loc[ diff --git a/pelicun/model.py b/pelicun/model.py deleted file mode 100644 index 22655a8e4..000000000 --- a/pelicun/model.py +++ /dev/null @@ -1,3998 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Leland Stanford Junior University -# Copyright (c) 2018 The Regents of the University of California -# -# This file is part of pelicun. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its contributors -# may be used to endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -# -# You should have received a copy of the BSD 3-Clause License along with -# pelicun. If not, see . -# -# Contributors: -# Adam Zsarnóczay -# John Vouvakis Manousakis - -""" -This module has classes and methods that define and access the model used for -loss assessment. - -.. rubric:: Contents - -.. 
autosummary:: - - prep_constant_median_DV - prep_bounded_multilinear_median_DV - - DemandModel - AssetModel - DamageModel - LossModel - BldgRepairModel - -""" - -from itertools import product -from copy import deepcopy -import numpy as np -import pandas as pd -from . import base -from . import uq -from . import file_io - - -idx = base.idx - - -class PelicunModel: - """ - Generic model class to manage methods shared between all models in Pelicun. - - """ - - def __init__(self, assessment): - - # link the PelicunModel object to its Assessment object - self._asmnt = assessment - - # link logging methods as attributes enabling more - # concise syntax - self.log_msg = self._asmnt.log.msg - self.log_div = self._asmnt.log.div - - def convert_marginal_params(self, marginal_params, units, arg_units=None): - """ - Converts the parameters of marginal distributions in a model to SI units. - - Parameters - ---------- - marginal_params: DataFrame - Each row corresponds to a marginal distribution with Theta - parameters and TruncateLower, TruncateUpper truncation limits - identified in separate columns. - units: Series - Identifies the input units of each marginal. The index shall be - identical to the index of the marginal_params argument. The values - are strings that correspond to the units listed in base.py. - arg_units: Series - Identifies the size of a reference entity for the marginal - parameters. For example, when the parameters refer to a component - repair cost, the reference size is the component block size the - repair cost corresponds to. When the parameters refer to a capacity, - demand, or component quantity, the reference size can be omitted - and the default value will ensure that the corresponding scaling is - skipped. This Series provides the units of the reference entities - for each component. Use '1 EA' if you want to skip such scaling for - select components but provide arg units for others. - - Returns - ------- - marginal_params: DataFrame - Same structure as the input DataFrame but with values scaled to - represent internal Standard International units. 
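A brief editor's sketch of the SimCenter array notation that convert_marginal_params accepts for multilinear Theta entries (the format is inferred from the string parsing a few lines below; the numbers are made up): the ordinates and abscissae are '|'-separated, and each part is a comma-separated list.

    import numpy as np

    theta_str = '0.0,0.5,1.0|0.0,1.0,2.0'  # hypothetical multilinear function
    ordinates, abscissae = (
        np.array(part.split(','), dtype=float) for part in theta_str.split('|')
    )
    # the ordinates are scaled together with the distribution parameters; the
    # abscissae are scaled by the argument-unit factor and re-joined into the
    # string afterwards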
- - """ - assert np.all(marginal_params.index == units.index) - if arg_units is not None: - assert np.all( - marginal_params.index == arg_units.index) - - # preserve the columns in the input marginal_params - original_cols = marginal_params.columns - - # add extra columns if they are not available in the marginals - for col_name in ('Family', - 'Theta_0', 'Theta_1', 'Theta_2', - 'TruncateLower', 'TruncateUpper'): - if col_name not in marginal_params.columns: - - marginal_params[col_name] = np.nan - - # get a list of unique units - unique_units = units.unique() - - # for each unit - for unit_name in unique_units: - - # get the scale factor for converting from the source unit - unit_factor = self._asmnt.calc_unit_scale_factor(unit_name) - - # get the variables that use the given unit - unit_ids = marginal_params.loc[units == unit_name].index - - # for each variable - for row_id in unit_ids: - - # pull the parameters of the marginal distribution - family = marginal_params.at[row_id, 'Family'] - - if family == 'empirical': - continue - - # load the theta values - theta = marginal_params.loc[ - row_id, ['Theta_0', 'Theta_1', 'Theta_2']].values - - # for each theta - args = [] - for t_i, theta_i in enumerate(theta): - - # if theta_i evaluates to NaN, it is considered undefined - if pd.isna(theta_i): - args.append([]) - continue - - try: - # if theta is a scalar, just store it - theta[t_i] = float(theta_i) - args.append([]) - - except ValueError: - - # otherwise, we assume it is a string using SimCenter - # array notation to identify coordinates of a - # multilinear function - values = [val.split(',') for val in theta_i.split('|')] - - # the first set of values defines the ordinates that - # need to be passed to the distribution scaling method - theta[t_i] = np.array(values[0], dtype=float) - - # the second set of values defines the abscissae that - # we will use after the distribution scaling - args.append(np.array(values[1], dtype=float)) - - # load the truncation limits - tr_limits = marginal_params.loc[ - row_id, ['TruncateLower', 'TruncateUpper']] - - arg_unit_factor = 1.0 - - # check if there is a need to scale due to argument units - if not (arg_units is None): - - # get the argument unit for the given marginal - arg_unit = arg_units.get(row_id) - - if arg_unit != '1 EA': - - # get the scale factor - arg_unit_factor = self._asmnt.calc_unit_scale_factor( - arg_unit - ) - - # scale arguments, if needed - for a_i, arg in enumerate(args): - - if isinstance(arg, np.ndarray): - args[a_i] = arg * arg_unit_factor - - # convert the distribution parameters to SI - theta, tr_limits = uq.scale_distribution( - unit_factor / arg_unit_factor, family, theta, tr_limits) - - # convert multilinear function parameters back into strings - for a_i, arg in enumerate(args): - - if len(arg) > 0: - - theta[a_i] = '|'.join( - [','.join([f'{val:g}' for val in vals]) - for vals in (theta[a_i], args[a_i])]) - - # and update the values in the DF - marginal_params.loc[ - row_id, ['Theta_0', 'Theta_1', 'Theta_2']] = theta - - marginal_params.loc[ - row_id, ['TruncateLower', 'TruncateUpper']] = tr_limits - - # remove the added columns - marginal_params = marginal_params[original_cols] - - return marginal_params - - -class DemandModel(PelicunModel): - """ - Manages demand information used in assessments. - - Parameters - ---------- - marginal_params: DataFrame - Available after the model has been calibrated or calibration data has - been imported. Defines the marginal distribution of each demand - variable. 
- correlation: DataFrame - Available after the model has been calibrated or calibration data has - been imported. Defines the correlation between the demand variables in - standard normal space. That is, the variables are sampled in standard - normal space and then transformed into the space of their respective - distributions and the correlation matrix corresponds to the space where - they are sampled. - empirical_data: DataFrame - Available after the model has been calibrated or calibration data has - been imported. It provides an empirical dataset for the demand - variables that are modeled with an empirical distribution. - sample: DataFrame - Available after a sample has been generated. Demand variables are - listed in columns and each row provides an independent realization of - the joint demand distribution. - units: Series - Available after any demand data has been loaded. The index identifies - the demand variables and the values provide the unit for each variable. - - """ - - def __init__(self, assessment): - - super().__init__(assessment) - - self.marginal_params = None - self.correlation = None - self.empirical_data = None - self.units = None - - self._RVs = None - self.sample = None - - def save_sample(self, filepath=None, save_units=False): - """ - Save demand sample to a csv file or return it in a DataFrame - - """ - - self.log_div() - if filepath is not None: - self.log_msg('Saving demand sample...') - - res = file_io.save_to_csv( - self.sample, filepath, units=self.units, - unit_conversion_factors=self._asmnt.unit_conversion_factors, - use_simpleindex=(filepath is not None), - log=self._asmnt.log) - - if filepath is not None: - self.log_msg('Demand sample successfully saved.', - prepend_timestamp=False) - return None - - # else: - units = res.loc["Units"] - res.drop("Units", inplace=True) - - if save_units: - return res.astype(float), units - - # else: - return res.astype(float) - - def load_sample(self, filepath): - """ - Load demand sample data and parse it. - - Besides parsing the sample, the method also reads and saves the units - specified for each demand variable. If no units are specified, Standard - Units are assumed. - - Parameters - ---------- - filepath: string or DataFrame - Location of the file with the demand sample. - - """ - - def parse_header(raw_header): - - old_MI = raw_header - - # The first number (event_ID) in the demand labels is optional and - # currently not used. We remove it if it was in the raw data. 
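A short editor's sketch of the header layout this parser expects (the level names come from parse_header below; the demand labels and numbers are made up): each column is identified by (type, loc, dir), optionally preceded by an event_ID level that is dropped here.

    import pandas as pd

    raw_columns = pd.MultiIndex.from_tuples(
        [
            ('1', 'PID', '1', '1'),  # event_ID, type, loc, dir
            ('1', 'PGV', '0', '1'),
        ]
    )
    # parse_header removes the leading event_ID level (when present) and
    # names the remaining levels ['type', 'loc', 'dir']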
- if old_MI.nlevels == 4: - - if self._asmnt.log.verbose: - self.log_msg('Removing event_ID from header...', - prepend_timestamp=False) - - new_column_index_array = np.array( - [old_MI.get_level_values(i) for i in range(1, 4)]) - - else: - new_column_index_array = np.array( - [old_MI.get_level_values(i) for i in range(3)]) - - # Remove whitespace to avoid ambiguity - - if self._asmnt.log.verbose: - self.log_msg('Removing whitespace from header...', - prepend_timestamp=False) - - wspace_remove = np.vectorize(lambda name: str(name).replace(' ', '')) - - new_column_index = wspace_remove(new_column_index_array) - - # Creating new, cleaned-up header - - new_MI = pd.MultiIndex.from_arrays( - new_column_index, names=['type', 'loc', 'dir']) - - return new_MI - - self.log_div() - self.log_msg('Loading demand data...') - - demand_data, units = file_io.load_data( - filepath, self._asmnt.unit_conversion_factors, - return_units=True, log=self._asmnt.log) - - parsed_data = demand_data.copy() - - # start with cleaning up the header - - parsed_data.columns = parse_header(parsed_data.columns) - - # Remove errors, if needed - if 'ERROR' in parsed_data.columns.get_level_values(0): - - self.log_msg('Removing errors from the raw data...', - prepend_timestamp=False) - - error_list = parsed_data.loc[:, idx['ERROR', :, :]].values.astype(bool) - - parsed_data = parsed_data.loc[~error_list, :].copy() - parsed_data.drop('ERROR', level=0, axis=1, inplace=True) - - self.log_msg("\nBased on the values in the ERROR column, " - f"{np.sum(error_list)} demand samples were removed.\n", - prepend_timestamp=False) - - self.sample = parsed_data - - self.log_msg('Demand data successfully parsed.', prepend_timestamp=False) - - # parse the index for the units - units.index = parse_header(units.index) - - self.units = units - - self.log_msg('Demand units successfully parsed.', prepend_timestamp=False) - - def estimate_RID(self, demands, params, method='FEMA P58'): - """ - Estimate residual drift realizations based on other demands - - Parameters - ---------- - demands: DataFrame - Sample of demands required for the method to estimate the RID values - params: dict - Parameters required for the method to estimate the RID values - method: {'FEMA P58'}, default: 'FEMA P58' - Method to use for the estimation - currently, only one is available. - """ - - if method == 'FEMA P58': - - # method is described in FEMA P-58 Volume 1 Section 5.4 & Appendix C - - # the provided demands shall be PID values at various loc-dir pairs - PID = demands - - # there's only one parameter needed: the yield drift - yield_drift = params['yield_drift'] - - # three subdomains of demands are identified - small = PID < yield_drift - medium = PID < 4 * yield_drift - large = PID >= 4 * yield_drift - - # convert PID to RID in each subdomain - RID = PID.copy() - RID[large] = PID[large] - 3 * yield_drift - RID[medium] = 0.3 * (PID[medium] - yield_drift) - RID[small] = 0. 
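# Editor's note: a quick numeric check of the piecewise mapping above,
# assuming a hypothetical yield_drift of 0.01:
#   PID = 0.005 (< yield_drift)      -> RID = 0.0
#   PID = 0.020 (< 4 * yield_drift)  -> RID = 0.3 * (0.020 - 0.01) = 0.003
#   PID = 0.050 (>= 4 * yield_drift) -> RID = 0.050 - 3 * 0.01 = 0.020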
- - # add extra uncertainty to nonzero values - rng = self._asmnt.options.rng - eps = rng.normal(scale=0.2, size=RID.shape) - RID[RID > 0] = np.exp(np.log(RID[RID > 0]) + eps) - - # finally, make sure the RID values are never larger than the PIDs - RID = pd.DataFrame( - np.minimum(PID.values, RID.values), - columns=pd.DataFrame( - 1, index=['RID', ], - columns=PID.columns).stack(level=[0, 1]).index, - index=PID.index) - - else: - RID = None - - # return the generated drift realizations - return RID - - def calibrate_model(self, config): - """ - Calibrate a demand model to describe the raw demand data - - The raw data shall be parsed first to ensure that it follows the - schema expected by this method. The calibration settings define the - characteristics of the multivariate distribution that is fit to the - raw data. - - Parameters - ---------- - config: dict - A dictionary, typically read from a json file, that specifies the - distribution family, truncation and censoring limits, and other - settings for the calibration. - - """ - - def parse_settings(settings, demand_type): - - def parse_str_to_float(in_str, context_string): - - try: - out_float = float(in_str) - - except ValueError: - - self.log_msg(f"WARNING: Could not parse {in_str} provided as " - f"{context_string}. Using NaN instead.", - prepend_timestamp=False) - - out_float = np.nan - - return out_float - - active_d_types = ( - demand_sample.columns.get_level_values('type').unique()) - - if demand_type == 'ALL': - cols = tuple(active_d_types) - - else: - cols_lst = [] - - for d_type in active_d_types: - if d_type.split('_')[0] == demand_type: - cols_lst.append(d_type) - - cols = tuple(cols_lst) - - # load the distribution family - cal_df.loc[idx[cols, :, :], 'Family'] = settings['DistributionFamily'] - - # load limits - for lim in ('CensorLower', 'CensorUpper', - 'TruncateLower', 'TruncateUpper'): - - if lim in settings.keys(): - val = parse_str_to_float(settings[lim], lim) - if not pd.isna(val): - cal_df.loc[idx[cols, :, :], lim] = val - - # scale the censor and truncation limits, if needed - scale_factor = self._asmnt.scale_factor(settings.get('Unit', None)) - - rows_to_scale = ['CensorLower', 'CensorUpper', - 'TruncateLower', 'TruncateUpper'] - cal_df.loc[idx[cols, :, :], rows_to_scale] *= scale_factor - - # load the prescribed additional uncertainty - if 'AddUncertainty' in settings.keys(): - - sig_increase = parse_str_to_float(settings['AddUncertainty'], - 'AddUncertainty') - - # scale the sig value if the target distribution family is normal - if settings['DistributionFamily'] == 'normal': - sig_increase *= scale_factor - - cal_df.loc[idx[cols, :, :], 'SigIncrease'] = sig_increase - - def get_filter_mask(lower_lims, upper_lims): - - demands_of_interest = demand_sample.iloc[:, pd.notna(upper_lims)] - limits_of_interest = upper_lims[pd.notna(upper_lims)] - upper_mask = np.all(demands_of_interest < limits_of_interest, - axis=1) - - demands_of_interest = demand_sample.iloc[:, pd.notna(lower_lims)] - limits_of_interest = lower_lims[pd.notna(lower_lims)] - lower_mask = np.all(demands_of_interest > limits_of_interest, - axis=1) - - return np.all([lower_mask, upper_mask], axis=0) - - self.log_div() - self.log_msg('Calibrating demand model...') - - demand_sample = self.sample - - # initialize a DataFrame that contains calibration information - cal_df = pd.DataFrame( - columns=['Family', - 'CensorLower', 'CensorUpper', - 'TruncateLower', 'TruncateUpper', - 'SigIncrease', 'Theta_0', 'Theta_1'], - index=demand_sample.columns, - 
dtype=float - ) - - cal_df['Family'] = cal_df['Family'].astype(str) - - # start by assigning the default option ('ALL') to every demand column - parse_settings(config['ALL'], 'ALL') - - # then parse the additional settings and make the necessary adjustments - for demand_type in config.keys(): - if demand_type != 'ALL': - parse_settings(config[demand_type], demand_type) - - if self._asmnt.log.verbose: - self.log_msg( - "\nCalibration settings successfully parsed:\n" + str(cal_df), - prepend_timestamp=False) - else: - self.log_msg( - "\nCalibration settings successfully parsed:\n", - prepend_timestamp=False) - - # save the settings - model_params = cal_df.copy() - - # Remove the samples outside of censoring limits - # Currently, non-empirical demands are assumed to have some level of - # correlation, hence, a censored value in any demand triggers the - # removal of the entire sample from the population. - upper_lims = cal_df.loc[:, 'CensorUpper'].values - lower_lims = cal_df.loc[:, 'CensorLower'].values - - if ~np.all(pd.isna(np.array([upper_lims, lower_lims]))): - - censor_mask = get_filter_mask(lower_lims, upper_lims) - censored_count = np.sum(~censor_mask) - - demand_sample = demand_sample.loc[censor_mask, :] - - self.log_msg("\nBased on the provided censoring limits, " - f"{censored_count} samples were censored.", - prepend_timestamp=False) - else: - censored_count = 0 - - # Check if there is any sample outside of truncation limits - # If yes, that suggests an error either in the samples or the - # configuration. We handle such errors gracefully: the analysis is not - # terminated, but we show an error in the log file. - upper_lims = cal_df.loc[:, 'TruncateUpper'].values - lower_lims = cal_df.loc[:, 'TruncateLower'].values - - if ~np.all(pd.isna(np.array([upper_lims, lower_lims]))): - - truncate_mask = get_filter_mask(lower_lims, upper_lims) - truncated_count = np.sum(~truncate_mask) - - if truncated_count > 0: - - demand_sample = demand_sample.loc[truncate_mask, :] - - self.log_msg("\nBased on the provided truncation limits, " - f"{truncated_count} samples were removed before demand " - "calibration.", - prepend_timestamp=False) - - # Separate and save the demands that are kept empirical -> i.e., no - # fitting. Currently, empirical demands are decoupled from those that - # have a distribution fit to their samples. The correlation between - # empirical and other demands is not preserved in the demand model. 
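An illustrative calibration config (editor's addition; the keys follow the parse_settings logic above, while the numeric values and the unit label are made up). The 'ALL' entry is applied to every demand column first, and type-specific entries such as 'PID' override it:

    config_example = {
        'ALL': {
            'DistributionFamily': 'lognormal',
        },
        'PID': {
            'DistributionFamily': 'lognormal',
            'Unit': 'rad',            # hypothetical unit label
            'TruncateUpper': '0.10',  # limits are parsed from strings
            'AddUncertainty': '0.20',
        },
    }
    # demand_model.calibrate_model(config_example)  # hypothetical instance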
- empirical_edps = [] - for edp in cal_df.index: - if cal_df.loc[edp, 'Family'] == 'empirical': - empirical_edps.append(edp) - - self.empirical_data = demand_sample.loc[:, empirical_edps].copy() - - # remove the empirical demands from the samples used for calibration - demand_sample = demand_sample.drop(empirical_edps, axis=1) - - # and the calibration settings - cal_df = cal_df.drop(empirical_edps, axis=0) - - if self._asmnt.log.verbose: - self.log_msg(f"\nDemand data used for calibration:\n{demand_sample}", - prepend_timestamp=False) - - # fit the joint distribution - self.log_msg("\nFitting the prescribed joint demand distribution...", - prepend_timestamp=False) - - demand_theta, demand_rho = uq.fit_distribution_to_sample( - raw_samples=demand_sample.values.T, - distribution=cal_df.loc[:, 'Family'].values, - censored_count=censored_count, - detection_limits=cal_df.loc[ - :, ['CensorLower', 'CensorUpper']].values, - truncation_limits=cal_df.loc[ - :, ['TruncateLower', 'TruncateUpper']].values, - multi_fit=False, - logger_object=self._asmnt.log - ) - # fit the joint distribution - self.log_msg("\nCalibration successful, processing results...", - prepend_timestamp=False) - - # save the calibration results - model_params.loc[cal_df.index, ['Theta_0', 'Theta_1']] = demand_theta - - # increase the variance of the marginal distributions, if needed - if ~np.all(pd.isna(model_params.loc[:, 'SigIncrease'].values)): - - self.log_msg("\nIncreasing demand variance...", - prepend_timestamp=False) - - sig_inc = np.nan_to_num(model_params.loc[:, 'SigIncrease'].values) - sig_0 = model_params.loc[:, 'Theta_1'].values - - model_params.loc[:, 'Theta_1'] = ( - np.sqrt(sig_0 ** 2. + sig_inc ** 2.)) - - # remove unneeded fields from model_params - for col in ('SigIncrease', 'CensorLower', 'CensorUpper'): - model_params = model_params.drop(col, axis=1) - - # reorder the remaining fields for clarity - model_params = model_params[[ - 'Family', 'Theta_0', 'Theta_1', 'TruncateLower', 'TruncateUpper']] - - self.marginal_params = model_params - - self.log_msg("\nCalibrated demand model marginal distributions:\n" - + str(model_params), - prepend_timestamp=False) - - # save the correlation matrix - self.correlation = pd.DataFrame(demand_rho, - columns=cal_df.index, - index=cal_df.index) - - self.log_msg("\nCalibrated demand model correlation matrix:\n" - + str(self.correlation), - prepend_timestamp=False) - - def save_model(self, file_prefix): - """ - Save parameters of the demand model to a set of csv files - - """ - - self.log_div() - self.log_msg('Saving demand model...') - - # save the correlation and empirical data - file_io.save_to_csv(self.correlation, file_prefix + '_correlation.csv') - file_io.save_to_csv( - self.empirical_data, - file_prefix + '_empirical.csv', - units=self.units, - unit_conversion_factors=self._asmnt.unit_conversion_factors, - log=self._asmnt.log, - ) - - # the log standard deviations in the marginal parameters need to be - # scaled up before feeding to the saving method where they will be - # scaled back down and end up being saved unscaled to the target file - - marginal_params = self.marginal_params.copy() - - log_rows = marginal_params['Family'] == 'lognormal' - log_demands = marginal_params.loc[log_rows, :] - - for label in log_demands.index: - - if label in self.units.index: - - unit_factor = self._asmnt.calc_unit_scale_factor(self.units[label]) - - marginal_params.loc[label, 'Theta_1'] *= unit_factor - - file_io.save_to_csv( - marginal_params, - file_prefix + '_marginals.csv', - 
units=self.units, - unit_conversion_factors=self._asmnt.unit_conversion_factors, - orientation=1, - log=self._asmnt.log, - ) - - self.log_msg('Demand model successfully saved.', prepend_timestamp=False) - - def load_model(self, data_source): - """ - Load the model that describes demands on the asset. - - Parameters - ---------- - data_source: string or dict - If string, the data_source is a file prefix ( in the - following description) that identifies the following files: - _marginals.csv, _empirical.csv, - _correlation.csv. If dict, the data source is a dictionary - with the following optional keys: 'marginals', 'empirical', and - 'correlation'. The value under each key shall be a DataFrame. - """ - - self.log_div() - self.log_msg('Loading demand model...') - - # prepare the marginal data source variable to load the data - if isinstance(data_source, dict): - marginal_data_source = data_source.get('marginals') - empirical_data_source = data_source.get('empirical', None) - correlation_data_source = data_source.get('correlation', None) - else: - marginal_data_source = data_source + '_marginals.csv' - empirical_data_source = data_source + '_empirical.csv' - correlation_data_source = data_source + '_correlation.csv' - - if empirical_data_source is not None: - self.empirical_data = file_io.load_data( - empirical_data_source, - self._asmnt.unit_conversion_factors, - log=self._asmnt.log, - ) - if not self.empirical_data.empty: - self.empirical_data.columns.set_names( - ['type', 'loc', 'dir'], inplace=True - ) - else: - self.empirical_data = None - else: - self.empirical_data = None - - if correlation_data_source is not None: - self.correlation = file_io.load_data( - correlation_data_source, - self._asmnt.unit_conversion_factors, - reindex=False, log=self._asmnt.log) - self.correlation.index.set_names(['type', 'loc', 'dir'], inplace=True) - self.correlation.columns.set_names(['type', 'loc', 'dir'], inplace=True) - else: - self.correlation = None - - # the log standard deviations in the marginal parameters need to be - # adjusted after getting the data from the loading method where they - # were scaled according to the units of the corresponding variable - - # Note that a data source without marginal information is not valid - marginal_params, units = file_io.load_data( - marginal_data_source, - None, - orientation=1, - reindex=False, - return_units=True, - log=self._asmnt.log, - ) - marginal_params.index.set_names(['type', 'loc', 'dir'], inplace=True) - - marginal_params = self.convert_marginal_params(marginal_params.copy(), - units) - - self.marginal_params = marginal_params - self.units = units - - self.log_msg('Demand model successfully loaded.', prepend_timestamp=False) - - def _create_RVs(self, preserve_order=False): - """ - Create a random variable registry for the joint distribution of demands. 
- - """ - - # initialize the registry - RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) - - # add a random variable for each demand variable - for rv_params in self.marginal_params.itertuples(): - - edp = rv_params.Index - rv_tag = f'EDP-{edp[0]}-{edp[1]}-{edp[2]}' - family = getattr(rv_params, "Family", np.nan) - - if family == 'empirical': - - if preserve_order: - dist_family = 'coupled_empirical' - else: - dist_family = 'empirical' - - # empirical RVs need the data points - RV_reg.add_RV(uq.RandomVariable( - name=rv_tag, - distribution=dist_family, - raw_samples=self.empirical_data.loc[:, edp].values - )) - - else: - - # all other RVs need parameters of their distributions - RV_reg.add_RV(uq.RandomVariable( - name=rv_tag, - distribution=family, - theta=[getattr(rv_params, f"Theta_{t_i}", np.nan) - for t_i in range(3)], - truncation_limits=[ - getattr(rv_params, f"Truncate{side}", np.nan) - for side in ("Lower", "Upper")], - - - )) - - self.log_msg(f"\n{self.marginal_params.shape[0]} random variables created.", - prepend_timestamp=False) - - # add an RV set to consider the correlation between demands, if needed - if self.correlation is not None: - rv_set_tags = [f'EDP-{edp[0]}-{edp[1]}-{edp[2]}' - for edp in self.correlation.index.values] - - RV_reg.add_RV_set(uq.RandomVariableSet( - 'EDP_set', list(RV_reg.RVs(rv_set_tags).values()), - self.correlation.values)) - - self.log_msg( - f"\nCorrelations between {len(rv_set_tags)} random variables " - "successfully defined.", - prepend_timestamp=False) - - self._RVs = RV_reg - - def clone_demands(self, demand_cloning): - """ - Clones demands. This means copying over columns of the - original demand sample and assigning given names to them. The - columns to be copied over and the names to assign to the - copies are defined as the keys and values of the - `demand_cloning` dictionary, respectively. - The method modifies `sample` inplace. - - Parameters - ---------- - demand_cloning: dict - Keys correspond to the columns of the original sample to - be copied over and the values correspond to the intended - names for the copies. Caution: It's possible to define a - dictionary with duplicate keys, and Python will just keep - the last entry without warning. Users need to be careful - enough to avoid duplicate keys, because we can't validate - them. - E.g.: x = {'1': 1.00, '1': 2.00} results in x={'1': 2.00}. - - Raises - ------ - ValueError - In multiple instances of invalid demand_cloning entries. - - """ - - # it's impossible to have duplicate keys, because - # demand_cloning is a dictionary. - new_columns_list = demand_cloning.values() - # The following prevents duplicate entries in the values - # corresponding to a single cloned demand (1), but - # also the same column being specified as the cloned - # entry of multiple demands (2). - # e.g. - # (1): {'PGV-0-1': ['PGV-1-1', 'PGV-1-1', ...]} - # (2): {'PGV-0-1': ['PGV-1-1', ...], 'PGV-0-2': ['PGV-1-1', ...]} - flat_list = [] - for new_columns in new_columns_list: - flat_list.extend(new_columns) - if len(set(flat_list)) != len(flat_list): - raise ValueError( - 'Duplicate entries in demand cloning ' - 'configuration.' 
- ) - - # turn the config entries to tuples - def turn_to_tuples(demand_cloning): - demand_cloning_tuples = {} - for key, values in demand_cloning.items(): - demand_cloning_tuples[tuple(key.split('-'))] = [ - tuple(x.split('-')) for x in values - ] - return demand_cloning_tuples - - demand_cloning = turn_to_tuples(demand_cloning) - - # The demand cloning confuguration should not include - # columns that are not present in the orignal sample. - warn_columns = [] - for column in demand_cloning: - if column not in self.sample.columns: - warn_columns.append(column) - if warn_columns: - warn_columns = ['-'.join(x) for x in warn_columns] - self.log_msg( - "\nWARNING: The demand cloning configuration lists " - "columns that are not present in the original demand sample's " - f"columns: {warn_columns}.\n", - prepend_timestamp=False, - ) - - # we iterate over the existing columns of the sample and try - # to locate columns that need to be copied as required by the - # demand cloning configuration. If a column does not need - # to be cloned it is left as is. Otherwise, we keep track - # of its initial index location (in `column_index`) and the - # number of times it needs to be replicated, along with the - # new names of its copies (in `column_values`). - column_index = [] - column_values = [] - for i, column in enumerate(self.sample.columns): - if column not in demand_cloning: - column_index.append(i) - column_values.append(column) - else: - new_column_values = demand_cloning[column] - column_index.extend([i] * len(new_column_values)) - column_values.extend(new_column_values) - # copy the columns - self.sample = self.sample.iloc[:, column_index] - # update the column index - self.sample.columns = pd.MultiIndex.from_tuples(column_values) - - def generate_sample(self, config): - """ - Generates an RV sample with the specified configuration. - """ - - if self.marginal_params is None: - raise ValueError('Model parameters have not been specified. Either' - 'load parameters from a file or calibrate the ' - 'model using raw demand data.') - - self.log_div() - self.log_msg('Generating sample from demand variables...') - - self._create_RVs( - preserve_order=config.get('PreserveRawOrder', False)) - - sample_size = config['SampleSize'] - self._RVs.generate_sample( - sample_size=sample_size, - method=self._asmnt.options.sampling_method) - - # replace the potentially existing raw sample with the generated one - assert self._RVs is not None - assert self._RVs.RV_sample is not None - sample = pd.DataFrame(self._RVs.RV_sample) - sample.sort_index(axis=0, inplace=True) - sample.sort_index(axis=1, inplace=True) - - sample = base.convert_to_MultiIndex(sample, axis=1)['EDP'] - - sample.columns.names = ['type', 'loc', 'dir'] - self.sample = sample - - if config.get('DemandCloning', False): - self.clone_demands(config['DemandCloning']) - - self.log_msg(f"\nSuccessfully generated {sample_size} realizations.", - prepend_timestamp=False) - - -class AssetModel(PelicunModel): - """ - Manages asset information used in assessments. - - Parameters - ---------- - - """ - - def __init__(self, assessment): - - super().__init__(assessment) - - self.cmp_marginal_params = None - self.cmp_units = None - - self._cmp_RVs = None - self._cmp_sample = None - - @property - def cmp_sample(self): - """ - Assigns the _cmp_sample attribute if it is None and returns - the component sample. 
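A minimal editor's sketch tying the DemandModel steps defined above together (the method names and config keys appear in this module; the file name, sample size, and the demand_model instance are hypothetical):

    demand_model.load_sample('demand_sample.csv')  # hypothetical file
    demand_model.calibrate_model({'ALL': {'DistributionFamily': 'lognormal'}})
    demand_model.generate_sample(
        {
            'SampleSize': 1000,
            'PreserveRawOrder': False,
            'DemandCloning': {'PGV-0-1': ['PGV-1-1', 'PGV-2-1']},  # optional
        }
    )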
- """ - - if self._cmp_sample is None: - - cmp_sample = pd.DataFrame(self._cmp_RVs.RV_sample) - cmp_sample.sort_index(axis=0, inplace=True) - cmp_sample.sort_index(axis=1, inplace=True) - - cmp_sample = base.convert_to_MultiIndex(cmp_sample, axis=1)['CMP'] - - cmp_sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] - - self._cmp_sample = cmp_sample - - else: - cmp_sample = self._cmp_sample - - return cmp_sample - - def save_cmp_sample(self, filepath=None, save_units=False): - """ - Save component quantity sample to a csv file - - """ - - self.log_div() - if filepath is not None: - self.log_msg('Saving asset components sample...') - - # prepare a units array - sample = self.cmp_sample - - units = pd.Series(name='Units', index=sample.columns, dtype=object) - - for cmp_id, unit_name in self.cmp_units.items(): - units.loc[cmp_id, :] = unit_name - - res = file_io.save_to_csv( - sample, filepath, units=units, - unit_conversion_factors=self._asmnt.unit_conversion_factors, - use_simpleindex=(filepath is not None), - log=self._asmnt.log) - - if filepath is not None: - self.log_msg('Asset components sample successfully saved.', - prepend_timestamp=False) - return None - # else: - units = res.loc["Units"] - res.drop("Units", inplace=True) - - if save_units: - return res.astype(float), units - - return res.astype(float) - - def load_cmp_sample(self, filepath): - """ - Load component quantity sample from a csv file - - """ - - self.log_div() - self.log_msg('Loading asset components sample...') - - sample, units = file_io.load_data( - filepath, self._asmnt.unit_conversion_factors, - return_units=True, log=self._asmnt.log) - - sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] - - self._cmp_sample = sample - - self.cmp_units = units.groupby(level=0).first() - - self.log_msg('Asset components sample successfully loaded.', - prepend_timestamp=False) - - def load_cmp_model(self, data_source): - """ - Load the model that describes component quantities in the asset. - - Parameters - ---------- - data_source: string or dict - If string, the data_source is a file prefix ( in the - following description) that identifies the following files: - _marginals.csv, _empirical.csv, - _correlation.csv. If dict, the data source is a dictionary - with the following optional keys: 'marginals', 'empirical', and - 'correlation'. The value under each key shall be a DataFrame. 
- """ - - def get_locations(loc_str): - - try: - res = str(int(loc_str)) - return np.array([res, ]) - - except ValueError as exc: - - stories = self._asmnt.stories - - if "--" in loc_str: - s_low, s_high = loc_str.split('--') - s_low = get_locations(s_low) - s_high = get_locations(s_high) - return np.arange(int(s_low[0]), int(s_high[0]) + 1).astype(str) - - if "," in loc_str: - return np.array(loc_str.split(','), dtype=int).astype(str) - - if loc_str == "all": - return np.arange(1, stories + 1).astype(str) - - if loc_str == "top": - return np.array([stories, ]).astype(str) - - if loc_str == "roof": - return np.array([stories + 1, ]).astype(str) - - raise ValueError(f"Cannot parse location string: " - f"{loc_str}") from exc - - def get_directions(dir_str): - - if pd.isnull(dir_str): - return np.ones(1).astype(str) - - # else: - try: - res = str(int(dir_str)) - return np.array([res, ]) - - except ValueError as exc: - - if "," in dir_str: - return np.array(dir_str.split(','), dtype=int).astype(str) - - if "--" in dir_str: - d_low, d_high = dir_str.split('--') - d_low = get_directions(d_low) - d_high = get_directions(d_high) - return np.arange( - int(d_low[0]), int(d_high[0]) + 1).astype(str) - - # else: - raise ValueError(f"Cannot parse direction string: " - f"{dir_str}") from exc - - def get_attribute(attribute_str, dtype=float, default=np.nan): - - if pd.isnull(attribute_str): - return default - - # else: - - try: - - res = dtype(attribute_str) - return res - - except ValueError as exc: - - if "," in attribute_str: - # a list of weights - w = np.array(attribute_str.split(','), dtype=float) - - # return a normalized vector - return w / np.sum(w) - - # else: - raise ValueError(f"Cannot parse Blocks string: " - f"{attribute_str}") from exc - - self.log_div() - self.log_msg('Loading component model...') - - # Currently, we assume independent component distributions are defined - # throughout the building. Correlations may be added afterward or this - # method can be extended to read correlation matrices too if needed. 
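# Illustrative aside: the location parser above expands shorthand strings such
# as "1--3", "1,3", "all", "top", and "roof" into story labels. A minimal,
# standalone sketch of that behavior follows; `expand_locations` and the
# 4-story default are hypothetical and omit the error handling of the real
# helper.
import numpy as np

def expand_locations(loc_str, stories=4):
    """Expand a location shorthand into an array of story labels (strings)."""
    if "--" in loc_str:  # an inclusive range, e.g. "1--3"
        low, high = (int(x) for x in loc_str.split("--"))
        return np.arange(low, high + 1).astype(str)
    if "," in loc_str:  # an explicit list, e.g. "1,3"
        return np.array(loc_str.split(","), dtype=int).astype(str)
    if loc_str == "all":  # every story
        return np.arange(1, stories + 1).astype(str)
    if loc_str == "top":  # the top story
        return np.array([stories]).astype(str)
    if loc_str == "roof":  # one level above the top story
        return np.array([stories + 1]).astype(str)
    return np.array([str(int(loc_str))])  # a single story, e.g. "2"

# expand_locations("1--3") yields the labels '1', '2', '3';
# expand_locations("roof") yields '5' for the assumed 4-story asset.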
- - # prepare the marginal data source variable to load the data - if isinstance(data_source, dict): - marginal_data_source = data_source['marginals'] - else: - marginal_data_source = data_source + '_marginals.csv' - - marginal_params, units = file_io.load_data( - marginal_data_source, - None, - orientation=1, - reindex=False, - return_units=True, - log=self._asmnt.log, - ) - - # group units by cmp id to avoid redundant entries - self.cmp_units = units.copy().groupby(level=0).first() - - marginal_params = pd.concat([marginal_params, units], axis=1) - - cmp_marginal_param_dct = { - 'Family': [], 'Theta_0': [], 'Theta_1': [], 'Theta_2': [], - 'TruncateLower': [], 'TruncateUpper': [], 'Blocks': [], - 'Units': [] - } - index_list = [] - for row in marginal_params.itertuples(): - locs = get_locations(row.Location) - dirs = get_directions(row.Direction) - indices = list(product((row.Index, ), locs, dirs)) - num_vals = len(indices) - for col, cmp_marginal_param in cmp_marginal_param_dct.items(): - if col == 'Blocks': - cmp_marginal_param.extend( - [ - get_attribute( - getattr(row, 'Blocks', np.nan), - dtype=int, - default=1.0, - ) - ] - * num_vals - ) - elif col == 'Units': - cmp_marginal_param.extend( - [self.cmp_units[row.Index]] * num_vals - ) - elif col == 'Family': - cmp_marginal_param.extend( - [getattr(row, col, np.nan)] * num_vals - ) - else: - cmp_marginal_param.extend( - [get_attribute(getattr(row, col, np.nan))] * num_vals - ) - index_list.extend(indices) - index = pd.MultiIndex.from_tuples(index_list, names=['cmp', 'loc', 'dir']) - dtypes = { - 'Family': object, 'Theta_0': float, 'Theta_1': float, - 'Theta_2': float, 'TruncateLower': float, - 'TruncateUpper': float, 'Blocks': int, 'Units': object - } - cmp_marginal_param_series = [] - for col, cmp_marginal_param in cmp_marginal_param_dct.items(): - cmp_marginal_param_series.append( - pd.Series( - cmp_marginal_param, - dtype=dtypes[col], name=col, index=index)) - - cmp_marginal_params = pd.concat( - cmp_marginal_param_series, axis=1 - ) - - assert not cmp_marginal_params['Theta_0'].isnull().values.any() - - cmp_marginal_params.dropna(axis=1, how='all', inplace=True) - - self.log_msg("Model parameters successfully parsed. " - f"{cmp_marginal_params.shape[0]} performance groups identified", - prepend_timestamp=False) - - # Now we can take care of converting the values to base units - self.log_msg("Converting model parameters to internal units...", - prepend_timestamp=False) - - # ensure that the index has unique entries by introducing an - # internal component uid - base.dedupe_index(cmp_marginal_params) - - cmp_marginal_params = self.convert_marginal_params( - cmp_marginal_params, cmp_marginal_params['Units'] - ) - - self.cmp_marginal_params = cmp_marginal_params.drop('Units', axis=1) - - self.log_msg("Model parameters successfully loaded.", - prepend_timestamp=False) - - self.log_msg("\nComponent model marginal distributions:\n" - + str(cmp_marginal_params), - prepend_timestamp=False) - - # the empirical data and correlation files can be added later, if needed - - def _create_cmp_RVs(self): - """ - Defines the RVs used for sampling component quantities. 
-        """
-
-        # initialize the registry
-        RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng)
-
-        # add a random variable for each component quantity variable
-        for rv_params in self.cmp_marginal_params.itertuples():
-
-            cmp = rv_params.Index
-
-            # create a random variable and add it to the registry
-            RV_reg.add_RV(uq.RandomVariable(
-                name=f'CMP-{cmp[0]}-{cmp[1]}-{cmp[2]}-{cmp[3]}',
-                distribution=getattr(rv_params, "Family", np.nan),
-                theta=[getattr(rv_params, f"Theta_{t_i}", np.nan)
-                       for t_i in range(3)],
-                truncation_limits=[getattr(rv_params, f"Truncate{side}", np.nan)
-                                   for side in ("Lower", "Upper")],
-            ))
-
-        self.log_msg(f"\n{self.cmp_marginal_params.shape[0]} "
-                     "random variables created.",
-                     prepend_timestamp=False)
-
-        self._cmp_RVs = RV_reg
-
-    def generate_cmp_sample(self, sample_size=None):
-        """
-        Generates component quantity realizations. If a sample_size
-        is not specified, the sample size found in the demand model is
-        used.
-        """
-
-        if self.cmp_marginal_params is None:
-            raise ValueError('Model parameters have not been specified. Load '
-                             'parameters from a file before generating a '
-                             'sample.')
-
-        self.log_div()
-        self.log_msg('Generating sample from component quantity variables...')
-
-        if sample_size is None:
-            if self._asmnt.demand.sample is None:
-                raise ValueError(
-                    'Sample size was not specified, '
-                    'and it cannot be determined from '
-                    'the demand model.')
-            sample_size = self._asmnt.demand.sample.shape[0]
-
-        self._create_cmp_RVs()
-
-        self._cmp_RVs.generate_sample(
-            sample_size=sample_size,
-            method=self._asmnt.options.sampling_method)
-
-        # replace the potentially existing sample with the generated one
-        self._cmp_sample = None
-
-        self.log_msg(f"\nSuccessfully generated {sample_size} realizations.",
-                     prepend_timestamp=False)
-
-
-class DamageModel(PelicunModel):
-    """
-    Manages damage information used in assessments.
-
-    This class contains the following methods:
-
-    - save_sample()
-    - load_sample()
-    - load_damage_model()
-    - calculate()
-    - _get_pg_batches()
-    - _generate_dmg_sample()
-    - _create_dmg_RVs()
-    - _get_required_demand_type()
-    - _assemble_required_demand_data()
-    - _evaluate_damage_state()
-    - _prepare_dmg_quantities()
-    - _perform_dmg_task()
-    - _apply_dmg_functions()
-
-    Parameters
-    ----------
-
-    """
-
-    def __init__(self, assessment):
-
-        super().__init__(assessment)
-
-        self.damage_params = None
-        self.sample = None
-
-    def save_sample(self, filepath=None, save_units=False):
-        """
-        Save damage sample to a csv file
-
-        """
-        self.log_div()
-        self.log_msg('Saving damage sample...')
-
-        cmp_units = self._asmnt.asset.cmp_units
-        qnt_units = pd.Series(index=self.sample.columns, name='Units',
-                              dtype='object')
-        for cmp in cmp_units.index:
-            qnt_units.loc[cmp] = cmp_units.loc[cmp]
-
-        res = file_io.save_to_csv(
-            self.sample, filepath,
-            units=qnt_units,
-            unit_conversion_factors=self._asmnt.unit_conversion_factors,
-            use_simpleindex=(filepath is not None),
-            log=self._asmnt.log)
-
-        if filepath is not None:
-            self.log_msg('Damage sample successfully saved.',
-                         prepend_timestamp=False)
-            return None
-
-        # else:
-        units = res.loc["Units"]
-        res.drop("Units", inplace=True)
-        res.index = res.index.astype('int64')
-
-        if save_units:
-            return res.astype(float), units
-
-        return res.astype(float)
-
-    def load_sample(self, filepath):
-        """
-        Load damage state sample data.
- - """ - self.log_div() - self.log_msg('Loading damage sample...') - - self.sample = file_io.load_data( - filepath, self._asmnt.unit_conversion_factors, - log=self._asmnt.log) - - # set the names of the columns - self.sample.columns.names = ['cmp', 'loc', 'dir', 'uid', 'ds'] - - self.log_msg('Damage sample successfully loaded.', - prepend_timestamp=False) - - def load_damage_model(self, data_paths): - """ - Load limit state damage model parameters and damage state assignments - - Parameters - ---------- - data_paths: list of string - List of paths to data files with damage model information. Default - XY datasets can be accessed as PelicunDefault/XY. - """ - - self.log_div() - self.log_msg('Loading damage model...') - - # replace default flag with default data path - for d_i, data_path in enumerate(data_paths): - - if 'PelicunDefault/' in data_path: - data_paths[d_i] = data_path.replace( - 'PelicunDefault/', - f'{base.pelicun_path}/resources/SimCenterDBDL/', - ) - - data_list = [] - # load the data files one by one - for data_path in data_paths: - - data = file_io.load_data( - data_path, None, orientation=1, reindex=False, log=self._asmnt.log - ) - - data_list.append(data) - - damage_params = pd.concat(data_list, axis=0) - - # drop redefinitions of components - damage_params = damage_params.groupby(damage_params.index).first() - - # get the component types defined in the asset model - cmp_labels = self._asmnt.asset.cmp_sample.columns - - # only keep the damage model parameters for the components in the model - cmp_unique = cmp_labels.unique(level=0) - cmp_mask = damage_params.index.isin(cmp_unique, level=0) - - damage_params = damage_params.loc[cmp_mask, :] - - if np.sum(cmp_mask) != len(cmp_unique): - - cmp_list = cmp_unique[ - np.isin(cmp_unique, damage_params.index.values, - invert=True)].to_list() - - self.log_msg("\nWARNING: The damage model does not provide " - "vulnerability information for the following component(s) " - f"in the asset model: {cmp_list}.\n", - prepend_timestamp=False) - - # TODO: load defaults for Demand-Offset and Demand-Directional - - # Now convert model parameters to base units - for LS_i in damage_params.columns.unique(level=0): - if LS_i.startswith('LS'): - - damage_params.loc[:, LS_i] = self.convert_marginal_params( - damage_params.loc[:, LS_i].copy(), - damage_params[('Demand', 'Unit')], - ).values - - # check for components with incomplete damage model information - cmp_incomplete_list = damage_params.loc[ - damage_params[('Incomplete', '')] == 1].index - - damage_params.drop(cmp_incomplete_list, inplace=True) - - if len(cmp_incomplete_list) > 0: - self.log_msg(f"\nWARNING: Damage model information is incomplete for " - f"the following component(s) {cmp_incomplete_list}. They " - f"were removed from the analysis.\n", - prepend_timestamp=False) - - self.damage_params = damage_params - - self.log_msg("Damage model parameters successfully parsed.", - prepend_timestamp=False) - - def _handle_operation(self, initial_value, operation, other_value): - """ - This method is used in `_create_dmg_RVs` to apply capacity - adjustment operations whenever required. It is defined as a - safer alternative to directly using `eval`. 
- - Parameters - ---------- - initial_value: float - Value before operation - operation: str - Any of +, -, *, / - other_value: float - Value used to apply the operation - - Returns - ------- - result: float - The result of the operation - - """ - if operation == '+': - return initial_value + other_value - if operation == '-': - return initial_value - other_value - if operation == '*': - return initial_value * other_value - if operation == '/': - return initial_value / other_value - raise ValueError(f'Invalid operation: {operation}') - - def _create_dmg_RVs(self, PGB, scaling_specification=None): - """ - Creates random variables required later for the damage calculation. - - The method initializes two random variable registries, - capacity_RV_reg and lsds_RV_reg, and loops through each - performance group in the input performance group block (PGB) - dataframe. For each performance group, it retrieves the - component sample and blocks and checks if the limit state is - defined for the component. If the limit state is defined, the - method gets the list of limit states and the parameters for - each limit state. The method assigns correlation between limit - state random variables, adds the limit state random variables - to the capacity_RV_reg registry, and adds LSDS assignments to - the lsds_RV_reg registry. After looping through all - performance groups, the method returns the two registries. - - Parameters - ---------- - PGB : DataFrame - A DataFrame that groups performance groups into batches - for efficient damage assessment. - scaling_specification: dict, optional - A dictionary defining the shift in median. - Example: {'CMP-1-1': '*1.2', 'CMP-1-2': '/1.4'} - The keys are individual components that should be present - in the `capacity_sample`. The values should be strings - containing an operation followed by the value formatted as - a float. The operation can be '+' for addition, '-' for - subtraction, '*' for multiplication, and '/' for division. - - """ - - def assign_lsds(ds_weights, ds_id, lsds_RV_reg, lsds_rv_tag): - """ - Prepare random variables to handle mutually exclusive damage states. - - """ - - # If the limit state has a single damage state assigned - # to it, we don't need random sampling - if pd.isnull(ds_weights): - - ds_id += 1 - - lsds_RV_reg.add_RV( - uq.RandomVariable( - name=lsds_rv_tag, - distribution='deterministic', - theta=ds_id, - ) - ) - - # Otherwise, we create a multinomial random variable - else: - - # parse the DS weights - ds_weights = np.array( - ds_weights.replace(" ", "").split('|'), dtype=float - ) - - def map_ds(values, offset=int(ds_id + 1)): - return values + offset - - lsds_RV_reg.add_RV( - uq.RandomVariable( - name=lsds_rv_tag, - distribution='multinomial', - theta=ds_weights, - f_map=map_ds, - ) - ) - - ds_id += len(ds_weights) - - return ds_id - - if self._asmnt.log.verbose: - self.log_msg('Generating capacity variables ...', prepend_timestamp=True) - - # initialize the registry - capacity_RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) - lsds_RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) - - # capacity adjustment: - # ensure the scaling_specification is a dictionary - if not scaling_specification: - scaling_specification = {} - else: - # if there are contents, ensure they are valid. - # See docstring for an example of what is expected. 
- parsed_scaling_specification = {} - # validate contents - for key, value in scaling_specification.items(): - css = 'capacity adjustment specification' - if not isinstance(value, str): - raise ValueError( - f'Invalud entry in {css}: {value}. It has to be a string. ' - f'See docstring of DamageModel._create_dmg_RVs.' - ) - capacity_adjustment_operation = value[0] - number = value[1::] - if capacity_adjustment_operation not in ('+', '-', '*', '/'): - raise ValueError( - f'Invalid operation in {css}: ' - f'{capacity_adjustment_operation}' - ) - fnumber = base.float_or_None(number) - if fnumber is None: - raise ValueError(f'Invalid number in {css}: {number}') - parsed_scaling_specification[key] = ( - capacity_adjustment_operation, - fnumber, - ) - scaling_specification = parsed_scaling_specification - - # get the component sample and blocks from the asset model - for PG in PGB.index: - - # determine demand capacity adjustment operation, if required - cmp_loc_dir = '-'.join(PG[0:3]) - capacity_adjustment_operation = scaling_specification.get( - cmp_loc_dir, None - ) - - cmp_id = PG[0] - blocks = PGB.loc[PG, 'Blocks'] - - # if the number of blocks is provided, calculate the weights - if np.atleast_1d(blocks).shape[0] == 1: - blocks = np.full(int(blocks), 1.0 / blocks) - # otherwise, assume that the list contains the weights - - # initialize the damaged quantity sample variable - - assert self.damage_params is not None - if cmp_id in self.damage_params.index: - - frg_params = self.damage_params.loc[cmp_id, :] - - # get the list of limit states - limit_states = [] - - for val in frg_params.index.get_level_values(0).unique(): - if 'LS' in val: - limit_states.append(val[2:]) - - ds_id = 0 - - frg_rv_set_tags = [[] for b in blocks] - anchor_RVs = [] - - for ls_id in limit_states: - - frg_params_LS = frg_params[f'LS{ls_id}'] - - theta_0 = frg_params_LS.get('Theta_0', np.nan) - family = frg_params_LS.get('Family', np.nan) - ds_weights = frg_params_LS.get('DamageStateWeights', np.nan) - - # check if the limit state is defined for the component - if pd.isna(theta_0): - continue - - theta = [ - frg_params_LS.get(f"Theta_{t_i}", np.nan) for t_i in range(3) - ] - - if capacity_adjustment_operation: - if family in {'normal', 'lognormal'}: - theta[0] = self._handle_operation( - theta[0], - capacity_adjustment_operation[0], - capacity_adjustment_operation[1], - ) - else: - self.log_msg( - f'\nWARNING: Capacity adjustment is only supported ' - f'for `normal` or `lognormal` distributions. ' - f'Ignoring: {cmp_loc_dir}, which is {family}', - prepend_timestamp=False, - ) - - tr_lims = [ - frg_params_LS.get(f"Truncate{side}", np.nan) - for side in ("Lower", "Upper") - ] - - for block_i, _ in enumerate(blocks): - - frg_rv_tag = ( - 'FRG-' - f'{PG[0]}-' # cmp_id - f'{PG[1]}-' # loc - f'{PG[2]}-' # dir - f'{PG[3]}-' # uid - f'{block_i+1}-' # block - f'{ls_id}' - ) - - # Assign correlation between limit state random - # variables - # Note that we assume perfectly correlated limit - # state random variables here. This approach is in - # line with how mainstream PBE calculations are - # performed. Assigning more sophisticated - # correlations between limit state RVs is possible, - # if needed. Please let us know through the - # SimCenter Message Board if you are interested in - # such a feature. 
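# Illustrative aside: "perfectly correlated" limit state capacities simply
# means that, within one block, every limit state is driven by the same
# underlying uniform sample and only the marginal parameters differ. A minimal
# numpy/scipy sketch of that idea follows; the drift medians and dispersion
# are assumed values, and this is not pelicun's RV machinery.
import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(42)
u = rng.uniform(size=10_000)  # one shared uniform draw per realization

medians = {'1': 0.02, '2': 0.04, '3': 0.08}  # assumed drift capacity medians
beta = 0.4                                   # assumed logarithmic std. dev.

capacity = {
    ls: np.exp(np.log(med) + beta * norm.ppf(u))  # same u in every LS
    for ls, med in medians.items()
}
# Because the arrays are proportional, np.corrcoef(capacity['1'],
# capacity['2'])[0, 1] evaluates to 1.0: a demand that exceeds the LS2
# capacity also exceeds the LS1 capacity in every realization, which is the
# behavior the anchoring below reproduces.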
- # Anchor all other limit state random variables to - # the first one to consider the perfect correlation - # between capacities in each LS - if ls_id == limit_states[0]: - anchor = None - else: - anchor = anchor_RVs[block_i] - - # parse theta values for multilinear_CDF - if family == 'multilinear_CDF': - theta = np.column_stack( - ( - np.array( - theta[0].split('|')[0].split(','), - dtype=float, - ), - np.array( - theta[0].split('|')[1].split(','), - dtype=float, - ), - ) - ) - - RV = uq.RandomVariable( - name=frg_rv_tag, - distribution=family, - theta=theta, - truncation_limits=tr_lims, - anchor=anchor, - ) - - capacity_RV_reg.add_RV(RV) - - # add the RV to the set of correlated variables - frg_rv_set_tags[block_i].append(frg_rv_tag) - - if ls_id == limit_states[0]: - anchor_RVs.append(RV) - - # Now add the LS->DS assignments - lsds_rv_tag = ( - 'LSDS-' - f'{PG[0]}-' # cmp_id - f'{PG[1]}-' # loc - f'{PG[2]}-' # dir - f'{PG[3]}-' # uid - f'{block_i+1}-' # block - f'{ls_id}' - ) - - ds_id_next = assign_lsds( - ds_weights, ds_id, lsds_RV_reg, lsds_rv_tag - ) - - ds_id = ds_id_next - - if self._asmnt.log.verbose: - rv_count = len(lsds_RV_reg.RV) - self.log_msg( - f"2x{rv_count} random variables created.", prepend_timestamp=False - ) - - return capacity_RV_reg, lsds_RV_reg - - def _generate_dmg_sample(self, sample_size, PGB, scaling_specification=None): - """ - This method generates a damage sample by creating random - variables (RVs) for capacities and limit-state-damage-states - (lsds), and then sampling from these RVs. The sample size and - performance group batches (PGB) are specified as inputs. The - method returns the capacity sample and the lsds sample. - - Parameters - ---------- - sample_size : int - The number of realizations to generate. - PGB : DataFrame - A DataFrame that groups performance groups into batches - for efficient damage assessment. - scaling_specification: dict, optional - A dictionary defining the shift in median. - Example: {'CMP-1-1': '*1.2', 'CMP-1-2': '/1.4'} - The keys are individual components that should be present - in the `capacity_sample`. The values should be strings - containing an operation followed by the value formatted as - a float. The operation can be '+' for addition, '-' for - subtraction, '*' for multiplication, and '/' for division. - - Returns - ------- - capacity_sample : DataFrame - A DataFrame that represents the capacity sample. - lsds_sample : DataFrame - A DataFrame that represents the . - - Raises - ------ - ValueError - If the damage parameters have not been specified. - - """ - - # Check if damage model parameters have been specified - if self.damage_params is None: - raise ValueError('Damage model parameters have not been specified. 
' - 'Load parameters from the default damage model ' - 'databases or provide your own damage model ' - 'definitions before generating a sample.') - - # Create capacity and LSD RVs for each performance group - capacity_RVs, lsds_RVs = self._create_dmg_RVs(PGB, scaling_specification) - - if self._asmnt.log.verbose: - self.log_msg('Sampling capacities...', - prepend_timestamp=True) - - # Generate samples for capacity RVs - capacity_RVs.generate_sample( - sample_size=sample_size, - method=self._asmnt.options.sampling_method) - - # Generate samples for LSD RVs - lsds_RVs.generate_sample( - sample_size=sample_size, - method=self._asmnt.options.sampling_method) - - if self._asmnt.log.verbose: - self.log_msg("Raw samples are available", - prepend_timestamp=True) - - # get the capacity and lsds samples - capacity_sample = pd.DataFrame( - capacity_RVs.RV_sample).sort_index( - axis=0).sort_index(axis=1) - capacity_sample = base.convert_to_MultiIndex( - capacity_sample, axis=1)['FRG'] - capacity_sample.columns.names = [ - 'cmp', 'loc', 'dir', 'uid', 'block', 'ls'] - - lsds_sample = pd.DataFrame( - lsds_RVs.RV_sample).sort_index( - axis=0).sort_index(axis=1).astype(int) - lsds_sample = base.convert_to_MultiIndex( - lsds_sample, axis=1)['LSDS'] - lsds_sample.columns.names = [ - 'cmp', 'loc', 'dir', 'uid', 'block', 'ls'] - - if self._asmnt.log.verbose: - self.log_msg( - f"Successfully generated {sample_size} realizations.", - prepend_timestamp=True) - - return capacity_sample, lsds_sample - - def _get_required_demand_type(self, PGB): - """ - Returns the id of the demand needed to calculate damage to a - component. We assume that a damage model sample is available. - - This method returns the demand type and its properties - required to calculate the damage to a component. The - properties include whether the demand is directional, the - offset, and the type of the demand. The method takes as input - a dataframe PGB that contains information about the component - groups in the asset. For each component group PG in the PGB - dataframe, the method retrieves the relevant damage parameters - from the damage_params dataframe and parses the demand type - into its properties. If the demand type has a subtype, the - method splits it and adds the subtype to the demand type to - form the EDP (engineering demand parameter) type. The method - also considers the default offset for the demand type, if it - is specified in the options attribute of the assessment, and - adds the offset to the EDP. If the demand is directional, the - direction is added to the EDP. The method collects all the - unique EDPs for each component group and returns them as a - dictionary where each key is an EDP and its value is a list of - component groups that require that EDP. 
- - Parameters - ---------- - `PGB`: pd.DataFrame - A pandas DataFrame with the block information for - each component - - Returns - ------- - EDP_req: dict - A dictionary of EDP requirements, where each key is the EDP - string (e.g., "Peak Ground Acceleration-0-0"), and the - corresponding value is a list of tuples (component_id, - location, direction) - - """ - - # Assign the damage_params attribute to a local variable `DP` - DP = self.damage_params - - # Check if verbose logging is enabled in `self._asmnt.log` - if self._asmnt.log.verbose: - # If verbose logging is enabled, log a message indicating - # that we are collecting demand information - self.log_msg('Collecting required demand information...', - prepend_timestamp=True) - - # Initialize an empty dictionary to store the unique EDP - # requirements - EDP_req = {} - - # Iterate over the index of the `PGB` DataFrame - for PG in PGB.index: - # Get the component name from the first element of the - # `PG` tuple - cmp = PG[0] - - # Get the directional, offset, and demand_type parameters - # from the `DP` DataFrame - directional, offset, demand_type = DP.loc[ - cmp, [('Demand', 'Directional'), - ('Demand', 'Offset'), - ('Demand', 'Type')]] - - # Parse the demand type - - # Check if there is a subtype included in the demand_type - # string - if '|' in demand_type: - # If there is a subtype, split the demand_type string - # on the '|' character - demand_type, subtype = demand_type.split('|') - # Convert the demand type to the corresponding EDP - # type using `base.EDP_to_demand_type` - demand_type = base.EDP_to_demand_type[demand_type] - # Concatenate the demand type and subtype to form the - # EDP type - EDP_type = f'{demand_type}_{subtype}' - else: - # If there is no subtype, convert the demand type to - # the corresponding EDP type using - # `base.EDP_to_demand_type` - demand_type = base.EDP_to_demand_type[demand_type] - # Assign the EDP type to be equal to the demand type - EDP_type = demand_type - - # Consider the default offset, if needed - if demand_type in self._asmnt.options.demand_offset.keys(): - # If the demand type has a default offset in - # `self._asmnt.options.demand_offset`, add the offset - # to the default offset - offset = int(offset + self._asmnt.options.demand_offset[demand_type]) - else: - # If the demand type does not have a default offset in - # `self._asmnt.options.demand_offset`, convert the - # offset to an integer - offset = int(offset) - - # Determine the direction - if directional: - # If the demand is directional, use the third element - # of the `PG` tuple as the direction - direction = PG[2] - else: - # If the demand is not directional, use '0' as the - # direction - direction = '0' - - # Concatenate the EDP type, offset, and direction to form - # the EDP key - EDP = f"{EDP_type}-{str(int(PG[1]) + offset)}-{direction}" - - # If the EDP key is not already in the `EDP_req` - # dictionary, add it and initialize it with an empty list - if EDP not in EDP_req: - EDP_req.update({EDP: []}) - - # Add the current PG (performance group) to the list of - # PGs associated with the current EDP key - EDP_req[EDP].append(PG) - - # Return the unique EDP requirements - return EDP_req - - def _assemble_required_demand_data(self, EDP_req): - """ - Assembles demand data for damage state determination. 
- - The method takes the maximum of all available directions for - non-directional demand, scaling it using the non-directional - multiplier specified in self._asmnt.options, and returning the - result as a dictionary with keys in the format of - '--' and values as arrays of - demand values. If demand data is not found, logs a warning - message and skips the corresponding damages calculation. - - Parameters - ---------- - EDP_req : dict - A dictionary of unique EDP requirements - - Returns - ------- - demand_dict : dict - A dictionary of assembled demand data for calculation - - Raises - ------ - KeyError - If demand data for a given EDP cannot be found - - """ - - if self._asmnt.log.verbose: - self.log_msg('Assembling demand data for calculation...', - prepend_timestamp=True) - - demand_source = self._asmnt.demand.sample - - demand_dict = {} - - for EDP in EDP_req.keys(): - - EDP = EDP.split('-') - - # if non-directional demand is requested... - if EDP[2] == '0': - - # assume that the demand at the given location is available - try: - # take the maximum of all available directions and scale it - # using the nondirectional multiplier specified in the - # self._asmnt.options (the default value is 1.2) - demand = demand_source.loc[ - :, (EDP[0], EDP[1])].max(axis=1).values - demand = demand * self._asmnt.options.nondir_multi(EDP[0]) - - except KeyError: - - demand = None - - else: - demand = demand_source[(EDP[0], EDP[1], EDP[2])].values - - if demand is None: - - self.log_msg(f'\nWARNING: Cannot find demand data for {EDP}. The ' - 'corresponding damages cannot be calculated.', - prepend_timestamp=False) - else: - demand_dict.update({f'{EDP[0]}-{EDP[1]}-{EDP[2]}': demand}) - - return demand_dict - - def _evaluate_damage_state( - self, demand_dict, EDP_req, capacity_sample, lsds_sample): - """ - Use the demand and LS capacity sample to evaluate damage states - - Parameters - ---------- - demand_dict: dict - Dictionary containing the demand of each demand type. - EDP_req: dict - Dictionary containing the EDPs assigned to each demand - type. - capacity_sample: DataFrame - Provides a sample of the capacity. - lsds_sample: DataFrame - Provides the mapping between limit states and damage - states. - - Returns - ------- - dmg_sample: DataFrame - Assigns a Damage State to each component block in the - asset model. 
- """ - - # Log a message indicating that damage states are being - # evaluated - - if self._asmnt.log.verbose: - self.log_msg('Evaluating damage states...', prepend_timestamp=True) - - # Create an empty dataframe with columns and index taken from - # the input capacity sample - dmg_eval = pd.DataFrame(columns=capacity_sample.columns, - index=capacity_sample.index) - - # Initialize an empty list to store demand data - demand_df = [] - - # For each demand type in the demand dictionary - for demand_name, demand_vals in demand_dict.items(): - - # Get the list of PGs assigned to this demand type - PG_list = EDP_req[demand_name] - - # Create a list of columns for the demand data - # corresponding to each PG in the PG_list - PG_cols = pd.concat( - [dmg_eval.loc[:1, PG_i] for PG_i in PG_list], axis=1, keys=PG_list - ).columns - PG_cols.names = ['cmp', 'loc', 'dir', 'uid', 'block', 'ls'] - # Create a dataframe with demand values repeated for the - # number of PGs and assign the columns as PG_cols - demand_df.append(pd.concat([pd.Series(demand_vals)] * len(PG_cols), - axis=1, keys=PG_cols)) - - # Concatenate all demand dataframes into a single dataframe - demand_df = pd.concat(demand_df, axis=1) - # Sort the columns of the demand dataframe - demand_df.sort_index(axis=1, inplace=True) - - # Evaluate the damage exceedance by subtracting demand from - # capacity and checking if the result is less than zero - dmg_eval = (capacity_sample - demand_df) < 0 - - # Remove any columns with NaN values from the damage - # exceedance dataframe - dmg_eval.dropna(axis=1, inplace=True) - - # initialize the DataFrames that store the damage states and - # quantities - ds_sample = capacity_sample.groupby(level=[0, 1, 2, 3, 4], axis=1).first() - ds_sample.loc[:, :] = np.zeros(ds_sample.shape, dtype=int) - - # get a list of limit state ids among all components in the damage model - ls_list = dmg_eval.columns.get_level_values(5).unique() - - # for each consecutive limit state... - for LS_id in ls_list: - # get all cmp - loc - dir - block where this limit state occurs - dmg_e_ls = dmg_eval.loc[:, idx[:, :, :, :, :, LS_id]].dropna(axis=1) - - # Get the damage states corresponding to this limit state in each - # block - # Note that limit states with a set of mutually exclusive damage - # states options have their damage state picked here. - lsds = lsds_sample.loc[:, dmg_e_ls.columns] - - # Drop the limit state level from the columns to make the damage - # exceedance DataFrame compatible with the other DataFrames in the - # following steps - dmg_e_ls.columns = dmg_e_ls.columns.droplevel(5) - - # Same thing for the lsds DataFrame - lsds.columns = dmg_e_ls.columns - - # Update the damage state in the result with the values from the - # lsds DF if the limit state was exceeded according to the - # dmg_e_ls DF. - # This one-liner updates the given Limit State exceedance in the - # entire damage model. If subsequent Limit States are also exceeded, - # those cells in the result matrix will get overwritten by higher - # damage states. - ds_sample.loc[:, dmg_e_ls.columns] = ( - ds_sample.loc[:, dmg_e_ls.columns].mask(dmg_e_ls, lsds)) - - return ds_sample - - def _prepare_dmg_quantities(self, PGB, ds_sample, dropzero=True): - """ - Combine component quantity and damage state information in one - DataFrame. - - This method assumes that a component quantity sample is - available in the asset model and a damage state sample is - available in the damage model. 
- - Parameters - ---------- - PGB: DataFrame - A DataFrame that contains the Block identifier for each - component. - ds_sample: DataFrame - A DataFrame that assigns a damage state to each component - block in the asset model. - dropzero: bool, optional, default: True - If True, the quantity of non-damaged components is not - saved. - - Returns - ------- - res_df: DataFrame - A DataFrame that combines the component quantity and - damage state information. - - Raises - ------ - ValueError - If the number of blocks is not provided or if the list of - weights does not contain the same number of elements as - the number of blocks. - - """ - - # Log a message indicating that the calculation of damage - # quantities is starting - if self._asmnt.log.verbose: - self.log_msg('Calculating damage quantities...', - prepend_timestamp=True) - - # Store the damage state sample as a local variable - dmg_ds = ds_sample - - # Retrieve the component quantity information from the asset - # model - cmp_qnt = self._asmnt.asset.cmp_sample # .values - # Retrieve the component marginal parameters from the asset - # model - cmp_params = self._asmnt.asset.cmp_marginal_params - - # Combine the component quantity information for the columns - # in the damage state sample - dmg_qnt = pd.concat( - [cmp_qnt[PG[:4]] for PG in dmg_ds.columns], - axis=1, keys=dmg_ds.columns) - - # Initialize a list to store the block weights - block_weights = [] - - # For each component in the list of PG blocks - for PG in PGB.index: - - # Set the number of blocks to 1, unless specified - # otherwise in the component marginal parameters - blocks = 1 - if cmp_params is not None: - if 'Blocks' in cmp_params.columns: - - blocks = cmp_params.loc[PG, 'Blocks'] - - # If the number of blocks is specified, calculate the - # weights as the reciprocal of the number of blocks - if np.atleast_1d(blocks).shape[0] == 1: - blocks_array = np.full(int(blocks), 1. 
/ blocks) - - # Otherwise, assume that the list contains the weights - block_weights += blocks_array.tolist() - - # Broadcast the block weights to match the shape of the damage - # quantity DataFrame - block_weights = np.broadcast_to( - block_weights, - (dmg_qnt.shape[0], len(block_weights))) - - # Multiply the damage quantities by the block weights - dmg_qnt *= block_weights - - # Get the unique damage states from the damage state sample - # Note that these might be fewer than all possible Damage - # States - ds_list = np.unique(dmg_ds.values) - # Filter out any NaN values from the list of damage states - ds_list = ds_list[pd.notna(ds_list)].astype(int) - - # If the dropzero option is True, remove the zero damage state - # from the list of damage states - if dropzero: - - ds_list = ds_list[ds_list != 0] - - # Only proceed with the calculation if there is at least one - # damage state in the list - if len(ds_list) > 0: - - # Create a list of DataFrames, where each DataFrame stores - # the damage quantities for a specific damage state - res_list = [pd.DataFrame( - np.where(dmg_ds == ds_i, dmg_qnt, 0), - columns=dmg_ds.columns, - index=dmg_ds.index - ) for ds_i in ds_list] - - # Combine the damage quantity DataFrames into a single - # DataFrame - res_df = pd.concat( - res_list, axis=1, - keys=[f'{ds_i:g}' for ds_i in ds_list]) - res_df.columns.names = ['ds', *res_df.columns.names[1::]] - # remove the block level from the columns - res_df.columns = res_df.columns.reorder_levels([1, 2, 3, 4, 0, 5]) - res_df = res_df.groupby(level=[0, 1, 2, 3, 4], axis=1).sum() - - # The damage states with no damaged quantities are dropped - # Note that some of these are not even valid DSs at the given PG - res_df = res_df.iloc[:, np.where(res_df.sum(axis=0) != 0)[0]] - - return res_df - - def _perform_dmg_task(self, task, qnt_sample): - """ - Perform a task from a damage process. - - The method performs a task from a damage process on a given - quantity sample. The method first checks if the source - component specified in the task exists among the available - components in the quantity sample. If the source component is - not found, a warning message is logged and the method returns - the original quantity sample unchanged. Otherwise, the method - executes the events specified in the task. The events can be - triggered by a limit state exceedance or a damage state - occurrence. If the event is triggered by a damage state, the - method moves all quantities of the target component(s) into - the target damage state in pre-selected realizations. If the - target event is "NA", the method removes quantity information - from the target components in the pre-selected - realizations. After executing the events, the method returns - the updated quantity sample. - - Parameters - ---------- - task : list - A list representing a task from the damage process. The - list contains two elements: - - The first element is a string representing the source - component, e.g., `'CMP_A'`. - - The second element is a dictionary representing the - events triggered by the damage state of the source - component. The keys of the dictionary are strings that - represent the damage state of the source component, - e.g., `'DS1'`. The values are lists of strings - representing the target component(s) and event(s), e.g., - `['CMP_B', 'CMP_C']`. - qnt_sample : pandas DataFrame - A DataFrame representing the quantities of the components - in the damage sample. 
It is modified in place to represent - the quantities of the components in the damage sample - after the task has been performed. - - Raises - ------ - ValueError - If the source component is not found among the components - in the damage sample - ValueError - If the source event is not a limit state (LS) or damage - state (DS) - ValueError - If the target event is not a limit state (LS), damage - state (DS), or not available (NA) - ValueError - If the target event is a limit state (LS) - - """ - - if self._asmnt.log.verbose: - self.log_msg('Applying task...', - prepend_timestamp=True) - - # get the list of available components - cmp_list = qnt_sample.columns.get_level_values(0).unique().tolist() - - # get the component quantities - cmp_qnt = self._asmnt.asset.cmp_sample - - # get the source component - source_cmp = task[0].split('_')[1] - - # check if it exists among the available ones - if source_cmp not in cmp_list: - - self.log_msg( - f"WARNING: Source component {source_cmp} in the prescribed " - "damage process not found among components in the damage " - "sample. The corresponding part of the damage process is " - "skipped.", prepend_timestamp=False) - - return - - # get the damage quantities for the source component - source_cmp_df = qnt_sample.loc[:, source_cmp] - - # execute the prescribed events - for source_event, target_infos in task[1].items(): - - # events triggered by limit state exceedance - if source_event.startswith('LS'): - - # ls_i = int(source_event[2:]) - # TODO: implement source LS support - raise ValueError('LS not supported yet.') - - # events triggered by damage state occurrence - if source_event.startswith('DS'): - - # get the ID of the damage state that triggers the event - ds_list = [source_event[2:], ] - - # if we are only looking for a single DS - if len(ds_list) == 1: - - ds_target = ds_list[0] - - # get the realizations with non-zero quantity of the target DS - source_ds_vals = source_cmp_df.groupby( - level=[3], axis=1).max() - - if ds_target in source_ds_vals.columns: - source_ds_vals = source_ds_vals[ds_target] - source_mask = source_cmp_df.loc[source_ds_vals > 0.0].index - else: - # if tge source_cmp is not in ds_target in any of the - # realizations, the prescribed event is not triggered - continue - - else: - pass # TODO: implement multiple DS support - - else: - raise ValueError(f"Unable to parse source event in damage " - f"process: {source_event}") - - # get the information about the events - target_infos = np.atleast_1d(target_infos) - - # for each event - for target_info in target_infos: - - # get the target component and event type - target_cmp, target_event = target_info.split('_') - - # ALL means all, but the source component - if target_cmp == 'ALL': - - # copy the list of available components - target_cmp = deepcopy(cmp_list) - - # remove the source component - if source_cmp in target_cmp: - target_cmp.remove(source_cmp) - - # otherwise we target a specific component - elif target_cmp in cmp_list: - target_cmp = [target_cmp, ] - - # trigger a limit state - if target_event.startswith('LS'): - - # ls_i = int(target_event[2:]) - # TODO: implement target LS support - raise ValueError('LS not supported yet.') - - # trigger a damage state - if target_event.startswith('DS'): - - # get the target damage state ID - ds_i = target_event[2:] - - # move all quantities of the target component(s) into the - # target damage state in the pre-selected realizations - qnt_sample.loc[source_mask, target_cmp] = 0.0 - - for target_cmp_i in target_cmp: - locs = 
cmp_qnt[target_cmp_i].columns.get_level_values(0) - dirs = cmp_qnt[target_cmp_i].columns.get_level_values(1) - uids = cmp_qnt[target_cmp_i].columns.get_level_values(2) - for loc, direction, uid in zip(locs, dirs, uids): - # because we cannot be certain that ds_i had been - # triggered earlier, we have to add this damage - # state manually for each PG of each component, if needed - if ds_i not in qnt_sample[ - (target_cmp_i, loc, direction, uid)].columns: - qnt_sample[ - (target_cmp_i, loc, direction, uid, ds_i)] = 0.0 - - qnt_sample.loc[ - source_mask, - (target_cmp_i, loc, direction, uid, ds_i)] = ( - cmp_qnt.loc[ - source_mask, - (target_cmp_i, loc, direction, uid)].values) - - # clear all damage information - elif target_event == 'NA': - - # remove quantity information from the target components - # in the pre-selected realizations - qnt_sample.loc[source_mask, target_cmp] = np.nan - - else: - raise ValueError(f"Unable to parse target event in damage " - f"process: {target_event}") - - if self._asmnt.log.verbose: - self.log_msg('Damage process task successfully applied.', - prepend_timestamp=False) - - def _get_pg_batches(self, block_batch_size): - """ - Group performance groups into batches for efficient damage assessment. - - The method takes as input the block_batch_size, which - specifies the maximum number of blocks per batch. The method - first checks if performance groups have been defined in the - cmp_marginal_params dataframe, and if so, it uses the 'Blocks' - column as the performance group information. If performance - groups have not been defined in cmp_marginal_params, the - method uses the cmp_sample dataframe to define the performance - groups, with each performance group having a single block. - - The method then checks if the performance groups are available - in the damage parameters dataframe, and removes any - performance groups that are not found in the damage - parameters. The method then groups the performance groups - based on the locations and directions of the components, and - calculates the cumulative sum of the blocks for each - group. The method then divides the performance groups into - batches of size specified by block_batch_size and assigns a - batch number to each group. Finally, the method groups the - performance groups by batch number, component, location, and - direction, and returns a dataframe that shows the number of - blocks for each batch. - - """ - - # Get the marginal parameters for the components from the - # asset model - cmp_marginals = self._asmnt.asset.cmp_marginal_params - - # Initialize the batch dataframe - pg_batch = None - - # If marginal parameters are available, use the 'Blocks' - # column to initialize the batch dataframe - if cmp_marginals is not None: - - # Check if the "Blocks" column exists in the component - # marginal parameters - if 'Blocks' in cmp_marginals.columns: - pg_batch = cmp_marginals['Blocks'].to_frame() - - # If the "Blocks" column doesn't exist, create a new dataframe - # with "Blocks" column filled with ones, using the component - # sample as the index. - if pg_batch is None: - cmp_sample = self._asmnt.asset.cmp_sample - pg_batch = pd.DataFrame(np.ones(cmp_sample.shape[1]), - index=cmp_sample.columns, - columns=['Blocks']) - - # Check if the damage model information exists for each - # performance group If not, remove the performance group from - # the analysis and log a warning message. 
- first_time = True - for pg_i in pg_batch.index: - - if np.any(np.isin(pg_i, self.damage_params.index)): - - blocks_i = pg_batch.loc[pg_i, 'Blocks'] - - # If the "Blocks" column contains a list of block - # weights, get the number of blocks from the shape of - # the list. - if np.atleast_1d(blocks_i).shape[0] != 1: - blocks_i = np.atleast_1d(blocks_i).shape[0] - - pg_batch.loc[pg_i, 'Blocks'] = blocks_i - - else: - pg_batch.drop(pg_i, inplace=True) - - if first_time: - self.log_msg("\nWARNING: Damage model information is " - "incomplete for some of the performance groups " - "and they had to be removed from the analysis:", - prepend_timestamp=False) - - first_time = False - - self.log_msg(f"{pg_i}", prepend_timestamp=False) - - # Convert the data types of the dataframe to be efficient - pg_batch = pg_batch.convert_dtypes() - - # Sum up the number of blocks for each performance group - pg_batch = pg_batch.groupby(['loc', 'dir', 'cmp', 'uid']).sum() - pg_batch.sort_index(axis=0, inplace=True) - - # Calculate cumulative sum of blocks - pg_batch['CBlocks'] = np.cumsum(pg_batch['Blocks'].values.astype(int)) - pg_batch['Batch'] = 0 - - # Group the performance groups into batches - for batch_i in range(1, pg_batch.shape[0] + 1): - - # Find the mask for blocks that are less than the batch - # size and greater than 0 - batch_mask = np.all( - np.array([pg_batch['CBlocks'] <= block_batch_size, - pg_batch['CBlocks'] > 0]), - axis=0) - - if np.sum(batch_mask) < 1: - batch_mask = np.full(batch_mask.shape, False) - batch_mask[np.where(pg_batch['CBlocks'] > 0)[0][0]] = True - - pg_batch.loc[batch_mask, 'Batch'] = batch_i - - # Decrement the cumulative block count by the max count in - # the current batch - pg_batch['CBlocks'] -= pg_batch.loc[ - pg_batch['Batch'] == batch_i, 'CBlocks'].max() - - # If the maximum cumulative block count is 0, exit the - # loop - if pg_batch['CBlocks'].max() == 0: - break - - # Group the performance groups by batch, component, location, - # and direction, and keep only the number of blocks for each - # group - pg_batch = pg_batch.groupby( - ['Batch', 'cmp', 'loc', 'dir', 'uid']).sum().loc[:, 'Blocks'].to_frame() - - return pg_batch - - def _complete_ds_cols(self, dmg_sample): - """ - Completes the damage sample dataframe with all possible damage - states for each component. - - Parameters - ---------- - dmg_sample : DataFrame - A DataFrame containing the damage state information for - each component block in the asset model. The columns are - MultiIndexed with levels corresponding to component - information ('cmp', 'loc', 'dir', 'uid') and the damage - state ('ds'). - - Returns - ------- - DataFrame - A DataFrame similar to `dmg_sample` but with additional - columns for missing damage states for each component, - ensuring that all possible damage states are - represented. The new columns are filled with zeros, - indicating no occurrence of those damage states in the - sample. - - Notes - ----- - - The method assumes that the damage model parameters - (`self.damage_params`) are available and contain the - necessary information to determine the total number of - damage states for each component. 
- - """ - # get a shortcut for the damage model parameters - DP = self.damage_params - - # Get the header for the results that we can use to identify - # cmp-loc-dir-uid sets - dmg_header = ( - dmg_sample.groupby(level=[0, 1, 2, 3], axis=1).first().iloc[:2, :] - ) - - # get the number of possible limit states - ls_list = [col for col in DP.columns.unique(level=0) if 'LS' in col] - - # initialize the result dataframe - res = pd.DataFrame() - - # walk through all components that have damage parameters provided - for cmp_id in DP.index: - - # get the component-specific parameters - cmp_data = DP.loc[cmp_id] - - # and initialize the damage state counter - ds_count = 0 - - # walk through all limit states for the component - for ls in ls_list: - - # check if the given limit state is defined - if not pd.isna(cmp_data[(ls, 'Theta_0')]): - - # check if there is only one damage state - if pd.isna(cmp_data[(ls, 'DamageStateWeights')]): - - ds_count += 1 - - else: - - # or if there are more than one, how many - ds_count += len( - cmp_data[(ls, 'DamageStateWeights')].split('|')) - - # get the list of valid cmp-loc-dir-uid sets - cmp_header = dmg_header.loc[:, [cmp_id, ]] - - # Create a dataframe where they are repeated ds_count times in the - # columns. The keys put the DS id in the first level of the - # multiindexed column - cmp_headers = pd.concat( - [cmp_header for ds_i in range(ds_count + 1)], - keys=[str(r) for r in range(0, ds_count + 1)], - axis=1) - cmp_headers.columns.names = ['ds', *cmp_headers.columns.names[1::]] - - # add these new columns to the result dataframe - res = pd.concat([res, cmp_headers], axis=1) - - # Fill the result dataframe with zeros and reorder its columns to have - # the damage states at the lowest like - matching the dmg_sample input - res = pd.DataFrame( - 0.0, - columns=res.columns.reorder_levels([1, 2, 3, 4, 0]), - index=dmg_sample.index, - ) - - # replace zeros wherever the dmg_sample has results - res.loc[:, dmg_sample.columns.to_list()] = dmg_sample - - return res - - def calculate( - self, dmg_process=None, block_batch_size=1000, scaling_specification=None - ): - """ - Calculate the damage state of each component block in the asset. - - """ - - self.log_div() - self.log_msg('Calculating damages...') - - sample_size = self._asmnt.demand.sample.shape[0] - - # Break up damage calculation and perform it by performance group. - # Compared to the simultaneous calculation of all PGs, this approach - # reduces demands on memory and increases the load on CPU. This leads - # to a more balanced workload on most machines for typical problems. - # It also allows for a straightforward extension with parallel - # computing. 
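# Illustrative aside: the batch loop below follows a plain "compute per batch,
# collect, concatenate" pattern, so peak memory scales with the batch size
# rather than with the whole asset. A generic pandas sketch of that pattern
# follows; calculate_in_batches and the dummy per-batch function are
# hypothetical.
import numpy as np
import pandas as pd

def calculate_in_batches(batches, compute_batch):
    """Run compute_batch on each batch and join the partial results."""
    partial_results = [compute_batch(batch) for batch in batches]
    return pd.concat(partial_results, axis=1)

demo = calculate_in_batches(
    batches=[['PG1', 'PG2'], ['PG3']],
    compute_batch=lambda pgs: pd.DataFrame(
        np.zeros((3, len(pgs))), columns=pgs),
)
# demo has three realizations (rows) and one all-zero column per group.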
- - # get the list of performance groups - qnt_samples = [] - - self.log_msg(f'Number of Performance Groups in Asset Model:' - f' {self._asmnt.asset.cmp_sample.shape[1]}', - prepend_timestamp=False) - - pg_batch = self._get_pg_batches(block_batch_size) - batches = pg_batch.index.get_level_values(0).unique() - - self.log_msg(f'Number of Component Blocks: {pg_batch["Blocks"].sum()}', - prepend_timestamp=False) - - self.log_msg(f"{len(batches)} batches of Performance Groups prepared " - "for damage assessment", - prepend_timestamp=False) - - # for PG_i in self._asmnt.asset.cmp_sample.columns: - for PGB_i in batches: - - PGB = pg_batch.loc[PGB_i] - - self.log_msg(f"Calculating damage for PG batch {PGB_i} with " - f"{int(PGB['Blocks'].sum())} blocks") - - # Generate an array with component capacities for each block and - # generate a second array that assigns a specific damage state to - # each component limit state. The latter is primarily needed to - # handle limit states with multiple, mutually exclusive DS options - capacity_sample, lsds_sample = self._generate_dmg_sample( - sample_size, PGB, scaling_specification) - - # Get the required demand types for the analysis - EDP_req = self._get_required_demand_type(PGB) - - # Create the demand vector - demand_dict = self._assemble_required_demand_data(EDP_req) - - # Evaluate the Damage State of each Component Block - ds_sample = self._evaluate_damage_state( - demand_dict, EDP_req, - capacity_sample, lsds_sample) - qnt_sample = self._prepare_dmg_quantities(PGB, ds_sample, dropzero=False) - - qnt_samples.append(qnt_sample) - - qnt_sample = pd.concat(qnt_samples, axis=1) - - # Create a comprehensive table with all possible DSs to have a robust - # input for the damage processes evaluation below - qnt_sample = self._complete_ds_cols(qnt_sample) - qnt_sample.sort_index(axis=1, inplace=True) - - self.log_msg("Raw damage calculation successful.", - prepend_timestamp=False) - - # Apply the prescribed damage process, if any - if dmg_process is not None: - self.log_msg("Applying damage processes...") - - # sort the processes - dmg_process = {key: dmg_process[key] for key in sorted(dmg_process)} - - for task in dmg_process.items(): - - self._perform_dmg_task(task, qnt_sample) - - self.log_msg("Damage processes successfully applied.", - prepend_timestamp=False) - - # If requested, remove columns with no damage from the sample - if self._asmnt.options.list_all_ds is False: - qnt_sample = qnt_sample.iloc[:, np.where(qnt_sample.sum(axis=0) != 0)[0]] - - self.sample = qnt_sample - - self.log_msg('Damage calculation successfully completed.') - - -class LossModel(PelicunModel): - """ - Parent object for loss models. - - All loss assessment methods should be children of this class. 
- - Parameters - ---------- - - """ - - def __init__(self, assessment): - - super().__init__(assessment) - - self._sample = None - self.loss_map = None - self.loss_params = None - self.loss_type = 'Generic' - - @property - def sample(self): - """ - sample property - """ - return self._sample - - def save_sample(self, filepath=None, save_units=False): - """ - Save loss sample to a csv file - - """ - self.log_div() - if filepath is not None: - self.log_msg('Saving loss sample...') - - cmp_units = self.loss_params[('DV', 'Unit')] - dv_units = pd.Series(index=self.sample.columns, name='Units', - dtype='object') - - for cmp_id, dv_type in cmp_units.index: - dv_units.loc[(dv_type, cmp_id)] = cmp_units.at[(cmp_id, dv_type)] - - res = file_io.save_to_csv( - self.sample, filepath, units=dv_units, - unit_conversion_factors=self._asmnt.unit_conversion_factors, - use_simpleindex=(filepath is not None), - log=self._asmnt.log) - - if filepath is not None: - self.log_msg('Loss sample successfully saved.', - prepend_timestamp=False) - return None - - # else: - units = res.loc["Units"] - res.drop("Units", inplace=True) - - if save_units: - return res.astype(float), units - - return res.astype(float) - - def load_sample(self, filepath): - """ - Load damage sample data. - - """ - self.log_div() - self.log_msg('Loading loss sample...') - - self._sample = file_io.load_data( - filepath, self._asmnt.unit_conversion_factors, log=self._asmnt.log) - - self.log_msg('Loss sample successfully loaded.', prepend_timestamp=False) - - def load_model(self, data_paths, mapping_path, decision_variables=None): - """ - Load the list of prescribed consequence models and their parameters - - Parameters - ---------- - data_paths: list of string or DataFrame - List of paths to data files with consequence model - parameters. Default XY datasets can be accessed as - PelicunDefault/XY. The list can also contain DataFrame - objects, in which case that data is used directly. - mapping_path: string - Path to a csv file that maps drivers (i.e., damage or edp data) to - loss models. - decision_variables: list of string, optional - List of decision variables to include in the analysis. If None, - all variables provided in the consequence models are included. When - a list is provided, only variables in the list will be included. 
- """ - - self.log_div() - self.log_msg(f'Loading loss map for {self.loss_type}...') - - loss_map = file_io.load_data( - mapping_path, None, orientation=1, reindex=False, log=self._asmnt.log - ) - - loss_map['Driver'] = loss_map.index.values - loss_map['Consequence'] = loss_map[self.loss_type] - loss_map.index = np.arange(loss_map.shape[0]) - loss_map = loss_map.loc[:, ['Driver', 'Consequence']] - loss_map.dropna(inplace=True) - - self.loss_map = loss_map - - self.log_msg("Loss map successfully parsed.", prepend_timestamp=False) - - self.log_div() - self.log_msg(f'Loading loss parameters for {self.loss_type}...') - - # replace default flag with default data path - for d_i, data_path in enumerate(data_paths): - - if 'PelicunDefault/' in data_path: - data_paths[d_i] = data_path.replace( - 'PelicunDefault/', - f'{base.pelicun_path}/resources/SimCenterDBDL/') - - data_list = [] - # load the data files one by one - for data_path in data_paths: - data = file_io.load_data( - data_path, None, orientation=1, reindex=False, log=self._asmnt.log - ) - - data_list.append(data) - - loss_params = pd.concat(data_list, axis=0) - - # drop redefinitions of components - loss_params = loss_params.groupby( - level=[0, 1]).first().transform(lambda x: x.fillna(np.nan)) - # note: .groupby introduces None entries. We replace them with - # NaN for consistency. - - # keep only the relevant data - loss_cmp = np.unique(self.loss_map['Consequence'].values) - - available_cmp = loss_params.index.unique(level=0) - missing_cmp = [] - for cmp in loss_cmp: - if cmp not in available_cmp: - missing_cmp.append(cmp) - - if len(missing_cmp) > 0: - self.log_msg("\nWARNING: The loss model does not provide " - "consequence information for the following component(s) " - f"in the loss map: {missing_cmp}. They are removed from " - "further analysis\n", - prepend_timestamp=False) - - self.loss_map = self.loss_map.loc[ - ~loss_map['Consequence'].isin(missing_cmp)] - loss_cmp = np.unique(self.loss_map['Consequence'].values) - - loss_params = loss_params.loc[idx[loss_cmp, :], :] - - # drop unused damage states - DS_list = loss_params.columns.get_level_values(0).unique() - DS_to_drop = [] - for DS in DS_list: - if np.all(pd.isna(loss_params.loc[:, idx[DS, :]].values)) is True: - DS_to_drop.append(DS) - - loss_params.drop(columns=DS_to_drop, level=0, inplace=True) - - # convert values to internal base units - for DS in loss_params.columns.unique(level=0): - if DS.startswith('DS'): - loss_params.loc[:, DS] = self.convert_marginal_params( - loss_params.loc[:, DS].copy(), - loss_params[('DV', 'Unit')], - loss_params[('Quantity', 'Unit')] - ).values - - # check for components with incomplete loss information - cmp_incomplete_list = loss_params.loc[ - loss_params[('Incomplete', '')] == 1].index - - if len(cmp_incomplete_list) > 0: - loss_params.drop(cmp_incomplete_list, inplace=True) - - self.log_msg( - "\n" - "WARNING: Loss information is incomplete for the " - f"following component(s) {cmp_incomplete_list}. " - "They were removed from the analysis." 
- "\n", - prepend_timestamp=False) - - # filter decision variables, if needed - if decision_variables is not None: - - loss_params = loss_params.reorder_levels([1, 0]) - - available_DVs = loss_params.index.unique(level=0) - filtered_DVs = [] - - for DV_i in decision_variables: - - if DV_i in available_DVs: - filtered_DVs.append(DV_i) - - loss_params = loss_params.loc[filtered_DVs, :].reorder_levels([1, 0]) - - self.loss_params = loss_params.sort_index(axis=1) - - self.log_msg("Loss parameters successfully parsed.", - prepend_timestamp=False) - - def aggregate_losses(self): - """ - This is placeholder method. - - The method of aggregating the Decision Variable sample is specific to - each DV and needs to be implemented in every child of the LossModel - independently. - """ - raise NotImplementedError - - def _generate_DV_sample(self, dmg_quantities, sample_size): - """ - This is placeholder method. - - The method of sampling decision variables in Decision - Variable-specific and needs to be implemented in every child - of the LossModel independently. - """ - raise NotImplementedError - - def calculate(self): - """ - Calculate the consequences of each component block damage in - the asset. - - """ - - self.log_div() - self.log_msg("Calculating losses...") - - drivers = [d for d, _ in self.loss_map['Driver']] - - if 'DMG' in drivers: - sample_size = self._asmnt.damage.sample.shape[0] - elif 'DEM' in drivers: - sample_size = self._asmnt.demand.sample.shape[0] - else: - raise ValueError( - 'Invalid loss drivers. Check the specified loss map.') - - # First, get the damaged quantities in each damage state for - # each component of interest. - dmg_q = self._asmnt.damage.sample.copy() - - # Now sample random Decision Variables - # Note that this method is DV-specific and needs to be - # implemented in every child of the LossModel independently. - self._generate_DV_sample(dmg_q, sample_size) - - self.log_msg("Loss calculation successful.") - - -class BldgRepairModel(LossModel): - """ - Manages building repair consequence assessments. - - Parameters - ---------- - - """ - - def __init__(self, assessment): - - super().__init__(assessment) - - self.loss_type = 'BldgRepair' - - # def load_model(self, data_paths, mapping_path): - - # super().load_model(data_paths, mapping_path) - - # def calculate(self): - - # super().calculate() - - def _create_DV_RVs(self, case_list): - """ - Prepare the random variables used for repair cost and time simulation. - - Parameters - ---------- - case_list: MultiIndex - Index with cmp-loc-dir-ds descriptions that identify the RVs - we need for the simulation. - - Raises - ------ - ValueError - When any Loss Driver is not recognized. 
- """ - - RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) - LP = self.loss_params - - # make ds the second level in the MultiIndex - case_DF = pd.DataFrame( - index=case_list.reorder_levels([0, 4, 1, 2, 3]), columns=[0, ]) - case_DF.sort_index(axis=0, inplace=True) - driver_cmps = case_list.get_level_values(0).unique() - - rv_count = 0 - - # for each loss component - for loss_cmp_id in self.loss_map.index.values: - - # load the corresponding parameters - driver_type, driver_cmp_id = self.loss_map.loc[loss_cmp_id, 'Driver'] - conseq_cmp_id = self.loss_map.loc[loss_cmp_id, 'Consequence'] - - # currently, we only support DMG-based loss calculations - # but this will be extended in the very near future - if driver_type != 'DMG': - raise ValueError(f"Loss Driver type not recognized: " - f"{driver_type}") - - # load the parameters - # TODO: remove specific DV_type references and make the code below - # generate parameters for any DV_types provided - if (conseq_cmp_id, 'Cost') in LP.index: - cost_params = LP.loc[(conseq_cmp_id, 'Cost'), :] - else: - cost_params = None - - if (conseq_cmp_id, 'Time') in LP.index: - time_params = LP.loc[(conseq_cmp_id, 'Time'), :] - else: - time_params = None - - if (conseq_cmp_id, 'Carbon') in LP.index: - carbon_params = LP.loc[(conseq_cmp_id, 'Carbon'), :] - else: - carbon_params = None - - if (conseq_cmp_id, 'Energy') in LP.index: - energy_params = LP.loc[(conseq_cmp_id, 'Energy'), :] - else: - energy_params = None - - if driver_cmp_id not in driver_cmps: - continue - - for ds in case_DF.loc[driver_cmp_id, :].index.unique(level=0): - - if ds == '0': - continue - - if cost_params is not None: - - cost_params_DS = cost_params[f'DS{ds}'] - - cost_family = cost_params_DS.get('Family', np.nan) - cost_theta = [cost_params_DS.get(f"Theta_{t_i}", np.nan) - for t_i in range(3)] - - # If the first parameter is controlled by a function, we use - # 1.0 in its place and will scale the results in a later - # step - if '|' in str(cost_theta[0]): - # if isinstance(cost_theta[0], str): - cost_theta[0] = 1.0 - - else: - cost_family = np.nan - - if time_params is not None: - - time_params_DS = time_params[f'DS{ds}'] - - time_family = time_params_DS.get('Family', np.nan) - time_theta = [time_params_DS.get(f"Theta_{t_i}", np.nan) - for t_i in range(3)] - - # If the first parameter is controlled by a function, we use - # 1.0 in its place and will scale the results in a later - # step - if '|' in str(time_theta[0]): - # if isinstance(time_theta[0], str): - time_theta[0] = 1.0 - - else: - time_family = np.nan - - if carbon_params is not None: - - carbon_params_DS = carbon_params[f'DS{ds}'] - - carbon_family = carbon_params_DS.get('Family', np.nan) - carbon_theta = [ - carbon_params_DS.get(f"Theta_{t_i}", np.nan) - for t_i in range(3) - ] - - # If the first parameter is controlled by a function, we use - # 1.0 in its place and will scale the results in a later - # step - if '|' in str(carbon_theta[0]): - # if isinstance(carbon_theta[0], str): - carbon_theta[0] = 1.0 - - else: - carbon_family = np.nan - - if energy_params is not None: - - energy_params_DS = energy_params[f'DS{ds}'] - - energy_family = energy_params_DS.get('Family', np.nan) - energy_theta = [ - energy_params_DS.get(f"Theta_{t_i}", np.nan) - for t_i in range(3) - ] - - # If the first parameter is controlled by a function, we use - # 1.0 in its place and will scale the results in a later - # step - if '|' in str(energy_theta[0]): - # if isinstance(energy_theta[0], str): - energy_theta[0] = 1.0 - - else: - 
energy_family = np.nan - - # If neither of the DV_types has a stochastic model assigned, - # we do not need random variables for this DS - if ( - (pd.isna(cost_family)) - and (pd.isna(time_family)) - and (pd.isna(carbon_family)) - and (pd.isna(energy_family)) - ): - continue - - # Otherwise, load the loc-dir cases - loc_dir_uid = case_DF.loc[(driver_cmp_id, ds)].index.values - - for loc, direction, uid in loc_dir_uid: - - # assign cost RV - if pd.isna(cost_family) is False: - - cost_rv_tag = ( - f'Cost-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' - ) - - RV_reg.add_RV( - uq.RandomVariable( - name=cost_rv_tag, - distribution=cost_family, - theta=cost_theta, - truncation_limits=[0., np.nan] - ) - ) - rv_count += 1 - - # assign time RV - if pd.isna(time_family) is False: - time_rv_tag = ( - f'Time-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' - ) - - RV_reg.add_RV(uq.RandomVariable( - name=time_rv_tag, - distribution=time_family, - theta=time_theta, - truncation_limits=[0., np.nan] - )) - rv_count += 1 - - # assign time RV - if pd.isna(carbon_family) is False: - carbon_rv_tag = ( - f'Carbon-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' - ) - - RV_reg.add_RV(uq.RandomVariable( - name=carbon_rv_tag, - distribution=carbon_family, - theta=carbon_theta, - truncation_limits=[0., np.nan] - )) - rv_count += 1 - - # assign time RV - if pd.isna(energy_family) is False: - energy_rv_tag = ( - f'Energy-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' - ) - - RV_reg.add_RV(uq.RandomVariable( - name=energy_rv_tag, - distribution=energy_family, - theta=energy_theta, - truncation_limits=[0., np.nan] - )) - rv_count += 1 - - # assign correlation between RVs across DV_types - # TODO: add more DV_types and handle cases with only a - # subset of them being defined - if ((pd.isna(cost_family) is False) and ( - pd.isna(time_family) is False) and ( - self._asmnt.options.rho_cost_time != 0.0)): - - rho = self._asmnt.options.rho_cost_time - - RV_reg.add_RV_set(uq.RandomVariableSet( - f'DV-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}_set', - list(RV_reg.RVs([cost_rv_tag, time_rv_tag]).values()), - np.array([[1.0, rho], [rho, 1.0]]))) - - self.log_msg(f"\n{rv_count} random variables created.", - prepend_timestamp=False) - - if rv_count > 0: - return RV_reg - # else: - return None - - def _calc_median_consequence(self, eco_qnt): - """ - Calculate the median repair consequence for each loss component. 
- - """ - - medians = {} - - DV_types = self.loss_params.index.unique(level=1) - - # for DV_type, DV_type_scase in zip(['COST', 'TIME'], ['Cost', 'Time']): - for DV_type in DV_types: - - cmp_list = [] - median_list = [] - - for loss_cmp_id in self.loss_map.index: - - driver_type, driver_cmp = self.loss_map.loc[ - loss_cmp_id, 'Driver'] - loss_cmp_name = self.loss_map.loc[loss_cmp_id, 'Consequence'] - - # check if the given DV type is available as an output for the - # selected component - if (loss_cmp_name, DV_type) not in self.loss_params.index: - continue - - if driver_type != 'DMG': - raise ValueError(f"Loss Driver type not recognized: " - f"{driver_type}") - - if driver_cmp not in eco_qnt.columns.get_level_values( - 0).unique(): - continue - - ds_list = [] - sub_medians = [] - - for ds in self.loss_params.columns.get_level_values(0).unique(): - - if not ds.startswith('DS'): - continue - - ds_id = ds[2:] - - if ds_id == '0': - continue - - loss_params_DS = self.loss_params.loc[ - (loss_cmp_name, DV_type), - ds] - - # check if theta_0 is defined - theta_0 = loss_params_DS.get('Theta_0', np.nan) - - if pd.isna(theta_0): - continue - - # check if the distribution type is supported - family = loss_params_DS.get('Family', np.nan) - - if ((not pd.isna(family)) and ( - family not in [ - 'normal', 'lognormal', 'deterministic'])): - raise ValueError(f"Loss Distribution of type {family} " - f"not supported.") - - # If theta_0 is a scalar - try: - theta_0 = float(theta_0) - - if pd.isna(loss_params_DS.get('Family', np.nan)): - - # if theta_0 is constant, then use it directly - f_median = prep_constant_median_DV(theta_0) - - else: - - # otherwise use a constant 1.0 as the median - # The random variable will be generated as a - # variation from this 1.0 and added in a later step. 
- f_median = prep_constant_median_DV(1.0) - - except ValueError: - - # otherwise, use the multilinear function - all_vals = np.array( - [val.split(',') for val in theta_0.split('|')], - dtype=float) - medns = all_vals[0] - qnts = all_vals[1] - f_median = prep_bounded_multilinear_median_DV( - medns, qnts) - - # get the corresponding aggregate damage quantities - # to consider economies of scale - if 'ds' in eco_qnt.columns.names: - - avail_ds = ( - eco_qnt.loc[:, driver_cmp].columns.unique(level=0)) - - if (ds_id not in avail_ds): - continue - - eco_qnt_i = eco_qnt.loc[:, (driver_cmp, ds_id)].copy() - - else: - eco_qnt_i = eco_qnt.loc[:, driver_cmp].copy() - - if isinstance(eco_qnt_i, pd.Series): - eco_qnt_i = eco_qnt_i.to_frame() - eco_qnt_i.columns = ['X'] - eco_qnt_i.columns.name = 'del' - - # generate the median values for each realization - eco_qnt_i.loc[:, :] = f_median(eco_qnt_i.values) - - sub_medians.append(eco_qnt_i) - ds_list.append(ds_id) - - if len(ds_list) > 0: - - # combine medians across damage states into one DF - median_list.append(pd.concat(sub_medians, axis=1, - keys=ds_list)) - cmp_list.append(loss_cmp_id) - - if len(cmp_list) > 0: - - # combine medians across components into one DF - result = pd.concat(median_list, axis=1, keys=cmp_list) - - # remove the extra column header level - if 'del' in result.columns.names: - result.columns = result.columns.droplevel('del') - - # name the remaining column header levels - if self._asmnt.options.eco_scale["AcrossFloors"] is True: - result.columns.names = ['cmp', 'ds'] - - else: - result.columns.names = ['cmp', 'ds', 'loc'] - - # save the results to the returned dictionary - medians.update({DV_type: result}) - - return medians - - def aggregate_losses(self): - """ - Aggregates repair consequences across components. - - Repair costs are simply summed up for each realization while repair - times are aggregated to provide lower and upper limits of the total - repair time using the assumption of parallel and sequential repair of - floors, respectively. Repairs within each floor are assumed to occur - sequentially. 
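# --- Editorial sketch, not part of the patch ------------------------------
# A minimal, self-contained illustration of the aggregation rule described
# above: repair costs are summed over all locations, while repair time is
# reported both as a sum over locations (sequential repair of floors) and
# as a maximum over locations (parallel repair). All numbers are made up.
import pandas as pd

# per-realization repair time per location (rows: realizations)
time_per_loc = pd.DataFrame(
    {'loc1': [10.0, 12.0], 'loc2': [4.0, 20.0], 'loc3': [6.0, 3.0]}
)

repair_time_sequential = time_per_loc.sum(axis=1)  # floors repaired one after another
repair_time_parallel = time_per_loc.max(axis=1)    # all floors repaired at once

print(repair_time_sequential.tolist())  # [20.0, 35.0]
print(repair_time_parallel.tolist())    # [10.0, 20.0]
# ---------------------------------------------------------------------------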
- """ - - self.log_div() - self.log_msg("Aggregating repair consequences...") - - DV = self.sample - - # group results by DV type and location - DVG = DV.groupby(level=[0, 4], axis=1).sum() - - # create the summary DF - df_agg = pd.DataFrame(index=DV.index, - columns=['repair_cost', - 'repair_time-parallel', - 'repair_time-sequential', - 'repair_carbon', - 'repair_energy']) - - if 'Cost' in DVG.columns: - df_agg['repair_cost'] = DVG['Cost'].sum(axis=1) - else: - df_agg = df_agg.drop('repair_cost', axis=1) - - if 'Time' in DVG.columns: - df_agg['repair_time-sequential'] = DVG['Time'].sum(axis=1) - - df_agg['repair_time-parallel'] = DVG['Time'].max(axis=1) - else: - df_agg = df_agg.drop(['repair_time-parallel', - 'repair_time-sequential'], - axis=1) - - if 'Carbon' in DVG.columns: - df_agg['repair_carbon'] = DVG['Carbon'].sum(axis=1) - else: - df_agg = df_agg.drop('repair_carbon', axis=1) - - if 'Energy' in DVG.columns: - df_agg['repair_energy'] = DVG['Energy'].sum(axis=1) - else: - df_agg = df_agg.drop('repair_energy', axis=1) - - # convert units - - cmp_units = self.loss_params[('DV', 'Unit')].groupby(level=[1, ]).agg( - lambda x: x.value_counts().index[0]) - - dv_units = pd.Series(index=df_agg.columns, name='Units', dtype='object') - - if 'Cost' in DVG.columns: - dv_units['repair_cost'] = cmp_units['Cost'] - - if 'Time' in DVG.columns: - dv_units['repair_time-parallel'] = cmp_units['Time'] - dv_units['repair_time-sequential'] = cmp_units['Time'] - - if 'Carbon' in DVG.columns: - dv_units['repair_carbon'] = cmp_units['Carbon'] - - if 'Energy' in DVG.columns: - dv_units['repair_energy'] = cmp_units['Energy'] - - df_agg = file_io.save_to_csv( - df_agg, None, units=dv_units, - unit_conversion_factors=self._asmnt.unit_conversion_factors, - use_simpleindex=False, - log=self._asmnt.log) - - df_agg.drop("Units", inplace=True) - - # convert header - - df_agg = base.convert_to_MultiIndex(df_agg, axis=1) - - self.log_msg("Repair consequences successfully aggregated.") - - return df_agg.astype(float) - - def _generate_DV_sample(self, dmg_quantities, sample_size): - """ - Generate a sample of repair costs and times. - - Parameters - ---------- - dmg_quantities: DataFrame - A table with the quantity of damage experienced in each damage state - of each performance group at each location and direction. You can use - the prepare_dmg_quantities method in the DamageModel to get such a - DF. - sample_size: integer - The number of realizations to generate. - - Raises - ------ - ValueError - When any Loss Driver is not recognized. 
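# --- Editorial sketch, not part of the patch ------------------------------
# Illustration of the economies-of-scale aggregation controlled by
# options.eco_scale (see the groupby calls in the implementation below).
# With "AcrossFloors" enabled (and damage states kept separate), damaged
# quantities are pooled over locations and directions before the
# consequence functions are evaluated; otherwise each location is kept
# separate. Component labels and quantities are made up.
import pandas as pd

cols = pd.MultiIndex.from_tuples(
    [('CMP.A', '1', '1', '0', '1'), ('CMP.A', '2', '1', '0', '1')],
    names=['cmp', 'loc', 'dir', 'uid', 'ds'],
)
dmg_quantities = pd.DataFrame([[2.0, 3.0]], columns=cols)

pooled = dmg_quantities.groupby(level=['cmp', 'ds'], axis=1).sum()
per_floor = dmg_quantities.groupby(level=['cmp', 'loc', 'ds'], axis=1).sum()
print(pooled.iloc[0].tolist())     # [5.0] -> one quantity feeds the consequence curve
print(per_floor.iloc[0].tolist())  # [2.0, 3.0]
# ---------------------------------------------------------------------------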
- """ - - # calculate the quantities for economies of scale - self.log_msg("\nAggregating damage quantities...", - prepend_timestamp=False) - - if self._asmnt.options.eco_scale["AcrossFloors"]: - - if self._asmnt.options.eco_scale["AcrossDamageStates"]: - - eco_levels = [0, ] - eco_columns = ['cmp', ] - - else: - - eco_levels = [0, 4] - eco_columns = ['cmp', 'ds'] - - elif self._asmnt.options.eco_scale["AcrossDamageStates"]: - - eco_levels = [0, 1] - eco_columns = ['cmp', 'loc'] - - else: - - eco_levels = [0, 1, 4] - eco_columns = ['cmp', 'loc', 'ds'] - - eco_group = dmg_quantities.groupby(level=eco_levels, axis=1) - eco_qnt = eco_group.sum().mask(eco_group.count() == 0, np.nan) - assert eco_qnt.columns.names == eco_columns - - self.log_msg("Successfully aggregated damage quantities.", - prepend_timestamp=False) - - # apply the median functions, if needed, to get median consequences for - # each realization - self.log_msg("\nCalculating the median repair consequences...", - prepend_timestamp=False) - - medians = self._calc_median_consequence(eco_qnt) - - self.log_msg("Successfully determined median repair consequences.", - prepend_timestamp=False) - - # combine the median consequences with the samples of deviation from the - # median to get the consequence realizations. - self.log_msg("\nConsidering deviations from the median values to obtain " - "random DV sample...") - - self.log_msg("Preparing random variables for repair cost and time...", - prepend_timestamp=False) - RV_reg = self._create_DV_RVs(dmg_quantities.columns) - - if RV_reg is not None: - RV_reg.generate_sample( - sample_size=sample_size, method=self._asmnt.options.sampling_method) - - std_sample = base.convert_to_MultiIndex( - pd.DataFrame(RV_reg.RV_sample), axis=1).sort_index(axis=1) - std_sample.columns.names = ['dv', 'cmp', 'ds', 'loc', 'dir', 'uid'] - - # convert column names to int - std_idx = std_sample.columns.levels - - std_sample.columns = std_sample.columns.set_levels( - [ - std_idx[0], - std_idx[1].astype(int), - std_idx[2], - std_idx[3], - std_idx[4], - std_idx[5], - ] - ) - - std_sample.sort_index(axis=1, inplace=True) - - else: - std_sample = None - - self.log_msg(f"\nSuccessfully generated {sample_size} realizations of " - "deviation from the median consequences.", - prepend_timestamp=False) - - res_list = [] - key_list = [] - - dmg_quantities.columns = dmg_quantities.columns.reorder_levels( - [0, 4, 1, 2, 3] - ) - dmg_quantities.sort_index(axis=1, inplace=True) - - DV_types = self.loss_params.index.unique(level=1) - - if isinstance(std_sample, pd.DataFrame): - std_DV_types = std_sample.columns.unique(level=0) - else: - std_DV_types = [] - - # for DV_type, _ in zip(['COST', 'TIME'], ['Cost', 'Time']): - for DV_type in DV_types: - - if DV_type in std_DV_types: - prob_cmp_list = std_sample[DV_type].columns.unique(level=0) - else: - prob_cmp_list = [] - - cmp_list = [] - - if DV_type not in medians: - continue - - for cmp_i in medians[DV_type].columns.unique(level=0): - - # check if there is damage in the component - driver_type, dmg_cmp_i = self.loss_map.loc[cmp_i, 'Driver'] - loss_cmp_i = self.loss_map.loc[cmp_i, 'Consequence'] - - if driver_type != 'DMG': - raise ValueError(f"Loss Driver type not " - f"recognized: {driver_type}") - - if not (dmg_cmp_i - in dmg_quantities.columns.unique(level=0)): - continue - - ds_list = [] - - for ds in medians[DV_type].loc[:, cmp_i].columns.unique(level=0): - - loc_list = [] - - for loc_id, loc in enumerate( - dmg_quantities.loc[ - :, (dmg_cmp_i, 
ds)].columns.unique(level=0)): - - if ((self._asmnt.options.eco_scale[ - "AcrossFloors"] is True) and ( - loc_id > 0)): - break - - if self._asmnt.options.eco_scale["AcrossFloors"] is True: - median_i = medians[DV_type].loc[:, (cmp_i, ds)] - dmg_i = dmg_quantities.loc[:, (dmg_cmp_i, ds)] - - if cmp_i in prob_cmp_list: - std_i = std_sample.loc[:, (DV_type, cmp_i, ds)] - else: - std_i = None - - else: - median_i = medians[DV_type].loc[:, (cmp_i, ds, loc)] - dmg_i = dmg_quantities.loc[:, (dmg_cmp_i, ds, loc)] - - if cmp_i in prob_cmp_list: - std_i = std_sample.loc[:, (DV_type, cmp_i, ds, loc)] - else: - std_i = None - - if std_i is not None: - res_list.append(dmg_i.mul(median_i, axis=0) * std_i) - else: - res_list.append(dmg_i.mul(median_i, axis=0)) - - loc_list.append(loc) - - if self._asmnt.options.eco_scale["AcrossFloors"] is True: - ds_list += [ds, ] - else: - ds_list += [(ds, loc) for loc in loc_list] - - if self._asmnt.options.eco_scale["AcrossFloors"] is True: - cmp_list += [(loss_cmp_i, dmg_cmp_i, ds) for ds in ds_list] - else: - cmp_list += [ - (loss_cmp_i, dmg_cmp_i, ds, loc) for ds, loc in ds_list] - - if self._asmnt.options.eco_scale["AcrossFloors"] is True: - key_list += [(DV_type, loss_cmp_i, dmg_cmp_i, ds) - for loss_cmp_i, dmg_cmp_i, ds in cmp_list] - else: - key_list += [(DV_type, loss_cmp_i, dmg_cmp_i, ds, loc) - for loss_cmp_i, dmg_cmp_i, ds, loc in cmp_list] - - lvl_names = ['dv', 'loss', 'dmg', 'ds', 'loc', 'dir', 'uid'] - DV_sample = pd.concat(res_list, axis=1, keys=key_list, - names=lvl_names) - - DV_sample = DV_sample.fillna(0).convert_dtypes() - DV_sample.columns.names = lvl_names - - # Get the flags for replacement consequence trigger - DV_sum = DV_sample.groupby(level=[1, ], axis=1).sum() - if 'replacement' in DV_sum.columns: - - # When the 'replacement' consequence is triggered, all - # local repair consequences are discarded. Note that - # global consequences are assigned to location '0'. - - id_replacement = DV_sum['replacement'] > 0 - - # get the list of non-zero locations - locs = DV_sample.columns.get_level_values(4).unique().values - - locs = locs[locs != '0'] - - DV_sample.loc[id_replacement, idx[:, :, :, :, locs]] = 0.0 - - self._sample = DV_sample - - self.log_msg("Successfully obtained DV sample.", - prepend_timestamp=False) - - -def prep_constant_median_DV(median): - """ - Returns a constant median Decision Variable (DV) function. - - Parameters - ---------- - median: float - The median DV for a consequence function with fixed median. - - Returns - ------- - f: callable - A function that returns the constant median DV for all component - quantities. - """ - def f(*args): - # pylint: disable=unused-argument - return median - - return f - - -def prep_bounded_multilinear_median_DV(medians, quantities): - """ - Returns a bounded multilinear median Decision Variable (DV) function. - - The median DV equals the min and max values when the quantity is - outside of the prescribed quantity bounds. When the quantity is within the - bounds, the returned median is calculated by linear interpolation. - - Parameters - ---------- - medians: ndarray - Series of values that define the y coordinates of the multilinear DV - function. - quantities: ndarray - Series of values that define the component quantities corresponding to - the series of medians and serving as the x coordinates of the - multilinear DV function. - - Returns - ------- - f: callable - A function that returns the median DV given the quantity of damaged - components. 
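# --- Editorial sketch, not part of the patch ------------------------------
# Numeric illustration of the bounded multilinear median described above,
# using np.interp exactly as the function body below does. The median unit
# consequence drops as the damaged quantity grows (economies of scale), and
# the curve is clamped at the first/last point outside the quantity bounds.
# The control points are made-up values.
import numpy as np

quantities = np.array([1.0, 5.0, 10.0])    # x: damaged component quantity
medians = np.array([300.0, 250.0, 200.0])  # y: median unit consequence

print(np.interp(0.5, quantities, medians))   # 300.0 (clamped below the first point)
print(np.interp(7.5, quantities, medians))   # 225.0 (linear interpolation)
print(np.interp(20.0, quantities, medians))  # 200.0 (clamped above the last point)
# ---------------------------------------------------------------------------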
- """ - def f(quantity): - if quantity is None: - raise ValueError( - 'A bounded linear median Decision Variable function called ' - 'without specifying the quantity of damaged components') - - q_array = np.asarray(quantity, dtype=np.float64) - - # calculate the median consequence given the quantity of damaged - # components - output = np.interp(q_array, quantities, medians) - - return output - - return f diff --git a/pelicun/model/__init__.py b/pelicun/model/__init__.py new file mode 100644 index 000000000..fdb212f1d --- /dev/null +++ b/pelicun/model/__init__.py @@ -0,0 +1,49 @@ +""" +-*- coding: utf-8 -*- + +Copyright (c) 2018 Leland Stanford Junior University +Copyright (c) 2018 The Regents of the University of California + +This file is part of pelicun. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +You should have received a copy of the BSD 3-Clause License along with +pelicun. If not, see . + +Contributors: +Adam Zsarnóczay +""" + +# flake8: noqa + +from .pelicun_model import PelicunModel +from .demand_model import DemandModel +from .asset_model import AssetModel +from .damage_model import DamageModel +from .loss_model import LossModel +from .loss_model import RepairModel diff --git a/pelicun/model/asset_model.py b/pelicun/model/asset_model.py new file mode 100644 index 000000000..3907ca692 --- /dev/null +++ b/pelicun/model/asset_model.py @@ -0,0 +1,458 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Leland Stanford Junior University +# Copyright (c) 2018 The Regents of the University of California +# +# This file is part of pelicun. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. 
Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# You should have received a copy of the BSD 3-Clause License along with +# pelicun. If not, see . +# +# Contributors: +# Adam Zsarnóczay +# John Vouvakis Manousakis + +""" +This file defines the AssetModel object and its methods. + +.. rubric:: Contents + +.. autosummary:: + + AssetModel + +""" + +from itertools import product +import numpy as np +import pandas as pd +from .pelicun_model import PelicunModel +from .. import base +from .. import uq +from .. import file_io + + +idx = base.idx + + +class AssetModel(PelicunModel): + """ + Manages asset information used in assessments. + + Parameters + ---------- + + """ + + def __init__(self, assessment): + super().__init__(assessment) + + self.cmp_marginal_params = None + self.cmp_units = None + + self._cmp_RVs = None + self._cmp_sample = None + + @property + def cmp_sample(self): + """ + Assigns the _cmp_sample attribute if it is None and returns + the component sample. 
+ """ + + if self._cmp_sample is None: + cmp_sample = pd.DataFrame(self._cmp_RVs.RV_sample) + cmp_sample.sort_index(axis=0, inplace=True) + cmp_sample.sort_index(axis=1, inplace=True) + + cmp_sample = base.convert_to_MultiIndex(cmp_sample, axis=1)['CMP'] + + cmp_sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] + + self._cmp_sample = cmp_sample + + else: + cmp_sample = self._cmp_sample + + return cmp_sample + + def save_cmp_sample(self, filepath=None, save_units=False): + """ + Save component quantity sample to a csv file + + """ + + self.log_div() + if filepath is not None: + self.log_msg('Saving asset components sample...') + + # prepare a units array + sample = self.cmp_sample + + units = pd.Series(name='Units', index=sample.columns, dtype=object) + + for cmp_id, unit_name in self.cmp_units.items(): + units.loc[cmp_id, :] = unit_name + + res = file_io.save_to_csv( + sample, + filepath, + units=units, + unit_conversion_factors=self._asmnt.unit_conversion_factors, + use_simpleindex=(filepath is not None), + log=self._asmnt.log, + ) + + if filepath is not None: + self.log_msg( + 'Asset components sample successfully saved.', + prepend_timestamp=False, + ) + return None + # else: + units = res.loc["Units"] + res.drop("Units", inplace=True) + + if save_units: + return res.astype(float), units + + return res.astype(float) + + def load_cmp_sample(self, filepath): + """ + Load component quantity sample from a csv file + + """ + + self.log_div() + self.log_msg('Loading asset components sample...') + + sample, units = file_io.load_data( + filepath, + self._asmnt.unit_conversion_factors, + return_units=True, + log=self._asmnt.log, + ) + + sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] + + self._cmp_sample = sample + + self.cmp_units = units.groupby(level=0).first() + + self.log_msg( + 'Asset components sample successfully loaded.', prepend_timestamp=False + ) + + def load_cmp_model(self, data_source): + """ + Load the model that describes component quantities in the asset. + + Parameters + ---------- + data_source: string or dict + If string, the data_source is a file prefix ( in the + following description) that identifies the following files: + _marginals.csv, _empirical.csv, + _correlation.csv. If dict, the data source is a dictionary + with the following optional keys: 'marginals', 'empirical', and + 'correlation'. The value under each key shall be a DataFrame. 
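# --- Editorial sketch, not part of the patch ------------------------------
# Shows the dictionary form of data_source described above. The component
# ID, units and quantity are made up; the 'Location' and 'Direction' strings
# use the shorthands parsed by the helper functions that follow ("1--3"
# expands to floors 1, 2, 3 and "1,2" to the two directions).
import pandas as pd

cmp_marginals = pd.DataFrame(
    {
        'Units': ['ea'],
        'Location': ['1--3'],
        'Direction': ['1,2'],
        'Theta_0': [4.0],  # component quantity per location/direction
    },
    index=['B.10.41.001a'],  # hypothetical component ID
)

# asmnt is an Assessment object with the number of stories already defined:
# asmnt.asset.load_cmp_model({'marginals': cmp_marginals})
# ---------------------------------------------------------------------------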
+ """ + + def get_locations(loc_str): + try: + res = str(int(loc_str)) + return np.array( + [ + res, + ] + ) + + except ValueError as exc: + stories = self._asmnt.stories + + if "--" in loc_str: + s_low, s_high = loc_str.split('--') + s_low = get_locations(s_low) + s_high = get_locations(s_high) + return np.arange(int(s_low[0]), int(s_high[0]) + 1).astype(str) + + if "," in loc_str: + return np.array(loc_str.split(','), dtype=int).astype(str) + + if loc_str == "all": + return np.arange(1, stories + 1).astype(str) + + if loc_str == "top": + return np.array( + [ + stories, + ] + ).astype(str) + + if loc_str == "roof": + return np.array( + [ + stories + 1, + ] + ).astype(str) + + raise ValueError( + f"Cannot parse location string: " f"{loc_str}" + ) from exc + + def get_directions(dir_str): + if pd.isnull(dir_str): + return np.ones(1).astype(str) + + # else: + try: + res = str(int(dir_str)) + return np.array( + [ + res, + ] + ) + + except ValueError as exc: + if "," in dir_str: + return np.array(dir_str.split(','), dtype=int).astype(str) + + if "--" in dir_str: + d_low, d_high = dir_str.split('--') + d_low = get_directions(d_low) + d_high = get_directions(d_high) + return np.arange(int(d_low[0]), int(d_high[0]) + 1).astype(str) + + # else: + raise ValueError( + f"Cannot parse direction string: " f"{dir_str}" + ) from exc + + def get_attribute(attribute_str, dtype=float, default=np.nan): + if pd.isnull(attribute_str): + return default + return dtype(attribute_str) + + self.log_div() + self.log_msg('Loading component model...') + + # Currently, we assume independent component distributions are defined + # throughout the building. Correlations may be added afterward or this + # method can be extended to read correlation matrices too if needed. + + # prepare the marginal data source variable to load the data + if isinstance(data_source, dict): + marginal_data_source = data_source['marginals'] + else: + marginal_data_source = data_source + '_marginals.csv' + + marginal_params, units = file_io.load_data( + marginal_data_source, + None, + orientation=1, + reindex=False, + return_units=True, + log=self._asmnt.log, + ) + + # group units by cmp id to avoid redundant entries + self.cmp_units = units.copy().groupby(level=0).first() + + marginal_params = pd.concat([marginal_params, units], axis=1) + + cmp_marginal_param_dct = { + 'Family': [], + 'Theta_0': [], + 'Theta_1': [], + 'Theta_2': [], + 'TruncateLower': [], + 'TruncateUpper': [], + 'Blocks': [], + 'Units': [], + } + index_list = [] + for row in marginal_params.itertuples(): + locs = get_locations(row.Location) + dirs = get_directions(row.Direction) + indices = list(product((row.Index,), locs, dirs)) + num_vals = len(indices) + for col, cmp_marginal_param in cmp_marginal_param_dct.items(): + if col == 'Blocks': + cmp_marginal_param.extend( + [ + get_attribute( + getattr(row, 'Blocks', np.nan), + dtype=int, + default=1.0, + ) + ] + * num_vals + ) + elif col == 'Units': + cmp_marginal_param.extend([self.cmp_units[row.Index]] * num_vals) + elif col == 'Family': + cmp_marginal_param.extend([getattr(row, col, np.nan)] * num_vals) + else: + cmp_marginal_param.extend( + [get_attribute(getattr(row, col, np.nan))] * num_vals + ) + index_list.extend(indices) + index = pd.MultiIndex.from_tuples(index_list, names=['cmp', 'loc', 'dir']) + dtypes = { + 'Family': object, + 'Theta_0': float, + 'Theta_1': float, + 'Theta_2': float, + 'TruncateLower': float, + 'TruncateUpper': float, + 'Blocks': int, + 'Units': object, + } + cmp_marginal_param_series = [] + for 
col, cmp_marginal_param in cmp_marginal_param_dct.items(): + cmp_marginal_param_series.append( + pd.Series( + cmp_marginal_param, dtype=dtypes[col], name=col, index=index + ) + ) + + cmp_marginal_params = pd.concat(cmp_marginal_param_series, axis=1) + + assert not cmp_marginal_params['Theta_0'].isnull().values.any() + + cmp_marginal_params.dropna(axis=1, how='all', inplace=True) + + self.log_msg( + "Model parameters successfully parsed. " + f"{cmp_marginal_params.shape[0]} performance groups identified", + prepend_timestamp=False, + ) + + # Now we can take care of converting the values to base units + self.log_msg( + "Converting model parameters to internal units...", + prepend_timestamp=False, + ) + + # ensure that the index has unique entries by introducing an + # internal component uid + base.dedupe_index(cmp_marginal_params) + + cmp_marginal_params = self.convert_marginal_params( + cmp_marginal_params, cmp_marginal_params['Units'] + ) + + self.cmp_marginal_params = cmp_marginal_params.drop('Units', axis=1) + + self.log_msg( + "Model parameters successfully loaded.", prepend_timestamp=False + ) + + self.log_msg( + "\nComponent model marginal distributions:\n" + str(cmp_marginal_params), + prepend_timestamp=False, + ) + + # the empirical data and correlation files can be added later, if needed + + def _create_cmp_RVs(self): + """ + Defines the RVs used for sampling component quantities. + """ + + # initialize the registry + RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) + + # add a random variable for each component quantity variable + for rv_params in self.cmp_marginal_params.itertuples(): + cmp = rv_params.Index + + # create a random variable and add it to the registry + family = getattr(rv_params, "Family", 'deterministic') + RV_reg.add_RV( + uq.rv_class_map(family)( + name=f'CMP-{cmp[0]}-{cmp[1]}-{cmp[2]}-{cmp[3]}', + theta=[ + getattr(rv_params, f"Theta_{t_i}", np.nan) + for t_i in range(3) + ], + truncation_limits=[ + getattr(rv_params, f"Truncate{side}", np.nan) + for side in ("Lower", "Upper") + ], + ) + ) + + self.log_msg( + f"\n{self.cmp_marginal_params.shape[0]} random variables created.", + prepend_timestamp=False, + ) + + self._cmp_RVs = RV_reg + + def generate_cmp_sample(self, sample_size=None): + """ + Generates component quantity realizations. If a sample_size + is not specified, the sample size found in the demand model is + used. + """ + + if self.cmp_marginal_params is None: + raise ValueError( + 'Model parameters have not been specified. Load' + 'parameters from a file before generating a ' + 'sample.' + ) + + self.log_div() + self.log_msg('Generating sample from component quantity variables...') + + if sample_size is None: + if self._asmnt.demand.sample is None: + raise ValueError( + 'Sample size was not specified, ' + 'and it cannot be determined from ' + 'the demand model.' 
+ ) + sample_size = self._asmnt.demand.sample.shape[0] + + self._create_cmp_RVs() + + self._cmp_RVs.generate_sample( + sample_size=sample_size, method=self._asmnt.options.sampling_method + ) + + # replace the potentially existing sample with the generated one + self._cmp_sample = None + + self.log_msg( + f"\nSuccessfully generated {sample_size} realizations.", + prepend_timestamp=False, + ) diff --git a/pelicun/model/damage_model.py b/pelicun/model/damage_model.py new file mode 100644 index 000000000..ec1ba0d2c --- /dev/null +++ b/pelicun/model/damage_model.py @@ -0,0 +1,1568 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Leland Stanford Junior University +# Copyright (c) 2018 The Regents of the University of California +# +# This file is part of pelicun. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# You should have received a copy of the BSD 3-Clause License along with +# pelicun. If not, see . +# +# Contributors: +# Adam Zsarnóczay +# John Vouvakis Manousakis + +""" +This file defines the DamageModel object and its methods. + +.. rubric:: Contents + +.. autosummary:: + + DamageModel + +""" + +import numpy as np +import pandas as pd +from pelicun.model.pelicun_model import PelicunModel +from pelicun import base +from pelicun import uq +from pelicun import file_io + + +idx = base.idx + + +class DamageModel(PelicunModel): + """ + Manages damage information used in assessments. 
+ + This class contains the following methods: + + - save_sample() + - load_sample() + - load_damage_model() + - calculate() + - _get_pg_batches() + - _generate_dmg_sample() + - _create_dmg_rvs() + - _get_required_demand_type() + - _assemble_required_demand_data() + - _evaluate_damage_state() + - _prepare_dmg_quantities() + - _perform_dmg_task() + - _apply_dmg_funcitons() + + Parameters + ---------- + + """ + + def __init__(self, assessment): + super().__init__(assessment) + + self.damage_params = None + self.sample = None + + def save_sample(self, filepath=None, save_units=False): + """ + Save damage sample to a csv file + + """ + self.log_div() + self.log_msg('Saving damage sample...') + + cmp_units = self._asmnt.asset.cmp_units + qnt_units = pd.Series( + index=self.sample.columns, name='Units', dtype='object' + ) + for cmp in cmp_units.index: + qnt_units.loc[cmp] = cmp_units.loc[cmp] + + res = file_io.save_to_csv( + self.sample, + filepath, + units=qnt_units, + unit_conversion_factors=self._asmnt.unit_conversion_factors, + use_simpleindex=(filepath is not None), + log=self._asmnt.log, + ) + + if filepath is not None: + self.log_msg( + 'Damage sample successfully saved.', prepend_timestamp=False + ) + return None + + # else: + units = res.loc["Units"] + res.drop("Units", inplace=True) + res.index = res.index.astype('int64') + + if save_units: + return res.astype(float), units + + return res.astype(float) + + def load_sample(self, filepath): + """ + Load damage state sample data. + + """ + self.log_div() + self.log_msg('Loading damage sample...') + + self.sample = file_io.load_data( + filepath, self._asmnt.unit_conversion_factors, log=self._asmnt.log + ) + + # set the names of the columns + self.sample.columns.names = ['cmp', 'loc', 'dir', 'uid', 'ds'] + + self.log_msg('Damage sample successfully loaded.', prepend_timestamp=False) + + def load_damage_model(self, data_paths): + """ + Load limit state damage model parameters and damage state assignments + + Parameters + ---------- + data_paths: list of string + List of paths to data files with damage model information. Default + XY datasets can be accessed as PelicunDefault/XY. 
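# --- Editorial sketch, not part of the patch ------------------------------
# Call pattern for the damage-model loading API documented above. The file
# names are hypothetical; the PelicunDefault/ prefix is resolved to the
# bundled damage databases. When the same component ID appears in more than
# one file, duplicate definitions are consolidated by the
# groupby(...).first() call in the implementation below, which keeps the
# first non-missing value of each parameter.
# asmnt.damage.load_damage_model(
#     [
#         'my_custom_fragilities.csv',                     # hypothetical user file
#         'PelicunDefault/fragility_DB_FEMA_P58_2nd.csv',  # hypothetical default dataset name
#     ]
# )
# ---------------------------------------------------------------------------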
+ """ + + self.log_div() + self.log_msg('Loading damage model...') + + # replace default flag with default data path + data_paths = file_io.substitute_default_path(data_paths) + + data_list = [] + # load the data files one by one + for data_path in data_paths: + data = file_io.load_data( + data_path, None, orientation=1, reindex=False, log=self._asmnt.log + ) + + data_list.append(data) + + damage_params = pd.concat(data_list, axis=0) + + # drop redefinitions of components + damage_params = damage_params.groupby(damage_params.index).first() + + # get the component types defined in the asset model + cmp_labels = self._asmnt.asset.cmp_sample.columns + + # only keep the damage model parameters for the components in the model + cmp_unique = cmp_labels.unique(level=0) + cmp_mask = damage_params.index.isin(cmp_unique, level=0) + + damage_params = damage_params.loc[cmp_mask, :] + + if np.sum(cmp_mask) != len(cmp_unique): + cmp_list = cmp_unique[ + np.isin(cmp_unique, damage_params.index.values, invert=True) + ].to_list() + + self.log_msg( + "\nWARNING: The damage model does not provide " + "vulnerability information for the following component(s) " + f"in the asset model: {cmp_list}.\n", + prepend_timestamp=False, + ) + + # TODO: load defaults for Demand-Offset and Demand-Directional + + # Now convert model parameters to base units + for LS_i in damage_params.columns.unique(level=0): + if LS_i.startswith('LS'): + damage_params.loc[:, LS_i] = self.convert_marginal_params( + damage_params.loc[:, LS_i].copy(), + damage_params[('Demand', 'Unit')], + ).values + + # check for components with incomplete damage model information + cmp_incomplete_list = damage_params.loc[ + damage_params[('Incomplete', '')] == 1 + ].index + + damage_params.drop(cmp_incomplete_list, inplace=True) + + if len(cmp_incomplete_list) > 0: + self.log_msg( + f"\nWARNING: Damage model information is incomplete for " + f"the following component(s) {cmp_incomplete_list}. They " + f"were removed from the analysis.\n", + prepend_timestamp=False, + ) + + self.damage_params = damage_params + + self.log_msg( + "Damage model parameters successfully parsed.", prepend_timestamp=False + ) + + def _handle_operation(self, initial_value, operation, other_value): + """ + This method is used in `_create_dmg_RVs` to apply capacity + adjustment operations whenever required. It is defined as a + safer alternative to directly using `eval`. + + Parameters + ---------- + initial_value: float + Value before operation + operation: str + Any of +, -, *, / + other_value: float + Value used to apply the operation + + Returns + ------- + result: float + The result of the operation + + """ + if operation == '+': + return initial_value + other_value + if operation == '-': + return initial_value - other_value + if operation == '*': + return initial_value * other_value + if operation == '/': + return initial_value / other_value + raise ValueError(f'Invalid operation: {operation}') + + def _create_dmg_RVs(self, PGB, scaling_specification=None): + """ + Creates random variables required later for the damage calculation. + + The method initializes two random variable registries, + capacity_RV_reg and lsds_RV_reg, and loops through each + performance group in the input performance group block (PGB) + dataframe. For each performance group, it retrieves the + component sample and blocks and checks if the limit state is + defined for the component. If the limit state is defined, the + method gets the list of limit states and the parameters for + each limit state. 
The method assigns correlation between limit + state random variables, adds the limit state random variables + to the capacity_RV_reg registry, and adds LSDS assignments to + the lsds_RV_reg registry. After looping through all + performance groups, the method returns the two registries. + + Parameters + ---------- + PGB : DataFrame + A DataFrame that groups performance groups into batches + for efficient damage assessment. + scaling_specification: dict, optional + A dictionary defining the shift in median. + Example: {'CMP-1-1': '*1.2', 'CMP-1-2': '/1.4'} + The keys are individual components that should be present + in the `capacity_sample`. The values should be strings + containing an operation followed by the value formatted as + a float. The operation can be '+' for addition, '-' for + subtraction, '*' for multiplication, and '/' for division. + + """ + + def assign_lsds(ds_weights, ds_id, lsds_RV_reg, lsds_rv_tag): + """ + Prepare random variables to handle mutually exclusive damage states. + + """ + + # If the limit state has a single damage state assigned + # to it, we don't need random sampling + if pd.isnull(ds_weights): + ds_id += 1 + + lsds_RV_reg.add_RV( + uq.DeterministicRandomVariable( + name=lsds_rv_tag, + theta=ds_id, + ) + ) + + # Otherwise, we create a multinomial random variable + else: + # parse the DS weights + ds_weights = np.array( + ds_weights.replace(" ", "").split('|'), dtype=float + ) + + def map_ds(values, offset=int(ds_id + 1)): + return values + offset + + lsds_RV_reg.add_RV( + uq.MultinomialRandomVariable( + name=lsds_rv_tag, + theta=ds_weights, + f_map=map_ds, + ) + ) + + ds_id += len(ds_weights) + + return ds_id + + if self._asmnt.log.verbose: + self.log_msg('Generating capacity variables ...', prepend_timestamp=True) + + # initialize the registry + capacity_RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) + lsds_RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) + + # capacity adjustment: + # ensure the scaling_specification is a dictionary + if not scaling_specification: + scaling_specification = {} + else: + # if there are contents, ensure they are valid. + # See docstring for an example of what is expected. + parsed_scaling_specification = {} + # validate contents + for key, value in scaling_specification.items(): + css = 'capacity adjustment specification' + if not isinstance(value, str): + raise ValueError( + f'Invalud entry in {css}: {value}. It has to be a string. ' + f'See docstring of DamageModel._create_dmg_RVs.' 
+ ) + capacity_adjustment_operation = value[0] + number = value[1::] + if capacity_adjustment_operation not in ('+', '-', '*', '/'): + raise ValueError( + f'Invalid operation in {css}: ' + f'{capacity_adjustment_operation}' + ) + fnumber = base.float_or_None(number) + if fnumber is None: + raise ValueError(f'Invalid number in {css}: {number}') + parsed_scaling_specification[key] = ( + capacity_adjustment_operation, + fnumber, + ) + scaling_specification = parsed_scaling_specification + + # get the component sample and blocks from the asset model + for PG in PGB.index: + # determine demand capacity adjustment operation, if required + cmp_loc_dir = '-'.join(PG[0:3]) + capacity_adjustment_operation = scaling_specification.get( + cmp_loc_dir, None + ) + + cmp_id = PG[0] + blocks = PGB.loc[PG, 'Blocks'] + + # Calculate the block weights + blocks = np.full(int(blocks), 1.0 / blocks) + + # initialize the damaged quantity sample variable + assert self.damage_params is not None + if cmp_id in self.damage_params.index: + frg_params = self.damage_params.loc[cmp_id, :] + + # get the list of limit states + limit_states = [] + + for val in frg_params.index.get_level_values(0).unique(): + if 'LS' in val: + limit_states.append(val[2:]) + + ds_id = 0 + + frg_rv_set_tags = [[] for b in blocks] + anchor_RVs = [] + + for ls_id in limit_states: + frg_params_LS = frg_params[f'LS{ls_id}'] + + theta_0 = frg_params_LS.get('Theta_0', np.nan) + family = frg_params_LS.get('Family', 'deterministic') + ds_weights = frg_params_LS.get('DamageStateWeights', np.nan) + + # check if the limit state is defined for the component + if pd.isna(theta_0): + continue + + theta = [ + frg_params_LS.get(f"Theta_{t_i}", np.nan) for t_i in range(3) + ] + + if capacity_adjustment_operation: + if family in {'normal', 'lognormal'}: + theta[0] = self._handle_operation( + theta[0], + capacity_adjustment_operation[0], + capacity_adjustment_operation[1], + ) + else: + self.log_msg( + f'\nWARNING: Capacity adjustment is only supported ' + f'for `normal` or `lognormal` distributions. ' + f'Ignoring: {cmp_loc_dir}, which is {family}', + prepend_timestamp=False, + ) + + tr_lims = [ + frg_params_LS.get(f"Truncate{side}", np.nan) + for side in ("Lower", "Upper") + ] + + for block_i, _ in enumerate(blocks): + frg_rv_tag = ( + 'FRG-' + f'{PG[0]}-' # cmp_id + f'{PG[1]}-' # loc + f'{PG[2]}-' # dir + f'{PG[3]}-' # uid + f'{block_i+1}-' # block + f'{ls_id}' + ) + + # Assign correlation between limit state random + # variables + # Note that we assume perfectly correlated limit + # state random variables here. This approach is in + # line with how mainstream PBE calculations are + # performed. Assigning more sophisticated + # correlations between limit state RVs is possible, + # if needed. Please let us know through the + # SimCenter Message Board if you are interested in + # such a feature. 
+ # Anchor all other limit state random variables to + # the first one to consider the perfect correlation + # between capacities in each LS + if ls_id == limit_states[0]: + anchor = None + else: + anchor = anchor_RVs[block_i] + + # parse theta values for multilinear_CDF + if family == 'multilinear_CDF': + theta = np.column_stack( + ( + np.array( + theta[0].split('|')[0].split(','), + dtype=float, + ), + np.array( + theta[0].split('|')[1].split(','), + dtype=float, + ), + ) + ) + + RV = uq.rv_class_map(family)( + name=frg_rv_tag, + theta=theta, + truncation_limits=tr_lims, + anchor=anchor, + ) + + capacity_RV_reg.add_RV(RV) + + # add the RV to the set of correlated variables + frg_rv_set_tags[block_i].append(frg_rv_tag) + + if ls_id == limit_states[0]: + anchor_RVs.append(RV) + + # Now add the LS->DS assignments + lsds_rv_tag = ( + 'LSDS-' + f'{PG[0]}-' # cmp_id + f'{PG[1]}-' # loc + f'{PG[2]}-' # dir + f'{PG[3]}-' # uid + f'{block_i+1}-' # block + f'{ls_id}' + ) + + ds_id_next = assign_lsds( + ds_weights, ds_id, lsds_RV_reg, lsds_rv_tag + ) + + ds_id = ds_id_next + + if self._asmnt.log.verbose: + rv_count = len(lsds_RV_reg.RV) + self.log_msg( + f"2x{rv_count} random variables created.", prepend_timestamp=False + ) + + return capacity_RV_reg, lsds_RV_reg + + def _generate_dmg_sample(self, sample_size, PGB, scaling_specification=None): + """ + This method generates a damage sample by creating random + variables (RVs) for capacities and limit-state-damage-states + (lsds), and then sampling from these RVs. The sample size and + performance group batches (PGB) are specified as inputs. The + method returns the capacity sample and the lsds sample. + + Parameters + ---------- + sample_size : int + The number of realizations to generate. + PGB : DataFrame + A DataFrame that groups performance groups into batches + for efficient damage assessment. + scaling_specification: dict, optional + A dictionary defining the shift in median. + Example: {'CMP-1-1': '*1.2', 'CMP-1-2': '/1.4'} + The keys are individual components that should be present + in the `capacity_sample`. The values should be strings + containing an operation followed by the value formatted as + a float. The operation can be '+' for addition, '-' for + subtraction, '*' for multiplication, and '/' for division. + + Returns + ------- + capacity_sample : DataFrame + A DataFrame that represents the capacity sample. + lsds_sample : DataFrame + A DataFrame that represents the . + + Raises + ------ + ValueError + If the damage parameters have not been specified. + + """ + + # Check if damage model parameters have been specified + if self.damage_params is None: + raise ValueError( + 'Damage model parameters have not been specified. ' + 'Load parameters from the default damage model ' + 'databases or provide your own damage model ' + 'definitions before generating a sample.' 
+ ) + + # Create capacity and LSD RVs for each performance group + capacity_RVs, lsds_RVs = self._create_dmg_RVs(PGB, scaling_specification) + + if self._asmnt.log.verbose: + self.log_msg('Sampling capacities...', prepend_timestamp=True) + + # Generate samples for capacity RVs + capacity_RVs.generate_sample( + sample_size=sample_size, method=self._asmnt.options.sampling_method + ) + + # Generate samples for LSD RVs + lsds_RVs.generate_sample( + sample_size=sample_size, method=self._asmnt.options.sampling_method + ) + + if self._asmnt.log.verbose: + self.log_msg("Raw samples are available", prepend_timestamp=True) + + # get the capacity and lsds samples + capacity_sample = ( + pd.DataFrame(capacity_RVs.RV_sample) + .sort_index(axis=0) + .sort_index(axis=1) + ) + capacity_sample = base.convert_to_MultiIndex(capacity_sample, axis=1)['FRG'] + capacity_sample.columns.names = ['cmp', 'loc', 'dir', 'uid', 'block', 'ls'] + + lsds_sample = ( + pd.DataFrame(lsds_RVs.RV_sample) + .sort_index(axis=0) + .sort_index(axis=1) + .astype(int) + ) + lsds_sample = base.convert_to_MultiIndex(lsds_sample, axis=1)['LSDS'] + lsds_sample.columns.names = ['cmp', 'loc', 'dir', 'uid', 'block', 'ls'] + + if self._asmnt.log.verbose: + self.log_msg( + f"Successfully generated {sample_size} realizations.", + prepend_timestamp=True, + ) + + return capacity_sample, lsds_sample + + def _get_required_demand_type(self, PGB): + """ + Returns the id of the demand needed to calculate damage to a + component. We assume that a damage model sample is available. + + This method returns the demand type and its properties + required to calculate the damage to a component. The + properties include whether the demand is directional, the + offset, and the type of the demand. The method takes as input + a dataframe PGB that contains information about the component + groups in the asset. For each component group PG in the PGB + dataframe, the method retrieves the relevant damage parameters + from the damage_params dataframe and parses the demand type + into its properties. If the demand type has a subtype, the + method splits it and adds the subtype to the demand type to + form the EDP (engineering demand parameter) type. The method + also considers the default offset for the demand type, if it + is specified in the options attribute of the assessment, and + adds the offset to the EDP. If the demand is directional, the + direction is added to the EDP. The method collects all the + unique EDPs for each component group and returns them as a + dictionary where each key is an EDP and its value is a list of + component groups that require that EDP. 
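# --- Editorial sketch, not part of the patch ------------------------------
# Shape of the EDP_req dictionary described above. Keys follow the
# "<demand type>-<location + offset>-<direction>" pattern (direction '0'
# marks a non-directional demand); each value lists the performance-group
# index tuples (cmp, loc, dir, uid) that need that demand. All labels are
# made up for illustration.
EDP_req = {
    'Peak Ground Acceleration-0-0': [
        ('CMP.A', '1', '1', '0'),
        ('CMP.A', '1', '2', '0'),
    ],
    'Peak Ground Acceleration-1-1': [
        ('CMP.B', '1', '1', '0'),
    ],
}
# ---------------------------------------------------------------------------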
+ + Parameters + ---------- + `PGB`: pd.DataFrame + A pandas DataFrame with the block information for + each component + + Returns + ------- + EDP_req: dict + A dictionary of EDP requirements, where each key is the EDP + string (e.g., "Peak Ground Acceleration-0-0"), and the + corresponding value is a list of tuples (component_id, + location, direction) + + """ + + # Assign the damage_params attribute to a local variable `DP` + DP = self.damage_params + + # Check if verbose logging is enabled in `self._asmnt.log` + if self._asmnt.log.verbose: + # If verbose logging is enabled, log a message indicating + # that we are collecting demand information + self.log_msg( + 'Collecting required demand information...', prepend_timestamp=True + ) + + # Initialize an empty dictionary to store the unique EDP + # requirements + EDP_req = {} + + # Iterate over the index of the `PGB` DataFrame + for PG in PGB.index: + # Get the component name from the first element of the + # `PG` tuple + cmp = PG[0] + + # Get the directional, offset, and demand_type parameters + # from the `DP` DataFrame + directional, offset, demand_type = DP.loc[ + cmp, + [ + ('Demand', 'Directional'), + ('Demand', 'Offset'), + ('Demand', 'Type'), + ], + ] + + # Parse the demand type + + # Check if there is a subtype included in the demand_type + # string + if '|' in demand_type: + # If there is a subtype, split the demand_type string + # on the '|' character + demand_type, subtype = demand_type.split('|') + # Convert the demand type to the corresponding EDP + # type using `base.EDP_to_demand_type` + demand_type = base.EDP_to_demand_type[demand_type] + # Concatenate the demand type and subtype to form the + # EDP type + EDP_type = f'{demand_type}_{subtype}' + else: + # If there is no subtype, convert the demand type to + # the corresponding EDP type using + # `base.EDP_to_demand_type` + demand_type = base.EDP_to_demand_type[demand_type] + # Assign the EDP type to be equal to the demand type + EDP_type = demand_type + + # Consider the default offset, if needed + if demand_type in self._asmnt.options.demand_offset.keys(): + # If the demand type has a default offset in + # `self._asmnt.options.demand_offset`, add the offset + # to the default offset + offset = int(offset + self._asmnt.options.demand_offset[demand_type]) + else: + # If the demand type does not have a default offset in + # `self._asmnt.options.demand_offset`, convert the + # offset to an integer + offset = int(offset) + + # Determine the direction + if directional: + # If the demand is directional, use the third element + # of the `PG` tuple as the direction + direction = PG[2] + else: + # If the demand is not directional, use '0' as the + # direction + direction = '0' + + # Concatenate the EDP type, offset, and direction to form + # the EDP key + EDP = f"{EDP_type}-{str(int(PG[1]) + offset)}-{direction}" + + # If the EDP key is not already in the `EDP_req` + # dictionary, add it and initialize it with an empty list + if EDP not in EDP_req: + EDP_req.update({EDP: []}) + + # Add the current PG (performance group) to the list of + # PGs associated with the current EDP key + EDP_req[EDP].append(PG) + + # Return the unique EDP requirements + return EDP_req + + def _assemble_required_demand_data(self, EDP_req): + """ + Assembles demand data for damage state determination. 
+
+        For non-directional demand, the method takes the maximum of all
+        available directions and scales it using the non-directional
+        multiplier specified in self._asmnt.options. The assembled data
+        are returned as a dictionary with keys in the format of
+        '<demand_type>-<location>-<direction>' and values as arrays of
+        demand values. If demand data are not found for an EDP, a
+        warning message is logged and the corresponding damages are not
+        calculated.
+
+        Parameters
+        ----------
+        EDP_req : dict
+            A dictionary of unique EDP requirements
+
+        Returns
+        -------
+        demand_dict : dict
+            A dictionary of assembled demand data for calculation
+
+        Raises
+        ------
+        KeyError
+            If demand data for a given EDP cannot be found
+
+        """
+
+        if self._asmnt.log.verbose:
+            self.log_msg(
+                'Assembling demand data for calculation...', prepend_timestamp=True
+            )
+
+        demand_source = self._asmnt.demand.sample
+
+        demand_dict = {}
+
+        for EDP in EDP_req.keys():
+            EDP = EDP.split('-')
+
+            # if non-directional demand is requested...
+            if EDP[2] == '0':
+                # assume that the demand at the given location is available
+                try:
+                    # take the maximum of all available directions and scale it
+                    # using the nondirectional multiplier specified in the
+                    # self._asmnt.options (the default value is 1.2)
+                    demand = (
+                        demand_source.loc[:, (EDP[0], EDP[1])].max(axis=1).values
+                    )
+                    demand = demand * self._asmnt.options.nondir_multi(EDP[0])
+
+                except KeyError:
+                    demand = None
+
+            else:
+                demand = demand_source[(EDP[0], EDP[1], EDP[2])].values
+
+            if demand is None:
+                self.log_msg(
+                    f'\nWARNING: Cannot find demand data for {EDP}. The '
+                    'corresponding damages cannot be calculated.',
+                    prepend_timestamp=False,
+                )
+            else:
+                demand_dict.update({f'{EDP[0]}-{EDP[1]}-{EDP[2]}': demand})
+
+        return demand_dict
+
+    def _evaluate_damage_state(
+        self, demand_dict, EDP_req, capacity_sample, lsds_sample
+    ):
+        """
+        Use the demand and LS capacity sample to evaluate damage states
+
+        Parameters
+        ----------
+        demand_dict: dict
+            Dictionary containing the demand of each demand type.
+        EDP_req: dict
+            Dictionary containing the EDPs assigned to each demand
+            type.
+        capacity_sample: DataFrame
+            Provides a sample of the capacity.
+        lsds_sample: DataFrame
+            Provides the mapping between limit states and damage
+            states.
+
+        Returns
+        -------
+        dmg_sample: DataFrame
+            Assigns a Damage State to each component block in the
+            asset model.
+ """ + + # Log a message indicating that damage states are being + # evaluated + + if self._asmnt.log.verbose: + self.log_msg('Evaluating damage states...', prepend_timestamp=True) + + # Create an empty dataframe with columns and index taken from + # the input capacity sample + dmg_eval = pd.DataFrame( + columns=capacity_sample.columns, index=capacity_sample.index + ) + + # Initialize an empty list to store demand data + demand_df = [] + + # For each demand type in the demand dictionary + for demand_name, demand_vals in demand_dict.items(): + # Get the list of PGs assigned to this demand type + PG_list = EDP_req[demand_name] + + # Create a list of columns for the demand data + # corresponding to each PG in the PG_list + PG_cols = pd.concat( + [dmg_eval.loc[:1, PG_i] for PG_i in PG_list], axis=1, keys=PG_list + ).columns + PG_cols.names = ['cmp', 'loc', 'dir', 'uid', 'block', 'ls'] + # Create a dataframe with demand values repeated for the + # number of PGs and assign the columns as PG_cols + demand_df.append( + pd.concat( + [pd.Series(demand_vals)] * len(PG_cols), axis=1, keys=PG_cols + ) + ) + + # Concatenate all demand dataframes into a single dataframe + demand_df = pd.concat(demand_df, axis=1) + # Sort the columns of the demand dataframe + demand_df.sort_index(axis=1, inplace=True) + + # Evaluate the damage exceedance by subtracting demand from + # capacity and checking if the result is less than zero + dmg_eval = (capacity_sample - demand_df) < 0 + + # Remove any columns with NaN values from the damage + # exceedance dataframe + dmg_eval.dropna(axis=1, inplace=True) + + # initialize the DataFrames that store the damage states and + # quantities + ds_sample = pd.DataFrame( + 0, # fill value + columns=capacity_sample.columns.droplevel('ls').unique(), + index=capacity_sample.index, + dtype='int32', + ) + + # get a list of limit state ids among all components in the damage model + ls_list = dmg_eval.columns.get_level_values(5).unique() + + # for each consecutive limit state... + for LS_id in ls_list: + # get all cmp - loc - dir - block where this limit state occurs + dmg_e_ls = dmg_eval.loc[:, idx[:, :, :, :, :, LS_id]].dropna(axis=1) + + # Get the damage states corresponding to this limit state in each + # block + # Note that limit states with a set of mutually exclusive damage + # states options have their damage state picked here. + lsds = lsds_sample.loc[:, dmg_e_ls.columns] + + # Drop the limit state level from the columns to make the damage + # exceedance DataFrame compatible with the other DataFrames in the + # following steps + dmg_e_ls.columns = dmg_e_ls.columns.droplevel(5) + + # Same thing for the lsds DataFrame + lsds.columns = dmg_e_ls.columns + + # Update the damage state in the result with the values from the + # lsds DF if the limit state was exceeded according to the + # dmg_e_ls DF. + # This one-liner updates the given Limit State exceedance in the + # entire damage model. If subsequent Limit States are also exceeded, + # those cells in the result matrix will get overwritten by higher + # damage states. + ds_sample.loc[:, dmg_e_ls.columns] = ds_sample.loc[ + :, dmg_e_ls.columns + ].mask(dmg_e_ls, lsds) + + return ds_sample + + def _prepare_dmg_quantities(self, component_blocks, ds_sample, dropzero=True): + """ + Combine component quantity and damage state information in one + DataFrame. + + This method assumes that a component quantity sample is + available in the asset model and a damage state sample is + available in the damage model. 
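+
+        For illustration (quantities and states are hypothetical): if a
+        component block's share of the component quantity is 10 units
+        in a realization and the block is assigned damage state 2 in
+        that realization, that 10 is added to the
+        ('cmp', 'loc', 'dir', 'uid', '2') column of the output, and the
+        block contributes 0 to the other damage state columns of that
+        component.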
+ + Parameters + ---------- + component_blocks: DataFrame + A DataFrame that contains the number of blocks for each + component. + ds_sample: DataFrame + A DataFrame that assigns a damage state to each component + block in the asset model. + dropzero: bool, optional, default: True + If True, the quantity of non-damaged components is not + saved. + + Returns + ------- + res_df: DataFrame + A DataFrame that combines the component quantity and + damage state information. + + """ + + if self._asmnt.log.verbose: + self.log_msg('Calculating damage quantities...', prepend_timestamp=True) + + # Retrieve the component quantity information and component + # marginal parameters from the asset model + + # ('cmp', 'loc', 'dir', 'uid') -> component quantity series + component_quantities = self._asmnt.asset.cmp_sample.to_dict('series') + component_marginal_parameters = self._asmnt.asset.cmp_marginal_params + + if (component_marginal_parameters is not None) and ( + 'Blocks' in component_marginal_parameters.columns + ): + # if this information is available, use it + + # ('cmp', 'loc', 'dir', 'uid) -> number of blocks + num_blocks = component_marginal_parameters['Blocks'].to_dict() + + def get_num_blocks(key): + return float(num_blocks[key]) + + else: + # otherwise assume 1 block regardless of + # ('cmp', 'loc', 'dir', 'uid) key + def get_num_blocks(key): + return 1.00 + + # ('cmp', 'loc', 'dir', 'uid', 'block') -> damage state series + ds_sample_dict = ds_sample.to_dict('series') + + dmg_qnt_series_collection = {} + for key, ds_series in ds_sample_dict.items(): + component, location, direction, uid, block = key + ds_set = set(ds_series.values) + for ds in ds_set: + if ds == -1: + continue + if dropzero and ds == 0: + continue + else: + dmg_qnt_vals = np.where( + ds_series.values == ds, + component_quantities[ + component, location, direction, uid + ].values + / get_num_blocks((component, location, direction, uid)), + 0.00, + ) + if -1 in ds_set: + dmg_qnt_vals = np.where( + ds_series.values != -1, dmg_qnt_vals, np.nan + ) + dmg_qnt_series = pd.Series(dmg_qnt_vals) + dmg_qnt_series_collection[ + (component, location, direction, uid, block, str(ds)) + ] = dmg_qnt_series + + damage_quantities = pd.concat( + dmg_qnt_series_collection.values(), + axis=1, + keys=dmg_qnt_series_collection.keys(), + ) + damage_quantities.columns.names = ['cmp', 'loc', 'dir', 'uid', 'block', 'ds'] + + # sum up block quantities + damage_quantities = damage_quantities.groupby( + ['cmp', 'loc', 'dir', 'uid', 'ds'], axis=1 + ).sum() + + return damage_quantities + + def _perform_dmg_task(self, task, ds_sample): + """ + Perform a task from a damage process. + + The method performs a task from a damage process on a given + damage state sample. The events of the task are triggered by a + damage state occurrence. The method assigns target + component(s) into the target damage state based on the damage + state of the source component. If the target event is "NA", + the method removes damage state information from the target + components. + + Parameters + ---------- + task : list + A list representing a task from the damage process. The + list contains two elements: + - The first element is a string representing the source + component, e.g., `'1_CMP_A'`. The number in the beginning + is used to order the tasks and is not considered here. + - The second element is a dictionary representing the + events triggered by the damage state of the source + component. 
The keys of the dictionary are strings that
+            represent the damage state of the source component,
+            e.g., `'DS1'`. The values are lists of strings
+            representing the target component(s) and event(s), e.g.,
+            `['CMP.B_DS1', 'CMP.C_DS1']`. They could also be a
+            single element instead of a list.
+            Examples of a task:
+            ['1_CMP.A', {'DS1': ['CMP.B_DS1', 'CMP.C_DS2']}]
+            ['1_CMP.A', {'DS1': 'CMP.B_DS1', 'DS2': 'CMP.B_DS2'}]
+            ['1_CMP.A-LOC', {'DS1': 'CMP.B_DS1'}]
+        ds_sample : pandas DataFrame
+            A DataFrame representing the damage state of the
+            components. It is modified in place to represent the
+            damage states of the components after the task has been
+            performed.
+
+        """
+
+        if self._asmnt.log.verbose:
+            self.log_msg(f'Applying task {task}...', prepend_timestamp=True)
+
+        # parse task
+        source_cmp = task[0].split('_')[1]  # source component
+        events = task[1]  # prescribed events
+
+        # check for the `-LOC` suffix. If this is the case, we need to
+        # match locations.
+        if source_cmp.endswith('-LOC'):
+            source_cmp = source_cmp.replace('-LOC', '')
+            match_locations = True
+        else:
+            match_locations = False
+
+        # check if the source component exists in the damage state
+        # dataframe
+        if source_cmp not in ds_sample.columns.get_level_values('cmp'):
+            self.log_msg(
+                f"WARNING: Source component {source_cmp} in the prescribed "
+                "damage process not found among components in the damage "
+                "sample. The corresponding part of the damage process is "
+                "skipped.",
+                prepend_timestamp=False,
+            )
+            return
+
+        # execute the events prescribed in the damage task
+        for source_event, target_infos in events.items():
+
+            # events can only be triggered by damage state occurrence
+            if not source_event.startswith('DS'):
+                raise ValueError(
+                    f"Unable to parse source event in damage "
+                    f"process: {source_event}"
+                )
+            # get the ID of the damage state that triggers the event
+            ds_source = int(source_event[2:])
+
+            # turn the target_infos into a list if it is a single
+            # argument, for consistency
+            if not isinstance(target_infos, list):
+                target_infos = [target_infos]
+
+            for target_info in target_infos:
+
+                # get the target component and event type
+                target_cmp, target_event = target_info.split('_')
+
+                if (target_cmp != 'ALL') and (
+                    target_cmp not in ds_sample.columns.get_level_values('cmp')
+                ):
+                    self.log_msg(
+                        f"WARNING: Target component {target_cmp} in the prescribed "
+                        "damage process not found among components in the damage "
+                        "sample. The corresponding part of the damage process is "
+                        "skipped.",
+                        prepend_timestamp=False,
+                    )
+                    continue
+
+                # trigger a damage state
+                if target_event.startswith('DS'):
+
+                    # get the ID of the damage state to switch the target
+                    # components to
+                    ds_target = int(target_event[2:])
+
+                # clear damage state information
+                elif target_event == 'NA':
+                    ds_target = -1
+                    # -1 stands for nan (ints don't support nan)
+
+                else:
+                    raise ValueError(
+                        f"Unable to parse target event in damage "
+                        f"process: {target_event}"
+                    )
+
+                if match_locations:
+                    self._perform_dmg_event_loc(
+                        ds_sample, source_cmp, ds_source, target_cmp, ds_target
+                    )
+
+                else:
+                    self._perform_dmg_event(
+                        ds_sample, source_cmp, ds_source, target_cmp, ds_target
+                    )
+
+        if self._asmnt.log.verbose:
+            self.log_msg(
+                'Damage process task successfully applied.', prepend_timestamp=False
+            )
+
+    def _perform_dmg_event(
+        self, ds_sample, source_cmp, ds_source, target_cmp, ds_target
+    ):
+        """
+        Perform a damage event.
+        See `_perform_dmg_task`.
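+        For example, calling it with the hypothetical arguments
+        (ds_sample, 'CMP.A', 1, 'CMP.B', 1) switches every block of
+        'CMP.B' to damage state 1 in each realization where the highest
+        damage state among the blocks of 'CMP.A' equals 1.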
+ + """ + + # affected rows + row_selection = np.where( + # for many instances of source_cmp, we + # consider the highest damage state + ds_sample[source_cmp].max(axis=1).values + == ds_source + )[0] + # affected columns + if target_cmp == 'ALL': + column_selection = np.where( + ds_sample.columns.get_level_values('cmp') != source_cmp + )[0] + else: + column_selection = np.where( + ds_sample.columns.get_level_values('cmp') == target_cmp + )[0] + ds_sample.iloc[row_selection, column_selection] = ds_target + + def _perform_dmg_event_loc( + self, ds_sample, source_cmp, ds_source, target_cmp, ds_target + ): + """ + Perform a damage event matching locations. + See `_perform_dmg_task`. + + """ + + # get locations of source component + source_locs = set(ds_sample[source_cmp].columns.get_level_values('loc')) + for loc in source_locs: + # apply damage task matching locations + row_selection = np.where( + # for many instances of source_cmp, we + # consider the highest damage state + ds_sample[source_cmp, loc].max(axis=1).values + == ds_source + )[0] + + # affected columns + if target_cmp == 'ALL': + column_selection = np.where( + np.logical_and( + ds_sample.columns.get_level_values('cmp') != source_cmp, + ds_sample.columns.get_level_values('loc') == loc, + ) + )[0] + else: + column_selection = np.where( + np.logical_and( + ds_sample.columns.get_level_values('cmp') == target_cmp, + ds_sample.columns.get_level_values('loc') == loc, + ) + )[0] + ds_sample.iloc[row_selection, column_selection] = ds_target + + def _get_pg_batches(self, block_batch_size): + """ + Group performance groups into batches for efficient damage assessment. + + The method takes as input the block_batch_size, which + specifies the maximum number of blocks per batch. The method + first checks if performance groups have been defined in the + cmp_marginal_params dataframe, and if so, it uses the 'Blocks' + column as the performance group information. If performance + groups have not been defined in cmp_marginal_params, the + method uses the cmp_sample dataframe to define the performance + groups, with each performance group having a single block. + + The method then checks if the performance groups are available + in the damage parameters dataframe, and removes any + performance groups that are not found in the damage + parameters. The method then groups the performance groups + based on the locations and directions of the components, and + calculates the cumulative sum of the blocks for each + group. The method then divides the performance groups into + batches of size specified by block_batch_size and assigns a + batch number to each group. Finally, the method groups the + performance groups by batch number, component, location, and + direction, and returns a dataframe that shows the number of + blocks for each batch. + + """ + + # Get the marginal parameters for the components from the + # asset model + cmp_marginals = self._asmnt.asset.cmp_marginal_params + + # Initialize the batch dataframe + pg_batch = None + + # If marginal parameters are available, use the 'Blocks' + # column to initialize the batch dataframe + if cmp_marginals is not None: + # Check if the "Blocks" column exists in the component + # marginal parameters + if 'Blocks' in cmp_marginals.columns: + pg_batch = cmp_marginals['Blocks'].to_frame() + + # If the "Blocks" column doesn't exist, create a new dataframe + # with "Blocks" column filled with ones, using the component + # sample as the index. 
+ if pg_batch is None: + cmp_sample = self._asmnt.asset.cmp_sample + pg_batch = pd.DataFrame( + np.ones(cmp_sample.shape[1]), + index=cmp_sample.columns, + columns=['Blocks'], + ) + + # Check if the damage model information exists for each + # performance group If not, remove the performance group from + # the analysis and log a warning message. + first_time = True + for pg_i in pg_batch.index: + if np.any(np.isin(pg_i, self.damage_params.index)): + blocks_i = pg_batch.loc[pg_i, 'Blocks'] + pg_batch.loc[pg_i, 'Blocks'] = blocks_i + + else: + pg_batch.drop(pg_i, inplace=True) + + if first_time: + self.log_msg( + "\nWARNING: Damage model information is " + "incomplete for some of the performance groups " + "and they had to be removed from the analysis:", + prepend_timestamp=False, + ) + + first_time = False + + self.log_msg(f"{pg_i}", prepend_timestamp=False) + + # Convert the data types of the dataframe to be efficient + pg_batch = pg_batch.convert_dtypes() + + # Sum up the number of blocks for each performance group + pg_batch = pg_batch.groupby(['loc', 'dir', 'cmp', 'uid']).sum() + pg_batch.sort_index(axis=0, inplace=True) + + # Calculate cumulative sum of blocks + pg_batch['CBlocks'] = np.cumsum(pg_batch['Blocks'].values.astype(int)) + pg_batch['Batch'] = 0 + + # Group the performance groups into batches + for batch_i in range(1, pg_batch.shape[0] + 1): + # Find the mask for blocks that are less than the batch + # size and greater than 0 + batch_mask = np.all( + np.array( + [ + pg_batch['CBlocks'] <= block_batch_size, + pg_batch['CBlocks'] > 0, + ] + ), + axis=0, + ) + + if np.sum(batch_mask) < 1: + batch_mask = np.full(batch_mask.shape, False) + batch_mask[np.where(pg_batch['CBlocks'] > 0)[0][0]] = True + + pg_batch.loc[batch_mask, 'Batch'] = batch_i + + # Decrement the cumulative block count by the max count in + # the current batch + pg_batch['CBlocks'] -= pg_batch.loc[ + pg_batch['Batch'] == batch_i, 'CBlocks' + ].max() + + # If the maximum cumulative block count is 0, exit the + # loop + if pg_batch['CBlocks'].max() == 0: + break + + # Group the performance groups by batch, component, location, + # and direction, and keep only the number of blocks for each + # group + pg_batch = ( + pg_batch.groupby(['Batch', 'cmp', 'loc', 'dir', 'uid']) + .sum() + .loc[:, 'Blocks'] + .to_frame() + ) + + return pg_batch + + def _complete_ds_cols(self, dmg_sample): + """ + Completes the damage sample dataframe with all possible damage + states for each component. + + Parameters + ---------- + dmg_sample : DataFrame + A DataFrame containing the damage state information for + each component block in the asset model. The columns are + MultiIndexed with levels corresponding to component + information ('cmp', 'loc', 'dir', 'uid') and the damage + state ('ds'). + + Returns + ------- + DataFrame + A DataFrame similar to `dmg_sample` but with additional + columns for missing damage states for each component, + ensuring that all possible damage states are + represented. The new columns are filled with zeros, + indicating no occurrence of those damage states in the + sample. + + Notes + ----- + - The method assumes that the damage model parameters + (`self.damage_params`) are available and contain the + necessary information to determine the total number of + damage states for each component. 
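+        - As an illustrative example with a hypothetical component: if
+          its damage model defines two limit states that map to three
+          damage states in total, the returned DataFrame contains
+          columns for damage states '0' through '3' for every
+          cmp-loc-dir-uid combination of that component, even if some
+          of those damage states never occur in the sample.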
+ + """ + # get a shortcut for the damage model parameters + DP = self.damage_params + + # Get the header for the results that we can use to identify + # cmp-loc-dir-uid sets + dmg_header = ( + dmg_sample.groupby(level=[0, 1, 2, 3], axis=1).first().iloc[:2, :] + ) + + # get the number of possible limit states + ls_list = [col for col in DP.columns.unique(level=0) if 'LS' in col] + + # initialize the result dataframe + res = pd.DataFrame() + + # walk through all components that have damage parameters provided + for cmp_id in DP.index: + # get the component-specific parameters + cmp_data = DP.loc[cmp_id] + + # and initialize the damage state counter + ds_count = 0 + + # walk through all limit states for the component + for ls in ls_list: + # check if the given limit state is defined + if not pd.isna(cmp_data[(ls, 'Theta_0')]): + # check if there is only one damage state + if pd.isna(cmp_data[(ls, 'DamageStateWeights')]): + ds_count += 1 + + else: + # or if there are more than one, how many + ds_count += len( + cmp_data[(ls, 'DamageStateWeights')].split('|') + ) + + # get the list of valid cmp-loc-dir-uid sets + cmp_header = dmg_header.loc[ + :, + [ + cmp_id, + ], + ] + + # Create a dataframe where they are repeated ds_count times in the + # columns. The keys put the DS id in the first level of the + # multiindexed column + cmp_headers = pd.concat( + [cmp_header for ds_i in range(ds_count + 1)], + keys=[str(r) for r in range(0, ds_count + 1)], + axis=1, + ) + cmp_headers.columns.names = ['ds', *cmp_headers.columns.names[1::]] + + # add these new columns to the result dataframe + res = pd.concat([res, cmp_headers], axis=1) + + # Fill the result dataframe with zeros and reorder its columns to have + # the damage states at the lowest like - matching the dmg_sample input + res = pd.DataFrame( + 0.0, + columns=res.columns.reorder_levels([1, 2, 3, 4, 0]), + index=dmg_sample.index, + ) + + # replace zeros wherever the dmg_sample has results + res.loc[:, dmg_sample.columns.to_list()] = dmg_sample + + return res + + def calculate( + self, dmg_process=None, block_batch_size=1000, scaling_specification=None + ): + """ + Calculate the damage state of each component block in the asset. + + """ + + self.log_div() + self.log_msg('Calculating damages...') + + sample_size = self._asmnt.demand.sample.shape[0] + + # Break up damage calculation and perform it by performance group. + # Compared to the simultaneous calculation of all PGs, this approach + # reduces demands on memory and increases the load on CPU. This leads + # to a more balanced workload on most machines for typical problems. + # It also allows for a straightforward extension with parallel + # computing. 
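+
+        # Illustrative usage (hypothetical names; an Assessment instance
+        # with demand and asset samples already generated is assumed):
+        #
+        #   asmnt.damage.calculate(dmg_process=dmg_process)
+        #
+        # where `dmg_process` could look like
+        #   {"1_CMP.A": {"DS1": "CMP.B_DS1"}}
+        # meaning that damage state 1 of CMP.A triggers damage state 1
+        # of CMP.B in the same realization.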
+ + # get the list of performance groups + self.log_msg( + f'Number of Performance Groups in Asset Model:' + f' {self._asmnt.asset.cmp_sample.shape[1]}', + prepend_timestamp=False, + ) + + pg_batch = self._get_pg_batches(block_batch_size) + batches = pg_batch.index.get_level_values(0).unique() + + self.log_msg( + f'Number of Component Blocks: {pg_batch["Blocks"].sum()}', + prepend_timestamp=False, + ) + + self.log_msg( + f"{len(batches)} batches of Performance Groups prepared " + "for damage assessment", + prepend_timestamp=False, + ) + + # for PG_i in self._asmnt.asset.cmp_sample.columns: + ds_samples = [] + for PGB_i in batches: + + component_blocks = pg_batch.loc[PGB_i] + + self.log_msg( + f"Calculating damage for PG batch {PGB_i} with " + f"{int(component_blocks['Blocks'].sum())} blocks" + ) + + # Generate an array with component capacities for each block and + # generate a second array that assigns a specific damage state to + # each component limit state. The latter is primarily needed to + # handle limit states with multiple, mutually exclusive DS options + capacity_sample, lsds_sample = self._generate_dmg_sample( + sample_size, component_blocks, scaling_specification + ) + + # Get the required demand types for the analysis + EDP_req = self._get_required_demand_type(component_blocks) + + # Create the demand vector + demand_dict = self._assemble_required_demand_data(EDP_req) + + # Evaluate the Damage State of each Component Block + ds_sample = self._evaluate_damage_state( + demand_dict, EDP_req, capacity_sample, lsds_sample + ) + + ds_samples.append(ds_sample) + + ds_sample = pd.concat(ds_samples, axis=1) + self.log_msg("Raw damage calculation successful.", prepend_timestamp=False) + + # Apply the prescribed damage process, if any + if dmg_process is not None: + self.log_msg("Applying damage processes...") + + # Sort the damage processes tasks + dmg_process = {key: dmg_process[key] for key in sorted(dmg_process)} + + # Perform damage tasks in the sorted order + for task in dmg_process.items(): + self._perform_dmg_task(task, ds_sample) + + self.log_msg( + "Damage processes successfully applied.", prepend_timestamp=False + ) + + qnt_sample = self._prepare_dmg_quantities( + pg_batch.reset_index('Batch', drop=True), ds_sample, dropzero=False + ) + + self.sample = qnt_sample + + self.log_msg('Damage calculation successfully completed.') diff --git a/pelicun/model/demand_model.py b/pelicun/model/demand_model.py new file mode 100644 index 000000000..bd5d8ff3b --- /dev/null +++ b/pelicun/model/demand_model.py @@ -0,0 +1,905 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Leland Stanford Junior University +# Copyright (c) 2018 The Regents of the University of California +# +# This file is part of pelicun. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# You should have received a copy of the BSD 3-Clause License along with +# pelicun. If not, see . +# +# Contributors: +# Adam Zsarnóczay +# John Vouvakis Manousakis + +""" +This file defines the DemandModel object and its methods. + +.. rubric:: Contents + +.. autosummary:: + + DemandModel + +""" + +import numpy as np +import pandas as pd +from .pelicun_model import PelicunModel +from .. import base +from .. import uq +from .. import file_io + + +idx = base.idx + + +class DemandModel(PelicunModel): + """ + Manages demand information used in assessments. + + Parameters + ---------- + marginal_params: DataFrame + Available after the model has been calibrated or calibration data has + been imported. Defines the marginal distribution of each demand + variable. + correlation: DataFrame + Available after the model has been calibrated or calibration data has + been imported. Defines the correlation between the demand variables in + standard normal space. That is, the variables are sampled in standard + normal space and then transformed into the space of their respective + distributions and the correlation matrix corresponds to the space where + they are sampled. + empirical_data: DataFrame + Available after the model has been calibrated or calibration data has + been imported. It provides an empirical dataset for the demand + variables that are modeled with an empirical distribution. + sample: DataFrame + Available after a sample has been generated. Demand variables are + listed in columns and each row provides an independent realization of + the joint demand distribution. + units: Series + Available after any demand data has been loaded. The index identifies + the demand variables and the values provide the unit for each variable. + calibrated: bool + Signifies whether the DemandModel object has been calibrated. 
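+
+    A typical workflow (a hedged sketch; the file name is hypothetical
+    and `asmnt` is assumed to be an existing Assessment object) is::
+
+        demand_model = asmnt.demand
+        demand_model.load_sample('demands.csv')
+        demand_model.calibrate_model({'ALL': {'DistributionFamily': 'lognormal'}})
+        demand_model.generate_sample({'SampleSize': 1000})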
+ + """ + + def __init__(self, assessment): + super().__init__(assessment) + + self.marginal_params = None + self.correlation = None + self.empirical_data = None + self.units = None + self.calibrated = False + + self._RVs = None + self.sample = None + + def save_sample(self, filepath=None, save_units=False): + """ + Save demand sample to a csv file or return it in a DataFrame + + """ + + self.log_div() + if filepath is not None: + self.log_msg('Saving demand sample...') + + res = file_io.save_to_csv( + self.sample, + filepath, + units=self.units, + unit_conversion_factors=self._asmnt.unit_conversion_factors, + use_simpleindex=(filepath is not None), + log=self._asmnt.log, + ) + + if filepath is not None: + self.log_msg( + 'Demand sample successfully saved.', prepend_timestamp=False + ) + return None + + # else: + units = res.loc["Units"] + res.drop("Units", inplace=True) + + if save_units: + return res.astype(float), units + + # else: + return res.astype(float) + + def load_sample(self, filepath): + """ + Load demand sample data and parse it. + + Besides parsing the sample, the method also reads and saves the units + specified for each demand variable. If no units are specified, Standard + Units are assumed. + + Parameters + ---------- + filepath: string or DataFrame + Location of the file with the demand sample. + + """ + + def parse_header(raw_header): + old_MI = raw_header + + # The first number (event_ID) in the demand labels is optional and + # currently not used. We remove it if it was in the raw data. + if old_MI.nlevels == 4: + if self._asmnt.log.verbose: + self.log_msg( + 'Removing event_ID from header...', prepend_timestamp=False + ) + + new_column_index_array = np.array( + [old_MI.get_level_values(i) for i in range(1, 4)] + ) + + else: + new_column_index_array = np.array( + [old_MI.get_level_values(i) for i in range(3)] + ) + + # Remove whitespace to avoid ambiguity + + if self._asmnt.log.verbose: + self.log_msg( + 'Removing whitespace from header...', prepend_timestamp=False + ) + + wspace_remove = np.vectorize(lambda name: str(name).replace(' ', '')) + + new_column_index = wspace_remove(new_column_index_array) + + # Creating new, cleaned-up header + + new_MI = pd.MultiIndex.from_arrays( + new_column_index, names=['type', 'loc', 'dir'] + ) + + return new_MI + + self.log_div() + self.log_msg('Loading demand data...') + + demand_data, units = file_io.load_data( + filepath, + self._asmnt.unit_conversion_factors, + return_units=True, + log=self._asmnt.log, + ) + + parsed_data = demand_data.copy() + + # start with cleaning up the header + + parsed_data.columns = parse_header(parsed_data.columns) + + # Remove errors, if needed + if 'ERROR' in parsed_data.columns.get_level_values(0): + self.log_msg( + 'Removing errors from the raw data...', prepend_timestamp=False + ) + + error_list = parsed_data.loc[:, idx['ERROR', :, :]].values.astype(bool) + + parsed_data = parsed_data.loc[~error_list, :].copy() + parsed_data.drop('ERROR', level=0, axis=1, inplace=True) + + self.log_msg( + "\nBased on the values in the ERROR column, " + f"{np.sum(error_list)} demand samples were removed.\n", + prepend_timestamp=False, + ) + + self.sample = parsed_data + + self.log_msg('Demand data successfully parsed.', prepend_timestamp=False) + + # parse the index for the units + units.index = parse_header(units.index) + + self.units = units + + self.log_msg('Demand units successfully parsed.', prepend_timestamp=False) + + def estimate_RID(self, demands, params, method='FEMA P58'): + """ + Estimate residual drift 
realizations based on other demands + + Parameters + ---------- + demands: DataFrame + Sample of demands required for the method to estimate the RID values + params: dict + Parameters required for the method to estimate the RID values + method: {'FEMA P58'}, default: 'FEMA P58' + Method to use for the estimation - currently, only one is available. + """ + + if method == 'FEMA P58': + # method is described in FEMA P-58 Volume 1 Section 5.4 & Appendix C + + # the provided demands shall be PID values at various loc-dir pairs + PID = demands + + # there's only one parameter needed: the yield drift + yield_drift = params['yield_drift'] + + # three subdomains of demands are identified + small = PID < yield_drift + medium = PID < 4 * yield_drift + large = PID >= 4 * yield_drift + + # convert PID to RID in each subdomain + RID = PID.copy() + RID[large] = PID[large] - 3 * yield_drift + RID[medium] = 0.3 * (PID[medium] - yield_drift) + RID[small] = 0.0 + + # add extra uncertainty to nonzero values + rng = self._asmnt.options.rng + eps = rng.normal(scale=0.2, size=RID.shape) + RID[RID > 0] = np.exp(np.log(RID[RID > 0]) + eps) + + # finally, make sure the RID values are never larger than the PIDs + RID = pd.DataFrame( + np.minimum(PID.values, RID.values), + columns=pd.DataFrame( + 1, + index=[ + 'RID', + ], + columns=PID.columns, + ) + .stack(level=[0, 1]) + .index, + index=PID.index, + ) + + else: + RID = None + + # return the generated drift realizations + return RID + + def calibrate_model(self, config): + """ + Calibrate a demand model to describe the raw demand data + + The raw data shall be parsed first to ensure that it follows the + schema expected by this method. The calibration settings define the + characteristics of the multivariate distribution that is fit to the + raw data. + + Parameters + ---------- + config: dict + A dictionary, typically read from a json file, that specifies the + distribution family, truncation and censoring limits, and other + settings for the calibration. + + """ + + if self.calibrated: + self.log_msg( + 'WARNING: DemandModel has been previously calibrated.', + prepend_timestamp=False, + ) + + raise ValueError() + + def parse_settings(settings, demand_type): + def parse_str_to_float(in_str, context_string): + try: + out_float = float(in_str) + + except ValueError: + self.log_msg( + f"WARNING: Could not parse {in_str} provided as " + f"{context_string}. 
Using NaN instead.", + prepend_timestamp=False, + ) + + out_float = np.nan + + return out_float + + active_d_types = demand_sample.columns.get_level_values('type').unique() + + if demand_type == 'ALL': + cols = tuple(active_d_types) + + else: + cols_lst = [] + + for d_type in active_d_types: + if d_type.split('_')[0] == demand_type: + cols_lst.append(d_type) + + cols = tuple(cols_lst) + + # load the distribution family + cal_df.loc[idx[cols, :, :], 'Family'] = settings['DistributionFamily'] + + # load limits + for lim in ( + 'CensorLower', + 'CensorUpper', + 'TruncateLower', + 'TruncateUpper', + ): + if lim in settings.keys(): + val = parse_str_to_float(settings[lim], lim) + if not pd.isna(val): + cal_df.loc[idx[cols, :, :], lim] = val + + # scale the censor and truncation limits, if needed + scale_factor = self._asmnt.scale_factor(settings.get('Unit', None)) + + rows_to_scale = [ + 'CensorLower', + 'CensorUpper', + 'TruncateLower', + 'TruncateUpper', + ] + cal_df.loc[idx[cols, :, :], rows_to_scale] *= scale_factor + + # load the prescribed additional uncertainty + if 'AddUncertainty' in settings.keys(): + sig_increase = parse_str_to_float( + settings['AddUncertainty'], 'AddUncertainty' + ) + + # scale the sig value if the target distribution family is normal + if settings['DistributionFamily'] == 'normal': + sig_increase *= scale_factor + + cal_df.loc[idx[cols, :, :], 'SigIncrease'] = sig_increase + + def get_filter_mask(lower_lims, upper_lims): + demands_of_interest = demand_sample.iloc[:, pd.notna(upper_lims)] + limits_of_interest = upper_lims[pd.notna(upper_lims)] + upper_mask = np.all(demands_of_interest < limits_of_interest, axis=1) + + demands_of_interest = demand_sample.iloc[:, pd.notna(lower_lims)] + limits_of_interest = lower_lims[pd.notna(lower_lims)] + lower_mask = np.all(demands_of_interest > limits_of_interest, axis=1) + + return np.all([lower_mask, upper_mask], axis=0) + + self.log_div() + self.log_msg('Calibrating demand model...') + + demand_sample = self.sample + + # initialize a DataFrame that contains calibration information + cal_df = pd.DataFrame( + columns=[ + 'Family', + 'CensorLower', + 'CensorUpper', + 'TruncateLower', + 'TruncateUpper', + 'SigIncrease', + 'Theta_0', + 'Theta_1', + ], + index=demand_sample.columns, + dtype=float, + ) + + cal_df['Family'] = cal_df['Family'].astype(str) + + # start by assigning the default option ('ALL') to every demand column + parse_settings(config['ALL'], 'ALL') + + # then parse the additional settings and make the necessary adjustments + for demand_type in config.keys(): + if demand_type != 'ALL': + parse_settings(config[demand_type], demand_type) + + if self._asmnt.log.verbose: + self.log_msg( + "\nCalibration settings successfully parsed:\n" + str(cal_df), + prepend_timestamp=False, + ) + else: + self.log_msg( + "\nCalibration settings successfully parsed:\n", + prepend_timestamp=False, + ) + + # save the settings + model_params = cal_df.copy() + + # Remove the samples outside of censoring limits + # Currently, non-empirical demands are assumed to have some level of + # correlation, hence, a censored value in any demand triggers the + # removal of the entire sample from the population. 
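+        # (Illustrative, hypothetical example: a calibration entry such as
+        #  {"PID": {"DistributionFamily": "lognormal", "CensorUpper": "0.06"}}
+        #  would remove every realization in which any PID column exceeds
+        #  the unit-scaled censoring limit and count it in `censored_count`.)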
+ upper_lims = cal_df.loc[:, 'CensorUpper'].values + lower_lims = cal_df.loc[:, 'CensorLower'].values + + if ~np.all(pd.isna(np.array([upper_lims, lower_lims]))): + censor_mask = get_filter_mask(lower_lims, upper_lims) + censored_count = np.sum(~censor_mask) + + demand_sample = demand_sample.loc[censor_mask, :] + + self.log_msg( + "\nBased on the provided censoring limits, " + f"{censored_count} samples were censored.", + prepend_timestamp=False, + ) + else: + censored_count = 0 + + # Check if there is any sample outside of truncation limits + # If yes, that suggests an error either in the samples or the + # configuration. We handle such errors gracefully: the analysis is not + # terminated, but we show an error in the log file. + upper_lims = cal_df.loc[:, 'TruncateUpper'].values + lower_lims = cal_df.loc[:, 'TruncateLower'].values + + if ~np.all(pd.isna(np.array([upper_lims, lower_lims]))): + truncate_mask = get_filter_mask(lower_lims, upper_lims) + truncated_count = np.sum(~truncate_mask) + + if truncated_count > 0: + demand_sample = demand_sample.loc[truncate_mask, :] + + self.log_msg( + "\nBased on the provided truncation limits, " + f"{truncated_count} samples were removed before demand " + "calibration.", + prepend_timestamp=False, + ) + + # Separate and save the demands that are kept empirical -> i.e., no + # fitting. Currently, empirical demands are decoupled from those that + # have a distribution fit to their samples. The correlation between + # empirical and other demands is not preserved in the demand model. + empirical_edps = [] + for edp in cal_df.index: + if cal_df.loc[edp, 'Family'] == 'empirical': + empirical_edps.append(edp) + + if empirical_edps: + self.empirical_data = demand_sample.loc[:, empirical_edps].copy() + + # remove the empirical demands from the samples used for calibration + demand_sample = demand_sample.drop(empirical_edps, axis=1) + + # and the calibration settings + cal_df = cal_df.drop(empirical_edps, axis=0) + + if self._asmnt.log.verbose: + self.log_msg( + f"\nDemand data used for calibration:\n{demand_sample}", + prepend_timestamp=False, + ) + + # fit the joint distribution + self.log_msg( + "\nFitting the prescribed joint demand distribution...", + prepend_timestamp=False, + ) + + demand_theta, demand_rho = uq.fit_distribution_to_sample( + raw_samples=demand_sample.values.T, + distribution=cal_df.loc[:, 'Family'].values, + censored_count=censored_count, + detection_limits=cal_df.loc[:, ['CensorLower', 'CensorUpper']].values, + truncation_limits=cal_df.loc[ + :, ['TruncateLower', 'TruncateUpper'] + ].values, + multi_fit=False, + logger_object=self._asmnt.log, + ) + # fit the joint distribution + self.log_msg( + "\nCalibration successful, processing results...", + prepend_timestamp=False, + ) + + # save the calibration results + model_params.loc[cal_df.index, ['Theta_0', 'Theta_1']] = demand_theta + + # increase the variance of the marginal distributions, if needed + if ~np.all(pd.isna(model_params.loc[:, 'SigIncrease'].values)): + self.log_msg("\nIncreasing demand variance...", prepend_timestamp=False) + + sig_inc = np.nan_to_num(model_params.loc[:, 'SigIncrease'].values) + sig_0 = model_params.loc[:, 'Theta_1'].values + + model_params.loc[:, 'Theta_1'] = np.sqrt(sig_0**2.0 + sig_inc**2.0) + + # remove unneeded fields from model_params + for col in ('SigIncrease', 'CensorLower', 'CensorUpper'): + model_params = model_params.drop(col, axis=1) + + # reorder the remaining fields for clarity + model_params = model_params[ + ['Family', 'Theta_0', 
'Theta_1', 'TruncateLower', 'TruncateUpper']
+        ]
+
+        self.marginal_params = model_params
+
+        self.log_msg(
+            "\nCalibrated demand model marginal distributions:\n"
+            + str(model_params),
+            prepend_timestamp=False,
+        )
+
+        # save the correlation matrix
+        self.correlation = pd.DataFrame(
+            demand_rho, columns=cal_df.index, index=cal_df.index
+        )
+
+        self.log_msg(
+            "\nCalibrated demand model correlation matrix:\n"
+            + str(self.correlation),
+            prepend_timestamp=False,
+        )
+
+        self.calibrated = True
+
+    def save_model(self, file_prefix):
+        """
+        Save parameters of the demand model to a set of csv files
+
+        """
+
+        self.log_div()
+        self.log_msg('Saving demand model...')
+
+        # save the correlation and empirical data
+        file_io.save_to_csv(self.correlation, file_prefix + '_correlation.csv')
+        if self.empirical_data is not None:
+            file_io.save_to_csv(
+                self.empirical_data,
+                file_prefix + '_empirical.csv',
+                units=self.units,
+                unit_conversion_factors=self._asmnt.unit_conversion_factors,
+                log=self._asmnt.log,
+            )
+
+        # the log standard deviations in the marginal parameters need to be
+        # scaled up before feeding to the saving method where they will be
+        # scaled back down and end up being saved unscaled to the target file
+
+        marginal_params = self.marginal_params.copy()
+
+        log_rows = marginal_params['Family'] == 'lognormal'
+        log_demands = marginal_params.loc[log_rows, :]
+
+        for label in log_demands.index:
+            if label in self.units.index:
+                unit_factor = self._asmnt.calc_unit_scale_factor(self.units[label])
+
+                marginal_params.loc[label, 'Theta_1'] *= unit_factor
+
+        file_io.save_to_csv(
+            marginal_params,
+            file_prefix + '_marginals.csv',
+            units=self.units,
+            unit_conversion_factors=self._asmnt.unit_conversion_factors,
+            orientation=1,
+            log=self._asmnt.log,
+        )
+
+        self.log_msg('Demand model successfully saved.', prepend_timestamp=False)
+
+    def load_model(self, data_source):
+        """
+        Load the model that describes demands on the asset.
+
+        Parameters
+        ----------
+        data_source: string or dict
+            If string, the data_source is a file prefix (<prefix> in the
+            following description) that identifies the following files:
+            <prefix>_marginals.csv, <prefix>_empirical.csv,
+            <prefix>_correlation.csv. If dict, the data source is a
+            dictionary with the following optional keys: 'marginals',
+            'empirical', and 'correlation'. The value under each key
+            shall be a DataFrame.
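+
+            For example (an illustrative sketch with a hypothetical
+            prefix), load_model('output/demand') would read
+            output/demand_marginals.csv, output/demand_empirical.csv,
+            and output/demand_correlation.csv.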
+ """ + + self.log_div() + self.log_msg('Loading demand model...') + + # prepare the marginal data source variable to load the data + if isinstance(data_source, dict): + marginal_data_source = data_source.get('marginals') + empirical_data_source = data_source.get('empirical', None) + correlation_data_source = data_source.get('correlation', None) + else: + marginal_data_source = data_source + '_marginals.csv' + empirical_data_source = data_source + '_empirical.csv' + correlation_data_source = data_source + '_correlation.csv' + + if empirical_data_source is not None: + self.empirical_data = file_io.load_data( + empirical_data_source, + self._asmnt.unit_conversion_factors, + log=self._asmnt.log, + ) + self.empirical_data.columns.names = ('type', 'loc', 'dir') + else: + self.empirical_data = None + + if correlation_data_source is not None: + self.correlation = file_io.load_data( + correlation_data_source, + self._asmnt.unit_conversion_factors, + reindex=False, + log=self._asmnt.log, + ) + self.correlation.index.set_names(['type', 'loc', 'dir'], inplace=True) + self.correlation.columns.set_names(['type', 'loc', 'dir'], inplace=True) + else: + self.correlation = None + + # the log standard deviations in the marginal parameters need to be + # adjusted after getting the data from the loading method where they + # were scaled according to the units of the corresponding variable + + # Note that a data source without marginal information is not valid + marginal_params, units = file_io.load_data( + marginal_data_source, + None, + orientation=1, + reindex=False, + return_units=True, + log=self._asmnt.log, + ) + marginal_params.index.set_names(['type', 'loc', 'dir'], inplace=True) + + marginal_params = self.convert_marginal_params(marginal_params.copy(), units) + + self.marginal_params = marginal_params + self.units = units + + self.log_msg('Demand model successfully loaded.', prepend_timestamp=False) + + def _create_RVs(self, preserve_order=False): + """ + Create a random variable registry for the joint distribution of demands. 
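+
+        One random variable is created for each row of `marginal_params`
+        and is tagged 'EDP-<type>-<loc>-<dir>'; for instance, a marginal
+        defined for ('PFA', '1', '1') becomes the RV 'EDP-PFA-1-1'
+        (an illustrative name).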
+ + """ + + # initialize the registry + RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) + + # add a random variable for each demand variable + for rv_params in self.marginal_params.itertuples(): + edp = rv_params.Index + rv_tag = f'EDP-{edp[0]}-{edp[1]}-{edp[2]}' + family = getattr(rv_params, "Family", 'deterministic') + + if family == 'empirical': + if preserve_order: + dist_family = 'coupled_empirical' + else: + dist_family = 'empirical' + + # empirical RVs need the data points + RV_reg.add_RV( + uq.rv_class_map(dist_family)( + name=rv_tag, + raw_samples=self.empirical_data.loc[:, edp].values, + ) + ) + + else: + # all other RVs need parameters of their distributions + RV_reg.add_RV( + uq.rv_class_map(family)( + name=rv_tag, + theta=[ + getattr(rv_params, f"Theta_{t_i}", np.nan) + for t_i in range(3) + ], + truncation_limits=[ + getattr(rv_params, f"Truncate{side}", np.nan) + for side in ("Lower", "Upper") + ], + ) + ) + + self.log_msg( + f"\n{self.marginal_params.shape[0]} random variables created.", + prepend_timestamp=False, + ) + + # add an RV set to consider the correlation between demands, if needed + if self.correlation is not None: + rv_set_tags = [ + f'EDP-{edp[0]}-{edp[1]}-{edp[2]}' + for edp in self.correlation.index.values + ] + + RV_reg.add_RV_set( + uq.RandomVariableSet( + 'EDP_set', + list(RV_reg.RVs(rv_set_tags).values()), + self.correlation.values, + ) + ) + + self.log_msg( + f"\nCorrelations between {len(rv_set_tags)} random variables " + "successfully defined.", + prepend_timestamp=False, + ) + + self._RVs = RV_reg + + def clone_demands(self, demand_cloning): + """ + Clones demands. This means copying over columns of the + original demand sample and assigning given names to them. The + columns to be copied over and the names to assign to the + copies are defined as the keys and values of the + `demand_cloning` dictionary, respectively. + The method modifies `sample` inplace. + + Parameters + ---------- + demand_cloning: dict + Keys correspond to the columns of the original sample to + be copied over and the values correspond to the intended + names for the copies. Caution: It's possible to define a + dictionary with duplicate keys, and Python will just keep + the last entry without warning. Users need to be careful + enough to avoid duplicate keys, because we can't validate + them. + E.g.: x = {'1': 1.00, '1': 2.00} results in x={'1': 2.00}. + + Raises + ------ + ValueError + In multiple instances of invalid demand_cloning entries. + + """ + + # it's impossible to have duplicate keys, because + # demand_cloning is a dictionary. + new_columns_list = demand_cloning.values() + # The following prevents duplicate entries in the values + # corresponding to a single cloned demand (1), but + # also the same column being specified as the cloned + # entry of multiple demands (2). + # e.g. 
+        # (1): {'PGV-0-1': ['PGV-1-1', 'PGV-1-1', ...]}
+        # (2): {'PGV-0-1': ['PGV-1-1', ...], 'PGV-0-2': ['PGV-1-1', ...]}
+        flat_list = []
+        for new_columns in new_columns_list:
+            flat_list.extend(new_columns)
+        if len(set(flat_list)) != len(flat_list):
+            raise ValueError('Duplicate entries in demand cloning configuration.')
+
+        # turn the config entries to tuples
+        def turn_to_tuples(demand_cloning):
+            demand_cloning_tuples = {}
+            for key, values in demand_cloning.items():
+                demand_cloning_tuples[tuple(key.split('-'))] = [
+                    tuple(x.split('-')) for x in values
+                ]
+            return demand_cloning_tuples
+
+        demand_cloning = turn_to_tuples(demand_cloning)
+
+        # The demand cloning configuration should not include
+        # columns that are not present in the original sample.
+        warn_columns = []
+        for column in demand_cloning:
+            if column not in self.sample.columns:
+                warn_columns.append(column)
+        if warn_columns:
+            warn_columns = ['-'.join(x) for x in warn_columns]
+            self.log_msg(
+                "\nWARNING: The demand cloning configuration lists "
+                "columns that are not present in the original demand sample's "
+                f"columns: {warn_columns}.\n",
+                prepend_timestamp=False,
+            )
+
+        # we iterate over the existing columns of the sample and try
+        # to locate columns that need to be copied as required by the
+        # demand cloning configuration. If a column does not need
+        # to be cloned it is left as is. Otherwise, we keep track
+        # of its initial index location (in `column_index`) and the
+        # number of times it needs to be replicated, along with the
+        # new names of its copies (in `column_values`).
+        column_index = []
+        column_values = []
+        for i, column in enumerate(self.sample.columns):
+            if column not in demand_cloning:
+                column_index.append(i)
+                column_values.append(column)
+            else:
+                new_column_values = demand_cloning[column]
+                column_index.extend([i] * len(new_column_values))
+                column_values.extend(new_column_values)
+        # copy the columns
+        self.sample = self.sample.iloc[:, column_index]
+        # update the column index
+        self.sample.columns = pd.MultiIndex.from_tuples(column_values)
+
+    def generate_sample(self, config):
+        """
+        Generates an RV sample with the specified configuration.
+        """
+
+        if self.marginal_params is None:
+            raise ValueError(
+                'Model parameters have not been specified. Either '
+                'load parameters from a file or calibrate the '
+                'model using raw demand data.'
+ ) + + self.log_div() + self.log_msg('Generating sample from demand variables...') + + self._create_RVs(preserve_order=config.get('PreserveRawOrder', False)) + + sample_size = config['SampleSize'] + self._RVs.generate_sample( + sample_size=sample_size, method=self._asmnt.options.sampling_method + ) + + # replace the potentially existing raw sample with the generated one + assert self._RVs is not None + assert self._RVs.RV_sample is not None + sample = pd.DataFrame(self._RVs.RV_sample) + sample.sort_index(axis=0, inplace=True) + sample.sort_index(axis=1, inplace=True) + + sample = base.convert_to_MultiIndex(sample, axis=1)['EDP'] + + sample.columns.names = ['type', 'loc', 'dir'] + self.sample = sample + + if config.get('DemandCloning', False): + self.clone_demands(config['DemandCloning']) + + self.log_msg( + f"\nSuccessfully generated {sample_size} realizations.", + prepend_timestamp=False, + ) diff --git a/pelicun/model/loss_model.py b/pelicun/model/loss_model.py new file mode 100644 index 000000000..eb0b2d4de --- /dev/null +++ b/pelicun/model/loss_model.py @@ -0,0 +1,1172 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Leland Stanford Junior University +# Copyright (c) 2018 The Regents of the University of California +# +# This file is part of pelicun. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# You should have received a copy of the BSD 3-Clause License along with +# pelicun. If not, see . +# +# Contributors: +# Adam Zsarnóczay +# John Vouvakis Manousakis + +""" +This file defines Loss model objects and their methods. + +.. rubric:: Contents + +.. autosummary:: + + prep_constant_median_DV + prep_bounded_multilinear_median_DV + + LossModel + RepairModel + +""" + +import numpy as np +import pandas as pd +from .pelicun_model import PelicunModel +from .. import base +from .. import uq +from .. import file_io + + +idx = base.idx + + +class LossModel(PelicunModel): + """ + Parent object for loss models. + + All loss assessment methods should be children of this class. 
+ + Parameters + ---------- + + """ + + def __init__(self, assessment): + super().__init__(assessment) + + self._sample = None + self.loss_map = None + self.loss_params = None + self.loss_type = 'Generic' + + @property + def sample(self): + """ + sample property + """ + return self._sample + + def save_sample(self, filepath=None, save_units=False): + """ + Save loss sample to a csv file + + """ + self.log_div() + if filepath is not None: + self.log_msg('Saving loss sample...') + + cmp_units = self.loss_params[('DV', 'Unit')] + dv_units = pd.Series(index=self.sample.columns, name='Units', dtype='object') + + for cmp_id, dv_type in cmp_units.index: + dv_units.loc[(dv_type, cmp_id)] = cmp_units.at[(cmp_id, dv_type)] + + res = file_io.save_to_csv( + self.sample, + filepath, + units=dv_units, + unit_conversion_factors=self._asmnt.unit_conversion_factors, + use_simpleindex=(filepath is not None), + log=self._asmnt.log, + ) + + if filepath is not None: + self.log_msg('Loss sample successfully saved.', prepend_timestamp=False) + return None + + # else: + units = res.loc["Units"] + res.drop("Units", inplace=True) + + if save_units: + return res.astype(float), units + + return res.astype(float) + + def load_sample(self, filepath): + """ + Load damage sample data. + + """ + self.log_div() + self.log_msg('Loading loss sample...') + + self._sample = file_io.load_data( + filepath, self._asmnt.unit_conversion_factors, log=self._asmnt.log + ) + + self.log_msg('Loss sample successfully loaded.', prepend_timestamp=False) + + def load_model(self, data_paths, mapping_path, decision_variables=None): + """ + Load the list of prescribed consequence models and their parameters + + Parameters + ---------- + data_paths: list of string or DataFrame + List of paths to data files with consequence model + parameters. Default XY datasets can be accessed as + PelicunDefault/XY. The list can also contain DataFrame + objects, in which case that data is used directly. + mapping_path: string + Path to a csv file that maps drivers (i.e., damage or edp data) to + loss models. + decision_variables: list of string, optional + List of decision variables to include in the analysis. If None, + all variables provided in the consequence models are included. When + a list is provided, only variables in the list will be included. + """ + + self.log_div() + self.log_msg(f'Loading loss map for {self.loss_type}...') + + loss_map = file_io.load_data( + mapping_path, None, orientation=1, reindex=False, log=self._asmnt.log + ) + + loss_map['Driver'] = loss_map.index.values + loss_map['Consequence'] = loss_map[self.loss_type] + loss_map.index = np.arange(loss_map.shape[0]) + loss_map = loss_map.loc[:, ['Driver', 'Consequence']] + loss_map.dropna(inplace=True) + + self.loss_map = loss_map + + self.log_msg("Loss map successfully parsed.", prepend_timestamp=False) + + self.log_div() + self.log_msg(f'Loading loss parameters for {self.loss_type}...') + + # replace default flag with default data path + data_paths = file_io.substitute_default_path(data_paths) + + data_list = [] + # load the data files one by one + for data_path in data_paths: + data = file_io.load_data( + data_path, None, orientation=1, reindex=False, log=self._asmnt.log + ) + + data_list.append(data) + + loss_params = pd.concat(data_list, axis=0) + + # drop redefinitions of components + loss_params = ( + loss_params.groupby(level=[0, 1]) + .first() + .transform(lambda x: x.fillna(np.nan)) + ) + # note: .groupby introduces None entries. We replace them with + # NaN for consistency. 
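+        # (Illustrative note: if two of the provided data files define
+        #  consequences for the same hypothetical component 'CMP.A', the
+        #  .first() call above keeps only the first definition encountered,
+        #  so data files listed earlier in `data_paths` take precedence.)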
+
+        # keep only the relevant data
+        loss_cmp = np.unique(self.loss_map['Consequence'].values)
+
+        available_cmp = loss_params.index.unique(level=0)
+        missing_cmp = []
+        for cmp in loss_cmp:
+            if cmp not in available_cmp:
+                missing_cmp.append(cmp)
+
+        if len(missing_cmp) > 0:
+            self.log_msg(
+                "\nWARNING: The loss model does not provide "
+                "consequence information for the following component(s) "
+                f"in the loss map: {missing_cmp}. They are removed from "
+                "further analysis.\n",
+                prepend_timestamp=False,
+            )
+
+            self.loss_map = self.loss_map.loc[~loss_map['Consequence'].isin(missing_cmp)]
+            loss_cmp = np.unique(self.loss_map['Consequence'].values)
+
+        loss_params = loss_params.loc[idx[loss_cmp, :], :]
+
+        # drop unused damage states
+        DS_list = loss_params.columns.get_level_values(0).unique()
+        DS_to_drop = []
+        for DS in DS_list:
+            if np.all(pd.isna(loss_params.loc[:, idx[DS, :]].values)):
+                DS_to_drop.append(DS)
+
+        loss_params.drop(columns=DS_to_drop, level=0, inplace=True)
+
+        # convert values to internal base units
+        for DS in loss_params.columns.unique(level=0):
+            if DS.startswith('DS'):
+                loss_params.loc[:, DS] = self.convert_marginal_params(
+                    loss_params.loc[:, DS].copy(),
+                    loss_params[('DV', 'Unit')],
+                    loss_params[('Quantity', 'Unit')],
+                ).values
+
+        # check for components with incomplete loss information
+        cmp_incomplete_list = loss_params.loc[
+            loss_params[('Incomplete', '')] == 1
+        ].index
+
+        if len(cmp_incomplete_list) > 0:
+            loss_params.drop(cmp_incomplete_list, inplace=True)
+
+            self.log_msg(
+                "\n"
+                "WARNING: Loss information is incomplete for the "
+                f"following component(s) {cmp_incomplete_list}. "
+                "They were removed from the analysis."
+                "\n",
+                prepend_timestamp=False,
+            )
+
+        # filter decision variables, if needed
+        if decision_variables is not None:
+            loss_params = loss_params.reorder_levels([1, 0])
+
+            available_DVs = loss_params.index.unique(level=0)
+            filtered_DVs = []
+
+            for DV_i in decision_variables:
+                if DV_i in available_DVs:
+                    filtered_DVs.append(DV_i)
+
+            loss_params = loss_params.loc[filtered_DVs, :].reorder_levels([1, 0])
+
+        self.loss_params = loss_params.sort_index(axis=1)
+
+        self.log_msg("Loss parameters successfully parsed.", prepend_timestamp=False)
+
+    def aggregate_losses(self):
+        """
+        This is a placeholder method.
+
+        The method of aggregating the Decision Variable sample is specific to
+        each DV and needs to be implemented in every child of the LossModel
+        independently.
+        """
+        raise NotImplementedError
+
+    def _generate_DV_sample(self, dmg_quantities, sample_size):
+        """
+        This is a placeholder method.
+
+        The method of sampling decision variables is Decision
+        Variable-specific and needs to be implemented in every child
+        of the LossModel independently.
+        """
+        raise NotImplementedError
+
+    def calculate(self):
+        """
+        Calculate the consequences of each component block damage in
+        the asset.
+
+        """
+
+        self.log_div()
+        self.log_msg("Calculating losses...")
+
+        drivers = [d for d, _ in self.loss_map['Driver']]
+
+        if 'DMG' in drivers:
+            sample_size = self._asmnt.damage.sample.shape[0]
+        elif 'DEM' in drivers:
+            sample_size = self._asmnt.demand.sample.shape[0]
+        else:
+            raise ValueError('Invalid loss drivers. Check the specified loss map.')
+
+        # First, get the damaged quantities in each damage state for
+        # each component of interest.
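        # Illustrative note (hypothetical IDs): each entry in
        # self.loss_map['Driver'] is a (type, id) tuple, e.g.
        # ('DMG', 'B.10.31.001'); the drivers list above therefore reduces
        # to the driver types, e.g. ['DMG', 'DMG'], and the sample size is
        # taken from the damage sample whenever damage results drive the
        # losses.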
+ dmg_q = self._asmnt.damage.sample.copy() + + # Now sample random Decision Variables + # Note that this method is DV-specific and needs to be + # implemented in every child of the LossModel independently. + self._generate_DV_sample(dmg_q, sample_size) + + self.log_msg("Loss calculation successful.") + + +class RepairModel(LossModel): + """ + Manages building repair consequence assessments. + + Parameters + ---------- + + """ + + def __init__(self, assessment): + super().__init__(assessment) + + self.loss_type = 'Repair' + + # def load_model(self, data_paths, mapping_path): + + # super().load_model(data_paths, mapping_path) + + # def calculate(self): + + # super().calculate() + + def _create_DV_RVs(self, case_list): + """ + Prepare the random variables used for repair cost and time simulation. + + Parameters + ---------- + case_list: MultiIndex + Index with cmp-loc-dir-ds descriptions that identify the RVs + we need for the simulation. + + Raises + ------ + ValueError + When any Loss Driver is not recognized. + """ + + RV_reg = uq.RandomVariableRegistry(self._asmnt.options.rng) + LP = self.loss_params + + # make ds the second level in the MultiIndex + case_DF = pd.DataFrame( + index=case_list.reorder_levels([0, 4, 1, 2, 3]), + columns=[ + 0, + ], + ) + case_DF.sort_index(axis=0, inplace=True) + driver_cmps = case_list.get_level_values(0).unique() + + rv_count = 0 + + # for each loss component + for loss_cmp_id in self.loss_map.index.values: + # load the corresponding parameters + driver_type, driver_cmp_id = self.loss_map.loc[loss_cmp_id, 'Driver'] + conseq_cmp_id = self.loss_map.loc[loss_cmp_id, 'Consequence'] + + # currently, we only support DMG-based loss calculations + # but this will be extended in the very near future + if driver_type != 'DMG': + raise ValueError( + f"Loss Driver type not recognized: " f"{driver_type}" + ) + + # load the parameters + # TODO: remove specific DV_type references and make the code below + # generate parameters for any DV_types provided + if (conseq_cmp_id, 'Cost') in LP.index: + cost_params = LP.loc[(conseq_cmp_id, 'Cost'), :] + else: + cost_params = None + + if (conseq_cmp_id, 'Time') in LP.index: + time_params = LP.loc[(conseq_cmp_id, 'Time'), :] + else: + time_params = None + + if (conseq_cmp_id, 'Carbon') in LP.index: + carbon_params = LP.loc[(conseq_cmp_id, 'Carbon'), :] + else: + carbon_params = None + + if (conseq_cmp_id, 'Energy') in LP.index: + energy_params = LP.loc[(conseq_cmp_id, 'Energy'), :] + else: + energy_params = None + + if driver_cmp_id not in driver_cmps: + continue + + for ds in case_DF.loc[driver_cmp_id, :].index.unique(level=0): + if ds == '0': + continue + + if cost_params is not None: + cost_params_DS = cost_params[f'DS{ds}'] + + cost_family = cost_params_DS.get('Family', np.nan) + cost_theta = [ + cost_params_DS.get(f"Theta_{t_i}", np.nan) + for t_i in range(3) + ] + + # If the first parameter is controlled by a function, we use + # 1.0 in its place and will scale the results in a later + # step + if '|' in str(cost_theta[0]): + # if isinstance(cost_theta[0], str): + cost_theta[0] = 1.0 + + else: + cost_family = np.nan + + if time_params is not None: + time_params_DS = time_params[f'DS{ds}'] + + time_family = time_params_DS.get('Family', np.nan) + time_theta = [ + time_params_DS.get(f"Theta_{t_i}", np.nan) + for t_i in range(3) + ] + + # If the first parameter is controlled by a function, we use + # 1.0 in its place and will scale the results in a later + # step + if '|' in str(time_theta[0]): + # if isinstance(time_theta[0], 
str): + time_theta[0] = 1.0 + + else: + time_family = np.nan + + if carbon_params is not None: + carbon_params_DS = carbon_params[f'DS{ds}'] + + carbon_family = carbon_params_DS.get('Family', np.nan) + carbon_theta = [ + carbon_params_DS.get(f"Theta_{t_i}", np.nan) + for t_i in range(3) + ] + + # If the first parameter is controlled by a function, we use + # 1.0 in its place and will scale the results in a later + # step + if '|' in str(carbon_theta[0]): + # if isinstance(carbon_theta[0], str): + carbon_theta[0] = 1.0 + + else: + carbon_family = np.nan + + if energy_params is not None: + energy_params_DS = energy_params[f'DS{ds}'] + + energy_family = energy_params_DS.get('Family', np.nan) + energy_theta = [ + energy_params_DS.get(f"Theta_{t_i}", np.nan) + for t_i in range(3) + ] + + # If the first parameter is controlled by a function, we use + # 1.0 in its place and will scale the results in a later + # step + if '|' in str(energy_theta[0]): + # if isinstance(energy_theta[0], str): + energy_theta[0] = 1.0 + + else: + energy_family = np.nan + + # If neither of the DV_types has a stochastic model assigned, + # we do not need random variables for this DS + if ( + (pd.isna(cost_family)) + and (pd.isna(time_family)) + and (pd.isna(carbon_family)) + and (pd.isna(energy_family)) + ): + continue + + # Otherwise, load the loc-dir cases + loc_dir_uid = case_DF.loc[(driver_cmp_id, ds)].index.values + + for loc, direction, uid in loc_dir_uid: + # assign cost RV + if pd.isna(cost_family) is False: + cost_rv_tag = ( + f'Cost-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' + ) + + RV_reg.add_RV( + uq.rv_class_map(cost_family)( + name=cost_rv_tag, + theta=cost_theta, + truncation_limits=[0.0, np.nan], + ) + ) + rv_count += 1 + + # assign time RV + if pd.isna(time_family) is False: + time_rv_tag = ( + f'Time-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' + ) + + RV_reg.add_RV( + uq.rv_class_map(time_family)( + name=time_rv_tag, + theta=time_theta, + truncation_limits=[0.0, np.nan], + ) + ) + rv_count += 1 + + # assign time RV + if pd.isna(carbon_family) is False: + carbon_rv_tag = ( + f'Carbon-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' + ) + + RV_reg.add_RV( + uq.rv_class_map(carbon_family)( + name=carbon_rv_tag, + theta=carbon_theta, + truncation_limits=[0.0, np.nan], + ) + ) + rv_count += 1 + + # assign time RV + if pd.isna(energy_family) is False: + energy_rv_tag = ( + f'Energy-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}' + ) + + RV_reg.add_RV( + uq.rv_class_map(energy_family)( + name=energy_rv_tag, + theta=energy_theta, + truncation_limits=[0.0, np.nan], + ) + ) + rv_count += 1 + + # assign correlation between RVs across DV_types + # TODO: add more DV_types and handle cases with only a + # subset of them being defined + if ( + (pd.isna(cost_family) is False) + and (pd.isna(time_family) is False) + and (self._asmnt.options.rho_cost_time != 0.0) + ): + rho = self._asmnt.options.rho_cost_time + + RV_reg.add_RV_set( + uq.RandomVariableSet( + f'DV-{loss_cmp_id}-{ds}-{loc}-{direction}-{uid}_set', + list( + RV_reg.RVs([cost_rv_tag, time_rv_tag]).values() + ), + np.array([[1.0, rho], [rho, 1.0]]), + ) + ) + + self.log_msg( + f"\n{rv_count} random variables created.", prepend_timestamp=False + ) + + if rv_count > 0: + return RV_reg + # else: + return None + + def _calc_median_consequence(self, eco_qnt): + """ + Calculate the median repair consequence for each loss component. 
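
        Parameters
        ----------
        eco_qnt: DataFrame
            Aggregated damage quantities used to account for economies of
            scale. Columns are grouped by component and, depending on the
            economy-of-scale options, by damage state and location.

        Returns
        -------
        medians: dict
            Dictionary keyed by decision variable type (e.g., 'Cost',
            'Time') that maps to a DataFrame of median consequences per
            realization for each component and damage state.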
+ + """ + + medians = {} + + DV_types = self.loss_params.index.unique(level=1) + + # for DV_type, DV_type_scase in zip(['COST', 'TIME'], ['Cost', 'Time']): + for DV_type in DV_types: + cmp_list = [] + median_list = [] + + for loss_cmp_id in self.loss_map.index: + driver_type, driver_cmp = self.loss_map.loc[loss_cmp_id, 'Driver'] + loss_cmp_name = self.loss_map.loc[loss_cmp_id, 'Consequence'] + + # check if the given DV type is available as an output for the + # selected component + if (loss_cmp_name, DV_type) not in self.loss_params.index: + continue + + if driver_type != 'DMG': + raise ValueError( + f"Loss Driver type not recognized: " f"{driver_type}" + ) + + if driver_cmp not in eco_qnt.columns.get_level_values(0).unique(): + continue + + ds_list = [] + sub_medians = [] + + for ds in self.loss_params.columns.get_level_values(0).unique(): + if not ds.startswith('DS'): + continue + + ds_id = ds[2:] + + if ds_id == '0': + continue + + loss_params_DS = self.loss_params.loc[ + (loss_cmp_name, DV_type), ds + ] + + # check if theta_0 is defined + theta_0 = loss_params_DS.get('Theta_0', np.nan) + + if pd.isna(theta_0): + continue + + # check if the distribution type is supported + family = loss_params_DS.get('Family', np.nan) + + if (not pd.isna(family)) and ( + family not in ['normal', 'lognormal', 'deterministic'] + ): + raise ValueError( + f"Loss Distribution of type {family} " f"not supported." + ) + + # If theta_0 is a scalar + try: + theta_0 = float(theta_0) + + if pd.isna(loss_params_DS.get('Family', np.nan)): + # if theta_0 is constant, then use it directly + f_median = prep_constant_median_DV(theta_0) + + else: + # otherwise use a constant 1.0 as the median + # The random variable will be generated as a + # variation from this 1.0 and added in a later step. 
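        # Illustrative note: when a scalar Theta_0 is paired with a
        # distribution Family, the constant 1.0 below is only a placeholder
        # median; the magnitude is carried by the corresponding random
        # variable (whose first parameter holds the tabulated Theta_0), and
        # the consequence in each realization is the product of the damaged
        # quantity, this median, and the sampled value. When Theta_0 is a
        # string such as '300,200,100|5,10,20' (hypothetical values), the
        # except branch below builds a multilinear median instead: 300 per
        # unit up to a quantity of 5, interpolated down to 100 at a
        # quantity of 20, and held constant outside those bounds to
        # reflect economies of scale.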
+ f_median = prep_constant_median_DV(1.0) + + except ValueError: + # otherwise, use the multilinear function + all_vals = np.array( + [val.split(',') for val in theta_0.split('|')], + dtype=float, + ) + medns = all_vals[0] + qnts = all_vals[1] + f_median = prep_bounded_multilinear_median_DV(medns, qnts) + + # get the corresponding aggregate damage quantities + # to consider economies of scale + if 'ds' in eco_qnt.columns.names: + avail_ds = eco_qnt.loc[:, driver_cmp].columns.unique(level=0) + + if ds_id not in avail_ds: + continue + + eco_qnt_i = eco_qnt.loc[:, (driver_cmp, ds_id)].copy() + + else: + eco_qnt_i = eco_qnt.loc[:, driver_cmp].copy() + + if isinstance(eco_qnt_i, pd.Series): + eco_qnt_i = eco_qnt_i.to_frame() + eco_qnt_i.columns = ['X'] + eco_qnt_i.columns.name = 'del' + + # generate the median values for each realization + eco_qnt_i.loc[:, :] = f_median(eco_qnt_i.values) + + sub_medians.append(eco_qnt_i) + ds_list.append(ds_id) + + if len(ds_list) > 0: + # combine medians across damage states into one DF + median_list.append(pd.concat(sub_medians, axis=1, keys=ds_list)) + cmp_list.append(loss_cmp_id) + + if len(cmp_list) > 0: + # combine medians across components into one DF + result = pd.concat(median_list, axis=1, keys=cmp_list) + + # remove the extra column header level + if 'del' in result.columns.names: + result.columns = result.columns.droplevel('del') + + # name the remaining column header levels + if self._asmnt.options.eco_scale["AcrossFloors"] is True: + result.columns.names = ['cmp', 'ds'] + + else: + result.columns.names = ['cmp', 'ds', 'loc'] + + # save the results to the returned dictionary + medians.update({DV_type: result}) + + return medians + + def aggregate_losses(self): + """ + Aggregates repair consequences across components. + + Repair costs are simply summed up for each realization while repair + times are aggregated to provide lower and upper limits of the total + repair time using the assumption of parallel and sequential repair of + floors, respectively. Repairs within each floor are assumed to occur + sequentially. 
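
        For example, if a realization assigns repair times of 5, 3, and 4
        days to three different floors, the sequential estimate is
        5 + 3 + 4 = 12 days, while the parallel estimate is
        max(5, 3, 4) = 5 days; the actual total repair time is expected to
        fall between these two bounds.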
+ """ + + self.log_div() + self.log_msg("Aggregating repair consequences...") + + DV = self.sample + + if DV is None: + return + + # group results by DV type and location + DVG = DV.groupby(level=[0, 4], axis=1).sum() + + # create the summary DF + df_agg = pd.DataFrame( + index=DV.index, + columns=[ + 'repair_cost', + 'repair_time-parallel', + 'repair_time-sequential', + 'repair_carbon', + 'repair_energy', + ], + ) + + if 'Cost' in DVG.columns: + df_agg['repair_cost'] = DVG['Cost'].sum(axis=1) + else: + df_agg = df_agg.drop('repair_cost', axis=1) + + if 'Time' in DVG.columns: + df_agg['repair_time-sequential'] = DVG['Time'].sum(axis=1) + + df_agg['repair_time-parallel'] = DVG['Time'].max(axis=1) + else: + df_agg = df_agg.drop( + ['repair_time-parallel', 'repair_time-sequential'], axis=1 + ) + + if 'Carbon' in DVG.columns: + df_agg['repair_carbon'] = DVG['Carbon'].sum(axis=1) + else: + df_agg = df_agg.drop('repair_carbon', axis=1) + + if 'Energy' in DVG.columns: + df_agg['repair_energy'] = DVG['Energy'].sum(axis=1) + else: + df_agg = df_agg.drop('repair_energy', axis=1) + + # convert units + + cmp_units = ( + self.loss_params[('DV', 'Unit')] + .groupby( + level=[ + 1, + ] + ) + .agg(lambda x: x.value_counts().index[0]) + ) + + dv_units = pd.Series(index=df_agg.columns, name='Units', dtype='object') + + if 'Cost' in DVG.columns: + dv_units['repair_cost'] = cmp_units['Cost'] + + if 'Time' in DVG.columns: + dv_units['repair_time-parallel'] = cmp_units['Time'] + dv_units['repair_time-sequential'] = cmp_units['Time'] + + if 'Carbon' in DVG.columns: + dv_units['repair_carbon'] = cmp_units['Carbon'] + + if 'Energy' in DVG.columns: + dv_units['repair_energy'] = cmp_units['Energy'] + + df_agg = file_io.save_to_csv( + df_agg, + None, + units=dv_units, + unit_conversion_factors=self._asmnt.unit_conversion_factors, + use_simpleindex=False, + log=self._asmnt.log, + ) + + df_agg.drop("Units", inplace=True) + + # convert header + + df_agg = base.convert_to_MultiIndex(df_agg, axis=1) + + self.log_msg("Repair consequences successfully aggregated.") + + return df_agg.astype(float) + + def _generate_DV_sample(self, dmg_quantities, sample_size): + """ + Generate a sample of repair costs and times. + + Parameters + ---------- + dmg_quantities: DataFrame + A table with the quantity of damage experienced in each damage state + of each performance group at each location and direction. You can use + the prepare_dmg_quantities method in the DamageModel to get such a + DF. + sample_size: integer + The number of realizations to generate. + + Raises + ------ + ValueError + When any Loss Driver is not recognized. 
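
        Notes
        -----
        The sample is generated in three steps: the damage quantities are
        first aggregated according to the economy-of-scale options, the
        median consequences are then evaluated for each realization, and
        finally the sampled deviations from the medians are multiplied
        with the damaged quantities to obtain the decision variable
        sample.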
+ """ + + # calculate the quantities for economies of scale + self.log_msg("\nAggregating damage quantities...", prepend_timestamp=False) + + # If everything is undamaged there are no losses + if set(dmg_quantities.columns.get_level_values('ds')) == {'0'}: + self._sample = None + self.log_msg( + "There is no damage---DV sample is set to None.", + prepend_timestamp=False, + ) + return + + if self._asmnt.options.eco_scale["AcrossFloors"]: + if self._asmnt.options.eco_scale["AcrossDamageStates"]: + eco_levels = [ + 0, + ] + eco_columns = [ + 'cmp', + ] + + else: + eco_levels = [0, 4] + eco_columns = ['cmp', 'ds'] + + elif self._asmnt.options.eco_scale["AcrossDamageStates"]: + eco_levels = [0, 1] + eco_columns = ['cmp', 'loc'] + + else: + eco_levels = [0, 1, 4] + eco_columns = ['cmp', 'loc', 'ds'] + + eco_group = dmg_quantities.groupby(level=eco_levels, axis=1) + eco_qnt = eco_group.sum().mask(eco_group.count() == 0, np.nan) + assert eco_qnt.columns.names == eco_columns + + self.log_msg( + "Successfully aggregated damage quantities.", prepend_timestamp=False + ) + + # apply the median functions, if needed, to get median consequences for + # each realization + self.log_msg( + "\nCalculating the median repair consequences...", + prepend_timestamp=False, + ) + + medians = self._calc_median_consequence(eco_qnt) + + self.log_msg( + "Successfully determined median repair consequences.", + prepend_timestamp=False, + ) + + # combine the median consequences with the samples of deviation from the + # median to get the consequence realizations. + self.log_msg( + "\nConsidering deviations from the median values to obtain " + "random DV sample..." + ) + + self.log_msg( + "Preparing random variables for repair cost and time...", + prepend_timestamp=False, + ) + RV_reg = self._create_DV_RVs(dmg_quantities.columns) + + if RV_reg is not None: + RV_reg.generate_sample( + sample_size=sample_size, method=self._asmnt.options.sampling_method + ) + + std_sample = base.convert_to_MultiIndex( + pd.DataFrame(RV_reg.RV_sample), axis=1 + ).sort_index(axis=1) + std_sample.columns.names = ['dv', 'cmp', 'ds', 'loc', 'dir', 'uid'] + + # convert column names to int + std_idx = std_sample.columns.levels + + std_sample.columns = std_sample.columns.set_levels( + [ + std_idx[0], + std_idx[1].astype(int), + std_idx[2], + std_idx[3], + std_idx[4], + std_idx[5], + ] + ) + + std_sample.sort_index(axis=1, inplace=True) + + else: + std_sample = None + + self.log_msg( + f"\nSuccessfully generated {sample_size} realizations of " + "deviation from the median consequences.", + prepend_timestamp=False, + ) + + res_list = [] + key_list = [] + + dmg_quantities.columns = dmg_quantities.columns.reorder_levels( + [0, 4, 1, 2, 3] + ) + dmg_quantities.sort_index(axis=1, inplace=True) + + DV_types = self.loss_params.index.unique(level=1) + + if isinstance(std_sample, pd.DataFrame): + std_DV_types = std_sample.columns.unique(level=0) + else: + std_DV_types = [] + + # for DV_type, _ in zip(['COST', 'TIME'], ['Cost', 'Time']): + for DV_type in DV_types: + if DV_type in std_DV_types: + prob_cmp_list = std_sample[DV_type].columns.unique(level=0) + else: + prob_cmp_list = [] + + cmp_list = [] + + if DV_type not in medians: + continue + + for cmp_i in medians[DV_type].columns.unique(level=0): + # check if there is damage in the component + driver_type, dmg_cmp_i = self.loss_map.loc[cmp_i, 'Driver'] + loss_cmp_i = self.loss_map.loc[cmp_i, 'Consequence'] + + if driver_type != 'DMG': + raise ValueError( + f"Loss Driver type not " f"recognized: {driver_type}" 
+ ) + + if not (dmg_cmp_i in dmg_quantities.columns.unique(level=0)): + continue + + ds_list = [] + + for ds in medians[DV_type].loc[:, cmp_i].columns.unique(level=0): + loc_list = [] + + for loc_id, loc in enumerate( + dmg_quantities.loc[:, (dmg_cmp_i, ds)].columns.unique( + level=0 + ) + ): + if ( + self._asmnt.options.eco_scale["AcrossFloors"] is True + ) and (loc_id > 0): + break + + if self._asmnt.options.eco_scale["AcrossFloors"] is True: + median_i = medians[DV_type].loc[:, (cmp_i, ds)] + dmg_i = dmg_quantities.loc[:, (dmg_cmp_i, ds)] + + if cmp_i in prob_cmp_list: + std_i = std_sample.loc[:, (DV_type, cmp_i, ds)] + else: + std_i = None + + else: + median_i = medians[DV_type].loc[:, (cmp_i, ds, loc)] + dmg_i = dmg_quantities.loc[:, (dmg_cmp_i, ds, loc)] + + if cmp_i in prob_cmp_list: + std_i = std_sample.loc[:, (DV_type, cmp_i, ds, loc)] + else: + std_i = None + + if std_i is not None: + res_list.append(dmg_i.mul(median_i, axis=0) * std_i) + else: + res_list.append(dmg_i.mul(median_i, axis=0)) + + loc_list.append(loc) + + if self._asmnt.options.eco_scale["AcrossFloors"] is True: + ds_list += [ + ds, + ] + else: + ds_list += [(ds, loc) for loc in loc_list] + + if self._asmnt.options.eco_scale["AcrossFloors"] is True: + cmp_list += [(loss_cmp_i, dmg_cmp_i, ds) for ds in ds_list] + else: + cmp_list += [ + (loss_cmp_i, dmg_cmp_i, ds, loc) for ds, loc in ds_list + ] + + if self._asmnt.options.eco_scale["AcrossFloors"] is True: + key_list += [ + (DV_type, loss_cmp_i, dmg_cmp_i, ds) + for loss_cmp_i, dmg_cmp_i, ds in cmp_list + ] + else: + key_list += [ + (DV_type, loss_cmp_i, dmg_cmp_i, ds, loc) + for loss_cmp_i, dmg_cmp_i, ds, loc in cmp_list + ] + + lvl_names = ['dv', 'loss', 'dmg', 'ds', 'loc', 'dir', 'uid'] + DV_sample = pd.concat(res_list, axis=1, keys=key_list, names=lvl_names) + + DV_sample = DV_sample.fillna(0).convert_dtypes() + DV_sample.columns.names = lvl_names + + # Get the flags for replacement consequence trigger + DV_sum = DV_sample.groupby( + level=[ + 1, + ], + axis=1, + ).sum() + if 'replacement' in DV_sum.columns: + # When the 'replacement' consequence is triggered, all + # local repair consequences are discarded. Note that + # global consequences are assigned to location '0'. + + id_replacement = DV_sum['replacement'] > 0 + + # get the list of non-zero locations + locs = DV_sample.columns.get_level_values(4).unique().values + + locs = locs[locs != '0'] + + DV_sample.loc[id_replacement, idx[:, :, :, :, locs]] = 0.0 + + self._sample = DV_sample + + self.log_msg("Successfully obtained DV sample.", prepend_timestamp=False) + + +def prep_constant_median_DV(median): + """ + Returns a constant median Decision Variable (DV) function. + + Parameters + ---------- + median: float + The median DV for a consequence function with fixed median. + + Returns + ------- + f: callable + A function that returns the constant median DV for all component + quantities. + """ + + def f(*args): + # pylint: disable=unused-argument + return median + + return f + + +def prep_bounded_multilinear_median_DV(medians, quantities): + """ + Returns a bounded multilinear median Decision Variable (DV) function. + + The median DV equals the min and max values when the quantity is + outside of the prescribed quantity bounds. When the quantity is within the + bounds, the returned median is calculated by linear interpolation. + + Parameters + ---------- + medians: ndarray + Series of values that define the y coordinates of the multilinear DV + function. 
+ quantities: ndarray + Series of values that define the component quantities corresponding to + the series of medians and serving as the x coordinates of the + multilinear DV function. + + Returns + ------- + f: callable + A function that returns the median DV given the quantity of damaged + components. + """ + + def f(quantity): + if quantity is None: + raise ValueError( + 'A bounded linear median Decision Variable function called ' + 'without specifying the quantity of damaged components' + ) + + q_array = np.asarray(quantity, dtype=np.float64) + + # calculate the median consequence given the quantity of damaged + # components + output = np.interp(q_array, quantities, medians) + + return output + + return f diff --git a/pelicun/model/pelicun_model.py b/pelicun/model/pelicun_model.py new file mode 100644 index 000000000..bcf9a2e8d --- /dev/null +++ b/pelicun/model/pelicun_model.py @@ -0,0 +1,228 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Leland Stanford Junior University +# Copyright (c) 2018 The Regents of the University of California +# +# This file is part of pelicun. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# You should have received a copy of the BSD 3-Clause License along with +# pelicun. If not, see . +# +# Contributors: +# Adam Zsarnóczay +# John Vouvakis Manousakis + +""" +This file defines the PelicunModel object and its methods. + +.. rubric:: Contents + +.. autosummary:: + + PelicunModel + +""" + +import numpy as np +import pandas as pd +from .. import base +from .. import uq + + +idx = base.idx + + +class PelicunModel: + """ + Generic model class to manage methods shared between all models in Pelicun. 
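
    The class stores a reference to the parent Assessment object and
    exposes its logging utilities (log_msg, log_div) as attributes, along
    with convert_marginal_params, which scales the parameters of marginal
    distributions to the internal base units.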
+ + """ + + def __init__(self, assessment): + # link the PelicunModel object to its Assessment object + self._asmnt = assessment + + # link logging methods as attributes enabling more + # concise syntax + self.log_msg = self._asmnt.log.msg + self.log_div = self._asmnt.log.div + + def convert_marginal_params(self, marginal_params, units, arg_units=None): + """ + Converts the parameters of marginal distributions in a model to SI units. + + Parameters + ---------- + marginal_params: DataFrame + Each row corresponds to a marginal distribution with Theta + parameters and TruncateLower, TruncateUpper truncation limits + identified in separate columns. + units: Series + Identifies the input units of each marginal. The index shall be + identical to the index of the marginal_params argument. The values + are strings that correspond to the units listed in base.py. + arg_units: Series + Identifies the size of a reference entity for the marginal + parameters. For example, when the parameters refer to a component + repair cost, the reference size is the component block size the + repair cost corresponds to. When the parameters refer to a capacity, + demand, or component quantity, the reference size can be omitted + and the default value will ensure that the corresponding scaling is + skipped. This Series provides the units of the reference entities + for each component. Use '1 EA' if you want to skip such scaling for + select components but provide arg units for others. + + Returns + ------- + marginal_params: DataFrame + Same structure as the input DataFrame but with values scaled to + represent internal Standard International units. + + """ + assert np.all(marginal_params.index == units.index) + if arg_units is not None: + assert np.all(marginal_params.index == arg_units.index) + + # preserve the columns in the input marginal_params + original_cols = marginal_params.columns + + # add extra columns if they are not available in the marginals + for col_name in ( + 'Family', + 'Theta_0', + 'Theta_1', + 'Theta_2', + 'TruncateLower', + 'TruncateUpper', + ): + if col_name not in marginal_params.columns: + marginal_params[col_name] = np.nan + + # get a list of unique units + unique_units = units.unique() + + # for each unit + for unit_name in unique_units: + # get the scale factor for converting from the source unit + unit_factor = self._asmnt.calc_unit_scale_factor(unit_name) + + # get the variables that use the given unit + unit_ids = marginal_params.loc[units == unit_name].index + + # for each variable + for row_id in unit_ids: + # pull the parameters of the marginal distribution + family = marginal_params.at[row_id, 'Family'] + + if family == 'empirical': + continue + + # load the theta values + theta = marginal_params.loc[ + row_id, ['Theta_0', 'Theta_1', 'Theta_2'] + ].values + + # for each theta + args = [] + for t_i, theta_i in enumerate(theta): + # if theta_i evaluates to NaN, it is considered undefined + if pd.isna(theta_i): + args.append([]) + continue + + try: + # if theta is a scalar, just store it + theta[t_i] = float(theta_i) + args.append([]) + + except ValueError: + # otherwise, we assume it is a string using SimCenter + # array notation to identify coordinates of a + # multilinear function + values = [val.split(',') for val in theta_i.split('|')] + + # the first set of values defines the ordinates that + # need to be passed to the distribution scaling method + theta[t_i] = np.array(values[0], dtype=float) + + # the second set of values defines the abscissae that + # we will use after the 
distribution scaling + args.append(np.array(values[1], dtype=float)) + + # load the truncation limits + tr_limits = marginal_params.loc[ + row_id, ['TruncateLower', 'TruncateUpper'] + ] + + arg_unit_factor = 1.0 + + # check if there is a need to scale due to argument units + if not (arg_units is None): + # get the argument unit for the given marginal + arg_unit = arg_units.get(row_id) + + if arg_unit != '1 EA': + # get the scale factor + arg_unit_factor = self._asmnt.calc_unit_scale_factor( + arg_unit + ) + + # scale arguments, if needed + for a_i, arg in enumerate(args): + if isinstance(arg, np.ndarray): + args[a_i] = arg * arg_unit_factor + + # convert the distribution parameters to SI + theta, tr_limits = uq.scale_distribution( + unit_factor / arg_unit_factor, family, theta, tr_limits + ) + + # convert multilinear function parameters back into strings + for a_i, arg in enumerate(args): + if len(arg) > 0: + theta[a_i] = '|'.join( + [ + ','.join([f'{val:g}' for val in vals]) + for vals in (theta[a_i], args[a_i]) + ] + ) + + # and update the values in the DF + marginal_params.loc[ + row_id, ['Theta_0', 'Theta_1', 'Theta_2'] + ] = theta + + marginal_params.loc[ + row_id, ['TruncateLower', 'TruncateUpper'] + ] = tr_limits + + # remove the added columns + marginal_params = marginal_params[original_cols] + + return marginal_params diff --git a/pelicun/resources/auto/Hazus_Earthquake_IM.py b/pelicun/resources/auto/Hazus_Earthquake_IM.py index 6c957ce74..970f51fa3 100644 --- a/pelicun/resources/auto/Hazus_Earthquake_IM.py +++ b/pelicun/resources/auto/Hazus_Earthquake_IM.py @@ -1,829 +1,809 @@ -# -*- coding: utf-8 -*- -# -# Copyright (c) 2023 Leland Stanford Junior University -# Copyright (c) 2023 The Regents of the University of California -# -# This file is part of pelicun. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its contributors -# may be used to endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -# -# You should have received a copy of the BSD 3-Clause License along with -# pelicun. If not, see . 
-# -# Contributors: -# Adam Zsarnóczay - -import os -import json -import pandas as pd -import pelicun - -ap_DesignLevel = { - 1940: 'PC', - 1940: 'LC', - 1975: 'MC', - 2100: 'HC' -} - -ap_DesignLevel_W1 = { - 0: 'PC', - 0: 'LC', - 1975: 'MC', - 2100: 'HC' -} - -ap_Occupancy = { - 'Other/Unknown': 'RES3', - 'Residential - Single-Family': 'RES1', - 'Residential - Town-Home': 'RES3', - 'Residential - Multi-Family': 'RES3', - 'Residential - Mixed Use': 'RES3', - 'Office': 'COM4', - 'Hotel': 'RES4', - 'School': 'EDU1', - 'Industrial - Light': 'IND2', - 'Industrial - Warehouse': 'IND2', - 'Industrial - Heavy': 'IND1', - 'Retail': 'COM1', - 'Parking' : 'COM10' -} - +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Leland Stanford Junior University +# Copyright (c) 2023 The Regents of the University of California +# +# This file is part of pelicun. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# You should have received a copy of the BSD 3-Clause License along with +# pelicun. If not, see . +# +# Contributors: +# Adam Zsarnóczay +import json +import pandas as pd +import pelicun + +ap_DesignLevel = {1940: 'LC', 1975: 'MC', 2100: 'HC'} +# original: +# ap_DesignLevel = {1940: 'PC', 1940: 'LC', 1975: 'MC', 2100: 'HC'} +# Note that the duplicated key is ignored, and Python keeps the last +# entry. + +ap_DesignLevel_W1 = {0: 'LC', 1975: 'MC', 2100: 'HC'} +# original: +# ap_DesignLevel_W1 = {0: 'PC', 0: 'LC', 1975: 'MC', 2100: 'HC'} +# same thing applies + +ap_Occupancy = { + 'Other/Unknown': 'RES3', + 'Residential - Single-Family': 'RES1', + 'Residential - Town-Home': 'RES3', + 'Residential - Multi-Family': 'RES3', + 'Residential - Mixed Use': 'RES3', + 'Office': 'COM4', + 'Hotel': 'RES4', + 'School': 'EDU1', + 'Industrial - Light': 'IND2', + 'Industrial - Warehouse': 'IND2', + 'Industrial - Heavy': 'IND1', + 'Retail': 'COM1', + 'Parking': 'COM10', +} + + # Convert common length units def convertUnits(value, unit_in, unit_out): aval_types = ['m', 'mm', 'cm', 'km', 'inch', 'ft', 'mile'] - m = 1. 
+ m = 1.0 mm = 0.001 * m cm = 0.01 * m km = 1000 * m inch = 0.0254 * m - ft = 12. * inch - mile = 5280. * ft - scale_map = {'m':m, 'mm':mm, 'cm':cm, 'km':km, 'inch':inch, 'ft':ft,\ - 'mile':mile} + ft = 12.0 * inch + mile = 5280.0 * ft + scale_map = { + 'm': m, + 'mm': mm, + 'cm': cm, + 'km': km, + 'inch': inch, + 'ft': ft, + 'mile': mile, + } if (unit_in not in aval_types) or (unit_out not in aval_types): - print(f"The unit {unit_in} or {unit_out} are used in auto_population but not supported") + print( + f"The unit {unit_in} or {unit_out} " + f"are used in auto_population but not supported" + ) return - value = value*scale_map[unit_in]/scale_map[unit_out] + value = value * scale_map[unit_in] / scale_map[unit_out] return value -def convertBridgeToHAZUSclass(AIM): - #TODO: replace labels in AIM with standard CamelCase versions - structureType = AIM["BridgeClass"] - # if type(structureType)== str and len(structureType)>3 and structureType[:3] == "HWB" and 0 < int(structureType[3:]) and 29 > int(structureType[3:]): - # return AIM["bridge_class"] - state = AIM["StateCode"] - yr_built = AIM["YearBuilt"] +def convertBridgeToHAZUSclass(AIM): + # TODO: replace labels in AIM with standard CamelCase versions + structureType = AIM["BridgeClass"] + # if ( + # type(structureType) == str + # and len(structureType) > 3 + # and structureType[:3] == "HWB" + # and 0 < int(structureType[3:]) + # and 29 > int(structureType[3:]) + # ): + # return AIM["bridge_class"] + state = AIM["StateCode"] + yr_built = AIM["YearBuilt"] num_span = AIM["NumOfSpans"] len_max_span = AIM["MaxSpanLength"] len_unit = AIM["units"]["length"] len_max_span = convertUnits(len_max_span, len_unit, "m") + seismic = (int(state) == 6 and int(yr_built) >= 1975) or ( + int(state) != 6 and int(yr_built) >= 1990 + ) + # Use a catch-all, other class by default + bridge_class = "HWB28" + + if len_max_span > 150: + if not seismic: + bridge_class = "HWB1" + else: + bridge_class = "HWB2" + + elif num_span == 1: + if not seismic: + bridge_class = "HWB3" + else: + bridge_class = "HWB4" + + elif structureType in list(range(101, 107)): + if not seismic: + if state != 6: + bridge_class = "HWB5" + else: + bridge_class = "HWB6" + else: + bridge_class = "HWB7" + + elif structureType in [205, 206]: + if not seismic: + bridge_class = "HWB8" + else: + bridge_class = "HWB9" + + elif structureType in list(range(201, 207)): + if not seismic: + bridge_class = "HWB10" + else: + bridge_class = "HWB11" + + elif structureType in list(range(301, 307)): + if not seismic: + if len_max_span >= 20: + if state != 6: + bridge_class = "HWB12" + else: + bridge_class = "HWB13" + else: + if state != 6: + bridge_class = "HWB24" + else: + bridge_class = "HWB25" + else: + bridge_class = "HWB14" + + elif structureType in list(range(402, 411)): + if not seismic: + if len_max_span >= 20: + bridge_class = "HWB15" + elif state != 6: + bridge_class = "HWB26" + else: + bridge_class = "HWB27" + else: + bridge_class = "HWB16" + + elif structureType in list(range(501, 507)): + if not seismic: + if state != 6: + bridge_class = "HWB17" + else: + bridge_class = "HWB18" + else: + bridge_class = "HWB19" + + elif structureType in [605, 606]: + if not seismic: + bridge_class = "HWB20" + else: + bridge_class = "HWB21" + + elif structureType in list(range(601, 608)): + if not seismic: + bridge_class = "HWB22" + else: + bridge_class = "HWB23" + + # TODO: review and add HWB24-27 rules + # TODO: also double check rules for HWB10-11 and HWB22-23 + + return bridge_class + + +def 
convertTunnelToHAZUSclass(AIM): + if ("Bored" in AIM["ConstructType"]) or ("Drilled" in AIM["ConstructType"]): + return "HTU1" + elif ("Cut" in AIM["ConstructType"]) or ("Cover" in AIM["ConstructType"]): + return "HTU2" + else: + # Select HTU2 for unclassfied tunnels because it is more conservative. + return "HTU2" + + +def convertRoadToHAZUSclass(AIM): + if AIM["RoadType"] in ["Primary", "Secondary"]: + return "HRD1" + + elif AIM["RoadType"] == "Residential": + return "HRD2" + + else: + # many unclassified roads are urban roads + return "HRD2" + + +def convert_story_rise(structureType, stories): + if structureType in ['W1', 'W2', 'S3', 'PC1', 'MH']: + # These archetypes have no rise information in their IDs + rise = None + + else: + # First, check if we have valid story information + try: + stories = int(stories) + + except (ValueError, TypeError): + raise ValueError( + 'Missing "NumberOfStories" information, ' + 'cannot infer `rise` attribute of archetype' + ) + + if structureType == 'RM1': + if stories <= 3: + rise = "L" + + else: + rise = "M" + + elif structureType == 'URM': + if stories <= 2: + rise = "L" + + else: + rise = "M" + + elif structureType in [ + 'S1', + 'S2', + 'S4', + 'S5', + 'C1', + 'C2', + 'C3', + 'PC2', + 'RM2', + ]: + if stories <= 3: + rise = "L" + + elif stories <= 7: + rise = "M" + + else: + rise = "H" + + return rise + + +def auto_populate(AIM): + """ + Automatically creates a performance model for PGA-based Hazus EQ analysis. + + Parameters + ---------- + AIM: dict + Asset Information Model - provides features of the asset that can be + used to infer attributes of the performance model. + + Returns + ------- + GI_ap: dict + Extended General Information - extends the GI from the input AIM with + additional inferred features. These features are typically used in + intermediate steps during the auto-population and are not required + for the performance assessment. They are returned to allow reviewing + how these latent variables affect the final results. + DL_ap: dict + Damage and Loss parameters - these define the performance model and + details of the calculation. + CMP: DataFrame + Component assignment - Defines the components (in rows) and their + location, direction, and quantity (in columns). 
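
    For example, for a hypothetical wood light-frame building ('W1'
    structure type, high-code design level), CMP would contain a single
    row indexed by 'LF.W1.HC' with Units='ea', Location=1, Direction=1,
    Theta_0=1, and Family='N/A'.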
+ """ + + # extract the General Information + GI = AIM.get('GeneralInformation', None) + + if GI is None: + # TODO: show an error message + pass + + # initialize the auto-populated GI + GI_ap = GI.copy() + + assetType = AIM["assetType"] + ground_failure = AIM["Applications"]["DL"]["ApplicationData"]["ground_failure"] + + if assetType == "Buildings": + # get the building parameters + bt = GI['StructureType'] # building type + + # get the design level + dl = GI.get('DesignLevel', None) + + if dl is None: + # If there is no DesignLevel provided, we assume that the YearBuilt is + # available + year_built = GI['YearBuilt'] + + if 'W1' in bt: + DesignL = ap_DesignLevel_W1 + else: + DesignL = ap_DesignLevel + + for year in sorted(DesignL.keys()): + if year_built <= year: + dl = DesignL[year] + break + + GI_ap['DesignLevel'] = dl + + # get the number of stories / height + stories = GI.get('NumberOfStories', None) + + # We assume that the structure type does not include height information + # and we append it here based on the number of story information + rise = convert_story_rise(bt, stories) + + if rise is not None: + LF = f'LF.{bt}.{rise}.{dl}' + GI_ap['BuildingRise'] = rise + else: + LF = f'LF.{bt}.{dl}' + + # fmt: off + CMP = pd.DataFrame( # noqa + {f'{LF}': ['ea', 1, 1, 1, 'N/A']}, # noqa + index = ['Units','Location','Direction','Theta_0','Family'] # noqa + ).T # noqa + # fmt: on + + # if needed, add components to simulate damage from ground failure + if ground_failure: + foundation_type = 'S' + + FG_GF_H = f'GF.H.{foundation_type}' + FG_GF_V = f'GF.V.{foundation_type}' + + # fmt: off + CMP_GF = pd.DataFrame( # noqa + {f'{FG_GF_H}':[ 'ea', 1, 1, 1, 'N/A'], # noqa + f'{FG_GF_V}':[ 'ea', 1, 3, 1, 'N/A']}, # noqa + index = [ 'Units','Location','Direction','Theta_0','Family'] # noqa + ).T # noqa + # fmt: on + + CMP = pd.concat([CMP, CMP_GF], axis=0) + + # set the number of stories to 1 + # there is only one component in a building-level resolution + stories = 1 + + # get the occupancy class + if GI['OccupancyClass'] in ap_Occupancy.keys(): + ot = ap_Occupancy[GI['OccupancyClass']] + else: + ot = GI['OccupancyClass'] + + DL_ap = { + "Asset": { + "ComponentAssignmentFile": "CMP_QNT.csv", + "ComponentDatabase": "Hazus Earthquake - Buildings", + "NumberOfStories": f"{stories}", + "OccupancyType": f"{ot}", + "PlanArea": "1", + }, + "Damage": {"DamageProcess": "Hazus Earthquake"}, + "Demands": {}, + "Losses": { + "Repair": { + "ConsequenceDatabase": "Hazus Earthquake - Buildings", + "MapApproach": "Automatic", + } + }, + } + + elif assetType == "TransportationNetwork": + inf_type = GI["assetSubtype"] + + if inf_type == "HwyBridge": + # get the bridge class + bt = convertBridgeToHAZUSclass(GI) + GI_ap['BridgeHazusClass'] = bt + + # fmt: off + CMP = pd.DataFrame( # noqa + {f'HWB.GS.{bt[3:]}': [ 'ea', 1, 1, 1, 'N/A'], # noqa + f'HWB.GF': [ 'ea', 1, 1, 1, 'N/A']}, # noqa + index = [ 'Units','Location','Direction','Theta_0','Family'] # noqa + ).T # noqa + # fmt: on + + DL_ap = { + "Asset": { + "ComponentAssignmentFile": "CMP_QNT.csv", + "ComponentDatabase": "Hazus Earthquake - Transportation", + "BridgeHazusClass": bt, + "PlanArea": "1", + }, + "Damage": {"DamageProcess": "Hazus Earthquake"}, + "Demands": {}, + "Losses": { + "Repair": { + "ConsequenceDatabase": "Hazus Earthquake - Transportation", + "MapApproach": "Automatic", + } + }, + } + + elif inf_type == "HwyTunnel": + # get the tunnel class + tt = convertTunnelToHAZUSclass(GI) + GI_ap['TunnelHazusClass'] = tt + + # fmt: off + CMP = pd.DataFrame( # 
noqa + {f'HTU.GS.{tt[3:]}': [ 'ea', 1, 1, 1, 'N/A'], # noqa + f'HTU.GF': [ 'ea', 1, 1, 1, 'N/A']}, # noqa + index = [ 'Units','Location','Direction','Theta_0','Family'] # noqa + ).T # noqa + # fmt: on + + DL_ap = { + "Asset": { + "ComponentAssignmentFile": "CMP_QNT.csv", + "ComponentDatabase": "Hazus Earthquake - Transportation", + "TunnelHazusClass": tt, + "PlanArea": "1", + }, + "Damage": {"DamageProcess": "Hazus Earthquake"}, + "Demands": {}, + "Losses": { + "Repair": { + "ConsequenceDatabase": "Hazus Earthquake - Transportation", + "MapApproach": "Automatic", + } + }, + } + elif inf_type == "Roadway": + # get the road class + rt = convertRoadToHAZUSclass(GI) + GI_ap['RoadHazusClass'] = rt + + # fmt: off + CMP = pd.DataFrame( # noqa + {f'HRD.GF.{rt[3:]}':[ 'ea', 1, 1, 1, 'N/A']}, # noqa + index = [ 'Units','Location','Direction','Theta_0','Family'] # noqa + ).T # noqa + # fmt: on + + DL_ap = { + "Asset": { + "ComponentAssignmentFile": "CMP_QNT.csv", + "ComponentDatabase": "Hazus Earthquake - Transportation", + "RoadHazusClass": rt, + "PlanArea": "1", + }, + "Damage": {"DamageProcess": "Hazus Earthquake"}, + "Demands": {}, + "Losses": { + "Repair": { + "ConsequenceDatabase": "Hazus Earthquake - Transportation", + "MapApproach": "Automatic", + } + }, + } + else: + print("subtype not supported in HWY") + + elif assetType == "WaterDistributionNetwork": + + pipe_material_map = { + "CI": "B", + "AC": "B", + "RCC": "B", + "DI": "D", + "PVC": "D", + "DS": "B", + "BS": "D", + } + + # GI = AIM.get("GeneralInformation", None) + # if GI==None: + + # initialize the auto-populated GI + wdn_element_type = GI_ap.get("type", "MISSING") + asset_name = GI_ap.get("AIM_id", None) + + if wdn_element_type == "Pipe": + pipe_construction_year = GI_ap.get("year", None) + pipe_diameter = GI_ap.get("Diam", None) + # diamaeter value is a fundamental part of hydraulic + # performance assessment + if pipe_diameter is None: + raise ValueError( + f"pipe diamater in asset type {assetType}, \ + asset id \"{asset_name}\" has no diameter \ + value." + ) + + pipe_length = GI_ap.get("Len", None) + # length value is a fundamental part of hydraulic performance assessment + if pipe_diameter is None: + raise ValueError( + f"pipe length in asset type {assetType}, \ + asset id \"{asset_name}\" has no diameter \ + value." + ) + + pipe_material = GI_ap.get("material", None) + + # pipe material can be not available or named "missing" in + # both case, pipe flexibility will be set to "missing" + + """ + The assumed logic (rullset) is that if the material is + missing, if the pipe is smaller than or equal to 20 + inches, the material is Cast Iron (CI) otherwise the pipe + material is steel. + If the material is steel (ST), either based on user specified + input or the assumption due to the lack of the user-input, the year + that the pipe is constructed define the flexibility status per HAZUS + instructions. If the pipe is built in 1935 or after, it is, the pipe + is Ductile Steel (DS), and otherwise it is Brittle Steel (BS). + If the pipe is missing construction year and is built by steel, + we assume consevatively that the pipe is brittle (i.e., BS) + """ + if pipe_material is None: + if pipe_diameter > 20 * 0.0254: # 20 inches in meter + print( + f"Asset {asset_name} is missing material. Material is\ + assumed to be Cast Iron" + ) + pipe_material = "CI" + else: + print( + f"Asset {asset_name} is missing material. 
Material is " + f"assumed to be Steel (ST)" + ) + pipe_material = "ST" + + if pipe_material == "ST": + if (pipe_construction_year is not None) and ( + pipe_construction_year >= 1935 + ): + print( + f"Asset {asset_name} has material of \"ST\" is assumed to be\ + Ductile Steel" + ) + pipe_material = "DS" + else: + print( + f'Asset {asset_name} has material of "ST" is assumed to be ' + f'Brittle Steel' + ) + pipe_material = "BS" + + pipe_flexibility = pipe_material_map.get(pipe_material, "missing") + + GI_ap["material flexibility"] = pipe_flexibility + GI_ap["material"] = pipe_material + + # Pipes are broken into 20ft segments (rounding up) and + # each segment is represented by an individual entry in + # the performance model, `CMP`. The damage capcity of each + # segment is assumed to be independent and driven by the + # same EDP. We therefore replicate the EDP associated with + # the pipe to the various locations assgined to the + # segments. + + # Determine number of segments + + pipe_length_unit = GI_ap['units']['length'] + pipe_length_feet = pelicun.base.convert_units( + pipe_length, unit=pipe_length_unit, to_unit='ft', category='length' + ) + reference_length = 20.00 # 20 ft + if pipe_length_feet % reference_length < 1e-2: + # If the lengths are equal, then that's one segment, not two. + num_segments = int(pipe_length_feet / reference_length) + else: + # In all other cases, round up. + num_segments = int(pipe_length_feet / reference_length) + 1 + if num_segments > 1: + location_string = f'1--{num_segments}' + else: + location_string = '1' + + # Define performance model + # fmt: off + CMP = pd.DataFrame( # noqa + {f'PWP.{pipe_flexibility}.GS': ['ea', location_string, '0', 1, 'N/A'], # noqa + f'PWP.{pipe_flexibility}.GF': ['ea', location_string, '0', 1, 'N/A'], # noqa + 'aggregate': ['ea', location_string, '0', 1, 'N/A']}, # noqa + index = ['Units','Location','Direction','Theta_0','Family'] # noqa + ).T # noqa + # fmt: on + + # Set up the demand cloning configuration for the pipe + # segments, if required. + demand_config = {} + if num_segments > 1: + # determine the EDP tags available for cloning + response_data = pelicun.file_io.load_data('response.csv', None) + num_header_entries = len(response_data.columns.names) + # if 4, assume a hazard level tag is present and remove it + if num_header_entries == 4: + response_data.columns = pd.MultiIndex.from_tuples( + [x[1::] for x in response_data.columns] + ) + demand_cloning_config = {} + for edp in response_data.columns: + tag, location, direction = edp + + demand_cloning_config['-'.join(edp)] = [ + f'{tag}-{x}-{direction}' + for x in [f'{i+1}' for i in range(num_segments)] + ] + demand_config = {'DemandCloning': demand_cloning_config} + + # Create damage process + dmg_process = { + f"1_PWP.{pipe_flexibility}.GS-LOC": {"DS1": "aggregate_DS1"}, + f"2_PWP.{pipe_flexibility}.GF-LOC": {"DS1": "aggregate_DS1"}, + f"3_PWP.{pipe_flexibility}.GS-LOC": {"DS2": "aggregate_DS2"}, + f"4_PWP.{pipe_flexibility}.GF-LOC": {"DS2": "aggregate_DS2"}, + } + dmg_process_filename = 'dmg_process.json' + with open(dmg_process_filename, 'w', encoding='utf-8') as f: + json.dump(dmg_process, f, indent=2) + + # Define the auto-populated config + DL_ap = { + "Asset": { + "ComponentAssignmentFile": "CMP_QNT.csv", + "ComponentDatabase": "Hazus Earthquake - Water", + "Material Flexibility": pipe_flexibility, + "PlanArea": "1", # Sina: does not make sense for water. 
+ # Kept it here since itw as also + # kept here for Transportation + }, + "Damage": { + "DamageProcess": "User Defined", + "DamageProcessFilePath": "dmg_process.json", + }, + "Demands": demand_config, + } + + elif wdn_element_type == "Tank": + + tank_cmp_lines = { + ("OG", "C", 1): {'PST.G.C.A.GS': ['ea', 1, 1, 1, 'N/A']}, + ("OG", "C", 0): {'PST.G.C.U.GS': ['ea', 1, 1, 1, 'N/A']}, + ("OG", "S", 1): {'PST.G.S.A.GS': ['ea', 1, 1, 1, 'N/A']}, + ("OG", "S", 0): {'PST.G.S.U.GS': ['ea', 1, 1, 1, 'N/A']}, + # Anchored status and Wood is not defined for On Ground tanks + ("OG", "W", 0): {'PST.G.W.GS': ['ea', 1, 1, 1, 'N/A']}, + # Anchored status and Steel is not defined for Above Ground tanks + ("AG", "S", 0): {'PST.A.S.GS': ['ea', 1, 1, 1, 'N/A']}, + # Anchored status and Concrete is not defined for Buried tanks. + ("B", "C", 0): {'PST.B.C.GF': ['ea', 1, 1, 1, 'N/A']}, + } + + # The default values are assumed: material = Concrete (C), + # location= On Ground (OG), and Anchored = 1 + tank_material = GI_ap.get("material", "C") + tank_location = GI_ap.get("location", "OG") + tank_anchored = GI_ap.get("anchored", int(1)) + + tank_material_allowable = {"C", "S"} + if tank_material not in tank_material_allowable: + raise ValueError( + f"Tank's material = \"{tank_material}\" is \ + not allowable in tank {asset_name}. The \ + material must be either C for concrete or S \ + for steel." + ) + + tank_location_allowable = {"AG", "OG", "B"} + if tank_location not in tank_location_allowable: + raise ValueError( + f"Tank's location = \"{tank_location}\" is \ + not allowable in tank {asset_name}. The \ + location must be either \"AG\" for Above \ + ground, \"OG\" for On Ground or \"BG\" for \ + Bellow Ground (burried) Tanks." + ) + + tank_anchored_allowable = {int(0), int(1)} + if tank_anchored not in tank_anchored_allowable: + raise ValueError( + f"Tank's anchored status = \"{tank_location}\ + \" is not allowable in tank {asset_name}. \ + The anchored status must be either integer\ + value 0 for unachored, or 1 for anchored" + ) + + if tank_location == "AG" and tank_material == "C": + print( + f"The tank {asset_name} is Above Ground (i.e., AG), but \ + the material type is Concrete (\"C\"). Tank type \"C\" is not \ + defiend for AG tanks. The tank is assumed to be Steel (\"S\")" + ) + tank_material = "S" + + if tank_location == "AG" and tank_material == "W": + print( + f"The tank {asset_name} is Above Ground (i.e., AG), but \ + the material type is Wood (\"W\"). Tank type \"W\" is not \ + defiend for AG tanks. The tank is assumed to be Steel (\"S\")" + ) + tank_material = "S" + + if tank_location == "B" and tank_material == "S": + print( + f"The tank {asset_name} is burried (i.e., B), but the\ + material type is Steel (\"S\"). \ + Tank type \"S\" is not defiend for\ + B tanks. The tank is assumed to be Concrete (\"C\")" + ) + tank_material = "C" + + if tank_location == "B" and tank_material == "W": + print( + f"The tank {asset_name} is burried (i.e., B), but the\ + material type is Wood (\"W\"). Tank type \"W\" is not defiend \ + for B tanks. 
The tank is assumed to be Concrete (\"C\")" + ) + tank_material = "C" + + if tank_anchored == 1: + # Since anchore status does nto matter, there is no need to + # print a warning + tank_anchored = 0 + + cur_tank_cmp_line = tank_cmp_lines[ + (tank_location, tank_material, tank_anchored) + ] + + CMP = pd.DataFrame( + cur_tank_cmp_line, + index=['Units', 'Location', 'Direction', 'Theta_0', 'Family'], + ).T + + DL_ap = { + "Asset": { + "ComponentAssignmentFile": "CMP_QNT.csv", + "ComponentDatabase": "Hazus Earthquake - Water", + "Material": tank_material, + "Location": tank_location, + "Anchored": tank_anchored, + "PlanArea": "1", # Sina: does not make sense for water. + # Kept it here since itw as also kept here for Transportation + }, + "Damage": {"DamageProcess": "Hazus Earthquake"}, + "Demands": {}, + } + + else: + print( + f"Water Distribution network element type {wdn_element_type} " + f"is not supported in Hazus Earthquake IM DL method" + ) + DL_ap = None + CMP = None + + else: + print( + f"AssetType: {assetType} is not supported " + f"in Hazus Earthquake IM DL method" + ) - seismic = ((int(state)==6 and int(yr_built)>=1975) or - (int(state)!=6 and int(yr_built)>=1990)) - - # Use a catch-all, other class by default - bridge_class = "HWB28" - - if len_max_span > 150: - if not seismic: - bridge_class = "HWB1" - else: - bridge_class = "HWB2" - - elif num_span == 1: - if not seismic: - bridge_class = "HWB3" - else: - bridge_class = "HWB4" - - elif structureType in list(range(101,107)): - if not seismic: - if state != 6: - bridge_class = "HWB5" - else: - bridge_class = "HWB6" - else: - bridge_class = "HWB7" - - elif structureType in [205,206]: - if not seismic: - bridge_class = "HWB8" - else: - bridge_class = "HWB9" - - elif structureType in list(range(201,207)): - if not seismic: - bridge_class = "HWB10" - else: - bridge_class = "HWB11" - - elif structureType in list(range(301,307)): - if not seismic: - if len_max_span>=20: - if state != 6: - bridge_class = "HWB12" - else: - bridge_class = "HWB13" - else: - if state != 6: - bridge_class = "HWB24" - else: - bridge_class = "HWB25" - else: - bridge_class = "HWB14" - - elif structureType in list(range(402,411)): - if not seismic: - if len_max_span>=20: - bridge_class = "HWB15" - elif state != 6: - bridge_class = "HWB26" - else: - bridge_class = "HWB27" - else: - bridge_class = "HWB16" - - elif structureType in list(range(501,507)): - if not seismic: - if state != 6: - bridge_class = "HWB17" - else: - bridge_class = "HWB18" - else: - bridge_class = "HWB19" - - elif structureType in [605,606]: - if not seismic: - bridge_class = "HWB20" - else: - bridge_class = "HWB21" - - elif structureType in list(range(601,608)): - if not seismic: - bridge_class = "HWB22" - else: - bridge_class = "HWB23" - - - #TODO: review and add HWB24-27 rules - #TODO: also double check rules for HWB10-11 and HWB22-23 - - return bridge_class - - - # original code by JZ - """ - if not seismic and len_max_span > 150: - return "HWB1" - elif seismic and len_max_span > 150: - return "HWB2" - elif not seismic and num_span == 1: - return "HWB3" - elif seismic and num_span == 1: - return "HWB4" - elif not seismic and 101 <= structureType and structureType <= 106 and state != 6: - return "HWB5" - elif not seismic and 101 <= structureType and structureType <= 106 and state ==6: - return "HWB6" - elif seismic and 101 <= structureType and structureType <= 106: - return "HWB7" - elif not seismic and 205 <= structureType and structureType <= 206: - return "HWB8" - elif seismic and 205 <= 
structureType and structureType <= 206: - return "HWB9" - elif not seismic and 201 <= structureType and structureType <= 206: - return "HWB10" - elif seismic and 201 <= structureType and structureType <= 206: - return "HWB11" - elif not seismic and 301 <= structureType and structureType <= 306 and state != 6: - return "HWB12" - elif not seismic and 301 <= structureType and structureType <= 306 and state == 6: - return "HWB13" - elif seismic and 301 <= structureType and structureType <= 306: - return "HWB14" - elif not seismic and 402 <= structureType and structureType <= 410: - return "HWB15" - elif seismic and 402 <= structureType and structureType <= 410: - return "HWB16" - elif not seismic and 501 <= structureType and structureType <= 506 and state != 6: - return "HWB17" - elif not seismic and 501 <= structureType and structureType <= 506 and state == 6: - return "HWB18" - elif seismic and 501 <= structureType and structureType <= 506: - return "HWB19" - elif not seismic and 605 <= structureType and structureType <= 606: - return "HWB20" - elif seismic and 605 <= structureType and structureType <= 606: - return "HWB21" - elif not seismic and 601 <= structureType and structureType <= 607: - return "HWB22" - elif seismic and 601 <= structureType and structureType <= 607: - return "HWB23" - - elif not seismic and 301 <= structureType and structureType <= 306 and state != 6: - return "HWB24" - elif not seismic and 301 <= structureType and structureType <= 306 and state == 6: - return "HWB25" - elif not seismic and 402 <= structureType and structureType <= 410 and state != 6: - return "HWB26" - elif not seismic and 402 <= structureType and structureType <= 410 and state == 6: - return "HWB27" - else: - return "HWB28" - """ - -def convertTunnelToHAZUSclass(AIM): - - if ("Bored" in AIM["ConstructType"]) or ("Drilled" in AIM["ConstructType"]): - return "HTU1" - elif ("Cut" in AIM["ConstructType"]) or ("Cover" in AIM["ConstructType"]): - return "HTU2" - else: - # Select HTU2 for unclassfied tunnels because it is more conservative. - return "HTU2" - -def convertRoadToHAZUSclass(AIM): - - if AIM["RoadType"] in ["Primary", "Secondary"]: - return "HRD1" - - elif AIM["RoadType"]=="Residential": - return "HRD2" - - else: - # many unclassified roads are urban roads - return "HRD2" - -def convert_story_rise(structureType, stories): - - - if structureType in ['W1', 'W2', 'S3', 'PC1', 'MH']: - - # These archetypes have no rise information in their IDs - rise = None - - else: - - # First, check if we have valid story information - try: - - stories = int(stories) - - except: - - raise ValueError('Missing "NumberOfStories" information, ' - 'cannot infer rise attribute of archetype') - - if structureType == 'RM1': - - if stories <= 3: - rise = "L" - - else: - rise = "M" - - elif structureType == 'URM': - if stories <= 2: - rise = "L" - - else: - rise = "M" - - elif structureType in ['S1', 'S2', 'S4', 'S5', 'C1', 'C2', 'C3', \ - 'PC2', 'RM2']: - if stories <=3: - rise = "L" - - elif stories <= 7: - rise = "M" - - else: - rise = "H" - - return rise - -def auto_populate(AIM): - """ - Automatically creates a performance model for PGA-based Hazus EQ analysis. - - Parameters - ---------- - AIM: dict - Asset Information Model - provides features of the asset that can be - used to infer attributes of the performance model. - - Returns - ------- - GI_ap: dict - Extended General Information - extends the GI from the input AIM with - additional inferred features. 
These features are typically used in - intermediate steps during the auto-population and are not required - for the performance assessment. They are returned to allow reviewing - how these latent variables affect the final results. - DL_ap: dict - Damage and Loss parameters - these define the performance model and - details of the calculation. - CMP: DataFrame - Component assignment - Defines the components (in rows) and their - location, direction, and quantity (in columns). - """ - - # extract the General Information - GI = AIM.get('GeneralInformation', None) - - if GI==None: - #TODO: show an error message - pass - - # initialize the auto-populated GI - GI_ap = GI.copy() - - assetType = AIM["assetType"] - ground_failure = AIM["Applications"]["DL"]["ApplicationData"]["ground_failure"] - - if assetType=="Buildings": - - # get the building parameters - bt = GI['StructureType'] #building type - - # get the design level - dl = GI.get('DesignLevel', None) - - if dl == None: - # If there is no DesignLevel provided, we assume that the YearBuilt is - # available - year_built = GI['YearBuilt'] - - if 'W1' in bt: - DesignL = ap_DesignLevel_W1 - else: - DesignL = ap_DesignLevel - - for year in sorted(DesignL.keys()): - if year_built <= year: - dl = DesignL[year] - break - - GI_ap['DesignLevel'] = dl - - # get the number of stories / height - stories = GI.get('NumberOfStories', None) - - # We assume that the structure type does not include height information - # and we append it here based on the number of story information - rise = convert_story_rise(bt, stories) - - if rise is not None: - LF = f'LF.{bt}.{rise}.{dl}' - GI_ap['BuildingRise'] = rise - else: - LF = f'LF.{bt}.{dl}' - - - CMP = pd.DataFrame( - {f'{LF}': [ 'ea', 1, 1, 1, 'N/A']}, - index = [ 'Units','Location','Direction','Theta_0','Family'] - ).T - - # if needed, add components to simulate damage from ground failure - if ground_failure: - - foundation_type = 'S' - - FG_GF_H = f'GF.H.{foundation_type}' - FG_GF_V = f'GF.V.{foundation_type}' - - CMP_GF = pd.DataFrame( - {f'{FG_GF_H}':[ 'ea', 1, 1, 1, 'N/A'], - f'{FG_GF_V}':[ 'ea', 1, 3, 1, 'N/A']}, - index = [ 'Units','Location','Direction','Theta_0','Family'] - ).T - - CMP = pd.concat([CMP, CMP_GF], axis=0) - - # set the number of stories to 1 - # there is only one component in a building-level resolution - stories = 1 - - # get the occupancy class - if GI['OccupancyClass'] in ap_Occupancy.keys(): - ot = ap_Occupancy[GI['OccupancyClass']] - else: - ot = GI['OccupancyClass'] - - DL_ap = { - "Asset": { - "ComponentAssignmentFile": "CMP_QNT.csv", - "ComponentDatabase": "Hazus Earthquake - Buildings", - "NumberOfStories": f"{stories}", - "OccupancyType": f"{ot}", - "PlanArea": "1" - }, - "Damage": { - "DamageProcess": "Hazus Earthquake" - }, - "Demands": { - }, - "Losses": { - "BldgRepair": { - "ConsequenceDatabase": "Hazus Earthquake - Buildings", - "MapApproach": "Automatic" - } - } - } - - elif assetType == "TransportationNetwork": - - inf_type = GI["assetSubtype"] - - if inf_type == "HwyBridge": - - # get the bridge class - bt = convertBridgeToHAZUSclass(GI) - GI_ap['BridgeHazusClass'] = bt - - CMP = pd.DataFrame( - {f'HWB.GS.{bt[3:]}': [ 'ea', 1, 1, 1, 'N/A'], - f'HWB.GF': [ 'ea', 1, 1, 1, 'N/A']}, - index = [ 'Units','Location','Direction','Theta_0','Family'] - ).T - - DL_ap = { - "Asset": { - "ComponentAssignmentFile": "CMP_QNT.csv", - "ComponentDatabase": "Hazus Earthquake - Transportation", - "BridgeHazusClass": bt, - "PlanArea": "1" - }, - "Damage": { - "DamageProcess": "Hazus 
Earthquake" - }, - "Demands": { - }, - "Losses": { - "BldgRepair": { - "ConsequenceDatabase": "Hazus Earthquake - Transportation", - "MapApproach": "Automatic" - } - } - } - - elif inf_type == "HwyTunnel": - - # get the tunnel class - tt = convertTunnelToHAZUSclass(GI) - GI_ap['TunnelHazusClass'] = tt - - CMP = pd.DataFrame( - {f'HTU.GS.{tt[3:]}': [ 'ea', 1, 1, 1, 'N/A'], - f'HTU.GF': [ 'ea', 1, 1, 1, 'N/A']}, - index = [ 'Units','Location','Direction','Theta_0','Family'] - ).T - - DL_ap = { - "Asset": { - "ComponentAssignmentFile": "CMP_QNT.csv", - "ComponentDatabase": "Hazus Earthquake - Transportation", - "TunnelHazusClass": tt, - "PlanArea": "1" - }, - "Damage": { - "DamageProcess": "Hazus Earthquake" - }, - "Demands": { - }, - "Losses": { - "BldgRepair": { - "ConsequenceDatabase": "Hazus Earthquake - Transportation", - "MapApproach": "Automatic" - } - } - } - elif inf_type == "Roadway": - - # get the road class - rt = convertRoadToHAZUSclass(GI) - GI_ap['RoadHazusClass'] = rt - - CMP = pd.DataFrame( - {f'HRD.GF.{rt[3:]}':[ 'ea', 1, 1, 1, 'N/A']}, - index = [ 'Units','Location','Direction','Theta_0','Family'] - ).T - - DL_ap = { - "Asset": { - "ComponentAssignmentFile": "CMP_QNT.csv", - "ComponentDatabase": "Hazus Earthquake - Transportation", - "RoadHazusClass": rt, - "PlanArea": "1" - }, - "Damage": { - "DamageProcess": "Hazus Earthquake" - }, - "Demands": { - }, - "Losses": { - "BldgRepair": { - "ConsequenceDatabase": "Hazus Earthquake - Transportation", - "MapApproach": "Automatic" - } - } - } - else: - print("subtype not supported in HWY") - - elif assetType == "WaterDistributionNetwork": - - pipe_material_map ={"CI": "B", "AC": "B", "RCC": "B", - "DI": "D", "PVC": "D", - "DS": "B", - "BS": "D",} - - #GI = AIM.get("GeneralInformation", None) - #if GI==None: - - - # initialize the auto-populated GI - wdn_element_type = GI_ap.get("type", "MISSING") - asset_name = GI_ap.get("AIM_id", None) - - - if wdn_element_type == "Pipe": - pipe_construction_year = GI_ap.get("year", None) - pipe_diameter = GI_ap.get("Diam", None) - #diamaeter value is a fundamental part of hydraulic performance assessment - if pipe_diameter == None: - raise ValueError(f"pipe diamater in asset type {assetType}, \ - asset id \"{asset_name}\" has no diameter \ - value.") - - pipe_length = GI_ap.get("Len", None) - #length value is a fundamental part of hydraulic performance assessment - if pipe_diameter == None: - raise ValueError(f"pipe length in asset type {assetType}, \ - asset id \"{asset_name}\" has no diameter \ - value.") - - pipe_material = GI_ap.get("material", None) - - #pipe material can be not available or named "missing" in both case, pipe flexibility will be set to "missing" - - """ - The assumed logic (rullset) is that if the material is missing, if the pipe - is smaller than or equal to 20 inches, the material is Cast Iron - (CI) otherwise the pipe material is steel. - If the material is steel (ST), either based on user specified - input or the assumption due to the lack of the user-input, the year - that the pipe is constructed define the flexibility status per HAZUS - instructions. If the pipe is built in 1935 or after, it is, the pipe - is Ductile Steel (DS), and otherwise it is Brittle Steel (BS). - If the pipe is missing construction year and is built by steel, - we assume consevatively that the pipe is brittle (i.e., BS) - """ - if pipe_material == None: - if pipe_diameter > 20 * 0.0254: #20 inches in meter - print(f"Asset {asset_name} is missing material. 
Material is\ - assumed to be Cast Iron") - pipe_material = "CI" - else: - print(f"Asset {asset_name} is missing material. Material is " - f"assumed to be Steel (ST)") - pipe_material = "ST" - - if pipe_material == "ST": - if pipe_construction_year != None and pipe_construction_year >= 1935: - print(f"Asset {asset_name} has material of \"ST\" is assumed to be\ - Ductile Steel") - pipe_material = "DS" - else: - print(f'Asset {asset_name} has material of "ST" is assumed to be ' - f'Brittle Steel') - pipe_material = "BS" - - pipe_flexibility = pipe_material_map.get(pipe_material, "missing") - - GI_ap["material flexibility"] = pipe_flexibility - GI_ap["material"] = pipe_material - - - # Pipes are broken into 20ft segments (rounding up) and - # each segment is represented by an individual entry in - # the performance model, `CMP`. The damage capcity of each - # segment is assumed to be independent and driven by the - # same EDP. We therefore replicate the EDP associated with - # the pipe to the various locations assgined to the - # segments. - - # Determine number of segments - with open( - os.path.join( - os.path.dirname(pelicun.__file__), 'settings/default_units.json' - ), - 'r', - encoding='utf-8', - ) as f: - units = json.load(f) - pipe_length_unit = GI_ap['units']['length'] - pipe_length_unit_factor = units['length'][pipe_length_unit] - pipe_length_in_base_unit = pipe_length * pipe_length_unit_factor - reference_length_in_base_unit = 20.00 * units['length']['ft'] - if pipe_length_in_base_unit % reference_length_in_base_unit < 1e-2: - # If the lengths are equal, then that's one segment, not two. - num_segments = int(pipe_length_in_base_unit / reference_length_in_base_unit) - else: - # In all other cases, round up. - num_segments = int(pipe_length_in_base_unit / reference_length_in_base_unit) + 1 - if num_segments > 1: - location_string = f'1--{num_segments}' - else: - location_string = '1' - - # Define performance model - CMP = pd.DataFrame( - {f'PWP.{pipe_flexibility}.GS': ['ea', location_string, '0', 1, 'N/A'], - f'PWP.{pipe_flexibility}.GF': ['ea', location_string, '0', 1, 'N/A'], - f'aggregate': ['ea', location_string, '0', 1, 'N/A']}, - index = ['Units','Location','Direction','Theta_0','Family'] - ).T - - # Set up the demand cloning configuration for the pipe - # segments, if required. 
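For reference, the 20-ft pipe segmentation rule shown just above can be summarized with a small standalone sketch. The function name and the feet-only units are illustrative simplifications (the code in this hunk converts lengths through settings/default_units.json first); the rounding behavior mirrors the logic above, where exact multiples of 20 ft are not rounded up.

    # Illustrative sketch only: lengths are taken directly in feet here, whereas
    # the module above converts through settings/default_units.json first.
    def estimate_pipe_segments(pipe_length_ft, reference_length_ft=20.0, tol=1e-2):
        """Return the segment count and the Location string for a pipe."""
        if pipe_length_ft % reference_length_ft < tol:
            # an exact multiple of the reference length is not rounded up
            num_segments = int(pipe_length_ft / reference_length_ft)
        else:
            num_segments = int(pipe_length_ft / reference_length_ft) + 1
        location_string = f'1--{num_segments}' if num_segments > 1 else '1'
        return num_segments, location_string

    # e.g. estimate_pipe_segments(100.0) -> (5, '1--5')
    #      estimate_pipe_segments(101.0) -> (6, '1--6')

Each resulting segment then gets its own location in the performance model, which is why the demand cloning configuration below replicates the pipe's EDP across locations 1 through num_segments.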
- demand_config = {} - if num_segments > 1: - # determine the EDP tags available for cloning - response_data = pelicun.file_io.load_data('response.csv', None) - num_header_entries = len(response_data.columns.names) - # if 4, assume a hazard level tag is present and remove it - if num_header_entries == 4: - response_data.columns = pd.MultiIndex.from_tuples( - [x[1::] for x in response_data.columns] - ) - demand_cloning_config = {} - for edp in response_data.columns: - tag, location, direction = edp - - demand_cloning_config['-'.join(edp)] = [ - f'{tag}-{x}-{direction}' - for x in [f'{i+1}' for i in range(num_segments)] - ] - demand_config = {'DemandCloning': demand_cloning_config} - - # Create damage process - dmg_process = { - f"1_PWP.{pipe_flexibility}.GS": {"DS1": "aggregate_DS1"}, - f"2_PWP.{pipe_flexibility}.GF": {"DS1": "aggregate_DS1"}, - f"3_PWP.{pipe_flexibility}.GS": {"DS2": "aggregate_DS2"}, - f"4_PWP.{pipe_flexibility}.GF": {"DS2": "aggregate_DS2"}, - } - dmg_process_filename = 'dmg_process.json' - with open(dmg_process_filename, 'w', encoding='utf-8') as f: - json.dump(dmg_process, f, indent=2) - - # Define the auto-populated config - DL_ap = { - "Asset": { - "ComponentAssignmentFile": "CMP_QNT.csv", - "ComponentDatabase": "Hazus Earthquake - Water", - "Material Flexibility": pipe_flexibility, - "PlanArea": "1" # Sina: does not make sense for water. Kept it here since itw as also kept here for Transportation - }, - "Damage": { - "DamageProcess": "User Defined", - "DamageProcessFilePath": "dmg_process.json" - }, - "Demands": demand_config - } - - elif wdn_element_type == "Tank": - - tank_cmp_lines = { - ("OG", "C", 1):{'PST.G.C.A.GS': [ 'ea', 1, 1, 1, 'N/A' ]}, - ("OG", "C", 0):{'PST.G.C.U.GS': [ 'ea', 1, 1, 1, 'N/A' ]}, - ("OG", "S", 1):{'PST.G.S.A.GS': [ 'ea', 1, 1, 1, 'N/A' ]}, - ("OG", "S", 0):{'PST.G.S.U.GS': [ 'ea', 1, 1, 1, 'N/A' ]}, - #Anchored status and Wood is not defined for On Ground tanks - ("OG", "W", 0):{'PST.G.W.GS': [ 'ea', 1, 1, 1, 'N/A' ]}, - #Anchored status and Steel is not defined for Above Ground tanks - ("AG", "S", 0):{'PST.A.S.GS': [ 'ea', 1, 1, 1, 'N/A' ]}, - #Anchored status and Concrete is not defined for Buried tanks. - ("B", "C", 0):{'PST.B.C.GF': [ 'ea', 1, 1, 1, 'N/A' ]} - } - - """ - The default values are assumed: material = Concrete (C), - location= On Ground (OG), and Anchored = 1 - """ - tank_material = GI_ap.get("material", "C") - tank_location = GI_ap.get("location", "OG") - tank_anchored = GI_ap.get("anchored", int(1) ) - - tank_material_allowable = {"C", "S"} - if tank_material not in tank_material_allowable: - raise ValueError(f"Tank's material = \"{tank_material}\" is \ - not allowable in tank {asset_name}. The \ - material must be either C for concrete or S \ - for steel.") - - tank_location_allowable = {"AG", "OG", "B"} - if tank_location not in tank_location_allowable: - raise ValueError(f"Tank's location = \"{tank_location}\" is \ - not allowable in tank {asset_name}. The \ - location must be either \"AG\" for Above \ - ground, \"OG\" for On Ground or \"BG\" for \ - Bellow Ground (burried) Tanks.") - - tank_anchored_allowable = {int(0), int(1)} - if tank_anchored not in tank_anchored_allowable: - raise ValueError(f"Tank's anchored status = \"{tank_location}\ - \" is not allowable in tank {asset_name}. 
\ - The anchored status must be either integer\ - value 0 for unachored, or 1 for anchored") - - if tank_location == "AG" and tank_material == "C": - print(f"The tank {asset_name} is Above Ground (i.e., AG), but \ - the material type is Concrete (\"C\"). Tank type \"C\" is not \ - defiend for AG tanks. The tank is assumed to be Steel (\"S\")") - tank_material = "S" - - if tank_location == "AG" and tank_material == "W": - print(f"The tank {asset_name} is Above Ground (i.e., AG), but \ - the material type is Wood (\"W\"). Tank type \"W\" is not \ - defiend for AG tanks. The tank is assumed to be Steel (\"S\")") - tank_material = "S" - - - if tank_location == "B" and tank_material == "S": - print(f"The tank {asset_name} is burried (i.e., B), but the\ - material type is Steel (\"S\"). Tank type \"S\" is not defiend for\ - B tanks. The tank is assumed to be Concrete (\"C\")") - tank_material = "C" - - if tank_location == "B" and tank_material == "W": - print(f"The tank {asset_name} is burried (i.e., B), but the\ - material type is Wood (\"W\"). Tank type \"W\" is not defiend for\ - B tanks. The tank is assumed to be Concrete (\"C\")") - tank_material = "C" - - if tank_anchored == 1: - #Since anchore status does nto matter, there is no need to - #print a warning - tank_anchored = 0 - - cur_tank_cmp_line = tank_cmp_lines[(tank_location, tank_material, tank_anchored)] - - CMP = pd.DataFrame( - cur_tank_cmp_line, - index = ['Units','Location','Direction','Theta_0','Family'] - ).T - - DL_ap = { - "Asset": { - "ComponentAssignmentFile": "CMP_QNT.csv", - "ComponentDatabase": "Hazus Earthquake - Water", - "Material": tank_material, - "Location": tank_location, - "Anchored": tank_anchored, - "PlanArea": "1" # Sina: does not make sense for water. Kept it here since itw as also kept here for Transportation - }, - "Damage": { - "DamageProcess": "Hazus Earthquake" - }, - "Demands": { - } - } - - else: - print(f"Water Distribution network element type {wdn_element_type} is not supported in Hazus Earthquake IM DL method") - DL_ap = None - CMP = None - - else: - print(f"AssetType: {assetType} is not supported in Hazus Earthquake IM DL method") - - return GI_ap, DL_ap, CMP + return GI_ap, DL_ap, CMP diff --git a/pelicun/resources/auto/Hazus_Earthquake_Story.py b/pelicun/resources/auto/Hazus_Earthquake_Story.py index 5d14d1988..0b2bd34eb 100644 --- a/pelicun/resources/auto/Hazus_Earthquake_Story.py +++ b/pelicun/resources/auto/Hazus_Earthquake_Story.py @@ -39,19 +39,11 @@ import pandas as pd -ap_DesignLevel = { - 1940: 'PC', - 1940: 'LC', - 1975: 'MC', - 2100: 'HC' -} +ap_DesignLevel = {1940: 'LC', 1975: 'MC', 2100: 'HC'} +# ap_DesignLevel = {1940: 'PC', 1940: 'LC', 1975: 'MC', 2100: 'HC'} -ap_DesignLevel_W1 = { - 0: 'PC', - 0: 'LC', - 1975: 'MC', - 2100: 'HC' -} +ap_DesignLevel_W1 = {0: 'LC', 1975: 'MC', 2100: 'HC'} +# ap_DesignLevel_W1 = {0: 'PC', 0: 'LC', 1975: 'MC', 2100: 'HC'} ap_Occupancy = { 'Other/Unknown': 'RES3', @@ -66,15 +58,16 @@ 'Industrial - Warehouse': 'IND2', 'Industrial - Heavy': 'IND1', 'Retail': 'COM1', - 'Parking' : 'COM10' + 'Parking': 'COM10', } convert_design_level = { - 'High-Code' : 'HC', - 'Moderate-Code': 'MC', - 'Low-Code' : 'LC', - 'Pre-Code' : 'PC' - } + 'High-Code': 'HC', + 'Moderate-Code': 'MC', + 'Low-Code': 'LC', + 'Pre-Code': 'PC', +} + def story_scale(stories, comp_type): if comp_type == 'NSA': @@ -97,7 +90,7 @@ def story_scale(stories, comp_type): elif stories == 9: return 2.20 elif (stories >= 10) and (stories < 30): - return 2.30 + (stories-10)*0.04 + return 2.30 + 
(stories - 10) * 0.04 elif stories >= 30: return 3.10 else: @@ -123,7 +116,7 @@ def story_scale(stories, comp_type): elif stories == 9: return 4.50 elif (stories >= 10) and (stories < 50): - return 4.50 + (stories-10)*0.07 + return 4.50 + (stories - 10) * 0.07 elif stories >= 50: return 7.30 else: @@ -137,30 +130,30 @@ def auto_populate(AIM): Parameters ---------- AIM: dict - Asset Information Model - provides features of the asset that can be + Asset Information Model - provides features of the asset that can be used to infer attributes of the performance model. Returns ------- GI_ap: dict - Extended General Information - extends the GI from the input AIM with - additional inferred features. These features are typically used in - intermediate steps during the auto-population and are not required - for the performance assessment. They are returned to allow reviewing + Extended General Information - extends the GI from the input AIM with + additional inferred features. These features are typically used in + intermediate steps during the auto-population and are not required + for the performance assessment. They are returned to allow reviewing how these latent variables affect the final results. DL_ap: dict - Damage and Loss parameters - these define the performance model and + Damage and Loss parameters - these define the performance model and details of the calculation. CMP: DataFrame - Component assignment - Defines the components (in rows) and their + Component assignment - Defines the components (in rows) and their location, direction, and quantity (in columns). """ # extract the General Information GI = AIM.get('GeneralInformation', None) - if GI==None: - #TODO: show an error message + if GI is None: + # TODO: show an error message pass # initialize the auto-populated GI @@ -169,15 +162,14 @@ def auto_populate(AIM): assetType = AIM["assetType"] ground_failure = AIM["Applications"]["DL"]["ApplicationData"]["ground_failure"] - if assetType=="Buildings": - + if assetType == "Buildings": # get the building parameters - bt = GI['StructureType'] #building type + bt = GI['StructureType'] # building type # get the design level dl = GI.get('DesignLevel', None) - if dl == None: + if dl is None: # If there is no DesignLevel provided, we assume that the YearBuilt is # available year_built = GI['YearBuilt'] @@ -186,10 +178,10 @@ def auto_populate(AIM): DesignL = ap_DesignLevel_W1 else: DesignL = ap_DesignLevel - + for year in sorted(DesignL.keys()): if year_built <= year: - dl = DesignL[year] + dl = DesignL[year] break GI_ap['DesignLevel'] = dl @@ -198,35 +190,50 @@ def auto_populate(AIM): stories = GI.get('NumberOfStories', None) FG_S = f'STR.{bt}.{dl}' - FG_NSD = f'NSD' + FG_NSD = 'NSD' FG_NSA = f'NSA.{dl}' CMP = pd.DataFrame( - {f'{FG_S}': ['ea', 'all', '1, 2', f"{story_scale(stories, 'S')/stories/2.}", 'N/A'], - f'{FG_NSA}': ['ea', 'all', 0, f"{story_scale(stories, 'NSA')/stories}", 'N/A'], - f'{FG_NSD}': ['ea', 'all', '1, 2', f"{story_scale(stories, 'NSD')/stories/2.}", 'N/A']}, - index = ['Units','Location','Direction', - 'Theta_0','Family'] - - ).T + { + f'{FG_S}': [ + 'ea', + 'all', + '1, 2', + f"{story_scale(stories, 'S')/stories/2.}", + 'N/A', + ], + f'{FG_NSA}': [ + 'ea', + 'all', + 0, + f"{story_scale(stories, 'NSA')/stories}", + 'N/A', + ], + f'{FG_NSD}': [ + 'ea', + 'all', + '1, 2', + f"{story_scale(stories, 'NSD')/stories/2.}", + 'N/A', + ], + }, + index=['Units', 'Location', 'Direction', 'Theta_0', 'Family'], + ).T # if needed, add components to simulate damage from ground failure 
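Stepping back to the design-level inference a few lines above: a minimal, self-contained sketch of that lookup is given below. The helper name is illustrative; the breakpoint dictionary is the ap_DesignLevel mapping defined earlier in this file, and the loop reproduces the "first breakpoint year greater than or equal to YearBuilt wins" behavior shown in the hunk.

    # Sketch of the YearBuilt -> DesignLevel lookup used above.
    ap_DesignLevel = {1940: 'LC', 1975: 'MC', 2100: 'HC'}

    def infer_design_level(year_built, breakpoints=ap_DesignLevel):
        for year in sorted(breakpoints):
            if year_built <= year:
                return breakpoints[year]
        return None

    # infer_design_level(1930) -> 'LC'
    # infer_design_level(1960) -> 'MC'
    # infer_design_level(2005) -> 'HC'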
if ground_failure: - foundation_type = 'S' - FG_GF_H = f'GF.H.{foundation_type}' - FG_GF_V = f'GF.V.{foundation_type}' - - CMP_GF = pd.DataFrame( - {f'{FG_GF_H}':[ 'ea', 1, 1, 1, 'N/A'], - f'{FG_GF_V}':[ 'ea', 1, 3, 1, 'N/A']}, - index = [ 'Units','Location','Direction','Theta_0','Family'] - ).T + FG_GF_H = f'GF.H.{foundation_type}' # noqa + FG_GF_V = f'GF.V.{foundation_type}' # noqa + CMP_GF = pd.DataFrame( # noqa + {f'{FG_GF_H}':[ 'ea', 1, 1, 1, 'N/A'], # noqa + f'{FG_GF_V}':[ 'ea', 1, 3, 1, 'N/A']}, # noqa + index = [ 'Units','Location','Direction','Theta_0','Family'] # noqa + ).T # noqa CMP = pd.concat([CMP, CMP_GF], axis=0) - # get the occupancy class if GI['OccupancyClass'] in ap_Occupancy.keys(): ot = ap_Occupancy[GI['OccupancyClass']] @@ -235,16 +242,16 @@ def auto_populate(AIM): plan_area = GI.get('PlanArea', 1.0) - bldg_repair_config = { - "ConsequenceDatabase": "Hazus Earthquake - Stories", - "MapApproach": "Automatic", - "DecisionVariables": { - "Cost": True, - "Carbon": False, - "Energy": False, - "Time": False - } - } + repair_config = { + "ConsequenceDatabase": "Hazus Earthquake - Stories", + "MapApproach": "Automatic", + "DecisionVariables": { + "Cost": True, + "Carbon": False, + "Energy": False, + "Time": False, + }, + } DL_ap = { "Asset": { @@ -252,19 +259,17 @@ def auto_populate(AIM): "ComponentDatabase": "Hazus Earthquake - Stories", "NumberOfStories": f"{stories}", "OccupancyType": f"{ot}", - "PlanArea": str(plan_area) + "PlanArea": str(plan_area), }, - "Damage": { - "DamageProcess": "Hazus Earthquake" - }, - "Demands": { - }, - "Losses": { - "BldgRepair": bldg_repair_config - } + "Damage": {"DamageProcess": "Hazus Earthquake"}, + "Demands": {}, + "Losses": {"Repair": repair_config}, } - + else: - print(f"AssetType: {assetType} is not supported in Hazus Earthquake Story-based DL method") + print( + f"AssetType: {assetType} is not supported " + f"in Hazus Earthquake Story-based DL method" + ) - return GI_ap, DL_ap, CMP \ No newline at end of file + return GI_ap, DL_ap, CMP diff --git a/pelicun/tests/data/model/test_DamageModel_perform_dmg_task/CMP_marginals_2.csv b/pelicun/tests/data/model/test_DamageModel_perform_dmg_task/CMP_marginals_2.csv new file mode 100755 index 000000000..46aed067a --- /dev/null +++ b/pelicun/tests/data/model/test_DamageModel_perform_dmg_task/CMP_marginals_2.csv @@ -0,0 +1,5 @@ +,Units,Location,Direction,Theta_0,Blocks +CMP.A,ea,1,1,1,1 +CMP.A,ea,2,1,1,1 +CMP.B,ea,1,2,1,1 +CMP.B,ea,2,2,1,1 diff --git a/pelicun/tests/test_assessment.py b/pelicun/tests/test_assessment.py index 6fa50e3e2..cd7b95484 100644 --- a/pelicun/tests/test_assessment.py +++ b/pelicun/tests/test_assessment.py @@ -79,8 +79,8 @@ def test_Assessment_init(): assert isinstance(asmt.asset, model.AssetModel) assert asmt.damage assert isinstance(asmt.damage, model.DamageModel) - assert asmt.bldg_repair - assert isinstance(asmt.bldg_repair, model.BldgRepairModel) + assert asmt.repair + assert isinstance(asmt.repair, model.RepairModel) def test_assessment_get_default_metadata(): diff --git a/pelicun/tests/test_auto.py b/pelicun/tests/test_auto.py new file mode 100644 index 000000000..b1b50fa58 --- /dev/null +++ b/pelicun/tests/test_auto.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Leland Stanford Junior University +# Copyright (c) 2018 The Regents of the University of California +# +# This file is part of pelicun. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# You should have received a copy of the BSD 3-Clause License along with +# pelicun. If not, see . +# +# Contributors: +# Adam Zsarnóczay +# John Vouvakis Manousakis + +""" +These are unit and integration tests on the auto module of pelicun. + +""" + +import pytest +from unittest.mock import patch +from unittest.mock import MagicMock +from pelicun.auto import auto_populate + + +# pylint: disable=missing-function-docstring + +# The tests maintain the order of definitions of the `auto.py` file. + +# _____ _ _ +# | ___| _ _ __ ___| |_(_) ___ _ __ ___ +# | |_ | | | | '_ \ / __| __| |/ _ \| '_ \/ __| +# | _|| |_| | | | | (__| |_| | (_) | | | \__ \ +# |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/ +# +# The following tests verify the functions of the module. + + +@pytest.fixture +def setup_valid_config(): + return {'GeneralInformation': {'someKey': 'someValue'}} + + +@pytest.fixture +def setup_auto_script_path(): + return 'PelicunDefault/test_script' + + +@pytest.fixture +def setup_expected_base_path(): + return '/expected/path/resources/auto/' + + +def test_valid_inputs(setup_valid_config, setup_auto_script_path): + with patch('pelicun.base.pelicun_path', '/expected/path'), patch( + 'os.path.exists', return_value=True + ), patch('importlib.__import__') as mock_import: + mock_auto_populate_ext = MagicMock( + return_value=({'AIM_ap': 'value'}, {'DL_ap': 'value'}, 'CMP') + ) + mock_import.return_value.auto_populate = mock_auto_populate_ext + + config, cmp = auto_populate(setup_valid_config, setup_auto_script_path) + + assert 'DL' in config + assert cmp == 'CMP' + + +def test_missing_general_information(): + with pytest.raises(ValueError) as excinfo: + auto_populate({}, 'some/path') + assert "No Asset Information provided for the auto-population routine." 
in str( + excinfo.value + ) + + +def test_pelicun_default_path_replacement( + setup_auto_script_path, setup_expected_base_path +): + modified_path = setup_auto_script_path.replace( + 'PelicunDefault/', setup_expected_base_path + ) + assert modified_path.startswith( + setup_expected_base_path + ) + + +def test_auto_population_script_execution( + setup_valid_config, setup_auto_script_path +): + with patch('pelicun.base.pelicun_path', '/expected/path'), patch( + 'os.path.exists', return_value=True + ), patch('importlib.__import__') as mock_import: + mock_auto_populate_ext = MagicMock( + return_value=({'AIM_ap': 'value'}, {'DL_ap': 'value'}, 'CMP') + ) + mock_import.return_value.auto_populate = mock_auto_populate_ext + + auto_populate(setup_valid_config, setup_auto_script_path) + mock_import.assert_called_once() diff --git a/pelicun/tests/test_base.py b/pelicun/tests/test_base.py index b1b5b8524..3ae89ce31 100644 --- a/pelicun/tests/test_base.py +++ b/pelicun/tests/test_base.py @@ -305,20 +305,15 @@ def test_convert_dtypes(): # Expected DataFrame df_expected = pd.DataFrame({'a': [1, 2, 3], 'b': [4.0, 5.5, 6.75]}).astype( - {'a': int, 'b': float} + {'a': 'int64', 'b': 'float64'} ) # Convert data types df_result = base.convert_dtypes(df_input) - # Verify dtypes - - if os.name == 'nt': - # Windows sometimes uses int32 and sometimes int64, breaking - # our tests. - df_expected['a'] = df_expected['a'].astype('int64') - - pd.testing.assert_frame_equal(df_result, df_expected) + pd.testing.assert_frame_equal( + df_result, df_expected, check_index_type=False, check_column_type=False + ) # No columns that can be converted @@ -327,7 +322,9 @@ def test_convert_dtypes(): ) df_expected = df_input.copy() df_result = base.convert_dtypes(df_input) - pd.testing.assert_frame_equal(df_result, df_expected) + pd.testing.assert_frame_equal( + df_result, df_expected, check_index_type=False, check_column_type=False + ) # Columns with mixed types @@ -339,21 +336,31 @@ def test_convert_dtypes(): } ) df_result = base.convert_dtypes(df_input) - pd.testing.assert_frame_equal(df_result, df_input) + pd.testing.assert_frame_equal( + df_result, df_input, check_index_type=False, check_column_type=False + ) # None values present df_input = pd.DataFrame({'a': [None, '2', '3'], 'b': ['4.0', None, '6.75']}) df_expected = pd.DataFrame({'a': [np.nan, 2, 3], 'b': [4.0, np.nan, 6.75]}) df_result = base.convert_dtypes(df_input) - pd.testing.assert_frame_equal(df_result, df_expected, check_dtype=False) + pd.testing.assert_frame_equal( + df_result, + df_expected, + check_dtype=False, + check_index_type=False, + check_column_type=False, + ) # Empty dataframe df_input = pd.DataFrame({}) df_expected = pd.DataFrame({}) df_result = base.convert_dtypes(df_input) - pd.testing.assert_frame_equal(df_result, df_expected) + pd.testing.assert_frame_equal( + df_result, df_expected, check_index_type=False, check_column_type=False + ) def test_convert_to_SimpleIndex(): diff --git a/pelicun/tests/test_file_io.py b/pelicun/tests/test_file_io.py index 60d7478c4..5a141cef9 100644 --- a/pelicun/tests/test_file_io.py +++ b/pelicun/tests/test_file_io.py @@ -145,6 +145,19 @@ def msg(self, text, **kwargs): assert mylogger.logs[-1][0] == 'WARNING: Data was empty, no file saved.' 
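The new test that follows exercises file_io.substitute_default_path. A minimal sketch of the substitution it asserts is shown below; the resources/SimCenterDBDL prefix and the example paths are taken from the expected values in the test, while the list comprehension itself is only an assumption about how the real helper is implemented.

    # Assumed behavior, inferred from the test expectations below: only paths
    # starting with 'PelicunDefault/' are rewritten; everything else passes through.
    def substitute_default_path_sketch(paths, pelicun_path='some_path'):
        target = f'{pelicun_path}/resources/SimCenterDBDL/'
        return [
            p.replace('PelicunDefault/', target, 1)
            if p.startswith('PelicunDefault/') else p
            for p in paths
        ]

    # substitute_default_path_sketch(
    #     ['PelicunDefault/data/file1.txt', '/data/file2.txt'])
    # -> ['some_path/resources/SimCenterDBDL/data/file1.txt', '/data/file2.txt']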
+def test_substitute_default_path(): + prior_path = file_io.base.pelicun_path + file_io.base.pelicun_path = 'some_path' + input_paths = ['PelicunDefault/data/file1.txt', '/data/file2.txt'] + expected_paths = [ + 'some_path/resources/SimCenterDBDL/data/file1.txt', + '/data/file2.txt', + ] + result_paths = file_io.substitute_default_path(input_paths) + assert result_paths == expected_paths + file_io.base.pelicun_path = prior_path + + def test_load_data(): # test loading data with orientation 0 diff --git a/pelicun/tests/test_model.py b/pelicun/tests/test_model.py index 1834a8a2c..459b955a8 100644 --- a/pelicun/tests/test_model.py +++ b/pelicun/tests/test_model.py @@ -44,6 +44,7 @@ import os import tempfile +from copy import deepcopy import pytest import numpy as np import pandas as pd @@ -75,13 +76,13 @@ def create_instance(verbose): @pytest.fixture(params=[True, False]) def assessment_instance(self, request, assessment_factory): - return assessment_factory(request.param) + return deepcopy(assessment_factory(request.param)) class TestDemandModel(TestModelModule): @pytest.fixture def demand_model(self, assessment_instance): - return assessment_instance.demand + return deepcopy(assessment_instance.demand) @pytest.fixture def demand_model_with_sample(self, assessment_instance): @@ -90,7 +91,7 @@ def demand_model_with_sample(self, assessment_instance): 'pelicun/tests/data/model/' 'test_DemandModel_load_sample/demand_sample_A.csv' ) - return mdl + return deepcopy(mdl) @pytest.fixture def calibrated_demand_model(self, demand_model_with_sample): @@ -108,7 +109,7 @@ def calibrated_demand_model(self, demand_model_with_sample): }, } demand_model_with_sample.calibrate_model(config) - return demand_model_with_sample + return deepcopy(demand_model_with_sample) @pytest.fixture def demand_model_with_sample_B(self, assessment_instance): @@ -117,7 +118,7 @@ def demand_model_with_sample_B(self, assessment_instance): 'pelicun/tests/data/model/' 'test_DemandModel_load_sample/demand_sample_B.csv' ) - return mdl + return deepcopy(mdl) @pytest.fixture def demand_model_with_sample_C(self, assessment_instance): @@ -126,7 +127,7 @@ def demand_model_with_sample_C(self, assessment_instance): 'pelicun/tests/data/model/' 'test_DemandModel_load_sample/demand_sample_C.csv' ) - return mdl + return deepcopy(mdl) @pytest.fixture def demand_model_with_sample_D(self, assessment_instance): @@ -135,7 +136,7 @@ def demand_model_with_sample_D(self, assessment_instance): 'pelicun/tests/data/model/' 'test_DemandModel_load_sample/demand_sample_D.csv' ) - return mdl + return deepcopy(mdl) def test_init(self, demand_model): assert demand_model.log_msg @@ -181,8 +182,17 @@ def test_load_sample(self, demand_model_with_sample, demand_model_with_sample_B) # level. 
Therefore, the two files are expected to result to the # same `obtained_sample` - pd.testing.assert_frame_equal(obtained_sample, obtained_sample_2) - pd.testing.assert_series_equal(obtained_units, obtained_units_2) + pd.testing.assert_frame_equal( + obtained_sample, + obtained_sample_2, + check_index_type=False, + check_column_type=False, + ) + pd.testing.assert_series_equal( + obtained_units, + obtained_units_2, + check_index_type=False, + ) # compare against the expected values for the sample expected_sample = pd.DataFrame( @@ -200,7 +210,12 @@ def test_load_sample(self, demand_model_with_sample, demand_model_with_sample_B) ), index=[0], ) - pd.testing.assert_frame_equal(expected_sample, obtained_sample) + pd.testing.assert_frame_equal( + expected_sample, + obtained_sample, + check_index_type=False, + check_column_type=False, + ) # compare against the expected values for the units expected_units = pd.Series( @@ -216,7 +231,11 @@ def test_load_sample(self, demand_model_with_sample, demand_model_with_sample_B) ), name='Units', ) - pd.testing.assert_series_equal(expected_units, obtained_units) + pd.testing.assert_series_equal( + expected_units, + obtained_units, + check_index_type=False, + ) def test_estimate_RID(self, demand_model_with_sample): demands = demand_model_with_sample.sample['PID'] @@ -244,6 +263,9 @@ def test_calibrate_model( == 0.06 ) + def test_calibrate_model_censoring( + self, calibrated_demand_model, demand_model_with_sample_C + ): # with a config featuring censoring the RIDs config = { "ALL": { @@ -257,6 +279,9 @@ def test_calibrate_model( } demand_model_with_sample_C.calibrate_model(config) + def test_calibrate_model_truncation( + self, calibrated_demand_model, demand_model_with_sample_C + ): # with a config that specifies a truncation limit smaller than # the samples config = { @@ -270,6 +295,9 @@ def test_calibrate_model( }, } demand_model_with_sample_C.calibrate_model(config) + # calibrating again should raise an error + with pytest.raises(ValueError): + demand_model_with_sample_C.calibrate_model(config) def test_save_load_model_with_empirical( self, calibrated_demand_model, assessment_instance @@ -285,13 +313,25 @@ def test_save_load_model_with_empirical( new_demand_model = assessment_instance.demand new_demand_model.load_model(f'{temp_dir}/temp') pd.testing.assert_frame_equal( - calibrated_demand_model.marginal_params, new_demand_model.marginal_params + calibrated_demand_model.marginal_params, + new_demand_model.marginal_params, + atol=1e-4, + check_index_type=False, + check_column_type=False, ) pd.testing.assert_frame_equal( - calibrated_demand_model.correlation, new_demand_model.correlation + calibrated_demand_model.correlation, + new_demand_model.correlation, + atol=1e-4, + check_index_type=False, + check_column_type=False, ) pd.testing.assert_frame_equal( - calibrated_demand_model.empirical_data, new_demand_model.empirical_data + calibrated_demand_model.empirical_data, + new_demand_model.empirical_data, + atol=1e-4, + check_index_type=False, + check_column_type=False, ) # # todo: this currently fails @@ -369,7 +409,12 @@ def test_generate_sample(self, calibrated_demand_model): index=pd.Index((0, 1, 2), dtype='object'), ) pd.testing.assert_frame_equal( - expected_sample, obtained_sample, check_exact=False, atol=1e-4 + expected_sample, + obtained_sample, + check_exact=False, + atol=1e-4, + check_index_type=False, + check_column_type=False, ) # compare against the expected values for the units @@ -386,7 +431,11 @@ def test_generate_sample(self, 
calibrated_demand_model): ), name='Units', ) - pd.testing.assert_series_equal(expected_units, obtained_units) + pd.testing.assert_series_equal( + expected_units, + obtained_units, + check_index_type=False, + ) def test_generate_sample_with_demand_cloning(self, assessment_instance): # # used for debugging @@ -454,7 +503,7 @@ def test_generate_sample_with_demand_cloning(self, assessment_instance): class TestPelicunModel(TestModelModule): @pytest.fixture def pelicun_model(self, assessment_instance): - return model.PelicunModel(assessment_instance) + return deepcopy(model.PelicunModel(assessment_instance)) def test_init(self, pelicun_model): assert pelicun_model.log_msg @@ -535,7 +584,9 @@ def test_convert_marginal_params(self, pelicun_model): ), ) - pd.testing.assert_frame_equal(expected_df, res) + pd.testing.assert_frame_equal( + expected_df, res, check_index_type=False, check_column_type=False + ) # a case with arg_units marginal_params = pd.DataFrame( @@ -568,13 +619,15 @@ def test_convert_marginal_params(self, pelicun_model): names=('cmp', 'loc', 'dir'), ), ) - pd.testing.assert_frame_equal(expected_df, res) + pd.testing.assert_frame_equal( + expected_df, res, check_index_type=False, check_column_type=False + ) class TestAssetModel(TestPelicunModel): @pytest.fixture def asset_model(self, assessment_instance): - return assessment_instance.asset + return deepcopy(assessment_instance.asset) def test_init(self, asset_model): assert asset_model.log_msg @@ -646,25 +699,25 @@ def test_load_cmp_model_1(self, asset_model): ), names=('cmp', 'loc', 'dir', 'uid'), ), - ) + ).astype({'Theta_0': 'float64', 'Blocks': 'int64'}) - if os.name == 'nt': - expected_cmp_marginal_params['Blocks'] = expected_cmp_marginal_params[ - 'Blocks' - ].astype('int32') - pd.testing.assert_frame_equal( - expected_cmp_marginal_params, asset_model.cmp_marginal_params - ) - else: - pd.testing.assert_frame_equal( - expected_cmp_marginal_params, asset_model.cmp_marginal_params - ) + pd.testing.assert_frame_equal( + expected_cmp_marginal_params, + asset_model.cmp_marginal_params, + check_index_type=False, + check_column_type=False, + check_dtype=False, + ) expected_cmp_units = pd.Series( data=['ea'], index=['component_a'], name='Units' ) - pd.testing.assert_series_equal(expected_cmp_units, asset_model.cmp_units) + pd.testing.assert_series_equal( + expected_cmp_units, + asset_model.cmp_units, + check_index_type=False, + ) def test_load_cmp_model_2(self, asset_model): # component marginals utilizing the keywords '--', 'all', 'top', 'roof' @@ -722,7 +775,11 @@ def test_load_cmp_model_2(self, asset_model): name='Units', ) - pd.testing.assert_series_equal(expected_cmp_units, asset_model.cmp_units) + pd.testing.assert_series_equal( + expected_cmp_units, + asset_model.cmp_units, + check_index_type=False, + ) def test_load_cmp_model_csv(self, asset_model): # load by directly specifying the csv file @@ -785,7 +842,12 @@ def test_generate_cmp_sample(self, asset_model): ), ) - pd.testing.assert_frame_equal(expected_cmp_sample, asset_model.cmp_sample) + pd.testing.assert_frame_equal( + expected_cmp_sample, + asset_model.cmp_sample, + check_index_type=False, + check_column_type=False, + ) # currently this is not working # def test_load_cmp_model_block_weights(self, asset_model): @@ -849,7 +911,7 @@ def calibration_config_A(self): @pytest.fixture def damage_model(self, assessment_instance): - return assessment_instance.damage + return deepcopy(assessment_instance.damage) @pytest.fixture def damage_model_model_loaded(self, damage_model, 
cmp_sample_A): @@ -857,7 +919,7 @@ def damage_model_model_loaded(self, damage_model, cmp_sample_A): asmt.get_default_data('damage_DB_FEMA_P58_2nd') asmt.asset._cmp_sample = cmp_sample_A damage_model.load_damage_model(['PelicunDefault/damage_DB_FEMA_P58_2nd.csv']) - return damage_model + return deepcopy(damage_model) @pytest.fixture def damage_model_with_sample(self, assessment_instance): @@ -981,7 +1043,7 @@ def damage_model_with_sample(self, assessment_instance): name='Units', dtype='object', ) - return assessment_instance.damage + return deepcopy(assessment_instance.damage) def test_init(self, damage_model): assert damage_model.log_msg @@ -1006,11 +1068,16 @@ def test_save_load_sample(self, damage_model_with_sample, assessment_instance): # saving to a variable sample_from_variable = damage_model_with_sample.save_sample(save_units=False) - pd.testing.assert_frame_equal(sample_from_file, sample_from_variable) + pd.testing.assert_frame_equal( + sample_from_file, + sample_from_variable, + check_index_type=False, + check_column_type=False, + ) _, units_from_variable = damage_model_with_sample.save_sample( save_units=True ) - assert units_from_variable.to_list() == ['ea'] * 20 + assert np.all(units_from_variable.to_numpy() == 'ea') def test_load_damage_model(self, damage_model_model_loaded): # should no longer be None @@ -1219,12 +1286,6 @@ def test__generate_dmg_sample(self, damage_model_model_loaded): assert list(res.index) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - assert capacity_sample.to_numpy().dtype == np.dtype('float64') - if os.name == 'nt': - assert lsds_sample.to_numpy().dtype == np.dtype('int32') - else: - assert lsds_sample.to_numpy().dtype == np.dtype('int64') - def test__get_required_demand_type(self, damage_model_model_loaded): pg_batch = damage_model_model_loaded._get_pg_batches(block_batch_size=1) batches = pg_batch.index.get_level_values(0).unique() @@ -1312,69 +1373,208 @@ def test__evaluate_damage_state_and_prepare_dmg_quantities( assert list(qnt_sample.columns)[0] == ('B.10.31.001', '2', '2', '0', '0') def test__perform_dmg_task(self, assessment_instance): + damage_model = assessment_instance.damage - demand_model = assessment_instance.demand - asset_model = assessment_instance.asset - data = [ - ['rad', 1e-11], - ['rad', 1e11], - ] + # + # when CMP.B reaches DS1, CMP.A should be DS4 + # - index = pd.MultiIndex.from_tuples( - (('PID', '1', '1'), ('PID', '1', '2')), names=['type', 'loc', 'dir'] + ds_sample = pd.DataFrame( + { + ('CMP.A', '1', '1', '0'): [0, 0, 0], + ('CMP.A', '1', '1', '1'): [0, 0, 0], + ('CMP.B', '1', '1', '0'): [0, 0, 1], + ('CMP.B', '1', '1', '1'): [1, 0, 0], + }, + dtype='int32', ) + ds_sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] - demand_marginals = pd.DataFrame(data, index, columns=['Units', 'Theta_0']) - demand_model.load_model({'marginals': demand_marginals}) - sample_size = 5 - demand_model.generate_sample({"SampleSize": sample_size}) + dmg_process = {"1_CMP.B": {"DS1": "CMP.A_DS4"}} + for task in dmg_process.items(): + damage_model._perform_dmg_task(task, ds_sample) + after = ds_sample + + assert after.to_dict() == { + ('CMP.A', '1', '1', '0'): {0: 4, 1: 0, 2: 4}, + ('CMP.A', '1', '1', '1'): {0: 4, 1: 0, 2: 4}, + ('CMP.B', '1', '1', '0'): {0: 0, 1: 0, 2: 1}, + ('CMP.B', '1', '1', '1'): {0: 1, 1: 0, 2: 0}, + } - cmp_marginals = pd.read_csv( - 'pelicun/tests/data/model/' - 'test_DamageModel_perform_dmg_task/CMP_marginals.csv', - index_col=0, + # + # when CMP.B reaches DS1, CMP.A should be NA (-1) + # + + ds_sample = pd.DataFrame( + { + ('CMP.A', 
'1', '1', '0'): [0, 0, 0], + ('CMP.A', '1', '1', '1'): [0, 0, 0], + ('CMP.B', '1', '1', '0'): [0, 0, 1], + ('CMP.B', '1', '1', '1'): [1, 0, 0], + }, + dtype='int32', ) - asset_model.load_cmp_model({'marginals': cmp_marginals}) - asset_model.generate_cmp_sample(sample_size) + ds_sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] - damage_model.load_damage_model( - [ - 'pelicun/tests/data/model/' - 'test_DamageModel_perform_dmg_task/fragility_DB_test.csv' - ] + dmg_process = {"1_CMP.B": {"DS1": "CMP.A_NA"}} + for task in dmg_process.items(): + damage_model._perform_dmg_task(task, ds_sample) + after = ds_sample + + assert after.to_dict() == { + ('CMP.A', '1', '1', '0'): {0: -1, 1: 0, 2: -1}, + ('CMP.A', '1', '1', '1'): {0: -1, 1: 0, 2: -1}, + ('CMP.B', '1', '1', '0'): {0: 0, 1: 0, 2: 1}, + ('CMP.B', '1', '1', '1'): {0: 1, 1: 0, 2: 0}, + } + + # + # `-LOC` keyword + # when CMP.B reaches DS1, CMP.A should be DS4 + # matching locations + # + + ds_sample = pd.DataFrame( + { + ('CMP.A', '1', '1', '0'): [0, 0, 0], + ('CMP.A', '2', '1', '0'): [0, 0, 0], + ('CMP.B', '1', '1', '0'): [0, 0, 1], + ('CMP.B', '2', '1', '0'): [1, 0, 0], + }, + dtype='int32', ) + ds_sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] - block_batch_size = 5 - qnt_samples = [] - pg_batch = damage_model._get_pg_batches(block_batch_size) - batches = pg_batch.index.get_level_values(0).unique() - for PGB_i in batches: - PGB = pg_batch.loc[PGB_i] - capacity_sample, lsds_sample = damage_model._generate_dmg_sample( - sample_size, PGB - ) - EDP_req = damage_model._get_required_demand_type(PGB) - demand_dict = damage_model._assemble_required_demand_data(EDP_req) - ds_sample = damage_model._evaluate_damage_state( - demand_dict, EDP_req, capacity_sample, lsds_sample - ) - qnt_sample = damage_model._prepare_dmg_quantities( - PGB, ds_sample, dropzero=False - ) - qnt_samples.append(qnt_sample) - qnt_sample = pd.concat(qnt_samples, axis=1) - qnt_sample.sort_index(axis=1, inplace=True) - before = qnt_sample.copy() + dmg_process = {"1_CMP.B-LOC": {"DS1": "CMP.A_DS4"}} + for task in dmg_process.items(): + damage_model._perform_dmg_task(task, ds_sample) + after = ds_sample + + assert after.to_dict() == { + ('CMP.A', '1', '1', '0'): {0: 0, 1: 0, 2: 4}, + ('CMP.A', '2', '1', '0'): {0: 4, 1: 0, 2: 0}, + ('CMP.B', '1', '1', '0'): {0: 0, 1: 0, 2: 1}, + ('CMP.B', '2', '1', '0'): {0: 1, 1: 0, 2: 0}, + } + + # + # ALL keyword + # + # Whenever CMP.A reaches DS1, all other components should be + # set to DS2. 
+ # - dmg_process = {"1_CMP.B": {"DS1": "CMP.A_DS1"}} - dmg_process = {key: dmg_process[key] for key in sorted(dmg_process)} + ds_sample = pd.DataFrame( + { + ('CMP.A', '1', '1', '0'): [1, 0, 0], + ('CMP.B', '1', '1', '0'): [0, 0, 0], + ('CMP.C', '1', '1', '0'): [0, 0, 0], + ('CMP.D', '1', '1', '0'): [0, 0, 0], + }, + dtype='int32', + ) + ds_sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] + + dmg_process = {"1_CMP.A": {"DS1": "ALL_DS2"}} + for task in dmg_process.items(): + damage_model._perform_dmg_task(task, ds_sample) + after = ds_sample + + assert after.to_dict() == { + ('CMP.A', '1', '1', '0'): {0: 1, 1: 0, 2: 0}, + ('CMP.B', '1', '1', '0'): {0: 2, 1: 0, 2: 0}, + ('CMP.C', '1', '1', '0'): {0: 2, 1: 0, 2: 0}, + ('CMP.D', '1', '1', '0'): {0: 2, 1: 0, 2: 0}, + } + + # + # NA keyword + # + # NA translates to -1 representing nan + # + + ds_sample = pd.DataFrame( + { + ('CMP.A', '1', '1', '0'): [0, 0, 0], + ('CMP.A', '1', '1', '1'): [0, 0, 0], + ('CMP.B', '1', '1', '0'): [0, 0, 1], + ('CMP.B', '1', '1', '1'): [1, 0, 0], + }, + dtype='int32', + ) + ds_sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] + + dmg_process = {"1_CMP.B": {"DS1": "CMP.A_NA"}} for task in dmg_process.items(): - damage_model._perform_dmg_task(task, qnt_sample) - after = qnt_sample + damage_model._perform_dmg_task(task, ds_sample) + after = ds_sample + + assert after.to_dict() == { + ('CMP.A', '1', '1', '0'): {0: -1, 1: 0, 2: -1}, + ('CMP.A', '1', '1', '1'): {0: -1, 1: 0, 2: -1}, + ('CMP.B', '1', '1', '0'): {0: 0, 1: 0, 2: 1}, + ('CMP.B', '1', '1', '1'): {0: 1, 1: 0, 2: 0}, + } - assert ('CMP.A', '1', '1', '0', '1') not in before.columns - assert ('CMP.A', '1', '1', '0', '1') in after.columns + # + # NA keyword combined with `-LOC` + # + + ds_sample = pd.DataFrame( + { + ('CMP.A', '1', '1', '0'): [0, 0, 0], + ('CMP.A', '2', '1', '0'): [0, 0, 0], + ('CMP.B', '1', '1', '0'): [0, 0, 1], + ('CMP.B', '2', '1', '0'): [1, 0, 0], + }, + dtype='int32', + ) + ds_sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] + + dmg_process = {"1_CMP.B-LOC": {"DS1": "CMP.A_NA"}} + for task in dmg_process.items(): + damage_model._perform_dmg_task(task, ds_sample) + after = ds_sample + + assert after.to_dict() == { + ('CMP.A', '1', '1', '0'): {0: 0, 1: 0, 2: -1}, + ('CMP.A', '2', '1', '0'): {0: -1, 1: 0, 2: 0}, + ('CMP.B', '1', '1', '0'): {0: 0, 1: 0, 2: 1}, + ('CMP.B', '2', '1', '0'): {0: 1, 1: 0, 2: 0}, + } + + # + # NA keyword combined with `-LOC` and `ALL` + # + + ds_sample = pd.DataFrame( + { + ('CMP.A', '1', '1', '0'): [0, 0, 1], + ('CMP.A', '2', '1', '0'): [1, 0, 0], + ('CMP.B', '1', '1', '0'): [0, 0, 0], + ('CMP.B', '2', '1', '0'): [0, 0, 0], + ('CMP.C', '1', '1', '0'): [0, 0, 0], + ('CMP.C', '2', '1', '0'): [0, 0, 0], + }, + dtype='int32', + ) + ds_sample.columns.names = ['cmp', 'loc', 'dir', 'uid'] + + dmg_process = {"1_CMP.A-LOC": {"DS1": "ALL_NA"}} + for task in dmg_process.items(): + damage_model._perform_dmg_task(task, ds_sample) + after = ds_sample + + assert after.to_dict() == { + ('CMP.A', '1', '1', '0'): {0: 0, 1: 0, 2: 1}, + ('CMP.A', '2', '1', '0'): {0: 1, 1: 0, 2: 0}, + ('CMP.B', '1', '1', '0'): {0: 0, 1: 0, 2: -1}, + ('CMP.B', '2', '1', '0'): {0: -1, 1: 0, 2: 0}, + ('CMP.C', '1', '1', '0'): {0: 0, 1: 0, 2: -1}, + ('CMP.C', '2', '1', '0'): {0: -1, 1: 0, 2: 0}, + } def test__get_pg_batches_1(self, assessment_instance): damage_model = assessment_instance.damage @@ -1428,7 +1628,9 @@ def test__get_pg_batches_2(self, damage_model_model_loaded): columns=('Blocks',), ).astype('Int64') - 
pd.testing.assert_frame_equal(expected_res, res) + pd.testing.assert_frame_equal( + expected_res, res, check_index_type=False, check_column_type=False + ) res = damage_model_model_loaded._get_pg_batches(block_batch_size=1000) expected_res = pd.DataFrame( @@ -1445,7 +1647,9 @@ def test__get_pg_batches_2(self, damage_model_model_loaded): columns=('Blocks',), ).astype('Int64') - pd.testing.assert_frame_equal(expected_res, res) + pd.testing.assert_frame_equal( + expected_res, res, check_index_type=False, check_column_type=False + ) def test_calculate(self, damage_model_with_sample): # note: Due to inherent randomness, we can't assert the actual @@ -1507,7 +1711,7 @@ def test_calculate_multilinear_CDF(self, damage_model): class TestLossModel(TestPelicunModel): @pytest.fixture def loss_model(self, assessment_instance): - return model.LossModel(assessment_instance) + return deepcopy(model.LossModel(assessment_instance)) def test_init(self, loss_model): assert loss_model.log_msg @@ -1571,12 +1775,19 @@ def test_load_sample_save_sample(self, loss_model): loss_model.load_sample(sample) - pd.testing.assert_frame_equal(sample, loss_model._sample) + pd.testing.assert_frame_equal( + sample, + loss_model._sample, + check_index_type=False, + check_column_type=False, + ) output = loss_model.save_sample(None) output.index = output.index.astype('int64') - pd.testing.assert_frame_equal(sample, output) + pd.testing.assert_frame_equal( + sample, output, check_index_type=False, check_column_type=False + ) def test_load_model(self, loss_model): data_path_1 = pd.DataFrame( @@ -1626,10 +1837,10 @@ def test__generate_DV_sample(self, loss_model): loss_model._generate_DV_sample(None, None) -class TestBldgRepairModel(TestPelicunModel): +class TestRepairModel(TestPelicunModel): @pytest.fixture - def bldg_repair_model(self, assessment_instance): - return assessment_instance.bldg_repair + def repair_model(self, assessment_instance): + return deepcopy(assessment_instance.repair) @pytest.fixture def loss_params_A(self): @@ -1670,17 +1881,17 @@ def loss_params_A(self): ), ) - def test_init(self, bldg_repair_model): - assert bldg_repair_model.log_msg - assert bldg_repair_model.log_div + def test_init(self, repair_model): + assert repair_model.log_msg + assert repair_model.log_div - assert bldg_repair_model._sample is None - assert bldg_repair_model.loss_type == 'BldgRepair' + assert repair_model._sample is None + assert repair_model.loss_type == 'Repair' - def test__create_DV_RVs(self, bldg_repair_model, loss_params_A): - bldg_repair_model.loss_params = loss_params_A + def test__create_DV_RVs(self, repair_model, loss_params_A): + repair_model.loss_params = loss_params_A - bldg_repair_model.loss_map = pd.DataFrame( + repair_model.loss_map = pd.DataFrame( ((("DMG", "some.test.component"), "some.test.component"),), columns=("Driver", "Consequence"), ) @@ -1694,7 +1905,7 @@ def test__create_DV_RVs(self, bldg_repair_model, loss_params_A): names=("cmp", "loc", "dir", "uid", "ds"), ) - rv_reg = bldg_repair_model._create_DV_RVs(case_list) + rv_reg = repair_model._create_DV_RVs(case_list) assert list(rv_reg.RV.keys()) == [ 'Cost-0-1-2-2-0', 'Time-0-1-2-2-0', @@ -1718,10 +1929,10 @@ def test__create_DV_RVs(self, bldg_repair_model, loss_params_A): rvs[3].theta, np.array((1.00, 0.464027, np.nan)) ) - def test__calc_median_consequence(self, bldg_repair_model, loss_params_A): - bldg_repair_model.loss_params = loss_params_A + def test__calc_median_consequence(self, repair_model, loss_params_A): + repair_model.loss_params = loss_params_A - 
bldg_repair_model.loss_map = pd.DataFrame( + repair_model.loss_map = pd.DataFrame( ((("DMG", "some.test.component"), "some.test.component"),), columns=("Driver", "Consequence"), ) @@ -1737,12 +1948,12 @@ def test__calc_median_consequence(self, bldg_repair_model, loss_params_A): ), ) - medians = bldg_repair_model._calc_median_consequence(eco_qnt) + medians = repair_model._calc_median_consequence(eco_qnt) assert medians['Cost'].to_dict() == {(0, '1'): {0: 25704.0, 1: 22848.0}} assert medians['Time'].to_dict() == {(0, '1'): {0: 22.68, 1: 20.16}} - def test_aggregate_losses(self, bldg_repair_model, loss_params_A): - bldg_repair_model._sample = pd.DataFrame( + def test_aggregate_losses(self, repair_model, loss_params_A): + repair_model._sample = pd.DataFrame( ((100.00, 1.00),), columns=pd.MultiIndex.from_tuples( ( @@ -1767,9 +1978,9 @@ def test_aggregate_losses(self, bldg_repair_model, loss_params_A): ), ) - bldg_repair_model.loss_params = loss_params_A + repair_model.loss_params = loss_params_A - df_agg = bldg_repair_model.aggregate_losses() + df_agg = repair_model.aggregate_losses() assert df_agg.to_dict() == { ('repair_cost', ''): {0: 100.0}, @@ -1777,7 +1988,7 @@ def test_aggregate_losses(self, bldg_repair_model, loss_params_A): ('repair_time', 'sequential'): {0: 1.0}, } - def test__generate_DV_sample(self, bldg_repair_model): + def test__generate_DV_sample(self, repair_model): expected_sample = { (True, True): { ( @@ -1861,7 +2072,7 @@ def test__generate_DV_sample(self, bldg_repair_model): (True, True), (True, False), ): # todo: (False, True), (False, False) fails - assessment_instance = bldg_repair_model._asmnt + assessment_instance = repair_model._asmnt assessment_instance.options.eco_scale["AcrossFloors"] = ecofl assessment_instance.options.eco_scale["AcrossDamageStates"] = ecods @@ -1883,12 +2094,12 @@ def test__generate_DV_sample(self, bldg_repair_model): ), ) - bldg_repair_model.loss_map = pd.DataFrame( + repair_model.loss_map = pd.DataFrame( ((("DMG", "some.test.component"), "some.test.component"),), columns=("Driver", "Consequence"), ) - bldg_repair_model.loss_params = pd.DataFrame( + repair_model.loss_params = pd.DataFrame( ( ( None, @@ -1928,12 +2139,9 @@ def test__generate_DV_sample(self, bldg_repair_model): ), ) - bldg_repair_model._generate_DV_sample(dmg_quantities, 4) + repair_model._generate_DV_sample(dmg_quantities, 4) - assert ( - bldg_repair_model._sample.to_dict() - == expected_sample[(ecods, ecofl)] - ) + assert repair_model._sample.to_dict() == expected_sample[(ecods, ecofl)] # _____ _ _ @@ -1948,7 +2156,7 @@ def test__generate_DV_sample(self, bldg_repair_model): class TestModelFunctions: def test_prep_constant_median_DV(self): median = 10.00 - constant_median_DV = model.prep_constant_median_DV(median) + constant_median_DV = model.loss_model.prep_constant_median_DV(median) assert constant_median_DV() == median values = (1.0, 2.0, 3.0, 4.0, 5.0) for value in values: @@ -1957,7 +2165,7 @@ def test_prep_constant_median_DV(self): def test_prep_bounded_multilinear_median_DV(self): medians = np.array((1.00, 2.00, 3.00, 4.00, 5.00)) quantities = np.array((0.00, 1.00, 2.00, 3.00, 4.00)) - f = model.prep_bounded_multilinear_median_DV(medians, quantities) + f = model.loss_model.prep_bounded_multilinear_median_DV(medians, quantities) result = f(2.5) expected = 3.5 diff --git a/pelicun/tests/test_uq.py b/pelicun/tests/test_uq.py index c620312d4..eeed09545 100644 --- a/pelicun/tests/test_uq.py +++ b/pelicun/tests/test_uq.py @@ -799,132 +799,19 @@ def test__OLS_percentiles(): # The 
following tests verify the methods of the objects of the module. -def test_RandomVariable(): - # instantiate a random variable with default attributes - rv_1 = uq.RandomVariable('rv_1', 'empirical') - # verify that the attributes have been assigned as expected - assert rv_1.name == 'rv_1' - assert rv_1._distribution == 'empirical' - assert np.isnan(rv_1._theta[0]) - - # instantiate a random variable with default attributes - rv_2 = uq.RandomVariable('rv_2', 'coupled_empirical') - # verify that the attributes have been assigned as expected - assert rv_2.name == 'rv_2' - assert rv_2._distribution == 'coupled_empirical' - assert np.isnan(rv_2._theta[0]) - - # verify that other distributions require theta - distributions = ( - 'normal', - 'lognormal', - 'multinomial', - 'custom', - 'uniform', - 'deterministic', - ) - for distribution in distributions: - with pytest.raises(ValueError): - uq.RandomVariable("won't see the light of day", distribution) - - # define a distribution with a given theta - rv_3 = uq.RandomVariable('rv_3', 'normal', np.array((1.00, 0.20))) - # redefine the theta attribute - rv_3.theta = np.array((2.00, 0.20)) - # retrieve other attributes - assert np.allclose(rv_3.theta, np.array((2.00, 0.20))) - assert rv_3.custom_expr is None - assert rv_3.RV_set is None - assert rv_3.sample_DF is None - # assign an anchor value - rv_3.anchor = 2.00 - - # multinomial with invalid p values provided in the theta vector - with pytest.raises(ValueError): - uq.RandomVariable( - 'rv_invalid', 'multinomial', np.array((0.20, 0.70, 0.10, 42.00)) - ) - - # multilinear CDF: cases that should fail - - x_values = (0.00, 1.00, 2.00, 3.00, 4.00) - y_values = (100.00, 0.20, 0.20, 0.80, 1.00) - values = np.column_stack((x_values, y_values)) - with pytest.raises(ValueError): - uq.RandomVariable( - 'test_rv', - 'multilinear_CDF', - theta=values - ) - - x_values = (0.00, 1.00, 2.00, 3.00, 4.00) - y_values = (0.00, 0.20, 0.20, 0.80, 0.80) - values = np.column_stack((x_values, y_values)) - with pytest.raises(ValueError): - uq.RandomVariable( - 'test_rv', - 'multilinear_CDF', - theta=values - ) - - x_values = (0.00, 3.00, 1.00, 2.00, 4.00) - y_values = (0.00, 0.25, 0.50, 0.75, 1.00) - values = np.column_stack((x_values, y_values)) - with pytest.raises(ValueError): - uq.RandomVariable( - 'test_rv', - 'multilinear_CDF', - theta=values - ) - - x_values = (0.00, 1.00, 2.00, 3.00, 4.00) - y_values = (0.00, 0.75, 0.50, 0.25, 1.00) - values = np.column_stack((x_values, y_values)) - with pytest.raises(ValueError): - uq.RandomVariable( - 'test_rv', - 'multilinear_CDF', - theta=values - ) - - x_values = (0.00, 1.00, 2.00, 3.00, 4.00) - y_values = (0.00, 0.50, 0.50, 0.50, 1.00) - values = np.column_stack((x_values, y_values)) - with pytest.raises(ValueError): - uq.RandomVariable( - 'test_rv', - 'multilinear_CDF', - theta=values - ) - - x_values = (0.00, 2.00, 2.00, 3.00, 4.00) - y_values = (0.00, 0.20, 0.40, 0.50, 1.00) - values = np.column_stack((x_values, y_values)) - with pytest.raises(ValueError): - uq.RandomVariable( - 'test_rv', - 'multilinear_CDF', - theta=values - ) - - # truncation limits not supported - x_values = (0.00, 1.00, 2.00, 3.00, 4.00) - y_values = (0.00, 0.25, 0.50, 0.75, 1.00) - values = np.column_stack((x_values, y_values)) - with pytest.raises(ValueError): - uq.RandomVariable( - 'test_rv', - 'multilinear_CDF', - theta=values, - truncation_limits=np.array((0.20, 0.80)) - ) - - -def test_RandomVariable_cdf(): - # create a normal random variable - rv = uq.RandomVariable( +def 
test_NormalRandomVariable(): + rv = uq.NormalRandomVariable('rv_name', theta=np.array((0.00, 1.00))) + assert rv.name == 'rv_name' + np.testing.assert_allclose(rv.theta, np.array((0.00, 1.00))) + assert np.all(np.isnan(rv.truncation_limits)) + assert rv.RV_set is None + assert rv.sample_DF is None + + +def test_NormalRandomVariable_cdf(): + # test CDF method + rv = uq.NormalRandomVariable( 'test_rv', - 'normal', theta=(1.0, 1.0), truncation_limits=np.array((0.00, np.nan)), ) @@ -937,7 +824,7 @@ def test_RandomVariable_cdf(): assert np.allclose(cdf, (0.0, 0.0, 0.1781461, 0.40571329, 0.81142658), rtol=1e-5) # repeat without truncation limits - rv = uq.RandomVariable('test_rv', 'normal', theta=(1.0, 1.0)) + rv = uq.NormalRandomVariable('test_rv', theta=(1.0, 1.0)) # evaluate CDF at different points x = (-1.0, 0.0, 0.5, 1.0, 2.0) @@ -948,137 +835,196 @@ def test_RandomVariable_cdf(): cdf, (0.02275013, 0.15865525, 0.30853754, 0.5, 0.84134475), rtol=1e-5 ) - # lognormal, lower truncation - rv = uq.RandomVariable( + +def test_NormalRandomVariable_inverse_transform(): + samples = np.array((0.10, 0.20, 0.30)) + + rv = uq.NormalRandomVariable('test_rv', theta=(1.0, 0.5)) + rv.uni_sample = samples + rv.inverse_transform_sampling() + inverse_transform = rv.sample + assert np.allclose( + inverse_transform, np.array((0.35922422, 0.57918938, 0.73779974)), rtol=1e-5 + ) + + rv = uq.NormalRandomVariable('test_rv', theta=(1.0, 0.5)) + with pytest.raises(ValueError): + rv.inverse_transform_sampling() + + # with truncation limits + + rv = uq.NormalRandomVariable( + 'test_rv', theta=(1.0, 0.5), truncation_limits=(np.nan, 1.20) + ) + rv.uni_sample = samples + rv.inverse_transform_sampling() + inverse_transform = rv.sample + assert np.allclose( + inverse_transform, np.array((0.24508018, 0.43936, 0.57313359)), rtol=1e-5 + ) + + rv = uq.NormalRandomVariable( + 'test_rv', theta=(1.0, 0.5), truncation_limits=(0.80, np.nan) + ) + rv.uni_sample = samples + rv.inverse_transform_sampling() + inverse_transform = rv.sample + assert np.allclose( + inverse_transform, np.array((0.8863824, 0.96947866, 1.0517347)), rtol=1e-5 + ) + + rv = uq.NormalRandomVariable( + 'test_rv', theta=(1.0, 0.5), truncation_limits=(0.80, 1.20) + ) + rv.uni_sample = samples + rv.inverse_transform_sampling() + inverse_transform = rv.sample + assert np.allclose( + inverse_transform, np.array((0.84155378, 0.88203946, 0.92176503)), rtol=1e-5 + ) + + # + # edge cases + # + + # normal with problematic truncation limits + rv = uq.NormalRandomVariable( + 'test_rv', theta=(1.0, 0.5), truncation_limits=(1e8, 2e8) + ) + rv.uni_sample = samples + with pytest.raises(ValueError): + rv.inverse_transform_sampling() + + +def test_LogNormalRandomVariable_cdf(): + # lower truncation + rv = uq.LogNormalRandomVariable( 'test_rv', - 'lognormal', theta=(1.0, 1.0), truncation_limits=np.array((0.10, np.nan)), ) - x = (-1.0, 0.0, 0.5, 1.0, 2.0) cdf = rv.cdf(x) - assert np.allclose( cdf, (0.0, 0.0, 0.23597085, 0.49461712, 0.75326339), rtol=1e-5 ) - # lognormal, upper truncation - rv = uq.RandomVariable( + # upper truncation + rv = uq.LogNormalRandomVariable( 'test_rv', - 'lognormal', theta=(1.0, 1.0), truncation_limits=np.array((np.nan, 5.00)), ) - x = (-1.0, 0.0, 0.5, 1.0, 2.0) cdf = rv.cdf(x) - assert np.allclose( cdf, (0.00, 0.00, 0.25797755, 0.52840734, 0.79883714), rtol=1e-5 ) - # lognormal, no truncation - rv = uq.RandomVariable('test_rv', 'lognormal', theta=(1.0, 1.0)) - + # no truncation + rv = uq.LogNormalRandomVariable('test_rv', theta=(1.0, 1.0)) x = (-1.0, 0.0, 
0.5, 1.0, 2.0) cdf = rv.cdf(x) - assert np.allclose(cdf, (0.0, 0.0, 0.2441086, 0.5, 0.7558914), rtol=1e-5) - # uniform, both theta values - rv = uq.RandomVariable('test_rv', 'uniform', theta=(0.0, 1.0)) +def test_LogNormalRandomVariable_inverse_transform(): + samples = np.array((0.10, 0.20, 0.30)) + rv = uq.LogNormalRandomVariable('test_rv', theta=(1.0, 0.5)) + + rv.uni_sample = samples + rv.inverse_transform_sampling() + inverse_transform = rv.sample + + assert np.allclose( + inverse_transform, np.array((0.52688352, 0.65651442, 0.76935694)), rtol=1e-5 + ) + + # + # lognormal with truncation limits + # + + rv = uq.LogNormalRandomVariable( + 'test_rv', + theta=(1.0, 0.5), + truncation_limits=np.array((0.50, np.nan)), + ) + rv.uni_sample = samples + rv.inverse_transform_sampling() + inverse_transform = rv.sample + assert np.allclose( + inverse_transform, np.array((0.62614292, 0.73192471, 0.83365823)), rtol=1e-5 + ) + + # + # edge cases + # + + # lognormal without values to sample from + rv = uq.LogNormalRandomVariable('test_rv', theta=(1.0, 0.5)) + with pytest.raises(ValueError): + rv.inverse_transform_sampling() + + +def test_UniformRandomVariable_cdf(): + # uniform, both theta values + rv = uq.UniformRandomVariable('test_rv', theta=(0.0, 1.0)) x = (-1.0, 0.0, 0.5, 1.0, 2.0) cdf = rv.cdf(x) - assert np.allclose(cdf, (0.0, 0.0, 0.5, 1.0, 1.0), rtol=1e-5) with warnings.catch_warnings(): warnings.simplefilter('ignore') # uniform, only upper theta value ( -inf implied ) - rv = uq.RandomVariable('test_rv', 'uniform', theta=(np.nan, 100.00)) - + rv = uq.UniformRandomVariable('test_rv', theta=(np.nan, 100.00)) x = (-1.0, 0.0, 0.5, 1.0, 2.0) cdf = rv.cdf(x) - assert np.all(np.isnan(cdf)) # uniform, only lower theta value ( +inf implied ) - rv = uq.RandomVariable('test_rv', 'uniform', theta=(0.00, np.nan)) - + rv = uq.UniformRandomVariable('test_rv', theta=(0.00, np.nan)) x = (-1.0, 0.0, 0.5, 1.0, 2.0) cdf = rv.cdf(x) - assert np.allclose(cdf, (0.0, 0.0, 0.0, 0.0, 0.0), rtol=1e-5) # uniform, with truncation limits - rv = uq.RandomVariable( + rv = uq.UniformRandomVariable( 'test_rv', - 'uniform', theta=(0.0, 10.0), truncation_limits=np.array((0.00, 1.00)), ) - x = (-1.0, 0.0, 0.5, 1.0, 2.0) cdf = rv.cdf(x) - assert np.allclose(cdf, (0.0, 0.0, 0.5, 1.0, 1.0), rtol=1e-5) - # multilinear CDF - x_values = (0.00, 1.00, 2.00, 3.00, 4.00) - y_values = (0.00, 0.20, 0.30, 0.80, 1.00) - values = np.column_stack((x_values, y_values)) - rv = uq.RandomVariable( - 'test_rv', - 'multilinear_CDF', - theta=values - ) - x = (-100.00, 0.00, 0.50, 1.00, 1.50, 2.00, 2.50, 3.00, 3.50, 4.00, 100.00) - cdf = rv.cdf(x) - - assert np.allclose( - cdf, - (0.00, 0.00, 0.10, 0.20, 0.25, 0.30, 0.55, 0.80, 0.90, 1.00, 1.0), - rtol=1e-5, - ) - - -def test_RandomVariable_inverse_transform(): - # - # uniform - # - - rv = uq.RandomVariable('test_rv', 'uniform', theta=(0.0, 1.0)) +def test_UniformRandomVariable_inverse_transform(): + rv = uq.UniformRandomVariable('test_rv', theta=(0.0, 1.0)) samples = np.array((0.10, 0.20, 0.30)) - rv.uni_sample = samples rv.inverse_transform_sampling() inverse_transform = rv.sample - assert np.allclose(inverse_transform, samples, rtol=1e-5) # # uniform with unspecified bounds # - with warnings.catch_warnings(): - warnings.simplefilter('ignore') - rv = uq.RandomVariable('test_rv', 'uniform', theta=(np.nan, 1.0)) - samples = np.array((0.10, 0.20, 0.30)) - rv.uni_sample = samples - rv.inverse_transform_sampling() - inverse_transform = rv.sample - assert np.all(np.isnan(inverse_transform)) - rv = 
uq.RandomVariable('test_rv', 'uniform', theta=(0.00, np.nan)) - rv.uni_sample = samples - rv.inverse_transform_sampling() - inverse_transform = rv.sample - assert np.all(np.isinf(inverse_transform)) - rv = uq.RandomVariable( + rv = uq.UniformRandomVariable('test_rv', theta=(np.nan, 1.0)) + samples = np.array((0.10, 0.20, 0.30)) + rv.uni_sample = samples + rv.inverse_transform_sampling() + inverse_transform = rv.sample + assert np.all(np.isnan(inverse_transform)) + + rv = uq.UniformRandomVariable('test_rv', theta=(0.00, np.nan)) + rv.uni_sample = samples + rv.inverse_transform_sampling() + inverse_transform = rv.sample + assert np.all(np.isinf(inverse_transform)) + + rv = uq.UniformRandomVariable( 'test_rv', - 'uniform', theta=(0.00, 1.00), truncation_limits=np.array((0.20, 0.80)), ) @@ -1088,7 +1034,7 @@ def test_RandomVariable_inverse_transform(): assert np.allclose(inverse_transform, np.array((0.26, 0.32, 0.38)), rtol=1e-5) # sample as a pandas series, with a log() map - rv._f_map = np.log + rv.f_map = np.log assert rv.sample_DF.to_dict() == { 0: -1.3470736479666092, 1: -1.1394342831883646, @@ -1096,177 +1042,132 @@ def test_RandomVariable_inverse_transform(): } # - # lognormal + # edge cases # - rv = uq.RandomVariable('test_rv', 'lognormal', theta=(1.0, 0.5)) + # uniform without values to sample from + rv = uq.UniformRandomVariable('test_rv', theta=(0.0, 1.0)) + with pytest.raises(ValueError): + rv.inverse_transform_sampling() - rv.uni_sample = samples - rv.inverse_transform_sampling() - inverse_transform = rv.sample - assert np.allclose( - inverse_transform, np.array((0.52688352, 0.65651442, 0.76935694)), rtol=1e-5 - ) +def test_MultinomialRandomVariable(): + # multinomial with invalid p values provided in the theta vector + with pytest.raises(ValueError): + uq.MultinomialRandomVariable( + 'rv_invalid', np.array((0.20, 0.70, 0.10, 42.00)) + ) - # - # lognormal with truncation limits - # - rv = uq.RandomVariable( - 'test_rv', - 'lognormal', - theta=(1.0, 0.5), - truncation_limits=np.array((0.50, np.nan)), - ) - rv.uni_sample = samples - rv.inverse_transform_sampling() - inverse_transform = rv.sample - assert np.allclose( - inverse_transform, np.array((0.62614292, 0.73192471, 0.83365823)), rtol=1e-5 - ) +def test_MultilinearCDFRandomVariable(): + # multilinear CDF: cases that should fail - # - # normal - # + x_values = (0.00, 1.00, 2.00, 3.00, 4.00) + y_values = (100.00, 0.20, 0.20, 0.80, 1.00) + values = np.column_stack((x_values, y_values)) + with pytest.raises(ValueError): + uq.MultilinearCDFRandomVariable('test_rv', theta=values) - rv = uq.RandomVariable('test_rv', 'normal', theta=(1.0, 0.5)) + x_values = (0.00, 1.00, 2.00, 3.00, 4.00) + y_values = (0.00, 0.20, 0.20, 0.80, 0.80) + values = np.column_stack((x_values, y_values)) + with pytest.raises(ValueError): + uq.MultilinearCDFRandomVariable('test_rv', theta=values) - rv.uni_sample = samples - rv.inverse_transform_sampling() - inverse_transform = rv.sample + x_values = (0.00, 3.00, 1.00, 2.00, 4.00) + y_values = (0.00, 0.25, 0.50, 0.75, 1.00) + values = np.column_stack((x_values, y_values)) + with pytest.raises(ValueError): + uq.MultilinearCDFRandomVariable('test_rv', theta=values) - assert np.allclose( - inverse_transform, np.array((0.35922422, 0.57918938, 0.73779974)), rtol=1e-5 - ) + x_values = (0.00, 1.00, 2.00, 3.00, 4.00) + y_values = (0.00, 0.75, 0.50, 0.25, 1.00) + values = np.column_stack((x_values, y_values)) + with pytest.raises(ValueError): + uq.MultilinearCDFRandomVariable('test_rv', theta=values) - rv = 
uq.RandomVariable('test_rv', 'normal', theta=(1.0, 0.5)) + x_values = (0.00, 1.00, 2.00, 3.00, 4.00) + y_values = (0.00, 0.50, 0.50, 0.50, 1.00) + values = np.column_stack((x_values, y_values)) with pytest.raises(ValueError): - rv.inverse_transform_sampling() + uq.MultilinearCDFRandomVariable('test_rv', theta=values) - # - # normal with truncation limits - # + x_values = (0.00, 2.00, 2.00, 3.00, 4.00) + y_values = (0.00, 0.20, 0.40, 0.50, 1.00) + values = np.column_stack((x_values, y_values)) + with pytest.raises(ValueError): + uq.MultilinearCDFRandomVariable('test_rv', theta=values) + + +def test_MultilinearCDFRandomVariable_cdf(): + x_values = (0.00, 1.00, 2.00, 3.00, 4.00) + y_values = (0.00, 0.20, 0.30, 0.80, 1.00) + values = np.column_stack((x_values, y_values)) + rv = uq.MultilinearCDFRandomVariable('test_rv', theta=values) + x = (-100.00, 0.00, 0.50, 1.00, 1.50, 2.00, 2.50, 3.00, 3.50, 4.00, 100.00) + cdf = rv.cdf(x) - rv = uq.RandomVariable( - 'test_rv', 'normal', theta=(1.0, 0.5), truncation_limits=(np.nan, 1.20) - ) - rv.uni_sample = samples - rv.inverse_transform_sampling() - inverse_transform = rv.sample - assert np.allclose( - inverse_transform, np.array((0.24508018, 0.43936, 0.57313359)), rtol=1e-5 - ) - rv = uq.RandomVariable( - 'test_rv', 'normal', theta=(1.0, 0.5), truncation_limits=(0.80, np.nan) - ) - rv.uni_sample = samples - rv.inverse_transform_sampling() - inverse_transform = rv.sample assert np.allclose( - inverse_transform, np.array((0.8863824, 0.96947866, 1.0517347)), rtol=1e-5 - ) - rv = uq.RandomVariable( - 'test_rv', 'normal', theta=(1.0, 0.5), truncation_limits=(0.80, 1.20) + cdf, + (0.00, 0.00, 0.10, 0.20, 0.25, 0.30, 0.55, 0.80, 0.90, 1.00, 1.0), + rtol=1e-5, ) - rv.uni_sample = samples + + +def test_MultilinearCDFRandomVariable_inverse_transform(): + x_values = (0.00, 1.00, 2.00, 3.00, 4.00) + y_values = (0.00, 0.20, 0.30, 0.80, 1.00) + values = np.column_stack((x_values, y_values)) + rv = uq.MultilinearCDFRandomVariable('test_rv', theta=values) + + rv.uni_sample = np.array((0.00, 0.1, 0.2, 0.5, 0.8, 0.9, 1.00)) rv.inverse_transform_sampling() inverse_transform = rv.sample assert np.allclose( - inverse_transform, np.array((0.84155378, 0.88203946, 0.92176503)), rtol=1e-5 + inverse_transform, + np.array((0.00, 0.50, 1.00, 2.40, 3.00, 3.50, 4.00)), + rtol=1e-5, ) - # - # empirical - # - rv = uq.RandomVariable( - 'test_rv', 'empirical', raw_samples=(1.00, 2.00, 3.00, 4.00) - ) +def test_EmpiricalRandomVariable_inverse_transform(): + samples = np.array((0.10, 0.20, 0.30)) + + rv = uq.EmpiricalRandomVariable('test_rv', raw_samples=(1.00, 2.00, 3.00, 4.00)) samples = np.array((0.10, 0.50, 0.90)) rv.uni_sample = samples - rv.inverse_transform_sampling(len(samples)) + rv.inverse_transform_sampling() inverse_transform = rv.sample assert np.allclose(inverse_transform, np.array((1.00, 3.00, 4.00)), rtol=1e-5) - rv = uq.RandomVariable( + rv = uq.CoupledEmpiricalRandomVariable( 'test_rv', - 'coupled_empirical', raw_samples=np.array((1.00, 2.00, 3.00, 4.00)), ) - rv.inverse_transform_sampling(6) + rv.inverse_transform_sampling(sample_size=6) inverse_transform = rv.sample assert np.allclose( inverse_transform, np.array((1.00, 2.00, 3.00, 4.00, 1.00, 2.00)), rtol=1e-5 ) - # multilinear CDF - x_values = (0.00, 1.00, 2.00, 3.00, 4.00) - y_values = (0.00, 0.20, 0.30, 0.80, 1.00) - values = np.column_stack((x_values, y_values)) - rv = uq.RandomVariable('test_rv', 'multilinear_CDF', theta=values) - - rv.uni_sample = np.array( - (0.00, 0.1, 0.2, 0.5, 0.8, 0.9, 1.00) - ) - 
rv.inverse_transform_sampling() +def test_DeterministicRandomVariable_inverse_transform(): + rv = uq.DeterministicRandomVariable('test_rv', theta=np.array((0.00,))) + rv.inverse_transform_sampling(4) inverse_transform = rv.sample assert np.allclose( - inverse_transform, - np.array((0.00, 0.50, 1.00, 2.40, 3.00, 3.50, 4.00)), - rtol=1e-5, - ) - - # - # edge cases - # - - # normal with problematic truncation limits - rv = uq.RandomVariable( - 'test_rv', 'normal', theta=(1.0, 0.5), truncation_limits=(1e8, 2e8) - ) - rv.uni_sample = samples - with pytest.raises(ValueError): - rv.inverse_transform_sampling() - - # lognormal without values to sample from - rv = uq.RandomVariable('test_rv', 'lognormal', theta=(1.0, 0.5)) - with pytest.raises(ValueError): - rv.inverse_transform_sampling() - - # uniform without values to sample from - rv = uq.RandomVariable('test_rv', 'uniform', theta=(0.0, 1.0)) - with pytest.raises(ValueError): - rv.inverse_transform_sampling() - - # empirical, coupled_empirical without values to sample from - for distr in ('empirical', 'coupled_empirical'): - rv = uq.RandomVariable('test_rv', distr) - with pytest.raises(ValueError): - rv.inverse_transform_sampling() - - # deterministic - rv = uq.RandomVariable('test_rv', 'deterministic', theta=np.array((0.00, 1.00))) - with pytest.raises(ValueError): - rv.inverse_transform_sampling() - - # multinomial - rv = uq.RandomVariable( - 'test_rv', 'multinomial', theta=np.array((0.20, 0.30, 0.50)) + inverse_transform, np.array((0.00, 0.00, 0.00, 0.00)), rtol=1e-5 ) - with pytest.raises(ValueError): - rv.inverse_transform_sampling() def test_RandomVariable_Set(): # a set of two random variables - rv_1 = uq.RandomVariable('rv1', 'normal', theta=(1.0, 1.0)) - rv_2 = uq.RandomVariable('rv2', 'normal', theta=(1.0, 1.0)) + rv_1 = uq.NormalRandomVariable('rv1', theta=(1.0, 1.0)) + rv_2 = uq.NormalRandomVariable('rv2', theta=(1.0, 1.0)) rv_set = uq.RandomVariableSet( # noqa: F841 'test_set', (rv_1, rv_2), np.array(((1.0, 0.50), (0.50, 1.0))) ) @@ -1275,7 +1176,7 @@ def test_RandomVariable_Set(): assert rv_set.size == 2 # a set with only one random variable - rv_1 = uq.RandomVariable('rv1', 'normal', theta=(1.0, 1.0)) + rv_1 = uq.NormalRandomVariable('rv1', theta=(1.0, 1.0)) rv_set = uq.RandomVariableSet( # noqa: F841 'test_set', (rv_1,), np.array(((1.0, 0.50),)) ) @@ -1287,8 +1188,8 @@ def test_RandomVariable_Set_apply_correlation(reset=False): # correlated, uniform np.random.seed(40) - rv_1 = uq.RandomVariable(name='rv1', distribution='uniform', theta=(-5.0, 5.0)) - rv_2 = uq.RandomVariable(name='rv2', distribution='uniform', theta=(-5.0, 5.0)) + rv_1 = uq.UniformRandomVariable(name='rv1', theta=(-5.0, 5.0)) + rv_2 = uq.UniformRandomVariable(name='rv2', theta=(-5.0, 5.0)) rv_1.uni_sample = np.random.random(size=100) rv_2.uni_sample = np.random.random(size=100) @@ -1309,8 +1210,8 @@ def test_RandomVariable_Set_apply_correlation(reset=False): # we also test .sample here - rv_1.inverse_transform_sampling(10) - rv_2.inverse_transform_sampling(10) + rv_1.inverse_transform_sampling() + rv_2.inverse_transform_sampling() rvset_sample = rvs.sample assert set(rvset_sample.keys()) == set(('rv1', 'rv2')) vals = list(rvset_sample.values()) @@ -1334,8 +1235,8 @@ def test_RandomVariable_Set_apply_correlation_special(): # non positive semidefinite correlation matrix rho = np.array(((1.00, 0.50), (0.50, -1.00))) - rv_1 = uq.RandomVariable('rv1', 'normal', theta=[5.0, 0.1]) - rv_2 = uq.RandomVariable('rv2', 'normal', theta=[5.0, 0.1]) + rv_1 = 
uq.NormalRandomVariable('rv1', theta=[5.0, 0.1]) + rv_2 = uq.NormalRandomVariable('rv2', theta=[5.0, 0.1]) rv_1.uni_sample = np.random.random(size=100) rv_2.uni_sample = np.random.random(size=100) rv_set = uq.RandomVariableSet('rv_set', [rv_1, rv_2], rho) @@ -1343,8 +1244,8 @@ def test_RandomVariable_Set_apply_correlation_special(): # non full rank matrix rho = np.array(((0.00, 0.00), (0.0, 0.0))) - rv_1 = uq.RandomVariable('rv1', 'normal', theta=[5.0, 0.1]) - rv_2 = uq.RandomVariable('rv2', 'normal', theta=[5.0, 0.1]) + rv_1 = uq.NormalRandomVariable('rv1', theta=[5.0, 0.1]) + rv_2 = uq.NormalRandomVariable('rv2', theta=[5.0, 0.1]) rv_1.uni_sample = np.random.random(size=100) rv_2.uni_sample = np.random.random(size=100) rv_set = uq.RandomVariableSet('rv_set', [rv_1, rv_2], rho) @@ -1358,13 +1259,13 @@ def test_RandomVariable_Set_orthotope_density(reset=False): data_dir = 'pelicun/tests/data/uq/test_random_variable_set_orthotope_density' # create some random variables - rv_1 = uq.RandomVariable( - 'rv1', 'normal', theta=[5.0, 0.1], truncation_limits=np.array((np.nan, 10.0)) + rv_1 = uq.NormalRandomVariable( + 'rv1', theta=[5.0, 0.1], truncation_limits=np.array((np.nan, 10.0)) ) - rv_2 = uq.RandomVariable('rv2', 'lognormal', theta=[10.0, 0.2]) - rv_3 = uq.RandomVariable('rv3', 'uniform', theta=[13.0, 17.0]) - rv_4 = uq.RandomVariable('rv4', 'uniform', theta=[0.0, 1.0]) - rv_5 = uq.RandomVariable('rv5', 'uniform', theta=[0.0, 1.0]) + rv_2 = uq.LogNormalRandomVariable('rv2', theta=[10.0, 0.2]) + rv_3 = uq.UniformRandomVariable('rv3', theta=[13.0, 17.0]) + rv_4 = uq.UniformRandomVariable('rv4', theta=[0.0, 1.0]) + rv_5 = uq.UniformRandomVariable('rv5', theta=[0.0, 1.0]) # create a random variable set rv_set = uq.RandomVariableSet( @@ -1425,7 +1326,7 @@ def test_RandomVariableRegistry_generate_sample(reset=False): rng = np.random.default_rng(0) rv_registry_single = uq.RandomVariableRegistry(rng) # create the random variable and add it to the registry - RV = uq.RandomVariable('x', distribution='normal', theta=[1.0, 1.0]) + RV = uq.NormalRandomVariable('x', theta=[1.0, 1.0]) rv_registry_single.add_RV(RV) # Generate a sample @@ -1454,9 +1355,9 @@ def test_RandomVariableRegistry_generate_sample(reset=False): # create a random variable registry and add some random variables to it rng = np.random.default_rng(4) rv_registry = uq.RandomVariableRegistry(rng) - rv_1 = uq.RandomVariable('rv1', 'normal', theta=[5.0, 0.1]) - rv_2 = uq.RandomVariable('rv2', 'lognormal', theta=[10.0, 0.2]) - rv_3 = uq.RandomVariable('rv3', 'uniform', theta=[13.0, 17.0]) + rv_1 = uq.NormalRandomVariable('rv1', theta=[5.0, 0.1]) + rv_2 = uq.LogNormalRandomVariable('rv2', theta=[10.0, 0.2]) + rv_3 = uq.UniformRandomVariable('rv3', theta=[13.0, 17.0]) rv_registry.add_RV(rv_1) rv_registry.add_RV(rv_2) rv_registry.add_RV(rv_3) @@ -1470,8 +1371,8 @@ def test_RandomVariableRegistry_generate_sample(reset=False): rv_registry.add_RV_set(rv_set) # add some more random variables that are not part of the set - rv_4 = uq.RandomVariable('rv4', 'normal', theta=[14.0, 0.30]) - rv_5 = uq.RandomVariable('rv5', 'normal', theta=[15.0, 0.50]) + rv_4 = uq.NormalRandomVariable('rv4', theta=[14.0, 0.30]) + rv_5 = uq.NormalRandomVariable('rv5', theta=[15.0, 0.50]) rv_registry.add_RV(rv_4) rv_registry.add_RV(rv_5) @@ -1494,5 +1395,13 @@ def test_RandomVariableRegistry_generate_sample(reset=False): assert 'rv3' not in rv_dictionary +def test_rv_class_map(): + rv_class = uq.rv_class_map('normal') + assert rv_class.__name__ == 'NormalRandomVariable' + + 
with pytest.raises(ValueError): + uq.rv_class_map('') + + if __name__ == '__main__': pass diff --git a/pelicun/tools/DL_calculation.py b/pelicun/tools/DL_calculation.py index 5449a84a9..768850824 100644 --- a/pelicun/tools/DL_calculation.py +++ b/pelicun/tools/DL_calculation.py @@ -126,12 +126,12 @@ def log_msg(msg): "DMG_stats.csv", "DMG_grp.zip", "DMG_grp_stats.csv", - "DV_bldg_repair_sample.zip", - "DV_bldg_repair_stats.csv", - "DV_bldg_repair_grp.zip", - "DV_bldg_repair_grp_stats.csv", - "DV_bldg_repair_agg.zip", - "DV_bldg_repair_agg_stats.csv", + "DV_repair_sample.zip", + "DV_repair_stats.csv", + "DV_repair_grp.zip", + "DV_repair_grp_stats.csv", + "DV_repair_agg.zip", + "DV_repair_agg_stats.csv", "DL_summary.csv", "DL_summary_stats.csv", ] @@ -146,7 +146,7 @@ def log_msg(msg): 'GroupedStatistics': True, }, 'Loss': { - 'BldgRepair': { + 'Repair': { 'Sample': True, 'Statistics': True, 'GroupedSample': True, @@ -168,7 +168,7 @@ def log_msg(msg): 'GroupedStatistics': True, }, 'Loss': { - 'BldgRepair': { + 'Repair': { 'Sample': True, 'Statistics': True, 'GroupedSample': True, @@ -366,7 +366,8 @@ def run_pelicun( with open(config_path, 'r', encoding='utf-8') as f: config = json.load(f) - custom_dl_file_path = custom_model_dir #f"{config['commonFileDir']}/CustomDLModels/" + # f"{config['commonFileDir']}/CustomDLModels/" + custom_dl_file_path = custom_model_dir DL_config = config.get('DL', None) if not DL_config: @@ -857,7 +858,9 @@ def run_pelicun( if asset_config.get('ComponentDatabasePath', False) is not False: extra_comps = asset_config['ComponentDatabasePath'] - extra_comps = extra_comps.replace('CustomDLDataFolder', custom_dl_file_path) + extra_comps = extra_comps.replace( + 'CustomDLDataFolder', custom_dl_file_path + ) component_db += [ extra_comps, @@ -944,7 +947,7 @@ def run_pelicun( adf.loc['collapse', ('LS1', 'Theta_0')] = 1e10 adf.loc['collapse', 'Incomplete'] = 0 - else: + elif "Water" not in asset_config['ComponentDatabase']: # add a placeholder collapse fragility that will never trigger # collapse, but allow damage processes to work with collapse @@ -988,11 +991,13 @@ def run_pelicun( adf.loc['irreparable', ('LS1', 'Theta_0')] = 1e10 adf.loc['irreparable', 'Incomplete'] = 0 - # TODO: we can improve this by creating a water network-specific assessment class + # TODO: we can improve this by creating a water + # network-specific assessment class if "Water" in asset_config['ComponentDatabase']: # add a placeholder aggregate fragility that will never trigger - # damage, but allow damage processes to aggregate the various pipeline damages + # damage, but allow damage processes to aggregate the + # various pipeline damages adf.loc['aggregate', ('Demand', 'Directional')] = 1 adf.loc['aggregate', ('Demand', 'Offset')] = 0 adf.loc['aggregate', ('Demand', 'Type')] = 'Peak Ground Velocity' @@ -1000,12 +1005,7 @@ def run_pelicun( adf.loc['aggregate', ('LS1', 'Theta_0')] = 1e10 adf.loc['aggregate', 'Incomplete'] = 0 - PAL.damage.load_damage_model( - component_db - + [ - adf, - ] - ) + PAL.damage.load_damage_model(component_db + [adf]) # load the damage process if needed dmg_process = None @@ -1274,23 +1274,23 @@ def run_pelicun( out_config_loss = out_config.get('Loss', {}) # if requested, calculate repair consequences - if loss_config.get('BldgRepair', False): - bldg_repair_config = loss_config['BldgRepair'] + if loss_config.get('Repair', False): + repair_config = loss_config['Repair'] # load the fragility information if ( - bldg_repair_config['ConsequenceDatabase'] + 
repair_config['ConsequenceDatabase'] in default_DBs['repair'].keys() ): consequence_db = [ 'PelicunDefault/' + default_DBs['repair'][ - bldg_repair_config['ConsequenceDatabase'] + repair_config['ConsequenceDatabase'] ], ] conseq_df = PAL.get_default_data( - default_DBs['repair'][bldg_repair_config['ConsequenceDatabase']][ + default_DBs['repair'][repair_config['ConsequenceDatabase']][ :-4 ] ) @@ -1299,10 +1299,12 @@ def run_pelicun( conseq_df = pd.DataFrame() - if bldg_repair_config.get('ConsequenceDatabasePath', False) is not False: - extra_comps = bldg_repair_config['ConsequenceDatabasePath'] + if repair_config.get('ConsequenceDatabasePath', False) is not False: + extra_comps = repair_config['ConsequenceDatabasePath'] - extra_comps = extra_comps.replace('CustomDLDataFolder', custom_dl_file_path) + extra_comps = extra_comps.replace( + 'CustomDLDataFolder', custom_dl_file_path + ) consequence_db += [ extra_comps, @@ -1338,12 +1340,12 @@ def run_pelicun( ), ) - # DL_method = bldg_repair_config['ConsequenceDatabase'] + # DL_method = repair_config['ConsequenceDatabase'] DL_method = damage_config.get('DamageProcess', 'User Defined') rc = ('replacement', 'Cost') - if 'ReplacementCost' in bldg_repair_config.keys(): - rCost_config = bldg_repair_config['ReplacementCost'] + if 'ReplacementCost' in repair_config.keys(): + rCost_config = repair_config['ReplacementCost'] adf.loc[rc, ('Quantity', 'Unit')] = "1 EA" @@ -1380,8 +1382,8 @@ def run_pelicun( adf.loc[rc, ('DS1', 'Theta_0')] = 1 rt = ('replacement', 'Time') - if 'ReplacementTime' in bldg_repair_config.keys(): - rTime_config = bldg_repair_config['ReplacementTime'] + if 'ReplacementTime' in repair_config.keys(): + rTime_config = repair_config['ReplacementTime'] rt = ('replacement', 'Time') adf.loc[rt, ('Quantity', 'Unit')] = "1 EA" @@ -1421,8 +1423,8 @@ def run_pelicun( adf.loc[rt, ('DS1', 'Theta_0')] = 1 rcarb = ('replacement', 'Carbon') - if 'ReplacementCarbon' in bldg_repair_config.keys(): - rCarbon_config = bldg_repair_config['ReplacementCarbon'] + if 'ReplacementCarbon' in repair_config.keys(): + rCarbon_config = repair_config['ReplacementCarbon'] rcarb = ('replacement', 'Carbon') adf.loc[rcarb, ('Quantity', 'Unit')] = "1 EA" @@ -1451,8 +1453,8 @@ def run_pelicun( adf.drop(rcarb, inplace=True) ren = ('replacement', 'Energy') - if 'ReplacementEnergy' in bldg_repair_config.keys(): - rEnergy_config = bldg_repair_config['ReplacementEnergy'] + if 'ReplacementEnergy' in repair_config.keys(): + rEnergy_config = repair_config['ReplacementEnergy'] ren = ('replacement', 'Energy') adf.loc[ren, ('Quantity', 'Unit')] = "1 EA" @@ -1480,7 +1482,7 @@ def run_pelicun( # prepare the loss map loss_map = None - if bldg_repair_config['MapApproach'] == "Automatic": + if repair_config['MapApproach'] == "Automatic": # get the damage sample dmg_sample = PAL.damage.save_sample() @@ -1527,13 +1529,13 @@ def run_pelicun( loss_models.append(loss_cmp) loss_map = pd.DataFrame( - loss_models, columns=['BldgRepair'], index=drivers + loss_models, columns=['Repair'], index=drivers ) - elif bldg_repair_config['MapApproach'] == "User Defined": + elif repair_config['MapApproach'] == "User Defined": - if bldg_repair_config.get('MapFilePath', False) is not False: - loss_map_path = bldg_repair_config['MapFilePath'] + if repair_config.get('MapFilePath', False) is not False: + loss_map_path = repair_config['MapFilePath'] loss_map_path = loss_map_path.replace( 'CustomDLDataFolder', custom_dl_file_path) @@ -1546,13 +1548,13 @@ def run_pelicun( # prepare additional loss map entries, if 
needed if 'DMG-collapse' not in loss_map.index: - loss_map.loc['DMG-collapse', 'BldgRepair'] = 'replacement' - loss_map.loc['DMG-irreparable', 'BldgRepair'] = 'replacement' + loss_map.loc['DMG-collapse', 'Repair'] = 'replacement' + loss_map.loc['DMG-irreparable', 'Repair'] = 'replacement' # assemble the list of requested decision variables DV_list = [] - if bldg_repair_config.get('DecisionVariables', False) is not False: - for DV_i, DV_status in bldg_repair_config[ + if repair_config.get('DecisionVariables', False) is not False: + for DV_i, DV_status in repair_config[ 'DecisionVariables' ].items(): if DV_status is True: @@ -1561,7 +1563,7 @@ def run_pelicun( else: DV_list = None - PAL.bldg_repair.load_model( + PAL.repair.load_model( consequence_db + [ adf, @@ -1570,13 +1572,13 @@ def run_pelicun( decision_variables=DV_list, ) - PAL.bldg_repair.calculate() + PAL.repair.calculate() - agg_repair = PAL.bldg_repair.aggregate_losses() + agg_repair = PAL.repair.aggregate_losses() # if requested, save results - if out_config_loss.get('BldgRepair', False): - repair_sample, repair_units = PAL.bldg_repair.save_sample( + if out_config_loss.get('Repair', False): + repair_sample, repair_units = PAL.repair.save_sample( save_units=True ) repair_units = repair_units.to_frame().T @@ -1601,7 +1603,7 @@ def run_pelicun( out_reqs = [ out if val else "" - for out, val in out_config_loss['BldgRepair'].items() + for out, val in out_config_loss['Repair'].items() ] if np.any( @@ -1625,14 +1627,14 @@ def run_pelicun( repair_sample_s, axis=1 ) repair_sample_s.to_csv( - output_path / "DV_bldg_repair_sample.zip", + output_path / "DV_repair_sample.zip", index_label=repair_sample_s.columns.name, compression=dict( method='zip', - archive_name='DV_bldg_repair_sample.csv', + archive_name='DV_repair_sample.csv', ), ) - output_files.append('DV_bldg_repair_sample.zip') + output_files.append('DV_repair_sample.zip') if 'Statistics' in out_reqs: repair_stats = describe(repair_sample) @@ -1640,10 +1642,10 @@ def run_pelicun( repair_stats = convert_to_SimpleIndex(repair_stats, axis=1) repair_stats.to_csv( - output_path / "DV_bldg_repair_stats.csv", + output_path / "DV_repair_stats.csv", index_label=repair_stats.columns.name, ) - output_files.append('DV_bldg_repair_stats.csv') + output_files.append('DV_repair_stats.csv') if np.any( np.isin(['GroupedSample', 'GroupedStatistics'], out_reqs) @@ -1667,14 +1669,14 @@ def run_pelicun( grp_repair_s, axis=1 ) grp_repair_s.to_csv( - output_path / "DV_bldg_repair_grp.zip", + output_path / "DV_repair_grp.zip", index_label=grp_repair_s.columns.name, compression=dict( method='zip', - archive_name='DV_bldg_repair_grp.csv', + archive_name='DV_repair_grp.csv', ), ) - output_files.append('DV_bldg_repair_grp.zip') + output_files.append('DV_repair_grp.zip') if 'GroupedStatistics' in out_reqs: grp_stats = describe(grp_repair) @@ -1682,10 +1684,10 @@ def run_pelicun( grp_stats = convert_to_SimpleIndex(grp_stats, axis=1) grp_stats.to_csv( - output_path / "DV_bldg_repair_grp_stats.csv", + output_path / "DV_repair_grp_stats.csv", index_label=grp_stats.columns.name, ) - output_files.append('DV_bldg_repair_grp_stats.csv') + output_files.append('DV_repair_grp_stats.csv') if np.any( np.isin(['AggregateSample', 'AggregateStatistics'], out_reqs) @@ -1693,24 +1695,24 @@ def run_pelicun( if 'AggregateSample' in out_reqs: agg_repair_s = convert_to_SimpleIndex(agg_repair, axis=1) agg_repair_s.to_csv( - output_path / "DV_bldg_repair_agg.zip", + output_path / "DV_repair_agg.zip", index_label=agg_repair_s.columns.name, 
compression=dict( method='zip', - archive_name='DV_bldg_repair_agg.csv', + archive_name='DV_repair_agg.csv', ), ) - output_files.append('DV_bldg_repair_agg.zip') + output_files.append('DV_repair_agg.zip') if 'AggregateStatistics' in out_reqs: agg_stats = convert_to_SimpleIndex( describe(agg_repair), axis=1 ) agg_stats.to_csv( - output_path / "DV_bldg_repair_agg_stats.csv", + output_path / "DV_repair_agg_stats.csv", index_label=agg_stats.columns.name, ) - output_files.append('DV_bldg_repair_agg_stats.csv') + output_files.append('DV_repair_agg_stats.csv') # Result Summary ----------------------------------------------------------- @@ -1733,7 +1735,7 @@ def run_pelicun( if loss_config is not None: if 'agg_repair' not in locals(): - agg_repair = PAL.bldg_repair.aggregate_losses() + agg_repair = PAL.repair.aggregate_losses() agg_repair_s = convert_to_SimpleIndex(agg_repair, axis=1) diff --git a/pelicun/tools/export_DB.py b/pelicun/tools/export_DB.py index 1ec0647ff..d1f87f4d1 100644 --- a/pelicun/tools/export_DB.py +++ b/pelicun/tools/export_DB.py @@ -82,7 +82,7 @@ def export_DB(data_path, target_dir): encoding='utf-8') as f: json.dump(pop_dict, f, indent=2) - except: + except (ValueError, NotImplementedError, FileNotFoundError): pass diff --git a/pelicun/uq.py b/pelicun/uq.py index 55cc4de73..67b51a66b 100644 --- a/pelicun/uq.py +++ b/pelicun/uq.py @@ -58,12 +58,12 @@ """ +from abc import ABC, abstractmethod from scipy.stats import uniform, norm from scipy.stats import multivariate_normal as mvn from scipy.stats._mvn import mvndst # pylint: disable=no-name-in-module from scipy.linalg import cholesky, svd from scipy.optimize import minimize -from scipy.interpolate import interp1d import numpy as np import pandas as pd @@ -89,6 +89,17 @@ def scale_distribution(scale_factor, family, theta, truncation_limits=None): Defines the [a,b] truncation limits for the distribution. Use None to assign no limit in one direction. + Returns + ------- + tuple + A tuple containing the scaled parameters and truncation + limits: + - theta_new (float ndarray of length 2): Scaled parameters of + the distribution. + - truncation_limits (float ndarray of length 2 or None): + Scaled truncation limits for the distribution, or None if no + truncation is applied. + Raises ------ ValueError @@ -155,12 +166,14 @@ def mvn_orthotope_density(mu, COV, lower=np.nan, upper=np.nan): multivariate cases. If the distribution is non-truncated from above in a subset of the dimensions, use either `None` or assign an infinite value (i.e. numpy.inf) to those dimensions. + Returns ------- - alpha: float - Estimate of the probability density within the hyperrectangle - eps_alpha: float - Estimate of the error in alpha. + tuple + alpha: float + Estimate of the probability density within the hyperrectangle. + eps_alpha: float + Estimate of the error in the calculated probability density. """ @@ -232,24 +245,22 @@ def _get_theta(params, inits, dist_list): dist_list: list of str List of strings containing the names of the distributions. + Returns + ------- + Theta + The estimated parameters. + Raises ------ ValueError If any of the distributions is unsupported. - Returns - ------- - Theta: - The estimated parameters. 
- """ theta = np.zeros(inits.shape) for i, (params_i, inits_i, dist_i) in enumerate(zip(params, inits, dist_list)): - if dist_i in {'normal', 'lognormal'}: - # Note that the standard deviation is fit in log space, hence the # unusual-looking transformation here sig = np.exp(np.log(inits_i[1]) + params_i[1]) @@ -279,19 +290,19 @@ def _get_limit_probs(limits, distribution, theta): theta: float ndarray The parameters of the specified distribution. + Returns + ------- + tuple + The CDF values. + Raises ------ ValueError If any of the distributions is unsupported. - Returns - ------- - The CDF values. - """ if distribution in {'normal', 'normal-stdev', 'lognormal'}: - a, b = limits mu = theta[0] sig = theta[1] @@ -318,44 +329,48 @@ def _get_std_samples(samples, theta, tr_limits, dist_list): Parameters ---------- - samples: float ndarray, DxN + samples: float ndarray DxN 2D array of samples. Each row represents a sample. - theta: float ndarray, Dx2 + theta: float ndarray Dx2 2D array of theta values that represent each dimension of the samples - tr_limits: float ndarray, Dx2 + tr_limits: float ndarray Dx2 2D array with rows that represent [a, b] pairs of truncation limits dist_list: str ndarray of length D 1D array containing the names of the distributions + Returns + ------- + ndarray + float DxN ndarray of the samples transformed to standard normal + space, with each row representing a transformed sample in + standard normal space. + Raises ------ ValueError If any of the distributions is unsupported. - Returns - ------- - std_samples: float ndarray, DxN - The samples transformed to standard normal space. - """ std_samples = np.zeros(samples.shape) for i, (samples_i, theta_i, tr_lim_i, dist_i) in enumerate( - zip(samples, theta, tr_limits, dist_list)): - + zip(samples, theta, tr_limits, dist_list) + ): if dist_i in {'normal', 'normal-stdev', 'lognormal'}: - lim_low = tr_lim_i[0] lim_high = tr_lim_i[1] - if (True in (samples_i > lim_high).tolist() - or True in (samples_i < lim_low).tolist()): + if ( + True in (samples_i > lim_high).tolist() + or True in (samples_i < lim_low).tolist() + ): raise ValueError( 'One or more sample values lie outside ' - 'of the specified truncation limits.') + 'of the specified truncation limits.' + ) # first transform from normal to uniform uni_samples = norm.cdf(samples_i, loc=theta_i[0], scale=theta_i[1]) @@ -369,7 +384,7 @@ def _get_std_samples(samples, theta, tr_limits, dist_list): uni_samples = (uni_samples - p_a) / (p_b - p_a) # then transform from uniform to standard normal - std_samples[i] = norm.ppf(uni_samples, loc=0., scale=1.) + std_samples[i] = norm.ppf(uni_samples, loc=0.0, scale=1.0) else: raise ValueError(f'Unsupported distribution: {dist_i}') @@ -389,21 +404,20 @@ def _get_std_corr_matrix(std_samples): Array containing the standard normal samples. Each column is a sample. It should not contain Inf or NaN values. + Returns + ------- + ndarray + Correlation matrix. float ndarray, DxD + Raises ------ ValueError If any of the elements of std_samples is np.inf or np.nan - Returns - ------- - rho_hat: float ndarray, DxD - Correlation matrix. 
""" - if (True in np.isinf(std_samples) - or True in np.isnan(std_samples)): - raise ValueError( - 'std_samples array must not contain inf or NaN values') + if True in np.isinf(std_samples) or True in np.isnan(std_samples): + raise ValueError('std_samples array must not contain inf or NaN values') n_dims, n_samples = std_samples.shape @@ -415,23 +429,22 @@ def _get_std_corr_matrix(std_samples): for dim_i in range(n_dims): for dim_j in np.arange(dim_i + 1, n_dims): rho_hat[dim_i, dim_j] = ( - np.sum(std_samples[dim_i] * std_samples[dim_j]) / n_samples) + np.sum(std_samples[dim_i] * std_samples[dim_j]) / n_samples + ) rho_hat[dim_j, dim_i] = rho_hat[dim_i, dim_j] # make sure rho_hat is positive semidefinite try: - cholesky(rho_hat, lower=True) # if this works, we're good # otherwise, we can try to fix the matrix using SVD except np.linalg.LinAlgError: - try: - - U, s, _ = svd(rho_hat, ) + U, s, _ = svd( + rho_hat, + ) except np.linalg.LinAlgError: - # if this also fails, we give up return None @@ -441,7 +454,7 @@ def _get_std_corr_matrix(std_samples): np.fill_diagonal(rho_hat, 1.0) # check if we introduced any unreasonable values - if ((np.max(rho_hat) > 1.01) or (np.min(rho_hat) < -1.01)): + if (np.max(rho_hat) > 1.01) or (np.min(rho_hat) < -1.01): return None # round values to 1.0 and -1.0, if needed @@ -456,9 +469,21 @@ def _get_std_corr_matrix(std_samples): def _mvn_scale(x, rho): """ - Utility function used in _neg_log_likelihood - """ + Scaling utility function + + Parameters + ---------- + x: ndarray + Input array + rho: ndarray + Covariance matrix + + Returns + ------- + ndarray + Scaled values + """ x = np.atleast_2d(x) n_dims = x.shape[1] @@ -473,9 +498,18 @@ def _mvn_scale(x, rho): return b / a -def _neg_log_likelihood(params, inits, bnd_lower, bnd_upper, samples, - dist_list, tr_limits, det_limits, censored_count, - enforce_bounds=False): +def _neg_log_likelihood( + params, + inits, + bnd_lower, + bnd_upper, + samples, + dist_list, + tr_limits, + det_limits, + censored_count, + enforce_bounds=False, +): """ Calculate the negative log likelihood of the given data samples given the parameter values and distribution information. 
@@ -540,8 +574,8 @@ def _neg_log_likelihood(params, inits, bnd_lower, bnd_upper, samples, # calculate the marginal likelihoods for i, (theta_i, samples_i, tr_lim_i, dist_i) in enumerate( - zip(theta, samples, tr_limits, dist_list)): - + zip(theta, samples, tr_limits, dist_list) + ): # consider truncation if needed p_a, p_b = _get_limit_probs(tr_lim_i, dist_i, theta_i) # this is the probability mass within the @@ -552,8 +586,9 @@ def _neg_log_likelihood(params, inits, bnd_lower, bnd_upper, samples, # Note that we are performing this without any transformation to be able # to respect truncation limits if dist_i in {'normal', 'lognormal'}: - likelihoods[i] = norm.pdf( - samples_i, loc=theta_i[0], scale=theta_i[1]) / tr_alpha + likelihoods[i] = ( + norm.pdf(samples_i, loc=theta_i[0], scale=theta_i[1]) / tr_alpha + ) # transform every sample into standard normal space std_samples = _get_std_samples(samples, theta, tr_limits, dist_list) @@ -568,13 +603,12 @@ def _neg_log_likelihood(params, inits, bnd_lower, bnd_upper, samples, # likelihoods related to censoring need to be handled together if censored_count > 0: - det_lower = np.zeros(n_dims) det_upper = np.zeros(n_dims) for i, (theta_i, tr_lim_i, det_lim_i, dist_i) in enumerate( - zip(theta, tr_limits, det_limits, dist_list)): - + zip(theta, tr_limits, det_limits, dist_list) + ): # prepare the standardized truncation and detection limits p_a, p_b = _get_limit_probs(tr_lim_i, dist_i, theta_i) p_l, p_u = _get_limit_probs(det_lim_i, dist_i, theta_i) @@ -584,15 +618,16 @@ def _neg_log_likelihood(params, inits, bnd_lower, bnd_upper, samples, p_l, p_u = [(lim - p_a) / (p_b - p_a) for lim in (p_l, p_u)] # transform limits to standard normal space - det_lower[i], det_upper[i] = norm.ppf([p_l, p_u], loc=0., scale=1.) + det_lower[i], det_upper[i] = norm.ppf([p_l, p_u], loc=0.0, scale=1.0) # get the likelihood of getting a non-censored sample given the # detection limits and the correlation matrix det_alpha, eps_alpha = mvn_orthotope_density( - np.zeros(n_dims), rho_hat, det_lower, det_upper) + np.zeros(n_dims), rho_hat, det_lower, det_upper + ) # Make sure det_alpha is estimated with sufficient accuracy - if det_alpha <= 100. * eps_alpha: + if det_alpha <= 100.0 * eps_alpha: return 1e10 # make sure that the likelihood of censoring a sample is positive @@ -618,8 +653,10 @@ def _neg_log_likelihood(params, inits, bnd_lower, bnd_upper, samples, likelihoods = np.clip(likelihoods, a_min=np.nextafter(0, 1), a_max=None) # calculate the total negative log likelihood - NLL = -(np.sum(np.log(likelihoods)) # from samples - + censored_count * np.log(cen_likelihood)) # censoring influence + NLL = -( + np.sum(np.log(likelihoods)) # from samples + + censored_count * np.log(cen_likelihood) + ) # censoring influence # normalize the NLL with the sample count NLL = NLL / samples.size @@ -629,11 +666,15 @@ def _neg_log_likelihood(params, inits, bnd_lower, bnd_upper, samples, return NLL -def fit_distribution_to_sample(raw_samples, distribution, - truncation_limits=(np.nan, np.nan), - censored_count=0, detection_limits=(np.nan, np.nan), - multi_fit=False, - logger_object=None): +def fit_distribution_to_sample( + raw_samples, + distribution, + truncation_limits=(np.nan, np.nan), + censored_count=0, + detection_limits=(np.nan, np.nan), + multi_fit=False, + logger_object=None, +): """ Fit a distribution to sample using maximum likelihood estimation. 
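# --- Editor's illustration (not part of the patch) ---------------------------
# With the reformatted signature above, a univariate call to the public fitting
# routine could look like the sketch below. The sample values are invented for
# the example; per the docstring, theta holds [mean, COV] per dimension for a
# 'normal' fit and Rho is the (here trivial) correlation matrix estimate.

import numpy as np
from pelicun import uq

demand_sample = np.array((0.8, 1.1, 0.9, 1.3, 1.0, 1.2))
theta, rho = uq.fit_distribution_to_sample(
    demand_sample,
    'normal',
    truncation_limits=(0.0, np.nan),  # lower-truncated at zero
)
# theta -> one row per dimension with the fitted mean and coefficient of variation
# -----------------------------------------------------------------------------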
@@ -687,20 +728,22 @@ def fit_distribution_to_sample(raw_samples, distribution, Returns ------- - theta: float ndarray - Estimates of the parameters of the fitted probability distribution in - each dimension. The following parameters are returned for the supported - distributions: - normal - mean, coefficient of variation; - lognormal - median, log standard deviation; - Rho: float 2D ndarray, optional - In the multivariate case, returns the estimate of the correlation - matrix. + tuple + theta: float ndarray + Estimates of the parameters of the fitted probability + distribution in each dimension. The following parameters + are returned for the supported distributions: normal - + mean, coefficient of variation; lognormal - median, log + standard deviation; + Rho: float 2D ndarray, optional + In the multivariate case, returns the estimate of the + correlation matrix. Raises ------ ValueError If NaN values are produced during standard normal space transformation + """ samples = np.atleast_2d(raw_samples) @@ -720,9 +763,7 @@ def fit_distribution_to_sample(raw_samples, distribution, # Convert samples and limits to log space if the distribution is lognormal for d_i, distr in enumerate(dist_list): - if distr == 'lognormal': - samples[d_i] = np.log(samples[d_i]) for lim in range(2): @@ -739,7 +780,6 @@ def fit_distribution_to_sample(raw_samples, distribution, sig_init = np.ones_like(mu_init) * np.nan for d_i, distr in enumerate(dist_list): - if distr in {'normal', 'normal-stdev', 'lognormal'}: # use the first two moments mu_init[d_i] = np.mean(samples[d_i]) @@ -751,8 +791,7 @@ def fit_distribution_to_sample(raw_samples, distribution, # replace zero standard dev with negligible standard dev sig_zero_id = np.where(sig_init == 0.0)[0] - sig_init[sig_zero_id] = (1e-6 * np.abs(mu_init[sig_zero_id]) - + np.nextafter(0, 1)) + sig_init[sig_zero_id] = 1e-6 * np.abs(mu_init[sig_zero_id]) + np.nextafter(0, 1) # prepare a vector of initial values # Note: The actual optimization uses zeros as initial parameters to @@ -773,10 +812,11 @@ def fit_distribution_to_sample(raw_samples, distribution, # There is nothing to gain from a time-consuming optimization if.. # the number of samples is too small - if ((n_samples < 3) or ( - # there are no truncation or detection limits involved - np.all(np.isnan(tr_limits)) and np.all(np.isnan(det_limits)))): - + if (n_samples < 3) or ( + # there are no truncation or detection limits involved + np.all(np.isnan(tr_limits)) + and np.all(np.isnan(det_limits)) + ): # In this case, it is typically hard to improve on the method of # moments estimates for the parameters of the marginal distributions theta = inits @@ -784,64 +824,87 @@ def fit_distribution_to_sample(raw_samples, distribution, # Otherwise, we run the optimization that aims to find the parameters that # maximize the likelihood of observing the samples else: - # First, optimize for each marginal independently for dim in range(n_dims): - - inits_i = inits[dim:dim + 1] + inits_i = inits[dim : dim + 1] # Censored samples are only considered in the following step, but # we fit a truncated distribution if there are censored samples to # make it easier to fit the censored distribution later. 
tr_limits_i = [np.nan, np.nan] for lim in range(2): - if ((np.isnan(tr_limits[dim][lim])) and ( - not np.isnan(det_limits[dim][lim]))): + if (np.isnan(tr_limits[dim][lim])) and ( + not np.isnan(det_limits[dim][lim]) + ): tr_limits_i[lim] = det_limits[dim][lim] elif not np.isnan(det_limits[dim][lim]): if lim == 0: - tr_limits_i[lim] = np.min([tr_limits[dim][lim], - det_limits[dim][lim]]) + tr_limits_i[lim] = np.min( + [tr_limits[dim][lim], det_limits[dim][lim]] + ) elif lim == 1: - tr_limits_i[lim] = np.max([tr_limits[dim][lim], - det_limits[dim][lim]]) + tr_limits_i[lim] = np.max( + [tr_limits[dim][lim], det_limits[dim][lim]] + ) else: tr_limits_i[lim] = tr_limits[dim][lim] - out_m_i = minimize(_neg_log_likelihood, - np.zeros(inits[dim].size), - args=(inits_i, - bnd_lower[dim], - bnd_upper[dim], - samples[dim:dim + 1], - [dist_list[dim], ], - [tr_limits_i, ], - [np.nan, np.nan], - 0, True,), - method='BFGS', - options={'maxiter': 50} - ) + out_m_i = minimize( + _neg_log_likelihood, + np.zeros(inits[dim].size), + args=( + inits_i, + bnd_lower[dim], + bnd_upper[dim], + samples[dim : dim + 1], + [ + dist_list[dim], + ], + [ + tr_limits_i, + ], + [np.nan, np.nan], + 0, + True, + ), + method='BFGS', + options={'maxiter': 50}, + ) out = out_m_i.x.reshape(inits_i.shape) - theta = _get_theta(out, inits_i, [dist_list[dim], ]) + theta = _get_theta( + out, + inits_i, + [ + dist_list[dim], + ], + ) inits[dim] = theta[0] # Second, if multi_fit is requested or there are censored samples, # we attempt the multivariate fitting using the marginal results as # initial parameters. if multi_fit or (censored_count > 0): - bnd_lower = bnd_lower.flatten() bnd_upper = bnd_upper.flatten() - out_m = minimize(_neg_log_likelihood, - np.zeros(inits.size), - args=(inits, bnd_lower, bnd_upper, samples, - dist_list, tr_limits, det_limits, - censored_count, True,), - method='BFGS', - options={'maxiter': 50} - ) + out_m = minimize( + _neg_log_likelihood, + np.zeros(inits.size), + args=( + inits, + bnd_lower, + bnd_upper, + samples, + dist_list, + tr_limits, + det_limits, + censored_count, + True, + ), + method='BFGS', + options={'maxiter': 50}, + ) out = out_m.x.reshape(inits.shape) theta = _get_theta(out, inits, dist_list) @@ -852,8 +915,7 @@ def fit_distribution_to_sample(raw_samples, distribution, # Calculate rho in the standard normal space because we will generate new # samples using that type of correlation (i.e., Gaussian copula) std_samples = _get_std_samples(samples, theta, tr_limits, dist_list) - if True in np.isnan(std_samples) or \ - True in np.isinf(std_samples): + if True in np.isnan(std_samples) or True in np.isinf(std_samples): raise ValueError( 'Something went wrong.' '\n' @@ -872,11 +934,14 @@ def fit_distribution_to_sample(raw_samples, distribution, logger_object.msg( "\nWARNING: Demand sample size too small to reliably estimate " "the correlation matrix. Assuming uncorrelated demands.", - prepend_timestamp=False, prepend_blank_space=False) + prepend_timestamp=False, + prepend_blank_space=False, + ) else: print( "\nWARNING: Demand sample size too small to reliably estimate " - "the correlation matrix. Assuming uncorrelated demands.") + "the correlation matrix. Assuming uncorrelated demands." + ) for d_i, distr in enumerate(dist_list): # Convert mean back to linear space if the distribution is lognormal @@ -922,10 +987,10 @@ def _OLS_percentiles(params, values, perc, family): ------ ValueError If `family` is not 'normal' or 'lognormal'. 
+ """ if family == 'normal': - theta_0 = params[0] theta_1 = params[1] @@ -935,7 +1000,6 @@ def _OLS_percentiles(params, values, perc, family): val_hat = norm.ppf(perc, loc=theta_0, scale=theta_1) elif family == 'lognormal': - theta_0 = params[0] theta_1 = params[1] @@ -970,10 +1034,13 @@ def fit_distribution_to_percentiles(values, percentiles, families): Returns ------- - family: string - The optimal choice of family among the provided list of families - theta: array of float - Parameters of the fitted distribution. + tuple + family: string + The optimal choice of family among the provided list of + families + theta: array of float + Parameters of the fitted distribution. + """ out_list = [] @@ -984,89 +1051,49 @@ def fit_distribution_to_percentiles(values, percentiles, families): extreme_id = np.argmax(percentiles - 0.5) for family in families: - - inits = [values[median_id], ] + inits = [ + values[median_id], + ] if family == 'normal': inits.append( - (np.abs(values[extreme_id] - inits[0]) - / np.abs(norm.ppf(percentiles[extreme_id], - loc=0, scale=1)))) + ( + np.abs(values[extreme_id] - inits[0]) + / np.abs(norm.ppf(percentiles[extreme_id], loc=0, scale=1)) + ) + ) elif family == 'lognormal': inits.append( - (np.abs(np.log(values[extreme_id] / inits[0])) - / np.abs(norm.ppf(percentiles[extreme_id], - loc=0, scale=1)))) + ( + np.abs(np.log(values[extreme_id] / inits[0])) + / np.abs(norm.ppf(percentiles[extreme_id], loc=0, scale=1)) + ) + ) - out_list.append(minimize(_OLS_percentiles, inits, - args=(values, percentiles, family), - method='BFGS')) + out_list.append( + minimize( + _OLS_percentiles, + inits, + args=(values, percentiles, family), + method='BFGS', + ) + ) best_out_id = np.argmin([out.fun for out in out_list]) return families[best_out_id], out_list[best_out_id].x -class RandomVariable: +class BaseRandomVariable(ABC): """ - Description + Base abstract class for different types of random variables. - Parameters - ---------- - name: string - A unique string that identifies the random variable. - distribution: {'normal', 'lognormal', 'multinomial', 'custom', - 'empirical', 'coupled_empirical', 'uniform', 'deterministic', - 'multilinear_CDF'}, optional - Defines the type of probability distribution for the random - variable. - theta: float scalar or ndarray, optional - Set of parameters that define the Cumulative Distribution - Function (CDF) of the variable given its distribution - type. The following parameters are expected currently for the - supported distribution types: - normal - mean, standard deviation; - lognormal - median, log standard deviation; - uniform - a, b, the lower and upper bounds of the distribution; - multinomial - likelihood of each unique event (the last event's - likelihood is adjusted automatically to ensure the likelihoods sum up - to one); - custom - according to the custom expression provided; - empirical and coupled_empirical - N/A; - deterministic - the deterministic value assigned to the variable. - multilinear_CDF - a Nx2 numpy array defining the - vertices of a multilinear CDF curve in the form ((X_0, 0.00), - (X_1, Y_1), ..., (X_n, 1.00)). The first Y value has to be - 0.00 and the last 1.00 for a valid CDF, and the X_i's as well - as the Y_i's should be in increasing order, otherwise an error - is raised. - truncation_limits: float ndarray, optional - Defines the np.array((a, b)) truncation limits for the - distribution. Use np.nan to assign no limit in one direction, - like so: np.array((a, np.nan)), or np.array((np.nan, b)). 
- custom_expr: string, optional - Provide an expression that is a Python syntax for a custom CDF. The - controlling variable shall be "x" and the parameters shall be "p1", - "p2", etc. - f_map: function, optional - A user-defined function that is applied on the realizations before - returning a sample. - anchor: RandomVariable, optional - Anchors this to another variable. If the anchor is not None, this - variable will be perfectly correlated with its anchor. Note that - the attributes of this variable and its anchor do not have to be - identical. """ def __init__( self, name, - distribution, - theta=np.nan, - truncation_limits=np.nan, - custom_expr=None, - raw_samples=None, f_map=None, anchor=None, ): @@ -1075,7 +1102,16 @@ def __init__( Parameters ---------- - see the attributes of the RandomVariable class + name: string + A unique string that identifies the random variable. + f_map: function, optional + A user-defined function that is applied on the realizations before + returning a sample. + anchor: RandomVariable, optional + Anchors this to another variable. If the anchor is not None, this + variable will be perfectly correlated with its anchor. Note that + the attributes of this variable and its anchor do not have to be + identical. Raises ------ @@ -1086,176 +1122,74 @@ def __init__( """ self.name = name - - if pd.isna(distribution): - distribution = 'deterministic' - - if ( - distribution not in ['empirical', 'coupled_empirical'] - ) and (np.all(np.isnan(theta))): - - raise ValueError( - f"A random variable that follows a {distribution} distribution " - f"is characterized by a set of parameters (theta). The " - f"parameters need to be provided when the RV is created." - ) - - if distribution == 'multinomial': - if np.sum(theta) > 1: - raise ValueError( - f"The set of p values provided for a multinomial " - f"distribution shall sum up to less than or equal to 1.0. " - f"The provided values sum up to {np.sum(theta)}. p = " - f"{theta} ." - ) - - if distribution == 'multilinear_CDF': - y_1 = theta[0, 1] - if y_1 != 0.00: - raise ValueError( - "For multilinear CDF random variables, " - "y_1 should be set to 0.00" - ) - y_n = theta[-1, 1] - if y_n != 1.00: - raise ValueError( - "For multilinear CDF random variables, " - "y_n should be set to 1.00" - ) - - x_s = theta[:, 0] - if not np.array_equal(np.sort(x_s), x_s): - raise ValueError( - "For multilinear CDF random variables, " - "Xs should be specified in ascending order" - ) - if np.any(np.isclose(np.diff(x_s), 0.00)): - raise ValueError( - "For multilinear CDF random variables, " - "Xs should be specified in strictly ascending order" - ) - - y_s = theta[:, 1] - if not np.array_equal(np.sort(y_s), y_s): - raise ValueError( - "For multilinear CDF random variables, " - "Ys should be specified in ascending order" - ) - - if np.any(np.isclose(np.diff(y_s), 0.00)): - raise ValueError( - "For multilinear CDF random variables, " - "Ys should be specified in strictly ascending order" - ) - if np.any(~np.isnan(truncation_limits)): - raise ValueError( - "Truncation limits not supported " - "for multilinear CDF random variables." 
- ) - - # save the other parameters internally - self._distribution = distribution - self._theta = np.atleast_1d(theta) - self._truncation_limits = truncation_limits - self._custom_expr = custom_expr - self._f_map = f_map - self._raw_samples = np.atleast_1d(raw_samples) + self.distribution = None + self.f_map = f_map self._uni_samples = None - self._RV_set = None + self.RV_set = None self._sample_DF = None - + self._sample = None if anchor is None: - self._anchor = self + self.anchor = self else: - self._anchor = anchor + self.anchor = anchor @property - def distribution(self): - """ - Return the assigned probability distribution type. - """ - return self._distribution - - @property - def theta(self): - """ - Return the assigned probability distribution parameters. - """ - return self._theta - - @theta.setter - def theta(self, value): - """ - Define the parameters of the distribution of the random variable + def sample(self): """ - self._theta = value + Return the empirical or generated sample. - @property - def truncation_limits(self): - """ - Return the assigned truncation limits. - """ - return self._truncation_limits + Returns + ------- + ndarray + The empirical or generated sample. - @property - def custom_expr(self): """ - Return the assigned custom expression for CDF. - """ - return self._custom_expr + if self.f_map is not None: + return self.f_map(self._sample) + return self._sample - @property - def RV_set(self): - """ - Return the RV_set this RV is a member of + @sample.setter + def sample(self, value): """ - return self._RV_set + Assign a sample to the random variable. - @RV_set.setter - def RV_set(self, value): - """ - Assign an RV_set to this RV - """ - self._RV_set = value + Parameters + ---------- + value: ndarray + Sample to assign - @property - def sample(self): - """ - Return the empirical or generated sample. """ - if self._f_map is not None: - - return self._f_map(self._sample) - - # else: - return self._sample + self._sample = value + self._sample_DF = pd.Series(value) @property def sample_DF(self): """ Return the empirical or generated sample in a pandas Series. - """ - if self._f_map is not None: - - return self._sample_DF.apply(self._f_map) - # else: - return self._sample_DF + Returns + ------- + ndarray + The empirical or generated sample in a pandas Series. - @sample.setter - def sample(self, value): - """ - Assign a sample to the random variable """ - self._sample = value - self._sample_DF = pd.Series(value) + if self.f_map is not None: + return self._sample_DF.apply(self.f_map) + + return self._sample_DF @property def uni_sample(self): """ Return the sample from the controlling uniform distribution. + + Returns + ------- + ndarray + The sample from the controlling uniform distribution. + """ - return self._anchor._uni_samples + return self.anchor._uni_samples @uni_sample.setter def uni_sample(self, value): @@ -1266,284 +1200,857 @@ def uni_sample(self, value): ---------- value: float ndarray An array of floating point values in the [0, 1] domain. + """ self._uni_samples = value - @property - def anchor(self): - """ - Return the anchor of the variable (if any). 
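A short sketch of how the f_map hook on BaseRandomVariable behaves: the mapping is applied lazily when `sample` is read, while the stored realizations stay unmapped. The clipping function and parameters are hypothetical, and NormalRandomVariable (defined further below) stands in for any concrete subclass:

    import numpy as np
    from pelicun.uq import NormalRandomVariable

    # hypothetical post-processing: clip negative realizations at zero
    rv = NormalRandomVariable(
        'clipped_demand',
        theta=np.array((1.0, 0.8)),          # mean, coefficient of variation
        f_map=lambda s: np.maximum(s, 0.0),
    )
    rv.uni_sample = np.linspace(0.01, 0.99, 99)
    rv.inverse_transform_sampling()
    assert (rv.sample >= 0.0).all()          # f_map applied on read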
- """ - return self._anchor - @anchor.setter - def anchor(self, value): - """ - Assign an anchor to the random variable +class CommonRandomVariable(BaseRandomVariable): + """ + Random variable that needs `values` in `inverse_transform` + """ + + @abstractmethod + def __init__( + self, + name, + theta, + truncation_limits=np.array((np.nan, np.nan)), + f_map=None, + anchor=None, + ): """ - self._anchor = value + Instantiates a normal random variable. - def cdf(self, values): + Parameters + ---------- + name: string + A unique string that identifies the random variable. + theta: 2-element float ndarray + Set of parameters that define the Cumulative Distribution + Function (CDF) of the variable: Mean, coefficient of + variation. + truncation_limits: float ndarray, optional + Defines the np.array((a, b)) truncation limits for the + distribution. Use np.nan to assign no limit in one direction, + like so: np.array((a, np.nan)), or np.array((np.nan, b)). + f_map: function, optional + A user-defined function that is applied on the realizations before + returning a sample. + anchor: RandomVariable, optional + Anchors this to another variable. If the anchor is not None, this + variable will be perfectly correlated with its anchor. Note that + the attributes of this variable and its anchor do not have to be + identical. + + """ + super().__init__( + name, + f_map, + anchor, + ) + + @abstractmethod + def inverse_transform(self, values): """ - Returns the cdf at the given values + Uses inverse probability integral transformation on the + provided values. + """ - result = None - if self.distribution == 'normal': - mu, cov = self.theta[:2] - sig = np.abs(mu) * cov + def inverse_transform_sampling(self): + """ + Creates a sample using inverse probability integral + transformation. - if np.any(~np.isnan(self.truncation_limits)): - a, b = self.truncation_limits + Raises + ------ + ValueError + If there is no available uniform sample. + """ + if self.uni_sample is None: + raise ValueError('No available uniform sample.') + self.sample = self.inverse_transform(self.uni_sample) - if np.isnan(a): - a = -np.inf - if np.isnan(b): - b = np.inf - p_a, p_b = [norm.cdf((lim - mu) / sig) for lim in (a, b)] +class SampleSizeRandomVariable(BaseRandomVariable): + """ + Random variable that needs `sample_size` in `inverse_transform` + """ - # cap the values at the truncation limits - values = np.minimum(np.maximum(values, a), b) + @abstractmethod + def __init__( + self, + name, + theta, + truncation_limits=np.array((np.nan, np.nan)), + f_map=None, + anchor=None, + ): + """ + Instantiates a normal random variable. - # get the cdf from a non-truncated normal - p_vals = norm.cdf(values, loc=mu, scale=sig) + Parameters + ---------- + name: string + A unique string that identifies the random variable. + theta: 2-element float ndarray + Set of parameters that define the Cumulative Distribution + Function (CDF) of the variable: Mean, coefficient of + variation. + truncation_limits: float ndarray, optional + Defines the np.array((a, b)) truncation limits for the + distribution. Use np.nan to assign no limit in one direction, + like so: np.array((a, np.nan)), or np.array((np.nan, b)). + f_map: function, optional + A user-defined function that is applied on the realizations before + returning a sample. + anchor: RandomVariable, optional + Anchors this to another variable. If the anchor is not None, this + variable will be perfectly correlated with its anchor. 
Note that + the attributes of this variable and its anchor do not have to be + identical. + + """ + super().__init__( + name, + f_map, + anchor, + ) - # adjust for truncation - result = (p_vals - p_a) / (p_b - p_a) + @abstractmethod + def inverse_transform(self, sample_size): + """ + Uses inverse probability integral transformation on the + provided values. - else: - result = norm.cdf(values, loc=mu, scale=sig) + """ - elif self.distribution == 'lognormal': - theta, beta = self.theta[:2] + def inverse_transform_sampling(self, sample_size): + """ + Creates a sample using inverse probability integral + transformation. + """ + self.sample = self.inverse_transform(sample_size) - if np.any(~np.isnan(self.truncation_limits)): - a, b = self.truncation_limits - if np.isnan(a): - a = np.nextafter(0, 1) - if np.isnan(b): - b = np.inf +class NormalRandomVariable(CommonRandomVariable): + """ + Normal random variable. - p_a, p_b = [norm.cdf((np.log(lim) - np.log(theta)) / beta) - for lim in (a, b)] + """ - # cap the values at the truncation limits - values = np.minimum(np.maximum(values, a), b) + def __init__( + self, + name, + theta, + truncation_limits=np.array((np.nan, np.nan)), + f_map=None, + anchor=None, + ): + super().__init__( + name, + f_map, + anchor, + ) + self.distribution = 'normal' + self.theta = np.atleast_1d(theta) + self.truncation_limits = truncation_limits - # get the cdf from a non-truncated lognormal - p_vals = norm.cdf(np.log(values), loc=np.log(theta), scale=beta) + def cdf(self, values): + """ + Returns the Cumulative Density Function (CDF) at the specified + values. - # adjust for truncation - result = (p_vals - p_a) / (p_b - p_a) + Parameters + ---------- + values: 1D float ndarray + Values for which to evaluate the CDF - else: - values = np.maximum(values, np.nextafter(0, 1)) + Returns + ------- + ndarray + 1D float ndarray containing CDF values - result = norm.cdf(np.log(values), loc=np.log(theta), scale=beta) + """ + mu, cov = self.theta[:2] + sig = np.abs(mu) * cov - elif self.distribution == 'uniform': - a, b = self.theta[:2] + if np.any(~np.isnan(self.truncation_limits)): + a, b = self.truncation_limits if np.isnan(a): a = -np.inf if np.isnan(b): b = np.inf - if np.any(~np.isnan(self.truncation_limits)): - a, b = self.truncation_limits - - result = uniform.cdf(values, loc=a, scale=(b - a)) + p_a, p_b = [norm.cdf((lim - mu) / sig) for lim in (a, b)] - elif self.distribution == 'multilinear_CDF': + # cap the values at the truncation limits + values = np.minimum(np.maximum(values, a), b) - x_i = [-np.inf] + [x[0] for x in self.theta] + [np.inf] - y_i = [0.00] + [x[1] for x in self.theta] + [1.00] + # get the cdf from a non-truncated normal + p_vals = norm.cdf(values, loc=mu, scale=sig) - ifun = interp1d(x_i, y_i, kind='linear') + # adjust for truncation + result = (p_vals - p_a) / (p_b - p_a) - result = ifun(values) + else: + result = norm.cdf(values, loc=mu, scale=sig) return result - def inverse_transform(self, values=None, sample_size=None): + def inverse_transform(self, values): """ - Uses inverse probability integral transformation on the provided values. + Evaluates the inverse of the Cumulative Density Function (CDF) + for the given values. Used to generate random variable + realizations. + + Parameters + ---------- + values: 1D float ndarray + Values for which to evaluate the inverse CDF + + Returns + ------- + ndarray + Inverse CDF values Raises ------ ValueError - If no values are specified. - ValueError - If problematic truncation limits are assigned. 
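A quick check of the NormalRandomVariable CDF defined above, with hypothetical parameters (theta holds the mean and the coefficient of variation); the import path again assumes pelicun.uq:

    import numpy as np
    from pelicun.uq import NormalRandomVariable

    rv = NormalRandomVariable('PFA', theta=np.array((10.0, 0.2)))  # sigma = 10 * 0.2 = 2
    rv.cdf(np.array((8.0, 10.0, 12.0)))      # approx. [0.159, 0.500, 0.841]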
- """ - result = None - - if self.distribution == 'normal': + If the probability massss within the truncation limits is + too small - if values is None: - raise ValueError( - "Missing uniform sample for inverse transform sampling a " - "normal random variable.") + """ - # else: + mu, cov = self.theta[:2] + sig = np.abs(mu) * cov - mu, cov = self.theta[:2] - sig = np.abs(mu) * cov + if np.any(~np.isnan(self.truncation_limits)): + a, b = self.truncation_limits - if np.any(~np.isnan(self.truncation_limits)): - a, b = self.truncation_limits + if np.isnan(a): + a = -np.inf + if np.isnan(b): + b = np.inf - if np.isnan(a): - a = -np.inf - if np.isnan(b): - b = np.inf + p_a, p_b = [norm.cdf((lim - mu) / sig) for lim in (a, b)] - p_a, p_b = [norm.cdf((lim - mu) / sig) for lim in (a, b)] + if p_b - p_a == 0: + raise ValueError( + "The probability mass within the truncation limits is " + "too small and the truncated distribution cannot be " + "sampled with sufficiently high accuracy. This is most " + "probably due to incorrect truncation limits set for " + "the distribution." + ) - if p_b - p_a == 0: - raise ValueError( - "The probability mass within the truncation limits is " - "too small and the truncated distribution cannot be " - "sampled with sufficiently high accuracy. This is most " - "probably due to incorrect truncation limits set for " - "the distribution." - ) + result = norm.ppf(values * (p_b - p_a) + p_a, loc=mu, scale=sig) - result = norm.ppf(values * (p_b - p_a) + p_a, - loc=mu, scale=sig) + else: + result = norm.ppf(values, loc=mu, scale=sig) - else: - result = norm.ppf(values, loc=mu, scale=sig) + return result - elif self.distribution == 'lognormal': - if values is None: - raise ValueError( - "Missing uniform sample for inverse transform sampling a " - "lognormal random variable.") +class LogNormalRandomVariable(CommonRandomVariable): + """ + Lognormal random variable. - # else: + """ - theta, beta = self.theta[:2] + def __init__( + self, + name, + theta, + truncation_limits=np.array((np.nan, np.nan)), + f_map=None, + anchor=None, + ): + super().__init__( + name, + f_map, + anchor, + ) + self.distribution = 'lognormal' + self.theta = np.atleast_1d(theta) + self.truncation_limits = truncation_limits - if np.any(~np.isnan(self.truncation_limits)): - a, b = self.truncation_limits + def cdf(self, values): + """ + Returns the Cumulative Density Function (CDF) at the specified + values. 
- if np.isnan(a): - a = np.nextafter(0, 1) - else: - a = np.maximum(np.nextafter(0, 1), a) + Parameters + ---------- + values: 1D float ndarray + Values for which to evaluate the CDF - if np.isnan(b): - b = np.inf + Returns + ------- + ndarray + CDF values - p_a, p_b = [norm.cdf((np.log(lim) - np.log(theta)) / beta) - for lim in (a, b)] + """ + theta, beta = self.theta[:2] - result = np.exp( - norm.ppf(values * (p_b - p_a) + p_a, - loc=np.log(theta), scale=beta)) + if np.any(~np.isnan(self.truncation_limits)): + a, b = self.truncation_limits - else: - result = np.exp(norm.ppf(values, loc=np.log(theta), scale=beta)) + if np.isnan(a): + a = np.nextafter(0, 1) + if np.isnan(b): + b = np.inf - elif self.distribution == 'uniform': + p_a, p_b = [ + norm.cdf((np.log(lim) - np.log(theta)) / beta) for lim in (a, b) + ] - if values is None: - raise ValueError( - "Missing uniform sample for inverse transform sampling a " - "uniform random variable.") + # cap the values at the truncation limits + values = np.minimum(np.maximum(values, a), b) - # else: + # get the cdf from a non-truncated lognormal + p_vals = norm.cdf(np.log(values), loc=np.log(theta), scale=beta) - a, b = self.theta[:2] + # adjust for truncation + result = (p_vals - p_a) / (p_b - p_a) + + else: + values = np.maximum(values, np.nextafter(0, 1)) + + result = norm.cdf(np.log(values), loc=np.log(theta), scale=beta) + + return result + + def inverse_transform(self, values): + """ + Evaluates the inverse of the Cumulative Density Function (CDF) + for the given values. Used to generate random variable + realizations. + + Parameters + ---------- + values: 1D float ndarray + Values for which to evaluate the inverse CDF + + Returns + ------- + ndarray + Inverse CDF values + + """ + + theta, beta = self.theta[:2] + + if np.any(~np.isnan(self.truncation_limits)): + a, b = self.truncation_limits if np.isnan(a): - a = -np.inf + a = np.nextafter(0, 1) + else: + a = np.maximum(np.nextafter(0, 1), a) + if np.isnan(b): b = np.inf - if np.any(~np.isnan(self.truncation_limits)): - a, b = self.truncation_limits + p_a, p_b = [ + norm.cdf((np.log(lim) - np.log(theta)) / beta) for lim in (a, b) + ] - result = uniform.ppf(values, loc=a, scale=(b - a)) + result = np.exp( + norm.ppf(values * (p_b - p_a) + p_a, loc=np.log(theta), scale=beta) + ) - elif self.distribution == 'empirical': + else: + result = np.exp(norm.ppf(values, loc=np.log(theta), scale=beta)) - if values is None: - raise ValueError( - "Missing uniform sample for inverse transform sampling an " - "empirical random variable.") + return result - # else: - s_ids = (values * len(self._raw_samples)).astype(int) - result = self._raw_samples[s_ids] +class UniformRandomVariable(CommonRandomVariable): + """ + Uniform random variable. 
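A corresponding sketch for the lognormal variable, where theta is interpreted as (median, log standard deviation); the numbers are hypothetical:

    import numpy as np
    from pelicun.uq import LogNormalRandomVariable

    rv = LogNormalRandomVariable('Sa_1.0s', theta=np.array((0.4, 0.35)))
    rv.cdf(np.array((0.4,)))                      # 0.5 at the median
    rv.inverse_transform(np.array((0.16, 0.84)))  # roughly 0.4 * exp(-/+ 0.35)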
- elif self.distribution == 'coupled_empirical': + """ - if sample_size is None: - raise ValueError( - "Missing sample size information for sampling a coupled " - "empirical random variable.") - # else: - raw_sample_count = len(self._raw_samples) - new_sample = np.tile(self._raw_samples, - int(sample_size / raw_sample_count) + 1) - result = new_sample[:sample_size] + def __init__( + self, + name, + theta, + truncation_limits=np.array((np.nan, np.nan)), + f_map=None, + anchor=None, + ): + super().__init__( + name, + f_map, + anchor, + ) + self.distribution = 'uniform' + self.theta = np.atleast_1d(theta) + self.truncation_limits = truncation_limits - elif self.distribution == 'deterministic': + def cdf(self, values): + """ + Returns the Cumulative Density Function (CDF) at the specified + values. - if sample_size is None: - raise ValueError( - "Missing sample size information for sampling a " - "deterministic random variable.") - # else: - result = np.full(sample_size, self.theta[0]) + Parameters + ---------- + values: 1D float ndarray + Values for which to evaluate the CDF - elif self.distribution == 'multinomial': + Returns + ------- + ndarray + CDF values - if values is None: - raise ValueError( - "Missing uniform sample for sampling a multinomial random " - "variable.") + """ + a, b = self.theta[:2] + + if np.isnan(a): + a = -np.inf + if np.isnan(b): + b = np.inf + + if np.any(~np.isnan(self.truncation_limits)): + a, b = self.truncation_limits + + result = uniform.cdf(values, loc=a, scale=(b - a)) + + return result + + def inverse_transform(self, values): + """ + Evaluates the inverse of the Cumulative Density Function (CDF) + for the given values. Used to generate random variable + realizations. + + Parameters + ---------- + values: 1D float ndarray + Values for which to evaluate the inverse CDF + + Returns + ------- + ndarray + Inverse CDF values + + """ + a, b = self.theta[:2] + + if np.isnan(a): + a = -np.inf + if np.isnan(b): + b = np.inf + + if np.any(~np.isnan(self.truncation_limits)): + a, b = self.truncation_limits + + result = uniform.ppf(values, loc=a, scale=(b - a)) + + return result + + +class MultilinearCDFRandomVariable(CommonRandomVariable): + """ + Multilinear CDF random variable. This RV is defined by specifying + the points that define its Cumulative Density Function (CDF), and + linear interpolation between them. 
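For the uniform variable defined above, theta holds the (a, b) bounds and the inverse transform is a simple rescaling; a hypothetical check:

    import numpy as np
    from pelicun.uq import UniformRandomVariable

    rv = UniformRandomVariable('x', theta=np.array((0.0, 10.0)))
    rv.inverse_transform(np.array((0.0, 0.25, 1.0)))   # -> [0.0, 2.5, 10.0]
    rv.cdf(np.array((5.0,)))                           # -> [0.5]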
+ + """ + + def __init__( + self, + name, + theta, + truncation_limits=np.array((np.nan, np.nan)), + f_map=None, + anchor=None, + ): + super().__init__( + name, + f_map, + anchor, + ) + self.distribution = 'multilinear_CDF' + + if not np.all(np.isnan(truncation_limits)): + raise NotImplementedError( + f'{self.distribution} RVs do not support truncation' + ) + + y_1 = theta[0, 1] + if y_1 != 0.00: + raise ValueError( + "For multilinear CDF random variables, y_1 should be set to 0.00" + ) + y_n = theta[-1, 1] + if y_n != 1.00: + raise ValueError( + "For multilinear CDF random variables, y_n should be set to 1.00" + ) + + x_s = theta[:, 0] + if not np.array_equal(np.sort(x_s), x_s): + raise ValueError( + "For multilinear CDF random variables, " + "Xs should be specified in ascending order" + ) + if np.any(np.isclose(np.diff(x_s), 0.00)): + raise ValueError( + "For multilinear CDF random variables, " + "Xs should be specified in strictly ascending order" + ) + + y_s = theta[:, 1] + if not np.array_equal(np.sort(y_s), y_s): + raise ValueError( + "For multilinear CDF random variables, " + "Ys should be specified in ascending order" + ) + + if np.any(np.isclose(np.diff(y_s), 0.00)): + raise ValueError( + "For multilinear CDF random variables, " + "Ys should be specified in strictly ascending order" + ) + + self.theta = np.atleast_1d(theta) + + def cdf(self, values): + """ + Returns the Cumulative Density Function (CDF) at the specified + values. + + Parameters + ---------- + values: 1D float ndarray + Values for which to evaluate the CDF + + Returns + ------- + ndarray + CDF values + + """ + x_i = [-np.inf] + [x[0] for x in self.theta] + [np.inf] + y_i = [0.00] + [x[1] for x in self.theta] + [1.00] + + # Using Numpy's interp for linear interpolation + result = np.interp(values, x_i, y_i, left=0.00, right=1.00) + + return result + + def inverse_transform(self, values): + """ + Evaluates the inverse of the Cumulative Density Function (CDF) + for the given values. Used to generate random variable + realizations. + + Parameters + ---------- + values: 1D float ndarray + Values for which to evaluate the inverse CDF + + Returns + ------- + ndarray + Inverse CDF values + + """ + + x_i = [x[0] for x in self.theta] + y_i = [x[1] for x in self.theta] + + # using Numpy's interp for the inverse CDF + # note: by definition, y_i /has/ to include the values 0.00 + # and 1.00, and `values` have to be in the range [0.00, 1.00], + # so there is no need to handle edge cases here (i.e., + # extrapolate). + # note: swapping the roles of x_i and y_i for inverse + # interpolation + result = np.interp(values, y_i, x_i) + + return result + + +class EmpiricalRandomVariable(CommonRandomVariable): + """ + Empirical random variable. + + """ + + def __init__( + self, + name, + raw_samples, + truncation_limits=np.array((np.nan, np.nan)), + f_map=None, + anchor=None, + ): + super().__init__( + name, + f_map, + anchor, + ) + self.distribution = 'empirical' + if not np.all(np.isnan(truncation_limits)): + raise NotImplementedError( + f'{self.distribution} RVs do not support truncation' + ) + + self._raw_samples = np.atleast_1d(raw_samples) + + def inverse_transform(self, values): + """ + Maps given values to their corresponding positions within the + empirical data array, simulating an inverse transformation + based on the empirical distribution. This can be seen as a + simple form of inverse CDF where values represent normalized + positions within the empirical data set. 
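For the multilinear CDF variable above, theta lists the (X_i, Y_i) vertices, with Y_1 = 0.0 and Y_n = 1.0; a hypothetical three-vertex example and the values it should produce:

    import numpy as np
    from pelicun.uq import MultilinearCDFRandomVariable

    vertices = np.array(((0.0, 0.0), (1.0, 0.2), (2.0, 1.0)))
    rv = MultilinearCDFRandomVariable('capacity', theta=vertices)
    rv.cdf(np.array((0.5, 1.5)))             # -> [0.1, 0.6]
    rv.inverse_transform(np.array((0.2,)))   # -> [1.0]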
+ + Parameters + ---------- + values: 1D float ndarray + Normalized values between 0 and 1, representing positions + within the empirical data distribution. + + Returns + ------- + ndarray + The empirical data points corresponding to the given + normalized positions. - # else: + """ + s_ids = (values * len(self._raw_samples)).astype(int) + result = self._raw_samples[s_ids] + return result - p_cum = np.cumsum(self.theta)[:-1] - samples = values +class CoupledEmpiricalRandomVariable(SampleSizeRandomVariable): + """ + Coupled empirical random variable. - for i, p_i in enumerate(p_cum): - samples[samples < p_i] = 10 + i - samples[samples <= 1.0] = 10 + len(p_cum) + """ - result = samples - 10 + def __init__( + self, + name, + raw_samples, + truncation_limits=np.array((np.nan, np.nan)), + f_map=None, + anchor=None, + ): + """ + Instantiates a coupled empirical random variable. - elif self.distribution == 'multilinear_CDF': + Parameters + ---------- + name: string + A unique string that identifies the random variable. + raw_samples: 1D float ndarray + Samples from which to draw empirical realizations. + truncation_limits: 2D float ndarray + Not supported for CoupledEmpirical RVs. + Should be np.array((np.nan, np.nan)) + f_map: function, optional + A user-defined function that is applied on the realizations before + returning a sample. + anchor: RandomVariable, optional + Anchors this to another variable. If the anchor is not None, this + variable will be perfectly correlated with its anchor. Note that + the attributes of this variable and its anchor do not have to be + identical. - x_i = [x[0] for x in self.theta] - y_i = [x[1] for x in self.theta] + Raises + ------ + NotImplementedError + When truncation limits are provided - # define the inverse CDF - ifun = interp1d(y_i, x_i, kind='linear') - # note: by definition, y_i /has/ to include the values - # 0.00 and 1.00, and `values` have to be in the range - # [0.00, 1.00], so there is no need to handle edge cases - # here (i.e., extrapolate). + """ + super().__init__( + name, + f_map, + anchor, + ) + self.distribution = 'coupled_empirical' + if not np.all(np.isnan(truncation_limits)): + raise NotImplementedError( + f'{self.distribution} RVs do not support truncation' + ) - result = ifun(values) + self._raw_samples = np.atleast_1d(raw_samples) + def inverse_transform(self, sample_size): + """ + Generates a new sample array from the existing empirical data + by repeating the dataset until it matches the requested sample + size. + + Parameters + ---------- + sample_size: int + The desired size of the sample array to be generated. It + dictates how many times the original dataset will be + repeated to match or exceed this size, after which the array + is trimmed to precisely match the requested size. + + Returns + ------- + ndarray + A new sample array derived from repeating the original + dataset. + + """ + + raw_sample_count = len(self._raw_samples) + new_sample = np.tile( + self._raw_samples, int(sample_size / raw_sample_count) + 1 + ) + result = new_sample[:sample_size] return result - def inverse_transform_sampling(self, sample_size=None): + +class DeterministicRandomVariable(SampleSizeRandomVariable): + """ + Deterministic random variable. + + """ + + def __init__( + self, + name, + theta, + truncation_limits=np.array((np.nan, np.nan)), + f_map=None, + anchor=None, + ): """ - Creates a sample using inverse probability integral transformation. + Instantiates a deterministic random variable. 
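Sketch of the two empirical variants: the plain empirical RV indexes into the raw data using the uniform values, while the coupled empirical RV tiles the raw data up to the requested sample size. The data values are hypothetical:

    import numpy as np
    from pelicun.uq import EmpiricalRandomVariable, CoupledEmpiricalRandomVariable

    raw = np.array((4.0, 5.0, 6.0, 7.0))
    erv = EmpiricalRandomVariable('edp', raw_samples=raw)
    erv.inverse_transform(np.array((0.10, 0.60, 0.95)))  # -> [4.0, 6.0, 7.0]

    crv = CoupledEmpiricalRandomVariable('edp_coupled', raw_samples=raw)
    crv.inverse_transform(sample_size=6)     # -> [4.0, 5.0, 6.0, 7.0, 4.0, 5.0]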
This behaves + like a RandomVariable object but represents a specific, + deterministic value. + + Parameters + ---------- + name: string + A unique string that identifies the random variable. + theta: 1-element float ndarray + The value. + truncation_limits: 2D float ndarray + Not supported for Deterministic RVs. + Should be np.array((np.nan, np.nan)) + f_map: function, optional + A user-defined function that is applied on the realizations before + returning a sample. + anchor: RandomVariable, optional + Anchors this to another variable. If the anchor is not None, this + variable will be perfectly correlated with its anchor. Note that + the attributes of this variable and its anchor do not have to be + identical. + + Raises + ------ + NotImplementedError + When truncation limits are provided + + """ + super().__init__( + name, + f_map, + anchor, + ) + self.distribution = 'deterministic' + if not np.all(np.isnan(truncation_limits)): + raise NotImplementedError( + f'{self.distribution} RVs do not support truncation' + ) + + self.theta = np.atleast_1d(theta) + + def inverse_transform(self, sample_size): + """ + Generates samples that correspond to the value. + + Parameters + ---------- + sample_size: int + The desired size of the sample array to be generated. + + Returns + ------- + ndarray + Sample array containing the deterministic value. + """ - self.sample = self.inverse_transform(self.uni_sample, sample_size) + result = np.full(sample_size, self.theta[0]) + return result + + +class MultinomialRandomVariable(CommonRandomVariable): + """ + Multinomial random variable. + + """ + + def __init__( + self, + name, + theta, + truncation_limits=np.array((np.nan, np.nan)), + f_map=None, + anchor=None, + ): + super().__init__( + name, + f_map, + anchor, + ) + if not np.all(np.isnan(truncation_limits)): + raise NotImplementedError( + f'{self.distribution} RVs do not support truncation' + ) + self.distribution = 'multinomial' + if np.sum(theta) > 1.00: + raise ValueError( + f"The set of p values provided for a multinomial " + f"distribution shall sum up to less than or equal to 1.0. " + f"The provided values sum up to {np.sum(theta)}. p = " + f"{theta} ." + ) + + self.theta = np.atleast_1d(theta) + + def inverse_transform(self, values): + """ + Transforms continuous values into discrete events based + on the cumulative probabilities of the multinomial + distribution derived by `theta`. + + Parameters + ---------- + values: 1D float ndarray + Continuous values to be transformed into discrete events + according to the multinomial distribution's cumulative + probabilities. + + Returns + ------- + ndarray + Discrete events corresponding to the input values. + + """ + p_cum = np.cumsum(self.theta)[:-1] + + for i, p_i in enumerate(p_cum): + values[values < p_i] = 10 + i + values[values <= 1.0] = 10 + len(p_cum) + + result = values - 10 + + return result class RandomVariableSet: @@ -1568,11 +2075,9 @@ class RandomVariableSet: """ def __init__(self, name, RV_list, Rho): - self.name = name if len(RV_list) > 1: - # put the RVs in a dictionary for more efficient access reorder = np.argsort([RV.name for RV in RV_list]) self._variables = {RV_list[i].name: RV_list[i] for i in reorder} @@ -1592,33 +2097,55 @@ def __init__(self, name, RV_list, Rho): @property def RV(self): """ - Return the random variable(s) assigned to the set + Returns the random variable(s) assigned to the set. + + Returns + ------- + ndarray + The random variable(s) assigned to the set. 
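A sketch for the deterministic and multinomial variables defined above, with hypothetical parameters; note that the multinomial inverse transform overwrites the array passed to it:

    import numpy as np
    from pelicun.uq import DeterministicRandomVariable, MultinomialRandomVariable

    drv = DeterministicRandomVariable('fixed', theta=np.array((2.5,)))
    drv.inverse_transform(sample_size=3)      # -> [2.5, 2.5, 2.5]

    # event probabilities; they must sum to at most 1.0
    mrv = MultinomialRandomVariable('ds', theta=np.array((0.2, 0.3, 0.5)))
    u = np.array((0.10, 0.40, 0.90))
    mrv.inverse_transform(u)                  # -> [0.0, 1.0, 2.0] (u is modified in place)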
+ """ return self._variables @property def size(self): """ - Return the size (i.e., number of variables in the) RV set + Returns the size (i.e., number of variables in the) RV set. + + Returns + ------- + ndarray + The size (i.e., number of variables in the) RV set. + """ return len(self._variables) @property def sample(self): """ - Return the sample of the variables in the set + Returns the sample of the variables in the set. + + Returns + ------- + ndarray + The sample of the variables in the set. + """ return {name: rv.sample for name, rv in self._variables.items()} def Rho(self, var_subset=None): """ - Return the (subset of the) correlation matrix. + Returns the (subset of the) correlation matrix. + + Returns + ------- + ndarray + The (subset of the) correlation matrix. + """ if var_subset is None: return self._Rho - # else: - var_ids = [list(self._variables.keys()).index(var_i) - for var_i in var_subset] + var_ids = [list(self._variables.keys()).index(var_i) for var_i in var_subset] return (self._Rho[var_ids]).T[var_ids] def apply_correlation(self): @@ -1645,12 +2172,13 @@ def apply_correlation(self): UC_RV = norm.cdf(NC_RV) except np.linalg.LinAlgError: - # if the Cholesky doesn't work, we need to use the more # time-consuming but more robust approach based on SVD N_RV = norm.ppf(U_RV) - U, s, _ = svd(self._Rho, ) + U, s, _ = svd( + self._Rho, + ) S = np.diagflat(np.sqrt(s)) NC_RV = (N_RV.T @ S @ U.T).T @@ -1689,10 +2217,11 @@ def orthotope_density(self, lower=np.nan, upper=np.nan, var_subset=None): Returns ------- - alpha: float - Estimate of the probability density within the orthotope. - eps_alpha: float - Estimate of the error in alpha. + tuple + alpha: float + Estimate of the probability density within the orthotope. + eps_alpha: float + Estimate of the error in alpha. """ @@ -1714,7 +2243,6 @@ def orthotope_density(self, lower=np.nan, upper=np.nan, var_subset=None): # first, convert limits to standard normal values for var_i, var_name in enumerate(variables): - var = self._variables[var_name] if (np.any(~np.isnan(lower))) and (~np.isnan(lower[var_i])): @@ -1727,10 +2255,15 @@ def orthotope_density(self, lower=np.nan, upper=np.nan, var_subset=None): lower_std = lower_std.T upper_std = upper_std.T - OD = [mvn_orthotope_density(mu=np.zeros(len(variables)), - COV=self.Rho(var_subset), - lower=l_i, upper=u_i)[0] - for l_i, u_i in zip(lower_std, upper_std)] + OD = [ + mvn_orthotope_density( + mu=np.zeros(len(variables)), + COV=self.Rho(var_subset), + lower=l_i, + upper=u_i, + )[0] + for l_i, u_i in zip(lower_std, upper_std) + ] return np.asarray(OD) @@ -1757,29 +2290,57 @@ def __init__(self, rng): @property def RV(self): """ - Return all random variable(s) in the registry + Returns all random variable(s) in the registry. + + Returns + ------- + dict + all random variable(s) in the registry. + """ return self._variables def RVs(self, keys): """ - Return a subset of the random variables in the registry + Returns a subset of the random variables in the registry + + Parameters + ---------- + keys: list of str + Keys that define the subset. + + Returns + ------- + dict + A subset random variable(s) in the registry. + """ return {name: self._variables[name] for name in keys} def add_RV(self, RV): """ Add a new random variable to the registry. 
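A hypothetical two-member set to illustrate the RandomVariableSet interface; apply_correlation() would then impose the Gaussian copula implied by Rho on the members' uniform samples before inverse-transform sampling:

    import numpy as np
    from pelicun.uq import NormalRandomVariable, RandomVariableSet

    rv_a = NormalRandomVariable('PID-1', theta=np.array((0.010, 0.3)))
    rv_b = NormalRandomVariable('PID-2', theta=np.array((0.012, 0.3)))
    rho = np.array(((1.0, 0.7), (0.7, 1.0)))
    rv_set = RandomVariableSet('drifts', [rv_a, rv_b], rho)
    rv_set.size                     # -> 2
    rv_set.Rho(['PID-1', 'PID-2'])  # -> the full 2x2 correlation matrix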
+ + Raises + ------ + ValueError + When the RV already exists in the registry + """ if RV.name in self._variables: - raise ValueError( - f'RV {RV.name} already exists in the registry.') + raise ValueError(f'RV {RV.name} already exists in the registry.') self._variables.update({RV.name: RV}) @property def RV_set(self): """ Return the random variable set(s) in the registry. + + Returns + ------- + dict + The random variable set(s) in the registry. + """ return self._sets @@ -1792,7 +2353,13 @@ def add_RV_set(self, RV_set): @property def RV_sample(self): """ - Return the sample for every random variable in the registry + Return the sample for every random variable in the registry. + + Returns + ------- + dict + The sample for every random variable in the registry. + """ return {name: rv.sample for name, rv in self.RV.items()} @@ -1812,21 +2379,32 @@ def generate_sample(self, sample_size, method): with random sample location within each bin of the hypercube; 'LHS_midpoint' is like LHS, but the samples are assigned to the midpoints of the hypercube bins. + + Raises + ------ + NotImplementedError + When the RV parent class is Unknown + """ # Generate a dictionary with IDs of the free (non-anchored and # non-deterministic) variables - RV_list = [RV_name for RV_name, RV in self.RV.items() if - ((RV.anchor == RV) or ( - RV.distribution in { - 'deterministic', 'coupled_empirical'}))] + RV_list = [ + RV_name + for RV_name, RV in self.RV.items() + if ( + (RV.anchor == RV) + or (RV.distribution in {'deterministic', 'coupled_empirical'}) + ) + ] RV_ID = {RV_name: ID for ID, RV_name in enumerate(RV_list)} RV_count = len(RV_ID) # Generate controlling samples from a uniform distribution for free RVs if 'LHS' in method: - bin_low = np.array([self._rng.permutation(sample_size) - for i in range(RV_count)]) + bin_low = np.array( + [self._rng.permutation(sample_size) for i in range(RV_count)] + ) if method == 'LHS_midpoint': U_RV = np.ones([RV_count, sample_size]) * 0.5 @@ -1850,4 +2428,51 @@ def generate_sample(self, sample_size, method): # Convert from uniform to the target distribution for every RV for RV in self.RV.values(): - RV.inverse_transform_sampling(sample_size) + if RV.__class__.__mro__[1] is CommonRandomVariable: + # no sample size needed, since that information is + # available in the uniform sample + RV.inverse_transform_sampling() + elif RV.__class__.__mro__[1] is SampleSizeRandomVariable: + RV.inverse_transform_sampling(sample_size) + else: + raise NotImplementedError('Unknown RV parent class.') + + +def rv_class_map(distribution_name): + """ + Maps convenient distribution names to their corresponding random + variable class. + + Parameters + ---------- + distribution_name: str + The name of a distribution. + + Returns + ------- + RandomVariable + RandomVariable class. + + Raises + ------ + ValueError + If the given distribution name does not correspond to a + distribution class. 
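A hypothetical end-to-end registry round trip, using rv_class_map (defined just below) to resolve the class from a distribution name; the RV name, parameters, seed, and sample size are illustrative only:

    import numpy as np
    from pelicun.uq import RandomVariableRegistry, rv_class_map

    reg = RandomVariableRegistry(np.random.default_rng(42))
    rv_class = rv_class_map('normal')               # -> NormalRandomVariable
    reg.add_RV(rv_class('PFA-1-1', theta=np.array((0.5, 0.3))))
    reg.generate_sample(100, 'LHS_midpoint')        # sample size, sampling method
    reg.RV_sample['PFA-1-1'].shape                  # -> (100,)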
+
+ """
+ if pd.isna(distribution_name):
+ distribution_name = 'deterministic'
+ distribution_map = {
+ 'normal': NormalRandomVariable,
+ 'lognormal': LogNormalRandomVariable,
+ 'uniform': UniformRandomVariable,
+ 'multilinear_CDF': MultilinearCDFRandomVariable,
+ 'empirical': EmpiricalRandomVariable,
+ 'coupled_empirical': CoupledEmpiricalRandomVariable,
+ 'deterministic': DeterministicRandomVariable,
+ 'multinomial': MultinomialRandomVariable,
+ }
+ if distribution_name not in distribution_map:
+ raise ValueError(f'Unsupported distribution: {distribution_name}')
+ return distribution_map[distribution_name]
diff --git a/setup.py b/setup.py
index 7ae66d053..516d89dba 100644
--- a/setup.py
+++ b/setup.py
@@ -44,7 +44,7 @@ def read(*filenames, **kwargs):
 install_requires=[
 'numpy>=1.22.0, <2.0',
 'scipy>=1.7.0, <2.0',
- 'pandas>=1.4.0, <2.0',
+ 'pandas>=1.4.0, <3.0',
 'tables>=3.7.0',
 ],
 classifiers=[