Skip to content

Commit

Permalink
adding more checking for warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
jonrkarr committed May 23, 2021
1 parent ace2d20 commit 28576a5
Show file tree
Hide file tree
Showing 5 changed files with 526 additions and 42 deletions.
36 changes: 36 additions & 0 deletions biosimulators_utils/sedml/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from .data_model import (SedBase, SedIdGroupMixin, SedDocument, # noqa: F401
Model, ModelLanguagePattern, ModelChange, ModelAttributeChange, AddElementModelChange,
ReplaceElementModelChange, RemoveElementModelChange, ComputeModelChange, SetValueComputeModelChange,
OneStepSimulation, SteadyStateSimulation, UniformTimeCourseSimulation,
Task, RepeatedTask, Output, Report, Plot2D, Plot3D,
DataGenerator, Variable,
Range, UniformRange, VectorRange, FunctionalRange, UniformRangeType)
Expand Down Expand Up @@ -58,6 +59,7 @@
'get_namespaces_for_sed_object',
'get_xml_node_namespace_tag_target',
'is_model_language_encoded_in_xml',
'get_task_results_shape',
]


Expand Down Expand Up @@ -1167,3 +1169,37 @@ def get_all_sed_objects(doc, type=(SedBase, SedIdGroupMixin)):

# filter out elements of a specific type
return list(filter(lambda obj: isinstance(obj, type), seen_objs))


def get_task_results_shape(task):
    """ Get the shape of the results of a task

    * Basic task: a single dimension whose size depends on the type of the simulation
      (2 for a one step simulation, 1 for a steady-state simulation, and the number of
      steps + 1 for a uniform time course).
    * Repeated task: the length of its range, followed by its number of sub-tasks,
      followed by the element-wise maximum of the shapes of its sub-tasks (shorter
      sub-task shapes are padded with zeros).

    Dimensions whose size cannot be determined (e.g., because the task is incompletely
    defined) are reported as :obj:`None` rather than raising an exception, so that this
    function can be safely used to validate incomplete or invalid documents.

    Args:
        task (:obj:`Task` or :obj:`RepeatedTask`): task

    Returns:
        :obj:`tuple` of :obj:`int`: shape of the results of a task; dimensions of unknown
            size are :obj:`None`. :obj:`None` is returned if :obj:`task` is neither a
            :obj:`Task` nor a :obj:`RepeatedTask`.
    """
    if isinstance(task, Task):
        simulation = task.simulation

        if isinstance(simulation, OneStepSimulation):
            return (2,)

        if isinstance(simulation, SteadyStateSimulation):
            return (1,)

        if isinstance(simulation, UniformTimeCourseSimulation):
            # NOTE(review): presumably the number of steps can be unset in an incomplete
            # document; report an unknown size instead of failing on ``None + 1``
            if simulation.number_of_steps is None:
                return (None,)
            return (simulation.number_of_steps + 1,)

        # missing or unrecognized type of simulation
        return (None,)

    if isinstance(task, RepeatedTask):
        shape = [get_range_len(task.range) if task.range else None, len(task.sub_tasks)]
        max_sub_task_shape = []

        for sub_task in task.sub_tasks:
            sub_task_shape = get_task_results_shape(sub_task.task)
            if sub_task_shape is None:
                # the sub-task doesn't reference a (supported) task, so it can't
                # contribute to the shape
                continue
            sub_task_shape = list(sub_task_shape)

            # pad the shorter shape with zeros so the shapes can be compared element-wise
            n_dims = max(len(max_sub_task_shape), len(sub_task_shape))
            max_sub_task_shape = max_sub_task_shape + [0] * (n_dims - len(max_sub_task_shape))
            sub_task_shape = sub_task_shape + [0] * (n_dims - len(sub_task_shape))

            max_sub_task_shape = [
                _get_max_dimension_size(size_a, size_b)
                for size_a, size_b in zip(max_sub_task_shape, sub_task_shape)
            ]

        return tuple(shape + max_sub_task_shape)

    return None


def _get_max_dimension_size(size_a, size_b):
    """ Get the larger of two dimension sizes, treating :obj:`None` as an unknown size

    Args:
        size_a (:obj:`int` or :obj:`None`): size of a dimension
        size_b (:obj:`int` or :obj:`None`): size of the same dimension of another shape

    Returns:
        :obj:`int` or :obj:`None`: larger size, or :obj:`None` if either size is unknown
    """
    # ``max`` can't order ``None`` and ``int``; an unknown size makes the maximum unknown
    if size_a is None or size_b is None:
        return None
    return max(size_a, size_b)
193 changes: 189 additions & 4 deletions biosimulators_utils/sedml/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
is_model_language_encoded_in_xml, get_models_referenced_by_task,
get_all_sed_objects,
get_data_generators_for_output, get_variables_for_data_generators,
get_model_changes_for_task)
get_model_changes_for_task,
get_task_results_shape)
import collections
import copy
import lxml.etree
Expand Down Expand Up @@ -217,7 +218,14 @@ def validate_doc(doc, working_dir, validate_semantics=True, validate_models_with
networkx.algorithms.cycles.find_cycle(sub_task_graph)
errors.append(['The subtasks are defined cyclically. The graph of subtasks must be acyclic.'])
except networkx.NetworkXNoCycle:
pass
for task in doc.tasks:
if isinstance(task, RepeatedTask):
sub_task_shapes = set()
for sub_task in task.sub_tasks:
sub_task_shapes.add(get_task_results_shape(sub_task.task))
if len(sub_task_shapes) > 1:
msg = 'The outputs of the sub-tasks have different shapes.'
task_warnings[task]['other'].append([msg])

# Ranges of repeated tasks
# - Functional ranges
Expand Down Expand Up @@ -535,6 +543,74 @@ def validate_doc(doc, working_dir, validate_semantics=True, validate_models_with
if output_warnings:
warnings.append(['Output {} may be invalid.'.format(output_id), output_warnings])

# tasks, data generators that don't contribute to outputs
used_data_generators = set()
used_tasks = set()
for output in doc.outputs:
if isinstance(output, Report):
for data_set in output.data_sets:
if data_set.data_generator:
used_data_generators.add(data_set.data_generator)
for variable in data_set.data_generator.variables:
if variable.task:
used_tasks.add(variable.task)

elif isinstance(output, Plot2D):
for curve in output.curves:
if curve.x_data_generator:
used_data_generators.add(curve.x_data_generator)
for variable in curve.x_data_generator.variables:
if variable.task:
used_tasks.add(variable.task)
if curve.y_data_generator:
used_data_generators.add(curve.y_data_generator)
for variable in curve.y_data_generator.variables:
if variable.task:
used_tasks.add(variable.task)

elif isinstance(output, Plot3D):
for surface in output.surfaces:
if surface.x_data_generator:
used_data_generators.add(surface.x_data_generator)
for variable in surface.x_data_generator.variables:
if variable.task:
used_tasks.add(variable.task)
if surface.y_data_generator:
used_data_generators.add(surface.y_data_generator)
for variable in surface.y_data_generator.variables:
if variable.task:
used_tasks.add(variable.task)
if surface.z_data_generator:
used_data_generators.add(surface.z_data_generator)
for variable in surface.z_data_generator.variables:
if variable.task:
used_tasks.add(variable.task)

tasks_to_check = list(used_tasks)
used_tasks = set()
while tasks_to_check:
task = tasks_to_check.pop()
used_tasks.add(task)
if isinstance(task, RepeatedTask):
for sub_task in task.sub_tasks:
tasks_to_check.append(sub_task.task)

unused_tasks = []
for i_task, task in enumerate(doc.tasks):
if task not in used_tasks:
task_id = '`' + task.id + '`' if task.id else str(i_task + 1)
unused_tasks.append([task_id])
if unused_tasks:
warnings.append(['The following tasks do not contribute to outputs:', sorted(unused_tasks)])

unused_data_generators = []
for i_data_generator, data_generator in enumerate(doc.data_generators):
if data_generator not in used_data_generators:
data_generator_id = '`' + data_generator.id + '`' if data_generator.id else str(i_data_generator + 1)
unused_data_generators.append([data_generator_id])
if unused_data_generators:
warnings.append(['The following data generators do not contribute to outputs:', sorted(unused_data_generators)])

return (errors, warnings)


Expand Down Expand Up @@ -1017,6 +1093,8 @@ def validate_data_generator_variables(variables, model_etrees=None):
errors = []
warnings = []

task_types = set()

for i_variable, variable in enumerate(variables):
variable_errors = []
variable_warnings = []
Expand All @@ -1027,7 +1105,9 @@ def validate_data_generator_variables(variables, model_etrees=None):
if variable.model:
variable_errors.append(['Variable should not reference a model.'])

if not variable.task and not (variable.target and variable.target.startswith('#')):
if variable.task:
task_types.add(get_task_results_shape(variable.task))
elif not (variable.target and variable.target.startswith('#')):
variable_errors.append(['Variable must reference a task.'])

if (variable.symbol and variable.target) or (not variable.symbol and not variable.target):
Expand Down Expand Up @@ -1056,6 +1136,9 @@ def validate_data_generator_variables(variables, model_etrees=None):
variable_id = '`' + variable.id + '`' if variable and variable.id else str(i_variable + 1)
warnings.append(['Variable {} may be invalid.'.format(variable_id), variable_warnings])

if len(task_types) > 1:
warnings.append(['The variables do not have consistent shapes.'])

return errors, warnings


Expand Down Expand Up @@ -1087,53 +1170,155 @@ def validate_output(output):
if not output.data_sets:
errors.append(['Report must have at least one data set.'])

labels = set()
duplicate_labels = set()
task_types = set()

for i_data_set, data_set in enumerate(output.data_sets):
data_set_errors = []

if not data_set.id:
data_set_errors.append(['Data set must have an id.'])

if not data_set.label:
if data_set.label:
if data_set.label in labels:
duplicate_labels.add(data_set.label)
labels.add(data_set.label)
else:
data_set_errors.append(['Data set must have a label.'])

if data_set.data_generator:
for variable in data_set.data_generator.variables:
if variable.task:
task_types.add(get_task_results_shape(variable.task))
data_set_errors.extend(validate_reference(data_set, 'Data set', 'data_generator', 'data data generator'))

if data_set_errors:
data_set_id = '`' + data_set.id + '`' if data_set and data_set.id else str(i_data_set + 1)
errors.append(['Data set {} is invalid.'.format(data_set_id), data_set_errors])

if len(task_types) > 1:
warnings.append(['The data sets do not have consistent shapes.'])

if duplicate_labels:
warnings.append([(
'Data sets do not have unique labels. '
'Unique labels are helpful for interpreting reports. '
'The following labels are repeated:'),
[[label] for label in sorted(duplicate_labels)]])

elif isinstance(output, Plot2D):
if not output.curves:
errors.append(['Plot must have at least one curve.'])

x_scales = set()
y_scales = set()

for i_curve, curve in enumerate(output.curves):
curve_errors = []
curve_warnings = []

if not curve.id:
curve_errors.append(['Curve must have an id.'])

task_types = set()
if curve.x_data_generator:
for variable in curve.x_data_generator.variables:
if variable.task:
task_types.add(get_task_results_shape(variable.task))
if curve.y_data_generator:
for variable in curve.y_data_generator.variables:
if variable.task:
task_types.add(get_task_results_shape(variable.task))
curve_errors.extend(validate_reference(curve, 'Curve', 'x_data_generator', 'x data data generator'))
curve_errors.extend(validate_reference(curve, 'Curve', 'y_data_generator', 'y data data generator'))
if len(task_types) > 1:
curve_warnings.append(['The curves do not have consistent shapes.'])

if curve.x_scale:
x_scales.add(curve.x_scale)
else:
curve_errors.append(['Curve must have an x-scale.'])

if curve.y_scale:
y_scales.add(curve.y_scale)
else:
curve_errors.append(['Curve must have a y-scale.'])

if curve_errors:
curve_id = '`' + curve.id + '`' if curve and curve.id else str(i_curve + 1)
errors.append(['Curve {} is invalid.'.format(curve_id), curve_errors])
if curve_warnings:
curve_id = '`' + curve.id + '`' if curve and curve.id else str(i_curve + 1)
warnings.append(['Curve {} may be invalid.'.format(curve_id), curve_warnings])

if len(x_scales) > 1:
warnings.append(['Curves do not have consistent x-scales.'])
if len(y_scales) > 1:
warnings.append(['Curves do not have consistent y-scales.'])

elif isinstance(output, Plot3D):
if not output.surfaces:
errors.append(['Plot must have at least one surface.'])

x_scales = set()
y_scales = set()
z_scales = set()

for i_surface, surface in enumerate(output.surfaces):
surface_errors = []
surface_warnings = []

if not surface.id:
surface_errors.append(['Surface must have an id.'])

task_types = set()
if surface.x_data_generator:
for variable in surface.x_data_generator.variables:
if variable.task:
task_types.add(get_task_results_shape(variable.task))
if surface.y_data_generator:
for variable in surface.y_data_generator.variables:
if variable.task:
task_types.add(get_task_results_shape(variable.task))
if surface.z_data_generator:
for variable in surface.z_data_generator.variables:
if variable.task:
task_types.add(get_task_results_shape(variable.task))
surface_errors.extend(validate_reference(surface, 'Surface', 'x_data_generator', 'x data data generator'))
surface_errors.extend(validate_reference(surface, 'Surface', 'y_data_generator', 'y data data generator'))
surface_errors.extend(validate_reference(surface, 'Surface', 'z_data_generator', 'z data data generator'))
if len(task_types) > 1:
surface_warnings.append(['The surfaces do not have consistent shapes.'])

if surface.x_scale:
x_scales.add(surface.x_scale)
else:
surface_errors.append(['Surface must have an x-scale.'])

if surface.y_scale:
y_scales.add(surface.y_scale)
else:
surface_errors.append(['Surface must have a y-scale.'])

if surface.z_scale:
z_scales.add(surface.z_scale)
else:
surface_errors.append(['Surface must have a z-scale.'])

if surface_errors:
surface_id = '`' + surface.id + '`' if surface and surface.id else str(i_surface + 1)
errors.append(['Surface {} is invalid.'.format(surface_id), surface_errors])
if surface_warnings:
surface_id = '`' + surface.id + '`' if surface and surface.id else str(i_surface + 1)
warnings.append(['Surface {} may be invalid.'.format(surface_id), surface_warnings])

if len(x_scales) > 1:
warnings.append(['Surfaces do not have consistent x-scales.'])
if len(y_scales) > 1:
warnings.append(['Surfaces do not have consistent y-scales.'])
if len(z_scales) > 1:
warnings.append(['Surfaces do not have consistent z-scales.'])

involves_repeated_task = False
for variable in get_variables_for_data_generators(get_data_generators_for_output(output)):
Expand Down
1 change: 1 addition & 0 deletions tests/sedml/test_sedml_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,7 @@ def test_write_error_unsupported_classes(self):
data_model.Curve(
id='curve',
x_scale='sin',
y_scale=data_model.AxisScale.linear,
x_data_generator=data_model.DataGenerator(id='x_data_gen',
parameters=[data_model.Parameter(id='x', value=1)],
math='x'),
Expand Down
39 changes: 39 additions & 0 deletions tests/sedml/test_sedml_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1608,3 +1608,42 @@ def test_get_model_changes_for_task(self):

with self.assertRaisesRegex(NotImplementedError, 'not supported'):
utils.get_model_changes_for_task(None)

def test_get_task_results_shape(self):
    """ Check the shapes reported for basic, repeated, and nested repeated tasks """
    def make_steady_state_and_time_course_sub_tasks():
        # fresh fixture: a steady-state sub-task and a 10-step time course sub-task
        steady_state_task = data_model.Task(simulation=data_model.SteadyStateSimulation())
        time_course_task = data_model.Task(simulation=data_model.UniformTimeCourseSimulation(number_of_steps=10))
        return [
            data_model.SubTask(task=steady_state_task),
            data_model.SubTask(task=time_course_task),
        ]

    # basic tasks: one dimension whose size depends on the type of simulation
    one_step_task = data_model.Task(simulation=data_model.OneStepSimulation())
    self.assertEqual(utils.get_task_results_shape(one_step_task), (2,))

    steady_state_task = data_model.Task(simulation=data_model.SteadyStateSimulation())
    self.assertEqual(utils.get_task_results_shape(steady_state_task), (1,))

    time_course_task = data_model.Task(
        simulation=data_model.UniformTimeCourseSimulation(number_of_steps=10),
    )
    self.assertEqual(utils.get_task_results_shape(time_course_task), (11,))

    # repeated task: range length, number of sub-tasks, then the largest sub-task shape
    repeated_task = data_model.RepeatedTask(
        range=data_model.VectorRange(values=[0.1, 0.2, 0.3]),
        sub_tasks=make_steady_state_and_time_course_sub_tasks(),
    )
    self.assertEqual(utils.get_task_results_shape(repeated_task), (3, 2, 11))

    # nested repeated task: the shape of the inner repeated task follows the outer dimensions
    inner_repeated_task = data_model.RepeatedTask(
        range=data_model.VectorRange(values=[0.1, 0.2, 0.3]),
        sub_tasks=make_steady_state_and_time_course_sub_tasks(),
    )
    outer_repeated_task = data_model.RepeatedTask(
        range=data_model.UniformRange(number_of_steps=4),
        sub_tasks=[data_model.SubTask(task=inner_repeated_task)],
    )
    self.assertEqual(utils.get_task_results_shape(outer_repeated_task), (5, 1, 3, 2, 11))
Loading

0 comments on commit 28576a5

Please sign in to comment.