From f8541a9081ec1889ad7eadb902a12af4b3a266d4 Mon Sep 17 00:00:00 2001 From: "Christian Y. Brenninkmeijer" Date: Wed, 18 Oct 2023 11:46:19 +0100 Subject: [PATCH 01/18] check too few cores and let version check ethernet --- spinn_machine/machine.py | 40 +++++++++++++++-------- spinn_machine/version/abstract_version.py | 31 ++++++++++++++++++ spinn_machine/version/version_3.py | 8 ++++- spinn_machine/version/version_5.py | 11 ++++++- spinn_machine/version/version_spin1.py | 5 +++ 5 files changed, 79 insertions(+), 16 deletions(-) diff --git a/spinn_machine/machine.py b/spinn_machine/machine.py index c6316748..49e43802 100644 --- a/spinn_machine/machine.py +++ b/spinn_machine/machine.py @@ -327,10 +327,14 @@ def where_is_chip(self, chip: Chip) -> str: :rtype: str """ chip00 = self[0, 0] - local00 = self[chip.nearest_ethernet_x, chip.nearest_ethernet_y] + try: + local00 = self[chip.nearest_ethernet_x, chip.nearest_ethernet_y] + ip_address = f"on {local00.ip_address}" + except KeyError: + ip_address = "" (localx, localy) = self.get_local_xy(chip) return (f"global chip {chip.x}, {chip.y} on {chip00.ip_address} " - f"is chip {localx}, {localy} on {local00.ip_address}") + f"is chip {localx}, {localy} {ip_address}") def where_is_xy(self, x: int, y: int) -> str: """ @@ -501,36 +505,44 @@ def validate(self) -> None: # The fact that self._boot_ethernet_address is set means there is an # ethernet chip and it is at 0,0 so no need to check that + version = MachineDataView.get_machine_version() for chip in self.chips: if chip.x < 0: - raise SpinnMachineException(f"{chip} has a negative x") + raise SpinnMachineException( + f"{self.where_is_chip(chip)} has a negative x") if chip.y < 0: - raise SpinnMachineException(f"{chip} has a negative y") + raise SpinnMachineException( + f"{self.where_is_chip(chip)} has a negative y") if chip.x >= self._width: raise SpinnMachineException( - f"{chip} has an x larger than width {self._width}") + f"{self.where_is_chip(chip)} has an x larger " + f"than width {self._width}") if chip.y >= self._height: raise SpinnMachineException( - f"{chip} has a y larger than height {self._height}") + f"{self.where_is_chip(chip)} has a y larger " + f"than height {self._height}") + if chip.n_processors < version.minimum_cores_expected: + raise SpinnMachineException( + f"{self.where_is_chip(chip)} has too few cores " + f"found {chip.n_processors}") if chip.ip_address: # Ethernet Chip checks - if chip.x % 4 != 0: - raise SpinnMachineException( - f"Ethernet {chip} has a x which is not divisible by 4") - if (chip.x + chip.y) % 12 != 0: + error = version.illegal_ethernet_message(chip.x, chip.y) + if error is not None: raise SpinnMachineException( - f"Ethernet {chip} has an x,y pair that " - "does not add up to 12") + f"{self.where_is_chip(chip)} {error}") else: # Non-Ethernet chip checks if not self.is_chip_at( chip.nearest_ethernet_x, chip.nearest_ethernet_y): raise SpinnMachineException( - f"{chip} has an invalid ethernet chip") + f"{self.where_is_chip(chip)} " + f"has an invalid ethernet chip") local_xy = self.get_local_xy(chip) if local_xy not in self._chip_core_map: raise SpinnMachineException( - f"{chip} has an unexpected local xy of {local_xy}") + f"{self.where_is_chip(chip)} " + f"has an unexpected local xy of {local_xy}") @property @abstractmethod diff --git a/spinn_machine/version/abstract_version.py b/spinn_machine/version/abstract_version.py index e98b771a..d152a87b 100644 --- a/spinn_machine/version/abstract_version.py +++ b/spinn_machine/version/abstract_version.py @@ -297,3 +297,34 @@ def _create_machine(self, width: int, height: int, origin: str) -> Machine: :rtype: ~spinn_machine.Machine """ raise NotImplementedError + + @property + @abstractmethod + def minimum_cores_expected(self) -> int: + """ + The minimum number of Chip that we expect from a Chip + + If there are less that this number of Cores Machine.validate and + other methods are allowed to raise an exception + + :rtype: int + :return: The lowest number of cores to accept before flagging a + Chip to be blacklisted + """ + + @abstractmethod + def illegal_ethernet_message(self, x:int, y:int) -> Optional[str]: + """ + Checks if x and y could be for an ethernet + + This method will return an explanation if the values for x and y are + known be illegal for an ethernet chip. + + Due to the limited inforamtion available this method will generate + False negatives. Ie x, y that for this machine which can + not be ethernets generating no explanation. + + :param int x: + :param int y: + :return: An explanation that the x and y can never be an ethernet + """ \ No newline at end of file diff --git a/spinn_machine/version/version_3.py b/spinn_machine/version/version_3.py index 55624990..73fbe248 100644 --- a/spinn_machine/version/version_3.py +++ b/spinn_machine/version/version_3.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Final, Mapping, Sequence, Tuple +from typing import Final, Mapping, Optional, Sequence, Tuple from spinn_utilities.overrides import overrides from spinn_utilities.typing.coords import XY from .version_spin1 import VersionSpin1 @@ -66,3 +66,9 @@ def _verify_size(self, width: int, height: int): @overrides(VersionSpin1._create_machine) def _create_machine(self, width: int, height: int, origin: str) -> Machine: return FullWrapMachine(width, height, CHIPS_PER_BOARD, origin) + + @overrides(VersionSpin1.illegal_ethernet_message) + def illegal_ethernet_message(self, x: int, y: int) -> Optional[str]: + if x != 0 or y != 0: + return "Only Chip 0, 0 may be an Ethernet Chip" + return None diff --git a/spinn_machine/version/version_5.py b/spinn_machine/version/version_5.py index 328419c9..178432dd 100644 --- a/spinn_machine/version/version_5.py +++ b/spinn_machine/version/version_5.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Final, Mapping, Sequence, Tuple +from typing import Final, Mapping, Optional, Sequence, Tuple from spinn_utilities.overrides import overrides from spinn_utilities.typing.coords import XY from spinn_machine.exceptions import SpinnMachineException @@ -98,3 +98,12 @@ def _create_machine(self, width: int, height: int, origin: str) -> Machine: width, height, CHIPS_PER_BOARD, origin) else: return NoWrapMachine(width, height, CHIPS_PER_BOARD, origin) + + @overrides(VersionSpin1.illegal_ethernet_message) + def illegal_ethernet_message(self, x: int, y: int) -> Optional[str]: + if x % 4 != 0: + return "Only Chip with X divisible by 4 may be an Ethernet Chip" + if (x + y) % 12 != 0: + return "Only Chip with x + y divisible by 12 " \ + "may be an Ethernet Chip" + return None diff --git a/spinn_machine/version/version_spin1.py b/spinn_machine/version/version_spin1.py index d9e7919a..a18e4f66 100644 --- a/spinn_machine/version/version_spin1.py +++ b/spinn_machine/version/version_spin1.py @@ -37,3 +37,8 @@ def n_non_user_cores(self) -> int: @overrides(AbstractVersion.n_router_entries) def n_router_entries(self) -> int: return 1023 + + @property + @overrides(AbstractVersion.minimum_cores_expected) + def minimum_cores_expected(self) -> int: + return 5 From 1100512b9797015351a2379d1901387a870816d4 Mon Sep 17 00:00:00 2001 From: "Christian Y. Brenninkmeijer" Date: Wed, 18 Oct 2023 11:48:16 +0100 Subject: [PATCH 02/18] test too few cores --- unittests/test_machine.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/unittests/test_machine.py b/unittests/test_machine.py index 864d0a46..b417a22b 100644 --- a/unittests/test_machine.py +++ b/unittests/test_machine.py @@ -386,6 +386,13 @@ def test_concentric_xys(self): (2, 4), (1, 3), (0, 2), (0, 1), (0, 0), (1, 0)] self.assertListEqual(expected, found) + def test_too_few_cores(self): + machine = virtual_machine(8, 8) + # Hack to get n_processors return a low number + machine.get_chip_at(0, 1)._p = [1, 2, 3] + with self.assertRaises(SpinnMachineException): + machine.validate() + if __name__ == '__main__': unittest.main() From 0746d9416116cbe6f0614c46ed6519aca2afabc0 Mon Sep 17 00:00:00 2001 From: "Christian Y. Brenninkmeijer" Date: Wed, 18 Oct 2023 11:50:27 +0100 Subject: [PATCH 03/18] flake8 --- spinn_machine/version/abstract_version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spinn_machine/version/abstract_version.py b/spinn_machine/version/abstract_version.py index d152a87b..ef0aeec1 100644 --- a/spinn_machine/version/abstract_version.py +++ b/spinn_machine/version/abstract_version.py @@ -313,7 +313,7 @@ def minimum_cores_expected(self) -> int: """ @abstractmethod - def illegal_ethernet_message(self, x:int, y:int) -> Optional[str]: + def illegal_ethernet_message(self, x: int, y: int) -> Optional[str]: """ Checks if x and y could be for an ethernet @@ -327,4 +327,4 @@ def illegal_ethernet_message(self, x:int, y:int) -> Optional[str]: :param int x: :param int y: :return: An explanation that the x and y can never be an ethernet - """ \ No newline at end of file + """ From a7eac9419b2ebe0d6a573cce10972afdee3f7fbe Mon Sep 17 00:00:00 2001 From: "Christian Y. Brenninkmeijer" Date: Wed, 18 Oct 2023 12:06:21 +0100 Subject: [PATCH 04/18] if where is fails just use str of Chip --- spinn_machine/machine.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/spinn_machine/machine.py b/spinn_machine/machine.py index 49e43802..5753e0f1 100644 --- a/spinn_machine/machine.py +++ b/spinn_machine/machine.py @@ -326,15 +326,18 @@ def where_is_chip(self, chip: Chip) -> str: :return: A human-readable description of the location of a chip. :rtype: str """ - chip00 = self[0, 0] try: - local00 = self[chip.nearest_ethernet_x, chip.nearest_ethernet_y] - ip_address = f"on {local00.ip_address}" - except KeyError: - ip_address = "" - (localx, localy) = self.get_local_xy(chip) - return (f"global chip {chip.x}, {chip.y} on {chip00.ip_address} " - f"is chip {localx}, {localy} {ip_address}") + chip00 = self[0, 0] + try: + local00 = self[chip.nearest_ethernet_x, chip.nearest_ethernet_y] + ip_address = f"on {local00.ip_address}" + except KeyError: + ip_address = "" + (localx, localy) = self.get_local_xy(chip) + return (f"global chip {chip.x}, {chip.y} on {chip00.ip_address} " + f"is chip {localx}, {localy} {ip_address}") + except Exception: # pylint: disable=broad-except + return str(Chip) def where_is_xy(self, x: int, y: int) -> str: """ From bd998b043d667c0510db2c53235b332ea4bdc8a9 Mon Sep 17 00:00:00 2001 From: "Christian Y. Brenninkmeijer" Date: Wed, 18 Oct 2023 12:10:38 +0100 Subject: [PATCH 05/18] flake8 --- spinn_machine/machine.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spinn_machine/machine.py b/spinn_machine/machine.py index 5753e0f1..45092259 100644 --- a/spinn_machine/machine.py +++ b/spinn_machine/machine.py @@ -329,7 +329,8 @@ def where_is_chip(self, chip: Chip) -> str: try: chip00 = self[0, 0] try: - local00 = self[chip.nearest_ethernet_x, chip.nearest_ethernet_y] + local00 = self[chip.nearest_ethernet_x, + chip.nearest_ethernet_y] ip_address = f"on {local00.ip_address}" except KeyError: ip_address = "" From 2765460b82176cc202e5ca221abf3c594fd6df0f Mon Sep 17 00:00:00 2001 From: "Christian Y. Brenninkmeijer" Date: Wed, 18 Oct 2023 14:06:44 +0100 Subject: [PATCH 06/18] raise NotImplementedError for type checker --- spinn_machine/version/abstract_version.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spinn_machine/version/abstract_version.py b/spinn_machine/version/abstract_version.py index ef0aeec1..0a50f03b 100644 --- a/spinn_machine/version/abstract_version.py +++ b/spinn_machine/version/abstract_version.py @@ -311,6 +311,7 @@ def minimum_cores_expected(self) -> int: :return: The lowest number of cores to accept before flagging a Chip to be blacklisted """ + raise NotImplementedError @abstractmethod def illegal_ethernet_message(self, x: int, y: int) -> Optional[str]: @@ -328,3 +329,4 @@ def illegal_ethernet_message(self, x: int, y: int) -> Optional[str]: :param int y: :return: An explanation that the x and y can never be an ethernet """ + raise NotImplementedError \ No newline at end of file From 260c89a92dd23d037bd01db23fb3bcc35af510c0 Mon Sep 17 00:00:00 2001 From: "Christian Y. Brenninkmeijer" Date: Wed, 18 Oct 2023 14:10:26 +0100 Subject: [PATCH 07/18] flake8 --- spinn_machine/version/abstract_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spinn_machine/version/abstract_version.py b/spinn_machine/version/abstract_version.py index 0a50f03b..e3b7a59d 100644 --- a/spinn_machine/version/abstract_version.py +++ b/spinn_machine/version/abstract_version.py @@ -329,4 +329,4 @@ def illegal_ethernet_message(self, x: int, y: int) -> Optional[str]: :param int y: :return: An explanation that the x and y can never be an ethernet """ - raise NotImplementedError \ No newline at end of file + raise NotImplementedError From a9e677f3a9950fbd0da7e8fa1d10339e4bf9f448 Mon Sep 17 00:00:00 2001 From: "Christian Y. Brenninkmeijer" Date: Wed, 18 Oct 2023 14:24:55 +0100 Subject: [PATCH 08/18] doc fixes --- spinn_machine/version/abstract_version.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/spinn_machine/version/abstract_version.py b/spinn_machine/version/abstract_version.py index e3b7a59d..f95e6618 100644 --- a/spinn_machine/version/abstract_version.py +++ b/spinn_machine/version/abstract_version.py @@ -316,17 +316,18 @@ def minimum_cores_expected(self) -> int: @abstractmethod def illegal_ethernet_message(self, x: int, y: int) -> Optional[str]: """ - Checks if x and y could be for an ethernet + Checks if x and y could be for an Ethernet This method will return an explanation if the values for x and y are known be illegal for an ethernet chip. - Due to the limited inforamtion available this method will generate - False negatives. Ie x, y that for this machine which can - not be ethernets generating no explanation. + Due to the limited information available this method will generate + False negatives. + So this method returning None does not imply that x, y is an + Ethernet location :param int x: :param int y: - :return: An explanation that the x and y can never be an ethernet + :return: An explanation that the x and y can never be an Ethernet """ raise NotImplementedError From 33623c1bdc2b565e09b816a54475622bf6d3d68c Mon Sep 17 00:00:00 2001 From: "Christian Y. Brenninkmeijer" Date: Wed, 18 Oct 2023 14:31:52 +0100 Subject: [PATCH 09/18] more spellings --- spinn_machine/version/abstract_version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spinn_machine/version/abstract_version.py b/spinn_machine/version/abstract_version.py index f95e6618..7301c008 100644 --- a/spinn_machine/version/abstract_version.py +++ b/spinn_machine/version/abstract_version.py @@ -316,10 +316,10 @@ def minimum_cores_expected(self) -> int: @abstractmethod def illegal_ethernet_message(self, x: int, y: int) -> Optional[str]: """ - Checks if x and y could be for an Ethernet + Checks if x and y could be for an Ethernet. This method will return an explanation if the values for x and y are - known be illegal for an ethernet chip. + known be illegal for an Ethernet chip. Due to the limited information available this method will generate False negatives. From 590f1656aeee921cd2fd0dedb13392d8fe8a222c Mon Sep 17 00:00:00 2001 From: "Christian Y. Brenninkmeijer" Date: Mon, 23 Oct 2023 12:21:18 +0100 Subject: [PATCH 10/18] simplify Chip str but include monitor --- spinn_machine/chip.py | 13 +++++++------ unittests/test_chip.py | 35 +++-------------------------------- 2 files changed, 10 insertions(+), 38 deletions(-) diff --git a/spinn_machine/chip.py b/spinn_machine/chip.py index 8fad1feb..f8b723df 100644 --- a/spinn_machine/chip.py +++ b/spinn_machine/chip.py @@ -326,13 +326,14 @@ def __contains__(self, processor_id: int) -> bool: return self.is_processor_with_id(processor_id) def __str__(self) -> str: + if self._ip_address: + ip_info = f"ip_address={self.ip_address} " + else: + ip_info = "" return ( - f"[Chip: x={self._x}, y={self._y}, " - f"sdram={self.sdram // (1024 * 1024)} MB, " - f"ip_address={self.ip_address}, router={self.router}, " - f"processors={list(self._p.values())}, " - f"nearest_ethernet={self._nearest_ethernet_x}:" - f"{self._nearest_ethernet_y}]") + f"[Chip: x={self._x}, y={self._y}, {ip_info}" + f"n_cores={self.n_processors}, " + f"mon={self.get_physical_core_id([0])}]") def __repr__(self) -> str: return self.__str__() diff --git a/unittests/test_chip.py b/unittests/test_chip.py index cb277b45..c4ff6a3d 100644 --- a/unittests/test_chip.py +++ b/unittests/test_chip.py @@ -56,38 +56,9 @@ def test_create_chip(self): self.assertIsNone(new_chip[42]) print(new_chip.__repr__()) self.assertEqual( - new_chip.__repr__(), - "[Chip: x=0, y=1, sdram=0 MB, ip_address=192.162.240.253, " - "router=[Router: " - "available_entries=1024, links=[" - "[Link: source_x=0, source_y=0, source_link_id=0, " - "destination_x=1, destination_y=1], " - "[Link: source_x=0, source_y=1, source_link_id=1, " - "destination_x=1, destination_y=0], " - "[Link: source_x=1, source_y=1, source_link_id=2, " - "destination_x=0, destination_y=0], " - "[Link: source_x=1, source_y=0, source_link_id=3, " - "destination_x=0, destination_y=1]" - "]], processors=[" - "[CPU: id=0, clock_speed=200 MHz, monitor=True], " - "[CPU: id=1, clock_speed=200 MHz, monitor=False], " - "[CPU: id=2, clock_speed=200 MHz, monitor=False], " - "[CPU: id=3, clock_speed=200 MHz, monitor=False], " - "[CPU: id=4, clock_speed=200 MHz, monitor=False], " - "[CPU: id=5, clock_speed=200 MHz, monitor=False], " - "[CPU: id=6, clock_speed=200 MHz, monitor=False], " - "[CPU: id=7, clock_speed=200 MHz, monitor=False], " - "[CPU: id=8, clock_speed=200 MHz, monitor=False], " - "[CPU: id=9, clock_speed=200 MHz, monitor=False], " - "[CPU: id=10, clock_speed=200 MHz, monitor=False], " - "[CPU: id=11, clock_speed=200 MHz, monitor=False], " - "[CPU: id=12, clock_speed=200 MHz, monitor=False], " - "[CPU: id=13, clock_speed=200 MHz, monitor=False], " - "[CPU: id=14, clock_speed=200 MHz, monitor=False], " - "[CPU: id=15, clock_speed=200 MHz, monitor=False], " - "[CPU: id=16, clock_speed=200 MHz, monitor=False], " - "[CPU: id=17, clock_speed=200 MHz, monitor=False]], " - "nearest_ethernet=0:0]") + "[Chip: x=0, y=1, ip_address=192.162.240.253 " + "n_cores=18, mon=None]", + new_chip.__repr__(),) self.assertEqual(new_chip.tag_ids, OrderedSet([1, 2, 3, 4, 5, 6, 7])) self.assertEqual( [p[0] for p in new_chip], From 9aceb32bb0f91ea87d6c55a0e9302d477bee5ec5 Mon Sep 17 00:00:00 2001 From: "Christian Y. Brenninkmeijer" Date: Mon, 23 Oct 2023 12:42:29 +0100 Subject: [PATCH 11/18] get_physical_core_id takes an int --- spinn_machine/chip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spinn_machine/chip.py b/spinn_machine/chip.py index f8b723df..4d0dd6eb 100644 --- a/spinn_machine/chip.py +++ b/spinn_machine/chip.py @@ -333,7 +333,7 @@ def __str__(self) -> str: return ( f"[Chip: x={self._x}, y={self._y}, {ip_info}" f"n_cores={self.n_processors}, " - f"mon={self.get_physical_core_id([0])}]") + f"mon={self.get_physical_core_id(0)}]") def __repr__(self) -> str: return self.__str__() From 902c12eb3ed0a2da4ed3f67bd4203b5af30e515a Mon Sep 17 00:00:00 2001 From: "Christian Y. Brenninkmeijer" Date: Mon, 23 Oct 2023 12:51:34 +0100 Subject: [PATCH 12/18] use Chip str --- spinn_machine/machine_factory.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/spinn_machine/machine_factory.py b/spinn_machine/machine_factory.py index 2d4dc719..4df158a3 100644 --- a/spinn_machine/machine_factory.py +++ b/spinn_machine/machine_factory.py @@ -28,23 +28,23 @@ "Your machine has {} at {} on board {} which will cause algorithms to " "fail. Please report this to spinnakerusers@googlegroups.com \n\n") ONE_LINK_SAME_BOARD_MSG = ( - "Link {} from global chip id {}:{} to global chip id {}:{} does not " + "Link {} from {} to {} does not " "exist, but the opposite does. Both chips live on the same board under " "ip address {} and are local chip ids {}:{} and {}:{}. " "Please report this to spinnakerusers@googlegroups.com \n\n") ONE_LINK_DIFFERENT_BOARDS_MSG = ( - "Link {} from global chip id {}:{} to global chip id {}:{} does not " + "Link {} from {} to {} does not " "exist, but the opposite does. The chips live on different boards. " "chip {}:{} resides on board with ip address {} with local id {}:{} and " "chip {}:{} resides on board with ip address {} with local id {}:{}. " "Please report this to spinnakerusers@googlegroups.com \n\n") ONE_LINK_DEAD_CHIP = ( - "Link {} from global dead chip id {}:{} to global chip id {}:{} does not " + "Link {} from {} to {} does not " "exist, but the opposite does. chip {}:{} resides on board with ip " "address {} but as chip {}:{} is dead, we cannot report its ip address. " "Please report this to spinnakerusers@googlegroups.com \n\n") CHIP_REMOVED_BY_DEAD_PARENT = ( - "The chip {}:{} will fail to receive signals because its parent {}:{} in" + "The {} will fail to receive signals because its parent {}:{} in" " the signal tree has disappeared from the machine since it was booted. " "This occurred on board with ip address {} Please report this to " "spinnakerusers@googlegroups.com \n\n") @@ -101,6 +101,7 @@ def _machine_ignore( return new_machine + def _generate_uni_direction_link_error( dest_x: int, dest_y: int, src_x: int, src_y: int, back: int, original: Machine) -> str: @@ -113,7 +114,7 @@ def _generate_uni_direction_link_error( # if the dest chip is dead. Only report src chip ip address. if dest_chip is None: return ONE_LINK_DEAD_CHIP.format( - back, dest_x, dest_y, src_x, src_y, src_x, src_y, src_ethernet, + back, dest_chip, src_chip, src_x, src_y, src_ethernet, dest_x, dest_y) # got working chips, so get the separate ethernet's @@ -128,12 +129,12 @@ def _generate_uni_direction_link_error( # board. if src_ethernet == dest_ethernet: return ONE_LINK_SAME_BOARD_MSG.format( - back, dest_x, dest_y, src_x, src_y, src_ethernet, + back, dest_chip, src_chip, src_ethernet, local_dest_chip_x, local_dest_chip_y, local_src_chip_x, local_src_chip_y) else: return ONE_LINK_DIFFERENT_BOARDS_MSG.format( - back, dest_x, dest_y, src_x, src_y, dest_x, dest_y, dest_ethernet, + back, dest_chip, src_chip, dest_x, dest_y, dest_ethernet, local_dest_chip_x, local_dest_chip_y, src_x, src_y, src_ethernet, local_src_chip_x, local_src_chip_y) @@ -210,7 +211,7 @@ def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()): ethernet_chip = original[ chip.nearest_ethernet_x, chip.nearest_ethernet_y] msg = CHIP_REMOVED_BY_DEAD_PARENT.format( - chip.x, chip.y, parent_x, parent_y, + chip, parent_x, parent_y, ethernet_chip.ip_address) if repair_machine: dead_chips.add((chip.x, chip.y)) From c22361fc15fb2633a375da65e1a84d0e5996f304 Mon Sep 17 00:00:00 2001 From: "Christian Y. Brenninkmeijer" Date: Mon, 23 Oct 2023 14:47:05 +0100 Subject: [PATCH 13/18] use methods for errors messages --- spinn_machine/machine_factory.py | 100 ++++++++++++++++++------------- 1 file changed, 58 insertions(+), 42 deletions(-) diff --git a/spinn_machine/machine_factory.py b/spinn_machine/machine_factory.py index 4df158a3..f33bc204 100644 --- a/spinn_machine/machine_factory.py +++ b/spinn_machine/machine_factory.py @@ -24,31 +24,6 @@ logger = FormatAdapter(logging.getLogger(__name__)) -BAD_MSG = ( - "Your machine has {} at {} on board {} which will cause algorithms to " - "fail. Please report this to spinnakerusers@googlegroups.com \n\n") -ONE_LINK_SAME_BOARD_MSG = ( - "Link {} from {} to {} does not " - "exist, but the opposite does. Both chips live on the same board under " - "ip address {} and are local chip ids {}:{} and {}:{}. " - "Please report this to spinnakerusers@googlegroups.com \n\n") -ONE_LINK_DIFFERENT_BOARDS_MSG = ( - "Link {} from {} to {} does not " - "exist, but the opposite does. The chips live on different boards. " - "chip {}:{} resides on board with ip address {} with local id {}:{} and " - "chip {}:{} resides on board with ip address {} with local id {}:{}. " - "Please report this to spinnakerusers@googlegroups.com \n\n") -ONE_LINK_DEAD_CHIP = ( - "Link {} from {} to {} does not " - "exist, but the opposite does. chip {}:{} resides on board with ip " - "address {} but as chip {}:{} is dead, we cannot report its ip address. " - "Please report this to spinnakerusers@googlegroups.com \n\n") -CHIP_REMOVED_BY_DEAD_PARENT = ( - "The {} will fail to receive signals because its parent {}:{} in" - " the signal tree has disappeared from the machine since it was booted. " - "This occurred on board with ip address {} Please report this to " - "spinnakerusers@googlegroups.com \n\n") - def _machine_ignore( original: Machine, dead_chips: Collection[XY], @@ -101,7 +76,6 @@ def _machine_ignore( return new_machine - def _generate_uni_direction_link_error( dest_x: int, dest_y: int, src_x: int, src_y: int, back: int, original: Machine) -> str: @@ -113,9 +87,7 @@ def _generate_uni_direction_link_error( # if the dest chip is dead. Only report src chip ip address. if dest_chip is None: - return ONE_LINK_DEAD_CHIP.format( - back, dest_chip, src_chip, src_x, src_y, src_ethernet, - dest_x, dest_y) + return __link_dead_chip(back, src_chip, dest_x, dest_y, src_ethernet) # got working chips, so get the separate ethernet's dest_ethernet = original[ @@ -128,15 +100,14 @@ def _generate_uni_direction_link_error( # generate bespoke error message based off if they both reside on same # board. if src_ethernet == dest_ethernet: - return ONE_LINK_SAME_BOARD_MSG.format( - back, dest_chip, src_chip, src_ethernet, - local_dest_chip_x, local_dest_chip_y, local_src_chip_x, - local_src_chip_y) + return __one_link_same_board_msg( + back, dest_chip, src_chip, src_ethernet, local_dest_chip_x, + local_dest_chip_y, local_src_chip_x, local_src_chip_y) else: - return ONE_LINK_DIFFERENT_BOARDS_MSG.format( - back, dest_chip, src_chip, dest_x, dest_y, dest_ethernet, - local_dest_chip_x, local_dest_chip_y, src_x, src_y, src_ethernet, - local_src_chip_x, local_src_chip_y) + return one_link_different_boards_msg( + back, dest_chip, src_chip, dest_ethernet, local_dest_chip_x, + local_dest_chip_y, src_ethernet, local_src_chip_x, + local_src_chip_y) def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()): @@ -169,7 +140,7 @@ def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()): chip = original[xy[0], xy[1]] error_xy = original.get_local_xy(chip) ethernet = original[chip.nearest_ethernet_x, chip.nearest_ethernet_y] - msg = BAD_MSG.format( + msg = __bad_message( "unreachable incoming chips", error_xy, ethernet.ip_address) if repair_machine: dead_chips.add(xy) @@ -181,7 +152,7 @@ def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()): chip = original[xy[0], xy[1]] error_xy = original.get_local_xy(chip) ethernet = original[chip.nearest_ethernet_x, chip.nearest_ethernet_y] - msg = BAD_MSG.format( + msg = __bad_message( "unreachable outgoing chips", error_xy, ethernet.ip_address) if repair_machine: dead_chips.add(xy) @@ -210,9 +181,8 @@ def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()): if not original.is_chip_at(parent_x, parent_y): ethernet_chip = original[ chip.nearest_ethernet_x, chip.nearest_ethernet_y] - msg = CHIP_REMOVED_BY_DEAD_PARENT.format( - chip, parent_x, parent_y, - ethernet_chip.ip_address) + msg = __chip_dead_parent_msg( + chip, parent_x, parent_y, ethernet_chip.ip_address) if repair_machine: dead_chips.add((chip.x, chip.y)) logger.warning(msg) @@ -228,3 +198,49 @@ def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()): new_machine = _machine_ignore(original, dead_chips, dead_links) return machine_repair(new_machine) + + +def __bad_message(issue: str, xp: XY, address: str) -> str: + return f"Your machine has {issue} at {xp} on board {address} " \ + f"which will cause algorithms to fail. " \ + f"Please report this to spinnakerusers@googlegroups.com \n\n" + + +def __one_link_same_board_msg( + link: int, source: Chip, target: Chip, address: str, + source_x: int, source_y: int, target_x: int, target_y: int) -> str: + return f"Link {link} from {source} to {target} does not exist, " \ + f"but the opposite does. Both chips live on the same board under " \ + f"ip address {address} and are local chip ids " \ + f"{source_x}:{source_y} and {target_x}:{target_y}. " \ + f"Please report this to spinnakerusers@googlegroups.com \n\n" + + +def one_link_different_boards_msg( + link: int, source: Chip, target: Chip, source_x: int, source_y: int, + source_address: str, target_x: int, target_y: int, + target_address: str) -> str: + return f"Link {link} from {source} to {target} does not exist, " \ + f"but the opposite does. The chips live on different boards. " \ + f"chip {source.x}:{source.y} resides on board with ip address " \ + f"{source_address} with local id {source_x}:{source_y} and " \ + f"chip {target.x}:{target.y} resides on board with ip address " \ + f"{target_address} with local id {target_x}:{target_y}. " \ + f"Please report this to spinnakerusers@googlegroups.com \n\n" + + +def __link_dead_chip(link: int, source: Chip, target_x: int, target_y: int, + address: str) -> str: + return f"Link {link} from {source} to {target_x}:{target_y} " \ + f"points to a dead chip. Chip {source.x}:" \ + f"{source.y} resides on board with ip address {address}. " \ + f"Please report this to spinnakerusers@googlegroups.com \n\n" + + +def __chip_dead_parent_msg( + source: Chip, parent_x: int, parent_y: int, address : str) -> str: + return f"The {source: Chip} will fail to receive signals because its " \ + f"parent {parent_x}:{parent_y} in the signal tree has " \ + f"disappeared from the machine since it was booted. " \ + f"This occurred on board with ip address {address} " \ + f"Please report this to spinnakerusers@googlegroups.com \n\n" From 7a2f8e68b2da681bd42ee276dd71ec185b7baffe Mon Sep 17 00:00:00 2001 From: "Christian Y. Brenninkmeijer" Date: Mon, 23 Oct 2023 14:53:14 +0100 Subject: [PATCH 14/18] flake8 --- spinn_machine/machine_factory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spinn_machine/machine_factory.py b/spinn_machine/machine_factory.py index f33bc204..a5e38954 100644 --- a/spinn_machine/machine_factory.py +++ b/spinn_machine/machine_factory.py @@ -230,7 +230,7 @@ def one_link_different_boards_msg( def __link_dead_chip(link: int, source: Chip, target_x: int, target_y: int, - address: str) -> str: + address: str) -> str: return f"Link {link} from {source} to {target_x}:{target_y} " \ f"points to a dead chip. Chip {source.x}:" \ f"{source.y} resides on board with ip address {address}. " \ @@ -238,7 +238,7 @@ def __link_dead_chip(link: int, source: Chip, target_x: int, target_y: int, def __chip_dead_parent_msg( - source: Chip, parent_x: int, parent_y: int, address : str) -> str: + source: Chip, parent_x: int, parent_y: int, address: str) -> str: return f"The {source: Chip} will fail to receive signals because its " \ f"parent {parent_x}:{parent_y} in the signal tree has " \ f"disappeared from the machine since it was booted. " \ From 65ee771ea0d353c3fc556aec123f481f7158538a Mon Sep 17 00:00:00 2001 From: "Christian Y. Brenninkmeijer" Date: Mon, 23 Oct 2023 15:00:32 +0100 Subject: [PATCH 15/18] ip_address has type Optional[str] --- spinn_machine/machine_factory.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/spinn_machine/machine_factory.py b/spinn_machine/machine_factory.py index a5e38954..1a7ab527 100644 --- a/spinn_machine/machine_factory.py +++ b/spinn_machine/machine_factory.py @@ -14,7 +14,7 @@ from collections import defaultdict import logging -from typing import Collection, Iterable, Set, Tuple +from typing import Collection, Iterable, Optional, Set, Tuple from spinn_utilities.config_holder import get_config_bool from spinn_utilities.log import FormatAdapter from spinn_utilities.typing.coords import XY @@ -200,14 +200,14 @@ def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()): return machine_repair(new_machine) -def __bad_message(issue: str, xp: XY, address: str) -> str: +def __bad_message(issue: str, xp: XY, address: Optional[str]) -> str: return f"Your machine has {issue} at {xp} on board {address} " \ f"which will cause algorithms to fail. " \ f"Please report this to spinnakerusers@googlegroups.com \n\n" def __one_link_same_board_msg( - link: int, source: Chip, target: Chip, address: str, + link: int, source: Chip, target: Chip, address: Optional[str], source_x: int, source_y: int, target_x: int, target_y: int) -> str: return f"Link {link} from {source} to {target} does not exist, " \ f"but the opposite does. Both chips live on the same board under " \ @@ -218,8 +218,8 @@ def __one_link_same_board_msg( def one_link_different_boards_msg( link: int, source: Chip, target: Chip, source_x: int, source_y: int, - source_address: str, target_x: int, target_y: int, - target_address: str) -> str: + source_address: Optional[str], target_x: int, target_y: int, + target_address: Optional[str]) -> str: return f"Link {link} from {source} to {target} does not exist, " \ f"but the opposite does. The chips live on different boards. " \ f"chip {source.x}:{source.y} resides on board with ip address " \ @@ -230,15 +230,15 @@ def one_link_different_boards_msg( def __link_dead_chip(link: int, source: Chip, target_x: int, target_y: int, - address: str) -> str: + address: Optional[str]) -> str: return f"Link {link} from {source} to {target_x}:{target_y} " \ f"points to a dead chip. Chip {source.x}:" \ f"{source.y} resides on board with ip address {address}. " \ f"Please report this to spinnakerusers@googlegroups.com \n\n" -def __chip_dead_parent_msg( - source: Chip, parent_x: int, parent_y: int, address: str) -> str: +def __chip_dead_parent_msg(source: Chip, parent_x: int, parent_y: int, + address: Optional[str]) -> str: return f"The {source: Chip} will fail to receive signals because its " \ f"parent {parent_x}:{parent_y} in the signal tree has " \ f"disappeared from the machine since it was booted. " \ From c3a679c1ba209d05020eb6a114c58d2e9036c9a2 Mon Sep 17 00:00:00 2001 From: "Christian Y. Brenninkmeijer" Date: Mon, 23 Oct 2023 15:46:52 +0100 Subject: [PATCH 16/18] inline the messages --- spinn_machine/machine_factory.py | 93 ++++++++++++-------------------- 1 file changed, 33 insertions(+), 60 deletions(-) diff --git a/spinn_machine/machine_factory.py b/spinn_machine/machine_factory.py index 1a7ab527..0477a72c 100644 --- a/spinn_machine/machine_factory.py +++ b/spinn_machine/machine_factory.py @@ -77,7 +77,7 @@ def _machine_ignore( def _generate_uni_direction_link_error( - dest_x: int, dest_y: int, src_x: int, src_y: int, back: int, + dest_x: int, dest_y: int, src_x: int, src_y: int, out: int, back: int, original: Machine) -> str: # get the chips so we can find ethernet's and local ids dest_chip = original.get_chip_at(dest_x, dest_y) @@ -87,7 +87,10 @@ def _generate_uni_direction_link_error( # if the dest chip is dead. Only report src chip ip address. if dest_chip is None: - return __link_dead_chip(back, src_chip, dest_x, dest_y, src_ethernet) + return f"Link {out} from {src_chip} to {dest_x}:{dest_y} points to " \ + f"a dead chip. Chip {src_x}:{src_y} resides on board with ip " \ + f"address {src_ethernet}. " \ + f"Please report this to spinnakerusers@googlegroups.com \n\n" # got working chips, so get the separate ethernet's dest_ethernet = original[ @@ -100,14 +103,21 @@ def _generate_uni_direction_link_error( # generate bespoke error message based off if they both reside on same # board. if src_ethernet == dest_ethernet: - return __one_link_same_board_msg( - back, dest_chip, src_chip, src_ethernet, local_dest_chip_x, - local_dest_chip_y, local_src_chip_x, local_src_chip_y) + return f"Link {back} from {dest_chip} to {src_chip} does not exist, " \ + f"but the opposite does. Both chips live on the same board " \ + f"under ip address {src_ethernet} and are local chip " \ + f"ids {local_dest_chip_x}:{local_dest_chip_y} and " \ + f"{local_src_chip_x}:{local_src_chip_y}. " \ + f"Please report this to spinnakerusers@googlegroups.com \n\n" else: - return one_link_different_boards_msg( - back, dest_chip, src_chip, dest_ethernet, local_dest_chip_x, - local_dest_chip_y, src_ethernet, local_src_chip_x, - local_src_chip_y) + return f"Link {back} from {dest_chip} to {src_chip} does not exist, " \ + f"but the opposite does. The chips live on different boards. " \ + f"chip {dest_x}:{dest_y} resides on board with ip address " \ + f"{dest_ethernet} with local id {local_dest_chip_x}:" \ + f"{local_dest_chip_y} and chip {src_x}:{src_y} resides on " \ + f"board with ip address {src_ethernet} with local id " \ + f"{local_src_chip_x}:{local_src_chip_y}. " \ + f"Please report this to spinnakerusers@googlegroups.com \n\n" def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()): @@ -140,8 +150,9 @@ def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()): chip = original[xy[0], xy[1]] error_xy = original.get_local_xy(chip) ethernet = original[chip.nearest_ethernet_x, chip.nearest_ethernet_y] - msg = __bad_message( - "unreachable incoming chips", error_xy, ethernet.ip_address) + msg = f"Your machine has unreachable incoming chips at {error_xy} " \ + f"on board {ethernet} which will cause algorithms to fail. " \ + f"Please report this to spinnakerusers@googlegroups.com \n\n" if repair_machine: dead_chips.add(xy) logger.warning(msg) @@ -152,8 +163,9 @@ def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()): chip = original[xy[0], xy[1]] error_xy = original.get_local_xy(chip) ethernet = original[chip.nearest_ethernet_x, chip.nearest_ethernet_y] - msg = __bad_message( - "unreachable outgoing chips", error_xy, ethernet.ip_address) + msg = f"Your machine has unreachable outgoing chips at {error_xy} " \ + f"on board {ethernet} which will cause algorithms to fail. " \ + f"Please report this to spinnakerusers@googlegroups.com \n\n" if repair_machine: dead_chips.add(xy) logger.warning(msg) @@ -166,7 +178,7 @@ def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()): dead_links.add((source_x, source_y, out, back)) else: uni_direction_link_message = _generate_uni_direction_link_error( - dest_x, dest_y, source_x, source_y, back, original) + dest_x, dest_y, source_x, source_y, out, back, original) if repair_machine: dead_links.add((source_x, source_y, out, back)) logger.warning(uni_direction_link_message) @@ -181,6 +193,13 @@ def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()): if not original.is_chip_at(parent_x, parent_y): ethernet_chip = original[ chip.nearest_ethernet_x, chip.nearest_ethernet_y] + return f"The {source: Chip} will fail to receive signals " \ + f"because its parent {parent_x}:{parent_y} in the " \ + f"signal tree has disappeared from the machine since " \ + f"it was booted. This occurred on board with " \ + f"ip address {address} Please report this to " \ + f"spinnakerusers@googlegroups.com \n\n" + msg = __chip_dead_parent_msg( chip, parent_x, parent_y, ethernet_chip.ip_address) if repair_machine: @@ -198,49 +217,3 @@ def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()): new_machine = _machine_ignore(original, dead_chips, dead_links) return machine_repair(new_machine) - - -def __bad_message(issue: str, xp: XY, address: Optional[str]) -> str: - return f"Your machine has {issue} at {xp} on board {address} " \ - f"which will cause algorithms to fail. " \ - f"Please report this to spinnakerusers@googlegroups.com \n\n" - - -def __one_link_same_board_msg( - link: int, source: Chip, target: Chip, address: Optional[str], - source_x: int, source_y: int, target_x: int, target_y: int) -> str: - return f"Link {link} from {source} to {target} does not exist, " \ - f"but the opposite does. Both chips live on the same board under " \ - f"ip address {address} and are local chip ids " \ - f"{source_x}:{source_y} and {target_x}:{target_y}. " \ - f"Please report this to spinnakerusers@googlegroups.com \n\n" - - -def one_link_different_boards_msg( - link: int, source: Chip, target: Chip, source_x: int, source_y: int, - source_address: Optional[str], target_x: int, target_y: int, - target_address: Optional[str]) -> str: - return f"Link {link} from {source} to {target} does not exist, " \ - f"but the opposite does. The chips live on different boards. " \ - f"chip {source.x}:{source.y} resides on board with ip address " \ - f"{source_address} with local id {source_x}:{source_y} and " \ - f"chip {target.x}:{target.y} resides on board with ip address " \ - f"{target_address} with local id {target_x}:{target_y}. " \ - f"Please report this to spinnakerusers@googlegroups.com \n\n" - - -def __link_dead_chip(link: int, source: Chip, target_x: int, target_y: int, - address: Optional[str]) -> str: - return f"Link {link} from {source} to {target_x}:{target_y} " \ - f"points to a dead chip. Chip {source.x}:" \ - f"{source.y} resides on board with ip address {address}. " \ - f"Please report this to spinnakerusers@googlegroups.com \n\n" - - -def __chip_dead_parent_msg(source: Chip, parent_x: int, parent_y: int, - address: Optional[str]) -> str: - return f"The {source: Chip} will fail to receive signals because its " \ - f"parent {parent_x}:{parent_y} in the signal tree has " \ - f"disappeared from the machine since it was booted. " \ - f"This occurred on board with ip address {address} " \ - f"Please report this to spinnakerusers@googlegroups.com \n\n" From 5af0c02514574db5554b03e58c713d93af847efa Mon Sep 17 00:00:00 2001 From: "Christian Y. Brenninkmeijer" Date: Mon, 23 Oct 2023 15:53:29 +0100 Subject: [PATCH 17/18] minor fixes --- spinn_machine/machine_factory.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/spinn_machine/machine_factory.py b/spinn_machine/machine_factory.py index 0477a72c..7c64b193 100644 --- a/spinn_machine/machine_factory.py +++ b/spinn_machine/machine_factory.py @@ -14,7 +14,7 @@ from collections import defaultdict import logging -from typing import Collection, Iterable, Optional, Set, Tuple +from typing import Collection, Iterable, Set, Tuple from spinn_utilities.config_holder import get_config_bool from spinn_utilities.log import FormatAdapter from spinn_utilities.typing.coords import XY @@ -191,17 +191,14 @@ def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()): parent_x, parent_y = original.xy_over_link( chip.x, chip.y, chip.parent_link) if not original.is_chip_at(parent_x, parent_y): - ethernet_chip = original[ - chip.nearest_ethernet_x, chip.nearest_ethernet_y] - return f"The {source: Chip} will fail to receive signals " \ - f"because its parent {parent_x}:{parent_y} in the " \ - f"signal tree has disappeared from the machine since " \ - f"it was booted. This occurred on board with " \ - f"ip address {address} Please report this to " \ - f"spinnakerusers@googlegroups.com \n\n" - - msg = __chip_dead_parent_msg( - chip, parent_x, parent_y, ethernet_chip.ip_address) + ethernet = original[chip.nearest_ethernet_x, + chip.nearest_ethernet_y].ip_address + msg = f"The source: {Chip} will fail to receive signals " \ + f"because its parent {parent_x}:{parent_y} in the " \ + f"signal tree has disappeared from the machine since " \ + f"it was booted. This occurred on board with " \ + f"ip address {ethernet} Please report this to " \ + f"spinnakerusers@googlegroups.com \n\n" if repair_machine: dead_chips.add((chip.x, chip.y)) logger.warning(msg) From 0f1493ff79c3fc4be16465dede7817422351948d Mon Sep 17 00:00:00 2001 From: "Christian Y. Brenninkmeijer" Date: Mon, 23 Oct 2023 16:22:28 +0100 Subject: [PATCH 18/18] local variable can not change type --- spinn_machine/machine_factory.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/spinn_machine/machine_factory.py b/spinn_machine/machine_factory.py index 7c64b193..1f4ed94b 100644 --- a/spinn_machine/machine_factory.py +++ b/spinn_machine/machine_factory.py @@ -192,12 +192,13 @@ def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()): chip.x, chip.y, chip.parent_link) if not original.is_chip_at(parent_x, parent_y): ethernet = original[chip.nearest_ethernet_x, - chip.nearest_ethernet_y].ip_address + chip.nearest_ethernet_y] msg = f"The source: {Chip} will fail to receive signals " \ f"because its parent {parent_x}:{parent_y} in the " \ f"signal tree has disappeared from the machine since " \ f"it was booted. This occurred on board with " \ - f"ip address {ethernet} Please report this to " \ + f"ip address {ethernet.ip_address} " \ + f"Please report this to " \ f"spinnakerusers@googlegroups.com \n\n" if repair_machine: dead_chips.add((chip.x, chip.y))