Skip to content

Commit

Permalink
Merge branch 'master' into not_monitor
Browse files Browse the repository at this point in the history
  • Loading branch information
Christian-B authored Nov 21, 2023
2 parents 257d877 + c1258cb commit 916928c
Show file tree
Hide file tree
Showing 9 changed files with 141 additions and 104 deletions.
13 changes: 7 additions & 6 deletions spinn_machine/chip.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,13 +349,14 @@ def __contains__(self, processor_id: int) -> bool:
return self.is_processor_with_id(processor_id)

def __str__(self) -> str:
if self._ip_address:
ip_info = f"ip_address={self.ip_address} "
else:
ip_info = ""
return (
f"[Chip: x={self._x}, y={self._y}, "
f"sdram={self.sdram // (1024 * 1024)} MB, "
f"ip_address={self.ip_address}, router={self.router}, "
f"processors={list(self._p.values())}, "
f"nearest_ethernet={self._nearest_ethernet_x}:"
f"{self._nearest_ethernet_y}]")
f"[Chip: x={self._x}, y={self._y}, {ip_info}"
f"n_cores={self.n_processors}, "
f"mon={self.get_physical_core_id(0)}]")

def __repr__(self) -> str:
    # The repr is deliberately the same text as the str form.
    return self.__str__()
Expand Down
50 changes: 33 additions & 17 deletions spinn_machine/machine.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,11 +326,19 @@ def where_is_chip(self, chip: Chip) -> str:
:return: A human-readable description of the location of a chip.
:rtype: str
"""
chip00 = self[0, 0]
local00 = self[chip.nearest_ethernet_x, chip.nearest_ethernet_y]
(localx, localy) = self.get_local_xy(chip)
return (f"global chip {chip.x}, {chip.y} on {chip00.ip_address} "
f"is chip {localx}, {localy} on {local00.ip_address}")
try:
chip00 = self[0, 0]
try:
local00 = self[chip.nearest_ethernet_x,
chip.nearest_ethernet_y]
ip_address = f"on {local00.ip_address}"
except KeyError:
ip_address = ""
(localx, localy) = self.get_local_xy(chip)
return (f"global chip {chip.x}, {chip.y} on {chip00.ip_address} "
f"is chip {localx}, {localy} {ip_address}")
except Exception: # pylint: disable=broad-except
return str(Chip)

def where_is_xy(self, x: int, y: int) -> str:
"""
Expand Down Expand Up @@ -501,36 +509,44 @@ def validate(self) -> None:
# The fact that self._boot_ethernet_address is set means there is an
# ethernet chip and it is at 0,0 so no need to check that

version = MachineDataView.get_machine_version()
for chip in self.chips:
if chip.x < 0:
raise SpinnMachineException(f"{chip} has a negative x")
raise SpinnMachineException(
f"{self.where_is_chip(chip)} has a negative x")
if chip.y < 0:
raise SpinnMachineException(f"{chip} has a negative y")
raise SpinnMachineException(
f"{self.where_is_chip(chip)} has a negative y")
if chip.x >= self._width:
raise SpinnMachineException(
f"{chip} has an x larger than width {self._width}")
f"{self.where_is_chip(chip)} has an x larger "
f"than width {self._width}")
if chip.y >= self._height:
raise SpinnMachineException(
f"{chip} has a y larger than height {self._height}")
f"{self.where_is_chip(chip)} has a y larger "
f"than height {self._height}")
if chip.n_processors < version.minimum_cores_expected:
raise SpinnMachineException(
f"{self.where_is_chip(chip)} has too few cores "
f"found {chip.n_processors}")
if chip.ip_address:
# Ethernet Chip checks
if chip.x % 4 != 0:
raise SpinnMachineException(
f"Ethernet {chip} has a x which is not divisible by 4")
if (chip.x + chip.y) % 12 != 0:
error = version.illegal_ethernet_message(chip.x, chip.y)
if error is not None:
raise SpinnMachineException(
f"Ethernet {chip} has an x,y pair that "
"does not add up to 12")
f"{self.where_is_chip(chip)} {error}")
else:
# Non-Ethernet chip checks
if not self.is_chip_at(
chip.nearest_ethernet_x, chip.nearest_ethernet_y):
raise SpinnMachineException(
f"{chip} has an invalid ethernet chip")
f"{self.where_is_chip(chip)} "
f"has an invalid ethernet chip")
local_xy = self.get_local_xy(chip)
if local_xy not in self._chip_core_map:
raise SpinnMachineException(
f"{chip} has an unexpected local xy of {local_xy}")
f"{self.where_is_chip(chip)} "
f"has an unexpected local xy of {local_xy}")

@property
@abstractmethod
Expand Down
82 changes: 35 additions & 47 deletions spinn_machine/machine_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,31 +24,6 @@

logger = FormatAdapter(logging.getLogger(__name__))

BAD_MSG = (
"Your machine has {} at {} on board {} which will cause algorithms to "
"fail. Please report this to [email protected] \n\n")
ONE_LINK_SAME_BOARD_MSG = (
"Link {} from global chip id {}:{} to global chip id {}:{} does not "
"exist, but the opposite does. Both chips live on the same board under "
"ip address {} and are local chip ids {}:{} and {}:{}. "
"Please report this to [email protected] \n\n")
ONE_LINK_DIFFERENT_BOARDS_MSG = (
"Link {} from global chip id {}:{} to global chip id {}:{} does not "
"exist, but the opposite does. The chips live on different boards. "
"chip {}:{} resides on board with ip address {} with local id {}:{} and "
"chip {}:{} resides on board with ip address {} with local id {}:{}. "
"Please report this to [email protected] \n\n")
ONE_LINK_DEAD_CHIP = (
"Link {} from global dead chip id {}:{} to global chip id {}:{} does not "
"exist, but the opposite does. chip {}:{} resides on board with ip "
"address {} but as chip {}:{} is dead, we cannot report its ip address. "
"Please report this to [email protected] \n\n")
CHIP_REMOVED_BY_DEAD_PARENT = (
"The chip {}:{} will fail to receive signals because its parent {}:{} in"
" the signal tree has disappeared from the machine since it was booted. "
"This occurred on board with ip address {} Please report this to "
"[email protected] \n\n")


def _machine_ignore(
original: Machine, dead_chips: Collection[XY],
Expand Down Expand Up @@ -102,7 +77,7 @@ def _machine_ignore(


def _generate_uni_direction_link_error(
dest_x: int, dest_y: int, src_x: int, src_y: int, back: int,
dest_x: int, dest_y: int, src_x: int, src_y: int, out: int, back: int,
original: Machine) -> str:
# get the chips so we can find ethernet's and local ids
dest_chip = original.get_chip_at(dest_x, dest_y)
Expand All @@ -112,9 +87,10 @@ def _generate_uni_direction_link_error(

# if the dest chip is dead. Only report src chip ip address.
if dest_chip is None:
return ONE_LINK_DEAD_CHIP.format(
back, dest_x, dest_y, src_x, src_y, src_x, src_y, src_ethernet,
dest_x, dest_y)
return f"Link {out} from {src_chip} to {dest_x}:{dest_y} points to " \
f"a dead chip. Chip {src_x}:{src_y} resides on board with ip " \
f"address {src_ethernet}. " \
f"Please report this to [email protected] \n\n"

# got working chips, so get the separate ethernet's
dest_ethernet = original[
Expand All @@ -127,15 +103,21 @@ def _generate_uni_direction_link_error(
# generate bespoke error message based off if they both reside on same
# board.
if src_ethernet == dest_ethernet:
return ONE_LINK_SAME_BOARD_MSG.format(
back, dest_x, dest_y, src_x, src_y, src_ethernet,
local_dest_chip_x, local_dest_chip_y, local_src_chip_x,
local_src_chip_y)
return f"Link {back} from {dest_chip} to {src_chip} does not exist, " \
f"but the opposite does. Both chips live on the same board " \
f"under ip address {src_ethernet} and are local chip " \
f"ids {local_dest_chip_x}:{local_dest_chip_y} and " \
f"{local_src_chip_x}:{local_src_chip_y}. " \
f"Please report this to [email protected] \n\n"
else:
return ONE_LINK_DIFFERENT_BOARDS_MSG.format(
back, dest_x, dest_y, src_x, src_y, dest_x, dest_y, dest_ethernet,
local_dest_chip_x, local_dest_chip_y, src_x, src_y, src_ethernet,
local_src_chip_x, local_src_chip_y)
return f"Link {back} from {dest_chip} to {src_chip} does not exist, " \
f"but the opposite does. The chips live on different boards. " \
f"chip {dest_x}:{dest_y} resides on board with ip address " \
f"{dest_ethernet} with local id {local_dest_chip_x}:" \
f"{local_dest_chip_y} and chip {src_x}:{src_y} resides on " \
f"board with ip address {src_ethernet} with local id " \
f"{local_src_chip_x}:{local_src_chip_y}. " \
f"Please report this to [email protected] \n\n"


def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()):
Expand Down Expand Up @@ -168,8 +150,9 @@ def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()):
chip = original[xy[0], xy[1]]
error_xy = original.get_local_xy(chip)
ethernet = original[chip.nearest_ethernet_x, chip.nearest_ethernet_y]
msg = BAD_MSG.format(
"unreachable incoming chips", error_xy, ethernet.ip_address)
msg = f"Your machine has unreachable incoming chips at {error_xy} " \
f"on board {ethernet} which will cause algorithms to fail. " \
f"Please report this to [email protected] \n\n"
if repair_machine:
dead_chips.add(xy)
logger.warning(msg)
Expand All @@ -180,8 +163,9 @@ def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()):
chip = original[xy[0], xy[1]]
error_xy = original.get_local_xy(chip)
ethernet = original[chip.nearest_ethernet_x, chip.nearest_ethernet_y]
msg = BAD_MSG.format(
"unreachable outgoing chips", error_xy, ethernet.ip_address)
msg = f"Your machine has unreachable outgoing chips at {error_xy} " \
f"on board {ethernet} which will cause algorithms to fail. " \
f"Please report this to [email protected] \n\n"
if repair_machine:
dead_chips.add(xy)
logger.warning(msg)
Expand All @@ -194,7 +178,7 @@ def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()):
dead_links.add((source_x, source_y, out, back))
else:
uni_direction_link_message = _generate_uni_direction_link_error(
dest_x, dest_y, source_x, source_y, back, original)
dest_x, dest_y, source_x, source_y, out, back, original)
if repair_machine:
dead_links.add((source_x, source_y, out, back))
logger.warning(uni_direction_link_message)
Expand All @@ -207,11 +191,15 @@ def machine_repair(original: Machine, removed_chips: Iterable[XY] = ()):
parent_x, parent_y = original.xy_over_link(
chip.x, chip.y, chip.parent_link)
if not original.is_chip_at(parent_x, parent_y):
ethernet_chip = original[
chip.nearest_ethernet_x, chip.nearest_ethernet_y]
msg = CHIP_REMOVED_BY_DEAD_PARENT.format(
chip.x, chip.y, parent_x, parent_y,
ethernet_chip.ip_address)
ethernet = original[chip.nearest_ethernet_x,
                    chip.nearest_ethernet_y]
# Interpolate the chip being examined (the loop variable), not the
# Chip class, so the warning identifies which chip is affected.
msg = f"The source: {chip} will fail to receive signals " \
      f"because its parent {parent_x}:{parent_y} in the " \
      f"signal tree has disappeared from the machine since " \
      f"it was booted. This occurred on board with " \
      f"ip address {ethernet.ip_address} " \
      f"Please report this to " \
      f"[email protected] \n\n"
if repair_machine:
dead_chips.add((chip.x, chip.y))
logger.warning(msg)
Expand Down
34 changes: 34 additions & 0 deletions spinn_machine/version/abstract_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,3 +297,37 @@ def _create_machine(self, width: int, height: int, origin: str) -> Machine:
:rtype: ~spinn_machine.Machine
"""
raise NotImplementedError

@property
@abstractmethod
def minimum_cores_expected(self) -> int:
    """
    The minimum number of cores that we expect a Chip to have.

    If a Chip has fewer than this number of cores, Machine.validate and
    other methods are allowed to raise an exception.

    :rtype: int
    :return: The lowest number of cores to accept before flagging a
        Chip to be blacklisted
    """
    raise NotImplementedError

@abstractmethod
def illegal_ethernet_message(self, x: int, y: int) -> Optional[str]:
    """
    Checks if x and y could be for an Ethernet Chip.

    This method will return an explanation if the values for x and y are
    known to be illegal for an Ethernet Chip.

    Due to the limited information available this method will generate
    false negatives.
    So this method returning None does not imply that x, y is an
    Ethernet location.

    :param int x: The x coordinate to check
    :param int y: The y coordinate to check
    :return: An explanation that the x and y can never be an Ethernet,
        or None if no such reason is known
    :rtype: str or None
    """
    raise NotImplementedError
8 changes: 7 additions & 1 deletion spinn_machine/version/version_3.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Final, Mapping, Sequence, Tuple
from typing import Final, Mapping, Optional, Sequence, Tuple
from spinn_utilities.overrides import overrides
from spinn_utilities.typing.coords import XY
from .version_spin1 import VersionSpin1
Expand Down Expand Up @@ -66,3 +66,9 @@ def _verify_size(self, width: int, height: int):
@overrides(VersionSpin1._create_machine)
def _create_machine(self, width: int, height: int, origin: str) -> Machine:
return FullWrapMachine(width, height, CHIPS_PER_BOARD, origin)

@overrides(VersionSpin1.illegal_ethernet_message)
def illegal_ethernet_message(self, x: int, y: int) -> Optional[str]:
    """
    Explain why x, y can not be an Ethernet Chip, if a reason is known.

    :param int x: The x coordinate to check
    :param int y: The y coordinate to check
    :return: None for location 0, 0; otherwise the explanation message
    :rtype: str or None
    """
    # Guard clause: the origin is the only location that is not rejected.
    if (x, y) == (0, 0):
        return None
    return "Only Chip 0, 0 may be an Ethernet Chip"
11 changes: 10 additions & 1 deletion spinn_machine/version/version_5.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Final, Mapping, Sequence, Tuple
from typing import Final, Mapping, Optional, Sequence, Tuple
from spinn_utilities.overrides import overrides
from spinn_utilities.typing.coords import XY
from spinn_machine.exceptions import SpinnMachineException
Expand Down Expand Up @@ -98,3 +98,12 @@ def _create_machine(self, width: int, height: int, origin: str) -> Machine:
width, height, CHIPS_PER_BOARD, origin)
else:
return NoWrapMachine(width, height, CHIPS_PER_BOARD, origin)

@overrides(VersionSpin1.illegal_ethernet_message)
def illegal_ethernet_message(self, x: int, y: int) -> Optional[str]:
    """
    Explain why x, y can not be an Ethernet Chip, if a reason is known.

    :param int x: The x coordinate to check
    :param int y: The y coordinate to check
    :return: The message for the first rule that x, y breaks,
        or None if neither rule is broken
    :rtype: str or None
    """
    # Each rule is (rule holds, message to give when it does not);
    # the rules are reported in this order.
    rules = (
        (x % 4 == 0,
         "Only Chip with X divisible by 4 may be an Ethernet Chip"),
        ((x + y) % 12 == 0,
         "Only Chip with x + y divisible by 12 may be an Ethernet Chip"),
    )
    for holds, complaint in rules:
        if not holds:
            return complaint
    return None
5 changes: 5 additions & 0 deletions spinn_machine/version/version_spin1.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,8 @@ def n_non_user_cores(self) -> int:
@overrides(AbstractVersion.n_router_entries)
def n_router_entries(self) -> int:
return 1023

@property
@overrides(AbstractVersion.minimum_cores_expected)
def minimum_cores_expected(self) -> int:
    # Lowest acceptable core count for a chip; Machine.validate compares
    # chip.n_processors against the version's minimum_cores_expected.
    return 5
35 changes: 3 additions & 32 deletions unittests/test_chip.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,38 +58,9 @@ def test_create_chip(self):
self.assertIsNone(new_chip[42])
print(new_chip.__repr__())
self.assertEqual(
new_chip.__repr__(),
"[Chip: x=0, y=1, sdram=0 MB, ip_address=192.162.240.253, "
"router=[Router: "
"available_entries=1024, links=["
"[Link: source_x=0, source_y=0, source_link_id=0, "
"destination_x=1, destination_y=1], "
"[Link: source_x=0, source_y=1, source_link_id=1, "
"destination_x=1, destination_y=0], "
"[Link: source_x=1, source_y=1, source_link_id=2, "
"destination_x=0, destination_y=0], "
"[Link: source_x=1, source_y=0, source_link_id=3, "
"destination_x=0, destination_y=1]"
"]], processors=["
"[CPU: id=0, clock_speed=200 MHz, monitor=True], "
"[CPU: id=1, clock_speed=200 MHz, monitor=False], "
"[CPU: id=2, clock_speed=200 MHz, monitor=False], "
"[CPU: id=3, clock_speed=200 MHz, monitor=False], "
"[CPU: id=4, clock_speed=200 MHz, monitor=False], "
"[CPU: id=5, clock_speed=200 MHz, monitor=False], "
"[CPU: id=6, clock_speed=200 MHz, monitor=False], "
"[CPU: id=7, clock_speed=200 MHz, monitor=False], "
"[CPU: id=8, clock_speed=200 MHz, monitor=False], "
"[CPU: id=9, clock_speed=200 MHz, monitor=False], "
"[CPU: id=10, clock_speed=200 MHz, monitor=False], "
"[CPU: id=11, clock_speed=200 MHz, monitor=False], "
"[CPU: id=12, clock_speed=200 MHz, monitor=False], "
"[CPU: id=13, clock_speed=200 MHz, monitor=False], "
"[CPU: id=14, clock_speed=200 MHz, monitor=False], "
"[CPU: id=15, clock_speed=200 MHz, monitor=False], "
"[CPU: id=16, clock_speed=200 MHz, monitor=False], "
"[CPU: id=17, clock_speed=200 MHz, monitor=False]], "
"nearest_ethernet=0:0]")
"[Chip: x=0, y=1, ip_address=192.162.240.253 "
"n_cores=18, mon=None]",
new_chip.__repr__(),)
self.assertEqual(new_chip.tag_ids, OrderedSet([1, 2, 3, 4, 5, 6, 7]))
self.assertEqual(
[p[0] for p in new_chip],
Expand Down
7 changes: 7 additions & 0 deletions unittests/test_machine.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,13 @@ def test_concentric_xys(self):
(2, 4), (1, 3), (0, 2), (0, 1), (0, 0), (1, 0)]
self.assertListEqual(expected, found)

def test_too_few_cores(self):
    """
    Validation must reject a machine holding a chip with too few cores.
    """
    machine = virtual_machine(8, 8)
    # Hack to get n_processors return a low number:
    # swap in a short processor collection on chip 0, 1
    short_chip = machine.get_chip_at(0, 1)
    short_chip._p = [1, 2, 3]
    self.assertRaises(SpinnMachineException, machine.validate)


if __name__ == '__main__':
unittest.main()

0 comments on commit 916928c

Please sign in to comment.