From 050bb63980f12f65516038228047d57d41ba8914 Mon Sep 17 00:00:00 2001 From: Sami Jaghouar Date: Wed, 5 Jun 2024 10:40:26 +0000 Subject: [PATCH 1/2] add dowloading time to averager --- hivemind/averaging/averager.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hivemind/averaging/averager.py b/hivemind/averaging/averager.py index cd9415667..735990d1e 100644 --- a/hivemind/averaging/averager.py +++ b/hivemind/averaging/averager.py @@ -10,6 +10,7 @@ import random import signal import threading +import time import weakref from dataclasses import asdict from typing import Any, AsyncIterator, Dict, Optional, Sequence, Tuple, Union @@ -700,6 +701,7 @@ async def _load_state_from_peers(self, future: MPFuture, timeout: Optional[float metadata = None for peer in sorted(peer_priority.keys(), key=peer_priority.get, reverse=True): if peer != self.peer_id: + t0 = time.time() logger.info(f"Downloading parameters from peer {peer}") try: stub = self.get_stub(self._p2p, peer, namespace=self.prefix) @@ -722,7 +724,8 @@ async def _load_state_from_peers(self, future: MPFuture, timeout: Optional[float logger.debug(f"Peer {peer} did not send its state") continue - logger.info(f"Finished downloading state from {peer}") + t1 = time.time() + logger.info(f"Finished downloading state in {t1 - t0:.3f}s from {peer}") future.set_result((metadata, tensors)) return except Exception as e: From c84e6dda05efafa5c8cd55e89a8607dc4524e7bf Mon Sep 17 00:00:00 2001 From: Sami Jaghouar Date: Mon, 10 Jun 2024 10:54:12 +0000 Subject: [PATCH 2/2] fix use monotonic time --- hivemind/averaging/averager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hivemind/averaging/averager.py b/hivemind/averaging/averager.py index 735990d1e..8ff75f178 100644 --- a/hivemind/averaging/averager.py +++ b/hivemind/averaging/averager.py @@ -701,7 +701,7 @@ async def _load_state_from_peers(self, future: MPFuture, timeout: Optional[float metadata = None for peer in sorted(peer_priority.keys(), key=peer_priority.get, reverse=True): if peer != self.peer_id: - t0 = time.time() + t0 = time.monotonic() logger.info(f"Downloading parameters from peer {peer}") try: stub = self.get_stub(self._p2p, peer, namespace=self.prefix) @@ -724,7 +724,7 @@ async def _load_state_from_peers(self, future: MPFuture, timeout: Optional[float logger.debug(f"Peer {peer} did not send its state") continue - t1 = time.time() + t1 = time.monotonic() logger.info(f"Finished downloading state in {t1 - t0:.3f}s from {peer}") future.set_result((metadata, tensors)) return