Skip to content

Commit

Permalink
Force Pyro to timeout quickly and introduce retries
Browse files Browse the repository at this point in the history
We observe that Pyro remote calls sometimes complete with
unexpected time gaps between them, which should not happen.
We suspect network issues: the packets still arrive eventually,
so the delays are probably caused by TCP retransmission.

Pyro has a timeout property that is not exposed publicly.
To fix the issue, this property is set to a short value and
the call is retried in a loop whenever a timeout error is returned.

Signed-off-by: Lukas Pukenis <[email protected]>
  • Loading branch information
LukasPukenis committed Feb 3, 2025
1 parent c7260ac commit 9c0b45d
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 12 deletions.
Empty file.
45 changes: 33 additions & 12 deletions nat-lab/tests/uniffi/libtelio_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,35 @@ class LibtelioProxy:
def __init__(self, name: str, object_uri: str, features: libtelio.Features):
    """Record the proxy's label, the remote object's URI and the feature set.

    Args:
        name: Label used as the prefix of this proxy's log lines.
        object_uri: URI handed to ``Proxy(...)`` when a remote call is made.
        features: Feature configuration kept on the instance for later use.
    """
    self._name, self._uri = name, object_uri
    self._features = features
    # Iteration budget kept on the instance; no visible method in this
    # view reads it.
    self._iterations = 20

def _handle_remote_error(self, f):
    """Call ``f`` on a Pyro proxy for ``self._uri`` and unwrap its result.

    ``f`` receives the open proxy and must return either ``None`` or a
    ``(result, error)`` pair. The call is made with a short Pyro timeout
    and is retried when it times out.

    Args:
        f: Callable taking the remote proxy and performing one remote call.

    Returns:
        ``None`` when ``f`` returned ``None``; otherwise the ``result``
        element of the ``(result, error)`` pair.

    Raises:
        Exception: When the remote side reported an error (``error`` is
            not ``None``); the error value is the exception argument.
        Pyro5.errors.TimeoutError: When every attempt timed out.
    """
    # NOTE: timestamps use datetime.now(); datetime.time() is an instance
    # method and raises TypeError when called on the class itself.
    print(
        f"[{self._name}]: [{datetime.now()}]: handle_remote_error outer: uri: {self._uri}")
    with Proxy(self._uri) as remote:
        print(f"[{self._name}]: [{datetime.now()}]: handle_remote_error inner")

        # In some cases Pyro experiences delays in reporting events. This is
        # likely due to TCP retries, which can be triggered by changes in
        # the network interface configuration. To mitigate this, use a short
        # timeout and a higher number of retries to bypass the exponential
        # growth of TCP retry intervals.
        # NOTE(review): Pyro5 may offer its own retry/timeout configuration;
        # confirm whether that could replace this private attribute.
        remote._pyroTimeout = 3

        # Always make at least one attempt, even if the budget is misconfigured.
        attempts = max(1, self._iterations)
        for attempt in range(1, attempts + 1):
            try:
                fn_res = f(remote)
            except Pyro5.errors.TimeoutError:
                print(
                    f"[{self._name}]: [{datetime.now()}]: Pyro5 timeout"
                    f" (attempt {attempt}/{attempts})"
                )
                if attempt == attempts:
                    # Out of retries: surface the timeout instead of
                    # silently returning None to the caller.
                    raise
                # NOTE(review): the same proxy object is reused for the next
                # attempt — confirm Pyro5 reconnects it after a timeout.
                continue

            if fn_res is None:
                return None
            (res, err) = fn_res
            if err is not None:
                print(
                    f"[{self._name}]: [{datetime.now()}]: Pyro error: {err}")
                raise Exception(err)
            return res

@move_to_async_thread
def shutdown(self, container_or_vm_name: Optional[str] = None):
Expand Down Expand Up @@ -90,9 +107,11 @@ def create(self):
def next_event(self) -> libtelio.Event:
    """Fetch the next event from the remote object and log the outcome.

    Returns:
        Whatever ``_handle_remote_error`` yields for the remote
        ``next_event()`` call.

    Raises:
        Exception: Any exception from the remote call is logged and then
            re-raised unchanged.
    """
    def _fetch(remote):
        return remote.next_event()

    try:
        event = self._handle_remote_error(_fetch)
        print(
            f"[{self._name}]:{datetime.now()} proxy::next_event returned: {event}")
        return event
    except Exception as exc:
        print(
            f"[{self._name}]:{datetime.now()} proxy::next_event: exception {exc}")
        raise
Expand Down Expand Up @@ -134,7 +153,8 @@ def connect_to_exit_node(self, public_key, allowed_ips, endpoint):
@move_to_async_thread
def connect_to_exit_node_pq(self, public_key, allowed_ips, endpoint):
    """Forward ``connect_to_exit_node_pq`` to the remote object.

    The three arguments are passed through to the remote call unchanged;
    the outcome is processed by ``_handle_remote_error`` and nothing is
    returned to the caller.
    """
    def _call(remote):
        return remote.connect_to_exit_node_pq(
            public_key, allowed_ips, endpoint)

    self._handle_remote_error(_call)

@move_to_async_thread
Expand All @@ -143,7 +163,8 @@ def disconnect_from_exit_nodes(self):

@move_to_async_thread
def enable_magic_dns(self, forward_servers):
    """Forward ``enable_magic_dns`` to the remote object.

    ``forward_servers`` is passed through to the remote call unchanged;
    the outcome is processed by ``_handle_remote_error`` and nothing is
    returned to the caller.
    """
    def _call(remote):
        return remote.enable_magic_dns(forward_servers)

    self._handle_remote_error(_call)

@move_to_async_thread
def disable_magic_dns(self):
Expand Down

0 comments on commit 9c0b45d

Please sign in to comment.