Skip to content

Commit

Permalink
Retry on DB deadlock timeout errors
Browse files Browse the repository at this point in the history
When deleting ports and networks concurrently, because of
known incompatibilty of eventlet and MysqlDB, DB deadlock could
occur, causing API to fail.  The proper fix for this would be to
make all APIs non-blocking, and/or use an eventlet-compatible
MySQL library like mysql-connector.  Until such proper fix is in
place, simply retry on such error to avoid API failure.

Change-Id: I5c2a3c89c494917947a2d1d6163b87a3b8c6d1c6
Fixes: MNP-11
  • Loading branch information
ryu25ish committed Dec 12, 2014
1 parent 79209c9 commit 4fca146
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 7 deletions.
30 changes: 30 additions & 0 deletions midonet/neutron/common/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# under the License.

import re
import time
from webob import exc as w_exc

from midonetclient import exc
Expand All @@ -38,6 +39,35 @@ def wrapped(*args, **kwargs):
return wrapped


def retry_on_error(attempts, delay, error_cls):
"""Decorator for error handling retry logic
This decorator retries the function specified number of times with
specified delay between each attempt, for every exception thrown specified
in error_cls. If case the retry fails in all attempts, the error_cls
exception object is thrown.
:param attempts: Number of retry attempts
:param delay: Delay in seconds between attempts
:param error_cls: The exception class that triggers a retry attempt
"""
def internal_wrapper(func):
def retry(*args, **kwargs):
err = None
for i in range(attempts):
try:
return func(*args, **kwargs)
except error_cls as exc:
LOG.warn(_('Retrying because of error: %r'), exc)
time.sleep(delay)
err = exc
# err should always be set to a valid exception object
assert type(err) is error_cls
raise err
return retry
return internal_wrapper


class MidonetApiException(n_exc.NeutronException):
message = _("MidoNet API error: %(msg)s")

Expand Down
31 changes: 24 additions & 7 deletions midonet/neutron/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,17 @@ def update_network(self, context, id, network):

@util.handle_api_error
@utils.synchronized('midonet-network-lock', external=True)
@util.retry_on_error(2, 1, sa_exc.OperationalError)
def delete_network(self, context, id):
"""Delete a network and its corresponding MidoNet bridge."""
"""Delete a network and its corresponding MidoNet bridge.
This method is wrapped by 'retry_on_error' decorator because concurrent
requests to the API server often causes DB deadlock error because
eventlet green threads do not yield properly when they block inside
the transaction. This hack should no longer become available once
we moved to the model where API requests are asynchronous or when
eventlet-compatible mysqlconnector is used for the DB driver instead.
"""
LOG.info(_("MidonetPluginV2.delete_network called: id=%r"), id)

with context.session.begin(subtransactions=True):
Expand Down Expand Up @@ -306,6 +315,19 @@ def create_port(self, context, port):

@util.handle_api_error
@utils.synchronized('midonet-port-lock', external=True)
@util.retry_on_error(2, 1, sa_exc.OperationalError)
def _process_port_delete(self, context, id):
"""Delete the Neutron and MidoNet ports
This method is wrapped by 'retry_on_error' decorator. See the
explanation in the 'delete_network' comment.
"""
with context.session.begin(subtransactions=True):
super(MidonetPluginV2, self).disassociate_floatingips(
context, id, do_notify=False)
super(MidonetPluginV2, self).delete_port(context, id)
self.api_cli.delete_port(id)

def delete_port(self, context, id, l3_port_check=True):
"""Delete a neutron port and corresponding MidoNet bridge port."""
LOG.info(_("MidonetPluginV2.delete_port called: id=%(id)s "
Expand All @@ -317,12 +339,7 @@ def delete_port(self, context, id, l3_port_check=True):
if l3_port_check:
self.prevent_l3_port_deletion(context, id)

with context.session.begin(subtransactions=True):
super(MidonetPluginV2, self).disassociate_floatingips(
context, id, do_notify=False)
super(MidonetPluginV2, self).delete_port(context, id)
self.api_cli.delete_port(id)

self._process_port_delete(context, id)
LOG.info(_("MidonetPluginV2.delete_port exiting: id=%r"), id)

def _process_port_update(self, context, id, in_port, out_port):
Expand Down
23 changes: 23 additions & 0 deletions midonet/neutron/tests/unit/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,26 @@ class FooPlugin(FooPluginBase):
FooPlugin()
self.assertIn('get_foos', FooPlugin.__dict__.keys())
self.assertNotIn('get_foos', FooPlugin.__abstractmethods__)

def test_retry_on_error(self):
retry_num = 2

class TestClass(object):

def __init__(self):
self.attempt = 0

@util.retry_on_error(retry_num, 1, ValueError)
def test(self):
self.attempt += 1
raise ValueError

test_obj = TestClass()
try:
test_obj.test()
# should never reach here
self.assertTrue(False)
except ValueError:
pass

self.assertEqual(retry_num, test_obj.attempt)

0 comments on commit 4fca146

Please sign in to comment.