Skip to content

Commit

Permalink
[core] increase sqlite3 lock timeout to 60s (#4552)
Browse files Browse the repository at this point in the history
  • Loading branch information
cg505 authored Jan 15, 2025
1 parent 837d51b commit d9bb51a
Showing 1 changed file with 18 additions and 4 deletions.
22 changes: 18 additions & 4 deletions sky/utils/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,27 @@
import threading
from typing import Any, Callable, Optional

# This parameter (passed to sqlite3.connect) controls how long we will wait to
# obtains a database lock (not necessarily during connection, but whenever it is
# needed). It is not a connection timeout.
# Even in WAL mode, only a single writer is allowed at a time. Other writers
# will block until the write lock can be obtained. This behavior is described in
# the SQLite documentation for WAL: https://www.sqlite.org/wal.html
# Python's default timeout is 5s. In normal usage, lock contention is very low,
# and this is more than sufficient. However, in some highly concurrent cases,
# such as a jobs controller suddenly recovering thousands of jobs at once, we
# can see a small number of processes that take much longer to obtain the lock.
# In contrived highly contentious cases, around 0.1% of transactions will take
# >30s to take the lock. We have not seen cases that take >60s. For cases up to
# 1000x parallelism, this is thus thought to be a conservative setting.
# For more info, see the PR description for #4552.
_DB_TIMEOUT_S = 60


@contextlib.contextmanager
def safe_cursor(db_path: str):
"""A newly created, auto-committing, auto-closing cursor."""
conn = sqlite3.connect(db_path)
conn = sqlite3.connect(db_path, timeout=_DB_TIMEOUT_S)
cursor = conn.cursor()
try:
yield cursor
Expand Down Expand Up @@ -79,8 +95,6 @@ class SQLiteConn(threading.local):
def __init__(self, db_path: str, create_table: Callable):
super().__init__()
self.db_path = db_path
# NOTE: We use a timeout of 10 seconds to avoid database locked
# errors. This is a hack, but it works.
self.conn = sqlite3.connect(db_path, timeout=10)
self.conn = sqlite3.connect(db_path, timeout=_DB_TIMEOUT_S)
self.cursor = self.conn.cursor()
create_table(self.cursor, self.conn)

0 comments on commit d9bb51a

Please sign in to comment.