Skip to content

Commit

Permalink
Move maintaining the RowCount into an async task (#16027)
Browse files Browse the repository at this point in the history
  • Loading branch information
dstufft authored May 31, 2024
1 parent 4d077ff commit 6a55ab5
Show file tree
Hide file tree
Showing 7 changed files with 228 additions and 2 deletions.
5 changes: 5 additions & 0 deletions tests/unit/packaging/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
update_release_description,
)
from warehouse.utils import readme
from warehouse.utils.row_counter import compute_row_counts

from ...common.db.classifiers import ClassifierFactory
from ...common.db.packaging import (
Expand Down Expand Up @@ -101,6 +102,10 @@ def test_compute_packaging_metrics(db_request, metrics):
FileFactory(release=release3, packagetype="sdist")
FileFactory(release=release3, packagetype="bdist_wheel")

# Make sure that the task to update the database counts has been
# called.
compute_row_counts(db_request)

compute_packaging_metrics(db_request)

assert metrics.gauge.calls == [
Expand Down
1 change: 1 addition & 0 deletions tests/unit/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ def __init__(self):
pretend.call(".referrer_policy"),
pretend.call(".captcha"),
pretend.call(".http"),
pretend.call(".utils.row_counter"),
]
+ [pretend.call(x) for x in [configurator_settings.get("warehouse.theme")] if x]
+ [pretend.call(".sanity")]
Expand Down
5 changes: 5 additions & 0 deletions tests/unit/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from warehouse import views
from warehouse.errors import WarehouseDenied
from warehouse.packaging.models import ProjectFactory as DBProjectFactory
from warehouse.utils.row_counter import compute_row_counts
from warehouse.views import (
SecurityKeyGiveaway,
current_user_indicator,
Expand Down Expand Up @@ -367,6 +368,10 @@ def test_index(self, db_request):
)
UserFactory.create()

# Make sure that the task to update the database counts has been
# called.
compute_row_counts(db_request)

assert index(db_request) == {
"num_projects": 1,
"num_users": 3,
Expand Down
77 changes: 77 additions & 0 deletions tests/unit/utils/test_row_counter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pretend

from celery.schedules import crontab

from warehouse.accounts.models import User
from warehouse.packaging.models import File, Project, Release
from warehouse.utils import row_counter

from ...common.db.packaging import FileFactory, ProjectFactory, ReleaseFactory


def test_compute_row_counts(db_request):
    """Stored counts read zero until ``compute_row_counts`` has been run."""
    tracked_tables = [
        Project.__tablename__,
        Release.__tablename__,
        File.__tablename__,
        User.__tablename__,
    ]

    def stored_counts():
        # Map each tracked table name to the count currently saved for it.
        rows = (
            db_request.db.query(
                row_counter.RowCount.table_name, row_counter.RowCount.count
            )
            .filter(row_counter.RowCount.table_name.in_(tracked_tables))
            .all()
        )
        return dict(rows)

    first_project = ProjectFactory()
    second_project = ProjectFactory()
    first_release = ReleaseFactory(project=first_project)
    second_release = ReleaseFactory(project=second_project)
    third_release = ReleaseFactory(project=second_project)
    FileFactory(release=first_release)
    FileFactory(release=second_release)
    FileFactory(release=third_release, packagetype="sdist")
    FileFactory(release=third_release, packagetype="bdist_wheel")

    # Creating rows alone must not change the stored counts.
    assert stored_counts() == {
        "users": 0,
        "projects": 0,
        "releases": 0,
        "release_files": 0,
    }

    row_counter.compute_row_counts(db_request)

    # After the task runs, the stored counts reflect the rows created above.
    assert stored_counts() == {
        "users": 3,
        "projects": 2,
        "releases": 3,
        "release_files": 4,
    }


def test_includeme():
    """``includeme`` registers ``compute_row_counts`` on a ``*/5`` minute crontab."""
    recorder = pretend.call_recorder(lambda c, f: None)
    config = pretend.stub(add_periodic_task=recorder)

    row_counter.includeme(config)

    expected_call = pretend.call(
        crontab(minute="*/5"), row_counter.compute_row_counts
    )
    assert recorder.calls == [expected_call]
3 changes: 3 additions & 0 deletions warehouse/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,9 @@ def configure(settings=None):
config.add_settings({"http": {"verify": "/etc/ssl/certs/"}})
config.include(".http")

# Register our row counting maintenance
config.include(".utils.row_counter")

# Scan everything for configuration
config.scan(
categories=(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Remove count_rows() triggers
Revision ID: cec0316503a5
Revises: 78ecf599841c
Create Date: 2024-05-30 16:46:07.355604
"""

from alembic import op

# Identifiers for this migration and the one it revises; they mirror the
# "Revision ID" / "Revises" lines in the module docstring.
revision = "cec0316503a5"
down_revision = "78ecf599841c"


def upgrade():
    """Drop the ``update_row_count`` trigger from each table, then ``count_rows()``."""
    # The triggers execute count_rows(), so they are removed before the
    # function itself is dropped.
    for table_name in ("users", "release_files", "releases", "projects"):
        op.execute(f"DROP TRIGGER update_row_count ON {table_name}")

    op.execute("DROP FUNCTION count_rows()")


def downgrade():
    """Recreate ``count_rows()`` and its triggers, then resync ``row_counts``.

    Steps, in order:

    1. Recreate the ``count_rows()`` trigger function, which adjusts the
       matching ``row_counts`` row by one on every INSERT or DELETE.
    2. Lock the counted tables so no rows are added or removed while the
       triggers are installed and the counts are recomputed.
    3. Re-attach the ``update_row_count`` trigger to each counted table.
    4. Write an exact ``COUNT(*)`` for each table into ``row_counts``.

    ``upgrade()`` only drops the triggers and the function; it leaves the
    existing ``row_counts`` rows in place. Because ``table_name`` is unique,
    a plain INSERT here would fail on those rows, so step 4 uses an upsert
    that works whether or not a row already exists.
    """
    op.execute(
        """ CREATE FUNCTION count_rows()
            RETURNS TRIGGER AS
            '
                BEGIN
                    IF TG_OP = ''INSERT'' THEN
                        UPDATE row_counts
                        SET count = count + 1
                        WHERE table_name = TG_RELNAME;
                    ELSIF TG_OP = ''DELETE'' THEN
                        UPDATE row_counts
                        SET count = count - 1
                        WHERE table_name = TG_RELNAME;
                    END IF;

                    RETURN NULL;
                END;
            ' LANGUAGE plpgsql;
        """
    )

    # Hard-coded identifiers (never user input), so interpolating them into
    # the SQL below is safe.
    counted_tables = ("projects", "releases", "release_files", "users")

    # Take every lock before creating any trigger so the counts computed at
    # the end match exactly what the triggers will start from.
    for table_name in counted_tables:
        op.execute(f"LOCK TABLE {table_name} IN SHARE ROW EXCLUSIVE MODE")

    for table_name in counted_tables:
        op.execute(
            f""" CREATE TRIGGER update_row_count
                AFTER INSERT OR DELETE ON {table_name}
                FOR EACH ROW
                EXECUTE PROCEDURE count_rows();
            """
        )

    # Upsert rather than INSERT: the rows normally still exist (and may hold
    # a stale count from the periodic task), so overwrite them in place.
    for table_name in counted_tables:
        op.execute(
            f""" INSERT INTO row_counts (table_name, count)
                VALUES ('{table_name}', (SELECT COUNT(*) FROM {table_name}))
                ON CONFLICT (table_name) DO UPDATE SET count = EXCLUDED.count;
            """
        )
24 changes: 22 additions & 2 deletions warehouse/utils/row_counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,34 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from sqlalchemy import BigInteger, sql
from celery.schedules import crontab
from sqlalchemy import BigInteger, func, sql
from sqlalchemy.orm import Mapped, mapped_column

from warehouse import db
from warehouse import db, tasks
from warehouse.accounts.models import User
from warehouse.packaging.models import File, Project, Release

# Models whose row totals compute_row_counts() writes into the matching
# row_counts entry (matched on each model's __tablename__).
COUNTED_TABLES = [User, Project, Release, File]


class RowCount(db.Model):
    """A stored row count for one database table, kept in ``row_counts``.

    Each row pairs a table name with a count. ``compute_row_counts`` in this
    module overwrites ``count`` for the tables listed in ``COUNTED_TABLES``.
    """

    __tablename__ = "row_counts"

    # Name of the table being counted; unique, so at most one row per table.
    table_name: Mapped[str] = mapped_column(unique=True)
    # Stored number of rows for that table; the database defaults it to 0.
    count: Mapped[int] = mapped_column(BigInteger, server_default=sql.text("0"))


@tasks.task(ignore_result=True, acks_late=True)
def compute_row_counts(request):
    """Refresh the stored count of every model in ``COUNTED_TABLES``.

    For each model, one UPDATE sets the ``RowCount`` entry whose
    ``table_name`` equals the model's ``__tablename__`` to the result of a
    ``COUNT`` subquery over that model. Only existing ``RowCount`` rows are
    updated; none are created here.
    """
    for model in COUNTED_TABLES:
        # Embed the count as a scalar subquery so it is evaluated inside the
        # UPDATE statement itself.
        live_total = sql.select(func.count()).select_from(model).scalar_subquery()
        refresh_stmt = (
            sql.update(RowCount)
            .where(RowCount.table_name == model.__tablename__)
            .values(count=live_total)
        )
        request.db.execute(refresh_stmt)


def includeme(config):
    """Register ``compute_row_counts`` as a periodic task on ``config``."""
    # Keep the stored row counts refreshed on a 5 minute interval.
    every_five_minutes = crontab(minute="*/5")
    config.add_periodic_task(every_five_minutes, compute_row_counts)

0 comments on commit 6a55ab5

Please sign in to comment.