From 0a4d9f88b6d8d82cc4c908a71e887d5ecfa0fc20 Mon Sep 17 00:00:00 2001 From: BickieSmalls Date: Thu, 18 Jan 2024 10:05:17 -0500 Subject: [PATCH 1/2] Add default limit for fetching failed rows in DbSample --- soda/core/soda/sampler/db_sample.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/soda/core/soda/sampler/db_sample.py b/soda/core/soda/sampler/db_sample.py index a7611c4d2..11bf896bb 100644 --- a/soda/core/soda/sampler/db_sample.py +++ b/soda/core/soda/sampler/db_sample.py @@ -1,5 +1,6 @@ from typing import Tuple +from soda.sampler.sampler import DEFAULT_FAILED_ROWS_SAMPLE_LIMIT from soda.sampler.sample import Sample from soda.sampler.sample_schema import SampleColumn, SampleSchema @@ -11,9 +12,13 @@ def __init__(self, cursor, data_source): self.rows = None def get_rows(self) -> Tuple[Tuple]: - # This might be dangerous if a big number of rows is fetched, consider cleaning up the memory when this object is not needed any more. + # Fetch the default number of failed rows + # TODO: respect the limit set in the config if not self.rows: - self.rows = self.cursor.fetchall() + try: + self.rows = self.cursor.fetchmany(DEFAULT_FAILED_ROWS_SAMPLE_LIMIT) + except: + self.rows =self.cursor.fetchall() return self.rows From d2fcf8207ec131f5004b30d9890d91fa991f50c8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 18 Jan 2024 15:08:58 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- soda/core/soda/sampler/db_sample.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/soda/core/soda/sampler/db_sample.py b/soda/core/soda/sampler/db_sample.py index 11bf896bb..3d0c85e79 100644 --- a/soda/core/soda/sampler/db_sample.py +++ b/soda/core/soda/sampler/db_sample.py @@ -1,8 +1,8 @@ from typing import Tuple -from soda.sampler.sampler import DEFAULT_FAILED_ROWS_SAMPLE_LIMIT from soda.sampler.sample import Sample from soda.sampler.sample_schema import SampleColumn, SampleSchema +from soda.sampler.sampler import DEFAULT_FAILED_ROWS_SAMPLE_LIMIT class DbSample(Sample): @@ -18,7 +18,7 @@ def get_rows(self) -> Tuple[Tuple]: try: self.rows = self.cursor.fetchmany(DEFAULT_FAILED_ROWS_SAMPLE_LIMIT) except: - self.rows =self.cursor.fetchall() + self.rows = self.cursor.fetchall() return self.rows