From b601917b64e94b91ed9fc9eafc2b8940815b1dc6 Mon Sep 17 00:00:00 2001
From: nille02
Date: Sun, 5 Jan 2025 19:48:28 +0100
Subject: [PATCH] Add new option to run just new jobs or jobs without history (#831)

New command-line option `--prepare-jobs` to initialize new jobs or jobs without history
---
 CHANGELOG.md            |  6 ++++++
 lib/urlwatch/command.py | 20 ++++++++++++++++++++
 lib/urlwatch/config.py  |  1 +
 lib/urlwatch/storage.py | 17 +++++++++++++++++
 4 files changed, 44 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 03d44c46..f145b5ce 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
 
 The format mostly follows [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
+## UNRELEASED
+
+### Added
+
+- New command-line option `--prepare-jobs` to initialize new jobs or jobs without history (#831 by nille02)
+
 ## [2.29] -- 2024-10-28
 
 ### Added
diff --git a/lib/urlwatch/command.py b/lib/urlwatch/command.py
index 43b5531e..53d259fc 100644
--- a/lib/urlwatch/command.py
+++ b/lib/urlwatch/command.py
@@ -142,6 +142,24 @@ def test_filter(self, id):
         # (ignore_cached) and we do not want to store the newly-retrieved data yet (filter testing)
         return 0
 
+    def prepare_jobs(self):
+        """Run only the jobs for which the cache storage reports no history.
+
+        Returns 0 in all cases; handle_actions() passes the value to sys.exit().
+        """
+        new_jobs = []
+        for idx, job in enumerate(self.urlwatcher.jobs, 1):
+            # Indices are collected 1-based (enumerate start=1) for idx_set
+            if not self.urlwatcher.cache_storage.has_history_data(job.get_guid()):
+                logger.info('Add Job: %s', job.pretty_name())
+                new_jobs.append(idx)
+        if not new_jobs:
+            return 0
+        self.urlwatch_config.idx_set = frozenset(new_jobs)
+        self.urlwatcher.run_jobs()
+        self.urlwatcher.close()
+        return 0
+
     def _resolve_job_history(self, id, max_entries=10):
         job = self._get_job(id)
 
@@ -274,6 +292,8 @@ def handle_actions(self):
             sys.exit(self.test_filter(self.urlwatch_config.test_filter))
         if self.urlwatch_config.test_diff_filter:
             sys.exit(self.test_diff_filter(self.urlwatch_config.test_diff_filter))
+        if self.urlwatch_config.prepare_jobs:
+            sys.exit(self.prepare_jobs())
         if self.urlwatch_config.dump_history:
             sys.exit(self.dump_history(self.urlwatch_config.dump_history))
         if self.urlwatch_config.list:
diff --git a/lib/urlwatch/config.py b/lib/urlwatch/config.py
index 94622019..237d5682 100644
--- a/lib/urlwatch/config.py
+++ b/lib/urlwatch/config.py
@@ -93,6 +93,7 @@ def parse_args(self, cmdline_args):
         group.add_argument('--delete', metavar='JOB', help='delete job by location or index')
         group.add_argument('--enable', metavar='JOB', help='enable job by location or index')
         group.add_argument('--disable', metavar='JOB', help='disable job by location or index')
+        group.add_argument('--prepare-jobs', action='store_true', help='run jobs without history')
         group.add_argument('--change-location', metavar=('JOB', 'NEW_LOCATION'), nargs=2, help='change the location of an existing job by location or index')
         group.add_argument('--test-filter', metavar='JOB', help='test filter output of job by location or index')
         group.add_argument('--test-diff-filter', metavar='JOB',
diff --git a/lib/urlwatch/storage.py b/lib/urlwatch/storage.py
index a865a3ce..12090441 100644
--- a/lib/urlwatch/storage.py
+++ b/lib/urlwatch/storage.py
@@ -567,6 +567,8 @@ def __init__(self, filename):
         self.db = minidb.Store(self.filename, debug=True, vacuum_on_close=False)
         self.db.register(CacheEntry)
 
+        self._cached_has_history_data_set = None
+
     def close(self):
         self.db.close()
         self.db = None
@@ -600,6 +602,17 @@ def get_history_data(self, guid, count=1):
                     break
         return history
 
+    def has_history_data(self, guid):
+        """Return True if an entry with tries == 0 (or NULL) is stored for guid."""
+        # Compare against None: an empty frozenset is falsy, so a truthiness
+        # check would repeat the database query on every call.
+        if self._cached_has_history_data_set is None:
+            rows = CacheEntry.query(self.db, CacheEntry.c.guid,
+                                    where=((CacheEntry.c.tries == 0)
+                                           | (CacheEntry.c.tries == None)))  # noqa:E711
+            self._cached_has_history_data_set = frozenset(row[0] for row in rows)
+        return guid in self._cached_has_history_data_set
+
     def save(self, job, guid, data, timestamp, tries, etag=None):
         self.db.save(CacheEntry(guid=guid, timestamp=timestamp, data=data, tries=tries, etag=etag))
         self.db.commit()
@@ -688,6 +701,10 @@ def get_history_data(self, guid, count=1):
                     break
         return history
 
+    def has_history_data(self, guid):
+        """Return True if get_history_data() yields at least one entry for guid."""
+        return bool(self.get_history_data(guid))
+
     def save(self, job, guid, data, timestamp, tries, etag=None):
         r = {
             'data': data,