Skip to content

Commit

Permalink
Improve last_checked vs last_changed time information precision (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
dgtlmoon authored Jan 6, 2025
1 parent 202e47d commit 66fb055
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 19 deletions.
1 change: 1 addition & 0 deletions changedetectionio/api/api_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def get(self, uuid):
# Return without history, get that via another API call
# Properties are not returned as a JSON, so add the required props manually
watch['history_n'] = watch.history_n
# attr .last_changed will check for the last written text snapshot on change
watch['last_changed'] = watch.last_changed
watch['viewed'] = watch.viewed
return watch
Expand Down
7 changes: 5 additions & 2 deletions changedetectionio/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def set_modified_response():

return None


def is_valid_uuid(val):
try:
uuid.UUID(str(val))
Expand All @@ -56,8 +55,9 @@ def is_valid_uuid(val):
def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server)


def test_api_simple(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
# live_server_setup(live_server)

api_key = extract_api_key_from_UI(client)

Expand Down Expand Up @@ -129,6 +129,9 @@ def test_api_simple(client, live_server, measure_memory_usage):
assert after_recheck_info['last_checked'] != before_recheck_info['last_checked']
assert after_recheck_info['last_changed'] != 0

# #2877 When run in a slow fetcher like playwright etc
assert after_recheck_info['last_changed'] == after_recheck_info['last_checked']

# Check history index list
res = client.get(
url_for("watchhistory", uuid=watch_uuid),
Expand Down
1 change: 0 additions & 1 deletion changedetectionio/tests/test_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import time
from flask import url_for
from urllib.request import urlopen
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \
extract_UUID_from_client

Expand Down
14 changes: 13 additions & 1 deletion changedetectionio/tests/test_nonrenderable_pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
#####################
client.post(
url_for("settings_page"),
data={"application-empty_pages_are_a_change": "",
data={"application-empty_pages_are_a_change": "", # default, OFF, they are NOT a change
"requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
Expand All @@ -66,6 +66,14 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
res = client.get(url_for("index"))
assert b'unviewed' not in res.data

uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]

assert watch.last_changed == 0
assert watch['last_checked'] != 0




# ok now do the opposite

Expand All @@ -92,6 +100,10 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
# A totally zero byte (#2528) response should also not trigger an error
set_zero_byte_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True)

# 2877
assert watch.last_changed == watch['last_checked']

wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'unviewed' in res.data # A change should have registered because empty_pages_are_a_change is ON
Expand Down
8 changes: 8 additions & 0 deletions changedetectionio/tests/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,14 @@ def set_more_modified_response():
return None


def set_empty_text_response():
test_return_data = """<html><body></body></html>"""

with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)

return None

def wait_for_notification_endpoint_output():
'''Apprise can take a few seconds to fire'''
#@todo - could check the apprise object directly instead of looking for this file
Expand Down
35 changes: 20 additions & 15 deletions changedetectionio/update_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,6 @@ def cleanup_error_artifacts(self, uuid):
os.unlink(full_path)

def run(self):
now = time.time()

while not self.app.config.exit.is_set():
update_handler = None
Expand All @@ -254,6 +253,7 @@ def run(self):
pass

else:
fetch_start_time = time.time()
uuid = queued_item_data.item.get('uuid')
self.current_uuid = uuid
if uuid in list(self.datastore.data['watching'].keys()) and self.datastore.data['watching'][uuid].get('url'):
Expand All @@ -268,7 +268,6 @@ def run(self):
watch = self.datastore.data['watching'].get(uuid)

logger.info(f"Processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")
now = time.time()

try:
# Processor is what we are using for detecting the "Change"
Expand All @@ -288,6 +287,10 @@ def run(self):

update_handler.call_browser()

# In reality, the actual time of when the change was detected could be a few seconds after this
# For example it should include when the page stopped rendering if using a playwright/chrome type fetch
fetch_start_time = time.time()

changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)

# Re #342
Expand Down Expand Up @@ -512,7 +515,7 @@ def run(self):

if not self.datastore.data['watching'].get(uuid):
continue
#

# Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
if process_changedetection_results:

Expand All @@ -525,8 +528,6 @@ def run(self):
except Exception as e:
logger.warning(f"UUID: {uuid} Extract <title> as watch title was enabled, but couldn't find a <title>.")

# Now update after running everything
timestamp = round(time.time())
try:
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)

Expand All @@ -542,24 +543,28 @@ def run(self):

# Small hack so that we sleep just enough to allow 1 second between history snapshots
# this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys

if watch.newest_history_key and int(timestamp) == int(watch.newest_history_key):
# @also - the keys are one per second at the most (for now)
if watch.newest_history_key and int(fetch_start_time) == int(watch.newest_history_key):
logger.warning(
f"Timestamp {timestamp} already exists, waiting 1 seconds so we have a unique key in history.txt")
timestamp = str(int(timestamp) + 1)
f"Timestamp {fetch_start_time} already exists, waiting 1 seconds so we have a unique key in history.txt")
fetch_start_time += 1
time.sleep(1)

watch.save_history_text(contents=contents,
timestamp=timestamp,
timestamp=int(fetch_start_time),
snapshot_id=update_obj.get('previous_md5', 'none'))

if update_handler.fetcher.content:
watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=timestamp)

empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
if update_handler.fetcher.content or (not update_handler.fetcher.content and empty_pages_are_a_change):
# attribute .last_changed is then based on this data
watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=int(fetch_start_time))

# Notifications should only trigger on the second time (first time, we gather the initial snapshot)
if watch.history_n >= 2:
logger.info(f"Change detected in UUID {uuid} - {watch['url']}")
if not watch.get('notification_muted'):
# @todo only run this if notifications exist
self.send_content_changed_notification(watch_uuid=uuid)

except Exception as e:
Expand All @@ -581,15 +586,15 @@ def run(self):
except Exception as e:
pass

self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
'last_checked': round(time.time()),
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3),
'last_checked': int(fetch_start_time),
'check_count': count
})


self.current_uuid = None # Done
self.q.task_done()
logger.debug(f"Watch {uuid} done in {time.time()-now:.2f}s")
logger.debug(f"Watch {uuid} done in {time.time()-fetch_start_time:.2f}s")

# Give the CPU time to interrupt
time.sleep(0.1)
Expand Down

0 comments on commit 66fb055

Please sign in to comment.