Only call send content changed notification on change #2884

Open · wants to merge 5 commits into master
1 change: 1 addition & 0 deletions changedetectionio/api/api_v1.py
@@ -76,6 +76,7 @@ def get(self, uuid):
# Return without history, get that via another API call
# Properties are not returned as a JSON, so add the required props manually
watch['history_n'] = watch.history_n
# attr .last_changed will check for the last written text snapshot on change
watch['last_changed'] = watch.last_changed
watch['viewed'] = watch.viewed
return watch
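For context, watch.last_changed (like history_n) is a computed property rather than a stored key, which is why it has to be copied into the returned dict manually here. A minimal standalone sketch of the idea behind it (hypothetical helper, not the actual Watch model code): it resolves to the newest text-snapshot timestamp once a change has actually been recorded, i.e. once at least two snapshots exist.

def last_changed_from_history(history: dict) -> int:
    """Hypothetical sketch: history maps epoch-second keys to snapshot file paths.

    The first snapshot is only the baseline, so there is no "change" to report
    until a second snapshot has been written; after that, the newest key is the
    time of the last recorded change.
    """
    if len(history) < 2:
        return 0
    return max(int(k) for k in history)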
7 changes: 5 additions & 2 deletions changedetectionio/tests/test_api.py
@@ -44,7 +44,6 @@ def set_modified_response():

return None


def is_valid_uuid(val):
try:
uuid.UUID(str(val))
@@ -56,8 +55,9 @@ def is_valid_uuid(val):
def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server)


def test_api_simple(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
# live_server_setup(live_server)

api_key = extract_api_key_from_UI(client)

@@ -129,6 +129,9 @@ def test_api_simple(client, live_server, measure_memory_usage):
assert after_recheck_info['last_checked'] != before_recheck_info['last_checked']
assert after_recheck_info['last_changed'] != 0

# #2877 When run with a slow fetcher like playwright etc, last_changed should still match last_checked
assert after_recheck_info['last_changed'] == after_recheck_info['last_checked']

# Check history index list
res = client.get(
url_for("watchhistory", uuid=watch_uuid),
1 change: 0 additions & 1 deletion changedetectionio/tests/test_backend.py
@@ -2,7 +2,6 @@

import time
from flask import url_for
from urllib.request import urlopen
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \
extract_UUID_from_client

14 changes: 13 additions & 1 deletion changedetectionio/tests/test_nonrenderable_pages.py
@@ -48,7 +48,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
#####################
client.post(
url_for("settings_page"),
data={"application-empty_pages_are_a_change": "",
data={"application-empty_pages_are_a_change": "", # default, OFF, they are NOT a change
"requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
@@ -66,6 +66,14 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
res = client.get(url_for("index"))
assert b'unviewed' not in res.data

uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]

assert watch.last_changed == 0
assert watch['last_checked'] != 0




# ok now do the opposite

@@ -92,6 +100,10 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
# A totally zero byte (#2528) response should also not trigger an error
set_zero_byte_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True)

# 2877
assert watch.last_changed == watch['last_checked']

wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'unviewed' in res.data # A change should have registered because empty_pages_are_a_change is ON
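The setting this test flips in both directions is applied in the update_worker.py change further down in this diff; reduced to a standalone decision (a sketch with a hypothetical helper name), the gate looks roughly like this:

def keep_fetched_content(fetched_content: str, empty_pages_are_a_change: bool) -> bool:
    # Mirrors the condition added in update_worker.py: non-empty content is
    # always kept, an empty/zero-byte page only counts when the setting is on.
    return bool(fetched_content) or (not fetched_content and empty_pages_are_a_change)

So keep_fetched_content("", False) is False (no 'unviewed' flag above) and keep_fetched_content("", True) is True (the 'unviewed' flag once the setting is switched on).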
8 changes: 8 additions & 0 deletions changedetectionio/tests/util.py
@@ -76,6 +76,14 @@ def set_more_modified_response():
return None


def set_empty_text_response():
test_return_data = """<html><body></body></html>"""

with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)

return None
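A sketch of how this new helper could be exercised (hypothetical test, written in the same style as test_nonrenderable_pages.py; the tests in this PR use set_zero_byte_response() for the equivalent zero-byte case):

def test_empty_text_response_is_not_a_change(client, live_server, measure_memory_usage):
    # Assumes a watch already exists and empty_pages_are_a_change is left OFF (the default)
    set_empty_text_response()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
    assert watch.last_changed == 0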

def wait_for_notification_endpoint_output():
'''Apprise can take a few seconds to fire'''
#@todo - could check the apprise object directly instead of looking for this file
44 changes: 24 additions & 20 deletions changedetectionio/update_worker.py
@@ -243,7 +243,6 @@ def cleanup_error_artifacts(self, uuid):
os.unlink(full_path)

def run(self):
now = time.time()

while not self.app.config.exit.is_set():
update_handler = None
@@ -254,6 +253,7 @@ def run(self):
pass

else:
fetch_start_time = time.time()
uuid = queued_item_data.item.get('uuid')
self.current_uuid = uuid
if uuid in list(self.datastore.data['watching'].keys()) and self.datastore.data['watching'][uuid].get('url'):
@@ -268,7 +268,6 @@ def run(self):
watch = self.datastore.data['watching'].get(uuid)

logger.info(f"Processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")
now = time.time()

try:
# Processor is what we are using for detecting the "Change"
@@ -288,6 +287,10 @@ def run(self):

update_handler.call_browser()

# In reality, the actual time when the change was detected could be a few seconds after this
# For example, it should include when the page stopped rendering if using a playwright/chrome type fetch
fetch_start_time = time.time()

changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)

# Re #342
@@ -512,7 +515,7 @@ def run(self):

if not self.datastore.data['watching'].get(uuid):
continue
#

# Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
if process_changedetection_results:

@@ -525,8 +528,6 @@ def run(self):
except Exception as e:
logger.warning(f"UUID: {uuid} Extract <title> as watch title was enabled, but couldn't find a <title>.")

# Now update after running everything
timestamp = round(time.time())
try:
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)

@@ -542,25 +543,28 @@ def run(self):

# Small hack so that we sleep just enough to allow 1 second between history snapshots
# this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys

if watch.newest_history_key and int(timestamp) == int(watch.newest_history_key):
# @also - the keys are one per second at the most (for now)
if watch.newest_history_key and int(fetch_start_time) == int(watch.newest_history_key):
logger.warning(
f"Timestamp {timestamp} already exists, waiting 1 seconds so we have a unique key in history.txt")
timestamp = str(int(timestamp) + 1)
f"Timestamp {fetch_start_time} already exists, waiting 1 seconds so we have a unique key in history.txt")
fetch_start_time += 1
time.sleep(1)

watch.save_history_text(contents=contents,
timestamp=timestamp,
timestamp=int(fetch_start_time),
snapshot_id=update_obj.get('previous_md5', 'none'))

if update_handler.fetcher.content:
watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=timestamp)

# Notifications should only trigger on the second time (first time, we gather the initial snapshot)
if watch.history_n >= 2:
logger.info(f"Change detected in UUID {uuid} - {watch['url']}")
if not watch.get('notification_muted'):
self.send_content_changed_notification(watch_uuid=uuid)
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
if update_handler.fetcher.content or (not update_handler.fetcher.content and empty_pages_are_a_change):
# attribute .last_changed is then based on this data
watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=int(fetch_start_time))

# Notifications should only trigger on the second time (first time, we gather the initial snapshot)
if watch.history_n >= 2:
logger.info(f"Change detected in UUID {uuid} - {watch['url']}")
if not watch.get('notification_muted'):
self.send_content_changed_notification(watch_uuid=uuid)

except Exception as e:
# Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
@@ -581,15 +585,15 @@ def run(self):
except Exception as e:
pass

self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
'last_checked': round(time.time()),
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3),
'last_checked': int(fetch_start_time),
'check_count': count
})


self.current_uuid = None # Done
self.q.task_done()
logger.debug(f"Watch {uuid} done in {time.time()-now:.2f}s")
logger.debug(f"Watch {uuid} done in {time.time()-fetch_start_time:.2f}s")

# Give the CPU time to interrupt
time.sleep(0.1)
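Taken together, the worker now keys everything off a single fetch_start_time captured after the browser work finishes. A simplified, hypothetical illustration of the resulting #2877 behaviour (not the worker code itself): the history snapshot key and last_checked come from the same value, so last_changed cannot drift away from last_checked even on a slow playwright/chrome fetch.

import time

def record_check(history: dict, changed_detected: bool, contents: str = "snapshot") -> dict:
    """Hypothetical sketch of the timestamp handling after this change."""
    fetch_start_time = time.time()  # in the worker: taken right after update_handler.call_browser()

    if changed_detected:
        # history.txt keys snapshots by epoch second, so bump when the key already exists
        if history and int(fetch_start_time) == max(history):
            fetch_start_time += 1
            time.sleep(1)
        history[int(fetch_start_time)] = contents  # stands in for watch.save_history_text()

    # last_checked is written from the very same value, so when a change is
    # recorded, last_changed (the newest history key) equals last_checked (#2877)
    last_changed = max(history) if len(history) >= 2 else 0
    return {'last_checked': int(fetch_start_time), 'last_changed': last_changed}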