diff --git a/doajtest/helpers.py b/doajtest/helpers.py index a8f33d52d5..33859e2225 100644 --- a/doajtest/helpers.py +++ b/doajtest/helpers.py @@ -145,7 +145,9 @@ def create_app_patch(cls): 'ENABLE_EMAIL': False, "FAKER_SEED": 1, "EVENT_SEND_FUNCTION": "portality.events.shortcircuit.send_event", - 'CMS_BUILD_ASSETS_ON_STARTUP': False + 'CMS_BUILD_ASSETS_ON_STARTUP': False, + 'URLSHORT_ALLOWED_SUPERDOMAINS': ['doaj.org', 'localhost', '127.0.0.1'], + } @classmethod diff --git a/doajtest/selenium_helpers.py b/doajtest/selenium_helpers.py index 80d9e2e42e..56cf9c15e4 100644 --- a/doajtest/selenium_helpers.py +++ b/doajtest/selenium_helpers.py @@ -1,7 +1,6 @@ import datetime import logging import multiprocessing -import time from multiprocessing import Process, freeze_support from typing import TYPE_CHECKING @@ -14,6 +13,7 @@ from doajtest.helpers import DoajTestCase, patch_config from portality import app, models, core from portality.dao import ESMappingMissingError +from portality.lib.thread_utils import wait_until if TYPE_CHECKING: from selenium.webdriver.remote.webdriver import WebDriver @@ -118,7 +118,7 @@ def setUp(self): self.selenium.set_window_size(1400, 1000) # avoid something is not clickable # wait for server to start - wait_unit(self._is_doaj_server_running, 10, 1.5, timeout_msg='doaj server not started') + wait_until(self._is_doaj_server_running, 10, 1.5, timeout_msg='doaj server not started') fix_index_not_found_exception(self.app_test) self.fix_es_mapping() @@ -143,7 +143,8 @@ def _is_doaj_server_running(self): self.selenium.find_element(By.CSS_SELECTOR, 'div.container') log.info('doaj server is running') return True - except selenium.common.exceptions.NoSuchElementException: + except (selenium.common.exceptions.NoSuchElementException, + selenium.common.exceptions.WebDriverException): log.info('doaj server is not running') return False @@ -159,12 +160,12 @@ def tearDown(self): print(f'{datetime.datetime.now().isoformat()} --- doaj process terminating...') self.doaj_process.terminate() self.doaj_process.join() - wait_unit(lambda: not self._is_doaj_server_running(), 10, 1, - timeout_msg='doaj server is still running') + wait_until(lambda: not self._is_doaj_server_running(), 10, 1, + timeout_msg='doaj server is still running') self.selenium.quit() - wait_unit(self._is_selenium_quit, 10, 1, timeout_msg='selenium is still running') + wait_until(self._is_selenium_quit, 10, 1, timeout_msg='selenium is still running') print('selenium terminated') super().tearDown() @@ -219,17 +220,7 @@ def login_by_acc(driver: 'WebDriver', acc: models.Account = None): assert "/login" not in driver.current_url -def wait_unit(exit_cond_fn, timeout=10, check_interval=0.1, - timeout_msg="wait_unit but exit_cond timeout"): - start = time.time() - while (time.time() - start) < timeout: - if exit_cond_fn(): - return - time.sleep(check_interval) - raise TimeoutError(timeout_msg) - - -def wait_unit_elements(driver: 'WebDriver', css_selector: str, timeout=10, check_interval=0.1): +def wait_until_elements(driver: 'WebDriver', css_selector: str, timeout=10, check_interval=0.1): elements = [] def exit_cond_fn(): @@ -240,11 +231,11 @@ def exit_cond_fn(): except: return False - wait_unit(exit_cond_fn, timeout, check_interval) + wait_until(exit_cond_fn, timeout=timeout, sleep_time=check_interval) return elements -def wait_unit_click(driver: 'WebDriver', css_selector: str, timeout=10, check_interval=0.1): +def wait_until_click(driver: 'WebDriver', css_selector: str, timeout=10, check_interval=0.1): def _click(): try: ele = find_ele_by_css(driver, css_selector) @@ -255,11 +246,11 @@ def _click(): except (StaleElementReferenceException, ElementClickInterceptedException): return False - wait_unit(_click, timeout=10, check_interval=0.1) + wait_until(_click, timeout=timeout, sleep_time=check_interval) def click_edges_item(driver: 'WebDriver', ele_name, item_name): - wait_unit_click(driver, f'#edges-bs3-refiningand-term-selector-toggle-{ele_name}') + wait_until_click(driver, f'#edges-bs3-refiningand-term-selector-toggle-{ele_name}') for ele in find_eles_by_css(driver, f'.edges-bs3-refiningand-term-selector-result-{ele_name} a'): if item_name in ele.text.strip(): ele.click() diff --git a/doajtest/seleniumtest/test_article_xml_upload.py b/doajtest/seleniumtest/test_article_xml_upload.py index 73be167c49..d14e19f5d8 100644 --- a/doajtest/seleniumtest/test_article_xml_upload.py +++ b/doajtest/seleniumtest/test_article_xml_upload.py @@ -16,6 +16,7 @@ from portality import models, dao from portality.constants import FileUploadStatus from portality.ui.messages import Messages +from portality.lib.thread_utils import wait_until HISTORY_ROW_PROCESSING_FAILED = 'processing failed' XML_FORMAT_DOAJ = 'doaj' @@ -62,7 +63,7 @@ def assert_history_row_success(self, history_row, n_article=1): self.assert_history_row(history_row, note=f'successfully processed {n_article} articles imported') @staticmethod - def wait_unit_file_upload_status_ready(): + def wait_until_file_upload_status_ready(): new_file_upload = None def _cond_fn(): @@ -73,7 +74,7 @@ def _cond_fn(): return new_file_upload.status not in (FileUploadStatus.Validated, FileUploadStatus.Incoming) # interval 0.5 is good because ES can't handle too many requests - selenium_helpers.wait_unit(_cond_fn, timeout=15, check_interval=0.5) + wait_until(_cond_fn, timeout=15, sleep_time=0.5) return new_file_upload @@ -108,7 +109,7 @@ def test_upload_fail(self, file_path, err_msg, expected_note): assert err_msg in alert_ele.text # # wait for background job to finish - self.wait_unit_file_upload_status_ready() + self.wait_until_file_upload_status_ready() self.selenium.refresh() new_rows = find_history_rows(self.selenium) @@ -177,16 +178,16 @@ def _find_history_rows(): self.upload_submit_file(file_path) assert 'File uploaded and waiting to be processed' in self.find_ele_by_css('.alert--success').text - selenium_helpers.wait_unit( + wait_until( lambda: len(_find_history_rows()) == n_org_rows + 1, - timeout=10, check_interval=1 + timeout=10, sleep_time=1 ) new_rows = _find_history_rows() assert n_org_rows + 1 == len(new_rows) assert n_file_upload + 1 == models.FileUpload.count() # wait for background job to finish - new_file_upload = self.wait_unit_file_upload_status_ready() + new_file_upload = self.wait_until_file_upload_status_ready() # assert file upload status assert new_file_upload.filename == Path(file_path).name @@ -307,7 +308,7 @@ def step_upload_success(self, publisher, article_xml_path, journal_issn, expecte XML_FORMAT_DOAJ) self.assert_history_row_success(latest_history_row) selenium_helpers.goto(self.selenium, url_path.url_toc_articles(journal_issn)) - selenium_helpers.wait_unit(lambda: self.find_eles_by_css(article_title_selector)) + wait_until(lambda: self.find_eles_by_css(article_title_selector)) assert expected_title in [e.get_attribute('innerHTML').strip() for e in self.find_eles_by_css(article_title_selector)] diff --git a/doajtest/unit/test_lib_urlshort.py b/doajtest/unit/test_lib_urlshort.py new file mode 100644 index 0000000000..e8e3850777 --- /dev/null +++ b/doajtest/unit/test_lib_urlshort.py @@ -0,0 +1,126 @@ +import time + +from doajtest.helpers import DoajTestCase, patch_config +from portality import models +from portality.bll import DOAJ +from portality.core import app +from portality.lib.thread_utils import wait_until +from portality.models import UrlShortener +from portality.util import url_for + +urlshort = DOAJ.urlshortService() + + +def wait_any_url_shortener(): + models.UrlShortener.refresh() + return models.UrlShortener.count() > 0 + + +class TestLibUrlshort(DoajTestCase): + + def test_create_new_alias(self): + n_samples = 3 + aliases = {urlshort.create_new_alias() for _ in range(n_samples)} + self.assertEqual(len(aliases), n_samples) + + assert len(aliases) == n_samples + assert len(list(aliases)[0]) == app.config.get("URLSHORT_ALIAS_LENGTH") + + def test_parse_shortened_url(self): + alias = 'alias_abc' + assert alias in urlshort.parse_shortened_url(alias) + + def test_add_url_shortener(self): + url = 'http://aabbcc.com' + surl = urlshort.add_url_shortener(url) + + assert surl + assert isinstance(surl, str) + + time.sleep(2) + UrlShortener.refresh() + + surl2 = urlshort.add_url_shortener(url) + assert surl == surl2 + + surl3 = urlshort.add_url_shortener(url + 'xxxx') + assert surl != surl3 + + def test_find_shortened_url(self): + url = 'http://aabbcc.com' + assert urlshort.find_shortened_url(url) is None + + surl = urlshort.add_url_shortener(url) + + time.sleep(2) + UrlShortener.refresh() + + surl2 = urlshort.find_shortened_url(url) + assert surl == surl2 + + def test_find_url_by_alias(self): + data = {} + for idx in range(3): + url = f'/{idx}' + surl = urlshort.add_url_shortener(url) + alias = surl[surl.rfind('/') + 1:] + data[alias] = url + + wait_until(wait_any_url_shortener) + + results = models.UrlShortener.q2obj() + + alias = results[0].alias + assert urlshort.find_url_by_alias(alias) == data[alias] + + +def surl_to_alias(surl): + alias = surl[surl.rfind('/') + 1:] + return alias + + +class TestUrlshortRoute(DoajTestCase): + def test_urlshort_route(self): + url = 'https://www.google.com' + surl = urlshort.add_url_shortener(url) + wait_until(wait_any_url_shortener) + + with self.app_test.test_client() as c: + rv = c.get(surl) + assert rv.status_code == 302 + assert rv.headers['Location'] == url + + def test_urlshort_route__not_found(self): + with self.app_test.test_client() as c: + rv = c.get(urlshort.parse_shortened_url('nnnnnnnnot_found')) + assert rv.status_code == 404 + + def test_create_shorten_url(self): + data = {'url': 'http://localhost:5004/search/journals'} + with self.app_test.test_client() as c: + rv = c.post(url_for('doajservices.shorten'), json=data) + assert rv.status_code == 200 + assert rv.json['short_url'] + + wait_until(wait_any_url_shortener) + assert urlshort.find_url_by_alias(surl_to_alias(rv.json['short_url'])) == data['url'] + + def test_create_shorten_url__invalid(self): + data = {'url': 'http://invalid.domain.abc/aaaaa'} + with self.app_test.test_client() as c: + rv = c.post(url_for('doajservices.shorten'), json=data) + assert rv.status_code == 400 + + def test_create_shorten_url__limit_reached(self): + orig_config = patch_config(self.app_test, {'URLSHORT_LIMIT': 1}) + data = {'url': 'http://localhost:5004/search/journals'} + with self.app_test.test_client() as c: + rv = c.post(url_for('doajservices.shorten'), json=data) + assert rv.status_code != 429 + + wait_until(wait_any_url_shortener) + + rv = c.post(url_for('doajservices.shorten'), json=data) + assert rv.status_code == 429 + + patch_config(self.app_test, orig_config) diff --git a/doajtest/unit/test_scripts_accounts_with_marketing_consent.py b/doajtest/unit/test_scripts_accounts_with_marketing_consent.py index cad2cb9870..7ba7dfbd59 100644 --- a/doajtest/unit/test_scripts_accounts_with_marketing_consent.py +++ b/doajtest/unit/test_scripts_accounts_with_marketing_consent.py @@ -61,7 +61,7 @@ def test_01_publishers_with_consent(self): str('False') ]) - thread_utils.wait_until(lambda: org_size + num_new_records == Account.count(), sleep_time=0.4) + thread_utils.wait_until(lambda: org_size + num_new_records * 3 == Account.count(), sleep_time=0.4) publishers_with_consent(output_file) assert os.path.exists(output_file) diff --git a/docs/dev/how-to-setup.md b/docs/dev/how-to-setup.md index 671336385f..dcdbc6d763 100644 --- a/docs/dev/how-to-setup.md +++ b/docs/dev/how-to-setup.md @@ -1,12 +1,16 @@ Setup google API key for google sheet ------------------------------------------ + ### create project an enable api + * go to https://console.cloud.google.com/ * create and select a project on the top left -* searching for "Google Drive API" and enable it, url should be some thing like (https://console.cloud.google.com/marketplace/product/google/drive.googleapis.com) +* searching for "Google Drive API" and enable it, url should be some thing + like (https://console.cloud.google.com/marketplace/product/google/drive.googleapis.com) * searching for "Google Sheets API" and enable it ### create key + * click `create credentials` button * select `Google Drive API` and `Web server` and `Application data` * select `No, I'm not using them` @@ -17,19 +21,18 @@ Setup google API key for google sheet * click `KEYS`, `ADD KEY` * select `JSON` and click create - ### share google sheet to service account + * go to google drive * right click the sheet you want to share * click `Share` * paste the service account email to `People` field * click `Done` - - How to setup for `datalog_journal_added_update` task -------------------------------------------------- following variable need for background job `datalog_journal_added_update` + ``` # value should be key file path of json, empty string means disabled GOOGLE_KEY_PATH = '' @@ -40,3 +43,28 @@ DATALOG_JA_FILENAME = 'DOAJ: journals added and withdrawn' # worksheet name or tab name that datalog will write to DATALOG_JA_WORKSHEET_NAME = 'Added' ``` + +How to setup for dev with Plausible +----------------------------------- + +* run plausible + * ref 'https://github.com/plausible/community-edition' + * update `plausible-conf.env` + * run docker `docker-compose up` + * testing configuration by browse `http://localhost:8000` and login admin user +* setup fake domain in /etc/hosts + * e.g. `127.0.0.1 doaj.dev.local` +* setup dev.cfg + * `DEBUG = False` + * `BASE_URL = "https://doaj.dev.local:5004"` + * `PLAUSIBLE_URL = "http://localhost:8000"` + * `PLAUSIBLE_JS_URL = PLAUSIBLE_URL + "/js/script.outbound-links.file-downloads.js"` + * `PLAUSIBLE_API_URL = PLAUSIBLE_URL + "/api/event"` + * `PLAUSIBLE_SITE_NAME = "doaj.dev.local"` +* update `portality/app.py`, change `fake_https=True` e.g. `run_server(fake_https=True)` + * you might need `cryptography~=42.0` installed in pip +* run `portality/app.py` +* testing configuration by browse `https://doaj.dev.local:5004` + + + diff --git a/docs/pr_note/2881_url_shortener.md b/docs/pr_note/2881_url_shortener.md new file mode 100644 index 0000000000..1715bcebc4 --- /dev/null +++ b/docs/pr_note/2881_url_shortener.md @@ -0,0 +1,7 @@ + + + +Reminders: +--------------------- +* new `Goals` `Urlshort` should be added to plausible +* edges library updated for generate url shorten \ No newline at end of file diff --git a/portality/app.py b/portality/app.py index efd9e5b170..8050ee61a4 100644 --- a/portality/app.py +++ b/portality/app.py @@ -9,7 +9,7 @@ ~~DOAJ:WebApp~~ """ - +import logging import os, sys import tzlocal import pytz @@ -434,6 +434,23 @@ def page_not_found(e): return render_template('500.html'), 500 +is_dev_log_setup_completed = False + + +def setup_dev_log(): + global is_dev_log_setup_completed + if not is_dev_log_setup_completed: + is_dev_log_setup_completed = True + app.logger.handlers = [] + log = logging.getLogger() + log.setLevel(logging.DEBUG) + ch = logging.StreamHandler() + ch.setLevel(logging.DEBUG) + ch.setFormatter(logging.Formatter('%(asctime)s %(levelname).4s %(processName)s%(threadName)s - ' + '%(message)s --- [%(name)s][%(funcName)s:%(lineno)d]')) + log.addHandler(ch) + + def run_server(host=None, port=None, fake_https=False): """ :param host: @@ -443,6 +460,10 @@ def run_server(host=None, port=None, fake_https=False): that can help for debugging Plausible :return: """ + + if app.config.get('DEBUG_DEV_LOG', False): + setup_dev_log() + pycharm_debug = app.config.get('DEBUG_PYCHARM', False) if len(sys.argv) > 1: if sys.argv[1] == '-d': diff --git a/portality/bll/doaj.py b/portality/bll/doaj.py index bd756e8b59..b0dffa3d12 100644 --- a/portality/bll/doaj.py +++ b/portality/bll/doaj.py @@ -130,4 +130,9 @@ def tourService(cls): @classmethod def autochecksService(cls, autocheck_plugins=None): from portality.bll.services import autochecks - return autochecks.AutocheckService(autocheck_plugins=autocheck_plugins) \ No newline at end of file + return autochecks.AutocheckService(autocheck_plugins=autocheck_plugins) + + @classmethod + def urlshortService(cls): + from portality.bll.services import urlshort + return urlshort diff --git a/portality/bll/services/urlshort.py b/portality/bll/services/urlshort.py new file mode 100644 index 0000000000..a017c44f93 --- /dev/null +++ b/portality/bll/services/urlshort.py @@ -0,0 +1,77 @@ +import random +import string +from typing import Optional + +from portality import models +from portality.core import app +from portality.models.url_shortener import AliasQuery, UrlQuery +from portality.util import url_for + +# global current status of the alias length +ALIAS_CHARS = string.ascii_letters + string.digits + + +def add_url_shortener(url: str) -> str: + """ + create or find a shorted url from the given url + + Parameters + ---------- + url + + Returns + ------- + shortened URL + """ + + shortened_url = find_shortened_url(url) + if shortened_url: + return shortened_url + + alias = create_new_alias() + models.UrlShortener(url=url, alias=alias).save() + + return parse_shortened_url(alias) + + +def create_new_alias(n_retry=5) -> str: + alias_len = app.config.get("URLSHORT_ALIAS_LENGTH") + for _ in range(n_retry): + alias = ''.join(random.sample(ALIAS_CHARS, alias_len)) + cnt = models.UrlShortener.hit_count(UrlQuery(alias).query()) + if cnt == 0: + return alias + + raise ValueError('Could not create a unique alias') + + +def find_shortened_url(url: str) -> Optional[str]: + """ find the shorted url from the given url """ + + aliases = models.UrlShortener.q2obj(q=AliasQuery(url).query()) + + if len(aliases) == 0: + return None + + if len(aliases) > 1: + app.logger.warning(f'More than one alias found for url[{url}] n[{len(aliases)}]') + + return parse_shortened_url(aliases[0].alias) + + +def find_url_by_alias(alias: str) -> Optional[str]: + """ find the original url from the given alias """ + + urls = models.UrlShortener.q2obj(q=UrlQuery(alias).query()) + n_url = len(urls) + if n_url == 0: + return None + if n_url > 1: + app.logger.warning(f'More than one URL found for alias[{alias}] n[{n_url}]') + + return urls[0].url + + +def parse_shortened_url(alias: str) -> str: + """ parse the shortened url from the given alias """ + return app.config.get("BASE_URL") + url_for('doaj.shortened_url', alias=alias) diff --git a/portality/dao.py b/portality/dao.py index 1c2b32da5f..62a61c95c7 100644 --- a/portality/dao.py +++ b/portality/dao.py @@ -857,7 +857,7 @@ def count(cls): # return requests.get(cls.target() + '_count').json()['count'] @classmethod - def hit_count(cls, query, **kwargs): + def hit_count(cls, query, **kwargs) -> int: countable_query = deepcopy(query) if "track_total_hits" not in countable_query: countable_query["track_total_hits"] = True diff --git a/portality/lib/thread_utils.py b/portality/lib/thread_utils.py index cee82e82a3..4512d497d3 100644 --- a/portality/lib/thread_utils.py +++ b/portality/lib/thread_utils.py @@ -2,13 +2,14 @@ from typing import Callable -def wait_until(cond_fn: Callable[[], bool], timeout=10, sleep_time=0.1): +def wait_until(cond_fn: Callable[[], bool], timeout=10, sleep_time=0.1, + timeout_msg='fail to meet the condition within the timeout period.'): start_time = time.time() - while True: - if cond_fn(): - return True - - if (time.time() - start_time) > timeout: - return False + while (time.time() - start_time) < timeout: + cond_result = cond_fn() + if cond_result: + return cond_result time.sleep(sleep_time) + + raise TimeoutError(timeout_msg) diff --git a/portality/models/__init__.py b/portality/models/__init__.py index eea1859a70..6ad3cb9eda 100644 --- a/portality/models/__init__.py +++ b/portality/models/__init__.py @@ -28,6 +28,7 @@ from portality.models.event import Event from portality.models.notifications import Notification from portality.models.autocheck import Autocheck +from portality.models.url_shortener import UrlShortener import sys diff --git a/portality/models/url_shortener.py b/portality/models/url_shortener.py new file mode 100644 index 0000000000..fdb0a1b594 --- /dev/null +++ b/portality/models/url_shortener.py @@ -0,0 +1,76 @@ +from portality.dao import DomainObject + + +class UrlShortener(DomainObject): + """~~UrlShortener:Model->DomainObject:Model~~""" + __type__ = "url_shortener" + + def __init__(self, **kwargs): + super(UrlShortener, self).__init__(**kwargs) + + @property + def url(self): + return self.data.get("url") + + @url.setter + def url(self, val): + self.data["url"] = val + + @property + def alias(self): + return self.data.get("alias") + + @alias.setter + def alias(self, val): + self.data["alias"] = val + + +class UrlQuery: + def __init__(self, alias: str): + self.alias = alias + + def query(self): + return { + 'query': { + 'bool': { + 'must': [ + {'term': {'alias.exact': self.alias}} + ] + } + }, + '_source': ['url'], + } + + +class AliasQuery: + def __init__(self, url: str): + self.url = url + + def query(self): + return { + 'query': { + 'bool': { + 'must': [ + {'term': {'url.exact': self.url}} + ] + } + }, + '_source': ['alias'], + } + + +class CountWithinDaysQuery: + def __init__(self, days: int): + self.days = days + + def query(self): + return { + "size": 0, + "query": { + "range": { + "created_date": { + "gte": f"now-{self.days}d", + } + } + } + } diff --git a/portality/settings.py b/portality/settings.py index 998b6ce68b..b7ae3da0ea 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -17,6 +17,7 @@ HOST = '0.0.0.0' DEBUG = False +DEBUG_DEV_LOG = False # show all log of each module PORT = 5004 SSL = True VALID_ENVIRONMENTS = ['dev', 'test', 'staging', 'production', 'harvester'] @@ -709,6 +710,8 @@ MAPPINGS['provenance'] = MAPPINGS["account"] #~~->Provenance:Model~~ MAPPINGS['preserve'] = MAPPINGS["account"] #~~->Preservation:Model~~ MAPPINGS['notification'] = MAPPINGS["account"] #~~->Notification:Model~~ +MAPPINGS['url_shortener'] = MAPPINGS["account"] #~~->URLShortener:Model~~ + ######################################### # Query Routes @@ -1267,6 +1270,12 @@ ANALYTICS_CATEGORY_PUBLICDATADUMP = 'PublicDataDump' ANALYTICS_ACTION_PUBLICDATADUMP = 'Download' +# Plausible for Urlshort +# ~~->URLShortener:Feature~~ +ANALYTICS_CATEGORY_URLSHORT = 'Urlshort' +ANALYTICS_ACTION_URLSHORT_ADD = 'Find or create shortener url' +ANALYTICS_ACTION_URLSHORT_REDIRECT = 'Redirect' + # Plausible for API # ~~-> API:Feature~~ ANALYTICS_CATEGORY_API = 'API Hit' @@ -1551,3 +1560,17 @@ AUTOCHECK_RESOURCE_ISSN_ORG_TIMEOUT = 10 AUTOCHECK_RESOURCE_ISSN_ORG_THROTTLE = 1 # seconds between requests + + + +################################ +# Url Shortener +# ~~->URLShortener:Feature~~ + +# URLSHORT_LIMIT* used to limit the number of short URLs (URLSHORT_LIMIT) created +# by a user within a certain time period (URLSHORT_LIMIT_WITHIN_DAYS) +URLSHORT_LIMIT_WITHIN_DAYS = 7 +URLSHORT_LIMIT = 50_000 + +URLSHORT_ALLOWED_SUPERDOMAINS = ['doaj.org'] +URLSHORT_ALIAS_LENGTH = 6 diff --git a/portality/static/js/doaj.fieldrender.edges.js b/portality/static/js/doaj.fieldrender.edges.js index dc3138e4f8..6f5e44c0e9 100644 --- a/portality/static/js/doaj.fieldrender.edges.js +++ b/portality/static/js/doaj.fieldrender.edges.js @@ -1303,7 +1303,6 @@ $.extend(true, doaj, { let shorten = ""; if (this.component.urlShortener) { - var shortenClass = edges.css_classes(this.namespace, "shorten", this); var shortenButtonClass = edges.css_classes(this.namespace, "shorten-url", this) shorten = '

'; } diff --git a/portality/static/js/doaj.js b/portality/static/js/doaj.js index 5069903dcd..a8b583f25c 100644 --- a/portality/static/js/doaj.js +++ b/portality/static/js/doaj.js @@ -69,29 +69,29 @@ var doaj = { doaj.bindMiniSearch(); }, - // bitlyShortener : function(query, success_callback, error_callback) { - // // ~~-> Bitly:ExternalService ~~ - // function callbackWrapper(data) { - // success_callback(data.url); - // } - // - // function errorHandler() { - // alert("Sorry, we're unable to generate short urls at this time"); - // error_callback(); - // } - // - // var page = window.location.protocol + '//' + window.location.host + window.location.pathname; - // - // $.ajax({ - // type: "POST", - // contentType: "application/json", - // dataType: "jsonp", - // url: "/service/shorten", - // data : JSON.stringify({page: page, query: query}), - // success: callbackWrapper, - // error: errorHandler - // }); - // }, + doajUrlShortener : function(query, success_callback, error_callback) { + function callbackWrapper(data) { + success_callback(data.short_url); + } + + function errorHandler() { + alert("Sorry, we're unable to generate short urls at this time"); + error_callback && error_callback(); + } + + const page = `${window.location.protocol}//${window.location.host}${window.location.pathname}`; + const url = `${page}?source=${encodeURIComponent(JSON.stringify(query))}`; + + $.ajax({ + type: "POST", + contentType: "application/json", + url: "/service/shorten", + data : JSON.stringify({url: url}), + success: callbackWrapper, + error: errorHandler + }); + + }, journal_toc_id : function(journal) { // if e-issn is available, use that diff --git a/portality/static/js/edges/public.article.edge.js b/portality/static/js/edges/public.article.edge.js index b520a5c474..06ffb46652 100644 --- a/portality/static/js/edges/public.article.edge.js +++ b/portality/static/js/edges/public.article.edge.js @@ -139,7 +139,7 @@ $.extend(true, doaj, { edges.newFullSearchController({ id: "share_embed", category: "controller", - // urlShortener : doaj.bitlyShortener, + urlShortener : doaj.doajUrlShortener, embedSnippet : doaj.publicSearch.embedSnippet, renderer: doaj.renderers.newShareEmbedRenderer({ shareLinkText: ' Share or embed' diff --git a/portality/static/js/edges/public.journal.edge.js b/portality/static/js/edges/public.journal.edge.js index 8904e50dfb..60c1d124d0 100644 --- a/portality/static/js/edges/public.journal.edge.js +++ b/portality/static/js/edges/public.journal.edge.js @@ -224,7 +224,7 @@ $.extend(true, doaj, { edges.newFullSearchController({ id: "share_embed", category: "controller", - // urlShortener : doaj.bitlyShortener, + urlShortener : doaj.doajUrlShortener, embedSnippet : doaj.publicSearch.embedSnippet, renderer: doaj.renderers.newShareEmbedRenderer({ shareLinkText: ' Share or embed' diff --git a/portality/static/vendor/edges b/portality/static/vendor/edges index 990f422016..268a7568b1 160000 --- a/portality/static/vendor/edges +++ b/portality/static/vendor/edges @@ -1 +1 @@ -Subproject commit 990f4220163a3e18880f0bdc3ad5c80d234d22dd +Subproject commit 268a7568b1895a511095b6a3477e63126ba2065e diff --git a/portality/view/doaj.py b/portality/view/doaj.py index a10217534c..0cdad7c526 100644 --- a/portality/view/doaj.py +++ b/portality/view/doaj.py @@ -12,6 +12,7 @@ from portality import dao from portality import models from portality import store +from portality.bll import DOAJ from portality.core import app from portality.decorators import ssl_required, api_key_required from portality.forms.application_forms import JournalFormFactory @@ -43,7 +44,8 @@ def cookie_consent(): else: resp = make_response() # set a cookie that lasts for one year - resp.set_cookie(app.config.get("CONSENT_COOKIE_KEY"), Messages.CONSENT_COOKIE_VALUE, max_age=31536000, samesite=None, secure=True) + resp.set_cookie(app.config.get("CONSENT_COOKIE_KEY"), Messages.CONSENT_COOKIE_VALUE, max_age=31536000, + samesite=None, secure=True) return resp @@ -55,7 +57,8 @@ def dismiss_site_note(): else: resp = make_response() # set a cookie that lasts for one year - resp.set_cookie(app.config.get("SITE_NOTE_KEY"), app.config.get("SITE_NOTE_COOKIE_VALUE"), max_age=app.config.get("SITE_NOTE_SLEEP"), samesite=None, secure=True) + resp.set_cookie(app.config.get("SITE_NOTE_KEY"), app.config.get("SITE_NOTE_COOKIE_VALUE"), + max_age=app.config.get("SITE_NOTE_SLEEP"), samesite=None, secure=True) return resp @@ -100,7 +103,7 @@ def search(): def search_post(): """ Redirect a query from the box on the index page to the search page. """ if request.form.get('origin') != 'ui': - abort(400) # bad request - we must receive searches from our own UI + abort(400) # bad request - we must receive searches from our own UI ref = request.form.get("ref") if ref is None: @@ -115,14 +118,14 @@ def search_post(): # lhs for journals, rhs for articles field_map = { - "all" : (None, None), - "title" : ("bibjson.title", "bibjson.title"), - "abstract" : (None, "bibjson.abstract"), - "subject" : ("index.classification", "index.classification"), - "author" : (None, "bibjson.author.name"), - "issn" : ("index.issn.exact", None), - "publisher" : ("bibjson.publisher.name", None), - "country" : ("index.country", None) + "all": (None, None), + "title": ("bibjson.title", "bibjson.title"), + "abstract": (None, "bibjson.abstract"), + "subject": ("index.classification", "index.classification"), + "author": (None, "bibjson.author.name"), + "issn": ("index.issn.exact", None), + "publisher": ("bibjson.publisher.name", None), + "country": ("index.country", None) } default_field_opts = field_map.get(field, None) default_field = None @@ -143,6 +146,7 @@ def search_post(): return redirect(route + '?source=' + urllib.parse.quote(json.dumps(query)) + "&ref=" + urllib.parse.quote(ref)) + ############################################# # FIXME: this should really live somewhere else more appropirate to who can access it @@ -151,9 +155,9 @@ def search_post(): @ssl_required def journal_readonly(journal_id): if ( - not current_user.has_role("admin") - or not current_user.has_role("editor") - or not current_user.has_role("associate_editor") + not current_user.has_role("admin") + or not current_user.has_role("editor") + or not current_user.has_role("associate_editor") ): abort(401) @@ -234,11 +238,13 @@ def get_from_local_store(container, filename): def autocomplete(doc_type, field_name): prefix = request.args.get('q', '') if not prefix: - return jsonify({'suggestions': [{"id": "", "text": "No results found"}]}) # select2 does not understand 400, which is the correct code here... + return jsonify({'suggestions': [ + {"id": "", "text": "No results found"}]}) # select2 does not understand 400, which is the correct code here... m = models.lookup_model(doc_type) if not m: - return jsonify({'suggestions': [{"id": "", "text": "No results found"}]}) # select2 does not understand 404, which is the correct code here... + return jsonify({'suggestions': [ + {"id": "", "text": "No results found"}]}) # select2 does not understand 404, which is the correct code here... size = request.args.get('size', 5) @@ -286,6 +292,7 @@ def find_toc_journal_by_identifier(identifier): def is_issn_by_identifier(identifier): return len(identifier) == 9 + def find_correct_redirect_identifier(identifier, bibjson) -> str: """ return None if identifier is correct and no redirect is needed @@ -326,6 +333,7 @@ def find_correct_redirect_identifier(identifier, bibjson) -> str: # let it continue loading if we only have the hex UUID for the journal (no ISSNs) # and the user is referring to the toc page via that ID + @blueprint.route("/toc/") def toc(identifier=None): """ Table of Contents page for a journal. identifier may be the journal id or an issn """ @@ -338,7 +346,7 @@ def toc(identifier=None): return redirect(url_for('doaj.toc', identifier=real_identifier), 301) else: # now render all that information - return render_template('doaj/toc.html', journal=journal, bibjson=bibjson ) + return render_template('doaj/toc.html', journal=journal, bibjson=bibjson) @blueprint.route("/toc/articles/") @@ -353,11 +361,10 @@ def toc_articles(identifier=None): if real_identifier: return redirect(url_for('doaj.toc_articles', identifier=real_identifier), 301) else: - return render_template('doaj/toc_articles.html', journal=journal, bibjson=bibjson ) + return render_template('doaj/toc_articles.html', journal=journal, bibjson=bibjson) - -#~~->Article:Page~~ +# ~~->Article:Page~~ @blueprint.route("/article/") def article_page(identifier=None): # identifier must be the article id @@ -375,7 +382,8 @@ def article_page(identifier=None): if len(journals) > 0: journal = journals[0] - return render_template('doaj/article.html', article=article, journal=journal, page={"highlight" : True}) + return render_template('doaj/article.html', article=article, journal=journal, page={"highlight": True}) + # Not using this form for now but we might bring it back later # @blueprint.route("/contact/", methods=["GET", "POST"]) @@ -518,7 +526,8 @@ def xml(): @blueprint.route("/docs/widgets/") def widgets(): - return render_template("layouts/static_page.html", page_frag="/docs/widgets.html", base_url=app.config.get('BASE_URL')) + return render_template("layouts/static_page.html", page_frag="/docs/widgets.html", + base_url=app.config.get('BASE_URL')) @blueprint.route("/docs/public-data-dump/") @@ -540,10 +549,12 @@ def faq(): def about(): return render_template("layouts/static_page.html", page_frag="/about/index.html") + @blueprint.route("/at-20/") def at_20(): return render_template("layouts/static_page.html", page_frag="/about/at-20.html") + @blueprint.route("/about/ambassadors/") def ambassadors(): return render_template("layouts/static_page.html", page_frag="/about/ambassadors.html") @@ -640,3 +651,15 @@ def publishers(): @blueprint.route("/password-reset/") def new_password_reset(): return redirect(url_for('account.forgot'), code=301) + + +@blueprint.route("/sc/") +@plausible.pa_event(app.config.get('ANALYTICS_CATEGORY_URLSHORT', 'Urlshort'), + action=app.config.get('ANALYTICS_ACTION_URLSHORT_REDIRECT', 'Redirect')) +def shortened_url(alias): + url = DOAJ.urlshortService().find_url_by_alias(alias) + if url: + return redirect(url) + + app.logger.debug(f"Shortened URL not found: [{alias}]") + abort(404) diff --git a/portality/view/doajservices.py b/portality/view/doajservices.py index 48c9c65003..8c94353775 100644 --- a/portality/view/doajservices.py +++ b/portality/view/doajservices.py @@ -1,13 +1,17 @@ -import json, urllib.request, urllib.parse, urllib.error, requests +import json +from urllib.parse import urlparse from flask import Blueprint, make_response, request, abort, render_template from flask_login import current_user, login_required -from portality.core import app -from portality.decorators import ssl_required, write_required, restrict_to_role -from portality.util import jsonp from portality import lock, models from portality.bll import DOAJ +from portality.bll.services import urlshort +from portality.core import app +from portality.decorators import ssl_required, write_required +from portality.lib import plausible +from portality.models.url_shortener import CountWithinDaysQuery +from portality.util import jsonp blueprint = Blueprint('doajservices', __name__) @@ -40,7 +44,7 @@ def unlock(object_type, object_id): abort(400) # otherwise, return success - resp = make_response(json.dumps({"result" : "success"})) + resp = make_response(json.dumps({"result": "success"})) resp.mimetype = "application/json" return resp @@ -55,47 +59,34 @@ def unlocked(): return render_template("unlocked.html") -# @blueprint.route("/shorten", methods=["POST"]) -# @jsonp -# def shorten(): -# # Enable this if you are testing and you want to see the front end work, without working bit.ly credentials -# # return make_response(json.dumps({"url" : "testing url"})) -# try: -# # parse the json -# d = json.loads(request.data) -# p = d['page'] -# q = d['query'] -# -# # re-serialise the query, and url encode it -# source = urllib.parse.quote(json.dumps(q)) -# -# # assemble the DOAJ url -# doajurl = p + "?source=" + source -# -# # assemble the bitly url. Note that we re-encode the doajurl to include in the -# # query arguments, so by this point it is double-encoded -# bitly = app.config.get("BITLY_SHORTENING_API_URL") -# bitly_oauth = app.config.get("BITLY_OAUTH_TOKEN") -# -# # Set an Auth Bearer token (Bitly 4.0) -# headers = {'Authorization': 'Bearer ' + bitly_oauth} -# -# # Add the long url as a payload -# payload = {'long_url': doajurl} -# -# # make the request -# resp = requests.post(bitly, headers=headers, data=json.dumps(payload)) -# shorturl = resp.json().get('link') -# -# if not shorturl: -# abort(400) -# -# # make the response -# answer = make_response(json.dumps({"url": shorturl})) -# answer.mimetype = "application/json" -# return answer -# except: -# abort(400) +@blueprint.route("/shorten", methods=["POST"]) +@plausible.pa_event(app.config.get('ANALYTICS_CATEGORY_URLSHORT', 'Urlshort'), + action=app.config.get('ANALYTICS_ACTION_URLSHORT_ADD', 'Find or create shortener url')) +def shorten(): + """ create shortener url """ + + # check if limit reached + n_created = models.UrlShortener.hit_count(CountWithinDaysQuery( + app.config.get("URLSHORT_LIMIT_WITHIN_DAYS", 7) + ).query()) + n_created_limit = app.config.get("URLSHORT_LIMIT", 100_000) + if n_created >= n_created_limit: + app.logger.warning(f"Url shortener limit reached: [{n_created=}] >= [{n_created_limit=}]") + abort(429) + + url = json.loads(request.data)['url'] + + # validate url + hostname = urlparse(url).hostname + if not any((hostname == d or hostname.endswith(f".{d}")) + for d in app.config.get("URLSHORT_ALLOWED_SUPERDOMAINS", [])): + app.logger.warning(f"Invalid url shorten request: {url}") + abort(400) + + short_url = urlshort.add_url_shortener(url) + resp = make_response(json.dumps({"short_url": short_url})) + resp.mimetype = "application/json" + return resp @blueprint.route("/groupstatus/", methods=["GET"]) @@ -107,7 +98,8 @@ def group_status(group_id): :param group_id: :return: """ - if (not (current_user.has_role("editor") and models.EditorGroup.pull(group_id).editor == current_user.id)) and (not current_user.has_role("admin")): + if (not (current_user.has_role("editor") and models.EditorGroup.pull(group_id).editor == current_user.id)) and ( + not current_user.has_role("admin")): abort(404) svc = DOAJ.todoService() stats = svc.group_stats(group_id) @@ -126,6 +118,7 @@ def dismiss_autocheck(autocheck_set_id, autocheck_id): abort(404) return make_response(json.dumps({"status": "success"})) + @blueprint.route("/autocheck/undismiss//", methods=["GET", "POST"]) @jsonp @login_required @@ -137,4 +130,3 @@ def undismiss_autocheck(autocheck_set_id, autocheck_id): if not done: abort(404) return make_response(json.dumps({"status": "success"})) - diff --git a/setup.py b/setup.py index e7c9268f63..c304d6228e 100644 --- a/setup.py +++ b/setup.py @@ -72,6 +72,7 @@ "selenium==4.12.0", "combinatrix @ git+https://github.com/CottageLabs/combinatrix.git@740d255f0050d53a20324df41c08981499bb292c#egg=combinatrix", "bs4==0.0.1", # beautifulsoup for HTML parsing + "cryptography~=42.0", # for ad-hoc https ], # additional test dependencies for the test-extras target