From a586a2130609e9b1040e4143551911c97925d00e Mon Sep 17 00:00:00 2001 From: philip Date: Mon, 4 Mar 2024 11:47:13 +0000 Subject: [PATCH 01/28] move wait_unit --- doajtest/helpers.py | 11 +++++++++++ doajtest/selenium_helpers.py | 14 +------------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/doajtest/helpers.py b/doajtest/helpers.py index 60c8dd0e85..f4a6c5f19b 100644 --- a/doajtest/helpers.py +++ b/doajtest/helpers.py @@ -4,6 +4,7 @@ import logging import os import shutil +import time from contextlib import contextmanager from glob import glob from unittest import TestCase @@ -411,3 +412,13 @@ def login(app_client, username, password, follow_redirects=True): def logout(app_client, follow_redirects=True): return app_client.get(url_for('account.logout'), follow_redirects=follow_redirects) + + +def wait_unit(exit_cond_fn, timeout=10, check_interval=0.1, + timeout_msg="wait_unit but exit_cond timeout"): + start = time.time() + while (time.time() - start) < timeout: + if exit_cond_fn(): + return + time.sleep(check_interval) + raise TimeoutError(timeout_msg) diff --git a/doajtest/selenium_helpers.py b/doajtest/selenium_helpers.py index b3f5e5ea21..79f038e5ce 100644 --- a/doajtest/selenium_helpers.py +++ b/doajtest/selenium_helpers.py @@ -1,6 +1,5 @@ import datetime import logging -import time import multiprocessing from multiprocessing import Process, freeze_support from typing import TYPE_CHECKING @@ -8,11 +7,10 @@ import selenium from selenium import webdriver from selenium.common import StaleElementReferenceException, ElementClickInterceptedException -from selenium.webdriver import DesiredCapabilities from selenium.webdriver.common.by import By from doajtest.fixtures.url_path import URL_LOGOUT -from doajtest.helpers import DoajTestCase, patch_config +from doajtest.helpers import DoajTestCase, patch_config, wait_unit from portality import app, models, core from portality.dao import ESMappingMissingError @@ -220,16 +218,6 @@ def login_by_acc(driver: 'WebDriver', acc: models.Account = None): assert "/login" not in driver.current_url -def wait_unit(exit_cond_fn, timeout=10, check_interval=0.1, - timeout_msg="wait_unit but exit_cond timeout"): - start = time.time() - while (time.time() - start) < timeout: - if exit_cond_fn(): - return - time.sleep(check_interval) - raise TimeoutError(timeout_msg) - - def wait_unit_elements(driver: 'WebDriver', css_selector: str, timeout=10, check_interval=0.1): elements = [] From 626c3f8af3e96bb576255a2aae1a80186a62af2e Mon Sep 17 00:00:00 2001 From: philip Date: Mon, 4 Mar 2024 11:48:23 +0000 Subject: [PATCH 02/28] move wait_unit --- doajtest/seleniumtest/test_article_xml_upload.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doajtest/seleniumtest/test_article_xml_upload.py b/doajtest/seleniumtest/test_article_xml_upload.py index e8fe1e1671..1127ed9284 100644 --- a/doajtest/seleniumtest/test_article_xml_upload.py +++ b/doajtest/seleniumtest/test_article_xml_upload.py @@ -7,6 +7,7 @@ from selenium.webdriver.common.by import By from selenium.webdriver.support.select import Select +import doajtest.helpers from doajtest import selenium_helpers from doajtest.fixtures import JournalFixtureFactory, url_path, article_doajxml from doajtest.fixtures.accounts import PUBLISHER_B_SOURCE, create_publisher_a, create_maned_a @@ -72,7 +73,7 @@ def _cond_fn(): return new_file_upload.status not in (FileUploadStatus.Validated, FileUploadStatus.Incoming) # interval 0.5 is good because ES can't handle too many requests - selenium_helpers.wait_unit(_cond_fn, timeout=15, check_interval=0.5) + doajtest.helpers.wait_unit(_cond_fn, timeout=15, check_interval=0.5) return new_file_upload @@ -176,7 +177,7 @@ def _find_history_rows(): self.upload_submit_file(file_path) assert 'File uploaded and waiting to be processed' in self.find_ele_by_css('.alert--success').text - selenium_helpers.wait_unit( + doajtest.helpers.wait_unit( lambda: len(_find_history_rows()) == n_org_rows + 1, timeout=10, check_interval=1 ) @@ -306,7 +307,7 @@ def step_upload_success(self, publisher, article_xml_path, journal_issn, expecte XML_FORMAT_DOAJ) self.assert_history_row_success(latest_history_row) selenium_helpers.goto(self.selenium, url_path.url_toc_articles(journal_issn)) - selenium_helpers.wait_unit(lambda: self.find_eles_by_css(article_title_selector)) + doajtest.helpers.wait_unit(lambda: self.find_eles_by_css(article_title_selector)) assert expected_title in [e.get_attribute('innerHTML').strip() for e in self.find_eles_by_css(article_title_selector)] From 475d1a30ad90d71da7b3fa2a958d280ac519333a Mon Sep 17 00:00:00 2001 From: philip Date: Mon, 4 Mar 2024 12:06:53 +0000 Subject: [PATCH 03/28] add urlshort --- doajtest/unit/test_lib_urlshort.py | 81 ++++++++++++++++++++++++++++ portality/dao.py | 2 +- portality/lib/urlshort.py | 84 ++++++++++++++++++++++++++++++ portality/models/__init__.py | 1 + portality/models/url_shortener.py | 59 +++++++++++++++++++++ portality/settings.py | 2 + portality/view/doaj.py | 66 +++++++++++++++-------- 7 files changed, 271 insertions(+), 24 deletions(-) create mode 100644 doajtest/unit/test_lib_urlshort.py create mode 100644 portality/lib/urlshort.py create mode 100644 portality/models/url_shortener.py diff --git a/doajtest/unit/test_lib_urlshort.py b/doajtest/unit/test_lib_urlshort.py new file mode 100644 index 0000000000..95c9813db5 --- /dev/null +++ b/doajtest/unit/test_lib_urlshort.py @@ -0,0 +1,81 @@ +import time + +from doajtest.helpers import DoajTestCase, wait_unit +from portality import models +from portality.lib import urlshort +from portality.models import UrlShortener + + +def wait_any_url_shortener(): + models.UrlShortener.refresh() + return models.UrlShortener.count() > 0 + + +class TestLibUrlshort(DoajTestCase): + + def test_create_new_alias(self): + n_samples = 3 + aliases = {urlshort.create_new_alias() for _ in range(n_samples)} + self.assertEqual(len(aliases), n_samples) + + assert len(aliases) == n_samples + assert len(list(aliases)[0]) == urlshort.alias_len + + def test_parse_shortened_url(self): + alias = 'alias_abc' + assert alias in urlshort.parse_shortened_url(alias) + + def test_add_url_shortener(self): + url = 'http://aabbcc.com' + surl = urlshort.add_url_shortener(url) + + assert surl + assert isinstance(surl, str) + + time.sleep(2) + UrlShortener.refresh() + + surl2 = urlshort.add_url_shortener(url) + assert surl == surl2 + + surl3 = urlshort.add_url_shortener(url + 'xxxx') + assert surl != surl3 + + def test_find_shortened_url(self): + url = 'http://aabbcc.com' + assert urlshort.find_shortened_url(url) is None + + surl = urlshort.add_url_shortener(url) + + time.sleep(2) + UrlShortener.refresh() + + surl2 = urlshort.find_shortened_url(url) + assert surl == surl2 + + def test_find_url_by_alias(self): + data = {} + for idx in range(3): + url = f'/{idx}' + surl = urlshort.add_url_shortener(url) + alias = surl[surl.rfind('/') + 1:] + data[alias] = url + + wait_unit(wait_any_url_shortener) + + results = models.UrlShortener.q2obj() + + alias = results[0].alias + assert urlshort.find_url_by_alias(alias) == data[alias] + + +class TestUrlshortRoute(DoajTestCase): + def test_urlshort_route(self): + url = 'https://www.google.com' + surl = urlshort.add_url_shortener(url) + wait_unit(wait_any_url_shortener) + + with self.app_test.test_client() as c: + rv = c.get(surl) + assert rv.status_code == 302 + assert rv.headers['Location'] == url diff --git a/portality/dao.py b/portality/dao.py index 4688662338..5767a6e657 100644 --- a/portality/dao.py +++ b/portality/dao.py @@ -858,7 +858,7 @@ def count(cls): # return requests.get(cls.target() + '_count').json()['count'] @classmethod - def hit_count(cls, query, **kwargs): + def hit_count(cls, query, **kwargs) -> int: countable_query = deepcopy(query) if "track_total_hits" not in countable_query: countable_query["track_total_hits"] = True diff --git a/portality/lib/urlshort.py b/portality/lib/urlshort.py new file mode 100644 index 0000000000..0104a56190 --- /dev/null +++ b/portality/lib/urlshort.py @@ -0,0 +1,84 @@ +import random +import string +import threading +from typing import Optional + +from portality import models +from portality.core import app +from portality.models.url_shortener import AliasQuery, UrlQuery +from portality.util import url_for + +# global current status of the alias length +alias_len = 6 +ALIAS_CHARS = string.ascii_letters + string.digits +alias_lock = threading.Lock() + + +def add_url_shortener(url: str) -> str: + """ + create or find a shorted url from the given url + + Parameters + ---------- + url + + Returns + ------- + shortened URL + """ + + shortened_url = find_shortened_url(url) + if shortened_url: + return shortened_url + + with alias_lock: + alias = create_new_alias() + models.UrlShortener(url=url, alias=alias).save() + + return parse_shortened_url(alias) + + +def create_new_alias() -> str: + global alias_len + for _ in range(5): + alias = ''.join(random.sample(ALIAS_CHARS, alias_len)) + cnt = models.UrlShortener.hit_count(UrlQuery(alias).query()) + if cnt == 0: + return alias + + alias_len += 1 + app.logger.info(f'alias length increased to {alias_len}') + + raise ValueError('Could not create a unique alias') + + +def find_shortened_url(url: str) -> Optional[str]: + """ find the shorted url from the given url """ + + aliases = models.UrlShortener.q2obj(q=AliasQuery(url).query()) + + if len(aliases) == 0: + return None + + if len(aliases) > 1: + app.logger.warning(f'More than one alias found for url[{url}] n[{len(aliases)}]') + + return parse_shortened_url(aliases[0].alias) + + +def find_url_by_alias(alias: str) -> Optional[str]: + """ find the original url from the given alias """ + + urls = models.UrlShortener.q2obj(q=UrlQuery(alias).query()) + n_url = len(urls) + if n_url == 0: + return None + if n_url > 1: + app.logger.warning(f'More than one URL found for alias[{alias}] n[{n_url}]') + + return urls[0].url + + +def parse_shortened_url(alias: str) -> str: + """ parse the shortened url from the given alias """ + return url_for('doaj.shortened_url', alias=alias) diff --git a/portality/models/__init__.py b/portality/models/__init__.py index 57eb28e5f9..09b738180c 100644 --- a/portality/models/__init__.py +++ b/portality/models/__init__.py @@ -27,6 +27,7 @@ from portality.models.harvester import HarvestState from portality.models.event import Event from portality.models.notifications import Notification +from portality.models.url_shortener import UrlShortener import sys diff --git a/portality/models/url_shortener.py b/portality/models/url_shortener.py new file mode 100644 index 0000000000..074a08c51a --- /dev/null +++ b/portality/models/url_shortener.py @@ -0,0 +1,59 @@ +from portality.dao import DomainObject + + +class UrlShortener(DomainObject): + """~~UrlShortener:Model->DomainObject:Model~~""" + __type__ = "url_shortener" + + def __init__(self, **kwargs): + super(UrlShortener, self).__init__(**kwargs) + + @property + def url(self): + return self.data.get("url") + + @url.setter + def url(self, val): + self.data["url"] = val + + @property + def alias(self): + return self.data.get("alias") + + @alias.setter + def alias(self, val): + self.data["alias"] = val + + +class UrlQuery: + def __init__(self, alias: str): + self.alias = alias + + def query(self): + return { + 'query': { + 'bool': { + 'must': [ + {'term': {'alias.exact': self.alias}} + ] + } + }, + '_source': ['url'], + } + + +class AliasQuery: + def __init__(self, url: str): + self.url = url + + def query(self): + return { + 'query': { + 'bool': { + 'must': [ + {'term': {'url.exact': self.url}} + ] + } + }, + '_source': ['alias'], + } diff --git a/portality/settings.py b/portality/settings.py index 37729773c2..f323813181 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -705,6 +705,8 @@ MAPPINGS['provenance'] = MAPPINGS["account"] #~~->Provenance:Model~~ MAPPINGS['preserve'] = MAPPINGS["account"] #~~->Preservation:Model~~ MAPPINGS['notification'] = MAPPINGS["account"] #~~->Notification:Model~~ +MAPPINGS['url_shortener'] = MAPPINGS["account"] #~~->URLShortener:Model~~ + ######################################### # Query Routes diff --git a/portality/view/doaj.py b/portality/view/doaj.py index 649eab8478..ada6b0996e 100644 --- a/portality/view/doaj.py +++ b/portality/view/doaj.py @@ -16,7 +16,7 @@ from portality.decorators import ssl_required, api_key_required from portality.forms.application_forms import JournalFormFactory from portality.lcc import lcc_jstree -from portality.lib import plausible +from portality.lib import plausible, urlshort from portality.ui.messages import Messages # ~~DOAJ:Blueprint~~ @@ -43,7 +43,8 @@ def cookie_consent(): else: resp = make_response() # set a cookie that lasts for one year - resp.set_cookie(app.config.get("CONSENT_COOKIE_KEY"), Messages.CONSENT_COOKIE_VALUE, max_age=31536000, samesite=None, secure=True) + resp.set_cookie(app.config.get("CONSENT_COOKIE_KEY"), Messages.CONSENT_COOKIE_VALUE, max_age=31536000, + samesite=None, secure=True) return resp @@ -55,7 +56,8 @@ def dismiss_site_note(): else: resp = make_response() # set a cookie that lasts for one year - resp.set_cookie(app.config.get("SITE_NOTE_KEY"), app.config.get("SITE_NOTE_COOKIE_VALUE"), max_age=app.config.get("SITE_NOTE_SLEEP"), samesite=None, secure=True) + resp.set_cookie(app.config.get("SITE_NOTE_KEY"), app.config.get("SITE_NOTE_COOKIE_VALUE"), + max_age=app.config.get("SITE_NOTE_SLEEP"), samesite=None, secure=True) return resp @@ -100,7 +102,7 @@ def search(): def search_post(): """ Redirect a query from the box on the index page to the search page. """ if request.form.get('origin') != 'ui': - abort(400) # bad request - we must receive searches from our own UI + abort(400) # bad request - we must receive searches from our own UI ref = request.form.get("ref") if ref is None: @@ -115,14 +117,14 @@ def search_post(): # lhs for journals, rhs for articles field_map = { - "all" : (None, None), - "title" : ("bibjson.title", "bibjson.title"), - "abstract" : (None, "bibjson.abstract"), - "subject" : ("index.classification", "index.classification"), - "author" : (None, "bibjson.author.name"), - "issn" : ("index.issn.exact", None), - "publisher" : ("bibjson.publisher.name", None), - "country" : ("index.country", None) + "all": (None, None), + "title": ("bibjson.title", "bibjson.title"), + "abstract": (None, "bibjson.abstract"), + "subject": ("index.classification", "index.classification"), + "author": (None, "bibjson.author.name"), + "issn": ("index.issn.exact", None), + "publisher": ("bibjson.publisher.name", None), + "country": ("index.country", None) } default_field_opts = field_map.get(field, None) default_field = None @@ -143,6 +145,7 @@ def search_post(): return redirect(route + '?source=' + urllib.parse.quote(json.dumps(query)) + "&ref=" + urllib.parse.quote(ref)) + ############################################# # FIXME: this should really live somewhere else more appropirate to who can access it @@ -151,9 +154,9 @@ def search_post(): @ssl_required def journal_readonly(journal_id): if ( - not current_user.has_role("admin") - or not current_user.has_role("editor") - or not current_user.has_role("associate_editor") + not current_user.has_role("admin") + or not current_user.has_role("editor") + or not current_user.has_role("associate_editor") ): abort(401) @@ -234,11 +237,13 @@ def get_from_local_store(container, filename): def autocomplete(doc_type, field_name): prefix = request.args.get('q', '') if not prefix: - return jsonify({'suggestions': [{"id": "", "text": "No results found"}]}) # select2 does not understand 400, which is the correct code here... + return jsonify({'suggestions': [ + {"id": "", "text": "No results found"}]}) # select2 does not understand 400, which is the correct code here... m = models.lookup_model(doc_type) if not m: - return jsonify({'suggestions': [{"id": "", "text": "No results found"}]}) # select2 does not understand 404, which is the correct code here... + return jsonify({'suggestions': [ + {"id": "", "text": "No results found"}]}) # select2 does not understand 404, which is the correct code here... size = request.args.get('size', 5) @@ -286,6 +291,7 @@ def find_toc_journal_by_identifier(identifier): def is_issn_by_identifier(identifier): return len(identifier) == 9 + def find_correct_redirect_identifier(identifier, bibjson) -> str: """ return None if identifier is correct and no redirect is needed @@ -326,6 +332,7 @@ def find_correct_redirect_identifier(identifier, bibjson) -> str: # let it continue loading if we only have the hex UUID for the journal (no ISSNs) # and the user is referring to the toc page via that ID + @blueprint.route("/toc/") def toc(identifier=None): """ Table of Contents page for a journal. identifier may be the journal id or an issn """ @@ -338,7 +345,7 @@ def toc(identifier=None): return redirect(url_for('doaj.toc', identifier=real_identifier), 301) else: # now render all that information - return render_template('doaj/toc.html', journal=journal, bibjson=bibjson ) + return render_template('doaj/toc.html', journal=journal, bibjson=bibjson) @blueprint.route("/toc/articles/") @@ -365,11 +372,10 @@ def toc_articles(identifier=None, volume=None, issue=None): + '?source=' + dao.Facetview2.url_encode_query(q)) # now render all that information - return render_template('doaj/toc_articles.html', journal=journal, bibjson=bibjson ) + return render_template('doaj/toc_articles.html', journal=journal, bibjson=bibjson) - -#~~->Article:Page~~ +# ~~->Article:Page~~ @blueprint.route("/article/") def article_page(identifier=None): # identifier must be the article id @@ -387,7 +393,8 @@ def article_page(identifier=None): if len(journals) > 0: journal = journals[0] - return render_template('doaj/article.html', article=article, journal=journal, page={"highlight" : True}) + return render_template('doaj/article.html', article=article, journal=journal, page={"highlight": True}) + # Not using this form for now but we might bring it back later # @blueprint.route("/contact/", methods=["GET", "POST"]) @@ -530,7 +537,8 @@ def xml(): @blueprint.route("/docs/widgets/") def widgets(): - return render_template("layouts/static_page.html", page_frag="/docs/widgets.html", base_url=app.config.get('BASE_URL')) + return render_template("layouts/static_page.html", page_frag="/docs/widgets.html", + base_url=app.config.get('BASE_URL')) @blueprint.route("/docs/public-data-dump/") @@ -552,10 +560,12 @@ def faq(): def about(): return render_template("layouts/static_page.html", page_frag="/about/index.html") + @blueprint.route("/at-20/") def at_20(): return render_template("layouts/static_page.html", page_frag="/about/at-20.html") + @blueprint.route("/about/ambassadors/") def ambassadors(): return render_template("layouts/static_page.html", page_frag="/about/ambassadors.html") @@ -652,3 +662,13 @@ def publishers(): @blueprint.route("/password-reset/") def new_password_reset(): return redirect(url_for('account.forgot'), code=301) + + +@blueprint.route("/sc/") +def shortened_url(alias): + # KTODO handle plausible + url = urlshort.find_url_by_alias(alias) + if url: + return redirect(url) + + abort(404) From 6312f510f6ec47ab12eb770102e388ce1b88c3a5 Mon Sep 17 00:00:00 2001 From: philip Date: Mon, 4 Mar 2024 13:54:51 +0000 Subject: [PATCH 04/28] apply urlshort in frontend --- portality/lib/urlshort.py | 2 +- portality/settings.py | 8 +++++ portality/static/js/doaj.fieldrender.edges.js | 1 - portality/static/js/doaj.js | 21 ++++++++++++ .../static/js/edges/public.article.edge.js | 2 +- .../static/js/edges/public.journal.edge.js | 2 +- portality/view/doajservices.py | 32 +++++++++++++++---- 7 files changed, 58 insertions(+), 10 deletions(-) diff --git a/portality/lib/urlshort.py b/portality/lib/urlshort.py index 0104a56190..9d7f34a77e 100644 --- a/portality/lib/urlshort.py +++ b/portality/lib/urlshort.py @@ -81,4 +81,4 @@ def find_url_by_alias(alias: str) -> Optional[str]: def parse_shortened_url(alias: str) -> str: """ parse the shortened url from the given alias """ - return url_for('doaj.shortened_url', alias=alias) + return app.config.get("BASE_URL") + url_for('doaj.shortened_url', alias=alias) diff --git a/portality/settings.py b/portality/settings.py index f323813181..b22665d61c 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -1531,3 +1531,11 @@ # worksheet name or tab name that datalog will write to DATALOG_JA_WORKSHEET_NAME = 'Added' + + + +################################ +# Url Shortener +# ~~->URLShortener:Feature~~ + +ALLOWED_SHORTEN_PATH = ['/search/journals', '/search/articles'] diff --git a/portality/static/js/doaj.fieldrender.edges.js b/portality/static/js/doaj.fieldrender.edges.js index 8ae34f7271..db559a4545 100644 --- a/portality/static/js/doaj.fieldrender.edges.js +++ b/portality/static/js/doaj.fieldrender.edges.js @@ -1302,7 +1302,6 @@ $.extend(true, doaj, { let shorten = ""; if (this.component.urlShortener) { - var shortenClass = edges.css_classes(this.namespace, "shorten", this); var shortenButtonClass = edges.css_classes(this.namespace, "shorten-url", this) shorten = '

'; } diff --git a/portality/static/js/doaj.js b/portality/static/js/doaj.js index 5069903dcd..6dc90b9fee 100644 --- a/portality/static/js/doaj.js +++ b/portality/static/js/doaj.js @@ -93,6 +93,27 @@ var doaj = { // }); // }, + doajUrlShortener : function(url, success_callback, error_callback) { + function callbackWrapper(data) { + success_callback(data.short_url); + } + + function errorHandler() { + alert("Sorry, we're unable to generate short urls at this time"); + error_callback && error_callback(); + } + + $.ajax({ + type: "POST", + contentType: "application/json", + url: "/service/shorten", + data : JSON.stringify({url: url}), + success: callbackWrapper, + error: errorHandler + }); + + }, + journal_toc_id : function(journal) { // if e-issn is available, use that // if not, but a p-issn is available, use that diff --git a/portality/static/js/edges/public.article.edge.js b/portality/static/js/edges/public.article.edge.js index b520a5c474..06ffb46652 100644 --- a/portality/static/js/edges/public.article.edge.js +++ b/portality/static/js/edges/public.article.edge.js @@ -139,7 +139,7 @@ $.extend(true, doaj, { edges.newFullSearchController({ id: "share_embed", category: "controller", - // urlShortener : doaj.bitlyShortener, + urlShortener : doaj.doajUrlShortener, embedSnippet : doaj.publicSearch.embedSnippet, renderer: doaj.renderers.newShareEmbedRenderer({ shareLinkText: ' Share or embed' diff --git a/portality/static/js/edges/public.journal.edge.js b/portality/static/js/edges/public.journal.edge.js index 8904e50dfb..60c1d124d0 100644 --- a/portality/static/js/edges/public.journal.edge.js +++ b/portality/static/js/edges/public.journal.edge.js @@ -224,7 +224,7 @@ $.extend(true, doaj, { edges.newFullSearchController({ id: "share_embed", category: "controller", - // urlShortener : doaj.bitlyShortener, + urlShortener : doaj.doajUrlShortener, embedSnippet : doaj.publicSearch.embedSnippet, renderer: doaj.renderers.newShareEmbedRenderer({ shareLinkText: ' Share or embed' diff --git a/portality/view/doajservices.py b/portality/view/doajservices.py index 7d67cc03de..7a7cb5a707 100644 --- a/portality/view/doajservices.py +++ b/portality/view/doajservices.py @@ -1,13 +1,15 @@ -import json, urllib.request, urllib.parse, urllib.error, requests +import json +from urllib.parse import urlparse from flask import Blueprint, make_response, request, abort, render_template from flask_login import current_user, login_required -from portality.core import app -from portality.decorators import ssl_required, write_required, restrict_to_role -from portality.util import jsonp from portality import lock, models from portality.bll import DOAJ +from portality.core import app +from portality.decorators import ssl_required, write_required +from portality.lib import urlshort +from portality.util import jsonp blueprint = Blueprint('doajservices', __name__) @@ -40,7 +42,7 @@ def unlock(object_type, object_id): abort(400) # otherwise, return success - resp = make_response(json.dumps({"result" : "success"})) + resp = make_response(json.dumps({"result": "success"})) resp.mimetype = "application/json" return resp @@ -97,6 +99,23 @@ def unlocked(): # except: # abort(400) +@blueprint.route("/shorten", methods=["POST"]) +def shorten(): + """ create shortener url """ + data = json.loads(request.data) + url = data['url'] + + # validate url + path = urlparse(url).path + if not any(path == p for p in app.config.get("ALLOWED_SHORTEN_PATH", [])): + app.logger.warning(f"Invalid url shorten request: {url}") + abort(400) + + short_url = urlshort.add_url_shortener(url) + resp = make_response(json.dumps({"short_url": short_url})) + resp.mimetype = "application/json" + return resp + @blueprint.route("/groupstatus/", methods=["GET"]) @jsonp @@ -107,7 +126,8 @@ def group_status(group_id): :param group_id: :return: """ - if (not (current_user.has_role("editor") and models.EditorGroup.pull(group_id).editor == current_user.id)) and (not current_user.has_role("admin")): + if (not (current_user.has_role("editor") and models.EditorGroup.pull(group_id).editor == current_user.id)) and ( + not current_user.has_role("admin")): abort(404) svc = DOAJ.todoService() stats = svc.group_stats(group_id) From 21ffdb531ae76380f22f6e69c729ade42a49fbaf Mon Sep 17 00:00:00 2001 From: philip Date: Mon, 4 Mar 2024 13:56:28 +0000 Subject: [PATCH 05/28] cleanup old url shortener --- portality/static/js/doaj.js | 24 ------------------- portality/view/doajservices.py | 42 ---------------------------------- 2 files changed, 66 deletions(-) diff --git a/portality/static/js/doaj.js b/portality/static/js/doaj.js index 6dc90b9fee..8df810e3a7 100644 --- a/portality/static/js/doaj.js +++ b/portality/static/js/doaj.js @@ -69,30 +69,6 @@ var doaj = { doaj.bindMiniSearch(); }, - // bitlyShortener : function(query, success_callback, error_callback) { - // // ~~-> Bitly:ExternalService ~~ - // function callbackWrapper(data) { - // success_callback(data.url); - // } - // - // function errorHandler() { - // alert("Sorry, we're unable to generate short urls at this time"); - // error_callback(); - // } - // - // var page = window.location.protocol + '//' + window.location.host + window.location.pathname; - // - // $.ajax({ - // type: "POST", - // contentType: "application/json", - // dataType: "jsonp", - // url: "/service/shorten", - // data : JSON.stringify({page: page, query: query}), - // success: callbackWrapper, - // error: errorHandler - // }); - // }, - doajUrlShortener : function(url, success_callback, error_callback) { function callbackWrapper(data) { success_callback(data.short_url); diff --git a/portality/view/doajservices.py b/portality/view/doajservices.py index 7a7cb5a707..e5ef302b49 100644 --- a/portality/view/doajservices.py +++ b/portality/view/doajservices.py @@ -57,48 +57,6 @@ def unlocked(): return render_template("unlocked.html") -# @blueprint.route("/shorten", methods=["POST"]) -# @jsonp -# def shorten(): -# # Enable this if you are testing and you want to see the front end work, without working bit.ly credentials -# # return make_response(json.dumps({"url" : "testing url"})) -# try: -# # parse the json -# d = json.loads(request.data) -# p = d['page'] -# q = d['query'] -# -# # re-serialise the query, and url encode it -# source = urllib.parse.quote(json.dumps(q)) -# -# # assemble the DOAJ url -# doajurl = p + "?source=" + source -# -# # assemble the bitly url. Note that we re-encode the doajurl to include in the -# # query arguments, so by this point it is double-encoded -# bitly = app.config.get("BITLY_SHORTENING_API_URL") -# bitly_oauth = app.config.get("BITLY_OAUTH_TOKEN") -# -# # Set an Auth Bearer token (Bitly 4.0) -# headers = {'Authorization': 'Bearer ' + bitly_oauth} -# -# # Add the long url as a payload -# payload = {'long_url': doajurl} -# -# # make the request -# resp = requests.post(bitly, headers=headers, data=json.dumps(payload)) -# shorturl = resp.json().get('link') -# -# if not shorturl: -# abort(400) -# -# # make the response -# answer = make_response(json.dumps({"url": shorturl})) -# answer.mimetype = "application/json" -# return answer -# except: -# abort(400) - @blueprint.route("/shorten", methods=["POST"]) def shorten(): """ create shortener url """ From 3f411b5c6bcc4331ebad5a13d6a242cee72e2204 Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 5 Mar 2024 11:17:18 +0000 Subject: [PATCH 06/28] add doc `How to setup for dev with Plausible` --- docs/dev/how-to-setup.md | 36 ++++++++++++++++++++++++++++++++---- setup.py | 1 + 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/docs/dev/how-to-setup.md b/docs/dev/how-to-setup.md index 671336385f..ba725de978 100644 --- a/docs/dev/how-to-setup.md +++ b/docs/dev/how-to-setup.md @@ -1,12 +1,16 @@ Setup google API key for google sheet ------------------------------------------ + ### create project an enable api + * go to https://console.cloud.google.com/ * create and select a project on the top left -* searching for "Google Drive API" and enable it, url should be some thing like (https://console.cloud.google.com/marketplace/product/google/drive.googleapis.com) +* searching for "Google Drive API" and enable it, url should be some thing + like (https://console.cloud.google.com/marketplace/product/google/drive.googleapis.com) * searching for "Google Sheets API" and enable it ### create key + * click `create credentials` button * select `Google Drive API` and `Web server` and `Application data` * select `No, I'm not using them` @@ -17,19 +21,18 @@ Setup google API key for google sheet * click `KEYS`, `ADD KEY` * select `JSON` and click create - ### share google sheet to service account + * go to google drive * right click the sheet you want to share * click `Share` * paste the service account email to `People` field * click `Done` - - How to setup for `datalog_journal_added_update` task -------------------------------------------------- following variable need for background job `datalog_journal_added_update` + ``` # value should be key file path of json, empty string means disabled GOOGLE_KEY_PATH = '' @@ -40,3 +43,28 @@ DATALOG_JA_FILENAME = 'DOAJ: journals added and withdrawn' # worksheet name or tab name that datalog will write to DATALOG_JA_WORKSHEET_NAME = 'Added' ``` + +How to setup for dev with Plausible +----------------------------------- + +* run plausible + * ref 'https://github.com/plausible/community-edition' + * update `plausible-conf.env` + * run docker `docker-compose up` + * testing configuration by browse `http://localhost:8000` and login admin user +* setup fake domain in /etc/hosts + * e.g. `127.0.0.1 doaj.dev.local` +* setup dev.cfg + * `DEBUG = False` + * `BASE_URL = "https://doaj.dev.local:5004"` + * `PLAUSIBLE_URL = "http://localhost:8000"` + * `PLAUSIBLE_JS_URL = PLAUSIBLE_URL + "/js/script.outbound-links.file-downloads.js"` + * `PLAUSIBLE_API_URL = PLAUSIBLE_URL + "/api/event"` + * `PLAUSIBLE_SITE_NAME = "doaj.dev.local"` +* update `portality/app.py`, change `fake_https=True` e.g. `run_server(fake_https=True)` + * you might need `cryptography~=42.0` installed in pip +* run `portality/app.py` +* testing configuration by browse `https://doaj.dev.local:5004` + + + diff --git a/setup.py b/setup.py index 1c3e15e6ae..4dc7d70ddc 100644 --- a/setup.py +++ b/setup.py @@ -71,6 +71,7 @@ "selenium==4.12.0", "combinatrix @ git+https://github.com/CottageLabs/combinatrix.git@740d255f0050d53a20324df41c08981499bb292c#egg=combinatrix", "bs4==0.0.1", # beautifulsoup for HTML parsing + "cryptography~=42.0", # for ad-hoc https ], # additional test dependencies for the test-extras target From 0581a0232cbfd4ba5efa88279841ecf5253e5084 Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 5 Mar 2024 11:17:44 +0000 Subject: [PATCH 07/28] add setup_dev_log --- portality/app.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/portality/app.py b/portality/app.py index efd9e5b170..8050ee61a4 100644 --- a/portality/app.py +++ b/portality/app.py @@ -9,7 +9,7 @@ ~~DOAJ:WebApp~~ """ - +import logging import os, sys import tzlocal import pytz @@ -434,6 +434,23 @@ def page_not_found(e): return render_template('500.html'), 500 +is_dev_log_setup_completed = False + + +def setup_dev_log(): + global is_dev_log_setup_completed + if not is_dev_log_setup_completed: + is_dev_log_setup_completed = True + app.logger.handlers = [] + log = logging.getLogger() + log.setLevel(logging.DEBUG) + ch = logging.StreamHandler() + ch.setLevel(logging.DEBUG) + ch.setFormatter(logging.Formatter('%(asctime)s %(levelname).4s %(processName)s%(threadName)s - ' + '%(message)s --- [%(name)s][%(funcName)s:%(lineno)d]')) + log.addHandler(ch) + + def run_server(host=None, port=None, fake_https=False): """ :param host: @@ -443,6 +460,10 @@ def run_server(host=None, port=None, fake_https=False): that can help for debugging Plausible :return: """ + + if app.config.get('DEBUG_DEV_LOG', False): + setup_dev_log() + pycharm_debug = app.config.get('DEBUG_PYCHARM', False) if len(sys.argv) > 1: if sys.argv[1] == '-d': From 58b4fd20da21cc2ac82b69fb26c634c1f25fe4f5 Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 5 Mar 2024 12:46:24 +0000 Subject: [PATCH 08/28] commit edges --- doajtest/unit/test_lib_urlshort.py | 3 +++ docs/pr_note/2881_url_shortener.md | 6 ++++++ portality/static/vendor/edges | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 docs/pr_note/2881_url_shortener.md diff --git a/doajtest/unit/test_lib_urlshort.py b/doajtest/unit/test_lib_urlshort.py index 95c9813db5..76e3a50e7b 100644 --- a/doajtest/unit/test_lib_urlshort.py +++ b/doajtest/unit/test_lib_urlshort.py @@ -79,3 +79,6 @@ def test_urlshort_route(self): rv = c.get(surl) assert rv.status_code == 302 assert rv.headers['Location'] == url + + + # KTODO add /services/shorten diff --git a/docs/pr_note/2881_url_shortener.md b/docs/pr_note/2881_url_shortener.md new file mode 100644 index 0000000000..c768035bfb --- /dev/null +++ b/docs/pr_note/2881_url_shortener.md @@ -0,0 +1,6 @@ + + + +New setup +--------------------- +* new `Goals` `Urlshort` should be added to plausible \ No newline at end of file diff --git a/portality/static/vendor/edges b/portality/static/vendor/edges index 990f422016..991c6117a1 160000 --- a/portality/static/vendor/edges +++ b/portality/static/vendor/edges @@ -1 +1 @@ -Subproject commit 990f4220163a3e18880f0bdc3ad5c80d234d22dd +Subproject commit 991c6117a180757668e3c5bfcb1fdb279b549967 From eaba321fe64cbbd4bbb811f731c99020be010856 Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 5 Mar 2024 12:56:59 +0000 Subject: [PATCH 09/28] add plausible for urlshort --- portality/settings.py | 6 ++++++ portality/view/doaj.py | 4 +++- portality/view/doajservices.py | 4 +++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/portality/settings.py b/portality/settings.py index b22665d61c..5bdadc9a0c 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -1265,6 +1265,12 @@ ANALYTICS_CATEGORY_PUBLICDATADUMP = 'PublicDataDump' ANALYTICS_ACTION_PUBLICDATADUMP = 'Download' +# Plausible for Urlshort +# ~~->URLShortener:Feature~~ +ANALYTICS_CATEGORY_URLSHORT = 'Urlshort' +ANALYTICS_ACTION_URLSHORT_ADD = 'Find or create shortener url' +ANALYTICS_ACTION_URLSHORT_REDIRECT = 'Redirect' + # Plausible for API # ~~-> API:Feature~~ ANALYTICS_CATEGORY_API = 'API Hit' diff --git a/portality/view/doaj.py b/portality/view/doaj.py index ada6b0996e..4a569ef8f3 100644 --- a/portality/view/doaj.py +++ b/portality/view/doaj.py @@ -665,10 +665,12 @@ def new_password_reset(): @blueprint.route("/sc/") +@plausible.pa_event(app.config.get('ANALYTICS_CATEGORY_URLSHORT', 'Urlshort'), + action=app.config.get('ANALYTICS_ACTION_URLSHORT_REDIRECT', 'Redirect')) def shortened_url(alias): - # KTODO handle plausible url = urlshort.find_url_by_alias(alias) if url: return redirect(url) + app.logger.debug(f"Shortened URL not found: [{alias}]") abort(404) diff --git a/portality/view/doajservices.py b/portality/view/doajservices.py index e5ef302b49..7ccd2fb1a4 100644 --- a/portality/view/doajservices.py +++ b/portality/view/doajservices.py @@ -8,7 +8,7 @@ from portality.bll import DOAJ from portality.core import app from portality.decorators import ssl_required, write_required -from portality.lib import urlshort +from portality.lib import urlshort, plausible from portality.util import jsonp blueprint = Blueprint('doajservices', __name__) @@ -58,6 +58,8 @@ def unlocked(): @blueprint.route("/shorten", methods=["POST"]) +@plausible.pa_event(app.config.get('ANALYTICS_CATEGORY_URLSHORT', 'Urlshort'), + action=app.config.get('ANALYTICS_ACTION_URLSHORT_ADD', 'Find or create shortener url')) def shorten(): """ create shortener url """ data = json.loads(request.data) From 6909692e0c07d2e247eabc76e07057f18171349a Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 5 Mar 2024 13:11:59 +0000 Subject: [PATCH 10/28] add more test cases --- doajtest/unit/test_lib_urlshort.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/doajtest/unit/test_lib_urlshort.py b/doajtest/unit/test_lib_urlshort.py index 76e3a50e7b..235c4f4d42 100644 --- a/doajtest/unit/test_lib_urlshort.py +++ b/doajtest/unit/test_lib_urlshort.py @@ -4,6 +4,7 @@ from portality import models from portality.lib import urlshort from portality.models import UrlShortener +from portality.util import url_for def wait_any_url_shortener(): @@ -69,6 +70,11 @@ def test_find_url_by_alias(self): assert urlshort.find_url_by_alias(alias) == data[alias] +def surl_to_alias(surl): + alias = surl[surl.rfind('/') + 1:] + return alias + + class TestUrlshortRoute(DoajTestCase): def test_urlshort_route(self): url = 'https://www.google.com' @@ -80,5 +86,23 @@ def test_urlshort_route(self): assert rv.status_code == 302 assert rv.headers['Location'] == url + def test_urlshort_route__not_found(self): + with self.app_test.test_client() as c: + rv = c.get(urlshort.parse_shortened_url('nnnnnnnnot_found')) + assert rv.status_code == 404 + + def test_create_shorten_url(self): + data = {'url': 'http://localhost:5004/search/journals'} + with self.app_test.test_client() as c: + rv = c.post(url_for('doajservices.shorten'), json=data) + assert rv.status_code == 200 + assert rv.json['short_url'] + + wait_unit(wait_any_url_shortener) + assert urlshort.find_url_by_alias(surl_to_alias(rv.json['short_url'])) == data['url'] - # KTODO add /services/shorten + def test_create_shorten_url__invalid(self): + data = {'url': 'http://localhost:5004/invalid'} + with self.app_test.test_client() as c: + rv = c.post(url_for('doajservices.shorten'), json=data) + assert rv.status_code == 400 From b818da0e8551edabb0ba58217a2a8054f91b41c2 Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 5 Mar 2024 13:35:04 +0000 Subject: [PATCH 11/28] add note --- docs/pr_note/2881_url_shortener.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/pr_note/2881_url_shortener.md b/docs/pr_note/2881_url_shortener.md index c768035bfb..1715bcebc4 100644 --- a/docs/pr_note/2881_url_shortener.md +++ b/docs/pr_note/2881_url_shortener.md @@ -1,6 +1,7 @@ -New setup +Reminders: --------------------- -* new `Goals` `Urlshort` should be added to plausible \ No newline at end of file +* new `Goals` `Urlshort` should be added to plausible +* edges library updated for generate url shorten \ No newline at end of file From 90b00d4cbea219e437d47c95644a7e4ce7b083c5 Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 5 Mar 2024 13:35:20 +0000 Subject: [PATCH 12/28] update edges --- portality/static/vendor/edges | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portality/static/vendor/edges b/portality/static/vendor/edges index 991c6117a1..fa15f74f85 160000 --- a/portality/static/vendor/edges +++ b/portality/static/vendor/edges @@ -1 +1 @@ -Subproject commit 991c6117a180757668e3c5bfcb1fdb279b549967 +Subproject commit fa15f74f858c558d4ba62d4fbb10c2c71eb902c6 From 50f995f7cb5b56d5d0a119e7795e2a167b8742cd Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 5 Mar 2024 13:43:27 +0000 Subject: [PATCH 13/28] fix docs --- docs/dev/how-to-setup.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/dev/how-to-setup.md b/docs/dev/how-to-setup.md index ba725de978..08a13c11e9 100644 --- a/docs/dev/how-to-setup.md +++ b/docs/dev/how-to-setup.md @@ -5,8 +5,7 @@ Setup google API key for google sheet * go to https://console.cloud.google.com/ * create and select a project on the top left -* searching for "Google Drive API" and enable it, url should be some thing - like (https://console.cloud.google.com/marketplace/product/google/drive.googleapis.com) +* searching for "Google Drive API" and enable it, url should be some thing like (https://console.cloud.google.com/marketplace/product/google/drive.googleapis.com) * searching for "Google Sheets API" and enable it ### create key From 8068d94b8bb91601cb8ba0fc96f6ae9302f2964e Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 5 Mar 2024 13:44:09 +0000 Subject: [PATCH 14/28] doc format --- docs/dev/how-to-setup.md | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/docs/dev/how-to-setup.md b/docs/dev/how-to-setup.md index 08a13c11e9..dcdbc6d763 100644 --- a/docs/dev/how-to-setup.md +++ b/docs/dev/how-to-setup.md @@ -5,7 +5,8 @@ Setup google API key for google sheet * go to https://console.cloud.google.com/ * create and select a project on the top left -* searching for "Google Drive API" and enable it, url should be some thing like (https://console.cloud.google.com/marketplace/product/google/drive.googleapis.com) +* searching for "Google Drive API" and enable it, url should be some thing + like (https://console.cloud.google.com/marketplace/product/google/drive.googleapis.com) * searching for "Google Sheets API" and enable it ### create key @@ -47,21 +48,21 @@ How to setup for dev with Plausible ----------------------------------- * run plausible - * ref 'https://github.com/plausible/community-edition' - * update `plausible-conf.env` - * run docker `docker-compose up` - * testing configuration by browse `http://localhost:8000` and login admin user + * ref 'https://github.com/plausible/community-edition' + * update `plausible-conf.env` + * run docker `docker-compose up` + * testing configuration by browse `http://localhost:8000` and login admin user * setup fake domain in /etc/hosts - * e.g. `127.0.0.1 doaj.dev.local` + * e.g. `127.0.0.1 doaj.dev.local` * setup dev.cfg - * `DEBUG = False` - * `BASE_URL = "https://doaj.dev.local:5004"` - * `PLAUSIBLE_URL = "http://localhost:8000"` - * `PLAUSIBLE_JS_URL = PLAUSIBLE_URL + "/js/script.outbound-links.file-downloads.js"` - * `PLAUSIBLE_API_URL = PLAUSIBLE_URL + "/api/event"` - * `PLAUSIBLE_SITE_NAME = "doaj.dev.local"` + * `DEBUG = False` + * `BASE_URL = "https://doaj.dev.local:5004"` + * `PLAUSIBLE_URL = "http://localhost:8000"` + * `PLAUSIBLE_JS_URL = PLAUSIBLE_URL + "/js/script.outbound-links.file-downloads.js"` + * `PLAUSIBLE_API_URL = PLAUSIBLE_URL + "/api/event"` + * `PLAUSIBLE_SITE_NAME = "doaj.dev.local"` * update `portality/app.py`, change `fake_https=True` e.g. `run_server(fake_https=True)` - * you might need `cryptography~=42.0` installed in pip + * you might need `cryptography~=42.0` installed in pip * run `portality/app.py` * testing configuration by browse `https://doaj.dev.local:5004` From b60d6de291014fd7e777b15707ade14fc43af4f4 Mon Sep 17 00:00:00 2001 From: philip Date: Mon, 18 Mar 2024 12:56:33 +0000 Subject: [PATCH 15/28] fix names wait_until --- doajtest/selenium_helpers.py | 16 ++++++++-------- doajtest/seleniumtest/test_article_xml_upload.py | 16 ++++++++-------- doajtest/unit/test_lib_urlshort.py | 9 +++++---- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/doajtest/selenium_helpers.py b/doajtest/selenium_helpers.py index 53800b00ef..4ffc24fc89 100644 --- a/doajtest/selenium_helpers.py +++ b/doajtest/selenium_helpers.py @@ -118,7 +118,7 @@ def setUp(self): self.selenium.set_window_size(1400, 1000) # avoid something is not clickable # wait for server to start - wait_unit(self._is_doaj_server_running, 10, 1.5, timeout_msg='doaj server not started') + wait_until(self._is_doaj_server_running, 10, 1.5, timeout_msg='doaj server not started') fix_index_not_found_exception(self.app_test) self.fix_es_mapping() @@ -159,12 +159,12 @@ def tearDown(self): print(f'{datetime.datetime.now().isoformat()} --- doaj process terminating...') self.doaj_process.terminate() self.doaj_process.join() - wait_unit(lambda: not self._is_doaj_server_running(), 10, 1, + wait_until(lambda: not self._is_doaj_server_running(), 10, 1, timeout_msg='doaj server is still running') self.selenium.quit() - wait_unit(self._is_selenium_quit, 10, 1, timeout_msg='selenium is still running') + wait_until(self._is_selenium_quit, 10, 1, timeout_msg='selenium is still running') print('selenium terminated') super().tearDown() @@ -219,7 +219,7 @@ def login_by_acc(driver: 'WebDriver', acc: models.Account = None): assert "/login" not in driver.current_url -def wait_unit_elements(driver: 'WebDriver', css_selector: str, timeout=10, check_interval=0.1): +def wait_until_elements(driver: 'WebDriver', css_selector: str, timeout=10, check_interval=0.1): elements = [] def exit_cond_fn(): @@ -230,11 +230,11 @@ def exit_cond_fn(): except: return False - wait_unit(exit_cond_fn, timeout, check_interval) + wait_until(exit_cond_fn, timeout, check_interval) return elements -def wait_unit_click(driver: 'WebDriver', css_selector: str, timeout=10, check_interval=0.1): +def wait_until_click(driver: 'WebDriver', css_selector: str, timeout=10, check_interval=0.1): def _click(): try: ele = find_ele_by_css(driver, css_selector) @@ -245,11 +245,11 @@ def _click(): except (StaleElementReferenceException, ElementClickInterceptedException): return False - wait_unit(_click, timeout=10, check_interval=0.1) + wait_until(_click, timeout=10, check_interval=0.1) def click_edges_item(driver: 'WebDriver', ele_name, item_name): - wait_unit_click(driver, f'#edges-bs3-refiningand-term-selector-toggle-{ele_name}') + wait_until_click(driver, f'#edges-bs3-refiningand-term-selector-toggle-{ele_name}') for ele in find_eles_by_css(driver, f'.edges-bs3-refiningand-term-selector-result-{ele_name} a'): if item_name in ele.text.strip(): ele.click() diff --git a/doajtest/seleniumtest/test_article_xml_upload.py b/doajtest/seleniumtest/test_article_xml_upload.py index 1127ed9284..23afcc2a6c 100644 --- a/doajtest/seleniumtest/test_article_xml_upload.py +++ b/doajtest/seleniumtest/test_article_xml_upload.py @@ -7,7 +7,6 @@ from selenium.webdriver.common.by import By from selenium.webdriver.support.select import Select -import doajtest.helpers from doajtest import selenium_helpers from doajtest.fixtures import JournalFixtureFactory, url_path, article_doajxml from doajtest.fixtures.accounts import PUBLISHER_B_SOURCE, create_publisher_a, create_maned_a @@ -16,6 +15,7 @@ from doajtest.selenium_helpers import SeleniumTestCase from portality import models, dao from portality.constants import FileUploadStatus +from portality.lib.thread_utils import wait_until HISTORY_ROW_PROCESSING_FAILED = 'processing failed' XML_FORMAT_DOAJ = 'doaj' @@ -62,7 +62,7 @@ def assert_history_row_success(self, history_row, n_article=1): self.assert_history_row(history_row, note=f'successfully processed {n_article} articles imported') @staticmethod - def wait_unit_file_upload_status_ready(): + def wait_until_file_upload_status_ready(): new_file_upload = None def _cond_fn(): @@ -73,7 +73,7 @@ def _cond_fn(): return new_file_upload.status not in (FileUploadStatus.Validated, FileUploadStatus.Incoming) # interval 0.5 is good because ES can't handle too many requests - doajtest.helpers.wait_unit(_cond_fn, timeout=15, check_interval=0.5) + wait_until(_cond_fn, timeout=15, sleep_time=0.5) return new_file_upload @@ -108,7 +108,7 @@ def test_upload_fail(self, file_path, err_msg, expected_note): assert err_msg in alert_ele.text # # wait for background job to finish - self.wait_unit_file_upload_status_ready() + self.wait_until_file_upload_status_ready() self.selenium.refresh() new_rows = find_history_rows(self.selenium) @@ -177,16 +177,16 @@ def _find_history_rows(): self.upload_submit_file(file_path) assert 'File uploaded and waiting to be processed' in self.find_ele_by_css('.alert--success').text - doajtest.helpers.wait_unit( + wait_until( lambda: len(_find_history_rows()) == n_org_rows + 1, - timeout=10, check_interval=1 + timeout=10, sleep_time=1 ) new_rows = _find_history_rows() assert n_org_rows + 1 == len(new_rows) assert n_file_upload + 1 == models.FileUpload.count() # wait for background job to finish - new_file_upload = self.wait_unit_file_upload_status_ready() + new_file_upload = self.wait_until_file_upload_status_ready() # assert file upload status assert new_file_upload.filename == Path(file_path).name @@ -307,7 +307,7 @@ def step_upload_success(self, publisher, article_xml_path, journal_issn, expecte XML_FORMAT_DOAJ) self.assert_history_row_success(latest_history_row) selenium_helpers.goto(self.selenium, url_path.url_toc_articles(journal_issn)) - doajtest.helpers.wait_unit(lambda: self.find_eles_by_css(article_title_selector)) + wait_until(lambda: self.find_eles_by_css(article_title_selector)) assert expected_title in [e.get_attribute('innerHTML').strip() for e in self.find_eles_by_css(article_title_selector)] diff --git a/doajtest/unit/test_lib_urlshort.py b/doajtest/unit/test_lib_urlshort.py index 235c4f4d42..ffe1fc4165 100644 --- a/doajtest/unit/test_lib_urlshort.py +++ b/doajtest/unit/test_lib_urlshort.py @@ -1,10 +1,11 @@ import time -from doajtest.helpers import DoajTestCase, wait_unit +from doajtest.helpers import DoajTestCase from portality import models from portality.lib import urlshort from portality.models import UrlShortener from portality.util import url_for +from portality.lib.thread_utils import wait_until def wait_any_url_shortener(): @@ -62,7 +63,7 @@ def test_find_url_by_alias(self): alias = surl[surl.rfind('/') + 1:] data[alias] = url - wait_unit(wait_any_url_shortener) + wait_until(wait_any_url_shortener) results = models.UrlShortener.q2obj() @@ -79,7 +80,7 @@ class TestUrlshortRoute(DoajTestCase): def test_urlshort_route(self): url = 'https://www.google.com' surl = urlshort.add_url_shortener(url) - wait_unit(wait_any_url_shortener) + wait_until(wait_any_url_shortener) with self.app_test.test_client() as c: rv = c.get(surl) @@ -98,7 +99,7 @@ def test_create_shorten_url(self): assert rv.status_code == 200 assert rv.json['short_url'] - wait_unit(wait_any_url_shortener) + wait_until(wait_any_url_shortener) assert urlshort.find_url_by_alias(surl_to_alias(rv.json['short_url'])) == data['url'] def test_create_shorten_url__invalid(self): From 3e4e3f3b52779a1cde7b37644efbc237847fc2fb Mon Sep 17 00:00:00 2001 From: philip Date: Mon, 18 Mar 2024 14:09:45 +0000 Subject: [PATCH 16/28] add url limit --- doajtest/unit/test_lib_urlshort.py | 16 +++++++++++++++- portality/models/url_shortener.py | 17 +++++++++++++++++ portality/settings.py | 4 ++++ portality/view/doajservices.py | 11 +++++++++++ 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/doajtest/unit/test_lib_urlshort.py b/doajtest/unit/test_lib_urlshort.py index ffe1fc4165..8e67dc078e 100644 --- a/doajtest/unit/test_lib_urlshort.py +++ b/doajtest/unit/test_lib_urlshort.py @@ -1,6 +1,6 @@ import time -from doajtest.helpers import DoajTestCase +from doajtest.helpers import DoajTestCase, patch_config from portality import models from portality.lib import urlshort from portality.models import UrlShortener @@ -107,3 +107,17 @@ def test_create_shorten_url__invalid(self): with self.app_test.test_client() as c: rv = c.post(url_for('doajservices.shorten'), json=data) assert rv.status_code == 400 + + def test_create_shorten_url__limit_reached(self): + orig_config = patch_config(self.app_test, {'URLSHORT_LIMIT': 1}) + data = {'url': 'http://localhost:5004/search/journals'} + with self.app_test.test_client() as c: + rv = c.post(url_for('doajservices.shorten'), json=data) + assert rv.status_code != 429 + + wait_until(wait_any_url_shortener) + + rv = c.post(url_for('doajservices.shorten'), json=data) + assert rv.status_code == 429 + + patch_config(self.app_test, orig_config) diff --git a/portality/models/url_shortener.py b/portality/models/url_shortener.py index 074a08c51a..fdb0a1b594 100644 --- a/portality/models/url_shortener.py +++ b/portality/models/url_shortener.py @@ -57,3 +57,20 @@ def query(self): }, '_source': ['alias'], } + + +class CountWithinDaysQuery: + def __init__(self, days: int): + self.days = days + + def query(self): + return { + "size": 0, + "query": { + "range": { + "created_date": { + "gte": f"now-{self.days}d", + } + } + } + } diff --git a/portality/settings.py b/portality/settings.py index b03d3251b8..20eed8b4ca 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -1566,4 +1566,8 @@ # Url Shortener # ~~->URLShortener:Feature~~ +URLSHORT_LIMIT_WITHIN_DAYS = 7 +URLSHORT_LIMIT = 50_000 +URLSHORT_ALLOWED_SUPERDOMAINS = ['doaj.org'] + ALLOWED_SHORTEN_PATH = ['/search/journals', '/search/articles'] diff --git a/portality/view/doajservices.py b/portality/view/doajservices.py index c391d12285..b50a9f3937 100644 --- a/portality/view/doajservices.py +++ b/portality/view/doajservices.py @@ -9,6 +9,7 @@ from portality.core import app from portality.decorators import ssl_required, write_required from portality.lib import urlshort, plausible +from portality.models.url_shortener import CountWithinDaysQuery from portality.util import jsonp blueprint = Blueprint('doajservices', __name__) @@ -62,6 +63,16 @@ def unlocked(): action=app.config.get('ANALYTICS_ACTION_URLSHORT_ADD', 'Find or create shortener url')) def shorten(): """ create shortener url """ + + # check if limit reached + n_created = models.UrlShortener.hit_count(CountWithinDaysQuery( + app.config.get("URLSHORT_LIMIT_WITHIN_DAYS", 7) + ).query()) + n_created_limit = app.config.get("URLSHORT_LIMIT", 100_000) + if n_created >= n_created_limit: + app.logger.warning(f"Url shortener limit reached: [{n_created=}] >= [{n_created_limit=}]") + abort(429) + data = json.loads(request.data) url = data['url'] From 6d271ff1fa582126c36775ca7b4e7b22d59b4a62 Mon Sep 17 00:00:00 2001 From: philip Date: Mon, 18 Mar 2024 14:17:58 +0000 Subject: [PATCH 17/28] limit by domain instead of paths --- doajtest/helpers.py | 4 +++- doajtest/unit/test_lib_urlshort.py | 2 +- portality/settings.py | 2 -- portality/view/doajservices.py | 10 +++++----- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/doajtest/helpers.py b/doajtest/helpers.py index a8f33d52d5..33859e2225 100644 --- a/doajtest/helpers.py +++ b/doajtest/helpers.py @@ -145,7 +145,9 @@ def create_app_patch(cls): 'ENABLE_EMAIL': False, "FAKER_SEED": 1, "EVENT_SEND_FUNCTION": "portality.events.shortcircuit.send_event", - 'CMS_BUILD_ASSETS_ON_STARTUP': False + 'CMS_BUILD_ASSETS_ON_STARTUP': False, + 'URLSHORT_ALLOWED_SUPERDOMAINS': ['doaj.org', 'localhost', '127.0.0.1'], + } @classmethod diff --git a/doajtest/unit/test_lib_urlshort.py b/doajtest/unit/test_lib_urlshort.py index 8e67dc078e..3f3fe21e42 100644 --- a/doajtest/unit/test_lib_urlshort.py +++ b/doajtest/unit/test_lib_urlshort.py @@ -103,7 +103,7 @@ def test_create_shorten_url(self): assert urlshort.find_url_by_alias(surl_to_alias(rv.json['short_url'])) == data['url'] def test_create_shorten_url__invalid(self): - data = {'url': 'http://localhost:5004/invalid'} + data = {'url': 'http://invalid.domain.abc/aaaaa'} with self.app_test.test_client() as c: rv = c.post(url_for('doajservices.shorten'), json=data) assert rv.status_code == 400 diff --git a/portality/settings.py b/portality/settings.py index 20eed8b4ca..5a10f7db8f 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -1569,5 +1569,3 @@ URLSHORT_LIMIT_WITHIN_DAYS = 7 URLSHORT_LIMIT = 50_000 URLSHORT_ALLOWED_SUPERDOMAINS = ['doaj.org'] - -ALLOWED_SHORTEN_PATH = ['/search/journals', '/search/articles'] diff --git a/portality/view/doajservices.py b/portality/view/doajservices.py index b50a9f3937..d5bc2006a9 100644 --- a/portality/view/doajservices.py +++ b/portality/view/doajservices.py @@ -73,12 +73,12 @@ def shorten(): app.logger.warning(f"Url shortener limit reached: [{n_created=}] >= [{n_created_limit=}]") abort(429) - data = json.loads(request.data) - url = data['url'] + url = json.loads(request.data)['url'] # validate url - path = urlparse(url).path - if not any(path == p for p in app.config.get("ALLOWED_SHORTEN_PATH", [])): + hostname = urlparse(url).hostname + if not any((hostname == d or hostname.endswith(f".{d}")) + for d in app.config.get("URLSHORT_ALLOWED_SUPERDOMAINS", [])): app.logger.warning(f"Invalid url shorten request: {url}") abort(400) @@ -117,6 +117,7 @@ def dismiss_autocheck(autocheck_set_id, autocheck_id): abort(404) return make_response(json.dumps({"status": "success"})) + @blueprint.route("/autocheck/undismiss//", methods=["GET", "POST"]) @jsonp @login_required @@ -128,4 +129,3 @@ def undismiss_autocheck(autocheck_set_id, autocheck_id): if not done: abort(404) return make_response(json.dumps({"status": "success"})) - From 194c8e14c2f8dbd39a603103267d8edb0494cc83 Mon Sep 17 00:00:00 2001 From: philip Date: Mon, 18 Mar 2024 22:51:11 +0000 Subject: [PATCH 18/28] fix wait_until --- portality/lib/thread_utils.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/portality/lib/thread_utils.py b/portality/lib/thread_utils.py index cee82e82a3..4512d497d3 100644 --- a/portality/lib/thread_utils.py +++ b/portality/lib/thread_utils.py @@ -2,13 +2,14 @@ from typing import Callable -def wait_until(cond_fn: Callable[[], bool], timeout=10, sleep_time=0.1): +def wait_until(cond_fn: Callable[[], bool], timeout=10, sleep_time=0.1, + timeout_msg='fail to meet the condition within the timeout period.'): start_time = time.time() - while True: - if cond_fn(): - return True - - if (time.time() - start_time) > timeout: - return False + while (time.time() - start_time) < timeout: + cond_result = cond_fn() + if cond_result: + return cond_result time.sleep(sleep_time) + + raise TimeoutError(timeout_msg) From ff699dd871d2b6df3201c329962dede43aba0125 Mon Sep 17 00:00:00 2001 From: philip Date: Mon, 18 Mar 2024 22:51:25 +0000 Subject: [PATCH 19/28] fix some testcases --- doajtest/unit/test_scripts_accounts_with_marketing_consent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doajtest/unit/test_scripts_accounts_with_marketing_consent.py b/doajtest/unit/test_scripts_accounts_with_marketing_consent.py index cad2cb9870..7ba7dfbd59 100644 --- a/doajtest/unit/test_scripts_accounts_with_marketing_consent.py +++ b/doajtest/unit/test_scripts_accounts_with_marketing_consent.py @@ -61,7 +61,7 @@ def test_01_publishers_with_consent(self): str('False') ]) - thread_utils.wait_until(lambda: org_size + num_new_records == Account.count(), sleep_time=0.4) + thread_utils.wait_until(lambda: org_size + num_new_records * 3 == Account.count(), sleep_time=0.4) publishers_with_consent(output_file) assert os.path.exists(output_file) From 7b173f7fbe827091552cf9f9f7faa5222c1fe23d Mon Sep 17 00:00:00 2001 From: philip Date: Mon, 18 Mar 2024 23:45:18 +0000 Subject: [PATCH 20/28] fix wait_until --- doajtest/selenium_helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doajtest/selenium_helpers.py b/doajtest/selenium_helpers.py index 4ffc24fc89..a210ebe658 100644 --- a/doajtest/selenium_helpers.py +++ b/doajtest/selenium_helpers.py @@ -230,7 +230,7 @@ def exit_cond_fn(): except: return False - wait_until(exit_cond_fn, timeout, check_interval) + wait_until(exit_cond_fn, timeout=timeout, sleep_time=check_interval) return elements @@ -245,7 +245,7 @@ def _click(): except (StaleElementReferenceException, ElementClickInterceptedException): return False - wait_until(_click, timeout=10, check_interval=0.1) + wait_until(_click, timeout=timeout, sleep_time=check_interval) def click_edges_item(driver: 'WebDriver', ele_name, item_name): From 1447101396efa36b4fd43e567565700eddf77928 Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 19 Mar 2024 10:31:50 +0000 Subject: [PATCH 21/28] avoid selenium test fail on circleci --- doajtest/selenium_helpers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doajtest/selenium_helpers.py b/doajtest/selenium_helpers.py index a210ebe658..56cf9c15e4 100644 --- a/doajtest/selenium_helpers.py +++ b/doajtest/selenium_helpers.py @@ -143,7 +143,8 @@ def _is_doaj_server_running(self): self.selenium.find_element(By.CSS_SELECTOR, 'div.container') log.info('doaj server is running') return True - except selenium.common.exceptions.NoSuchElementException: + except (selenium.common.exceptions.NoSuchElementException, + selenium.common.exceptions.WebDriverException): log.info('doaj server is not running') return False @@ -160,7 +161,7 @@ def tearDown(self): self.doaj_process.terminate() self.doaj_process.join() wait_until(lambda: not self._is_doaj_server_running(), 10, 1, - timeout_msg='doaj server is still running') + timeout_msg='doaj server is still running') self.selenium.quit() From e3cf981cfe831c3e4e399e092f03d3d855aaac84 Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 19 Mar 2024 13:28:33 +0000 Subject: [PATCH 22/28] move urlshort to services --- doajtest/unit/test_lib_urlshort.py | 5 +++-- portality/bll/doaj.py | 7 ++++++- portality/{lib => bll/services}/urlshort.py | 0 portality/view/doaj.py | 5 +++-- portality/view/doajservices.py | 3 ++- 5 files changed, 14 insertions(+), 6 deletions(-) rename portality/{lib => bll/services}/urlshort.py (100%) diff --git a/doajtest/unit/test_lib_urlshort.py b/doajtest/unit/test_lib_urlshort.py index 3f3fe21e42..2117d66537 100644 --- a/doajtest/unit/test_lib_urlshort.py +++ b/doajtest/unit/test_lib_urlshort.py @@ -2,11 +2,12 @@ from doajtest.helpers import DoajTestCase, patch_config from portality import models -from portality.lib import urlshort +from portality.bll import DOAJ +from portality.lib.thread_utils import wait_until from portality.models import UrlShortener from portality.util import url_for -from portality.lib.thread_utils import wait_until +urlshort = DOAJ.urlshortService() def wait_any_url_shortener(): models.UrlShortener.refresh() diff --git a/portality/bll/doaj.py b/portality/bll/doaj.py index bd756e8b59..b0dffa3d12 100644 --- a/portality/bll/doaj.py +++ b/portality/bll/doaj.py @@ -130,4 +130,9 @@ def tourService(cls): @classmethod def autochecksService(cls, autocheck_plugins=None): from portality.bll.services import autochecks - return autochecks.AutocheckService(autocheck_plugins=autocheck_plugins) \ No newline at end of file + return autochecks.AutocheckService(autocheck_plugins=autocheck_plugins) + + @classmethod + def urlshortService(cls): + from portality.bll.services import urlshort + return urlshort diff --git a/portality/lib/urlshort.py b/portality/bll/services/urlshort.py similarity index 100% rename from portality/lib/urlshort.py rename to portality/bll/services/urlshort.py diff --git a/portality/view/doaj.py b/portality/view/doaj.py index 013e77b73e..0cdad7c526 100644 --- a/portality/view/doaj.py +++ b/portality/view/doaj.py @@ -12,11 +12,12 @@ from portality import dao from portality import models from portality import store +from portality.bll import DOAJ from portality.core import app from portality.decorators import ssl_required, api_key_required from portality.forms.application_forms import JournalFormFactory from portality.lcc import lcc_jstree -from portality.lib import plausible, urlshort +from portality.lib import plausible from portality.ui.messages import Messages # ~~DOAJ:Blueprint~~ @@ -656,7 +657,7 @@ def new_password_reset(): @plausible.pa_event(app.config.get('ANALYTICS_CATEGORY_URLSHORT', 'Urlshort'), action=app.config.get('ANALYTICS_ACTION_URLSHORT_REDIRECT', 'Redirect')) def shortened_url(alias): - url = urlshort.find_url_by_alias(alias) + url = DOAJ.urlshortService().find_url_by_alias(alias) if url: return redirect(url) diff --git a/portality/view/doajservices.py b/portality/view/doajservices.py index d5bc2006a9..e2cfa2eebf 100644 --- a/portality/view/doajservices.py +++ b/portality/view/doajservices.py @@ -8,7 +8,8 @@ from portality.bll import DOAJ from portality.core import app from portality.decorators import ssl_required, write_required -from portality.lib import urlshort, plausible +from portality.lib import plausible +from portality.bll.services import urlshort from portality.models.url_shortener import CountWithinDaysQuery from portality.util import jsonp From 085697a40e6821c3702f8f4daf0899c754d07edb Mon Sep 17 00:00:00 2001 From: philip Date: Wed, 20 Mar 2024 10:46:18 +0000 Subject: [PATCH 23/28] change generateShortUrl interface --- portality/static/js/doaj.fieldrender.edges.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portality/static/js/doaj.fieldrender.edges.js b/portality/static/js/doaj.fieldrender.edges.js index 28bdd8fbe1..9a17662fca 100644 --- a/portality/static/js/doaj.fieldrender.edges.js +++ b/portality/static/js/doaj.fieldrender.edges.js @@ -1370,7 +1370,7 @@ $.extend(true, doaj, { this.toggleShorten = function(element) { if (!this.component.shortUrl) { var callback = edges.objClosure(this, "updateShortUrl"); - this.component.generateShortUrl(callback); + this.component.generateShortUrl(this.component.edge.fullUrl(), callback); } else { this.updateShortUrl(); } From e9be06d75965fdc1d4e895a07a3ad814315c093f Mon Sep 17 00:00:00 2001 From: philip Date: Wed, 20 Mar 2024 10:46:47 +0000 Subject: [PATCH 24/28] change branch 2881_url_shortener --- portality/static/vendor/edges | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portality/static/vendor/edges b/portality/static/vendor/edges index fa15f74f85..9639b871ac 160000 --- a/portality/static/vendor/edges +++ b/portality/static/vendor/edges @@ -1 +1 @@ -Subproject commit fa15f74f858c558d4ba62d4fbb10c2c71eb902c6 +Subproject commit 9639b871acb1f6590ee78e236f2c9333479c9fe8 From 37163cfe48668db9cb0f795c0e8a4b329f268f1d Mon Sep 17 00:00:00 2001 From: philip Date: Fri, 19 Apr 2024 13:52:50 +0100 Subject: [PATCH 25/28] add DEBUG_DEV_LOG --- portality/settings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/portality/settings.py b/portality/settings.py index 5a10f7db8f..9685bb9b81 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -17,6 +17,7 @@ HOST = '0.0.0.0' DEBUG = False +DEBUG_DEV_LOG = False # show all log of each module PORT = 5004 SSL = True VALID_ENVIRONMENTS = ['dev', 'test', 'staging', 'production', 'harvester'] From 6b4d0057073d4348dccef5f9ad3ba6829cf7e13e Mon Sep 17 00:00:00 2001 From: philip Date: Thu, 2 May 2024 12:50:47 +0100 Subject: [PATCH 26/28] remove lock and add URLSHORT_ALIAS_LENGTH --- doajtest/unit/test_lib_urlshort.py | 4 +++- portality/bll/services/urlshort.py | 13 +++---------- portality/settings.py | 1 + 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/doajtest/unit/test_lib_urlshort.py b/doajtest/unit/test_lib_urlshort.py index 2117d66537..e8e3850777 100644 --- a/doajtest/unit/test_lib_urlshort.py +++ b/doajtest/unit/test_lib_urlshort.py @@ -3,12 +3,14 @@ from doajtest.helpers import DoajTestCase, patch_config from portality import models from portality.bll import DOAJ +from portality.core import app from portality.lib.thread_utils import wait_until from portality.models import UrlShortener from portality.util import url_for urlshort = DOAJ.urlshortService() + def wait_any_url_shortener(): models.UrlShortener.refresh() return models.UrlShortener.count() > 0 @@ -22,7 +24,7 @@ def test_create_new_alias(self): self.assertEqual(len(aliases), n_samples) assert len(aliases) == n_samples - assert len(list(aliases)[0]) == urlshort.alias_len + assert len(list(aliases)[0]) == app.config.get("URLSHORT_ALIAS_LENGTH") def test_parse_shortened_url(self): alias = 'alias_abc' diff --git a/portality/bll/services/urlshort.py b/portality/bll/services/urlshort.py index 9d7f34a77e..fd9272b1a3 100644 --- a/portality/bll/services/urlshort.py +++ b/portality/bll/services/urlshort.py @@ -1,6 +1,5 @@ import random import string -import threading from typing import Optional from portality import models @@ -9,9 +8,7 @@ from portality.util import url_for # global current status of the alias length -alias_len = 6 ALIAS_CHARS = string.ascii_letters + string.digits -alias_lock = threading.Lock() def add_url_shortener(url: str) -> str: @@ -31,24 +28,20 @@ def add_url_shortener(url: str) -> str: if shortened_url: return shortened_url - with alias_lock: - alias = create_new_alias() - models.UrlShortener(url=url, alias=alias).save() + alias = create_new_alias() + models.UrlShortener(url=url, alias=alias).save() return parse_shortened_url(alias) def create_new_alias() -> str: - global alias_len + alias_len = app.config.get("URLSHORT_ALIAS_LENGTH") for _ in range(5): alias = ''.join(random.sample(ALIAS_CHARS, alias_len)) cnt = models.UrlShortener.hit_count(UrlQuery(alias).query()) if cnt == 0: return alias - alias_len += 1 - app.logger.info(f'alias length increased to {alias_len}') - raise ValueError('Could not create a unique alias') diff --git a/portality/settings.py b/portality/settings.py index 9685bb9b81..290a71ee92 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -1570,3 +1570,4 @@ URLSHORT_LIMIT_WITHIN_DAYS = 7 URLSHORT_LIMIT = 50_000 URLSHORT_ALLOWED_SUPERDOMAINS = ['doaj.org'] +URLSHORT_ALIAS_LENGTH = 6 From 91fbf5396f0d3db902a059a1b26590b9f2a59a19 Mon Sep 17 00:00:00 2001 From: philip Date: Thu, 2 May 2024 12:58:38 +0100 Subject: [PATCH 27/28] rename n_retry --- portality/bll/services/urlshort.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/portality/bll/services/urlshort.py b/portality/bll/services/urlshort.py index fd9272b1a3..a017c44f93 100644 --- a/portality/bll/services/urlshort.py +++ b/portality/bll/services/urlshort.py @@ -34,9 +34,9 @@ def add_url_shortener(url: str) -> str: return parse_shortened_url(alias) -def create_new_alias() -> str: +def create_new_alias(n_retry=5) -> str: alias_len = app.config.get("URLSHORT_ALIAS_LENGTH") - for _ in range(5): + for _ in range(n_retry): alias = ''.join(random.sample(ALIAS_CHARS, alias_len)) cnt = models.UrlShortener.hit_count(UrlQuery(alias).query()) if cnt == 0: From ed3eb06522daa7fe4644b4cf3477293b1cc96dad Mon Sep 17 00:00:00 2001 From: philip Date: Fri, 3 May 2024 12:44:02 +0100 Subject: [PATCH 28/28] reverse for old version edges urlshort interface --- portality/settings.py | 3 +++ portality/static/js/doaj.fieldrender.edges.js | 2 +- portality/static/js/doaj.js | 5 ++++- portality/static/vendor/edges | 2 +- portality/view/doajservices.py | 2 +- 5 files changed, 10 insertions(+), 4 deletions(-) diff --git a/portality/settings.py b/portality/settings.py index 290a71ee92..196fcb1747 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -1567,7 +1567,10 @@ # Url Shortener # ~~->URLShortener:Feature~~ +# URLSHORT_LIMIT* used to limit the number of short URLs (URLSHORT_LIMIT) created +# by a user within a certain time period (URLSHORT_LIMIT_WITHIN_DAYS) URLSHORT_LIMIT_WITHIN_DAYS = 7 URLSHORT_LIMIT = 50_000 + URLSHORT_ALLOWED_SUPERDOMAINS = ['doaj.org'] URLSHORT_ALIAS_LENGTH = 6 diff --git a/portality/static/js/doaj.fieldrender.edges.js b/portality/static/js/doaj.fieldrender.edges.js index 9a17662fca..28bdd8fbe1 100644 --- a/portality/static/js/doaj.fieldrender.edges.js +++ b/portality/static/js/doaj.fieldrender.edges.js @@ -1370,7 +1370,7 @@ $.extend(true, doaj, { this.toggleShorten = function(element) { if (!this.component.shortUrl) { var callback = edges.objClosure(this, "updateShortUrl"); - this.component.generateShortUrl(this.component.edge.fullUrl(), callback); + this.component.generateShortUrl(callback); } else { this.updateShortUrl(); } diff --git a/portality/static/js/doaj.js b/portality/static/js/doaj.js index 8df810e3a7..a8b583f25c 100644 --- a/portality/static/js/doaj.js +++ b/portality/static/js/doaj.js @@ -69,7 +69,7 @@ var doaj = { doaj.bindMiniSearch(); }, - doajUrlShortener : function(url, success_callback, error_callback) { + doajUrlShortener : function(query, success_callback, error_callback) { function callbackWrapper(data) { success_callback(data.short_url); } @@ -79,6 +79,9 @@ var doaj = { error_callback && error_callback(); } + const page = `${window.location.protocol}//${window.location.host}${window.location.pathname}`; + const url = `${page}?source=${encodeURIComponent(JSON.stringify(query))}`; + $.ajax({ type: "POST", contentType: "application/json", diff --git a/portality/static/vendor/edges b/portality/static/vendor/edges index 9639b871ac..a2ae52d6fd 160000 --- a/portality/static/vendor/edges +++ b/portality/static/vendor/edges @@ -1 +1 @@ -Subproject commit 9639b871acb1f6590ee78e236f2c9333479c9fe8 +Subproject commit a2ae52d6fd245780d9da423927c0b733c167cecf diff --git a/portality/view/doajservices.py b/portality/view/doajservices.py index e2cfa2eebf..8c94353775 100644 --- a/portality/view/doajservices.py +++ b/portality/view/doajservices.py @@ -6,10 +6,10 @@ from portality import lock, models from portality.bll import DOAJ +from portality.bll.services import urlshort from portality.core import app from portality.decorators import ssl_required, write_required from portality.lib import plausible -from portality.bll.services import urlshort from portality.models.url_shortener import CountWithinDaysQuery from portality.util import jsonp