Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/2881 url shortener #2358

Open
wants to merge 30 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
a586a21
move wait_unit
philipkcl Mar 4, 2024
626c3f8
move wait_unit
philipkcl Mar 4, 2024
475d1a3
add urlshort
philipkcl Mar 4, 2024
6312f51
apply urlshort in frontend
philipkcl Mar 4, 2024
21ffdb5
cleanup old url shortener
philipkcl Mar 4, 2024
3f411b5
add doc `How to setup for dev with Plausible`
philipkcl Mar 5, 2024
0581a02
add setup_dev_log
philipkcl Mar 5, 2024
58b4fd2
commit edges
philipkcl Mar 5, 2024
eaba321
add plausible for urlshort
philipkcl Mar 5, 2024
6909692
add more test cases
philipkcl Mar 5, 2024
b818da0
add note
philipkcl Mar 5, 2024
90b00d4
update edges
philipkcl Mar 5, 2024
50f995f
fix docs
philipkcl Mar 5, 2024
8068d94
doc format
philipkcl Mar 5, 2024
0a5d5cc
merge develop 240318
philipkcl Mar 18, 2024
b60d6de
fix names wait_until
philipkcl Mar 18, 2024
3e4e3f3
add url limit
philipkcl Mar 18, 2024
6d271ff
limit by domain instead of paths
philipkcl Mar 18, 2024
194c8e1
fix wait_until
philipkcl Mar 18, 2024
ff699dd
fix some testcases
philipkcl Mar 18, 2024
7b173f7
fix wait_until
philipkcl Mar 18, 2024
1447101
avoid selenium test fail on circleci
philipkcl Mar 19, 2024
e3cf981
move urlshort to services
philipkcl Mar 19, 2024
085697a
change generateShortUrl interface
philipkcl Mar 20, 2024
e9be06d
change branch 2881_url_shortener
philipkcl Mar 20, 2024
37163cf
add DEBUG_DEV_LOG
philipkcl Apr 19, 2024
6b4d005
remove lock and add URLSHORT_ALIAS_LENGTH
philipkcl May 2, 2024
91fbf53
rename n_retry
philipkcl May 2, 2024
ed3eb06
reverse for old version edges urlshort interface
philipkcl May 3, 2024
6f42055
merge develop 240503
philipkcl May 3, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion doajtest/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,9 @@ def create_app_patch(cls):
'ENABLE_EMAIL': False,
"FAKER_SEED": 1,
"EVENT_SEND_FUNCTION": "portality.events.shortcircuit.send_event",
'CMS_BUILD_ASSETS_ON_STARTUP': False
'CMS_BUILD_ASSETS_ON_STARTUP': False,
'URLSHORT_ALLOWED_SUPERDOMAINS': ['doaj.org', 'localhost', '127.0.0.1'],

}

@classmethod
Expand Down
33 changes: 12 additions & 21 deletions doajtest/selenium_helpers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import datetime
import logging
import multiprocessing
import time
from multiprocessing import Process, freeze_support
from typing import TYPE_CHECKING

Expand All @@ -14,6 +13,7 @@
from doajtest.helpers import DoajTestCase, patch_config
from portality import app, models, core
from portality.dao import ESMappingMissingError
from portality.lib.thread_utils import wait_until

if TYPE_CHECKING:
from selenium.webdriver.remote.webdriver import WebDriver
Expand Down Expand Up @@ -118,7 +118,7 @@ def setUp(self):
self.selenium.set_window_size(1400, 1000) # avoid something is not clickable

# wait for server to start
wait_unit(self._is_doaj_server_running, 10, 1.5, timeout_msg='doaj server not started')
wait_until(self._is_doaj_server_running, 10, 1.5, timeout_msg='doaj server not started')

fix_index_not_found_exception(self.app_test)
self.fix_es_mapping()
Expand All @@ -143,7 +143,8 @@ def _is_doaj_server_running(self):
self.selenium.find_element(By.CSS_SELECTOR, 'div.container')
log.info('doaj server is running')
return True
except selenium.common.exceptions.NoSuchElementException:
except (selenium.common.exceptions.NoSuchElementException,
selenium.common.exceptions.WebDriverException):
log.info('doaj server is not running')
return False

Expand All @@ -159,12 +160,12 @@ def tearDown(self):
print(f'{datetime.datetime.now().isoformat()} --- doaj process terminating...')
self.doaj_process.terminate()
self.doaj_process.join()
wait_unit(lambda: not self._is_doaj_server_running(), 10, 1,
timeout_msg='doaj server is still running')
wait_until(lambda: not self._is_doaj_server_running(), 10, 1,
timeout_msg='doaj server is still running')

self.selenium.quit()

wait_unit(self._is_selenium_quit, 10, 1, timeout_msg='selenium is still running')
wait_until(self._is_selenium_quit, 10, 1, timeout_msg='selenium is still running')
print('selenium terminated')

super().tearDown()
Expand Down Expand Up @@ -219,17 +220,7 @@ def login_by_acc(driver: 'WebDriver', acc: models.Account = None):
assert "/login" not in driver.current_url


def wait_unit(exit_cond_fn, timeout=10, check_interval=0.1,
timeout_msg="wait_unit but exit_cond timeout"):
start = time.time()
while (time.time() - start) < timeout:
if exit_cond_fn():
return
time.sleep(check_interval)
raise TimeoutError(timeout_msg)


def wait_unit_elements(driver: 'WebDriver', css_selector: str, timeout=10, check_interval=0.1):
def wait_until_elements(driver: 'WebDriver', css_selector: str, timeout=10, check_interval=0.1):
elements = []

def exit_cond_fn():
Expand All @@ -240,11 +231,11 @@ def exit_cond_fn():
except:
return False

wait_unit(exit_cond_fn, timeout, check_interval)
wait_until(exit_cond_fn, timeout=timeout, sleep_time=check_interval)
return elements


def wait_unit_click(driver: 'WebDriver', css_selector: str, timeout=10, check_interval=0.1):
def wait_until_click(driver: 'WebDriver', css_selector: str, timeout=10, check_interval=0.1):
def _click():
try:
ele = find_ele_by_css(driver, css_selector)
Expand All @@ -255,11 +246,11 @@ def _click():
except (StaleElementReferenceException, ElementClickInterceptedException):
return False

wait_unit(_click, timeout=10, check_interval=0.1)
wait_until(_click, timeout=timeout, sleep_time=check_interval)


def click_edges_item(driver: 'WebDriver', ele_name, item_name):
wait_unit_click(driver, f'#edges-bs3-refiningand-term-selector-toggle-{ele_name}')
wait_until_click(driver, f'#edges-bs3-refiningand-term-selector-toggle-{ele_name}')
for ele in find_eles_by_css(driver, f'.edges-bs3-refiningand-term-selector-result-{ele_name} a'):
if item_name in ele.text.strip():
ele.click()
15 changes: 8 additions & 7 deletions doajtest/seleniumtest/test_article_xml_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from portality import models, dao
from portality.constants import FileUploadStatus
from portality.ui.messages import Messages
from portality.lib.thread_utils import wait_until

HISTORY_ROW_PROCESSING_FAILED = 'processing failed'
XML_FORMAT_DOAJ = 'doaj'
Expand Down Expand Up @@ -62,7 +63,7 @@ def assert_history_row_success(self, history_row, n_article=1):
self.assert_history_row(history_row, note=f'successfully processed {n_article} articles imported')

@staticmethod
def wait_unit_file_upload_status_ready():
def wait_until_file_upload_status_ready():
new_file_upload = None

def _cond_fn():
Expand All @@ -73,7 +74,7 @@ def _cond_fn():
return new_file_upload.status not in (FileUploadStatus.Validated, FileUploadStatus.Incoming)

# interval 0.5 is good because ES can't handle too many requests
selenium_helpers.wait_unit(_cond_fn, timeout=15, check_interval=0.5)
wait_until(_cond_fn, timeout=15, sleep_time=0.5)
return new_file_upload


Expand Down Expand Up @@ -108,7 +109,7 @@ def test_upload_fail(self, file_path, err_msg, expected_note):
assert err_msg in alert_ele.text

# # wait for background job to finish
self.wait_unit_file_upload_status_ready()
self.wait_until_file_upload_status_ready()

self.selenium.refresh()
new_rows = find_history_rows(self.selenium)
Expand Down Expand Up @@ -177,16 +178,16 @@ def _find_history_rows():
self.upload_submit_file(file_path)

assert 'File uploaded and waiting to be processed' in self.find_ele_by_css('.alert--success').text
selenium_helpers.wait_unit(
wait_until(
lambda: len(_find_history_rows()) == n_org_rows + 1,
timeout=10, check_interval=1
timeout=10, sleep_time=1
)
new_rows = _find_history_rows()
assert n_org_rows + 1 == len(new_rows)
assert n_file_upload + 1 == models.FileUpload.count()

# wait for background job to finish
new_file_upload = self.wait_unit_file_upload_status_ready()
new_file_upload = self.wait_until_file_upload_status_ready()

# assert file upload status
assert new_file_upload.filename == Path(file_path).name
Expand Down Expand Up @@ -307,7 +308,7 @@ def step_upload_success(self, publisher, article_xml_path, journal_issn, expecte
XML_FORMAT_DOAJ)
self.assert_history_row_success(latest_history_row)
selenium_helpers.goto(self.selenium, url_path.url_toc_articles(journal_issn))
selenium_helpers.wait_unit(lambda: self.find_eles_by_css(article_title_selector))
wait_until(lambda: self.find_eles_by_css(article_title_selector))
assert expected_title in [e.get_attribute('innerHTML').strip()
for e in self.find_eles_by_css(article_title_selector)]

Expand Down
126 changes: 126 additions & 0 deletions doajtest/unit/test_lib_urlshort.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import time

from doajtest.helpers import DoajTestCase, patch_config
from portality import models
from portality.bll import DOAJ
from portality.core import app
from portality.lib.thread_utils import wait_until
from portality.models import UrlShortener
from portality.util import url_for

# urlshort service instance under test, shared by the test cases in this module
urlshort = DOAJ.urlshortService()


def wait_any_url_shortener():
    """Report whether at least one UrlShortener record can be counted.

    Calls ``UrlShortener.refresh()`` and then checks ``UrlShortener.count()``.
    Written to be passed to ``wait_until`` as its polling condition.

    :return: True when the count is greater than zero, otherwise False
    """
    models.UrlShortener.refresh()
    return models.UrlShortener.count() > 0


class TestLibUrlshort(DoajTestCase):
    """Unit tests for the functions exposed by the urlshort service."""

    def test_create_new_alias(self):
        """Generated aliases are unique and have the configured length."""
        n_samples = 3
        aliases = {urlshort.create_new_alias() for _ in range(n_samples)}

        # a set drops duplicates, so equal sizes mean every alias was distinct
        self.assertEqual(len(aliases), n_samples)

        expected_length = app.config.get("URLSHORT_ALIAS_LENGTH")
        for alias in aliases:
            assert len(alias) == expected_length

    def test_parse_shortened_url(self):
        """The shortened url built from an alias contains that alias."""
        alias = 'alias_abc'
        assert alias in urlshort.parse_shortened_url(alias)

    def test_add_url_shortener(self):
        """Adding the same url twice yields the same short url."""
        url = 'http://aabbcc.com'
        surl = urlshort.add_url_shortener(url)

        assert surl
        assert isinstance(surl, str)

        # poll until the record can be counted instead of sleeping a fixed time
        wait_until(wait_any_url_shortener)

        surl2 = urlshort.add_url_shortener(url)
        assert surl == surl2

        surl3 = urlshort.add_url_shortener(url + 'xxxx')
        assert surl != surl3

    def test_find_shortened_url(self):
        """A url is only found after it has been added."""
        url = 'http://aabbcc.com'
        assert urlshort.find_shortened_url(url) is None

        surl = urlshort.add_url_shortener(url)

        # poll until the record can be counted instead of sleeping a fixed time
        wait_until(wait_any_url_shortener)

        surl2 = urlshort.find_shortened_url(url)
        assert surl == surl2

    def test_find_url_by_alias(self):
        """Looking up a stored alias returns the url it was created for."""
        data = {}
        for idx in range(3):
            url = f'/{idx}'
            surl = urlshort.add_url_shortener(url)
            data[surl_to_alias(surl)] = url

        wait_until(wait_any_url_shortener)

        results = models.UrlShortener.q2obj()

        # any stored record will do; its alias must map back to the url we added
        alias = results[0].alias
        assert urlshort.find_url_by_alias(alias) == data[alias]


def surl_to_alias(surl):
    """Return the part of ``surl`` after its last ``/``.

    When ``surl`` contains no ``/`` the whole string is returned.
    """
    _, _, tail = surl.rpartition('/')
    return tail


class TestUrlshortRoute(DoajTestCase):
    """Tests for the short-url redirect route and the ``doajservices.shorten`` endpoint."""

    def test_urlshort_route(self):
        """Requesting a stored short url redirects (302) to the original url."""
        url = 'https://www.google.com'
        surl = urlshort.add_url_shortener(url)
        wait_until(wait_any_url_shortener)

        with self.app_test.test_client() as c:
            rv = c.get(surl)
            assert rv.status_code == 302
            assert rv.headers['Location'] == url

    def test_urlshort_route__not_found(self):
        """Requesting a short url built from an unused alias returns 404."""
        with self.app_test.test_client() as c:
            rv = c.get(urlshort.parse_shortened_url('nnnnnnnnot_found'))
            assert rv.status_code == 404

    def test_create_shorten_url(self):
        """Posting an accepted url returns a short url that resolves back to it."""
        data = {'url': 'http://localhost:5004/search/journals'}
        with self.app_test.test_client() as c:
            rv = c.post(url_for('doajservices.shorten'), json=data)
            assert rv.status_code == 200
            assert rv.json['short_url']

            wait_until(wait_any_url_shortener)
            assert urlshort.find_url_by_alias(surl_to_alias(rv.json['short_url'])) == data['url']

    def test_create_shorten_url__invalid(self):
        """Posting this url is rejected with 400."""
        data = {'url': 'http://invalid.domain.abc/aaaaa'}
        with self.app_test.test_client() as c:
            rv = c.post(url_for('doajservices.shorten'), json=data)
            assert rv.status_code == 400

    def test_create_shorten_url__limit_reached(self):
        """With URLSHORT_LIMIT patched to 1, the second request returns 429."""
        orig_config = patch_config(self.app_test, {'URLSHORT_LIMIT': 1})
        try:
            data = {'url': 'http://localhost:5004/search/journals'}
            with self.app_test.test_client() as c:
                rv = c.post(url_for('doajservices.shorten'), json=data)
                assert rv.status_code != 429

                wait_until(wait_any_url_shortener)

                rv = c.post(url_for('doajservices.shorten'), json=data)
                assert rv.status_code == 429
        finally:
            # always restore the config so a failing assertion cannot leak
            # the patched limit into other tests
            patch_config(self.app_test, orig_config)
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def test_01_publishers_with_consent(self):
str('False')
])

thread_utils.wait_until(lambda: org_size + num_new_records == Account.count(), sleep_time=0.4)
thread_utils.wait_until(lambda: org_size + num_new_records * 3 == Account.count(), sleep_time=0.4)
publishers_with_consent(output_file)

assert os.path.exists(output_file)
Expand Down
36 changes: 32 additions & 4 deletions docs/dev/how-to-setup.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
Setup google API key for google sheet
------------------------------------------

### create project and enable api

* go to https://console.cloud.google.com/
* create and select a project on the top left
* searching for "Google Drive API" and enable it, url should be some thing like (https://console.cloud.google.com/marketplace/product/google/drive.googleapis.com)
* search for "Google Drive API" and enable it; the URL should be something
like (https://console.cloud.google.com/marketplace/product/google/drive.googleapis.com)
* search for "Google Sheets API" and enable it

### create key

* click `create credentials` button
* select `Google Drive API` and `Web server` and `Application data`
* select `No, I'm not using them`
Expand All @@ -17,19 +21,18 @@ Setup google API key for google sheet
* click `KEYS`, `ADD KEY`
* select `JSON` and click create


### share google sheet to service account

* go to google drive
* right click the sheet you want to share
* click `Share`
* paste the service account email to `People` field
* click `Done`



How to setup for `datalog_journal_added_update` task
--------------------------------------------------
The following variables are needed for the background job `datalog_journal_added_update`:

```
# value should be key file path of json, empty string means disabled
GOOGLE_KEY_PATH = ''
Expand All @@ -40,3 +43,28 @@ DATALOG_JA_FILENAME = 'DOAJ: journals added and withdrawn'
# worksheet name or tab name that datalog will write to
DATALOG_JA_WORKSHEET_NAME = 'Added'
```

How to setup for dev with Plausible
-----------------------------------

* run plausible
* ref 'https://github.com/plausible/community-edition'
* update `plausible-conf.env`
* run docker `docker-compose up`
* test the configuration by browsing to `http://localhost:8000` and logging in as the admin user
* setup fake domain in /etc/hosts
* e.g. `127.0.0.1 doaj.dev.local`
* setup dev.cfg
* `DEBUG = False`
* `BASE_URL = "https://doaj.dev.local:5004"`
* `PLAUSIBLE_URL = "http://localhost:8000"`
* `PLAUSIBLE_JS_URL = PLAUSIBLE_URL + "/js/script.outbound-links.file-downloads.js"`
* `PLAUSIBLE_API_URL = PLAUSIBLE_URL + "/api/event"`
* `PLAUSIBLE_SITE_NAME = "doaj.dev.local"`
* update `portality/app.py` to set `fake_https=True`, e.g. `run_server(fake_https=True)`
* you might need to install `cryptography~=42.0` with pip
* run `portality/app.py`
* test the configuration by browsing to `https://doaj.dev.local:5004`



7 changes: 7 additions & 0 deletions docs/pr_note/2881_url_shortener.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@



Reminders:
---------------------
* a new `Urlshort` goal should be added under `Goals` in Plausible
* the edges library has been updated to generate shortened URLs
Loading
Loading