From f03aaec493232e68e73b9f528e988448a90c4f68 Mon Sep 17 00:00:00 2001 From: philip Date: Mon, 12 Feb 2024 12:29:28 +0000 Subject: [PATCH 01/23] add model api_log --- portality/models/api_log.py | 50 +++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 portality/models/api_log.py diff --git a/portality/models/api_log.py b/portality/models/api_log.py new file mode 100644 index 0000000000..c9f3ac4a90 --- /dev/null +++ b/portality/models/api_log.py @@ -0,0 +1,50 @@ +from portality.dao import DomainObject + + +class ApiLog(DomainObject): + """~~ApiLog:Model->DomainObject:Model~~""" + __type__ = "api_rate" + + def __init__(self, **kwargs): + super(ApiLog, self).__init__(**kwargs) + + @property + def src(self): + return self.data.get("src") + + @src.setter + def src(self, val: str): + """ + Parameters + ---------- + val + can be IP address or API key + """ + self.data["src"] = val + + @property + def target(self): + return self.data.get("target") + + @target.setter + def target(self, target: str): + """ + + Parameters + ---------- + target + value format should be "METHOD /api/endpoint" + + Returns + ------- + + """ + self.data["target"] = target + + @property + def created_date(self): + return self.data.get("created_date") + + @created_date.setter + def created_date(self, val): + self.data["created_date"] = val From 018159b9206a1bc7a805151d03bce4e7eb323dfa Mon Sep 17 00:00:00 2001 From: philip Date: Mon, 12 Feb 2024 12:29:49 +0000 Subject: [PATCH 02/23] update data cleanup settings --- portality/settings.py | 1 + portality/tasks/old_data_cleanup.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/portality/settings.py b/portality/settings.py index 37729773c2..29158faab3 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -1381,6 +1381,7 @@ TASK_DATA_RETENTION_DAYS = { "notification": 180, # ~~-> Notifications:Feature ~~ "background_job": 180, # ~~-> BackgroundJobs:Feature ~~ + "api_log": 180, # ~~-> ApiLog:Feature ~~ } ######################################## diff --git a/portality/tasks/old_data_cleanup.py b/portality/tasks/old_data_cleanup.py index 4c764694ee..cee05b334b 100644 --- a/portality/tasks/old_data_cleanup.py +++ b/portality/tasks/old_data_cleanup.py @@ -8,10 +8,16 @@ from portality.lib import dates from portality.lib.es_queries import ES_DATETIME_FMT from portality.models import Notification, BackgroundJob +from portality.models.api_log import ApiLog from portality.tasks.helpers import background_helper from portality.tasks.redis_huey import schedule, long_running target_queue = long_running +MODEL_TOBE_CLEANUP = [ + Notification, + BackgroundJob, + ApiLog, +] class RetentionQuery: @@ -59,7 +65,7 @@ def clean_all_old_data(logger_fn=None): if logger_fn is None: logger_fn = print - for klazz in [Notification, BackgroundJob]: + for klazz in MODEL_TOBE_CLEANUP: _clean_old_data(klazz, logger_fn=logger_fn) logger_fn("old data cleanup completed") From b0358ec29dfeafe16d9dcfe283e82d0acb582e9f Mon Sep 17 00:00:00 2001 From: philip Date: Mon, 12 Feb 2024 12:31:52 +0000 Subject: [PATCH 03/23] update data cleanup settings --- portality/tasks/old_data_cleanup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/portality/tasks/old_data_cleanup.py b/portality/tasks/old_data_cleanup.py index cee05b334b..a4a92f85f7 100644 --- a/portality/tasks/old_data_cleanup.py +++ b/portality/tasks/old_data_cleanup.py @@ -13,7 +13,7 @@ from portality.tasks.redis_huey import schedule, long_running target_queue = long_running -MODEL_TOBE_CLEANUP = [ +MODELS_TOBE_CLEANUP = [ Notification, BackgroundJob, ApiLog, @@ -65,7 +65,7 @@ def clean_all_old_data(logger_fn=None): if logger_fn is None: logger_fn = print - for klazz in MODEL_TOBE_CLEANUP: + for klazz in MODELS_TOBE_CLEANUP: _clean_old_data(klazz, logger_fn=logger_fn) logger_fn("old data cleanup completed") From 02da7a10c54f7761ae3208004133d5191ff0fc2d Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 13 Feb 2024 12:16:47 +0000 Subject: [PATCH 04/23] add DEBUG_DEV_LOG --- portality/app.py | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/portality/app.py b/portality/app.py index efd9e5b170..955b704b73 100644 --- a/portality/app.py +++ b/portality/app.py @@ -9,34 +9,34 @@ ~~DOAJ:WebApp~~ """ +import logging +import os +import sys +from datetime import datetime -import os, sys -import tzlocal import pytz - +import tzlocal from flask import request, abort, render_template, redirect, send_file, url_for, jsonify, send_from_directory from flask_login import login_user, current_user -from datetime import datetime - import portality.models as models -from portality.core import app, es_connection, initialise_index from portality import settings +from portality.core import app, es_connection, initialise_index from portality.lib import edges, dates from portality.lib.dates import FMT_DATETIME_STD, FMT_YEAR - from portality.view.account import blueprint as account from portality.view.admin import blueprint as admin -from portality.view.publisher import blueprint as publisher -from portality.view.query import blueprint as query -from portality.view.doaj import blueprint as doaj -from portality.view.oaipmh import blueprint as oaipmh -from portality.view.openurl import blueprint as openurl +from portality.view.apply import blueprint as apply from portality.view.atom import blueprint as atom -from portality.view.editor import blueprint as editor +from portality.view.doaj import blueprint as doaj from portality.view.doajservices import blueprint as services +from portality.view.editor import blueprint as editor from portality.view.jct import blueprint as jct -from portality.view.apply import blueprint as apply +from portality.view.oaipmh import blueprint as oaipmh +from portality.view.openurl import blueprint as openurl +from portality.view.publisher import blueprint as publisher +from portality.view.query import blueprint as query + if 'api1' in app.config['FEATURES']: from portality.view.api_v1 import blueprint as api_v1 if 'api2' in app.config['FEATURES']: @@ -443,6 +443,17 @@ def run_server(host=None, port=None, fake_https=False): that can help for debugging Plausible :return: """ + + if app.config.get('DEBUG_DEV_LOG', False): + app.logger.handlers = [] + log = logging.getLogger() + log.setLevel(logging.DEBUG) + ch = logging.StreamHandler() + ch.setLevel(logging.DEBUG) + ch.setFormatter(logging.Formatter('%(asctime)s %(levelname).4s %(processName)s%(threadName)s - ' + '%(message)s --- [%(name)s][%(funcName)s:%(lineno)d]')) + log.addHandler(ch) + pycharm_debug = app.config.get('DEBUG_PYCHARM', False) if len(sys.argv) > 1: if sys.argv[1] == '-d': From 601eb366f2a7112c5d3e1cad695387a4c52100da Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 13 Feb 2024 12:17:04 +0000 Subject: [PATCH 05/23] cleanup --- portality/app.py | 59 ++++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/portality/app.py b/portality/app.py index 955b704b73..0e2c330709 100644 --- a/portality/app.py +++ b/portality/app.py @@ -51,35 +51,35 @@ if app.config.get("DEBUG", False) and app.config.get("TESTDRIVE_ENABLED", False): from portality.view.testdrive import blueprint as testdrive -app.register_blueprint(account, url_prefix='/account') #~~->Account:Blueprint~~ -app.register_blueprint(admin, url_prefix='/admin') #~~-> Admin:Blueprint~~ -app.register_blueprint(publisher, url_prefix='/publisher') #~~-> Publisher:Blueprint~~ -app.register_blueprint(query, name='query', url_prefix='/query') # ~~-> Query:Blueprint~~ +app.register_blueprint(account, url_prefix='/account') # ~~->Account:Blueprint~~ +app.register_blueprint(admin, url_prefix='/admin') # ~~-> Admin:Blueprint~~ +app.register_blueprint(publisher, url_prefix='/publisher') # ~~-> Publisher:Blueprint~~ +app.register_blueprint(query, name='query', url_prefix='/query') # ~~-> Query:Blueprint~~ app.register_blueprint(query, name='admin_query', url_prefix='/admin_query') app.register_blueprint(query, name='publisher_query', url_prefix='/publisher_query') app.register_blueprint(query, name='editor_query', url_prefix='/editor_query') app.register_blueprint(query, name='associate_query', url_prefix='/associate_query') app.register_blueprint(query, name='dashboard_query', url_prefix="/dashboard_query") -app.register_blueprint(editor, url_prefix='/editor') # ~~-> Editor:Blueprint~~ -app.register_blueprint(services, url_prefix='/service') # ~~-> Services:Blueprint~~ +app.register_blueprint(editor, url_prefix='/editor') # ~~-> Editor:Blueprint~~ +app.register_blueprint(services, url_prefix='/service') # ~~-> Services:Blueprint~~ if 'api1' in app.config['FEATURES']: - app.register_blueprint(api_v1, url_prefix='/api/v1') # ~~-> APIv1:Blueprint~~ + app.register_blueprint(api_v1, url_prefix='/api/v1') # ~~-> APIv1:Blueprint~~ if 'api2' in app.config['FEATURES']: - app.register_blueprint(api_v2, url_prefix='/api/v2') # ~~-> APIv2:Blueprint~~ + app.register_blueprint(api_v2, url_prefix='/api/v2') # ~~-> APIv2:Blueprint~~ if 'api3' in app.config['FEATURES']: - app.register_blueprint(api_v3, name='api', url_prefix='/api') # ~~-> APIv3:Blueprint~~ - app.register_blueprint(api_v3, name='api_v3', url_prefix='/api/v3') # ~~-> APIv3:Blueprint~~ -app.register_blueprint(status, name='status', url_prefix='/status') # ~~-> Status:Blueprint~~ + app.register_blueprint(api_v3, name='api', url_prefix='/api') # ~~-> APIv3:Blueprint~~ + app.register_blueprint(api_v3, name='api_v3', url_prefix='/api/v3') # ~~-> APIv3:Blueprint~~ +app.register_blueprint(status, name='status', url_prefix='/status') # ~~-> Status:Blueprint~~ app.register_blueprint(status, name='_status', url_prefix='/_status') -app.register_blueprint(apply, url_prefix='/apply') # ~~-> Apply:Blueprint~~ -app.register_blueprint(jct, url_prefix="/jct") # ~~-> JCT:Blueprint~~ -app.register_blueprint(dashboard, url_prefix="/dashboard") #~~-> Dashboard:Blueprint~~ +app.register_blueprint(apply, url_prefix='/apply') # ~~-> Apply:Blueprint~~ +app.register_blueprint(jct, url_prefix="/jct") # ~~-> JCT:Blueprint~~ +app.register_blueprint(dashboard, url_prefix="/dashboard") # ~~-> Dashboard:Blueprint~~ app.register_blueprint(tours, url_prefix="/tours") # ~~-> Tours:Blueprint~~ -app.register_blueprint(oaipmh) # ~~-> OAIPMH:Blueprint~~ -app.register_blueprint(openurl) # ~~-> OpenURL:Blueprint~~ -app.register_blueprint(atom) # ~~-> Atom:Blueprint~~ -app.register_blueprint(doaj) # ~~-> DOAJ:Blueprint~~ +app.register_blueprint(oaipmh) # ~~-> OAIPMH:Blueprint~~ +app.register_blueprint(openurl) # ~~-> OpenURL:Blueprint~~ +app.register_blueprint(atom) # ~~-> Atom:Blueprint~~ +app.register_blueprint(doaj) # ~~-> DOAJ:Blueprint~~ if app.config.get("DEBUG", False) and app.config.get("TESTDRIVE_ENABLED", False): app.logger.warning('Enabling TESTDRIVE at /testdrive') @@ -91,6 +91,7 @@ # putting it here ensures it will run under any web server initialise_index(app, es_connection) + # serve static files from multiple potential locations # this allows us to override the standard static file handling with our own dynamic version # ~~-> Assets:WebRoute~~ @@ -108,11 +109,14 @@ def custom_static(path): return send_from_directory(os.path.dirname(target), os.path.basename(target)) abort(404) + # Configure Analytics # ~~-> PlausibleAnalytics:ExternalService~~ from portality.lib import plausible + plausible.create_logfile(app.config.get('PLAUSIBLE_LOG_DIR', None)) + # Redirects from previous DOAJ app. # RJ: I have decided to put these here so that they can be managed # alongside the DOAJ codebase. I know they could also go into the @@ -139,6 +143,7 @@ def legacy(): def another_legacy_csv_route(): return redirect("/csv"), 301 + ################################################### # ~~-> DOAJArticleXML:Schema~~ @@ -146,9 +151,9 @@ def another_legacy_csv_route(): def legacy_doaj_XML_schema(): schema_fn = 'doajArticles.xsd' return send_file( - os.path.join(app.config.get("STATIC_DIR"), "doaj", schema_fn), - mimetype="application/xml", as_attachment=True, attachment_filename=schema_fn - ) + os.path.join(app.config.get("STATIC_DIR"), "doaj", schema_fn), + mimetype="application/xml", as_attachment=True, attachment_filename=schema_fn + ) # ~~-> CrossrefArticleXML:WebRoute~~ @@ -182,7 +187,7 @@ def set_current_context(): "app": app, "current_year": dates.now_str(FMT_YEAR), "base_url": app.config.get('BASE_URL'), - } + } # Jinja2 Template Filters @@ -193,7 +198,7 @@ def bytes_to_filesize(size): units = ["bytes", "Kb", "Mb", "Gb"] scale = 0 while size > 1000 and scale < len(units): - size = float(size) / 1000.0 # note that it is no longer 1024 + size = float(size) / 1000.0 # note that it is no longer 1024 scale += 1 return "{size:.1f}{unit}".format(size=size, unit=units[scale]) @@ -286,6 +291,7 @@ def form_diff_table_subject_expand(val): return ", ".join(results) + @app.template_filter("is_in_the_past") def is_in_the_past(dttm): return dates.is_before(dttm, dates.today()) @@ -297,6 +303,7 @@ def is_in_the_past(dttm): def search_query_source_wrapper(): def search_query_source(**params): return edges.make_url_query(**params) + return dict(search_query_source=search_query_source) @@ -311,6 +318,7 @@ def maned_of(): if len(egs) > 0: assignments = models.Application.assignment_to_editor_groups(egs) return egs, assignments + return dict(maned_of=maned_of) @@ -325,8 +333,10 @@ def editor_of(): if len(egs) > 0: assignments = models.Application.assignment_to_editor_groups(egs) return egs, assignments + return dict(editor_of=editor_of) + @app.context_processor def associate_of_wrapper(): def associate_of(): @@ -338,8 +348,10 @@ def associate_of(): if len(egs) > 0: assignments = models.Application.assignment_to_editor_groups(egs) return egs, assignments + return dict(associate_of=associate_of) + # ~~-> Account:Model~~ # ~~-> AuthNZ:Feature~~ @app.before_request @@ -478,4 +490,3 @@ def run_server(host=None, port=None, fake_https=False): if __name__ == "__main__": run_server() - From 479dfc7a8864a3897607d4b0409f5a8a44abe121 Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 13 Feb 2024 12:20:06 +0000 Subject: [PATCH 06/23] add flask_utils.get_remote_addr --- portality/lib/flask_utils.py | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 portality/lib/flask_utils.py diff --git a/portality/lib/flask_utils.py b/portality/lib/flask_utils.py new file mode 100644 index 0000000000..401c197a95 --- /dev/null +++ b/portality/lib/flask_utils.py @@ -0,0 +1,7 @@ +from flask import request + + +def get_remote_addr(): + if request: + return request.headers.get("cf-connecting-ip", request.remote_addr) + return None From b88d7b2214d5f8055b4267b19f8702eed17f5057 Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 13 Feb 2024 12:20:23 +0000 Subject: [PATCH 07/23] add flask_utils.get_remote_addr --- portality/lib/plausible.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/portality/lib/plausible.py b/portality/lib/plausible.py index 72dca5cf48..c3743ef658 100644 --- a/portality/lib/plausible.py +++ b/portality/lib/plausible.py @@ -1,15 +1,15 @@ # ~~ PlausibleAnalytics:ExternalService~~ -import json import logging import os -import requests - from functools import wraps from threading import Thread -from portality.core import app +import requests from flask import request +from portality.core import app +from portality.lib import flask_utils + logger = logging.getLogger(__name__) # Keep track of when this is misconfigured so we don't spam the logs with skip messages @@ -50,7 +50,7 @@ def send_event(goal: str, on_completed=None, **props_kwargs): headers = {'Content-Type': 'application/json'} if request: # Add IP from CloudFlare header or remote_addr - this works because we have ProxyFix on the app - headers["X-Forwarded-For"] = request.headers.get("cf-connecting-ip", request.remote_addr) + headers["X-Forwarded-For"] = flask_utils.get_remote_addr() user_agent_key = 'User-Agent' user_agent_val = request.headers.get(user_agent_key) if user_agent_val: From 889251b72ba3d98b225823f7fb078a608cf02a40 Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 13 Feb 2024 12:32:32 +0000 Subject: [PATCH 08/23] add api_rate_srevice --- portality/api/common.py | 47 +++++++++++++++---- portality/bll/doaj.py | 9 ++++ portality/bll/services/api_rate.py | 75 ++++++++++++++++++++++++++++++ portality/dao.py | 5 +- portality/models/api_log.py | 41 +++++++++++++--- portality/settings.py | 16 +++++++ 6 files changed, 173 insertions(+), 20 deletions(-) create mode 100644 portality/bll/services/api_rate.py diff --git a/portality/api/common.py b/portality/api/common.py index 5a5bbf9932..0d40c40aaa 100644 --- a/portality/api/common.py +++ b/portality/api/common.py @@ -1,10 +1,13 @@ -#~~API:Feature~~ -import json, uuid -from portality.core import app -from flask import request +# ~~API:Feature~~ +import json +import uuid from copy import deepcopy + +from flask import request from link_header import LinkHeader, Link +from portality.core import app + LINK_HEADERS = ['next', 'prev', 'last'] TOTAL_RESULTS_COUNT = ['total'] @@ -16,17 +19,17 @@ class Api(object): # ~~->Swagger:Feature~~ # ~~->API:Documentation~~ SWAG_TEMPLATE = { - "description" : "", + "description": "", "responses": {}, "parameters": [], "tags": [] } R200 = {"schema": {}, "description": "A successful request/response"} R201 = {"schema": {"properties": CREATED_TEMPLATE}, "description": "Resource created successfully, response " - "contains the new resource ID and location."} - R201_BULK = {"schema": {"items": {"properties" : CREATED_TEMPLATE, "type" : "object"}, "type" : "array"}, - "description": "Resources created successfully, response contains the new resource IDs " - "and locations."} + "contains the new resource ID and location."} + R201_BULK = {"schema": {"items": {"properties": CREATED_TEMPLATE, "type": "object"}, "type": "array"}, + "description": "Resources created successfully, response contains the new resource IDs " + "and locations."} R204 = {"description": "OK (Request succeeded), No Content"} R400 = {"schema": {"properties": ERROR_TEMPLATE}, "description": "Bad Request. Your request body was missing a " "required field, or the data in one of the " @@ -125,6 +128,13 @@ class Api409Error(Exception): pass +class Api429Error(Exception): + """ + Too many requests + """ + pass + + class Api500Error(Exception): pass @@ -201,7 +211,7 @@ def generate_link_headers(metadata): links.append(Link(v, rel=k)) # e.g. Link("https://example.com/foo", rel="next") return str(LinkHeader(links)) # RFC compliant headers e.g. - # ; rel=next, ; rel=last + # ; rel=next, ; rel=last def respond(data, status, metadata=None): @@ -226,6 +236,16 @@ def respond(data, status, metadata=None): return app.response_class(data, status, headers, mimetype='application/json') +def resp_err(error, log_msg, status_code, status_msg): + magic = uuid.uuid1() + err_msg = str(error) + " (ref: {y})".format(y=magic) + app.logger.info(log_msg + f' -- {err_msg}') + t = deepcopy(ERROR_TEMPLATE) + t['status'] = status_msg + t['error'] = err_msg + return respond(json.dumps(t), status_code) + + @app.errorhandler(Api400Error) def bad_request(error): magic = uuid.uuid1() @@ -266,6 +286,13 @@ def forbidden(error): return respond(json.dumps(t), 403) +@app.errorhandler(Api429Error) +def too_many_requests(error): + return resp_err(error, + f"Sending 429 Too Many Requests from client", + 429, 'too_many_requests') + + @app.errorhandler(Api500Error) def bad_request(error): magic = uuid.uuid1() diff --git a/portality/bll/doaj.py b/portality/bll/doaj.py index 584daa9b8f..ebd889d2cd 100644 --- a/portality/bll/doaj.py +++ b/portality/bll/doaj.py @@ -126,3 +126,12 @@ def tourService(cls): """ from portality.bll.services import tour return tour.TourService() + + @classmethod + def apiRateService(cls): + """ + Obtain an instance of the api_rate service ~~->ApiRate:Service~~ + :return: ApiRateService + """ + from portality.bll.services import api_rate + return api_rate.ApiRateService() diff --git a/portality/bll/services/api_rate.py b/portality/bll/services/api_rate.py new file mode 100644 index 0000000000..742bdbeb58 --- /dev/null +++ b/portality/bll/services/api_rate.py @@ -0,0 +1,75 @@ +import datetime +import functools +import logging + +from portality.api import Api429Error +from portality.core import app +from portality.lib import flask_utils, dates +from portality.models import Account +from portality.models.api_log import ApiLog, ApiRateQuery + +ROLE_API_RATE_T2 = 'api_rate_t2' + + +def count_api_rate(src: str, target: str) -> float: + minutes = 1 + since = dates.now() - datetime.timedelta(minutes=minutes) + count = ApiLog.count(body=ApiRateQuery(src=src, target=target, since=since).query()) + rate = count / minutes + return rate + + +class ApiRateService: + + @staticmethod + def track_api_rate(endpoint_fn): + """ + Decorator for endpoint function to track API rate + it will add api_log and throw 429 error if rate limit exceeded + """ + + @functools.wraps(endpoint_fn) + def fn(*args, **kwargs): + from flask import request + + # define src + src = None + api_user = None + if 'api_key' in request.values: + api_key = request.values['api_key'] + api_user = Account.pull_by_api_key(api_key) + if api_user is None: + app.logger.debug(f'api_key [{api_key}] not found src[{flask_utils.get_remote_addr()}]') + else: + src = api_key + + if src is None: + src = flask_utils.get_remote_addr() + + target = request.url_rule.endpoint + + # rate checking + cur_api_rate = count_api_rate(src, target) + limited_api_rate = ApiRateService.get_allowed_rate(api_user) + app.logger.debug(f'track_api_rate src[{src}] target[{target}] ' + f'cur_rate[{cur_api_rate}] limited[{limited_api_rate}]') + if cur_api_rate > limited_api_rate: + app.logger.info(f'reject src[{src}] target[{target}] rate[{cur_api_rate} > {limited_api_rate}]') + raise Api429Error('Rate limit exceeded') + else: + ApiLog.create(src, target) + + return endpoint_fn(*args, **kwargs) + + return fn + + @staticmethod + def get_allowed_rate(api_user: Account = None) -> float: + if api_user is not None and ApiRateService.is_t2_user(api_user): + return app.config.get('RATE_LIMITS_PER_MIN_T2', 1000) + + return app.config.get('RATE_LIMITS_PER_MIN_DEFAULT', 10) + + @staticmethod + def is_t2_user(api_user: Account) -> bool: + return api_user.has_role(ROLE_API_RATE_T2) diff --git a/portality/dao.py b/portality/dao.py index 4688662338..c064a7e7db 100644 --- a/portality/dao.py +++ b/portality/dao.py @@ -852,10 +852,9 @@ def all(cls, size=10000, **kwargs): return cls.q2obj(size=size, **kwargs) @classmethod - def count(cls): - res = ES.count(index=cls.index_name(), doc_type=cls.doc_type()) + def count(cls, **kwargs): + res = ES.count(index=cls.index_name(), doc_type=cls.doc_type(), **kwargs) return res.get("count") - # return requests.get(cls.target() + '_count').json()['count'] @classmethod def hit_count(cls, query, **kwargs): diff --git a/portality/models/api_log.py b/portality/models/api_log.py index c9f3ac4a90..b2a55bcf6c 100644 --- a/portality/models/api_log.py +++ b/portality/models/api_log.py @@ -1,9 +1,12 @@ +import datetime + from portality.dao import DomainObject +from portality.lib import dates class ApiLog(DomainObject): """~~ApiLog:Model->DomainObject:Model~~""" - __type__ = "api_rate" + __type__ = "api_log" def __init__(self, **kwargs): super(ApiLog, self).__init__(**kwargs) @@ -41,10 +44,34 @@ def target(self, target: str): """ self.data["target"] = target - @property - def created_date(self): - return self.data.get("created_date") + @classmethod + def create(cls, src: str, target: str): + api_log = ApiLog() + api_log.src = src + api_log.target = target + api_log.set_created() + api_log.save() + return api_log + + +class ApiRateQuery: + + def __init__(self, src: str, target: str, since): + if isinstance(since, datetime.datetime): + since = dates.format(since) + self._src = src + self._target = target + self._since = since - @created_date.setter - def created_date(self, val): - self.data["created_date"] = val + def query(self): + return { + 'query': { + 'bool': { + 'must': [ + {'range': {'created_date': {'gte': self._since}}}, + {'term': {'src': self._src}}, + {'term': {'target': self._target}}, + ] + } + } + } diff --git a/portality/settings.py b/portality/settings.py index 29158faab3..925648c508 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -705,6 +705,7 @@ MAPPINGS['provenance'] = MAPPINGS["account"] #~~->Provenance:Model~~ MAPPINGS['preserve'] = MAPPINGS["account"] #~~->Preservation:Model~~ MAPPINGS['notification'] = MAPPINGS["account"] #~~->Notification:Model~~ +MAPPINGS['api_log'] = MAPPINGS["account"] #~~->ApiLog:Model~~ ######################################### # Query Routes @@ -1530,3 +1531,18 @@ # worksheet name or tab name that datalog will write to DATALOG_JA_WORKSHEET_NAME = 'Added' + + + +################################################## +# ApiRate +# ~~->ApiRate:Feature~~ + +# api rate limits per minute for user have no api key or normal api key +RATE_LIMITS_PER_MIN_DEFAULT = 1 + +# api rate limits per minute for user have two-tiered api key +RATE_LIMITS_PER_MIN_T2 = 1000 + + + From 4498c8c7c7483021a56965c15c65bdb88100e91f Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 13 Feb 2024 12:34:54 +0000 Subject: [PATCH 09/23] cleanup --- portality/view/api_v3.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/portality/view/api_v3.py b/portality/view/api_v3.py index adfaa4fb15..316c2cef88 100644 --- a/portality/view/api_v3.py +++ b/portality/view/api_v3.py @@ -52,11 +52,13 @@ def api_spec(): # Handle wayward paths by raising an API404Error -@blueprint.route("/", methods=["POST", "GET", "PUT", "DELETE", "PATCH", "HEAD"]) # leaving out methods should mean all, but tests haven't shown that behaviour. +# leaving out methods should mean all, but tests haven't shown that behaviour. +@blueprint.route("/", methods=["POST", "GET", "PUT", "DELETE", "PATCH", "HEAD"]) def missing_resource(invalid_path): docs_url = app.config.get("BASE_URL", "") + url_for('.docs') spec_url = app.config.get("BASE_URL", "") + url_for('.api_spec') - raise Api404Error("No endpoint at {0}. See {1} for valid paths or read the documentation at {2}.".format(invalid_path, spec_url, docs_url)) + raise Api404Error("No endpoint at {0}. See {1} for valid paths or read the documentation at {2}.".format( + invalid_path, spec_url, docs_url)) @swag(swag_summary='Search your applications [Authenticated, not public]', @@ -309,7 +311,8 @@ def retrieve_journal(journal_id): @write_required(api=True) @swag(swag_summary='Create applications in bulk [Authenticated, not public]', swag_spec=ApplicationsBulkApi.create_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. -@plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('bulk_application_create', 'Bulk application create')) +@plausible.pa_event(ANALYTICS_CATEGORY, + action=ANALYTICS_ACTIONS.get('bulk_application_create', 'Bulk application create')) def bulk_application_create(): # get the data from the request try: @@ -334,7 +337,8 @@ def bulk_application_create(): @write_required(api=True) @swag(swag_summary='Delete applications in bulk [Authenticated, not public]', swag_spec=ApplicationsBulkApi.delete_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. -@plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('bulk_application_delete', 'Bulk application delete')) +@plausible.pa_event(ANALYTICS_CATEGORY, + action=ANALYTICS_ACTIONS.get('bulk_application_delete', 'Bulk application delete')) def bulk_application_delete(): # get the data from the request try: From 00c12f992001dc3f81416a13002aa4a6ab6c4beb Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 13 Feb 2024 12:38:29 +0000 Subject: [PATCH 10/23] rename err_ref_id --- portality/api/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/portality/api/common.py b/portality/api/common.py index 0d40c40aaa..ce84c4ef02 100644 --- a/portality/api/common.py +++ b/portality/api/common.py @@ -237,8 +237,8 @@ def respond(data, status, metadata=None): def resp_err(error, log_msg, status_code, status_msg): - magic = uuid.uuid1() - err_msg = str(error) + " (ref: {y})".format(y=magic) + err_ref_id = uuid.uuid1() + err_msg = str(error) + " (ref: {y})".format(y=err_ref_id) app.logger.info(log_msg + f' -- {err_msg}') t = deepcopy(ERROR_TEMPLATE) t['status'] = status_msg From a28a97889cc9a85182934c8efb29f79b592aeacc Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 13 Feb 2024 13:02:26 +0000 Subject: [PATCH 11/23] change rate limit --- portality/bll/services/api_rate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/portality/bll/services/api_rate.py b/portality/bll/services/api_rate.py index 742bdbeb58..1373decb04 100644 --- a/portality/bll/services/api_rate.py +++ b/portality/bll/services/api_rate.py @@ -53,8 +53,8 @@ def fn(*args, **kwargs): limited_api_rate = ApiRateService.get_allowed_rate(api_user) app.logger.debug(f'track_api_rate src[{src}] target[{target}] ' f'cur_rate[{cur_api_rate}] limited[{limited_api_rate}]') - if cur_api_rate > limited_api_rate: - app.logger.info(f'reject src[{src}] target[{target}] rate[{cur_api_rate} > {limited_api_rate}]') + if cur_api_rate >= limited_api_rate: + app.logger.info(f'reject src[{src}] target[{target}] rate[{cur_api_rate} >= {limited_api_rate}]') raise Api429Error('Rate limit exceeded') else: ApiLog.create(src, target) From 73a12a9d23001c6a699a0761ba46a0bfc7d04cd0 Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 13 Feb 2024 13:02:38 +0000 Subject: [PATCH 12/23] apply track_api_rate --- portality/view/api_v3.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/portality/view/api_v3.py b/portality/view/api_v3.py index 316c2cef88..1254601db4 100644 --- a/portality/view/api_v3.py +++ b/portality/view/api_v3.py @@ -9,6 +9,7 @@ from portality.api.current import DiscoveryApi, DiscoveryException from portality.api.current import jsonify_models, jsonify_data_object, Api400Error, Api404Error, created, \ no_content, bulk_created +from portality.bll import DOAJ from portality.core import app from portality.decorators import api_key_required, api_key_optional, swag, write_required from portality.lib import plausible @@ -20,6 +21,7 @@ # Google Analytics category for API events ANALYTICS_CATEGORY = app.config.get('ANALYTICS_CATEGORY_API', 'API Hit') ANALYTICS_ACTIONS = app.config.get('ANALYTICS_ACTIONS_API', {}) +api_rate_service = DOAJ.apiRateService() @blueprint.route('/') @@ -28,6 +30,7 @@ def api_v3_root(): @blueprint.route('/docs') +@api_rate_service.track_api_rate def docs(): account_url = None if current_user.is_authenticated: @@ -41,6 +44,7 @@ def docs(): @blueprint.route('/swagger.json') +@api_rate_service.track_api_rate def api_spec(): swag = swagger(app) swag['info']['title'] = "" @@ -54,6 +58,7 @@ def api_spec(): # Handle wayward paths by raising an API404Error # leaving out methods should mean all, but tests haven't shown that behaviour. @blueprint.route("/", methods=["POST", "GET", "PUT", "DELETE", "PATCH", "HEAD"]) +@api_rate_service.track_api_rate def missing_resource(invalid_path): docs_url = app.config.get("BASE_URL", "") + url_for('.docs') spec_url = app.config.get("BASE_URL", "") + url_for('.api_spec') @@ -67,6 +72,7 @@ def missing_resource(invalid_path): @api_key_required @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('search_applications', 'Search applications'), record_value_of_which_arg='search_query') +@api_rate_service.track_api_rate def search_applications(search_query): # get the values for the 2 other bits of search info: the page number and the page size page = request.values.get("page", 1) @@ -98,6 +104,7 @@ def search_applications(search_query): @blueprint.route('/search/journals/') @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('search_journals', 'Search journals'), record_value_of_which_arg='search_query') +@api_rate_service.track_api_rate def search_journals(search_query): # get the values for the 2 other bits of search info: the page number and the page size page = request.values.get("page", 1) @@ -129,6 +136,7 @@ def search_journals(search_query): @blueprint.route('/search/articles/') @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('search_articles', 'Search articles'), record_value_of_which_arg='search_query') +@api_rate_service.track_api_rate def search_articles(search_query): # get the values for the 2 other bits of search info: the page number and the page size page = request.values.get("page", 1) @@ -165,6 +173,7 @@ def search_articles(search_query): @swag(swag_summary='Create an application [Authenticated, not public]', swag_spec=ApplicationsCrudApi.create_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('create_application', 'Create application')) +@api_rate_service.track_api_rate def create_application(): # get the data from the request try: @@ -185,6 +194,7 @@ def create_application(): swag_spec=ApplicationsCrudApi.retrieve_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('retrieve_application', 'Retrieve application'), record_value_of_which_arg='application_id') +@api_rate_service.track_api_rate def retrieve_application(application_id): a = ApplicationsCrudApi.retrieve(application_id, current_user) return jsonify_models(a) @@ -197,6 +207,7 @@ def retrieve_application(application_id): swag_spec=ApplicationsCrudApi.update_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('update_application', 'Update application'), record_value_of_which_arg='application_id') +@api_rate_service.track_api_rate def update_application(application_id): # get the data from the request try: @@ -218,6 +229,7 @@ def update_application(application_id): swag_spec=ApplicationsCrudApi.delete_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('delete_application', 'Delete application'), record_value_of_which_arg='application_id') +@api_rate_service.track_api_rate def delete_application(application_id): ApplicationsCrudApi.delete(application_id, current_user._get_current_object()) return no_content() @@ -232,6 +244,7 @@ def delete_application(application_id): @swag(swag_summary='Create an article [Authenticated, not public]', swag_spec=ArticlesCrudApi.create_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('create_article', 'Create article')) +@api_rate_service.track_api_rate def create_article(): # get the data from the request try: @@ -252,6 +265,7 @@ def create_article(): swag_spec=ArticlesCrudApi.retrieve_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('retrieve_article', 'Retrieve article'), record_value_of_which_arg='article_id') +@api_rate_service.track_api_rate def retrieve_article(article_id): a = ArticlesCrudApi.retrieve(article_id, current_user) return jsonify_models(a) @@ -264,6 +278,7 @@ def retrieve_article(article_id): swag_spec=ArticlesCrudApi.update_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('update_article', 'Update article'), record_value_of_which_arg='article_id') +@api_rate_service.track_api_rate def update_article(article_id): # get the data from the request try: @@ -285,6 +300,7 @@ def update_article(article_id): swag_spec=ArticlesCrudApi.delete_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('delete_article', 'Delete article'), record_value_of_which_arg='article_id') +@api_rate_service.track_api_rate def delete_article(article_id): ArticlesCrudApi.delete(article_id, current_user) return no_content() @@ -299,6 +315,7 @@ def delete_article(article_id): swag_spec=JournalsCrudApi.retrieve_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('retrieve_journal', 'Retrieve journal'), record_value_of_which_arg='journal_id') +@api_rate_service.track_api_rate def retrieve_journal(journal_id): return jsonify_data_object(JournalsCrudApi.retrieve(journal_id, current_user)) @@ -313,6 +330,7 @@ def retrieve_journal(journal_id): swag_spec=ApplicationsBulkApi.create_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('bulk_application_create', 'Bulk application create')) +@api_rate_service.track_api_rate def bulk_application_create(): # get the data from the request try: @@ -339,6 +357,7 @@ def bulk_application_create(): swag_spec=ApplicationsBulkApi.delete_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('bulk_application_delete', 'Bulk application delete')) +@api_rate_service.track_api_rate def bulk_application_delete(): # get the data from the request try: @@ -360,6 +379,7 @@ def bulk_application_delete(): @swag(swag_summary='Bulk article creation [Authenticated, not public]', swag_spec=ArticlesBulkApi.create_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('bulk_article_create', 'Bulk article create')) +@api_rate_service.track_api_rate def bulk_article_create(): # get the data from the request try: @@ -385,6 +405,7 @@ def bulk_article_create(): @swag(swag_summary='Bulk article delete [Authenticated, not public]', swag_spec=ArticlesBulkApi.delete_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('bulk_article_delete', 'Bulk article delete')) +@api_rate_service.track_api_rate def bulk_article_delete(): # get the data from the request try: From 2ec461da9dbb8a610f158baa7ccee9adb791fa3f Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 13 Feb 2024 13:04:41 +0000 Subject: [PATCH 13/23] cleanup import --- portality/bll/services/api_rate.py | 1 - 1 file changed, 1 deletion(-) diff --git a/portality/bll/services/api_rate.py b/portality/bll/services/api_rate.py index 1373decb04..0d5a44d2c6 100644 --- a/portality/bll/services/api_rate.py +++ b/portality/bll/services/api_rate.py @@ -1,6 +1,5 @@ import datetime import functools -import logging from portality.api import Api429Error from portality.core import app From 12327088645f84758ec8535f7da54808f96920d2 Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 13 Feb 2024 13:06:27 +0000 Subject: [PATCH 14/23] change RATE_LIMITS_PER_MIN_DEFAULT --- portality/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portality/settings.py b/portality/settings.py index 925648c508..e2b6bb4c5c 100644 --- a/portality/settings.py +++ b/portality/settings.py @@ -1539,7 +1539,7 @@ # ~~->ApiRate:Feature~~ # api rate limits per minute for user have no api key or normal api key -RATE_LIMITS_PER_MIN_DEFAULT = 1 +RATE_LIMITS_PER_MIN_DEFAULT = 10 # api rate limits per minute for user have two-tiered api key RATE_LIMITS_PER_MIN_T2 = 1000 From 0f53aaa05607f02224b1fad89c2e772b208cec41 Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 13 Feb 2024 14:39:00 +0000 Subject: [PATCH 15/23] rename api_rate_serv --- docs/dictionary.md | 21 +++++++++++---------- portality/view/api_v3.py | 40 ++++++++++++++++++++-------------------- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/docs/dictionary.md b/docs/dictionary.md index 49ba798b8f..02ee217810 100644 --- a/docs/dictionary.md +++ b/docs/dictionary.md @@ -1,10 +1,11 @@ -| Short | Description | -|---------|------------------------------| -| bgjob | background job | -| noti | notification | -| noqa | NO-QA (NO Quality Assurance) | -| inst | instance | -| fmt | format | -| exparam | extra parameter | -| maned | Managing Editor | -| gsheet | Google Sheet | \ No newline at end of file +| Short | Description | +|----------|------------------------------| +| bgjob | background job | +| noti | notification | +| noqa | NO-QA (NO Quality Assurance) | +| inst | instance | +| fmt | format | +| exparam | extra parameter | +| maned | Managing Editor | +| gsheet | Google Sheet | +| serv | service | \ No newline at end of file diff --git a/portality/view/api_v3.py b/portality/view/api_v3.py index 1254601db4..943aa15f85 100644 --- a/portality/view/api_v3.py +++ b/portality/view/api_v3.py @@ -21,7 +21,7 @@ # Google Analytics category for API events ANALYTICS_CATEGORY = app.config.get('ANALYTICS_CATEGORY_API', 'API Hit') ANALYTICS_ACTIONS = app.config.get('ANALYTICS_ACTIONS_API', {}) -api_rate_service = DOAJ.apiRateService() +api_rate_serv = DOAJ.apiRateService() @blueprint.route('/') @@ -30,7 +30,7 @@ def api_v3_root(): @blueprint.route('/docs') -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def docs(): account_url = None if current_user.is_authenticated: @@ -44,7 +44,7 @@ def docs(): @blueprint.route('/swagger.json') -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def api_spec(): swag = swagger(app) swag['info']['title'] = "" @@ -58,7 +58,7 @@ def api_spec(): # Handle wayward paths by raising an API404Error # leaving out methods should mean all, but tests haven't shown that behaviour. @blueprint.route("/", methods=["POST", "GET", "PUT", "DELETE", "PATCH", "HEAD"]) -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def missing_resource(invalid_path): docs_url = app.config.get("BASE_URL", "") + url_for('.docs') spec_url = app.config.get("BASE_URL", "") + url_for('.api_spec') @@ -72,7 +72,7 @@ def missing_resource(invalid_path): @api_key_required @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('search_applications', 'Search applications'), record_value_of_which_arg='search_query') -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def search_applications(search_query): # get the values for the 2 other bits of search info: the page number and the page size page = request.values.get("page", 1) @@ -104,7 +104,7 @@ def search_applications(search_query): @blueprint.route('/search/journals/') @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('search_journals', 'Search journals'), record_value_of_which_arg='search_query') -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def search_journals(search_query): # get the values for the 2 other bits of search info: the page number and the page size page = request.values.get("page", 1) @@ -136,7 +136,7 @@ def search_journals(search_query): @blueprint.route('/search/articles/') @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('search_articles', 'Search articles'), record_value_of_which_arg='search_query') -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def search_articles(search_query): # get the values for the 2 other bits of search info: the page number and the page size page = request.values.get("page", 1) @@ -173,7 +173,7 @@ def search_articles(search_query): @swag(swag_summary='Create an application [Authenticated, not public]', swag_spec=ApplicationsCrudApi.create_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('create_application', 'Create application')) -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def create_application(): # get the data from the request try: @@ -194,7 +194,7 @@ def create_application(): swag_spec=ApplicationsCrudApi.retrieve_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('retrieve_application', 'Retrieve application'), record_value_of_which_arg='application_id') -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def retrieve_application(application_id): a = ApplicationsCrudApi.retrieve(application_id, current_user) return jsonify_models(a) @@ -207,7 +207,7 @@ def retrieve_application(application_id): swag_spec=ApplicationsCrudApi.update_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('update_application', 'Update application'), record_value_of_which_arg='application_id') -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def update_application(application_id): # get the data from the request try: @@ -229,7 +229,7 @@ def update_application(application_id): swag_spec=ApplicationsCrudApi.delete_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('delete_application', 'Delete application'), record_value_of_which_arg='application_id') -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def delete_application(application_id): ApplicationsCrudApi.delete(application_id, current_user._get_current_object()) return no_content() @@ -244,7 +244,7 @@ def delete_application(application_id): @swag(swag_summary='Create an article [Authenticated, not public]', swag_spec=ArticlesCrudApi.create_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('create_article', 'Create article')) -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def create_article(): # get the data from the request try: @@ -265,7 +265,7 @@ def create_article(): swag_spec=ArticlesCrudApi.retrieve_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('retrieve_article', 'Retrieve article'), record_value_of_which_arg='article_id') -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def retrieve_article(article_id): a = ArticlesCrudApi.retrieve(article_id, current_user) return jsonify_models(a) @@ -278,7 +278,7 @@ def retrieve_article(article_id): swag_spec=ArticlesCrudApi.update_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('update_article', 'Update article'), record_value_of_which_arg='article_id') -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def update_article(article_id): # get the data from the request try: @@ -300,7 +300,7 @@ def update_article(article_id): swag_spec=ArticlesCrudApi.delete_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('delete_article', 'Delete article'), record_value_of_which_arg='article_id') -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def delete_article(article_id): ArticlesCrudApi.delete(article_id, current_user) return no_content() @@ -315,7 +315,7 @@ def delete_article(article_id): swag_spec=JournalsCrudApi.retrieve_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('retrieve_journal', 'Retrieve journal'), record_value_of_which_arg='journal_id') -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def retrieve_journal(journal_id): return jsonify_data_object(JournalsCrudApi.retrieve(journal_id, current_user)) @@ -330,7 +330,7 @@ def retrieve_journal(journal_id): swag_spec=ApplicationsBulkApi.create_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('bulk_application_create', 'Bulk application create')) -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def bulk_application_create(): # get the data from the request try: @@ -357,7 +357,7 @@ def bulk_application_create(): swag_spec=ApplicationsBulkApi.delete_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('bulk_application_delete', 'Bulk application delete')) -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def bulk_application_delete(): # get the data from the request try: @@ -379,7 +379,7 @@ def bulk_application_delete(): @swag(swag_summary='Bulk article creation [Authenticated, not public]', swag_spec=ArticlesBulkApi.create_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('bulk_article_create', 'Bulk article create')) -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def bulk_article_create(): # get the data from the request try: @@ -405,7 +405,7 @@ def bulk_article_create(): @swag(swag_summary='Bulk article delete [Authenticated, not public]', swag_spec=ArticlesBulkApi.delete_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('bulk_article_delete', 'Bulk article delete')) -@api_rate_service.track_api_rate +@api_rate_serv.track_api_rate def bulk_article_delete(): # get the data from the request try: From bc1d1c75d0f2973bbb8e4c76a5fbfd05ae5bb604 Mon Sep 17 00:00:00 2001 From: philip Date: Tue, 13 Feb 2024 14:39:32 +0000 Subject: [PATCH 16/23] avoid query delay --- portality/bll/services/api_rate.py | 1 + 1 file changed, 1 insertion(+) diff --git a/portality/bll/services/api_rate.py b/portality/bll/services/api_rate.py index 0d5a44d2c6..430bf301e3 100644 --- a/portality/bll/services/api_rate.py +++ b/portality/bll/services/api_rate.py @@ -13,6 +13,7 @@ def count_api_rate(src: str, target: str) -> float: minutes = 1 since = dates.now() - datetime.timedelta(minutes=minutes) + ApiLog.refresh() count = ApiLog.count(body=ApiRateQuery(src=src, target=target, since=since).query()) rate = count / minutes return rate From 200bf6ac5d5014ae9f877f0f14ef7fab3d613456 Mon Sep 17 00:00:00 2001 From: philip Date: Wed, 14 Feb 2024 09:59:44 +0000 Subject: [PATCH 17/23] add test_api_rate_limited --- doajtest/helpers.py | 9 +- .../unit/api_tests/test_api_rate_limited.py | 92 +++++++++++++++++++ portality/app.py | 26 ++++-- 3 files changed, 118 insertions(+), 9 deletions(-) create mode 100644 doajtest/unit/api_tests/test_api_rate_limited.py diff --git a/doajtest/helpers.py b/doajtest/helpers.py index 60c8dd0e85..ccbbca03d7 100644 --- a/doajtest/helpers.py +++ b/doajtest/helpers.py @@ -138,7 +138,14 @@ def create_app_patch(cls): 'ENABLE_EMAIL': False, "FAKER_SEED": 1, "EVENT_SEND_FUNCTION": "portality.events.shortcircuit.send_event", - 'CMS_BUILD_ASSETS_ON_STARTUP': False + 'CMS_BUILD_ASSETS_ON_STARTUP': False, + + # disable send plausible request + 'PLAUSIBLE_URL': '', + 'PLAUSIBLE_API_URL': '', + + 'RATE_LIMITS_PER_MIN_DEFAULT': 1000000, + 'RATE_LIMITS_PER_MIN_T2 ': 1000000, } @classmethod diff --git a/doajtest/unit/api_tests/test_api_rate_limited.py b/doajtest/unit/api_tests/test_api_rate_limited.py new file mode 100644 index 0000000000..2590a44148 --- /dev/null +++ b/doajtest/unit/api_tests/test_api_rate_limited.py @@ -0,0 +1,92 @@ +import json + +from doajtest import fixtures +from doajtest.helpers import DoajTestCase, patch_config +from portality.app import setup_dev_log +from portality.bll import DOAJ +from portality.core import app +from portality.models import Account + +api_rate_serv = DOAJ.apiRateService() + + +def send_test_req(client, api_key=None): + journal_id = '112233' + url = f'/api/journals/{journal_id}' + if api_key is not None: + url += f'?api_key={api_key}' + return client.get(url) + + +def assert_is_too_many_requests(resp): + data = json.loads(resp.data) + assert data['status'] == 'too_many_requests' + assert resp.status_code == 429 + + +def assert_not_too_many_requests(resp): + data = json.loads(resp.data) + assert data['status'] != 'too_many_requests' + assert resp.status_code != 429 + + +class TestApiRateLimited(DoajTestCase): + + @classmethod + def setUpClass(cls) -> None: + super().setUpClass() + cls.originals = patch_config( + app, + { + 'RATE_LIMITS_PER_MIN_DEFAULT': 10, + 'RATE_LIMITS_PER_MIN_T2 ': 1000, + }) + setup_dev_log() + + + def setUp(self): + super().setUp() + self.t2_user = Account(**fixtures.accounts.PUBLISHER_SOURCE) + self.t2_user.generate_api_key() + self.t2_user.add_role('api_rate_t2') + + self.normal_user = Account(**fixtures.accounts.PUBLISHER_B_SOURCE) + self.normal_user.generate_api_key() + + Account.save_all([self.normal_user, self.t2_user]) + Account.refresh() + + def send_multi_req_more_than_default_limit(self, api_key=None): + with self.app_test.test_client() as t_client: + for _ in range(app.config['RATE_LIMITS_PER_MIN_DEFAULT'] + 5): + resp = send_test_req(t_client, api_key=api_key) + return resp + + def send_one_req(self, api_key=None): + with self.app_test.test_client() as t_client: + resp = send_test_req(t_client, api_key=api_key) + return resp + + def test_normal__no_api_key(self): + resp = self.send_one_req() + assert_not_too_many_requests(resp) + + def test_normal__normal_user(self): + resp = self.send_one_req(api_key=self.normal_user.api_key) + assert_not_too_many_requests(resp) + + def test_normal__t2_user(self): + resp = self.send_one_req(api_key=self.t2_user.api_key) + assert_not_too_many_requests(resp) + + def test_multi_req__too_many_requests__no_api_key(self): + resp = self.send_multi_req_more_than_default_limit() + assert_is_too_many_requests(resp) + + def test_multi_req__too_many_requests__normal_user(self): + resp = self.send_multi_req_more_than_default_limit(api_key=self.normal_user.api_key) + assert_is_too_many_requests(resp) + + def test_multi_req__normal__t2_user(self): + resp = self.send_multi_req_more_than_default_limit(api_key=self.t2_user.api_key) + assert_not_too_many_requests(resp) diff --git a/portality/app.py b/portality/app.py index 0e2c330709..309a3f4c14 100644 --- a/portality/app.py +++ b/portality/app.py @@ -446,6 +446,23 @@ def page_not_found(e): return render_template('500.html'), 500 +is_dev_log_setup_completed = False + + +def setup_dev_log(): + global is_dev_log_setup_completed + if not is_dev_log_setup_completed: + is_dev_log_setup_completed = True + app.logger.handlers = [] + log = logging.getLogger() + log.setLevel(logging.DEBUG) + ch = logging.StreamHandler() + ch.setLevel(logging.DEBUG) + ch.setFormatter(logging.Formatter('%(asctime)s %(levelname).4s %(processName)s%(threadName)s - ' + '%(message)s --- [%(name)s][%(funcName)s:%(lineno)d]')) + log.addHandler(ch) + + def run_server(host=None, port=None, fake_https=False): """ :param host: @@ -457,14 +474,7 @@ def run_server(host=None, port=None, fake_https=False): """ if app.config.get('DEBUG_DEV_LOG', False): - app.logger.handlers = [] - log = logging.getLogger() - log.setLevel(logging.DEBUG) - ch = logging.StreamHandler() - ch.setLevel(logging.DEBUG) - ch.setFormatter(logging.Formatter('%(asctime)s %(levelname).4s %(processName)s%(threadName)s - ' - '%(message)s --- [%(name)s][%(funcName)s:%(lineno)d]')) - log.addHandler(ch) + setup_dev_log() pycharm_debug = app.config.get('DEBUG_PYCHARM', False) if len(sys.argv) > 1: From 8dad0f29110e3c7f9b214a2941eec29ace13166a Mon Sep 17 00:00:00 2001 From: philip Date: Wed, 14 Feb 2024 10:37:26 +0000 Subject: [PATCH 18/23] add rate limit doc --- portality/bll/services/api_rate.py | 6 +++++- .../templates/api/current/swagger_description.html | 12 ++++++++++++ portality/view/api_v3.py | 9 ++++++++- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/portality/bll/services/api_rate.py b/portality/bll/services/api_rate.py index 430bf301e3..16b33f6657 100644 --- a/portality/bll/services/api_rate.py +++ b/portality/bll/services/api_rate.py @@ -68,8 +68,12 @@ def get_allowed_rate(api_user: Account = None) -> float: if api_user is not None and ApiRateService.is_t2_user(api_user): return app.config.get('RATE_LIMITS_PER_MIN_T2', 1000) + return ApiRateService.get_default_api_rate() + + @staticmethod + def get_default_api_rate(): return app.config.get('RATE_LIMITS_PER_MIN_DEFAULT', 10) @staticmethod def is_t2_user(api_user: Account) -> bool: - return api_user.has_role(ROLE_API_RATE_T2) + return isinstance(api_user, Account) and api_user.has_role(ROLE_API_RATE_T2) diff --git a/portality/templates/api/current/swagger_description.html b/portality/templates/api/current/swagger_description.html index 3db704c2c2..a67b278db3 100644 --- a/portality/templates/api/current/swagger_description.html +++ b/portality/templates/api/current/swagger_description.html @@ -21,6 +21,18 @@

Authenticated routes

If you already have an account, please log in, click 'My Account' and 'Settings' to see your API key. If you do not see an API key then please contact us

{% endif %} + +

Rate limiting

+

+ {% if is_default_api_rate %} + Default rate limit is {{ api_rate_limit }} requests per minute. API rate can be increase to {{ config.get('RATE_LIMITS_PER_MIN_T2') }}. + {% else %} + Your API key is rate limited to {{ api_rate_limit }} requests per minute. + {% endif %} + If you exceed this limit, you will receive a 429 status code. + If you need a higher rate limit, please contact us. +

+

Help and support

We have 3 API groups that you can join for API announcements and discussion:

diff --git a/portality/view/api_v3.py b/portality/view/api_v3.py index 943aa15f85..2e72695e32 100644 --- a/portality/view/api_v3.py +++ b/portality/view/api_v3.py @@ -36,11 +36,18 @@ def docs(): if current_user.is_authenticated: account_url = url_for('account.username', username=current_user.id, _external=True, _scheme=app.config.get('PREFERRED_URL_SCHEME', 'https')) + + api_rate_limit = api_rate_serv.get_allowed_rate(current_user) + is_default_api_rate = api_rate_limit == api_rate_serv.get_default_api_rate() + return render_template('api/current/api_docs.html', api_version=API_VERSION_NUMBER, base_url=app.config.get("BASE_API_URL", url_for('.api_v3_root')), contact_us_url=url_for('doaj.contact'), - account_url=account_url) + account_url=account_url, + api_rate_limit=api_rate_limit, + is_default_api_rate=is_default_api_rate, + ) @blueprint.route('/swagger.json') From a3b656cc19aabfc22f2cdcf785f5159427d60170 Mon Sep 17 00:00:00 2001 From: philip Date: Wed, 14 Feb 2024 11:03:55 +0000 Subject: [PATCH 19/23] cleanup --- portality/bll/services/api_rate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portality/bll/services/api_rate.py b/portality/bll/services/api_rate.py index 16b33f6657..ef19d46426 100644 --- a/portality/bll/services/api_rate.py +++ b/portality/bll/services/api_rate.py @@ -52,7 +52,7 @@ def fn(*args, **kwargs): cur_api_rate = count_api_rate(src, target) limited_api_rate = ApiRateService.get_allowed_rate(api_user) app.logger.debug(f'track_api_rate src[{src}] target[{target}] ' - f'cur_rate[{cur_api_rate}] limited[{limited_api_rate}]') + f'cur_rate[{cur_api_rate}] limited[{limited_api_rate}]') if cur_api_rate >= limited_api_rate: app.logger.info(f'reject src[{src}] target[{target}] rate[{cur_api_rate} >= {limited_api_rate}]') raise Api429Error('Rate limit exceeded') From 200e09aa4f43c8a2987b835bd671cb574f477bbe Mon Sep 17 00:00:00 2001 From: philip Date: Wed, 6 Mar 2024 14:22:30 +0000 Subject: [PATCH 20/23] rate limit checking for public API will be handled by nginx --- portality/view/api_v3.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/portality/view/api_v3.py b/portality/view/api_v3.py index 2e72695e32..6c98e58e57 100644 --- a/portality/view/api_v3.py +++ b/portality/view/api_v3.py @@ -30,7 +30,6 @@ def api_v3_root(): @blueprint.route('/docs') -@api_rate_serv.track_api_rate def docs(): account_url = None if current_user.is_authenticated: @@ -51,7 +50,6 @@ def docs(): @blueprint.route('/swagger.json') -@api_rate_serv.track_api_rate def api_spec(): swag = swagger(app) swag['info']['title'] = "" @@ -65,7 +63,6 @@ def api_spec(): # Handle wayward paths by raising an API404Error # leaving out methods should mean all, but tests haven't shown that behaviour. @blueprint.route("/", methods=["POST", "GET", "PUT", "DELETE", "PATCH", "HEAD"]) -@api_rate_serv.track_api_rate def missing_resource(invalid_path): docs_url = app.config.get("BASE_URL", "") + url_for('.docs') spec_url = app.config.get("BASE_URL", "") + url_for('.api_spec') @@ -111,7 +108,6 @@ def search_applications(search_query): @blueprint.route('/search/journals/') @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('search_journals', 'Search journals'), record_value_of_which_arg='search_query') -@api_rate_serv.track_api_rate def search_journals(search_query): # get the values for the 2 other bits of search info: the page number and the page size page = request.values.get("page", 1) @@ -143,7 +139,6 @@ def search_journals(search_query): @blueprint.route('/search/articles/') @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('search_articles', 'Search articles'), record_value_of_which_arg='search_query') -@api_rate_serv.track_api_rate def search_articles(search_query): # get the values for the 2 other bits of search info: the page number and the page size page = request.values.get("page", 1) @@ -272,7 +267,6 @@ def create_article(): swag_spec=ArticlesCrudApi.retrieve_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('retrieve_article', 'Retrieve article'), record_value_of_which_arg='article_id') -@api_rate_serv.track_api_rate def retrieve_article(article_id): a = ArticlesCrudApi.retrieve(article_id, current_user) return jsonify_models(a) @@ -322,7 +316,6 @@ def delete_article(article_id): swag_spec=JournalsCrudApi.retrieve_swag()) # must be applied after @api_key_(optional|required) decorators. They don't preserve func attributes. @plausible.pa_event(ANALYTICS_CATEGORY, action=ANALYTICS_ACTIONS.get('retrieve_journal', 'Retrieve journal'), record_value_of_which_arg='journal_id') -@api_rate_serv.track_api_rate def retrieve_journal(journal_id): return jsonify_data_object(JournalsCrudApi.retrieve(journal_id, current_user)) From 1e665ca1e256f413a75c2a67643b6be6cc2d098c Mon Sep 17 00:00:00 2001 From: philip Date: Wed, 6 Mar 2024 14:35:50 +0000 Subject: [PATCH 21/23] src no longer support IP --- portality/bll/services/api_rate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/portality/bll/services/api_rate.py b/portality/bll/services/api_rate.py index ef19d46426..49d3138404 100644 --- a/portality/bll/services/api_rate.py +++ b/portality/bll/services/api_rate.py @@ -39,12 +39,12 @@ def fn(*args, **kwargs): api_key = request.values['api_key'] api_user = Account.pull_by_api_key(api_key) if api_user is None: - app.logger.debug(f'api_key [{api_key}] not found src[{flask_utils.get_remote_addr()}]') + app.logger.debug(f'api_key not found [{api_key}]') else: src = api_key if src is None: - src = flask_utils.get_remote_addr() + raise ValueError('api_key not found') target = request.url_rule.endpoint From 78bbac8f683cdcf33ad921bf8b7ae2b0384e5a37 Mon Sep 17 00:00:00 2001 From: philip Date: Wed, 6 Mar 2024 14:36:01 +0000 Subject: [PATCH 22/23] edges --- portality/static/vendor/edges | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portality/static/vendor/edges b/portality/static/vendor/edges index 990f422016..fa15f74f85 160000 --- a/portality/static/vendor/edges +++ b/portality/static/vendor/edges @@ -1 +1 @@ -Subproject commit 990f4220163a3e18880f0bdc3ad5c80d234d22dd +Subproject commit fa15f74f858c558d4ba62d4fbb10c2c71eb902c6 From fee39361c59386705a4266612cb074716d65c707 Mon Sep 17 00:00:00 2001 From: philip Date: Thu, 7 Mar 2024 10:41:49 +0000 Subject: [PATCH 23/23] update testcases --- .../unit/api_tests/test_api_rate_limited.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/doajtest/unit/api_tests/test_api_rate_limited.py b/doajtest/unit/api_tests/test_api_rate_limited.py index 2590a44148..ce9d5b25d8 100644 --- a/doajtest/unit/api_tests/test_api_rate_limited.py +++ b/doajtest/unit/api_tests/test_api_rate_limited.py @@ -10,11 +10,8 @@ api_rate_serv = DOAJ.apiRateService() -def send_test_req(client, api_key=None): - journal_id = '112233' - url = f'/api/journals/{journal_id}' - if api_key is not None: - url += f'?api_key={api_key}' +def send_test_req(client, api_key): + url = '/api/search/applications/issn%3A0000-0000?api_key=' + api_key return client.get(url) @@ -26,7 +23,7 @@ def assert_is_too_many_requests(resp): def assert_not_too_many_requests(resp): data = json.loads(resp.data) - assert data['status'] != 'too_many_requests' + assert data['query'] assert resp.status_code != 429 @@ -43,7 +40,6 @@ def setUpClass(cls) -> None: }) setup_dev_log() - def setUp(self): super().setUp() self.t2_user = Account(**fixtures.accounts.PUBLISHER_SOURCE) @@ -67,10 +63,6 @@ def send_one_req(self, api_key=None): resp = send_test_req(t_client, api_key=api_key) return resp - def test_normal__no_api_key(self): - resp = self.send_one_req() - assert_not_too_many_requests(resp) - def test_normal__normal_user(self): resp = self.send_one_req(api_key=self.normal_user.api_key) assert_not_too_many_requests(resp) @@ -79,10 +71,6 @@ def test_normal__t2_user(self): resp = self.send_one_req(api_key=self.t2_user.api_key) assert_not_too_many_requests(resp) - def test_multi_req__too_many_requests__no_api_key(self): - resp = self.send_multi_req_more_than_default_limit() - assert_is_too_many_requests(resp) - def test_multi_req__too_many_requests__normal_user(self): resp = self.send_multi_req_more_than_default_limit(api_key=self.normal_user.api_key) assert_is_too_many_requests(resp)