diff --git a/api.config.example b/api.config.example index ac7181fc..3a476e0e 100644 --- a/api.config.example +++ b/api.config.example @@ -1,6 +1,9 @@ # Rename this file to "api.config" to make it active [apis] +api_version = 3 +default_apis_base_url = {{ apis_common_base_URL }} +production_apis_base_url = {{ apis_production_base_URL }} # if true, blocks all writing behavior read_only = READ_ONLY_MODE @@ -37,11 +40,12 @@ following_repo_remote = FOLLOWING_REPO_REMOTE git_ssh = GIT_SSH pkey = PKEY -oti_base_url = OTI_BASE_URL -otindex_base_url = OTINDEX_BASE_URL -collections_api_base_url = COLLECTIONS_API_BASE_URL -amendments_api_base_url = AMENDMENTS_API_BASE_URL -favorites_api_base_url = FAVORITES_API_BASE_URL +# deprecate these overly-specific base URLs? or copy same value to all? +##oti_base_url = OTI_BASE_URL +##otindex_base_url = OTINDEX_BASE_URL +##collections_api_base_url = COLLECTIONS_API_BASE_URL +##amendments_api_base_url = AMENDMENTS_API_BASE_URL +##favorites_api_base_url = FAVORITES_API_BASE_URL opentree_docstore_url = OPENTREE_DOCSTORE_URL # Push scripts will substitute the OTI_BASE_URL server-config variables, shared diff --git a/phylesystem_api/phylesystem_api/api_utils.py b/phylesystem_api/phylesystem_api/api_utils.py index fda76e7d..a6e04368 100644 --- a/phylesystem_api/phylesystem_api/api_utils.py +++ b/phylesystem_api/phylesystem_api/api_utils.py @@ -43,7 +43,7 @@ READ_ONLY_MODE = True def get_private_dir(request): - _LOG.debug("WHY PROVATE DIR") + _LOG.debug("WHY PRIVATE DIR") return "~/private/" def atomic_write_json_if_not_found(obj, dest, request): @@ -669,7 +669,7 @@ def find_in_request(request, property_name, default_value=None, return_all_value # N.B. HTML comments are stripped by default. Non-allowed tags will appear # "naked" in output, so we can identify any bad actors. allowed_curation_comment_tags = ['p', 'br', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'pre', 'code'] # any others? -ot_markdown_tags = list(set( bleach.sanitizer.ALLOWED_TAGS + allowed_curation_comment_tags)) +ot_markdown_tags = list(set(bleach.sanitizer.ALLOWED_TAGS)) + list(set(allowed_curation_comment_tags)) # allow hyperlinks with target="_blank" ot_markdown_attributes = {} ot_markdown_attributes.update(bleach.sanitizer.ALLOWED_ATTRIBUTES) diff --git a/phylesystem_api/phylesystem_api/routes.py b/phylesystem_api/phylesystem_api/routes.py index 2ccc0326..918c59cf 100644 --- a/phylesystem_api/phylesystem_api/routes.py +++ b/phylesystem_api/phylesystem_api/routes.py @@ -16,7 +16,6 @@ def includeme(config): config.add_route('merge_docstore_changes', '/{api_version}/merge_docstore_changes/{doc_id}/{starting_commit_SHA}') config.add_route('push_docstore_changes', '/{api_version}/push_docstore_changes/{doc_type}/{doc_id}') config.add_route('push_docstore_changes_bare', '/{api_version}/push_docstore_changes', request_method='PUT') - # # STUDY/TREE ROUTES # @@ -29,12 +28,15 @@ def includeme(config): config.add_route('create_study', '/{api_version}/study/') config.add_route('study_CORS_preflight', '/{api_version}/study/{study_id}', request_method='OPTIONS') config.add_route('fetch_study', '/{api_version}/study/{study_id}', request_method='GET') + config.add_route('fetch_study_label', '/{api_version}/study/{study_id}/', request_method='GET') config.add_route('update_study', '/{api_version}/study/{study_id}', request_method='PUT') config.add_route('delete_study', '/{api_version}/study/{study_id}', request_method='DELETE') config.add_route('get_study_file_list', '/{api_version}/study/{study_id}/file') config.add_route('get_study_single_file', '/{api_version}/study/{study_id}/file/{file_id}') config.add_route('get_study_external_url', '/{api_version}/study/external_url/{study_id}') config.add_route('get_study_tree', '/{api_version}/study/{study_id}/tree/{tree_id_with_extension}') + config.add_route('get_study_tree_label', '/{api_version}/study/{study_id}/tree/{tree_id_with_extension}/') + # # TREE COLLECTION ROUTES # diff --git a/phylesystem_api/phylesystem_api/views/default.py b/phylesystem_api/phylesystem_api/views/default.py index 32ca5d99..e1fa7faa 100644 --- a/phylesystem_api/phylesystem_api/views/default.py +++ b/phylesystem_api/phylesystem_api/views/default.py @@ -67,6 +67,15 @@ def base_API_view(request): "source_url": "https://github.com/OpenTreeOfLife/phylesystem-api" } + + +# Create a unique cache key with the URL and any vars (GET *and* POST) to its "query string" +# ALSO include the request method (HTTP verb) to respond to OPTIONS requests +def create_unique_cache_key(target_url, request): + unique_key = "cached:{}:{}:{}".format(request.method, target_url, request.body.decode('utf-8')) + #_LOG.warn(">> unique cache key: {}".format(unique_key)) + return unique_key + @view_config(route_name='pull_through_cache') def pull_through_cache(request): """ @@ -77,23 +86,46 @@ def pull_through_cache(request): """ # _LOG = api_utils.get_logger(request, 'ot_api') api_utils.raise_on_CORS_preflight(request) + + # gather any request elements used to build a unique cache key target_url = request.matchdict.get('target_url') _LOG.warn(">> target_url: {}".format(target_url)) - @cache_region('short_term', 'pull-through') + # Some headers should not be used when adding to our RAM cache + hop_by_hop_headers = ['Keep-Alive', + 'Transfer-Encoding', + 'TE', + 'Connection', + 'Trailer', + 'Upgrade', + 'Proxy-Authorization', + 'Proxy-Authenticate', + ] + + @cache_region('short_term', create_unique_cache_key(target_url, request)) def fetch_and_cache(url): # let's restrict this to URLs on this api server, to avoid shenanigans #import pdb; pdb.set_trace() root_relative_url = "/{}".format(url) _LOG.warn(">> root_relative_url: {}".format(root_relative_url)) - fetch_url = request.relative_url(root_relative_url) + conf = api_utils.get_conf_object(request) + base_url = conf.get("apis", "default_apis_base_url") + fetch_url = base_url + root_relative_url _LOG.warn("NOT CACHED, FETCHING THIS URL: {}".format(fetch_url)) _LOG.warn(" request.method = {}".format(request.method)) + + # modify or discard "hop-by-hop" headers + for bad_header in hop_by_hop_headers: + request.headers.pop(bad_header, None) + #_LOG.warn(" MODIFIED request.headers:") + #_LOG.warn( dict(request.headers) ) + try: if request.method == 'POST': # assume a typical API request with JSON payload # (pass this along unchanged) _LOG.warn(" treating as POST") + _LOG.warn(" headers: {}".format(request.headers)) fetched = requests.post(url=fetch_url, data=request.body, headers=request.headers) @@ -110,6 +142,13 @@ def fetch_and_cache(url): _LOG.warn("... and now we're back with fetched, which is a {}".format( type(fetched) )) fetched.raise_for_status() fetched.encoding = 'utf-8' # Optional: requests infers this internally + + # modify or discard "hop-by-hop" headers + for bad_header in hop_by_hop_headers: + fetched.headers.pop(bad_header, None) + #_LOG.warn(" MODIFIED fetched.headers:") + #_LOG.warn( dict(fetched.headers) ) + try: test_for_json = fetched.json() # missing JSON payload will raise an error return Response( diff --git a/phylesystem_api/phylesystem_api/views/study.py b/phylesystem_api/phylesystem_api/views/study.py index c8294a34..c34b7c59 100644 --- a/phylesystem_api/phylesystem_api/views/study.py +++ b/phylesystem_api/phylesystem_api/views/study.py @@ -162,22 +162,23 @@ def __finish_write_verb(phylesystem, - -@view_config(route_name='fetch_study', renderer=None) +@view_config(route_name='fetch_study', renderer=None, request_method='GET') +@view_config(route_name='fetch_study_label', renderer=None, request_method='GET') def fetch_study(request): repo_parent, repo_remote, git_ssh, pkey, git_hub_remote, max_filesize, max_num_trees, read_only_mode = api_utils.read_phylesystem_config(request) #_LOG = api_utils.get_logger(request, 'ot_api.default.v1') _LOG.debug("Fetching study") api_version = request.matchdict['api_version'] study_id = request.matchdict['study_id'] + _LOG.debug('study_id = {}'.format(study_id)) content_id = None version_history = None comment_html = None - final_path_part = request.path.split('/')[-1] ##TODO What if there are other parts... # does this look like a filename? if so, grab its extension request_extension = None - fpps = final_path_part.split('.') + fpps = study_id.split('.') if len(fpps) > 1: + _LOG.debug('len(fpps) > 1') request_extension = fpps[-1] study_id = '.'.join(fpps[:-1]) _LOG.debug("Request extension is {}".format) @@ -208,7 +209,7 @@ def fetch_study(request): except: comment_html = '' except: - # _LOG.exception('GET failed') + _LOG.exception('GET failed') e = sys.exc_info()[0] raise HTTPBadRequest(e) @@ -692,7 +693,8 @@ def get_study_external_url(request): except: raise HTTPNotFound(body='{"error": 1, "description": "study not found"}') -@view_config(route_name='get_study_tree', renderer='json') +@view_config(route_name='get_study_tree', renderer=None) +@view_config(route_name='get_study_tree_label', renderer=None) def get_study_tree(request): api_utils.raise_on_CORS_preflight(request) @@ -749,4 +751,8 @@ def get_study_tree(request): if result_data is None: raise HTTPNotFound(body='subresource "tree/{t}" not found in study "{s}"'.format(t=tree_id, s=study_id)) - return result_data + if out_schema.is_json(): + return render_to_response('json', result_data, request) + else: + # _LOG.debug(result_data) + return render_to_response('string', result_data, request)