Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Development #247

Merged
merged 38 commits into from
Jul 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
840cc1c
syntax error fix
snacktavish May 9, 2023
c0e25a9
test hardcoded caching fix
snacktavish May 23, 2023
2206459
instantiate phylesystem
snacktavish May 23, 2023
cf7fff8
try keepalive-no in requests
snacktavish May 23, 2023
d5ac28a
remove bad log statement
snacktavish May 23, 2023
7a139af
remove unsuccessful attempt to not keep alive via requests
snacktavish May 23, 2023
9bce36a
Update example config with newer expected vars
jimallman May 24, 2023
f6cbf29
Test some approaches to modified Connection header
jimallman May 24, 2023
14ef919
Merge branch 'tag-fix' of github.com:OpenTreeOfLife/phylesystem-api i…
jimallman May 24, 2023
2995bba
Revise testing of modified headers
jimallman May 24, 2023
b3bbbe2
Simpler fix to Connection header
jimallman May 24, 2023
2bd29b4
Show all headers (something's weird)
jimallman May 24, 2023
db88ff8
Argh, trying again for a full header listing
jimallman May 24, 2023
5d93b27
One more time, trying to remove Connection header
jimallman May 24, 2023
e2e25fe
More test chatter
jimallman May 24, 2023
d28f5df
Remove RESPONSE hop-by-hop headers
jimallman May 24, 2023
a70ecfe
Safer removal of optional keys
jimallman May 24, 2023
f0412e1
Clean up code, add more hop-by-hop headers
jimallman May 24, 2023
d5ed564
Cleanup and consolidate hop-by-hop definitions
jimallman May 25, 2023
ca42fd6
Merge branch 'master' into tag-fix
jimallman Jun 22, 2023
6b9a6ca
Add POST payload (request body) to our cache keys!
jimallman Jun 27, 2023
5fdd74f
Emulate stronger cache keys from legacy web2py
jimallman Jun 27, 2023
5c7220b
Log unique cache keys for review
jimallman Jun 27, 2023
b05a697
Encode request body for simpler cache keys
jimallman Jun 27, 2023
a1ba35c
Remove log chatter
jimallman Jun 27, 2023
a8a0c74
get base_url from config
snacktavish Jul 7, 2023
917d3ba
fix config syntax
snacktavish Jul 7, 2023
0a110c2
git failure log still not working
snacktavish Jul 11, 2023
a07d554
attempt to fix trailing slash on get
snacktavish Jul 11, 2023
7f83503
restore accidentally deleted route
snacktavish Jul 11, 2023
6fd64f4
optional trailing slash on all study fetch
snacktavish Jul 11, 2023
8e9c1a8
optional return to split fetch
snacktavish Jul 11, 2023
5d7f5f9
debug extension
snacktavish Jul 11, 2023
d1fc6b3
Merge pull request #240 from OpenTreeOfLife/tip-labels
snacktavish Jul 11, 2023
a6f8aa4
only render to JSON if outformat is json
mtholder Jul 12, 2023
ec6cb5c
Merge pull request #242 from OpenTreeOfLife/nonjsontree
snacktavish Jul 12, 2023
71b6014
log exceptions generated when fetching a study from the docstore
mtholder Jul 18, 2023
baad1da
Merge pull request #246 from OpenTreeOfLife/log-get-exceptions
snacktavish Jul 18, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions api.config.example
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Rename this file to "api.config" to make it active

[apis]
api_version = 3
default_apis_base_url = {{ apis_common_base_URL }}
production_apis_base_url = {{ apis_production_base_URL }}

# if true, blocks all writing behavior
read_only = READ_ONLY_MODE
Expand Down Expand Up @@ -37,11 +40,12 @@ following_repo_remote = FOLLOWING_REPO_REMOTE
git_ssh = GIT_SSH
pkey = PKEY

oti_base_url = OTI_BASE_URL
otindex_base_url = OTINDEX_BASE_URL
collections_api_base_url = COLLECTIONS_API_BASE_URL
amendments_api_base_url = AMENDMENTS_API_BASE_URL
favorites_api_base_url = FAVORITES_API_BASE_URL
# deprecate these overly-specific base URLs? or copy same value to all?
##oti_base_url = OTI_BASE_URL
##otindex_base_url = OTINDEX_BASE_URL
##collections_api_base_url = COLLECTIONS_API_BASE_URL
##amendments_api_base_url = AMENDMENTS_API_BASE_URL
##favorites_api_base_url = FAVORITES_API_BASE_URL

opentree_docstore_url = OPENTREE_DOCSTORE_URL
# Push scripts will substitute the OTI_BASE_URL server-config variables, shared
Expand Down
4 changes: 2 additions & 2 deletions phylesystem_api/phylesystem_api/api_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
READ_ONLY_MODE = True

def get_private_dir(request):
_LOG.debug("WHY PROVATE DIR")
_LOG.debug("WHY PRIVATE DIR")
return "~/private/"

def atomic_write_json_if_not_found(obj, dest, request):
Expand Down Expand Up @@ -669,7 +669,7 @@ def find_in_request(request, property_name, default_value=None, return_all_value
# N.B. HTML comments are stripped by default. Non-allowed tags will appear
# "naked" in output, so we can identify any bad actors.
allowed_curation_comment_tags = ['p', 'br', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'pre', 'code'] # any others?
ot_markdown_tags = list(set( bleach.sanitizer.ALLOWED_TAGS + allowed_curation_comment_tags))
ot_markdown_tags = list(set(bleach.sanitizer.ALLOWED_TAGS)) + list(set(allowed_curation_comment_tags))
# allow hyperlinks with target="_blank"
ot_markdown_attributes = {}
ot_markdown_attributes.update(bleach.sanitizer.ALLOWED_ATTRIBUTES)
Expand Down
4 changes: 3 additions & 1 deletion phylesystem_api/phylesystem_api/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ def includeme(config):
config.add_route('merge_docstore_changes', '/{api_version}/merge_docstore_changes/{doc_id}/{starting_commit_SHA}')
config.add_route('push_docstore_changes', '/{api_version}/push_docstore_changes/{doc_type}/{doc_id}')
config.add_route('push_docstore_changes_bare', '/{api_version}/push_docstore_changes', request_method='PUT')

#
# STUDY/TREE ROUTES
#
Expand All @@ -29,12 +28,15 @@ def includeme(config):
config.add_route('create_study', '/{api_version}/study/')
config.add_route('study_CORS_preflight', '/{api_version}/study/{study_id}', request_method='OPTIONS')
config.add_route('fetch_study', '/{api_version}/study/{study_id}', request_method='GET')
config.add_route('fetch_study_label', '/{api_version}/study/{study_id}/', request_method='GET')
config.add_route('update_study', '/{api_version}/study/{study_id}', request_method='PUT')
config.add_route('delete_study', '/{api_version}/study/{study_id}', request_method='DELETE')
config.add_route('get_study_file_list', '/{api_version}/study/{study_id}/file')
config.add_route('get_study_single_file', '/{api_version}/study/{study_id}/file/{file_id}')
config.add_route('get_study_external_url', '/{api_version}/study/external_url/{study_id}')
config.add_route('get_study_tree', '/{api_version}/study/{study_id}/tree/{tree_id_with_extension}')
config.add_route('get_study_tree_label', '/{api_version}/study/{study_id}/tree/{tree_id_with_extension}/')

#
# TREE COLLECTION ROUTES
#
Expand Down
43 changes: 41 additions & 2 deletions phylesystem_api/phylesystem_api/views/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,15 @@ def base_API_view(request):
"source_url": "https://github.com/OpenTreeOfLife/phylesystem-api"
}



def create_unique_cache_key(target_url, request):
    """Build the cache key used for a pull-through request.

    The key combines the request method (HTTP verb), the target URL and
    the request body, so that e.g. OPTIONS vs. GET requests, or POSTs to
    the same URL with different payloads, get distinct cache entries.

    Args:
        target_url: the URL (or URL fragment) being fetched.
        request: the incoming request; only its ``method`` (str) and
            ``body`` (bytes) attributes are read.

    Returns:
        A string of the form ``cached:<method>:<target_url>:<body>``.
    """
    # Decode leniently: a body that is not valid UTF-8 would otherwise
    # raise UnicodeDecodeError here. Invalid bytes become \xNN escapes,
    # so different binary payloads still produce different keys, while
    # keys for valid UTF-8 bodies are unchanged.
    body_text = request.body.decode('utf-8', errors='backslashreplace')
    return "cached:{}:{}:{}".format(request.method, target_url, body_text)

@view_config(route_name='pull_through_cache')
def pull_through_cache(request):
"""
Expand All @@ -77,23 +86,46 @@ def pull_through_cache(request):
"""
# _LOG = api_utils.get_logger(request, 'ot_api')
api_utils.raise_on_CORS_preflight(request)

# gather any request elements used to build a unique cache key
target_url = request.matchdict.get('target_url')
_LOG.warn(">> target_url: {}".format(target_url))

@cache_region('short_term', 'pull-through')
# Some headers should not be used when adding to our RAM cache
hop_by_hop_headers = ['Keep-Alive',
'Transfer-Encoding',
'TE',
'Connection',
'Trailer',
'Upgrade',
'Proxy-Authorization',
'Proxy-Authenticate',
]

@cache_region('short_term', create_unique_cache_key(target_url, request))
def fetch_and_cache(url):
# let's restrict this to URLs on this api server, to avoid shenanigans
#import pdb; pdb.set_trace()
root_relative_url = "/{}".format(url)
_LOG.warn(">> root_relative_url: {}".format(root_relative_url))
fetch_url = request.relative_url(root_relative_url)
conf = api_utils.get_conf_object(request)
base_url = conf.get("apis", "default_apis_base_url")
fetch_url = base_url + root_relative_url
_LOG.warn("NOT CACHED, FETCHING THIS URL: {}".format(fetch_url))
_LOG.warn(" request.method = {}".format(request.method))

# modify or discard "hop-by-hop" headers
for bad_header in hop_by_hop_headers:
request.headers.pop(bad_header, None)
#_LOG.warn(" MODIFIED request.headers:")
#_LOG.warn( dict(request.headers) )

try:
if request.method == 'POST':
# assume a typical API request with JSON payload
# (pass this along unchanged)
_LOG.warn(" treating as POST")
_LOG.warn(" headers: {}".format(request.headers))
fetched = requests.post(url=fetch_url,
data=request.body,
headers=request.headers)
Expand All @@ -110,6 +142,13 @@ def fetch_and_cache(url):
_LOG.warn("... and now we're back with fetched, which is a {}".format( type(fetched) ))
fetched.raise_for_status()
fetched.encoding = 'utf-8' # Optional: requests infers this internally

# modify or discard "hop-by-hop" headers
for bad_header in hop_by_hop_headers:
fetched.headers.pop(bad_header, None)
#_LOG.warn(" MODIFIED fetched.headers:")
#_LOG.warn( dict(fetched.headers) )

try:
test_for_json = fetched.json() # missing JSON payload will raise an error
return Response(
Expand Down
20 changes: 13 additions & 7 deletions phylesystem_api/phylesystem_api/views/study.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,22 +162,23 @@ def __finish_write_verb(phylesystem,




@view_config(route_name='fetch_study', renderer=None)
@view_config(route_name='fetch_study', renderer=None, request_method='GET')
@view_config(route_name='fetch_study_label', renderer=None, request_method='GET')
def fetch_study(request):
repo_parent, repo_remote, git_ssh, pkey, git_hub_remote, max_filesize, max_num_trees, read_only_mode = api_utils.read_phylesystem_config(request)
#_LOG = api_utils.get_logger(request, 'ot_api.default.v1')
_LOG.debug("Fetching study")
api_version = request.matchdict['api_version']
study_id = request.matchdict['study_id']
_LOG.debug('study_id = {}'.format(study_id))
content_id = None
version_history = None
comment_html = None
final_path_part = request.path.split('/')[-1] ##TODO What if there are other parts...
# does this look like a filename? if so, grab its extension
request_extension = None
fpps = final_path_part.split('.')
fpps = study_id.split('.')
if len(fpps) > 1:
_LOG.debug('len(fpps) > 1')
request_extension = fpps[-1]
study_id = '.'.join(fpps[:-1])
_LOG.debug("Request extension is {}".format)
Expand Down Expand Up @@ -208,7 +209,7 @@ def fetch_study(request):
except:
comment_html = ''
except:
# _LOG.exception('GET failed')
_LOG.exception('GET failed')
e = sys.exc_info()[0]
raise HTTPBadRequest(e)

Expand Down Expand Up @@ -692,7 +693,8 @@ def get_study_external_url(request):
except:
raise HTTPNotFound(body='{"error": 1, "description": "study not found"}')

@view_config(route_name='get_study_tree', renderer='json')
@view_config(route_name='get_study_tree', renderer=None)
@view_config(route_name='get_study_tree_label', renderer=None)
def get_study_tree(request):
api_utils.raise_on_CORS_preflight(request)

Expand Down Expand Up @@ -749,4 +751,8 @@ def get_study_tree(request):
if result_data is None:
raise HTTPNotFound(body='subresource "tree/{t}" not found in study "{s}"'.format(t=tree_id,
s=study_id))
return result_data
if out_schema.is_json():
return render_to_response('json', result_data, request)
else:
# _LOG.debug(result_data)
return render_to_response('string', result_data, request)