Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

storageService: add new query timeout #1170

Merged
merged 1 commit into from
Jul 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/MCPClient/install/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,12 @@ This is the full list of variables supported by MCPClient:
- **Type:** `float`
- **Default:** `86400`

- **`ARCHIVEMATICA_MCPCLIENT_MCPCLIENT_STORAGE_SERVICE_CLIENT_QUICK_TIMEOUT`**:
- **Description:** configures the Storage Service client to stop waiting for a response after a given number of seconds when the client uses asynchronous API endpoints.
- **Config file example:** `MCPClient.storage_service_client_quick_timeout`
- **Type:** `float`
- **Default:** `5`

- **`ARCHIVEMATICA_MCPCLIENT_MCPCLIENT_AGENTARCHIVES_CLIENT_TIMEOUT`**:
- **Description:** configures the agentarchives client to stop waiting for a response after a given number of seconds.
- **Config file example:** `MCPClient.agentarchives_client_timeout`
Expand Down
3 changes: 3 additions & 0 deletions src/MCPClient/lib/settings/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
'temp_directory': {'section': 'MCPClient', 'option': 'temp_dir', 'type': 'string'},
'secret_key': {'section': 'MCPClient', 'option': 'django_secret_key', 'type': 'string'},
'storage_service_client_timeout': {'section': 'MCPClient', 'option': 'storage_service_client_timeout', 'type': 'float'},
'storage_service_client_quick_timeout': {'section': 'MCPClient', 'option': 'storage_service_client_quick_timeout', 'type': 'float'},
'agentarchives_client_timeout': {'section': 'MCPClient', 'option': 'agentarchives_client_timeout', 'type': 'float'},

# [antivirus]
Expand Down Expand Up @@ -85,6 +86,7 @@
clamav_server = /var/run/clamav/clamd.ctl
clamav_pass_by_stream = True
storage_service_client_timeout = 86400
storage_service_client_quick_timeout = 5
agentarchives_client_timeout = 300
clamav_client_timeout = 86400
clamav_client_backend = clamdscanner ; Options: clamdscanner or clamscanner
Expand Down Expand Up @@ -221,6 +223,7 @@
CLAMAV_CLIENT_MAX_FILE_SIZE = config.get('clamav_client_max_file_size')
CLAMAV_CLIENT_MAX_SCAN_SIZE = config.get('clamav_client_max_scan_size')
STORAGE_SERVICE_CLIENT_TIMEOUT = config.get('storage_service_client_timeout')
STORAGE_SERVICE_CLIENT_QUICK_TIMEOUT = config.get('storage_service_client_quick_timeout')
AGENTARCHIVES_CLIENT_TIMEOUT = config.get('agentarchives_client_timeout')
SEARCH_ENABLED = config.get('search_enabled')
DEFAULT_CHECKSUM_ALGORITHM = 'sha256'
Expand Down
26 changes: 16 additions & 10 deletions src/archivematicaCommon/lib/storageService.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def _storage_service_url():
return storage_service_url


def _storage_api_session(timeout=django_settings.STORAGE_SERVICE_CLIENT_TIMEOUT):
def _storage_api_session(timeout=django_settings.STORAGE_SERVICE_CLIENT_QUICK_TIMEOUT):
"""Return a requests.Session with a customized adapter with timeout support."""
class HTTPAdapterWithTimeout(requests.adapters.HTTPAdapter):
def __init__(self, timeout=None, *args, **kwargs):
Expand All @@ -75,6 +75,11 @@ def send(self, *args, **kwargs):
return session


def _storage_api_slow_session():
    """Build a Storage Service session for long-running requests.

    Delegates to ``_storage_api_session``, passing the
    ``STORAGE_SERVICE_CLIENT_TIMEOUT`` Django setting as the timeout
    instead of relying on that function's default.
    """
    slow_timeout = django_settings.STORAGE_SERVICE_CLIENT_TIMEOUT
    return _storage_api_session(timeout=slow_timeout)


def _storage_api_params():
"""Return API GET params username=USERNAME&api_key=KEY for use in URL."""
username = get_setting('storage_service_user', 'test')
Expand Down Expand Up @@ -307,13 +312,14 @@ def create_file(uuid, origin_location, origin_path, current_location,

LOGGER.info("Creating file with %s", new_file)
try:
session = _storage_api_session()
if update:
session = _storage_api_slow_session()
new_file['reingest'] = pipeline['uuid']
url = _storage_service_url() + 'file/' + uuid + '/'
response = session.put(url, json=new_file)
return (response.json(), None)
else:
session = _storage_api_session()
url = _storage_service_url() + 'file/async/'
response = session.post(url, json=new_file, allow_redirects=False)
return wait_for_async(response)
Expand Down Expand Up @@ -346,7 +352,7 @@ def get_file_info(uuid=None, origin_location=None, origin_path=None,
'offset': 0,
}
while True:
response = _storage_api_session().get(url, params=params)
response = _storage_api_slow_session().get(url, params=params)
files = response.json()
return_files += files['objects']
if not files['meta']['next']:
Expand Down Expand Up @@ -383,7 +389,7 @@ def extract_file(uuid, relative_path, save_path):
""" Fetches `relative_path` from package with `uuid` and saves to `save_path`. """
url = _storage_service_url() + 'file/' + uuid + '/extract_file/'
params = {'relative_path_to_file': relative_path}
response = _storage_api_session().get(url, params=params, stream=True)
response = _storage_api_slow_session().get(url, params=params, stream=True)
chunk_size = 1024 * 1024
with open(save_path, 'wb') as f:
for chunk in response.iter_content(chunk_size):
Expand Down Expand Up @@ -418,7 +424,7 @@ def request_reingest(package_uuid, reingest_type, processing_config):
}
url = _storage_service_url() + 'file/' + package_uuid + '/reingest/'
try:
response = _storage_api_session().post(url, json=api_request)
response = _storage_api_slow_session().post(url, json=api_request)
except requests.ConnectionError:
LOGGER.exception("Could not connect to storage service")
return {'error': True, 'message': 'Could not connect to storage service'}
Expand Down Expand Up @@ -447,7 +453,7 @@ def request_file_deletion(uuid, user_id, user_email, reason_for_deletion):

def post_store_aip_callback(uuid):
url = _storage_service_url() + 'file/' + uuid + '/send_callback/post_store/'
response = _storage_api_session().get(url)
response = _storage_api_slow_session().get(url)
try:
return response.json()
except Exception:
Expand All @@ -456,26 +462,26 @@ def post_store_aip_callback(uuid):

def get_file_metadata(**kwargs):
    """Fetch file metadata from the Storage Service.

    Sends a single GET to ``file/metadata/`` with ``kwargs`` as the query
    parameters, using the slow (long-timeout) session.

    :param kwargs: query parameters forwarded unchanged to the endpoint.
    :returns: the decoded JSON body of the response.
    :raises ResourceNotFound: if the response status is any 4xx code.
    """
    url = _storage_service_url() + 'file/metadata/'
    # Only one request is issued: the earlier call through the
    # quick-timeout session was superseded and its response discarded.
    response = _storage_api_slow_session().get(url, params=kwargs)
    if 400 <= response.status_code < 500:
        raise ResourceNotFound("No file found for arguments: {}".format(kwargs))
    return response.json()


def remove_files_from_transfer(transfer_uuid):
    """Ask the Storage Service to delete the contents of a transfer.

    Sends a single DELETE to ``file/<transfer_uuid>/contents/`` using the
    slow (long-timeout) session. The response is not inspected.

    :param transfer_uuid: UUID string interpolated into the endpoint URL.
    """
    url = _storage_service_url() + 'file/' + transfer_uuid + '/contents/'
    # Issue the DELETE once; the duplicate call on the quick-timeout
    # session was superseded by this one.
    _storage_api_slow_session().delete(url)


def index_backlogged_transfer_contents(transfer_uuid, file_set):
    """Send the file set of a transfer to the Storage Service.

    Sends a single PUT to ``file/<transfer_uuid>/contents/`` with
    ``file_set`` as the JSON body, using the slow (long-timeout) session.

    :param transfer_uuid: UUID string interpolated into the endpoint URL.
    :param file_set: JSON-serializable payload sent as the request body.
    :raises BadRequest: if the response status is any 4xx code; the
        message includes the response text.
    """
    url = _storage_service_url() + 'file/' + transfer_uuid + '/contents/'
    # Only one PUT is issued: the earlier call through the quick-timeout
    # session was superseded and its response overwritten.
    response = _storage_api_slow_session().put(url, json=file_set)
    if 400 <= response.status_code < 500:
        raise BadRequest("Unable to add files to transfer: {}".format(response.text))


def reindex_file(transfer_uuid):
    """Request a reindex of a transfer from the Storage Service.

    Sends a single POST to ``file/<transfer_uuid>/reindex/`` using the
    slow (long-timeout) session.

    :param transfer_uuid: UUID string interpolated into the endpoint URL.
    :returns: the decoded JSON body of the response.
    :raises requests.HTTPError: via ``raise_for_status()`` when the
        response carries an error status code.
    """
    url = _storage_service_url() + 'file/' + transfer_uuid + '/reindex/'
    # Only one POST is issued: the earlier call through the quick-timeout
    # session was superseded and its response overwritten.
    response = _storage_api_slow_session().post(url)
    response.raise_for_status()
    return response.json()
6 changes: 6 additions & 0 deletions src/dashboard/install/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,12 @@ variables or in the gunicorn configuration file.
- **Type:** `float`
- **Default:** `86400`

- **`ARCHIVEMATICA_DASHBOARD_DASHBOARD_STORAGE_SERVICE_CLIENT_QUICK_TIMEOUT`**:
- **Description:** configures the Storage Service client to stop waiting for a response after a given number of seconds when the client uses asynchronous API endpoints.
- **Config file example:** `Dashboard.storage_service_client_quick_timeout`
- **Type:** `float`
- **Default:** `5`

- **`ARCHIVEMATICA_DASHBOARD_DASHBOARD_AGENTARCHIVES_CLIENT_TIMEOUT`**:
- **Description:** configures the agentarchives client to stop waiting for a response after a given number of seconds.
- **Config file example:** `Dashboard.agentarchives_client_timeout`
Expand Down
3 changes: 3 additions & 0 deletions src/dashboard/src/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
'shibboleth_authentication': {'section': 'Dashboard', 'option': 'shibboleth_authentication', 'type': 'boolean'},
'ldap_authentication': {'section': 'Dashboard', 'option': 'ldap_authentication', 'type': 'boolean'},
'storage_service_client_timeout': {'section': 'Dashboard', 'option': 'storage_service_client_timeout', 'type': 'float'},
'storage_service_client_quick_timeout': {'section': 'Dashboard', 'option': 'storage_service_client_quick_timeout', 'type': 'float'},
'agentarchives_client_timeout': {'section': 'Dashboard', 'option': 'agentarchives_client_timeout', 'type': 'float'},
'site_url': {'section': 'Dashboard', 'option': 'site_url', 'type': 'string'},

Expand Down Expand Up @@ -71,6 +72,7 @@
shibboleth_authentication = False
ldap_authentication = False
storage_service_client_timeout = 86400
storage_service_client_quick_timeout = 5
agentarchives_client_timeout = 300
site_url =

Expand Down Expand Up @@ -443,6 +445,7 @@
ELASTICSEARCH_TIMEOUT = config.get('elasticsearch_timeout')
SEARCH_ENABLED = config.get('search_enabled')
STORAGE_SERVICE_CLIENT_TIMEOUT = config.get('storage_service_client_timeout')
STORAGE_SERVICE_CLIENT_QUICK_TIMEOUT = config.get('storage_service_client_quick_timeout')
AGENTARCHIVES_CLIENT_TIMEOUT = config.get('agentarchives_client_timeout')

SITE_URL = config.get('site_url')
Expand Down