
Commit

Fixed schema name assignment in the DownloadSchema module.
rfm-targa committed Nov 2, 2023
1 parent efee4a8 commit 8cae076
Showing 1 changed file with 23 additions and 27 deletions.
50 changes: 23 additions & 27 deletions CHEWBBACA/CHEWBBACA_NS/download_schema.py
@@ -301,10 +301,10 @@ def download_fastas(loci, download_folder, headers_get, schema_date):
total_loci = len(loci)
print('Number of loci to download: {0}'.format(total_loci))

-# build the list of urls to get
+# Build the list of urls to get
fasta_urls = [cr.make_url(locus, 'fasta') for locus in loci]

-# multithread the requests
+# Multithread the requests
print('Downloading schema files...')
total = 0
failed = []
@@ -337,19 +337,16 @@ def download_fastas(loci, download_folder, headers_get, schema_date):
return ns_files
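
The download loop these comments refer to falls outside the visible hunk. As a rough sketch only (not the module's actual implementation), the "Multithread the requests" step could be structured as below; download_file, the 20-thread pool size, and the <locus>.fasta naming are illustrative assumptions:

import concurrent.futures
import os

import requests


def download_file(url, file_path, headers):
    """Fetch a single FASTA file and write it to disk."""
    response = requests.get(url, headers=headers, timeout=30)
    with open(file_path, 'wb') as outfile:
        outfile.write(response.content)
    return file_path


def download_fastas_threaded(fasta_urls, loci_names, download_folder, headers_get):
    """Download all loci FASTA files concurrently and collect the URLs that failed."""
    failed = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
        future_to_url = {}
        for url, locus in zip(fasta_urls, loci_names):
            file_path = os.path.join(download_folder, f'{locus}.fasta')
            future_to_url[executor.submit(download_file, url, file_path, headers_get)] = url
        for future in concurrent.futures.as_completed(future_to_url):
            try:
                future.result()
            except Exception:
                failed.append(future_to_url[future])
    return failed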


-def download_compressed(zip_uri, species_name, schema_name,
-download_folder, headers_get):
+def download_compressed(zip_uri, schema_filename, download_folder, headers_get):
"""Download and extract ZIP archive with a ready-to-use schema.
Parameters
----------
zip_uri : str
Endpoint URL to make the request to download
the compressed schema.
-species_name : str
-Scientific name of the schema species.
-schema_name : str
-Name of the schema in the Chewie-NS.
+schema_filename : str
+Name assigned to the local schema.
download_folder : str
Path to the directory to which the ZIP archive
will be saved.
@@ -362,22 +359,19 @@ def download_compressed(zip_uri, species_name, schema_name,
ZIP archive contents will be extracted to this
directory.
"""
-zip_name = '{0}{1}_{2}.zip'.format(species_name[0].lower(),
-species_name.split(' ')[-1],
-schema_name)
-schema_path = os.path.join(download_folder,
-zip_name.split('.zip')[0])
+schema_path = os.path.join(download_folder, schema_filename)
fo.create_directory(schema_path)

-# download ZIP archive
+# Download ZIP archive
+zip_name = f'{schema_filename}.zip'
url, zip_response = cr.simple_get_request(zip_uri, headers_get,
parameters={'request_type': 'download'})
zip_path = os.path.join(schema_path, zip_name)
open(zip_path, 'wb').write(zip_response.content)
-# uncompress
+# Uncompress
print('Decompressing schema...')
shutil.unpack_archive(zip_path, extract_dir=schema_path)
-# delete ZIP
+# Delete ZIP
os.remove(zip_path)

return schema_path
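
To make the renaming concrete, a short runnable comparison (species and schema names are hypothetical) of the ZIP name the removed lines derived inside download_compressed versus the schema_filename the caller now passes in:

# Hypothetical example values; the real names come from Chewie-NS.
species_name = 'Streptococcus agalactiae'
schema_name = 'tutorial_schema'

# Old behaviour (removed): abbreviated '<g><epithet>_<schema>' built locally.
old_zip_name = '{0}{1}_{2}.zip'.format(species_name[0].lower(),
                                       species_name.split(' ')[-1],
                                       schema_name)
print(old_zip_name)  # sagalactiae_tutorial_schema.zip

# New behaviour: the caller builds the full name (see main below) and passes it in.
schema_filename = '_'.join(species_name.split(' '))
schema_filename = f'{schema_filename}_{schema_name}'
print(f'{schema_filename}.zip')  # Streptococcus_agalactiae_tutorial_schema.zip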
@@ -439,8 +433,8 @@ def main(species_id, schema_id, download_folder, cpu_cores,
sys.exit('There is no species with the provided '
'identifier in the Chewie-NS.')

-# check if user provided schema identifier or schema description
-# get info about all the species schemas
+# Check if user provided schema identifier or schema description
+# Get info about all the species schemas
schema_id, schema_uri,\
schema_name, schema_params = cr.get_species_schemas(schema_id,
species_id,
@@ -452,31 +446,35 @@ def main(species_id, schema_id, download_folder, cpu_cores,
print("Schema's species: {0} "
"(id={1})".format(species_name, species_id))

-# create parameters dict
+# Assign name to local schema
+schema_basename = '_'.join(species_name.split(' '))
+schema_basename = f'{schema_basename}_{schema_name}'
+
+# Create parameters dict
schema_params_dict = {k: schema_params[k]['value']
for k in schema_params.keys()
if k != 'name'}

-# check if schema is locked
+# Check if schema is locked
lock_status = schema_params_dict['Schema_lock']
if lock_status != 'Unlocked':
sys.exit('Schema is locked. This might be because it '
'is being uploaded, updated or compressed.'
' Please try again later and contact the Administrator '
'if the schema stays locked for a long period of time.')

-# get zip information
+# Get zip information
zip_uri, zip_date = check_compressed(schema_uri, headers_get)

schema_date = download_date(date, zip_date, latest,
schema_params_dict['dateEntered'],
schema_params_dict['last_modified'])

-# check output folder
+# Check output folder
if not os.path.exists(download_folder):
os.mkdir(download_folder)
else:
-# verify that folder is empty and abort if it is not
+# Verify that folder is empty and abort if it is not
download_folder_files = os.listdir(download_folder)
if len(download_folder_files) > 0:
sys.exit('Download folder is not empty. Please ensure '
@@ -487,7 +485,7 @@ def main(species_id, schema_id, download_folder, cpu_cores,
if schema_date == zip_date:
print('\nDownloading compressed version...')
# Chewie-NS does not add clustering parameters to config, change that
-schema_path = download_compressed(zip_uri, species_name, schema_name,
+schema_path = download_compressed(zip_uri, schema_basename,
download_folder, headers_get)
else:
print('\nDownloading schema FASTA files...')
@@ -503,9 +501,7 @@ def main(species_id, schema_id, download_folder, cpu_cores,
nomenclature_server)

# Use PrepExternalSchema to determine representatives
-genus, epithet = species_name.split(' ')
-schema_name = '{0}{1}_{2}'.format(genus[0].lower(), epithet, schema_name)
-schema_path = fo.join_paths(download_folder, [schema_name])
+schema_path = fo.join_paths(download_folder, [schema_basename])
schema_path_short = fo.join_paths(schema_path, ['short'])
# Create output directories
schema_path_exists = fo.create_directory(schema_path)
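
Both the compressed and the FASTA download paths now derive the local directory from the same schema_basename. A minimal sketch of the resulting layout, using hypothetical names and os.path.join as a stand-in for fo.join_paths:

import os

# Hypothetical example values; real ones come from Chewie-NS in main().
download_folder = 'downloaded_schemas'
species_name = 'Streptococcus agalactiae'
schema_name = 'tutorial_schema'

schema_basename = '_'.join(species_name.split(' '))
schema_basename = f'{schema_basename}_{schema_name}'

schema_path = os.path.join(download_folder, schema_basename)
schema_path_short = os.path.join(schema_path, 'short')
print(schema_path)        # downloaded_schemas/Streptococcus_agalactiae_tutorial_schema
print(schema_path_short)  # downloaded_schemas/Streptococcus_agalactiae_tutorial_schema/short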
