Skip to content

Commit

Permalink
fix: Do not use nbsp in strings (#2140)
Browse files Browse the repository at this point in the history
* Use normal spaces in ingest

* In preprocessing, replace nbsps with normal spaces.
  • Loading branch information
anna-parker authored Jun 12, 2024
1 parent 7d3dac4 commit 04c6263
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 3 deletions.
2 changes: 1 addition & 1 deletion ingest/scripts/prepare_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def split_authors(authors: str) -> str:

for i in range(0, len(single_split), 2):
if i + 1 < len(single_split):
result.append(single_split[i + 1].strip() + "\u00a0" + single_split[i].strip())
result.append(single_split[i + 1].strip() + " " + single_split[i].strip())
else:
result.append(single_split[i].strip())

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def get_config() -> Config:
# Overwrite config with config in config_file
if args.config_file:
config = load_config_from_yaml(args.config_file, config)
if config.backend_host is None: # Check if backend_host wasn't set during initialization
if not config.backend_host: # Check if backend_host wasn't set during initialization
config.backend_host = f"http://127.0.0.1:8079/{config.organism}"

# Use environment variables if available
Expand Down
4 changes: 3 additions & 1 deletion preprocessing/nextclade/src/loculus_preprocessing/prepro.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ def parse_ndjson(ndjson_data: str) -> Sequence[UnprocessedEntry]:
for json_str in ndjson_data.split("\n"):
if len(json_str) == 0:
continue
json_object = json.loads(json_str)
# Loculus currently cannot handle non-breaking spaces.
json_str_processed = json_str.replace("\N{NO-BREAK SPACE}", " ")
json_object = json.loads(json_str_processed)
unprocessed_data = UnprocessedData(
metadata=json_object["data"]["metadata"],
unalignedNucleotideSequences=json_object["data"]["unalignedNucleotideSequences"],
Expand Down

0 comments on commit 04c6263

Please sign in to comment.