From bd7ae8c4861f071e42aa5ce954c9881862c34d03 Mon Sep 17 00:00:00 2001 From: Jordan Henderson Date: Fri, 24 Jan 2025 14:24:45 -0600 Subject: [PATCH] Revert 'Address code page issues w/ Windows file paths (#4172)' --- release_docs/RELEASE.txt | 12 ++++++- src/H5system.c | 71 +++++++++++----------------------------- src/H5win32defs.h | 12 +++---- 3 files changed, 36 insertions(+), 59 deletions(-) diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt index af7612e9a34..9103fb10506 100644 --- a/release_docs/RELEASE.txt +++ b/release_docs/RELEASE.txt @@ -116,7 +116,17 @@ Bug Fixes since HDF5-1.14.5 release =================================== Library ------- - - + - Reverted a change to the library's handling of UTF-8 file names + + A change was made in the HDF5 1.14.4 release to address some issues with + the library's handling of code pages and file paths on Windows. This change + introduced other issues with the handling of UTF-8 file names that caused + breakage for software using the 1.14.4 and 1.14.5 releases of HDF5. + + The change has been reverted for this release and will be addressed in a + different manner in a future release. + + Reverted commit: eb0351efffe987a9c1882ccc9b03b5b0aec7f2dd Java Library diff --git a/src/H5system.c b/src/H5system.c index 65b56fd8d81..12bedcf6773 100644 --- a/src/H5system.c +++ b/src/H5system.c @@ -521,22 +521,28 @@ H5_get_utf16_str(const char *s) } /* end H5_get_utf16_str() */ /*------------------------------------------------------------------------- - * Function: Wopen + * Function: Wopen_utf8 * - * Purpose: Equivalent of open(2) for use on Windows. Necessary to - * handle code pages and Unicode on that platform. + * Purpose: UTF-8 equivalent of open(2) for use on Windows. 
+ * Converts a UTF-8 input path to UTF-16 and then opens the + * file via _wopen() under the hood * * Return: Success: A POSIX file descriptor * Failure: -1 + * *------------------------------------------------------------------------- */ int -Wopen(const char *path, int oflag, ...) +Wopen_utf8(const char *path, int oflag, ...) { int fd = -1; /* POSIX file descriptor to be returned */ wchar_t *wpath = NULL; /* UTF-16 version of the path */ int pmode = 0; /* mode (optionally set via variable args) */ + /* Convert the input UTF-8 path to UTF-16 */ + if (NULL == (wpath = H5_get_utf16_str(path))) + goto done; + /* _O_BINARY must be set in Windows to avoid CR-LF <-> LF EOL * transformations when performing I/O. Note that this will * produce Unix-style text files, though. @@ -552,83 +558,44 @@ Wopen(const char *path, int oflag, ...) va_end(vl); } - /* First try opening the file with the normal POSIX open() call. - * This will handle ASCII without additional processing as well as - * systems where code pages are being used instead of true Unicode. - */ - if ((fd = open(path, oflag, pmode)) >= 0) { - /* If this succeeds, we're done */ - goto done; - } - - if (errno == ENOENT) { - /* Not found, reset errno and try with UTF-16 */ - errno = 0; - } - else { - /* Some other error (like permissions), so just exit */ - goto done; - } - - /* Convert the input UTF-8 path to UTF-16 */ - if (NULL == (wpath = H5_get_utf16_str(path))) - goto done; - - /* Open the file using a UTF-16 path */ + /* Open the file */ fd = _wopen(wpath, oflag, pmode); done: H5MM_xfree(wpath); return fd; -} /* end Wopen() */ +} /* end Wopen_utf8() */ /*------------------------------------------------------------------------- - * Function: Wremove + * Function: Wremove_utf8 * - * Purpose: Equivalent of remove(3) for use on Windows. Necessary to - * handle code pages and Unicode on that platform. + * Purpose: UTF-8 equivalent of remove(3) for use on Windows. 
+ * Converts a UTF-8 input path to UTF-16 and then removes the + * file via _wremove() under the hood * * Return: Success: 0 * Failure: -1 *------------------------------------------------------------------------- */ int -Wremove(const char *path) +Wremove_utf8(const char *path) { wchar_t *wpath = NULL; /* UTF-16 version of the path */ int ret = -1; - /* First try removing the file with the normal POSIX remove() call. - * This will handle ASCII without additional processing as well as - * systems where code pages are being used instead of true Unicode. - */ - if ((ret = remove(path)) >= 0) { - /* If this succeeds, we're done */ - goto done; - } - - if (errno == ENOENT) { - /* Not found, reset errno and try with UTF-16 */ - errno = 0; - } - else { - /* Some other error (like permissions), so just exit */ - goto done; - } - /* Convert the input UTF-8 path to UTF-16 */ if (NULL == (wpath = H5_get_utf16_str(path))) goto done; - /* Remove the file using a UTF-16 path */ + /* Remove the file */ ret = _wremove(wpath); done: H5MM_xfree(wpath); return ret; -} /* end Wremove() */ +} /* end Wremove_utf8() */ #endif /* H5_HAVE_WIN32_API */ diff --git a/src/H5win32defs.h b/src/H5win32defs.h index a996f60ab01..430aed2e27a 100644 --- a/src/H5win32defs.h +++ b/src/H5win32defs.h @@ -39,7 +39,7 @@ struct timezone { }; #endif -#define HDcreat(S, M) Wopen(S, O_CREAT | O_TRUNC | O_RDWR, M) +#define HDcreat(S, M) Wopen_utf8(S, O_CREAT | O_TRUNC | O_RDWR, M) #define HDflock(F, L) Wflock(F, L) #define HDfstat(F, B) _fstati64(F, B) #define HDftell(F) _ftelli64(F) @@ -59,13 +59,13 @@ struct timezone { */ #if (defined(_MSC_VER) && !defined(_MSVC_TRADITIONAL)) || defined(_MSVC_TRADITIONAL) /* Using the MSVC traditional preprocessor */ -#define HDopen(S, F, ...) Wopen(S, F, __VA_ARGS__) +#define HDopen(S, F, ...) Wopen_utf8(S, F, __VA_ARGS__) #else /* Using a standards conformant preprocessor */ -#define HDopen(S, F, ...) Wopen(S, F, ##__VA_ARGS__) +#define HDopen(S, F, ...)
Wopen_utf8(S, F, ##__VA_ARGS__) #endif -#define HDremove(S) Wremove(S) +#define HDremove(S) Wremove_utf8(S) #define HDsetenv(N, V, O) Wsetenv(N, V, O) #define HDsetvbuf(F, S, M, Z) setvbuf(F, S, M, (Z > 1 ? Z : 2)) #define HDsleep(S) Sleep(S * 1000) @@ -89,8 +89,8 @@ H5_DLL int Wsetenv(const char *name, const char *value, int overwrite); H5_DLL int Wflock(int fd, int operation); H5_DLL herr_t H5_expand_windows_env_vars(char **env_var); H5_DLL wchar_t *H5_get_utf16_str(const char *s); -H5_DLL int Wopen(const char *path, int oflag, ...); -H5_DLL int Wremove(const char *path); +H5_DLL int Wopen_utf8(const char *path, int oflag, ...); +H5_DLL int Wremove_utf8(const char *path); H5_DLL int H5_get_win32_times(H5_timevals_t *tvs); H5_DLL char *H5_strndup(const char *s, size_t n); H5_DLL char *Wstrcasestr_wrap(const char *haystack, const char *needle);