From 670d9415e66c59b3e293e9eb09752195a519653c Mon Sep 17 00:00:00 2001 From: Erik Frojdh Date: Tue, 2 Apr 2024 17:19:10 +0200 Subject: [PATCH 1/2] added tests and features to load full file --- core/include/aare/DType.hpp | 1 + core/include/aare/NDView.hpp | 11 ++++++++++ core/include/aare/defs.hpp | 5 ----- file_io/CMakeLists.txt | 1 + file_io/include/aare/NumpyFile.hpp | 13 +++++++++++ file_io/include/aare/NumpyHelpers.hpp | 4 ++-- file_io/src/NumpyFile.cpp | 6 +++--- file_io/src/NumpyHelpers.cpp | 28 +++++++++++++----------- file_io/test/NumpyHelpers.test.cpp | 31 +++++++++++++++++++++++++++ 9 files changed, 78 insertions(+), 22 deletions(-) diff --git a/core/include/aare/DType.hpp b/core/include/aare/DType.hpp index 018047fc..cca5a0d0 100644 --- a/core/include/aare/DType.hpp +++ b/core/include/aare/DType.hpp @@ -19,6 +19,7 @@ enum class endian { }; class DType { + //TODO! support for non native endianess? static_assert(sizeof(long) == sizeof(int64_t), "long should be 64bits"); public: diff --git a/core/include/aare/NDView.hpp b/core/include/aare/NDView.hpp index 8528e97e..3a493fb8 100644 --- a/core/include/aare/NDView.hpp +++ b/core/include/aare/NDView.hpp @@ -5,11 +5,22 @@ #include #include #include +#include namespace aare { template using Shape = std::array; +//TODO! fix mismatch between signed and unsigned +template +Shape make_shape(const std::vector& shape){ + if(shape.size() != Ndim) + throw std::runtime_error("Shape size mismatch"); + Shape arr; + std::copy_n(shape.begin(), Ndim, arr.begin()); + return arr; +} + template ssize_t element_offset(const Strides &) { return 0; } template diff --git a/core/include/aare/defs.hpp b/core/include/aare/defs.hpp index f28a5ebb..5eba329d 100644 --- a/core/include/aare/defs.hpp +++ b/core/include/aare/defs.hpp @@ -72,11 +72,6 @@ struct RawFileConfig { } }; -const char little_endian_char = '<'; -const char big_endian_char = '>'; -const char no_endian_char = '|'; -const std::array endian_chars = {little_endian_char, big_endian_char, no_endian_char}; -const std::array numtype_chars = {'f', 'i', 'u', 'c'}; } // namespace aare \ No newline at end of file diff --git a/file_io/CMakeLists.txt b/file_io/CMakeLists.txt index 80884745..845a5d70 100644 --- a/file_io/CMakeLists.txt +++ b/file_io/CMakeLists.txt @@ -32,6 +32,7 @@ endif() if(AARE_TESTS) set(TestSources + ${CMAKE_CURRENT_SOURCE_DIR}/test/NumpyFile.test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test/NumpyHelpers.test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test/RawFile.test.cpp ) diff --git a/file_io/include/aare/NumpyFile.hpp b/file_io/include/aare/NumpyFile.hpp index b3193781..72e19804 100644 --- a/file_io/include/aare/NumpyFile.hpp +++ b/file_io/include/aare/NumpyFile.hpp @@ -1,6 +1,7 @@ #pragma once #include "aare/FileInterface.hpp" #include "aare/NumpyHelpers.hpp" +#include "aare/DType.hpp" #include "aare/defs.hpp" #include #include @@ -45,6 +46,18 @@ class NumpyFile : public FileInterface { ssize_t cols() const override { return m_header.shape[2]; } ssize_t bitdepth() const override { return m_header.dtype.bitdepth(); } + DType dtype() const { return m_header.dtype; } + std::vector shape() const { return m_header.shape; } + + //load the full numpy file into a NDArray + template + NDArray load(){ + NDArray arr(make_shape(m_header.shape)); + fseek(fp, header_size, SEEK_SET); + fread(arr.data(), sizeof(T), arr.size(), fp); + return arr; + } + ~NumpyFile(); }; diff --git a/file_io/include/aare/NumpyHelpers.hpp b/file_io/include/aare/NumpyHelpers.hpp index 389b751f..2e843543 100644 --- a/file_io/include/aare/NumpyHelpers.hpp +++ b/file_io/include/aare/NumpyHelpers.hpp @@ -14,7 +14,7 @@ #include "aare/DType.hpp" #include "aare/defs.hpp" -using shape_t = std::vector; +using shape_t = std::vector; struct header_t { header_t() : dtype(aare::DType(aare::DType::ERROR)), fortran_order(false), shape(shape_t()){}; @@ -53,7 +53,7 @@ std::string get_value_from_map(const std::string &mapstr); std::unordered_map parse_dict(std::string in, const std::vector &keys); -template inline bool in_array(T val, const std::array &arr) { +template bool in_array(T val, const std::array &arr) { return std::find(std::begin(arr), std::end(arr), val) != std::end(arr); } bool is_digits(const std::string &str); diff --git a/file_io/src/NumpyFile.cpp b/file_io/src/NumpyFile.cpp index 44f452b9..e153b5f3 100644 --- a/file_io/src/NumpyFile.cpp +++ b/file_io/src/NumpyFile.cpp @@ -139,9 +139,9 @@ void NumpyFile::load_metadata(){ } // read header - auto buf_v = std::vector(header_len); - fread(buf_v.data(), header_len,1,fp); - std::string header(buf_v.data(), header_len); + std::string header(header_len, '\0'); + fread(header.data(), header_len,1,fp); + // parse header std::vector keys{"descr", "fortran_order", "shape"}; diff --git a/file_io/src/NumpyHelpers.cpp b/file_io/src/NumpyHelpers.cpp index 59a85b9a..fef52ac1 100644 --- a/file_io/src/NumpyHelpers.cpp +++ b/file_io/src/NumpyHelpers.cpp @@ -41,13 +41,13 @@ std::unordered_map parse_dict(std::string in, const st std::vector> positions; - for (auto const &value : keys) { - size_t pos = in.find("'" + value + "'"); + for (auto const &key : keys) { + size_t pos = in.find("'" + key + "'"); if (pos == std::string::npos) - throw std::runtime_error("Missing '" + value + "' key."); + throw std::runtime_error("Missing '" + key + "' key."); - std::pair position_pair{pos, value}; + std::pair position_pair{pos, key}; positions.push_back(position_pair); } @@ -78,12 +78,19 @@ std::unordered_map parse_dict(std::string in, const st } aare::DType parse_descr(std::string typestring) { + if (typestring.length() < 3) { throw std::runtime_error("invalid typestring (length)"); } - char byteorder_c = typestring.at(0); - char kind_c = typestring.at(1); + constexpr char little_endian_char = '<'; + constexpr char big_endian_char = '>'; + constexpr char no_endian_char = '|'; + constexpr std::array endian_chars = {little_endian_char, big_endian_char, no_endian_char}; + constexpr std::array numtype_chars = {'f', 'i', 'u', 'c'}; + + const char byteorder_c = typestring[0]; + const char kind_c = typestring[1]; std::string itemsize_s = typestring.substr(2); if (!in_array(byteorder_c, endian_chars)) { @@ -97,7 +104,6 @@ aare::DType parse_descr(std::string typestring) { if (!is_digits(itemsize_s)) { throw std::runtime_error("invalid typestring (itemsize)"); } - // unsigned int itemsize = std::stoul(itemsize_s); return aare::DType(typestring); } @@ -107,8 +113,7 @@ bool parse_bool(const std::string &in) { return true; if (in == "False") return false; - - throw std::runtime_error("Invalid python boolan."); + throw std::runtime_error("Invalid python boolean."); } std::string get_value_from_map(const std::string &mapstr) { @@ -124,7 +129,7 @@ bool is_digits(const std::string &str) { return std::all_of(str.begin(), str.end std::vector parse_tuple(std::string in) { std::vector v; - const char seperator = ','; + const char separator = ','; in = trim(in); @@ -135,7 +140,7 @@ std::vector parse_tuple(std::string in) { std::istringstream iss(in); - for (std::string token; std::getline(iss, token, seperator);) { + for (std::string token; std::getline(iss, token, separator);) { v.push_back(token); } @@ -150,7 +155,6 @@ std::string trim(const std::string &str) { return ""; auto end = str.find_last_not_of(whitespace); - return str.substr(begin, end - begin + 1); } diff --git a/file_io/test/NumpyHelpers.test.cpp b/file_io/test/NumpyHelpers.test.cpp index 16da4474..7022dd5f 100644 --- a/file_io/test/NumpyHelpers.test.cpp +++ b/file_io/test/NumpyHelpers.test.cpp @@ -30,4 +30,35 @@ TEST_CASE("trim whitespace"){ REQUIRE(trim("hej ") == "hej"); REQUIRE(trim(" ") == ""); REQUIRE(trim(" \thej hej ") == "hej hej"); +} + +TEST_CASE("parse data type descriptions"){ + REQUIRE(parse_descr("{1,2,3})); + REQUIRE_FALSE(in_array(4, std::array{1,2,3})); + REQUIRE(in_array(1, std::array{1})); + REQUIRE_FALSE(in_array(1, std::array{})); +} + +TEST_CASE("Parse numpy dict"){ + std::string in = "{'descr': ' keys{"descr", "fortran_order", "shape"}; + auto map = parse_dict(in, keys); + REQUIRE(map["descr"] == "' Date: Tue, 2 Apr 2024 17:19:57 +0200 Subject: [PATCH 2/2] new files --- data/numpy/test_1d_int32.npy | Bin 0 -> 168 bytes data/numpy/test_3d_double.npy | Bin 0 -> 368 bytes data/numpy/write_test_files.py | 12 ++++++++ file_io/test/NumpyFile.test.cpp | 53 ++++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+) create mode 100644 data/numpy/test_1d_int32.npy create mode 100644 data/numpy/test_3d_double.npy create mode 100644 data/numpy/write_test_files.py create mode 100644 file_io/test/NumpyFile.test.cpp diff --git a/data/numpy/test_1d_int32.npy b/data/numpy/test_1d_int32.npy new file mode 100644 index 0000000000000000000000000000000000000000..a95c6a637ba7f1120fc47410fa781c1ecc91591b GIT binary patch literal 168 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlWC%^qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= vXCxM+0{I$-20EHL3bhL411<(2U<6_&AZ7+)79eH?Vm2UV2VxE&<^*B@qPiQH literal 0 HcmV?d00001 diff --git a/data/numpy/test_3d_double.npy b/data/numpy/test_3d_double.npy new file mode 100644 index 0000000000000000000000000000000000000000..f46b4d0ab4582fb0c2cfe0e1794c4aa72837fe2b GIT binary patch literal 368 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= yXCxM+0{I%oItoTQ3Z|Mm3bhL40WJnG_+SsA862QA4l0O%JPvKN7H;q-+Fbx3TppnS literal 0 HcmV?d00001 diff --git a/data/numpy/write_test_files.py b/data/numpy/write_test_files.py new file mode 100644 index 00000000..ba3f8550 --- /dev/null +++ b/data/numpy/write_test_files.py @@ -0,0 +1,12 @@ +import numpy as np + + +arr = np.arange(10, dtype = np.int32) +np.save('test_1d_int32.npy', arr) + +arr2 = np.zeros((3,2,5), dtype = np.float64) +arr2[0,0,0] = 1.0 +arr2[0,0,1] = 2.0 +arr2[0,1,0] = 72.0 +arr2[2,0,4] = 63.0 +np.save('test_3d_double.npy', arr2) \ No newline at end of file diff --git a/file_io/test/NumpyFile.test.cpp b/file_io/test/NumpyFile.test.cpp new file mode 100644 index 00000000..7f5792d3 --- /dev/null +++ b/file_io/test/NumpyFile.test.cpp @@ -0,0 +1,53 @@ +#include +#include "aare/NumpyFile.hpp" +#include "aare/NDArray.hpp" + +#include "test_config.hpp" + +using aare::NumpyFile; +using aare::DType; +TEST_CASE("Read a 1D numpy file with int32 data type"){ + + auto fpath = test_data_path() / "numpy" / "test_1d_int32.npy"; + REQUIRE(std::filesystem::exists(fpath)); + + NumpyFile f(fpath); + + //we know the file contains 10 elements of np.int32 containing values 0-9 + REQUIRE(f.dtype() == DType::INT32); + REQUIRE(f.shape() == std::vector{10}); + + //use the load function to read the full file into a NDArray + auto data = f.load(); + for(size_t i = 0; i < 10; i++){ + REQUIRE(data(i) == i); + } + +} + +TEST_CASE("Read a 3D numpy file with np.double data type"){ + + auto fpath = test_data_path() / "numpy" / "test_3d_double.npy"; + REQUIRE(std::filesystem::exists(fpath)); + + NumpyFile f(fpath); + + //we know the file contains 10 elements of np.int32 containing values 0-9 + REQUIRE(f.dtype() == DType::DOUBLE); + REQUIRE(f.shape() == std::vector{3,2,5}); + + //use the load function to read the full file into a NDArray + //numpy code to generate the array + // arr2[0,0,0] = 1.0 + // arr2[0,0,1] = 2.0 + // arr2[0,1,0] = 72.0 + // arr2[2,0,4] = 63.0 + + auto data = f.load(); + REQUIRE(data(0,0,0) == 1.0); + REQUIRE(data(0,0,1) == 2.0); + REQUIRE(data(0,1,0) == 72.0); + REQUIRE(data(2,0,4) == 63.0); + + +} \ No newline at end of file