Skip to content

Commit

Permalink
Significantly reduced simulation startup time
Browse files Browse the repository at this point in the history
  • Loading branch information
ProjectPhysX committed Jan 18, 2024
1 parent 636724b commit 6a9a419
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 39 deletions.
8 changes: 4 additions & 4 deletions DOCUMENTATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,20 @@ git clone https://github.com/ProjectPhysX/FluidX3D.git
chmod +x make.sh
./make.sh
```
- Compiling requires `C++17`, which is supported since `g++` version `8` (check with `g++ --version`). If you have `make` installed (check with `make --version`), compiling will be faster using multiple CPU cores; otherwise compiling falls back to using a single CPU core.
- If you use [`INTERACTIVE_GRAPHICS`](src/defines.hpp), change to the "[compile on Linux with X11 graphics](make.sh#L3)" command in [`make.sh`](make.sh#L3).
- Compiling requires [`g++`](https://gcc.gnu.org/) with `C++17`, which is supported since version `8` (check with `g++ --version`). If you have [`make`](https://www.gnu.org/software/make/) installed (check with `make --version`), compiling will be faster using multiple CPU cores; otherwise compiling falls back to using a single CPU core.
- If you use [`INTERACTIVE_GRAPHICS`](src/defines.hpp), select [`TARGET=Linux-X11`](make.sh#L3) in [`make.sh`](make.sh#L3).
- To select a specific GPU, enter `./make.sh 0` to compile+run, or `bin/FluidX3D 0` to run on device `0`. You can also select multiple GPUs with `bin/FluidX3D 0 1 3 6` if the setup is [configured as multi-GPU](#the-lbm-class).

### macOS
- Select the "[compile on macOS](make.sh#L5)" command in [`make.sh`](make.sh#L5).
- Select [`TARGET=macOS`](make.sh#L5) in [`make.sh`](make.sh#L5).
- Compile and run with:
```bash
chmod +x make.sh
./make.sh
```

### Android
- Select the "[compile on Android](make.sh#L6)" command in [`make.sh`](make.sh#L6).
- Select [`TARGET=Android`](make.sh#L6) in [`make.sh`](make.sh#L6).
- Compile and run with:
```bash
chmod +x make.sh
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ The fastest and most memory efficient lattice Boltzmann CFD software, running on
- added operating system info to OpenCL device driver version printout
- fixed flickering with frustum culling at very small field of view
- fixed bug where rendered/exported frame was not updated when `visualization_modes` changed
- v2.12 (17.01.2024)
- significantly (~3x) faster source code compiling on Linux using multiple CPU cores if [`make`](https://www.gnu.org/software/make/) is installed
- significantly faster simulation initialization (~40% single-GPU, ~15% multi-GPU)
- minor bug fix in `Memory_Container::reset()` function

</details>

Expand Down
2 changes: 1 addition & 1 deletion src/info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ void Info::print_logo() const {
print("| "); print("\\ \\ / /", c); print(" |\n");
print("| "); print("\\ ' /", c); print(" |\n");
print("| "); print("\\ /", c); print(" |\n");
print("| "); print("\\ /", c); print(" FluidX3D Version 2.11 |\n");
print("| "); print("\\ /", c); print(" FluidX3D Version 2.12 |\n");
print("| "); print("'", c); print(" Copyright (c) Dr. Moritz Lehmann |\n");
print("|-----------------------------------------------------------------------------|\n");
}
Expand Down
87 changes: 53 additions & 34 deletions src/lbm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,29 +222,53 @@ class LBM {
public:
template<typename T> class Memory_Container { // does not hold any data itself, just links to LBM_Domain data
private:
LBM* lbm = nullptr;
ulong N = 0ull; // buffer length
uint d = 1u; // buffer dimensions
uint Nx=1u, Ny=1u, Nz=1u; // (local) lattice dimensions
uint Dx=1u, Dy=1u, Dz=1u; // lattice domains
LBM* lbm = nullptr;
Memory<T>** buffers = nullptr; // host buffers
string name = "";

uint Nx=1u, Ny=1u, Nz=1u, Dx=1u, Dy=1u, Dz=1u, D=1u; // auxiliary variables: (local) lattice dimensions, lattice domains, number of domains
uint NxDx=1u, NyDy=1u, NzDz=1u, Hx=0u, Hy=0u, Hz=0u; // auxiliary variables: shortcuts for N_/D_, halo offsets (1 if the axis has more than one domain, else 0)
ulong NxNy=1ull, local_Nx=1ull, local_Ny=1ull, local_Nz=1ull, local_N=1ull; // auxiliary variables: shortcut for Nx*Ny, size of each domain (N_/D_ plus 2 halo offsets), number of cells in each domain
inline void initialize_auxiliary_variables() { // pre-compute once what the reference() functions need on every element access
	// lattice dimensions and number of domains along each axis, queried from the linked LBM object
	Nx = lbm->get_Nx();
	Ny = lbm->get_Ny();
	Nz = lbm->get_Nz();
	Dx = lbm->get_Dx();
	Dy = lbm->get_Dy();
	Dz = lbm->get_Dz();
	D = Dx*Dy*Dz; // number of domains
	NxNy = (ulong)Nx*(ulong)Ny; // shortcut for Nx*Ny, computed in 64-bit
	// shortcuts for N_/D_ (integer division)
	NxDx = Nx/Dx;
	NyDy = Ny/Dy;
	NzDz = Nz/Dz;
	// halo offsets: 1 along an axis that has more than one domain, otherwise 0
	Hx = Dx>1u ? 1u : 0u;
	Hy = Dy>1u ? 1u : 0u;
	Hz = Dz>1u ? 1u : 0u;
	// size of each domain: N_/D_ plus one halo offset on both sides
	local_Nx = (ulong)(NxDx+2u*Hx);
	local_Ny = (ulong)(NyDy+2u*Hy);
	local_Nz = (ulong)(NzDz+2u*Hz);
	local_N = local_Nx*local_Ny*local_Nz; // number of cells in each domain
}
inline void initialize_auxiliary_pointers() { // create the x/y/z auxiliary pointers; y and z only if the buffer has enough dimensions
	x = Pointer(this, 0u); // always available
	if(d>=2u) y = Pointer(this, 1u); // needs at least 2 dimensions
	if(d>=3u) z = Pointer(this, 2u); // needs at least 3 dimensions
}
inline T& reference(const ulong i) { // stitch together domain buffers and make them appear as one single large buffer
	if(D==1u) return buffers[0]->data()[i]; // single domain: index is used as-is (array of structures)
	// multiple domains: split i into dimension and cell index, then locate the cell within its domain
	const ulong component = i/N; // dimension encoded in the index
	const ulong cell = i%N; // cell index n = x+(y+z*Ny)*Nx
	const ulong in_slice = cell%NxNy;
	const uint gx = (uint)(in_slice%(ulong)Nx);
	const uint gy = (uint)(in_slice/(ulong)Nx);
	const uint gz = (uint)(cell/NxNy);
	const uint domain = (gx/NxDx)+((gy/NyDy)+(gz/NzDz)*Dy)*Dx; // which domain the cell belongs to
	// 3D position within the domain, shifted by the halo offsets
	const ulong hx = (ulong)(gx%NxDx+Hx);
	const ulong hy = (ulong)(gy%NyDy+Hy);
	const ulong hz = (ulong)(gz%NzDz+Hz);
	const ulong local_i = hx+(hy+hz*local_Ny)*local_Nx;
	return buffers[domain]->data()[local_i+component*local_N]; // array of structures
}
inline T& reference(const ulong i, const uint dimension) { // stitch together domain buffers and make them appear as one single large buffer
const ulong global_i = i%N;
const ulong NxNy=(ulong)Nx*(ulong)Ny, t=global_i%NxNy;
const uint x=(uint)(t%(ulong)Nx), y=(uint)(t/(ulong)Nx), z=(uint)(global_i/NxNy); // n = x+(y+z*Ny)*Nx
const uint NxDx=Nx/Dx, NyDy=Ny/Dy, NzDz=Nz/Dz;
const uint px=x%NxDx, py=y%NyDy, pz=z%NzDz, dx=x/NxDx, dy=y/NyDy, dz=z/NzDz, domain=dx+(dy+dz*Dy)*Dx;
const uint Hx=Dx>1u, Hy=Dy>1u, Hz=Dz>1u; // halo offsets
const ulong local_N = (ulong)(NxDx+2u*Hx)*(ulong)(NyDy+2u*Hy)*(ulong)(NzDz+2u*Hz); // add halo offsets
const ulong local_i = (ulong)(px+Hx)+((ulong)(py+Hy)+(ulong)(pz+Hz)*(ulong)(NyDy+2u*Hy))*(ulong)(NxDx+2u*Hx); // add halo offsets
const ulong local_dimension = max(i/N, (ulong)dimension);
return buffers[domain]->data()[local_i+local_dimension*local_N]; // array of structures
if(D==1u) { // take shortcut for single domain
return buffers[0]->data()[i+(ulong)dimension*N]; // array of structures
} else { // decompose index for multiple domains
const ulong global_i=i%N, t=global_i%NxNy;
const uint x=(uint)(t%(ulong)Nx), y=(uint)(t/(ulong)Nx), z=(uint)(global_i/NxNy); // n = x+(y+z*Ny)*Nx
const uint px=x%NxDx, py=y%NyDy, pz=z%NzDz, dx=x/NxDx, dy=y/NyDy, dz=z/NzDz, domain=dx+(dy+dz*Dy)*Dx; // 3D position within domain and which domain
const ulong local_i = (ulong)(px+Hx)+((ulong)(py+Hy)+(ulong)(pz+Hz)*local_Ny)*local_Nx; // add halo offsets
const ulong local_dimension = max(i/N, (ulong)dimension);
return buffers[domain]->data()[local_i+local_dimension*local_N]; // array of structures
}
}
inline static string vtk_type() {
/**/ if constexpr(std::is_same<T, char >::value) return "char" ; else if constexpr(std::is_same<T, uchar >::value) return "unsigned_char" ;
Expand Down Expand Up @@ -300,52 +324,47 @@ class LBM {
Pointer x, y, z; // host buffer auxiliary pointers for multi-dimensional array access (array of structures)

inline Memory_Container(LBM* lbm, Memory<T>** buffers, const string& name) {
this->N = lbm->get_N();
this->d = buffers[0]->dimensions();
if(this->N*(ulong)this->d==0ull) print_error("Memory size must be larger than 0.");
this->lbm = lbm;
this->Nx = lbm->get_Nx(); this->Ny = lbm->get_Ny(); this->Nz = lbm->get_Nz();
this->Dx = lbm->get_Dx(); this->Dy = lbm->get_Dy(); this->Dz = lbm->get_Dz();
this->buffers = buffers;
this->name = name;
this->N = (ulong)this->Nx*(ulong)this->Ny*(ulong)this->Nz;
this->d = buffers[0]->dimensions();
if(this->N*(ulong)this->d==0ull) print_error("Memory size must be larger than 0.");
initialize_auxiliary_variables();
initialize_auxiliary_pointers();
}
inline Memory_Container() {} // default constructor
inline Memory_Container& operator=(Memory_Container&& memory) noexcept { // move assignment
this->N = memory.N;
this->d = memory.d;
this->lbm = memory.lbm;
this->Nx = memory.Nx; this->Ny = memory.Ny; this->Nz = memory.Nz;
this->Dx = memory.Dx; this->Dy = memory.Dy; this->Dz = memory.Dz;
this->buffers = memory.buffers;
this->name = memory.name;
this->N = memory.N;
this->d = memory.d;
initialize_auxiliary_variables();
initialize_auxiliary_pointers();
return *this;
}
inline void reset(const T value=(T)0) {
for(uint domain=0u; domain<Dx*Dy*Dz; domain++) {
for(ulong i=0ull; i<range()/(ulong)(Dx*Dy*Dz); i++) buffers[domain][i] = value;
}
write_to_device();
for(uint domain=0u; domain<D; domain++) buffers[domain]->reset(value);
}
inline const ulong length() const { // buffer length N
	return N;
}
inline const uint dimensions() const { // buffer dimensions d
	return d;
}
inline const ulong range() const { // buffer length times buffer dimensions
	const ulong dims = (ulong)d;
	return N*dims;
}
inline const ulong capacity() const { // returns capacity of the buffer in Byte
	const ulong dims = (ulong)d;
	return N*dims*sizeof(T);
}
inline T& operator[](const ulong i) { return reference(i, 0u); }
inline const T& operator[](const ulong i) const { return reference(i, 0u); }
inline const T operator()(const ulong i) const { return reference(i, 0u); }
inline T& operator[](const ulong i) { return reference(i); }
inline const T& operator[](const ulong i) const { return reference(i); }
inline const T operator()(const ulong i) const { return reference(i); }
// element access by index i and dimension: returns by value whatever reference(i, dimension) resolves to
inline const T operator()(const ulong i, const uint dimension) const { return reference(i, dimension); } // array of structures
inline void read_from_device() {
#ifndef UPDATE_FIELDS
for(uint domain=0u; domain<Dx*Dy*Dz; domain++) lbm->lbm[domain]->enqueue_update_fields(); // make sure data in device memory is up-to-date
for(uint domain=0u; domain<D; domain++) lbm->lbm[domain]->enqueue_update_fields(); // make sure data in device memory is up-to-date
#endif // UPDATE_FIELDS
for(uint domain=0u; domain<Dx*Dy*Dz; domain++) buffers[domain]->enqueue_read_from_device();
for(uint domain=0u; domain<Dx*Dy*Dz; domain++) buffers[domain]->finish_queue();
for(uint domain=0u; domain<D; domain++) buffers[domain]->enqueue_read_from_device();
for(uint domain=0u; domain<D; domain++) buffers[domain]->finish_queue();
}
inline void write_to_device() {
for(uint domain=0u; domain<Dx*Dy*Dz; domain++) buffers[domain]->enqueue_write_to_device();
for(uint domain=0u; domain<Dx*Dy*Dz; domain++) buffers[domain]->finish_queue();
for(uint domain=0u; domain<D; domain++) buffers[domain]->enqueue_write_to_device();
for(uint domain=0u; domain<D; domain++) buffers[domain]->finish_queue();
}
inline void write_host_to_vtk(const string& path="") { // write binary .vtk file
write_vtk(default_filename(path, name, ".vtk", lbm->get_t()));
Expand Down

0 comments on commit 6a9a419

Please sign in to comment.