Merge pull request #1 from jcamachado/feat/cudafy-particles

Feat/cudafy particles
jcamachado · Oct 10, 2023 · ad22c17 · ad22c17
2 parents e730c64 + 9312ab5
commit ad22c17
Show file tree

Hide file tree

Showing 23 changed files with 242 additions and 26 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -18,6 +18,8 @@
         "string": "cpp",
         "unordered_map": "cpp",
         "vector": "cpp",
-        "chrono": "cpp"
+        "chrono": "cpp",
+        "iosfwd": "cpp",
+        "__config": "cpp"
     }
 }
diff --git a/README.md b/README.md
@@ -5,17 +5,19 @@ gcc 11.4.0 x64
 gnu 17
 gnu++ 14
 nvcc 11.5.119
+NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2
 
 Aluno: Jose Carlos de Almeida Machado
 
 
 Hello, aqui vou disponibilizar codigos feitos para treinar CUDA e prog. paralela.
 
 
-Aula 1 e 2: ./openmp/ e ./openacc
+Aula 1 e 2: ./openmp - 1/ e ./openacc - 2
 
 Aula 3: Esqueleto - ./cuda/particleSystem/*
 
 Aula 4 - Soma de matrizes: ./cuda/matrixSum, processamento de imagem (tornar Grayscale) ./cuda/imageShorteningParallel.cu
 
+Aula 5 - Shared Memory - ./cuda/exercicios de aula/sharedMem1.cu 
 
diff --git a/build/main b/build/main
diff --git a/build/sharedMem1 b/build/sharedMem1
diff --git a/cuda/imageShortening.cu → cuda/exercicios de aula/imageShortening.cu b/cuda/imageShortening.cu → cuda/exercicios de aula/imageShortening.cu
diff --git a/cuda/imageShorteningParallel.cu → ...cicios de aula/imageShorteningParallel.cu b/cuda/imageShorteningParallel.cu → ...cicios de aula/imageShorteningParallel.cu
diff --git a/cuda/matrixSum.cu → cuda/exercicios de aula/matrixSum.cu b/cuda/matrixSum.cu → cuda/exercicios de aula/matrixSum.cu
diff --git a/cuda/exercicios de aula/sharedMem1.cu b/cuda/exercicios de aula/sharedMem1.cu
@@ -0,0 +1,74 @@
+#include <math.h>
+#include <stdio.h>
+
+#define THREADS 8 // threads per block used when launching swapElement
+#define ARRAYSIZE 8 // number of ints in the test array
+
+// Professor, nao lembro do exemplo que o senhor fez em aula, e nao
+// consegui achar o seu slide, entao inventei um exemplo para testar 
+
+
+// Shared-memory exercise: doubles every element of `data`, then swaps each
+// element of the first half with its counterpart in the second half.
+//   1. shared[i] = 2 * data[i]                       for every i < ARRAYSIZE
+//   2. swap(shared[i], shared[i + (ARRAYSIZE+1)/2])  for every i < ARRAYSIZE/2
+// When ARRAYSIZE is odd, the middle element is doubled but not moved.
+// Example with 5 elements: input  [0, 1, 5, 3, 7]
+//                          output [6, 14, 10, 0, 2]
+//
+// Launch requirements: one block with at least ARRAYSIZE threads. The staging
+// buffer is __shared__ (per block) and is indexed by threadIdx.x, so a
+// multi-block launch would make every block process the same ARRAYSIZE ints.
+// `data` must point to at least ARRAYSIZE ints of device memory.
+__global__ void swapElement(int* data){
+    const int tid = threadIdx.x;
+    const int half = ARRAYSIZE / 2;           // number of swaps to perform
+    const int offset = (ARRAYSIZE + 1) / 2;   // distance to the swap partner
+    __shared__ int shared[ARRAYSIZE];
+
+    // Stage the doubled value; surplus threads must not touch the buffer.
+    if (tid < ARRAYSIZE) {
+        shared[tid] = data[tid] * 2;
+    }
+    // Without this barrier a thread could read a partner slot that has not
+    // been written yet, making the result indeterminate.
+    __syncthreads();
+
+    // Only the first half swaps; each thread owns a disjoint pair of slots.
+    if (tid < half) {
+        const int partner = tid + offset;
+        const int aux = shared[tid];
+        shared[tid] = shared[partner];
+        shared[partner] = aux;
+    }
+    // Second-half slots were written by first-half threads: wait for them.
+    __syncthreads();
+
+    if (tid < ARRAYSIZE) {
+        data[tid] = shared[tid];
+    }
+}
+
+// Reports a failed CUDA runtime call on stderr.
+// Returns true when `status` is cudaSuccess, false otherwise.
+static bool cudaOk(cudaError_t status, const char* what){
+    if (status == cudaSuccess) {
+        return true;
+    }
+    fprintf(stderr, "\nCUDA error in %s: %s\n", what, cudaGetErrorString(status));
+    return false;
+}
+
+// Host driver for the swapElement exercise: fills an ARRAYSIZE-int array with
+// 0..ARRAYSIZE-1, prints it, runs swapElement on a device copy using one block
+// of THREADS threads, then prints the result.
+// Returns 0 on success and 1 if a host allocation or any CUDA call fails.
+int main(){
+    int *a = NULL, *d_a = NULL;
+    int exitCode = 1;
+
+    a = (int*) malloc(sizeof(int) * ARRAYSIZE);
+    if (a == NULL) {
+        fprintf(stderr, "malloc failed\n");
+        return exitCode;
+    }
+    for(int i = 0; i < ARRAYSIZE; i++){
+        a[i] = i;
+    }
+
+    printf("\nTeste Entrada\n");
+    for(int i = 0; i < ARRAYSIZE; i++){
+        printf("%d ", a[i]);
+    }
+
+    // Single-pass loop so any failure can `break` to the shared cleanup below.
+    do {
+        if (!cudaOk(cudaMalloc((void**) &d_a, sizeof(int) * ARRAYSIZE), "cudaMalloc")) break;
+        if (!cudaOk(cudaMemcpy(d_a, a, sizeof(int) * ARRAYSIZE, cudaMemcpyHostToDevice),
+                    "cudaMemcpy (host to device)")) break;
+
+        swapElement<<<1,THREADS>>>(d_a);
+        // A launch does not return a status; configuration errors show up here.
+        if (!cudaOk(cudaGetLastError(), "swapElement launch")) break;
+
+        // The blocking copy waits for the kernel, so execution faults are
+        // reported by this call.
+        if (!cudaOk(cudaMemcpy(a, d_a, sizeof(int) * ARRAYSIZE, cudaMemcpyDeviceToHost),
+                    "cudaMemcpy (device to host)")) break;
+
+        printf("\nTeste saida: \n");
+        for(int i = 0; i < ARRAYSIZE; i++){
+            printf("%d ", a[i]);
+        }
+        exitCode = 0;
+    } while (0);
+
+    free(a);
+    cudaFree(d_a); // freeing a null device pointer is a no-op
+    return exitCode;
+}
diff --git a/cuda/vecadd.cu → cuda/exercicios de aula/vecadd.cu b/cuda/vecadd.cu → cuda/exercicios de aula/vecadd.cu
diff --git a/cuda/particleSystem/jaxeUtils/jx_generator.h b/cuda/particleSystem/jaxeUtils/jx_generator.h
@@ -3,10 +3,12 @@
 #include <vector>
 #include "jx_particle.h"
 #include "jx_math.h"
+#include "jx_geometry.h"
 
-#define NPARTICLES 1000
+#define NPARTICLES 1000000
 
 using namespace std;
+// talvez separar generator de painter
 
 vector<particle> generateParticles(int amount=NPARTICLES) {
     vector<particle> particles;
@@ -23,3 +25,4 @@ vector<particle> generateParticles(int amount=NPARTICLES) {
     }
     return particles;
 }
+
diff --git a/cuda/particleSystem/jaxeUtils/jx_geometry.h b/cuda/particleSystem/jaxeUtils/jx_geometry.h
@@ -1,16 +1,119 @@
-#include <GL/glut.h>
-#include "glm/glm.hpp"
-#include <vector>
+// #include <GL/glut.h>
+// #include "glm/glm.hpp"
+// #include <vector>
 
 
-struct Mesh
-{
-    GLuint PositionHandle;
-    GLuint NormalHandle;
-    GLuint IndexHandle;
+// struct Mesh
+// {
+//     GLuint PositionHandle;
+//     GLuint NormalHandle;
+//     GLuint IndexHandle;
 
-    unsigned int IndexBufferLength;
+//     unsigned int IndexBufferLength;
+
+//     Mesh();
+//     Mesh(std::vector<float> const & Positions, std::vector<float> const & Normals, std::vector<unsigned short> const & Indices);
+//     void Draw();
+// };
+
+//     Mesh * GeometryCreator::CreateCube(glm::vec3 const & Size)
+// {
+//     std::vector<float> Positions, Normals;
+//     std::vector<unsigned short> Indices;
+
+//     static float const CubePositions[] =
+//     {
+//         -0.5, -0.5, -0.5, // back face verts [0-3]
+//         -0.5,  0.5, -0.5,
+//          0.5,  0.5, -0.5,
+//          0.5, -0.5, -0.5,
+
+//         -0.5, -0.5,  0.5, // front face verts [4-7]
+//         -0.5,  0.5,  0.5,
+//          0.5,  0.5,  0.5,
+//          0.5, -0.5,  0.5,
+
+//         -0.5, -0.5,  0.5, // left face verts [8-11]
+//         -0.5, -0.5, -0.5,
+//         -0.5,  0.5, -0.5,
+//         -0.5,  0.5,  0.5,
+
+//          0.5, -0.5,  0.5, // right face verts [12-15]
+//          0.5, -0.5, -0.5,
+//          0.5,  0.5, -0.5,
+//          0.5,  0.5,  0.5,
+
+//         -0.5,  0.5,  0.5, // top face verts [16-19]
+//         -0.5,  0.5, -0.5,
+//          0.5,  0.5, -0.5,
+//          0.5,  0.5,  0.5,
+
+//         -0.5, -0.5,  0.5, // bottom face verts [20-23]
+//         -0.5, -0.5, -0.5,
+//          0.5, -0.5, -0.5,
+//          0.5, -0.5,  0.5
+//     };
+//     Positions = std::vector<float>(CubePositions, CubePositions + 24 * 3);
+//     int i = 0;
+//     for (std::vector<float>::iterator it = Positions.begin(); it != Positions.end(); ++ it, ++ i)
+//         * it *= Size[i %= 3];
+
+//     static float const CubeNormals[] =
+//     {
+//          0,  0, -1, // back face verts [0-3]
+//          0,  0, -1,
+//          0,  0, -1,
+//          0,  0, -1,
+
+//          0,  0,  1, // front face verts [4-7]
+//          0,  0,  1,
+//          0,  0,  1,
+//          0,  0,  1,
+
+//         -1,  0,  0, // left face verts [8-11]
+//         -1,  0,  0,
+//         -1,  0,  0,
+//         -1,  0,  0,
+
+//          1,  0,  0, // right face verts [12-15]
+//          1,  0,  0,
+//          1,  0,  0,
+//          1,  0,  0,
+
+//          0,  1,  0, // top face verts [16-19]
+//          0,  1,  0,
+//          0,  1,  0,
+//          0,  1,  0,
+
+//          0, -1,  0, // bottom face verts [20-23]
+//          0, -1,  0,
+//          0, -1,  0,
+//          0, -1,  0
+//     };
+//     Normals = std::vector<float>(CubeNormals, CubeNormals + 24 * 3);
+
+//     static unsigned short const CubeIndices[] =
+//     {
+//          0,  1,  2, // back face verts [0-3]
+//          2,  3,  0,
+
+//          4,  7,  6, // front face verts [4-7]
+//          6,  5,  4,
+
+//          8, 11, 10, // left face verts [8-11]
+//         10,  9,  8,
+
+//         12, 13, 14, // right face verts [12-15]
+//         14, 15, 12,
+
+//         16, 19, 18, // top face verts [16-19]
+//         18, 17, 16,
+
+//         20, 21, 22, // bottom face verts [20-23]
+//         22, 23, 20
+//     };
+//     Indices = std::vector<unsigned short>(CubeIndices, CubeIndices + 12 * 3);
+
+//     return new Mesh(Positions, Normals, Indices);
+// }
 
-    Mesh();
-    Mesh(std::vector<float> const & Positions, std::vector<float> const & Normals, std::vector<unsigned short> const & Indices);
-};
diff --git a/cuda/particleSystem/jaxeUtils/jx_particle.h b/cuda/particleSystem/jaxeUtils/jx_particle.h
@@ -1,7 +1,11 @@
+//Abstracao da particula
 
 // Std. Includes
 #include <vector>
 
+//cuda
+#include <cuda_runtime.h>
+
 // GL Includes
 #include <glm/glm.hpp>
 class particle {
@@ -16,9 +20,8 @@ class particle {
 
     float mass;
 
-
     // particle(glm::vec3 pos, glm::vec3 vel, glm::vec3 acc, glm::vec4 colr , float s) {
-    particle(glm::vec3 pos, glm::vec3 vel, glm::vec4 colr , float siz, float mas, int life=100) {
+    __host__ __device__  particle(glm::vec3 pos, glm::vec3 vel, glm::vec4 colr , float siz, float mas, int life=100) {
         position = pos;
         velocity = vel;
         // acceleration = acc;
@@ -27,19 +30,26 @@ class particle {
         mass = mas;
         lifeTime = life;
     }
-    void applyForce(glm::vec3 force) {
+    __device__ void applyForce(glm::vec3 force) {
         acceleration += force;
     }
 
-    void update(float dt) {
+    // void update(float dt) {
+    //     velocity += acceleration * dt;
+    //     position += velocity * dt;
+    // }
+
+    __host__ __device__ void update(float dt) {
         // pensando como a funcao de euler, 
         // newPosition = oldPosition + dt * (Forcas/massa)
         // onde forcas/massa = derivEval
         velocity += acceleration * dt;
         position += velocity * dt;
 
-        lifeTime--;
+        // lifeTime--;
         acceleration *=0; //reset acceleration, its not cumulative
     }
+
+
 
 };
diff --git a/cuda/particleSystem/jaxeUtils/jx_physics.h b/cuda/particleSystem/jaxeUtils/jx_physics.h
@@ -1,14 +1,17 @@
 // Includes
 #include <glm/glm.hpp>
+#include <cuda_runtime.h>
 
 #define GRAVITY glm::vec3(0.0f, -9.8f, 0.0f)
 
 // Weight of a particle
 // Weight = mass * gravity
-glm::vec3 weightForce(float mass){
-    return GRAVITY * mass;
+// glm::vec3 weightForce(float mass){
+//     return GRAVITY * mass;
+// }
+// Returns the weight force acting on a particle: GRAVITY scaled by `mass`.
+// Callable from both host and device code. Declared inline because this
+// definition lives in a header and may be included by several translation
+// units.
+__host__ __device__ inline glm::vec3 weightForce(float mass){
+    return GRAVITY * mass;
 }
-
 // Drag of a particle
 // Drag = -velocity * drag
 // glm::vec3 returnDrag(glm::vec3 velocity, float drag){

diff --git a/cuda/particleSystem/main.cu b/cuda/particleSystem/main.cu
@@ -23,13 +23,32 @@ void draw_particles() {
     glEnd();
 }
 
-void update_particles(float dt) {
-    // Ideally, summing update would receive a sum of forces
-    for (int i = 0; i < particles.size(); i++) {
+// void update_particles(float dt) {
+//     // Ideally, summing update would receive a sum of forces
+//     for (int i = 0; i < particles.size(); i++) {
+//         particles[i].applyForce(weightForce(particles[i].mass));
+//         particles[i].update(dt);
+//     }
+// }
+// Kernel: one thread per particle. Thread `idx` applies the weight force to
+// particles[idx] and then advances it by `dt`.
+//   particles     - device pointer to at least `num_particles` particles
+//   dt            - time step forwarded to particle::update
+//   num_particles - number of valid entries; surplus threads exit immediately
+// Expects a 1D launch whose grid covers at least `num_particles` threads.
+__global__ void update_particles_kernel(particle* particles, float dt, int num_particles) {
+    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
+    if (idx >= num_particles) {
+        return;
+    }
+
+    particle& current = particles[idx];
+    current.applyForce(weightForce(current.mass));
+    current.update(dt);
+}
+// Host wrapper: advances every element of the global `particles` container by
+// one step of `dt` on the GPU. The particles are uploaded to a temporary
+// device buffer, processed by update_particles_kernel, and copied back.
+// If any CUDA call fails, the host particles are left as they were for this
+// step instead of being overwritten with unprocessed device memory.
+// NOTE(review): the device buffer is allocated and freed on every call;
+// consider keeping it alive across frames if this runs once per frame.
+void update_particles(float dt) {
+    const size_t count = particles.size();
+    if (count == 0) {
+        return; // nothing to do; a grid of 0 blocks is an invalid launch
+    }
+    const size_t bytes = count * sizeof(particle);
+
+    particle* d_particles = nullptr;
+    if (cudaMalloc(&d_particles, bytes) != cudaSuccess) {
+        return;
+    }
+
+    if (cudaMemcpy(d_particles, particles.data(), bytes, cudaMemcpyHostToDevice) == cudaSuccess) {
+        const int block_size = 256;
+        // Ceil-div so the last, partially filled block is still launched.
+        const int num_blocks = static_cast<int>((count + block_size - 1) / block_size);
+        update_particles_kernel<<<num_blocks, block_size>>>(d_particles, dt, static_cast<int>(count));
+
+        // Launches return no status; only copy back when the launch was accepted.
+        if (cudaGetLastError() == cudaSuccess) {
+            cudaMemcpy(particles.data(), d_particles, bytes, cudaMemcpyDeviceToHost);
+        }
+    }
+    cudaFree(d_particles);
+}
 
 void drawCube(float scale=1.0f, bool isWireframe=true) {
     glLoadIdentity();                 // Reset the model-view matrix

diff --git a/...gFaPjyY7dcUT5F6nd7XWkApYB7aSjMrNgmy0VI72vdV91q-LALHx8yQfs6ia6mZsRKNkIYTNLRbyQ_kgv7YsgGg1Q b/...gFaPjyY7dcUT5F6nd7XWkApYB7aSjMrNgmy0VI72vdV91q-LALHx8yQfs6ia6mZsRKNkIYTNLRbyQ_kgv7YsgGg1Q
diff --git a/openacc/README.md → openacc - 2/README.md b/openacc/README.md → openacc - 2/README.md
diff --git a/openacc/solver/Makefile → openacc - 2/solver/Makefile b/openacc/solver/Makefile → openacc - 2/solver/Makefile
diff --git a/openacc/solver/README.md → openacc - 2/solver/README.md b/openacc/solver/README.md → openacc - 2/solver/README.md
diff --git a/openacc/solver/jsolvec → openacc - 2/solver/jsolvec b/openacc/solver/jsolvec → openacc - 2/solver/jsolvec
diff --git a/openacc/solver/jsolvec.cpp → openacc - 2/solver/jsolvec.cpp b/openacc/solver/jsolvec.cpp → openacc - 2/solver/jsolvec.cpp
diff --git a/openacc/solver/jsolvef.F90 → openacc - 2/solver/jsolvef.F90 b/openacc/solver/jsolvef.F90 → openacc - 2/solver/jsolvef.F90
diff --git a/openmp/starrynight → openmp - 1/starrynight b/openmp/starrynight → openmp - 1/starrynight
diff --git a/openmp/starrynight.cpp → openmp - 1/starrynight.cpp b/openmp/starrynight.cpp → openmp - 1/starrynight.cpp