Skip to content

Commit

Permalink
Merge pull request #1 from jcamachado/feat/cudafy-particles
Browse files Browse the repository at this point in the history
Feat/cudafy particles
  • Loading branch information
jcamachado authored Oct 10, 2023
2 parents e730c64 + 9312ab5 commit ad22c17
Show file tree
Hide file tree
Showing 23 changed files with 242 additions and 26 deletions.
4 changes: 3 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
"string": "cpp",
"unordered_map": "cpp",
"vector": "cpp",
"chrono": "cpp"
"chrono": "cpp",
"iosfwd": "cpp",
"__config": "cpp"
}
}
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,19 @@ gcc 11.4.0 x64
gnu 17
gnu++ 14
nvcc 11.5.119
NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2

Aluno: Jose Carlos de Almeida Machado


Hello, aqui vou disponibilizar codigos feitos para treinar CUDA e prog. paralela.


Aula 1 e 2: ./openmp/ e ./openacc
Aula 1 e 2: ./openmp - 1/ e ./openacc - 2

Aula 3: Esqueleto - ./cuda/particleSystem/*

Aula 4 - Soma de matrizes: ./cuda/matrixSum, processamento de imagem (tornar Grayscale) ./cuda/imageShorteningParallel.cu

Aula 5 - Shared Memory - ./cuda/exercicios de aula/sharedMem1.cu

Binary file modified build/main
Binary file not shown.
Binary file added build/sharedMem1
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
74 changes: 74 additions & 0 deletions cuda/exercicios de aula/sharedMem1.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#define THREADS 8 // threads per block used when launching swapElement
#define ARRAYSIZE 8 // number of int elements in the test array and in the kernel's shared buffer

// Professor, nao lembro do exemplo que o senhor fez em aula, e nao
// consegui achar o seu slide, entao inventei um exemplo para testar


// Test kernel: double every element, then swap each element of the first
// half of the array with its counterpart in the second half.
// For an array of size n:
//   a[i] *= 2                        for every i
//   swap(a[i], a[i + (n + 1) / 2])   for every i < n / 2
// When n is odd, the middle element is doubled but not swapped.
// e.g. input:  [0, 1, 5, 3, 7]
//      output: [6, 14, 10, 0, 2]
// Doubles the ARRAYSIZE ints in `data`, then swaps element i of the first
// half with element i + (ARRAYSIZE + 1) / 2 of the second half, staging the
// values in shared memory.
//
// Launch layout: intended for a single 1-D block (<<<1, T>>>) with any T >= 1;
// each thread strides over the array by blockDim.x. Uses ARRAYSIZE * sizeof(int)
// bytes of static shared memory.
//
// data: device pointer to at least ARRAYSIZE ints, updated in place.
__global__ void swapElement(int* data){
    // The staging buffer is per-block, so extra blocks would only race on
    // `data`. The exit is uniform for a whole block, so it cannot split a
    // barrier below.
    if (blockIdx.x != 0) return;

    __shared__ int shared[ARRAYSIZE];
    const int half = ARRAYSIZE / 2;          // number of pairs to swap
    const int offset = (ARRAYSIZE + 1) / 2;  // skips the middle element when ARRAYSIZE is odd

    // Load and double. The stride loop keeps every access inside the array
    // whether the block has fewer or more threads than elements.
    for (int i = threadIdx.x; i < ARRAYSIZE; i += blockDim.x) {
        shared[i] = data[i] * 2;
    }
    __syncthreads(); // without this barrier the swap could read values not yet written

    // Each pair (i, i + offset) is handled by exactly one thread. The braces
    // matter: the whole swap must be guarded, not only the first statement.
    for (int i = threadIdx.x; i < half; i += blockDim.x) {
        int aux = shared[i];
        shared[i] = shared[i + offset];
        shared[i + offset] = aux;
    }
    __syncthreads(); // make every swap visible before writing back

    for (int i = threadIdx.x; i < ARRAYSIZE; i += blockDim.x) {
        data[i] = shared[i];
    }
}

// Aborts the program with a diagnostic when a CUDA runtime call fails.
static void checkCuda(cudaError_t status, const char* what){
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Prints the ARRAYSIZE elements of `values` on one line, separated by spaces.
static void printArray(const int* values){
    for (int i = 0; i < ARRAYSIZE; i++) {
        printf("%d ", values[i]);
    }
}

// Fills a host array with 0..ARRAYSIZE-1, runs swapElement on it on the GPU
// and prints the array before and after.
// Returns 0 on success; exits with EXIT_FAILURE on allocation or CUDA errors.
int main(){
    int *a = NULL, *d_a = NULL;
    const size_t bytes = sizeof(int) * ARRAYSIZE;

    a = (int*) malloc(bytes);
    if (a == NULL) {
        fprintf(stderr, "malloc failed\n");
        return EXIT_FAILURE;
    }
    for (int i = 0; i < ARRAYSIZE; i++) {
        a[i] = i;
    }

    printf("\nTeste Entrada\n");
    // Each element is printed exactly once; the previous extra a[0] print
    // made the array look one element longer than it is.
    printArray(a);

    checkCuda(cudaMalloc((void**) &d_a, bytes), "cudaMalloc");
    checkCuda(cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice), "copy host to device");

    swapElement<<<1,THREADS>>>(d_a);
    // A kernel launch returns no status; configuration errors surface here.
    checkCuda(cudaGetLastError(), "swapElement launch");

    // This blocking copy also reports faults raised while the kernel ran.
    checkCuda(cudaMemcpy(a, d_a, bytes, cudaMemcpyDeviceToHost), "copy device to host");

    printf("\nTeste saida: \n");
    printArray(a);

    free(a);
    checkCuda(cudaFree(d_a), "cudaFree");
    return 0;
}
File renamed without changes.
5 changes: 4 additions & 1 deletion cuda/particleSystem/jaxeUtils/jx_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
#include <vector>
#include "jx_particle.h"
#include "jx_math.h"
#include "jx_geometry.h"

#define NPARTICLES 1000
#define NPARTICLES 1000000

using namespace std;
// talvez separar generator de painter

vector<particle> generateParticles(int amount=NPARTICLES) {
vector<particle> particles;
Expand All @@ -23,3 +25,4 @@ vector<particle> generateParticles(int amount=NPARTICLES) {
}
return particles;
}

127 changes: 115 additions & 12 deletions cuda/particleSystem/jaxeUtils/jx_geometry.h
Original file line number Diff line number Diff line change
@@ -1,16 +1,119 @@
#include <GL/glut.h>
#include "glm/glm.hpp"
#include <vector>
// #include <GL/glut.h>
// #include "glm/glm.hpp"
// #include <vector>


struct Mesh
{
GLuint PositionHandle;
GLuint NormalHandle;
GLuint IndexHandle;
// struct Mesh
// {
// GLuint PositionHandle;
// GLuint NormalHandle;
// GLuint IndexHandle;

unsigned int IndexBufferLength;
// unsigned int IndexBufferLength;

// Mesh();
// Mesh(std::vector<float> const & Positions, std::vector<float> const & Normals, std::vector<unsigned short> const & Indices);
// void Draw();
// };

// Mesh * GeometryCreator::CreateCube(glm::vec3 const & Size)
// {
// std::vector<float> Positions, Normals;
// std::vector<unsigned short> Indices;

// static float const CubePositions[] =
// {
// -0.5, -0.5, -0.5, // back face verts [0-3]
// -0.5, 0.5, -0.5,
// 0.5, 0.5, -0.5,
// 0.5, -0.5, -0.5,

// -0.5, -0.5, 0.5, // front face verts [4-7]
// -0.5, 0.5, 0.5,
// 0.5, 0.5, 0.5,
// 0.5, -0.5, 0.5,

// -0.5, -0.5, 0.5, // left face verts [8-11]
// -0.5, -0.5, -0.5,
// -0.5, 0.5, -0.5,
// -0.5, 0.5, 0.5,

// 0.5, -0.5, 0.5, // right face verts [12-15]
// 0.5, -0.5, -0.5,
// 0.5, 0.5, -0.5,
// 0.5, 0.5, 0.5,

// -0.5, 0.5, 0.5, // top face verts [16-19]
// -0.5, 0.5, -0.5,
// 0.5, 0.5, -0.5,
// 0.5, 0.5, 0.5,

// -0.5, -0.5, 0.5, // bottom face verts [20-23]
// -0.5, -0.5, -0.5,
// 0.5, -0.5, -0.5,
// 0.5, -0.5, 0.5
// };
// Positions = std::vector<float>(CubePositions, CubePositions + 24 * 3);
// int i = 0;
// for (std::vector<float>::iterator it = Positions.begin(); it != Positions.end(); ++ it, ++ i)
// * it *= Size[i %= 3];

// static float const CubeNormals[] =
// {
// 0, 0, -1, // back face verts [0-3]
// 0, 0, -1,
// 0, 0, -1,
// 0, 0, -1,

// 0, 0, 1, // front face verts [4-7]
// 0, 0, 1,
// 0, 0, 1,
// 0, 0, 1,

// -1, 0, 0, // left face verts [8-11]
// -1, 0, 0,
// -1, 0, 0,
// -1, 0, 0,

// 1, 0, 0, // right face verts [12-15]
// 1, 0, 0,
// 1, 0, 0,
// 1, 0, 0,

// 0, 1, 0, // top face verts [16-19]
// 0, 1, 0,
// 0, 1, 0,
// 0, 1, 0,

// 0, -1, 0, // bottom face verts [20-23]
// 0, -1, 0,
// 0, -1, 0,
// 0, -1, 0
// };
// Normals = std::vector<float>(CubeNormals, CubeNormals + 24 * 3);

// static unsigned short const CubeIndices[] =
// {
// 0, 1, 2, // back face verts [0-3]
// 2, 3, 0,

// 4, 7, 6, // front face verts [4-7]
// 6, 5, 4,

// 8, 11, 10, // left face verts [8-11]
// 10, 9, 8,

// 12, 13, 14, // right face verts [12-15]
// 14, 15, 12,

// 16, 19, 18, // top face verts [16-19]
// 18, 17, 16,

// 20, 21, 22, // bottom face verts [20-23]
// 22, 23, 20
// };
// Indices = std::vector<unsigned short>(CubeIndices, CubeIndices + 12 * 3);

// return new Mesh(Positions, Normals, Indices);
// }

Mesh();
Mesh(std::vector<float> const & Positions, std::vector<float> const & Normals, std::vector<unsigned short> const & Indices);
};
20 changes: 15 additions & 5 deletions cuda/particleSystem/jaxeUtils/jx_particle.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
//Abstracao da particula

// Std. Includes
#include <vector>

//cuda
#include <cuda_runtime.h>

// GL Includes
#include <glm/glm.hpp>
class particle {
Expand All @@ -16,9 +20,8 @@ class particle {

float mass;


// particle(glm::vec3 pos, glm::vec3 vel, glm::vec3 acc, glm::vec4 colr , float s) {
particle(glm::vec3 pos, glm::vec3 vel, glm::vec4 colr , float siz, float mas, int life=100) {
__host__ __device__ particle(glm::vec3 pos, glm::vec3 vel, glm::vec4 colr , float siz, float mas, int life=100) {
position = pos;
velocity = vel;
// acceleration = acc;
Expand All @@ -27,19 +30,26 @@ class particle {
mass = mas;
lifeTime = life;
}
void applyForce(glm::vec3 force) {
// Accumulates `force` into this particle's acceleration for the current step.
// Qualified __host__ __device__ (previously __device__ only) so it matches the
// constructor and update() and can also be called from host code.
// NOTE(review): the force is added as-is, without dividing by mass, while the
// comment in update() describes forces / mass. Confirm which is intended.
__host__ __device__ void applyForce(glm::vec3 force) {
    acceleration += force;
}

void update(float dt) {
// void update(float dt) {
// velocity += acceleration * dt;
// position += velocity * dt;
// }

// Advances the particle by one time step of length dt: velocity is integrated
// from the accumulated acceleration, then position is integrated from that
// updated velocity. The acceleration is cleared at the end.
__host__ __device__ void update(float dt) {
// Modeled after an Euler step:
// newPosition = oldPosition + dt * (forces / mass)
// where forces / mass = derivEval
velocity += acceleration * dt;
position += velocity * dt;

// NOTE(review): the commented-out copy on the next line suggests this
// decrement is being disabled. Confirm which one should remain.
lifeTime--;
// lifeTime--;
acceleration *=0; // reset the acceleration: it is not cumulative across steps
}



};
9 changes: 6 additions & 3 deletions cuda/particleSystem/jaxeUtils/jx_physics.h
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
// inclide std
#include <glm/glm.hpp>
#include <cuda_runtime.h>

#define GRAVITY glm::vec3(0.0f, -9.8f, 0.0f)

// Weigth of a particle
// Weight = mass * gravity
glm::vec3 weightForce(float mass){
return GRAVITY * mass;
// glm::vec3 weightForce(float mass){
// return GRAVITY * mass;
// }
// Weight force acting on a particle: the GRAVITY vector scaled by `mass`.
// Callable from host and device code. Declared inline because the definition
// lives in a header and would otherwise be emitted once per including
// translation unit.
__host__ __device__ inline glm::vec3 weightForce(float mass){
    return GRAVITY * mass;
}

// Drag of a particle
// Drag = -velocity * drag
// glm::vec3 returnDrag(glm::vec3 velocity, float drag){
Expand Down
25 changes: 22 additions & 3 deletions cuda/particleSystem/main.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,32 @@ void draw_particles() {
glEnd();
}

void update_particles(float dt) {
// Ideally, summing update would receive a sum of forces
for (int i = 0; i < particles.size(); i++) {
// void update_particles(float dt) {
// // Ideally, summing update would receive a sum of forces
// for (int i = 0; i < particles.size(); i++) {
// particles[i].applyForce(weightForce(particles[i].mass));
// particles[i].update(dt);
// }
// }
// Applies the weight force to one particle per thread and advances it by dt.
// Expects a 1-D launch with at least num_particles threads in total; surplus
// threads exit without touching memory.
//
// particles:     device array of num_particles elements, updated in place
// dt:            time step passed to particle::update
// num_particles: number of valid entries in `particles`
__global__ void update_particles_kernel(particle* particles, float dt, int num_particles) {
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= num_particles) {
        return; // tail threads of the last block have no particle
    }

    particle& current = particles[idx];
    current.applyForce(weightForce(current.mass));
    current.update(dt);
}
// Advances every particle in the host-side `particles` container by dt on the
// GPU: uploads the array, runs update_particles_kernel with 256-thread blocks,
// and copies the result back.
//
// If any CUDA step fails, the host particles are left untouched instead of
// being overwritten with whatever the device buffer happens to contain.
// NOTE(review): failures are not reported because this file's logging
// facilities are not visible from here. Consider surfacing `status`.
// NOTE(review): the device buffer is allocated and freed on every call, and
// the whole array makes a round trip each time. Consider a persistent buffer.
void update_particles(float dt) {
    const size_t count = particles.size();
    if (count == 0) {
        return; // nothing to simulate; a zero-block launch is an invalid configuration
    }
    const size_t bytes = count * sizeof(particle);

    particle* d_particles = nullptr;
    if (cudaMalloc(&d_particles, bytes) != cudaSuccess) {
        return;
    }

    cudaError_t status = cudaMemcpy(d_particles, particles.data(), bytes, cudaMemcpyHostToDevice);

    if (status == cudaSuccess) {
        const int block_size = 256;
        // Ceil-div so a final, partially filled block covers the tail.
        const int num_blocks = static_cast<int>((count + block_size - 1) / block_size);
        update_particles_kernel<<<num_blocks, block_size>>>(d_particles, dt, static_cast<int>(count));
        status = cudaGetLastError(); // a launch returns no status of its own
    }

    if (status == cudaSuccess) {
        // Blocking copy: waits for the kernel before reading the results.
        cudaMemcpy(particles.data(), d_particles, bytes, cudaMemcpyDeviceToHost);
    }

    cudaFree(d_particles);
}

void drawCube(float scale=1.0f, bool isWireframe=true) {
glLoadIdentity(); // Reset the model-view matrix
Expand Down
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 comments on commit ad22c17

Please sign in to comment.