Skip to content

Commit

Permalink
Finish yolo layer shader
Browse files Browse the repository at this point in the history
  • Loading branch information
yourcomrade committed Aug 14, 2023
1 parent d2c3788 commit ee4c4bd
Show file tree
Hide file tree
Showing 17 changed files with 906,027 additions and 9 deletions.
16 changes: 13 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ OPENMP=0
LIBSO=0
ZED_CAMERA=0
ZED_CAMERA_v2_8=0

GLES2 = 1
# set GPU=1 and CUDNN=1 to speedup on GPU
# set CUDNN_HALF=1 for a further ~3x speedup (mixed precision on Tensor Cores); requires GPU: Volta, Xavier, Turing or newer
# set AVX=1 and OPENMP=1 to speedup on CPU (if error occurs then set AVX=0)
# set ZED_CAMERA=1 to enable ZED SDK 3.0 and above
# set ZED_CAMERA_v2_8=1 to enable ZED SDK 2.X

USE_CPP=0
DEBUG=0
DEBUG=1

ARCH= -gencode arch=compute_35,code=sm_35 \
-gencode arch=compute_50,code=[sm_50,compute_50] \
Expand Down Expand Up @@ -80,9 +80,10 @@ LDFLAGS= -lm -pthread
COMMON= -Iinclude/ -I3rdparty/stb/include
CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC -rdynamic


ifeq ($(DEBUG), 1)
#OPTS= -O0 -g
#OPTS= -Og -g
#OPTS= -Og -g -O0
COMMON+= -DDEBUG
CFLAGS+= -DDEBUG
else
Expand All @@ -91,6 +92,11 @@ CFLAGS+= -ffp-contract=fast -mavx -mavx2 -msse3 -msse4.1 -msse4.2 -msse4a
endif
endif

ifeq ($(GLES2), 1)
COMMON+= -DGLES2
CFLAGS+= -DGLES2
endif

CFLAGS+=$(OPTS)

ifneq (,$(findstring MSYS_NT,$(OS)))
Expand Down Expand Up @@ -157,6 +163,10 @@ LDFLAGS+= -lstdc++
OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
endif

ifeq ($(GLES2), 1)
OBJ+= gles2_helper.o
LDFLAGS+= -lgbm -lEGL -lGLESv2
endif
OBJS = $(addprefix $(OBJDIR), $(OBJ))
DEPS = $(wildcard src/*.h) Makefile include/darknet.h

Expand Down
Binary file added predictions.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
29 changes: 29 additions & 0 deletions src/activation_gles2_shader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#include "gles2_helper.h"
#include "common_shader.h"
#ifndef ACTIVATION_SHADER_GLES2_H
#define ACTIVATION_SHADER_GLES2_H


/*
 * Fragment shader text for the LEAKY activation.
 *
 * Each fragment samples one texel of `data` at `texco`, unpacks it to a float
 * with decode_float(), computes max(0.1*a, a) (a for a >= 0, 0.1*a for a < 0)
 * and writes the result packed again with encode_float().
 *
 * This text does not define decode_float()/encode_float(); it is meant to be
 * appended to encode_decode_float_shader from common_shader.h before the
 * shader is compiled.
 *
 * Declared static so that including this header from more than one
 * translation unit does not create duplicate external definitions, matching
 * the shader strings in common_shader.h.
 */
static const char frag_leaky_activate_shader[] = STRINGIFY(
    uniform sampler2D data;
    varying vec2 texco;
    void main(){
        vec4 inp_data = texture2D(data, texco);
        float a = decode_float(inp_data);

        float res = max(0.1*a, a);
        gl_FragColor = encode_float(res);
    }
);
/*
 * Fragment shader text for the LOGISTIC (sigmoid) activation.
 *
 * Each fragment samples one texel of `data` at `texco`, unpacks it to a float
 * with decode_float(), computes 1/(1 + exp(-a)) and writes the result packed
 * again with encode_float().
 *
 * This text does not define decode_float()/encode_float(); it is meant to be
 * appended to encode_decode_float_shader from common_shader.h before the
 * shader is compiled.
 *
 * Declared static so that including this header from more than one
 * translation unit does not create duplicate external definitions, matching
 * the shader strings in common_shader.h.
 */
static const char frag_logistic_activate_shader[] = STRINGIFY(
    uniform sampler2D data;
    varying vec2 texco;
    void main(){
        vec4 inp_data = texture2D(data, texco);
        float a = decode_float(inp_data);
        //Sigmoid is evaluated directly and needs no branching
        float res = 1.0/(1.0 + exp(-a));
        gl_FragColor = encode_float(res);
    }
);
#endif
83 changes: 83 additions & 0 deletions src/activation_layer.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include "dark_cuda.h"
#include "blas.h"
#include "gemm.h"
#include "gles2_helper.h"
#include "activation_gles2_shader.h"

#include <math.h>
#include <stdio.h>
Expand All @@ -21,8 +23,13 @@ layer make_activation_layer(int batch, int inputs, ACTIVATION activation)
l.output = (float*)xcalloc(batch * inputs, sizeof(float));
l.delta = (float*)xcalloc(batch * inputs, sizeof(float));


l.forward = forward_activation_layer;
l.backward = backward_activation_layer;
// #ifdef GLES2
// printf("Make GLES2 activation layer\n");
// l.forward = forward_activation_layer_gles2;
// #endif
#ifdef GPU
l.forward_gpu = forward_activation_layer_gpu;
l.backward_gpu = backward_activation_layer_gpu;
Expand All @@ -37,6 +44,7 @@ layer make_activation_layer(int batch, int inputs, ACTIVATION activation)

/*
 * CPU forward pass of an activation layer.
 *
 * Copies l.outputs * l.batch floats from state.input into l.output (both with
 * stride 1) and then applies l.activation to l.output in place through
 * activate_array().
 *
 * l     - layer whose output buffer and activation type are used
 * state - network state supplying the input buffer
 */
void forward_activation_layer(layer l, network_state state)
{
#ifdef DEBUG
    /* Trace only in debug builds (the Makefile passes -DDEBUG when DEBUG=1);
       printing on every forward pass would otherwise flood stdout. */
    printf("Use forward_activaion_layer\n");
#endif
    copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1);
    activate_array(l.output, l.outputs*l.batch, l.activation);
}
Expand All @@ -61,3 +69,78 @@ void backward_activation_layer_gpu(layer l, network_state state)
copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1);
}
#endif
/*
#ifdef GLES2
char * choose_activate_shader(ACTIVATION a){
char *res = NULL;
switch(a){
case LEAKY:
res = (char*)(calloc(sizeof(encode_decode_float_shader) + sizeof(frag_leaky_activate_shader),
sizeof(char)));
break;
case LOGISTIC:
res = (char*)(calloc(sizeof(encode_decode_float_shader) + sizeof(frag_logistic_activate_shader),
sizeof(char)));
break;
default:
return NULL;
}
if( res == NULL){
perror("Cannot allocate memory for fragment shader activation!");
abort();
}
else{
strcat(res, encode_decode_float_shader);
switch (a)
{
case LEAKY:
strcat(res, frag_leaky_activate_shader);
break;
case LOGISTIC:
strcat(res, frag_logistic_activate_shader);
default:
break;
}
return res;
}
}
void forward_activation_layer_gles2(layer l, network_state state)
{
//copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1);
//activate_array_gles2(l.output, l.outputs*l.batch, l.activation);
printf("USE forward activation gles\n");
if(l.activation == LINEAR){
copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1);
}
else{
gles2_data* data = gles2_make_farr(state.input, l.outputs*l.batch);
gles2_data* res = gles2_make_farr(NULL, l.outputs*l.batch);
gles2_make_surface(my_sp, res->textSize, res->textSize);
gles2_push_farr(my_con, res, NULL, false);
my_con->ver_shader = vertex;
my_con->frag_shader = choose_activate_shader(l.activation);
gles2_build(my_con);
gles2_push_farr(my_con, data, "data", true);
gles2_make_fbo(my_con, res);
gles2_setViewport(res->textSize, res->textSize);
gles2_compute(my_con);
gles2_pull_farr(l.output, l.outputs*l.batch, res);
gles2_free_dev_farr(data);
gles2_free_dev_farr(res);
gles2_destroy_fbo(my_con);
gles2_destroy_surface(my_sp);
}
}
#endif
*/
3 changes: 3 additions & 0 deletions src/activation_layer.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ void forward_activation_layer_gpu(layer l, network_state state);
void backward_activation_layer_gpu(layer l, network_state state);
#endif

/*
 * GLES2 forward pass for the activation layer; declared only when the build
 * defines GLES2.
 * NOTE(review): the definition in activation_layer.c is currently wrapped in a
 * block comment, so a call to this function would presumably fail at link
 * time - confirm before wiring it into make_activation_layer().
 */
#ifdef GLES2
void forward_activation_layer_gles2(layer l, network_state state);
#endif
#ifdef __cplusplus
}
#endif
Expand Down
1 change: 1 addition & 0 deletions src/activations.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ float activate(float x, ACTIVATION a)

void activate_array(float *x, const int n, const ACTIVATION a)
{

int i;
if (a == LINEAR) {}
else if (a == LEAKY) {
Expand Down
8 changes: 8 additions & 0 deletions src/activations.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ void gradient_array_normalize_channels_softmax_ongpu(float *output_gpu, int n, i

#endif

/*
 * GLES2 variants of the activation routines; declared only when the build
 * defines GLES2.
 * NOTE(review): only prototypes are visible here. The parameter names ending
 * in _gpu look like they were carried over from the CUDA prototypes - confirm
 * which kind of buffer (host memory or GLES2 texture data) each one expects.
 */
#ifdef GLES2
void activate_array_gles2(float *x, int n, ACTIVATION a);
void activate_array_swish_gles2(float *x, int n, float *output_sigmoid_gpu, float *output_gpu);
void activate_array_mish_gles2(float *x, int n, float *activation_input_gpu, float *output_gpu);
void activate_array_hard_mish_gles2(float *x, int n, float *activation_input_gpu, float *output_gpu);
void activate_array_normalize_channels_gles2(float *x, int n, int batch, int channels, int wh_step, float *output_gpu);
void activate_array_normalize_channels_softmax_gles2(float *x, int n, int batch, int channels, int wh_step, float *output_gpu, int use_max_val);
#endif
static inline float stair_activate(float x)
{
int n = floorf(x);
Expand Down
1 change: 1 addition & 0 deletions src/col2im.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ void col2im_cpu_ext(const float* data_col, const int channels,
const int dilation_h, const int dilation_w,
float* data_im)
{
printf("use function col2im_cpu_ext\n");
caffe_set(height * width * channels, 0.0F, data_im);
const int output_h = (height + 2 * pad_h -
(dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
Expand Down
62 changes: 62 additions & 0 deletions src/common_shader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#ifndef COMMON_SHADER_H
#define COMMON_SHADER_H
/*
 * Turns its whole argument list into one string literal; used to embed GLSL
 * source in C. The macro is variadic so that shader text containing a comma
 * outside parentheses (for example "float a, b;") is still accepted as a
 * single string instead of being rejected as too many macro arguments.
 * Runs of whitespace and newlines between tokens collapse to a single space,
 * and comments are removed before stringification.
 */
#define STRINGIFY(...) #__VA_ARGS__
/*
 * Pass-through vertex shader shared by the GLES2 compute passes.
 * It places each vertex at vec4(pos, 1.0) and forwards the per-vertex texture
 * coordinate inputTexCoord to the fragment stage through the varying texco.
 */
static const char vertex[] = STRINGIFY(
    attribute vec3 pos;
    attribute vec2 inputTexCoord;
    varying vec2 texco;
    void main() {
        gl_Position = vec4(pos, 1.0);
        texco = inputTexCoord;}
);
/*
 * GLSL helpers that move a 32-bit float through an RGBA8 texel.
 *
 * encode_float(v) splits v into sign, 8-bit biased exponent and 23-bit
 * mantissa and returns the four bytes scaled to [0,1]; after the .abgr
 * swizzle r holds the lowest mantissa byte and a holds sign plus the upper
 * exponent bits. Magnitudes below min_float become all-zero bytes, values at
 * or beyond +/-max_float become the +/-infinity bit patterns.
 *
 * decode_float(t) is the inverse: it rebuilds the float from a texel sampled
 * as four [0,1] channels.
 *
 * The text also sets "precision highp float" and is meant to be placed in
 * front of a fragment shader body that calls these two functions.
 */
static const char encode_decode_float_shader[] = STRINGIFY(
    precision highp float;
    const float max_float = 1.70141184e38;
    const float min_float = 1.17549435e-38;
    lowp vec4 encode_float(highp float v) {
        highp float av = abs(v);
        highp vec4 c = vec4(0,0,0,0);

        //Keep the value fed to log2 and pow inside the representable range.
        //Without this v == 0 evaluates log2(0) and 0*inf and overflow evaluates
        //inf*0 so the step() masks below would multiply NaN instead of selecting.
        //Inputs inside the range are unchanged by the clamp.
        highp float sav = clamp(av, min_float, max_float);

        //Compute exponent and mantissa
        highp float e = floor(log2(sav));
        highp float m = sav * pow(2.0, -e) - 1.0;

        //Unpack mantissa
        c[1] = floor(128.0 * m);
        m -= c[1] / 128.0;
        c[2] = floor(32768.0 * m);
        m -= c[2] / 32768.0;
        c[3] = floor(8388608.0 * m);

        //Unpack exponent
        highp float ebias = e + 127.0;
        c[0] = floor(ebias / 2.0);
        ebias -= c[0] * 2.0;
        c[1] += floor(ebias) * 128.0;

        //Unpack sign bit
        c[0] += 128.0 * step(0.0, -v);

        //Check for 1st condition if(av < min_float) return vec4(0)
        highp vec4 res1 = vec4(step( min_float, av)*c);
        //Check for 2nd condition if(v > max_float) return vec4(127.0, 128.0, 0.0, 0.0)
        highp vec4 res2 = (1.0 - step(max_float, v))*res1 + step(max_float, v)*vec4(127.0, 128.0, 0.0, 0.0);
        //Check for 3rd condition if(v < -max_float) return vec4(255.0, 128.0, 0.0, 0.0)
        highp vec4 res3 = (1.0 - step(v, -max_float))*res2 + step(v, -max_float)*vec4(255.0, 128.0, 0.0, 0.0);
        //Final result
        highp vec4 fin_res = res3.abgr;

        //Scale back to range
        return fin_res / 255.0;
    }
    float decode_float(vec4 v) {
        vec4 bits = v * 255.0;
        //Sign is the top bit of the last byte. 127.5 separates 0..127 from 128..255
        //and tolerates small rounding of the sampled channel value.
        float sign = mix(-1.0, 1.0, step(bits[3], 127.5));
        float expo = floor(mod(bits[3] + 0.1, 128.0)) * 2.0 +
                     floor((bits[2] + 0.1) / 128.0) - 127.0;
        float sig = bits[0] +
                    bits[1] * 256.0 +
                    floor(mod(bits[2] + 0.1, 128.0)) * 256.0 * 256.0;
        //Biased exponent 0 (expo == -127) is what encode_float writes for zero
        //so decode it as 0 instead of 2^-127.
        float nonzero = step(-126.5, expo);
        //The mantissa has 23 bits so its weight is 2^23 = 8388608
        return nonzero * sign * (1.0 + sig / 8388608.0) * pow(2.0, expo);
    }
);
#endif
20 changes: 17 additions & 3 deletions src/darknet.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "dark_cuda.h"
#include "blas.h"
#include "connected_layer.h"
#include "gles2_helper.h"


extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top);
Expand Down Expand Up @@ -461,9 +462,14 @@ int main(int argc, char **argv)
}

#ifndef GPU
gpu_index = -1;
printf(" GPU isn't used \n");
init_cpu();
#ifdef GLES2

#else
gpu_index = -1;
printf(" GPU isn't used \n");
init_cpu();
#endif

#else // GPU
if(gpu_index >= 0){
cuda_set_device(gpu_index);
Expand All @@ -479,6 +485,11 @@ int main(int argc, char **argv)

#endif // GPU

#ifdef GLES2


#endif

show_opencv_info();

if (0 == strcmp(argv[1], "average")){
Expand Down Expand Up @@ -555,5 +566,8 @@ int main(int argc, char **argv)
} else {
fprintf(stderr, "Not an option: %s\n", argv[1]);
}
#ifdef GLES2

#endif
return 0;
}
Loading

0 comments on commit ee4c4bd

Please sign in to comment.