c - OpenCL kernel causes application to run indefinitely and only stops after I shut down my IDE -
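I am trying to run portions of this code on the GPU using OpenCL. Right now I am trying to run the function that deals with the YCbCr-to-RGB conversion.
Please note that, as of now, I am not trying to optimize the GPU code. I just want output identical to that on the CPU.
The function was originally written like this: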
/**
 * Convert one block of planar YCbCr samples to packed RGB pixels.
 *
 * The block is (8 * nb_mcu_h) samples wide and (8 * nb_mcu_v) samples high,
 * stored row by row in each plane.
 *
 * @param ycbcr_mcu  Three input planes: [0] = Y, [1] = Cb, [2] = Cr.
 * @param rgb_mcu    Output buffer; receives one 0x00RRGGBB word per sample.
 * @param nb_mcu_h   Horizontal block count (width = 8 * nb_mcu_h).
 * @param nb_mcu_v   Vertical block count (height = 8 * nb_mcu_v).
 */
void ycbcr_to_argb(uint8_t *ycbcr_mcu[3], uint32_t *rgb_mcu, uint32_t nb_mcu_h, uint32_t nb_mcu_v)
{
    const uint8_t *mcu_y = ycbcr_mcu[0];
    const uint8_t *mcu_cb = ycbcr_mcu[1];
    const uint8_t *mcu_cr = ycbcr_mcu[2];
    const uint32_t width = 8 * nb_mcu_h;
    const uint32_t height = 8 * nb_mcu_v;

    /*
     * Counters and the sample index are 32-bit: 8-bit counters never reach
     * a bound of 256 or more, and an 8-bit index wraps past 255.
     */
    for (uint32_t i = 0; i < height; i++) {
        for (uint32_t j = 0; j < width; j++) {
            const uint32_t index = i * width + j;

            /* Float arithmetic, truncated toward zero on conversion to int. */
            int r = (mcu_cr[index] - 128) * 1.402f + mcu_y[index];
            int b = (mcu_cb[index] - 128) * 1.7772f + mcu_y[index];
            int g = mcu_y[index] - (mcu_cb[index] - 128) * 0.34414f
                    - (mcu_cr[index] - 128) * 0.71414f;

            /* Saturate each channel to 0..255. */
            if (r > 255) r = 255;
            if (r < 0) r = 0;
            if (g > 255) g = 255;
            if (g < 0) g = 0;
            if (b > 255) b = 255;
            if (b < 0) b = 0;

            rgb_mcu[index] = ((uint32_t)(r & 0xff) << 16)
                           | ((uint32_t)(g & 0xff) << 8)
                           | (uint32_t)(b & 0xff);
        }
    }
}
The variables used by this function are declared in main.c
in the following way:
cl_uchar* ycbcr_mcu[3] = { null, null, null}; cl_uint* rgb_mcu = null;
Memory for these variables is allocated in this way:
if (screen_init_needed == 1) { screen_init_needed = 0; ..... ..... //some code (index = 0 ; index < sof_section.n ; index++) { ycbcr_mcu[index] = malloc(mcu_sx * mcu_sy * max_ss_h * max_ss_v); ycbcr_mcu_ds[index] = malloc(mcu_sx * mcu_sy * max_ss_h * max_ss_v); } rgb_mcu = malloc (mcu_sx * mcu_sy * max_ss_h * max_ss_v * sizeof(cl_int)); } break; }
I directly copied and pasted this function into a .cl
file and made a few minor changes to make it agree with the OpenCL standard. The modified OpenCL code looked like this:
__kernel void ycbcr_to_argb(__global uchar* ycbcr_mcu[3], __global uint* rgb_mcu, uint nb_mcu_h, uint nb_mcu_v) { __global uchar *mcu_y, *mcu_cb, *mcu_cr; int r, g, b; uint argb; uchar index, i, j; mcu_y = ycbcr_mcu[0]; mcu_cb = ycbcr_mcu[1]; mcu_cr = ycbcr_mcu[2]; //same code first code snippet ...... ...... ...... }
When I built and ran the application with the above kernel code in the .cl
file, I got some errors. One of the errors stated that OpenCL doesn't allow pointer-to-pointer arguments.
In order to get around these errors, I modified the code again, like this:
__kernel void ycbcr_to_argb(__global uchar ycbcr_mcu[3], __global uint* rgb_mcu, uint nb_mcu_h, uint nb_mcu_v) { __global uchar *mcu_y, *mcu_cb, *mcu_cr; int r, g, b; uint argb; uchar index, i, j; mcu_y = &ycbcr_mcu[0]; mcu_cb = &ycbcr_mcu[1]; mcu_cr = &ycbcr_mcu[2]; //same code first code snippet ...... ...... ...... }
When I built and ran the application again, I did not get any errors. This prompted me to write the host code for this kernel.
It looks like this:
color_kernel= clcreatekernel(program, "ycbcr_to_argb", &ret); //ycbcr_mcu ycbcrtoargb cl_mem colormcu_gpu= clcreatebuffer(context, cl_mem_read_write, 3 * sizeof(cl_uchar), null, &ret); //rgb_mcu ycbcrtoargb cl_mem rgb_gpu= clcreatebuffer(context, cl_mem_read_write, sizeof(cl_uint), null, &ret);
I called the kernel, with its arguments, at the place where the original function is called in main.c
. I performed the remaining steps for this kernel in the following way:
if(color&&(sof_section.n>1) { ret = clenqueuewritebuffer(command_queue, colormcu_gpu, cl_true, 0, 3 * sizeof(cl_uchar), ycbcr_mcu, 0, null, null); ret = clenqueuewritebuffer(command_queue, rgb_gpu, cl_true, 0, sizeof(cl_uint), rgb_mcu, 0, null, null); ret = clsetkernelarg(color_kernel, 0, sizeof(cl_mem), (void *)&colormcu_gpu); ret |= clsetkernelarg(color_kernel, 1, sizeof(cl_mem), (void *)&rgb_gpu); ret = clsetkernelarg(color_kernel, 2, sizeof(cl_uint), (void *)&max_ss_h); ret |= clsetkernelarg(color_kernel, 3, sizeof(cl_uint), (void *)&max_ss_v); ret = clenqueuetask(command_queue, color_kernel, 0, null, null); ret = clenqueuereadbuffer(command_queue, rgb_gpu, cl_true, 0, sizeof(cl_uint), rgb_mcu, 0, null, null); //ycbcr_to_argb(ycbcr_mcu, rgb_mcu, max_ss_h, max_ss_v);
After I build and run the code with these arguments, the code keeps running indefinitely (the output is supposed to be a movie clip playing on the screen; with this code, I only get a black screen). I have to close Eclipse and reopen it to make any additional changes to the code after this.
What is causing the program to behave like this? Is there any way to safely run this function on the GPU?
Update:
I followed Anders Cedronius' advice and changed my kernel code in the following way:
/*
 * OpenCL kernel: one work-item converts one YCbCr sample to a packed
 * 0x00RRGGBB word. Work-item (i, j) = (get_global_id(0), get_global_id(1))
 * handles row i, column j of a block that is (8 * nb_mcu_h) wide and
 * (8 * nb_mcu_v) high.
 */
__kernel void ycbcr_to_argb(__global uchar ycbcr_mcu[3], __global uint* rgb_mcu, uint nb_mcu_h, uint nb_mcu_v)
{
    printf("doing color conversion\n");

    __global uchar *mcu_y, *mcu_cb, *mcu_cr;
    int r, g, b;
    uint argb;
    uchar index, i, j;

    i = get_global_id(0);
    j = get_global_id(1);

    /*
     * NOTE(review): these are the addresses of three consecutive bytes of
     * the single ycbcr_mcu buffer, not three separate planes, so the
     * "planes" overlap and are offset from each other by one byte.
     */
    mcu_y = &ycbcr_mcu[0];
    mcu_cb = &ycbcr_mcu[1];
    mcu_cr = &ycbcr_mcu[2];

    if (i < 8 * nb_mcu_v && j < 8 * nb_mcu_h) {
        index = i * (8 * nb_mcu_h) + j;

        r = (mcu_cr[index] - 128) * 1.402f + mcu_y[index];
        b = (mcu_cb[index] - 128) * 1.7772f + mcu_y[index];
        g = mcu_y[index] - (mcu_cb[index] - 128) * 0.34414f
            - (mcu_cr[index] - 128) * 0.71414f;

        /* Saturate each channel to 0..255. */
        if (r > 255) r = 255;
        if (r < 0) r = 0;
        if (g > 255) g = 255;
        if (g < 0) g = 0;
        if (b > 255) b = 255;
        if (b < 0) b = 0;

        argb = ((r & 0xff) << 16) | ((g & 0xff) << 8) | (b & 0xff);
        rgb_mcu[(i * (8 * nb_mcu_h) + j)] = argb;
    }

    printf("finished color conversion\n");
}
My host code for creating the kernel looks like this:
color_kernel= clcreatekernel(program, "ycbcr_to_argb", &ret);
I am setting the work size and the kernel arguments in the following way:
ret = clenqueuewritebuffer(command_queue, colormcu_gpu, cl_true, 0, 3*sizeof(cl_uchar), ycbcr_mcu, 0, null, null); chk(ret, "clenqueuewritebuffer"); ret = clenqueuewritebuffer(command_queue, rgb_gpu, cl_true, 0, sizeof(cl_uint), rgb_mcu, 0, null, null); chk(ret, "clenqueuewritebuffer"); ret = clsetkernelarg(color_kernel, 0, sizeof(cl_mem), (void *)&colormcu_gpu); ret |= clsetkernelarg(color_kernel, 1, sizeof(cl_mem), (void *)&rgb_gpu); ret = clsetkernelarg(color_kernel, 2, sizeof(cl_uint), (void *)&max_ss_h); ret |= clsetkernelarg(color_kernel, 3, sizeof(cl_uint), (void *)&max_ss_v); size_t itemcolor[2] = {1, 1}; ret = clenqueuendrangekernel(command_queue, kernel, 2, null, itemcolor, null, 0, null, null); chk(ret, "clenqueuendrange"); ret = clenqueuereadbuffer(command_queue, rgb_gpu, cl_true, 0, sizeof(cl_uint), rgb_mcu, 0, null, null); clfinish(command_queue);
I ran the code and I no longer get a black screen. However, the kernel for "ycbcr to rgb" is not being recognized now. Even my printf comments are not being displayed on the output console. It is as if my code does not have a color conversion function at all.
Update:
I hadn't changed the name of the kernel in the command enqueuendrangekernel
. I changed the name and now the printf statements are appearing on the console. However, I am still not getting the correct output.
size_t itemcolor[2] = {1, 1}; ret = clenqueuendrangekernel(command_queue, color_kernel, 2, null, itemcolor, null, 0, null, null); chk(ret, "clenqueuendrange"); clfinish(command_queue);
Update:
I followed pmdj's advice and made changes to my kernel code. It now looks like this:
/*
 * OpenCL kernel: one work-item converts one YCbCr sample to a packed
 * 0x00RRGGBB word.
 *
 * y_gpu, cb_gpu, cr_gpu  Input planes, one byte per sample, stored row by
 *                        row with a row length of 8 * nb_mcu_h.
 * rgb_mcu                Output buffer, one uint per sample, same layout.
 * nb_mcu_h, nb_mcu_v     Block counts; the converted area is
 *                        (8 * nb_mcu_h) columns by (8 * nb_mcu_v) rows.
 *
 * Work-item (get_global_id(0), get_global_id(1)) handles (row, column).
 * Work-items outside the area do nothing, so the global size may be larger
 * than the block.
 */
__kernel void ycbcr_to_argb(__global uchar* y_gpu, __global uchar* cb_gpu, __global uchar* cr_gpu, __global uint* rgb_mcu, uint nb_mcu_h, uint nb_mcu_v)
{
    /* Full-width ids: 8-bit ids and indices truncate once they pass 255. */
    const uint row = get_global_id(0);
    const uint col = get_global_id(1);
    const uint width = 8 * nb_mcu_h;
    const uint height = 8 * nb_mcu_v;

    /*
     * Strict bounds: row == height or col == width lies one past the block
     * and would read and write outside the buffers.
     */
    if (row >= height || col >= width)
        return;

    const uint index = row * width + col;

    /* Float arithmetic, truncated toward zero on conversion to int. */
    int r = (cr_gpu[index] - 128) * 1.402f + y_gpu[index];
    int b = (cb_gpu[index] - 128) * 1.7772f + y_gpu[index];
    int g = y_gpu[index] - (cb_gpu[index] - 128) * 0.34414f
            - (cr_gpu[index] - 128) * 0.71414f;

    /* Saturate each channel to 0..255. */
    if (r > 255) r = 255;
    if (r < 0) r = 0;
    if (g > 255) g = 255;
    if (g < 0) g = 0;
    if (b > 255) b = 255;
    if (b < 0) b = 0;

    rgb_mcu[index] = ((r & 0xff) << 16) | ((g & 0xff) << 8) | (b & 0xff);
}
In my host code, I created and allocated memory for 4 new variables:
y_forgpu= (cl_uchar *)malloc(mcu_sx * mcu_sy * max_ss_h * max_ss_v); cb_forgpu= (cl_uchar *)malloc(mcu_sx * mcu_sy * max_ss_h * max_ss_v); cr_forgpu= (cl_uchar *)malloc(mcu_sx * mcu_sy * max_ss_h * max_ss_v); //now rgb rgb_testing= (cl_uint *)malloc (mcu_sx * mcu_sy * max_ss_h * max_ss_v * sizeof(cl_int));
I created the buffers in the following way:
cl_mem for_y= clcreatebuffer(context, cl_mem_read_write| cl_mem_copy_host_ptr, (mcu_sx * mcu_sy * max_ss_h * max_ss_v), y_forgpu, &ret); cl_mem for_cb= clcreatebuffer(context, cl_mem_read_write| cl_mem_copy_host_ptr, (mcu_sx * mcu_sy * max_ss_h * max_ss_v), cb_forgpu , &ret); cl_mem for_cr= clcreatebuffer(context, cl_mem_read_write| cl_mem_copy_host_ptr, (mcu_sx * mcu_sy * max_ss_h * max_ss_v), cr_forgpu, &ret); //rgb_mcu ycbcrtoargb cl_mem rgb_gpu= clcreatebuffer(context, cl_mem_read_write, (mcu_sx * mcu_sy * max_ss_h * max_ss_v * sizeof(cl_int)), null, &ret);
I set the kernel arguments, executed the kernel and sent the computed data back to the host:
ret = clsetkernelarg(color_kernel, 0, sizeof(cl_mem), &for_y); ret |= clsetkernelarg(color_kernel, 1, sizeof(cl_mem), &for_cb); ret |= clsetkernelarg(color_kernel, 2, sizeof(cl_mem), &for_cr); ret |= clsetkernelarg(color_kernel, 3, sizeof(cl_mem), &rgb_gpu); ret |= clsetkernelarg(color_kernel, 4, sizeof(cl_uint), &max_ss_h); ret |= clsetkernelarg(color_kernel, 5, sizeof(cl_uint), &max_ss_v); const size_t itemcolor[2] = {100, 100}; ret = clenqueuendrangekernel(command_queue, color_kernel, 2, null, itemcolor, null, 0, null, null); clfinish(command_queue); //copy result host ret = clenqueuereadbuffer(command_queue, rgb_gpu, cl_true, 0, (mcu_sx * mcu_sy * max_ss_h * max_ss_v * sizeof(cl_int)), rgb_testing, 0, null, null);
However, my code now terminates abruptly. Why might this be happening?
Update:
My code is working now. The problems were occurring due to differences in the pointers. I set the Y, Cb, Cr and RGB variables (which I had created) equal to the original variables in the host code.
//---setting color variables equal array elements----// y_forgpu= ycbcr_mcu[0]; cb_forgpu= ycbcr_mcu[1]; cr_forgpu= ycbcr_mcu[2]; //----rgb being assigned value-----// rgb_testing= rgb_mcu;
I don't know if this is the cause of all your problems (there may be more that I haven't yet spotted), but you have a type mismatch in the ycbcr_mcu
kernel argument. You can't have pointer-to-pointer arguments, that is true. Just removing the *
won't fix it, though.
In particular, this line
mcu_cb = &ycbcr_mcu[1];
in the kernel gets a pointer 1 byte past the start of whatever ycbcr_mcu points to, which, looking at the host code, is the start of an array of pointers, not an array of pixels.
ret = clsetkernelarg(color_kernel, 0, sizeof(cl_mem), (void *)&colormcu_gpu);
It looks like ycbcr_mcu
is supposed to be an array of 3 pointers to the Y, Cb and Cr planes containing your source pixels. You need to pass these to the kernel as 3 direct pointers to the 3 arrays instead of as a pointer to 3 pointers. In other words, turn them into y, cb and cr arguments, and set them to colormcu_gpu[0]
through colormcu_gpu[2]
on the host.
Comments
Post a Comment