OpenCL Sample Program issue on Nvidia GeForce

I am new to OpenCL and am trying to run sample code from one of the tutorials for matrix-vector multiplication. There are two files: matvec.c, which launches the kernel, and a second file, which holds the kernel function. The program is as follows:

#define PROGRAM_FILE ""
#define KERNEL_FUNC "matvec_mult"
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#ifdef MAC
#include <OpenCL/cl.h>
#include <CL/cl.h>
int main() {
cl_platform_id platform;
cl_device_id device;
cl_context context;
cl_command_queue queue;
cl_int i, err;
cl_program program;
FILE *program_handle;
char *program_buffer, *program_log;
size_t program_size, log_size;
cl_kernel kernel;
size_t work_units_per_kernel;
float mat[16], vec[4], result[4];
float correct[4] = {0.0f, 0.0f, 0.0f, 0.0f};
cl_mem mat_buff, vec_buff, res_buff;
for(i=0; i<16; i++) {
mat[i] = i * 2.0f;
for(i=0; i<4; i++) {
vec[i] = i * 3.0f;
    correct[0] += mat[i] * vec[i];
    correct[1] += mat[i+4] * vec[i];
    correct[2] += mat[i+8] * vec[i];
    correct[3] += mat[i+12] * vec[i];
clGetPlatformIDs(1, &platform, NULL);
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1,
&device, NULL);
context = clCreateContext(NULL, 1, &device, NULL,
NULL, &err);
program_handle = fopen(PROGRAM_FILE, "r");
fseek(program_handle, 0, SEEK_END);
program_size = ftell(program_handle);
program_buffer = (char*)malloc(program_size + 1);
program_buffer[program_size] = '\0';
fread(program_buffer, sizeof(char), program_size,
program = clCreateProgramWithSource(context, 1,
(const char**)&program_buffer, &program_size, &err);
clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
kernel = clCreateKernel(program, KERNEL_FUNC, &err);
queue = clCreateCommandQueue(context, device, 0, &err);
mat_buff = clCreateBuffer(context, CL_MEM_READ_ONLY |
CL_MEM_COPY_HOST_PTR, sizeof(float)*16, mat, &err);
vec_buff = clCreateBuffer(context, CL_MEM_READ_ONLY |
CL_MEM_COPY_HOST_PTR, sizeof(float)*4, vec, &err);
res_buff = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(float)*4, NULL, &err);
clSetKernelArg(kernel, 0, sizeof(cl_mem), &mat_buff);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &vec_buff);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &res_buff);
work_units_per_kernel = 4;
clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
&work_units_per_kernel, NULL, 0, NULL, NULL);
clEnqueueReadBuffer(queue, res_buff, CL_TRUE, 0,
sizeof(float)*4, result, 0, NULL, NULL);
if((result[0] == correct[0]) && (result[1] == correct[1])
&& (result[2] == correct[2]) && (result[3] == correct[3])) {
printf("Matrix-vector multiplication successful.\n");
else {
printf("Matrix-vector multiplication unsuccessful.\n");
return 0;

The Kernel function is as follows:

#include <CL\cl.h>

/* Matrix-vector product, one result element per work-item.
 * Work-item i takes the i-th float4 of matrix, computes its dot product
 * with the first float4 of vector, and stores the scalar in result[i]. */
__kernel void matvec_mult(__global float4* matrix, __global float4* vector,
                          __global float* result) {
    int i = get_global_id(0);
    result[i] = dot(matrix[i], vector[0]);
}

When I run this sample code, it displays "Matrix-vector multiplication unsuccessful". I am using Visual Studio 2010 and tried to debug the code. The values of the correct variable, which are computed by a simple 'for' loop, do not match the values of the result variable, which are produced by the kernel function. Can somebody help me with this issue and point out where I am going wrong? I am not able to confirm whether it is a code issue or a system issue. Thanks in advance for any help.

One of the errors: "IntelliSense: identifier "get_global_id" is undefined".


Error codes are defined in <CL/cl.h>

Error -45 is CL_INVALID_PROGRAM_EXECUTABLE. According to Khronos, it means that "there is no successfully built executable for program". There is an unnecessary include on the first line of your kernel source. Delete it:

#include <CL\cl.h>

OpenCL C does not allow including regular C/C++ headers. Only OpenCL C-compliant source files can be included.

Generally, to see what errors the OpenCL compiler produces when building a kernel, build the program like this (I took the snippet from existing code, so check variable names, etc.):

cl_int ret;

program = clCreateProgramWithSource(
    context, 1, (const char**)&src_file, NULL, &ret);

if(ret != CL_SUCCESS){
    fprintf(stderr, "Error with code %d happened.\n", ret);

// Warnings will be treated like errors, this is useful for debug
char build_params[] = {"-Werror"};    
ret = clBuildProgram(program, 0, NULL, build_params, NULL, NULL);

if (ret != CL_SUCCESS)
    size_t len = 0;
    char *buffer;

        device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &len);

    buffer = calloc(len, sizeof(char));

        device_id, CL_PROGRAM_BUILD_LOG, len, buffer, NULL);

    fprintf(stderr, "%s\n", buffer);


Need Your Help

Alternatives to using web.config to store settings (for complex solutions) web-config configuration-files

In our web applications, we separate our Data Access Layers out into their own projects.