c - Running OpenCL using Visual Studio 2015 -
I'm a newbie in OpenCL. So far I have followed the Dr. Dobb's OpenCL tutorials and a few others, and ran them on Ubuntu, where they worked. The same code refuses to work on Windows using Visual Studio, even though the required environment variables are set correctly. I'm using a 980M with CUDA SDK 8 on VS 2015. I have two files: one in C and one kernel (.cl) file. Whenever I add both the .c and .cl files, the program refuses to run, throwing errors such as "couldn't find the program file" and things like that. However, if I write the kernel within the C file, it works about 1 time out of 3. The same program works fine on a PC running Ubuntu 16 and on a PC with an AMD card running Ubuntu 16. The program I'm trying to run is a vector addition written in C. I've attached a link to the code: OpenCL vector addition.
add_numbers.c
#define program_file "add_numbers.cl" #define kernel_func "vecadd" #include <math.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <time.h> #ifdef __linux #include <unistd.h> #include <fcntl.h> #endif // __linux #ifdef mac #include <opencl/cl.h> #else #include <cl/cl.h> #endif /* find gpu or cpu associated first available platform */ cl_device_id create_device() { cl_platform_id platform; cl_device_id dev; int err; /* identify platform */ err = clgetplatformids(1, &platform, null); if(err < 0) { perror("couldn't identify platform"); exit(1); } /* access device */ err = clgetdeviceids(platform, cl_device_type_cpu, 1, &dev, null); if(err == cl_device_not_found) { err = clgetdeviceids(platform, cl_device_type_gpu, 1, &dev, null); } if(err < 0) { perror("couldn't access devices"); exit(1); } return dev; } /* create program file , compile */ cl_program build_program(cl_context ctx, cl_device_id dev, const char* filename) { cl_program program; file *program_handle; char *program_buffer, *program_log; size_t program_size, log_size; int err; /* read program file , place content buffer */ program_handle = fopen(filename, "r"); if(program_handle == null) { perror("couldn't find program file"); exit(1); } fseek(program_handle, 0, seek_end); program_size = ftell(program_handle); rewind(program_handle); program_buffer = (char*)malloc(program_size + 1); program_buffer[program_size] = '\0'; fread(program_buffer, sizeof(char), program_size, program_handle); fclose(program_handle); /* create program file */ program = clcreateprogramwithsource(ctx, 1, (const char**)&program_buffer, &program_size, &err); if(err < 0) { perror("couldn't create program"); exit(1); } free(program_buffer); /* build program */ err = clbuildprogram(program, 0, null, null, null, null); if(err < 0) { /* find size of log , print std output */ clgetprogrambuildinfo(program, dev, cl_program_build_log, 0, null, &log_size); program_log = (char*) malloc(log_size + 1); program_log[log_size] = 
'\0'; clgetprogrambuildinfo(program, dev, cl_program_build_log, log_size + 1, program_log, null); printf("%s\n", program_log); free(program_log); exit(1); } return program; } int main() { /* opencl structures */ cl_device_id device; cl_context context; cl_program program; cl_kernel kernel; cl_command_queue queue; cl_device_type dev_type; //new // cl_int i, j, err; size_t local_size, global_size; // vector add pgm // length of vectors unsigned int n = 1000000; // host input vectors double *h_a; double *h_b; // host output vector double *h_c; // device input buffers cl_mem d_a; cl_mem d_b; // device output buffer cl_mem d_c; // size, in bytes, of each vector size_t bytes = n*sizeof(double); // allocate memory each vector on host h_a = (double *)malloc(bytes); h_b = (double *)malloc(bytes); h_c = (double *)malloc(bytes); // initialize vectors on host int i; for( = 0; < n; i++ ) { /*h_a[i] = sinf(i)*sinf(i); h_b[i] = cosf(i)*cosf(i);*/ h_a[i] = i+1; h_b[i] = i+2; } // size_t globalsize, localsize; cl_int err; // number of work items in each local work group local_size = 64; // number of total work items - localsize must devisor global_size = (n/local_size)*local_size; /* create device , context */ device = create_device(); context = clcreatecontext(null, 1, &device, null, null, &err); if(err < 0) { perror("couldn't create context"); exit(1); } /* build program */ program = build_program(context, device, program_file); /* create data buffer */ input_buffer = clcreatebuffer(context, cl_mem_read_only | cl_mem_copy_host_ptr, array_size * sizeof(float), data, &err); sum_buffer = clcreatebuffer(context, cl_mem_read_write | cl_mem_copy_host_ptr, num_groups * sizeof(float), sum, &err); if(err < 0) { perror("couldn't create buffer"); exit(1); };*/ //input buffers d_a = clcreatebuffer(context, cl_mem_read_only, bytes, null, null); d_b = clcreatebuffer(context, cl_mem_read_only, bytes, null, null); d_c = clcreatebuffer(context, cl_mem_write_only, bytes, null, null); /* create 
command queue */ queue = clcreatecommandqueue(context, device, 0, &err); if(err < 0) { perror("couldn't create command queue"); exit(1); }; /* create kernel */ kernel = clcreatekernel(program, kernel_func, &err); if(err < 0) { perror("couldn't create kernel"); exit(1); }; // newer args under test err = clsetkernelarg(kernel, 0, sizeof(cl_mem), &d_a); err |= clsetkernelarg(kernel, 1, sizeof(cl_mem), &d_b); err |= clsetkernelarg(kernel, 2, sizeof(cl_mem), &d_c); err |= clsetkernelarg(kernel, 3, sizeof(unsigned int), &n); if(err < 0) { perror("couldn't create kernel argument"); exit(1); } /* enqueue kernel */ err = clenqueuendrangekernel(queue, kernel, 1, null, &global_size, &local_size, 0, null, null); if(err < 0) { perror("couldn't enqueue kernel"); exit(1); } /* read kernel's output */ err = clenqueuereadbuffer(queue, d_c, cl_true, 0, bytes, h_c, 0, null, null ); if(err < 0) { perror("couldn't read buffer"); exit(1); } //cl_device_type dev_type; clgetdeviceinfo(device, cl_device_type, sizeof(dev_type), &dev_type, null); if (dev_type == cl_device_type_gpu) { puts("i'm 100% sure device gpu"); } else puts("device cpu\n"); // sum calc. double sum = 0; for(i=0; i<n; i++) //sum += h_c[i]; sum = h_a[i] + h_b[i]; printf("final result: %lf\n",(sum/n)); /* deallocate resources */ clreleasememobject(d_a); clreleasememobject(d_b); clreleasememobject(d_c); clreleasekernel(kernel); clreleasecommandqueue(queue); clreleaseprogram(program); clreleasecontext(context); return 0; }
add_numbers.cl (kernel file)
/* Double precision is an optional feature; the extension must be enabled
 * before the first use of the double type, i.e. ahead of the kernel
 * signature rather than inside its body. */
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

/*
 * Element-wise vector addition: c[i] = a[i] + b[i] for every i < n.
 *
 * a, b - input vectors of at least n doubles
 * c    - output vector of at least n doubles
 * n    - number of elements to process
 *
 * Each work item handles the element at its global id in dimension 0.
 */
__kernel void vecadd(__global const double *a,
                     __global const double *b,
                     __global double *c,
                     const unsigned int n)
{
    /* global work-item id; kept unsigned so the comparison with n does not
     * mix signed and unsigned operands */
    size_t id = get_global_id(0);

    /* work items beyond the end of the vectors do nothing */
    if (id < n) {
        c[id] = a[id] + b[id];
    }
}
Comments
Post a Comment