设为首页 加入收藏

TOP

OpenCL之简单的向量加法实现(三)
2015-11-21 01:03:16 来源: 作者: 【 】 浏览:5
Tags:OpenCL 简单 向量 加法 实现
\n", filename); return 1; } int main(int argc, char* argv[]) { //在 host 内存中创建三个缓冲区 float *buf1 = 0; float *buf2 = 0; float *buf = 0; buf1 = (float *)malloc(NWITEMS * sizeof(float)); buf2 = (float *)malloc(NWITEMS * sizeof(float)); buf = (float *)malloc(NWITEMS * sizeof(float)); //初始化 buf1 和buf2 的内容 int i; srand((unsigned)time(NULL)); for (i = 0; i < NWITEMS; i++) cin >> buf1[i]; //srand((unsigned)time(NULL) + 1000); for (i = 0; i < NWITEMS; i++) cin >> buf2[i]; for (i = 0; i < NWITEMS; i++) buf[i] = buf1[i] + buf2[i]; cl_uint status; cl_platform_id platform; //创建平台对象 status = clGetPlatformIDs(1, &platform, NULL); cl_device_id device; //创建 GPU 设备 clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); //创建context cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); //创建命令队列 cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, NULL); //创建三个 OpenCL 内存对象,并把buf1 的内容通过隐式拷贝的方式 //拷贝到clbuf1, buf2 的内容通过显示拷贝的方式拷贝到clbuf2 cl_mem clbuf1 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, NWITEMS*sizeof(cl_float), buf1, NULL); cl_mem clbuf2 = clCreateBuffer(context, CL_MEM_READ_ONLY, NWITEMS*sizeof(cl_float), NULL, NULL); status = clEnqueueWriteBuffer(queue, clbuf2, 1, 0, NWITEMS*sizeof(cl_float), buf2, 0, 0, 0); cl_mem buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, NWITEMS * sizeof(cl_float), NULL, NULL); const char * filename = "Vadd.cl"; std::string sourceStr; status = convertToString(filename, sourceStr); const char * source = sourceStr.c_str(); size_t sourceSize[] = { strlen(source) }; //创建程序对象 cl_program program = clCreateProgramWithSource( context, 1, &source, sourceSize, NULL); //编译程序对象 status = clBuildProgram(program, 1, &device, NULL, NULL, NULL); if (status) cout << status << endl; if (status != 0) { printf("clBuild failed:%d\n", status); char tbuf[0x10000]; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0x10000, tbuf, NULL); printf("\n%s\n", tbuf); //return ?1; } //创建 
Kernel 对象 cl_kernel kernel = clCreateKernel(program, "Vadd", NULL); //设置 Kernel 参数 cl_int clnum = NWITEMS; status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&clbuf1); if (status) cout << status << endl; status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&clbuf2); if (status) cout << status << endl; clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&buffer); if (status) cout << status << endl; //执行 kernel cl_event ev; size_t global_work_size = NWITEMS; clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, &ev); //clFinish(queue); //数据拷回 host 内存 cl_float *ptr; ptr = (cl_float *)clEnqueueMapBuffer(queue, buffer, CL_TRUE, CL_MAP_READ, 0, NWITEMS * sizeof(cl_float), 0, NULL, NULL, NULL); //结果验证,和 cpu 计算的结果比较 for (int i = 0; i < NWITEMS; i++) cout << ptr[i] << endl; if (!memcmp(buf, ptr, NWITEMS)) printf("Verify passed\n"); else printf("verify failed\n"); if (buf) free(buf); if (buf1) free(buf1); if (buf2) free(buf2); //删除 OpenCL 资源对象 clReleaseMemObject(clbuf1); clReleaseMemObject(clbuf2); clReleaseMemObject(buffer); clReleaseProgram(program); clReleaseCommandQueue(queue); clReleaseContext(context); system("pause"); return 0; }

效果截图

向量加法

Hello World

代码如下

写得非常详细的过程,各个步骤都写出来了。一个hello world就已经这么变态,要是改别的算法,简直不敢想像。

/********************************************************************** Copyright ©2014 Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: • Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. • Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ********************************************************************/ // For clarity,error checking has been omitted. #include 
    
      #include 
     
       #include 
      
        #include 
       
         #include 
        
          #include 
         
           #include 
          
            #define SUCCESS 0 #define FAILURE 1 using namespace std; /* convert the kernel file into a string */ int convertToString(const char *filename, std::string& s) { size_t size; char* str; std::fstream f(filename, (std::fstream::in | std::fstream::binary)); if(f.is_open()) { size_t fileSize; f.seekg(0, std::fstream::end); size = fileSize = (size_t)f.tellg(); f.seekg(0, std::fstream::beg); str = new char[size+1]; if(!str) { f.close(); return 0; } f.read(str, fileSize); f.close(); str[size] = '\0'; s = str; delete[] str; return 0; } cout<<"Error: failed to open file\n:"<
           
             0) { cl_platform_id* platforms = (cl_platform_id* )malloc(numPlatforms* sizeof(cl_platform_id)); status = clGetPlatformIDs(numPlatforms, platforms, NULL); platform = platforms[0]; free(platforms); } /*Step 2:Query the platform and choose the first GPU device if has one.Otherwise use the CPU as device.*/ cl_uint numDevices = 0; cl_device_id *devices; status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices); if (numDevices == 0) //no GPU available. { cout << "No GPU device available." << endl; cout << "Choose CPU as default device." << endl; status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &numDevices); devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id)); status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, numDevices, devices, NULL); } else { devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id)); status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); } /*Step 3: Create context.*/ cl_context context = clCreateContext(NULL,1, devices,NULL,NULL,NULL); /*Step 4: Creating command queue associate with the context.*/ cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL); /*Step 5: Create program object */ const char *filename = "HelloWorld_Kernel.cl"; string sourceStr; status = convertToString(filename, sourceStr); const char *source = sourceStr.c_str(); size_t sourceSize[] = {strlen(source)}; cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL); /*Step 6: Build program. 
*/ status=clBuildProgram(program, 1,devices,NULL,NULL,NULL); /*Step 7: Initial input,output for the host and create memory objects for the kernel*/ const char* input = "GdkknVnqkc"; size_t strlength = strlen(input); cout << "input string:" << endl; cout << input << endl; char *output = (char*) malloc(strlength + 1); cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, (strlength + 1) * sizeof(char),(void *) input, NULL); cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY , (strlength + 1) * sizeof(char), NULL, NULL); /*Step 8: Create kernel object */ cl_kernel kernel = clCreateKernel(program,"helloworld", NULL); /*Step 9: Sets Kernel arguments.*/ status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer); status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&outputBuffer); /*Step 10: Running the kernel.*/ size_t global_work_size[1] = {strlength}; status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); /*Step 11: Read the cout put back to host memory.*/ status = clEnqueueReadBuffer(commandQueue, outputBuffer, CL_TRUE, 0, strlength * sizeof(char), output, 0, NULL, NULL); output[strlength] = '\0'; //Add the terminal character to the end of output. cout << "\noutput string:" << endl; cout << output << endl; /*Step 12: Clean the resources.*/ status = clReleaseKernel(kernel); //Release kernel. status = clReleaseProgram(program); //Release the program object. status = clReleaseMemObject(inputBuffer); //Release mem object. status = clReleaseMemObject(outputBuffer); status = clReleaseCommandQueue(commandQueue); //Release Command queue. status = clReleaseContext(context); //Release context. if (output != NULL) { free(output); output = NULL; } if (devices != NULL) { free(devices); devices = NULL; } std::cout<<"Passed!\n"; return SUCCESS; }
           
          
         
        
       
      
     
    
首页 上一页 1 2 3 下一页 尾页 3/3/3
】【打印繁体】【投稿】【收藏】 【推荐】【举报】【评论】 【关闭】 【返回顶部
分享到: 
上一篇CF# 149 D Coloring Brackets(区.. 下一篇CSU1602: Needle Throwing Game(..

评论

帐  号: 密码: (新用户注册)
验 证 码:
表  情:
内  容: