OpenCL——AMD平台搭建

Keelia ·
更新时间:2024-09-20
· 887 次阅读

AMD OpenCL

介绍在Windows系统下基于AMD GPU的OpenCL环境的搭建,并且基于矢量加法实例程序。

构建示例

在传统的AMD APP SDK设计中,已经不存在了,不知道为什么。但是AMD给了一种其他的方式,

https://github.com/GPUOpen-LibrariesAndSDKs/OCL-SDK/releases

在这里插入图片描述
下载OCL_SDK_Light_AMD.exe,进行安装。

安装完成之后
在这里插入图片描述
在这里插入图片描述

在VS中创建一个项目,博主使用的是VS 2019。创建完的项目以后,具体操作如下

在新建的解决方案中,右键点击解决方案名称,选择属性选项。

依次选择 C++ —> 常规 ——> 附加包含目录 ——> 编辑
在这里插入图片描述

依次选择 链接器 ——> 常规 ——> 附加库目录

在这里插入图片描述

链接器 ——> 输入 ——> 附加依赖项

在这里插入图片描述

矢量加示例

main.cpp

#include #include #include const int ARRAY_SIZE = 1000; /* 1. 创建平台 2. 创建设备 3. 根据设备创建上下文 */ cl_context CreateContext(cl_device_id* device) { cl_int errNum; cl_uint numPlateforms; cl_platform_id firstPlatformId; cl_context context = NULL; errNum = clGetPlatformIDs(1, &firstPlatformId, &numPlateforms); if (errNum != CL_SUCCESS || numPlateforms <= 0) { printf("Failed to find any OpenCL platforms.\n"); return NULL; } errNum = clGetDeviceIDs(firstPlatformId, CL_DEVICE_TYPE_GPU, 1, device, NULL); if (errNum != CL_SUCCESS) { printf("There is no GPU , trying CPU...\n"); errNum = clGetDeviceIDs(firstPlatformId, CL_DEVICE_TYPE_CPU, 1, device, NULL); } if (errNum != CL_SUCCESS) { printf("There is NO CPU or GPU\n"); return NULL; } context = clCreateContext(NULL, 1, device, NULL, NULL, &errNum); if (errNum != CL_SUCCESS) { printf("Create context error\n"); return NULL; } return context; } /* @ 在上下文可用的第一个设备中创建命令队列 */ cl_command_queue CreateCommandQueue(cl_context context, cl_device_id device) { cl_int errNum; cl_command_queue commandQueue = NULL; //commandQueue = clCreateCommandQueue(context, device, 0, NULL); // OpenCL 2.0 的用法 commandQueue = clCreateCommandQueueWithProperties(context, device, 0, NULL); if (commandQueue == NULL) { printf("Failed to create commandQueue for device 0\n"); return NULL; } return commandQueue; } char* ReadKernelSourceFile(const char* filename, size_t* length) { FILE* file = NULL; size_t sourceLenth; char* sourceString; int ret; file = fopen(filename, "rb"); if (file == NULL) { printf("%s at %d : Can't open %s \n", __FILE__, __LINE__ - 2, filename); return NULL; } //重定位到文件末尾 fseek(file, 0, SEEK_END); sourceLenth = ftell(file); //重定位到文件开头 fseek(file, 0, SEEK_SET); sourceString = (char*)malloc(sourceLenth + 1); sourceString[0] = '\0'; ret = fread(sourceString, sourceLenth, 1, file); if (ret == 0) { printf("%s at %d : Cant't read source %s\n", __FILE__, __LINE__ - 2, filename); return NULL; } fclose(file); if (length != 0) { *length = sourceLenth; } sourceString[sourceLenth] = '\0'; return sourceString; } cl_program CreateProgram(cl_context context, cl_device_id device, const char* filename) { cl_int errNum; cl_program program; //记录大小的数据类型 size_t program_length; char* const source = ReadKernelSourceFile(filename, &program_length); program = clCreateProgramWithSource(context, 1, (const char**)& source, NULL, NULL); if (program == NULL) { printf("Failed to creae CL program from source.\n"); return NULL; } errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (errNum != CL_SUCCESS) { char buildLog[16384]; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buildLog), buildLog, NULL); printf("Error in kernel : %s \n", buildLog); clReleaseProgram(program); return NULL; } return program; } /* @ 创建内存对象 */ bool CreateMemObjects(cl_context context, cl_mem memObjects[3], float* a, float* b) { memObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * ARRAY_SIZE, a, NULL); memObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * ARRAY_SIZE, b, NULL); memObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * ARRAY_SIZE, NULL, NULL); if (memObjects[0] == NULL || memObjects[1] == NULL || memObjects[2] == NULL) { printf("Error creating memeory objects.\n"); return false; } return true; } /* @ 清楚OpenCL资源 */ void CleanUp(cl_context context, cl_command_queue commandQueue, cl_program program, cl_kernel kernel, cl_mem memObjects[3]) { for (int i = 0; i < 3; i++) { if (memObjects[i] != 0) { clReleaseMemObject(memObjects[i]); } } if (commandQueue != 0) { clReleaseCommandQueue(commandQueue); } if (kernel != 0) { clReleaseKernel(kernel); } if (program != 0) { clReleaseProgram(program); } if (context != 0) { clReleaseContext(context); } } /*main函数*/ int main(int argc, char** agrv) { cl_context context = 0; cl_command_queue commandQueue = 0; cl_program program = 0; cl_device_id device = 0; cl_kernel kernel = 0; cl_mem memObjects[3] = { 0,0,0 }; cl_int errNum; //创建OpenCL上下文 context = CreateContext(&device); if (context == NULL) { printf("Failed to create OpenCL context\n"); return 1; } //获得OpenCL设备,并创建命令队列 commandQueue = CreateCommandQueue(context, device); if (commandQueue == NULL) { CleanUp(context, commandQueue, program, kernel, memObjects); return 1; } //创建OpenCL程序 program = CreateProgram(context, device, "vecAdd.cl"); if (program == NULL) { CleanUp(context, commandQueue, program, kernel, memObjects); return 1; } kernel = clCreateKernel(program, "vector_add", NULL); if (kernel == NULL) { printf("Failed to create kernel\n"); CleanUp(context, commandQueue, program, kernel, memObjects); return 1; } //创建OpenCL内存对象 float result[ARRAY_SIZE]; float a[ARRAY_SIZE]; float b[ARRAY_SIZE]; for (int i = 0; i < ARRAY_SIZE; i++) { a[i] = (float)i; b[i] = (float)(i * 2); } if (!CreateMemObjects(context, memObjects, a, b)) { CleanUp(context, commandQueue, program, kernel, memObjects); return 1; } //设置内核参数 errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]); errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]); errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]); if (errNum != CL_SUCCESS) { printf("Error setting kernel arguments.\n"); CleanUp(context, commandQueue, program, kernel, memObjects); return 1; } size_t globalWorkSize[1] = { ARRAY_SIZE }; size_t localWorkSize[1] = { 1 }; //执行内核 errNum = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL); if (errNum != CL_SUCCESS) { printf("Error queueing kernel for execution\n"); CleanUp(context, commandQueue, program, kernel, memObjects); return 1; } //将计算的结果拷贝到主机上 errNum = clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0, ARRAY_SIZE * sizeof(float), result, 0, NULL, NULL); if (errNum != CL_SUCCESS) { printf("Error reading result buffer.\n"); CleanUp(context, commandQueue, program, kernel, memObjects); return 1; } for (int i = 0; i < ARRAY_SIZE; i++) { printf("i=%d:%f\n", i, result[i]); } printf("Executed program succesfully\n"); CleanUp(context, commandQueue, program, kernel, memObjects); return 0; }

vecAdd.cl

__kernel void vector_add(global const float *a, global const float *b, global float * result) { int gid = get_global_id(0); result[gid] = a[gid] + b[gid]; }

支持OpenCL的平台和设备很多,为了兼顾不同的设备,OpenCL程序的第一步就是确定OpenCL执行的平台,在确定平台之后,再确定执行OpenCL计算的设备。

确定设备后创建上下文、上下文包含了上一步查询的OpenCL计算的设备,以及接下来创建的内核、程序对象和内存对象。

创建上下文之后,需要创建命令队列,但一个OpenCL设备可以对应一个命令队列和一个OpenCL设备。例如,如果需要使用上下文中包含了两个计算设备时,为每个设备创建各自的命令队列。主机与OpenCL设备间的数据传输,执行内核等交互操作都是通过入队到命令队列中。命令队列中的每个命令交给OpenCL驱动或相应的硬件单元去执行。

由于在运行时才知道OpenCL设备信息,所在以OpenCL主机算程序中读取内核源码并创建程序对象。根据OpenCL设备编译构建程序对象,最后创建内核对象。通过这三部操作,把OpenCL 设备上执行的内核代码编译完成。

对于OpenCL设备上执行的内核函数需要完成输入参数,在主机端调用设备内核参数函数,除此之外,还需要设备在设备上用于执行的工作组和工作项的参数。
在这里插入图片描述

OpenCL执行流程 获得OpenCL平台和设备及其属性

OpenCL程序的第一步就是选择OpenCL平台。OpenCL平台指的是OpenCL设备和OpenCL框架的组合。一个异构计算机可以同时存在多个OpenCL平台。

#include #include #include #include int main() { cl_platform_id* platform; cl_uint num_platform; cl_int err; err = clGetPlatformIDs(0, NULL, &num_platform); platform = (cl_platform_id*)malloc(sizeof(cl_platform_id) * num_platform); err = clGetPlatformIDs(num_platform, platform, NULL); for (int i = 0; i < num_platform; i++) { printf("\nPlatform %d information\n", i); size_t size; err = clGetPlatformInfo(platform[i], CL_PLATFORM_NAME, 0, NULL, &size); char* PName = (char*)malloc(size); err = clGetPlatformInfo(platform[i], CL_PLATFORM_NAME, size, PName, NULL); printf("CL_PLATFORM_NAME: %s\n", PName); err = clGetPlatformInfo(platform[i], CL_PLATFORM_VENDOR, 0, NULL, &size); char* PVendor = (char*)malloc(size); err = clGetPlatformInfo(platform[i], CL_PLATFORM_VENDOR, size, PVendor, NULL); printf("CL_PLATFORM_VENDOR: %s\n", PVendor); err = clGetPlatformInfo(platform[i], CL_PLATFORM_VERSION, 0, NULL, &size); char* PVersion = (char*)malloc(size); err = clGetPlatformInfo(platform[i], CL_PLATFORM_VERSION, size, PVersion, NULL); printf("CL_PLATFORM_VERSION: %s\n", PVersion); err = clGetPlatformInfo(platform[i], CL_PLATFORM_PROFILE, 0, NULL, &size); char* PProfile = (char*)malloc(size); err = clGetPlatformInfo(platform[i], CL_PLATFORM_PROFILE, size, PProfile, NULL); printf("CL_PLATFORM_PROFILE: %s\n", PProfile); err = clGetPlatformInfo(platform[i], CL_PLATFORM_EXTENSIONS, 0, NULL, &size); char* PExten = (char*)malloc(size); err = clGetPlatformInfo(platform[i], CL_PLATFORM_EXTENSIONS, size, PExten, NULL); printf("CL_PLATFORM_EXTENSIONS: %s\n", PExten); free(PName); free(PVendor); free(PVersion); free(PProfile); free(PExten); } }

可以查询本机上的设备。

Platform 0 information CL_PLATFORM_NAME: Intel(R) OpenCL CL_PLATFORM_VENDOR: Intel(R) Corporation CL_PLATFORM_VERSION: OpenCL 2.0 CL_PLATFORM_PROFILE: FULL_PROFILE CL_PLATFORM_EXTENSIONS: cl_intel_dx9_media_sharing cl_khr_3d_image_writes cl_khr_byte_addressable_store cl_khr_d3d11_sharing cl_khr_depth_images cl_khr_dx9_media_sharing cl_khr_gl_sharing cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_icd cl_khr_image2d_from_buffer cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_spir Platform 1 information CL_PLATFORM_NAME: AMD Accelerated Parallel Processing CL_PLATFORM_VENDOR: Advanced Micro Devices, Inc. CL_PLATFORM_VERSION: OpenCL 2.0 AMD-APP (2117.8) CL_PLATFORM_PROFILE: FULL_PROFILE CL_PLATFORM_EXTENSIONS: cl_khr_icd cl_khr_d3d10_sharing cl_khr_d3d11_sharing cl_khr_dx9_media_sharing cl_amd_event_callback cl_amd_offline_devices OpenCL平台信息参数
cl_platform_info 返回类型 描述
CL_PLATFORM_PROFILE char[] 识别平台支持FULL_PROFILE还是EMBEDDED_PROFILE
CL_PLATFORM_VERSION char[] 返回平台支持的最大OpenCL版本
CL_PLATFORM_NAME char[] 返回OpenCL平台名称
CL_PLATFORM_VENDOR char[] 返回OpenCL平台开发商名称
CL_PLATFORM_EXTENSIONS char[] 返回OpenCL平台支持的扩展列表
FULL_PROFILE:支持OpenCL规范的所有功能。 EMBEDDED_PROFILE:支持OpenCL嵌入式简档,是OpenCL规范的一个子集。
作者:绝尘花遗落



amd opencl

需要 登录 后方可回复, 如果你还没有账号请 注册新账号
相关文章