OpenCL 2.0 Reference Guide
OpenCL (Open Computing Language) is a multi-vendor
open standard for general-purpose parallel programming of heterogeneous systems that include CPUs, GPUs, and other processors. OpenCL provides a uniform programming environment for software developers to write efficient, portable code for high-performance compute servers, desktop computer systems, and handheld devices. Specifications and an online reference are available at www.khronos.org/opencl.
[n.n.n] and purple text: sections and text in the OpenCL API Spec. [n.n.n] and green text: sections and text in the OpenCL C Spec. [n.n.n] and blue text: sections and text in the OpenCL Extension Spec.
Copyright By PowCoder代写 加微信 powcoder
The OpenCL Platform Layer
The OpenCL platform layer implements platform-specific features that allow applications to query OpenCL devices, device configuration information, and to create OpenCL contexts using one or more devices. Items in blue apply when the appropriate extension is supported.
Querying Platform Info & Devices [4.1-2] [9.16.9] cl_int clGetPlatformIDs (cl_uint num_entries,
cl_platform_id *platforms, cl_uint *num_platforms) cl_int clIcdGetPlatformIDsKHR (cl_uint num_entries,
cl_platform_id *platforms, cl_uint *num_platforms)
cl_int clGetPlatformInfo (cl_platform_id platform, cl_platform_info param_name,
size_t param_value_size, void *param_value, size_t *param_value_size_ret)
param_name: CL_PLATFORM_{PROFILE, VERSION}, CL_PLATFORM_{NAME, VENDOR, EXTENSIONS}, CL_PLATFORM_ICD_SUFFIX_KHR [Table 4.1]
cl_int clGetDeviceIDs (cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices)
device_type: [Table 4.2] CL_DEVICE_TYPE_{ACCELERATOR, ALL, CPU}, CL_DEVICE_TYPE_{CUSTOM, DEFAULT, GPU}
cl_int clGetDeviceInfo (cl_device_id device, cl_device_info param_name,
size_t param_value_size, void *param_value, size_t *param_value_size_ret)
param_name: [Table 4.3]
CL_DEVICE_ADDRESS_BITS, CL_DEVICE_AVAILABLE, CL_DEVICE_BUILT_IN_KERNELS, CL_DEVICE_COMPILER_AVAILABLE, CL_DEVICE_{DOUBLE, HALF, SINGLE}_FP_CONFIG, CL_DEVICE_ENDIAN_LITTLE, CL_DEVICE_EXTENSIONS, CL_DEVICE_ERROR_CORRECTION_SUPPORT, CL_DEVICE_EXECUTION_CAPABILITIES, CL_DEVICE_GLOBAL_MEM_CACHE_{SIZE, TYPE}, CL_DEVICE_GLOBAL_MEM_{CACHELINE_SIZE, SIZE}, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, CL_DEVICE_PREFERRED_{PLATFORM, LOCAL,
GLOBAL}_ATOMIC_ALIGNMENT, CL_DEVICE_GLOBAL_VARIABLE_SHARING, CL_DEVICE_HOST_UNIFIED_MEMORY, CL_DEVICE_IMAGE_MAX_{ARRAY, BUFFER}_SIZE, CL_DEVICE_IMAGE_SUPPORT, CL_DEVICE_IMAGE2D_MAX_{WIDTH, HEIGHT}, CL_DEVICE_IMAGE3D_MAX_{WIDTH, HEIGHT, DEPTH}, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, CL_DEVICE_LINKER_AVAILABLE, CL_DEVICE_LOCAL_MEM_{TYPE, SIZE}, CL_DEVICE_MAX_READ_IMAGE_ARGS, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, CL_DEVICE_MAX_{CLOCK_FREQUENCY, PIPE_ARGS}, CL_DEVICE_MAX_{COMPUTE_UNITS, SAMPLERS}, CL_DEVICE_MAX_CONSTANT_{ARGS, BUFFER_SIZE}, CL_DEVICE_MAX_{MEM_ALLOC, PARAMETER}_SIZE, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, CL_DEVICE_MAX_ON_DEVICE_{QUEUES, EVENTS}, CL_DEVICE_MAX_WORK_GROUP_SIZE, CL_DEVICE_MAX_WORK_ITEM_{DIMENSIONS, SIZES}, CL_DEVICE_MEM_BASE_ADDR_ALIGN, CL_DEVICE_NAME, CL_DEVICE_NATIVE_VECTOR_WIDTH_{CHAR, INT}, CL_DEVICE_NATIVE_VECTOR_WIDTH_{LONG, SHORT}, CL_DEVICE_NATIVE_VECTOR_WIDTH_{DOUBLE, HALF}, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, CL_DEVICE_{OPENCL_C_VERSION, PARENT_DEVICE}, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, CL_DEVICE_PARTITION_{PROPERTIES, TYPE}, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, CL_DEVICE_PIPE_MAX_PACKET_SIZE,
CL_DEVICE_{PLATFORM,PRINTF_BUFFER_SIZE}, CL_DEVICE_PREFERRED_VECTOR_WIDTH_{CHAR, INT}, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, CL_DEVICE_PROFILE, CL_DEVICE_PROFILING_TIMER_RESOLUTION, CL_DEVICE_SPIR_VERSIONS, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, CL_DEVICE_{REFERENCE_COUNT, VENDOR_ID}, CL_DEVICE_SVM_CAPABILITIES, CL_DEVICE_TERMINATE_CAPABILITY_KHR, CL_DEVICE_{TYPE, VENDOR},
CL_{DEVICE, DRIVER}_VERSION
Partitioning a Device [4.3]
cl_int clCreateSubDevices (cl_device_id in_device,
const cl_device_partition_property *properties, cl_uint num_devices, cl_device_id *out_devices, cl_uint *num_devices_ret)
properties: CL_DEVICE_PARTITION_EQUALLY, CL_DEVICE_PARTITION_BY_COUNTS, CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN
cl_int clRetainDevice (cl_device_id device) cl_int clReleaseDevice (cl_device_id device)
Contexts [4.4]
cl_context clCreateContext (
const cl_context_properties *properties,
cl_uint num_devices, const cl_device_id *devices, void (CL_CALLBACK*pfn_notify)
(const char *errinfo, const void *private_info,
size_t cb, void *user_data), void *user_data, cl_int *errcode_ret)
The OpenCL Runtime
API calls that manage OpenCL objects such as command-queues, memory objects, program objects, kernel objects for __kernel functions in a program and calls that allow you to enqueue commands to a command-queue such as executing a kernel, reading, or writing a memory object.
Command Queues [5.1] cl_command_queue
clCreateCommandQueueWithProperties ( cl_context context, cl_device_id device,
const cl_command_queue_properties *properties, cl_int *errcode_ret)
properties: [Table 5.1] CL_QUEUE_SIZE, CL_QUEUE_PROPERTIES (bitfield which may be
set to an OR of CL_QUEUE_* where * may be: OUT_OF_ORDER_EXEC_MODE_ENABLE, PROFILING_ENABLE, ON_DEVICE[_DEFAULT])
cl_int clRetainCommandQueue ( cl_command_queue command_queue)
cl_int clReleaseCommandQueue ( cl_command_queue command_queue)
cl_int clGetCommandQueueInfo ( cl_command_queue command_queue, cl_command_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
param_name: [Table 5.2] CL_QUEUE_CONTEXT, CL_QUEUE_DEVICE, CL_QUEUE_SIZE, CL_QUEUE_REFERENCE_COUNT, CL_QUEUE_PROPERTIES
properties: [Table 4.5]
NULL or CL_CONTEXT_PLATFORM, CL_CONTEXT_INTEROP_USER_SYNC, CL_CONTEXT_{D3D10, D3D11}_DEVICE_KHR, CL_CONTEXT_ADAPTER_{D3D9, D3D9EX}_KHR, CL_CONTEXT_ADAPTER_DXVA_KHR, CL_CONTEXT_MEMORY_INITIALIZE_KHR, CL_CONTEXT_TERMINATE_KHR, CL_GL_CONTEXT_KHR, CL_CGL_SHAREGROUP_KHR, CL_{EGL, GLX}_DISPLAY_KHR, CL_WGL_HDC_KHR
cl_context clCreateContextFromType ( const cl_context_properties *properties, cl_device_type device_type,
void (CL_CALLBACK *pfn_notify)
(const char *errinfo, const void *private_info, size_t cb, void *user_data),
void *user_data, cl_int *errcode_ret) properties: See clCreateContext device_type: See clGetDeviceIDs
cl_int clRetainContext (cl_context context)
cl_int clReleaseContext (cl_context context)
cl_int clGetContextInfo (cl_context context, cl_context_info param_name,
size_t param_value_size, void *param_value, size_t *param_value_size_ret)
param_name: CL_CONTEXT_REFERENCE_COUNT, CL_CONTEXT_{DEVICES, NUM_DEVICES, PROPERTIES}, CL_CONTEXT_{D3D10, D3D11}_PREFER_SHARED_RESOURCES_KHR [Table 4.6]
cl_int clTerminateContextKHR (cl_context context) Get CL Extension Function Pointers [9.2]
void* clGetExtensionFunctionAddressForPlatform ( cl_platform_id platform, const char *funcname)
Buffer Objects
Elements are stored sequentially and accessed using a pointer by a kernel executing on a device.
Create Buffer Objects [5.2.1]
cl_mem clCreateBuffer (cl_context context,
cl_mem_flags flags, size_t size, void *host_ptr,
cl_int *errcode_ret)
flags: [Table 5.3] CL_MEM_READ_WRITE,
CL_MEM_{WRITE, READ}_ONLY, CL_MEM_HOST_NO_ACCESS, CL_MEM_HOST_{READ, WRITE}_ONLY, CL_MEM_{USE, ALLOC, COPY}_HOST_PTR
cl_mem clCreateSubBuffer (cl_mem buffer, cl_mem_flags flags,
cl_buffer_create_type buffer_create_type,
const void *buffer_create_info, cl_int *errcode_ret)
flags: See clCreateBuffer
buffer_create_type: CL_BUFFER_CREATE_TYPE_REGION
Read, Write, Copy Buffer Objects [5.2.2] cl_int clEnqueueReadBuffer (
cl_command_queue command_queue,
cl_mem buffer,
cl_bool blocking_read, size_t offset, size_t size, void *ptr, cl_uint num_events_in_wait_list,
const cl_event *event_wait_list, cl_event *event)
cl_int clEnqueueReadBufferRect (
cl_command_queue command_queue,
cl_mem buffer, cl_bool blocking_read,
const size_t *buffer_origin, const size_t *host_origin, const size_t *region, size_t buffer_row_pitch,
size_t buffer_slice_pitch, size_t host_row_pitch,
size_t host_slice_pitch, void *ptr,
cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
(Continued on next page >)
www.khronos.org/opencl
©2015 Khronos Group – Rev. 1118
OpenCL API Reference
OpenCL API
Relationships
abstract classes
{abstract}
aggregations
inheritance
relationship navigability
Cardinality
one and only one
optionally one
one or more
Dynamic allocation R/W access
No allocation R/W access
Dynamic allocation R/W access
Static allocation R-only access
Dynamic allocation No access
Static allocation R/W access
No allocation No access
Static allocation R/W access
OpenCL 2.0 Reference Guide
Buffer Objects (continued)
cl_int clEnqueueWriteBuffer (
cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t offset, size_t size, const void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
cl_int clEnqueueWriteBufferRect (
cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, const size_t *buffer_origin, const size_t *host_origin, const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
cl_int clEnqueueFillBuffer (
cl_command_queue command_queue, cl_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
cl_int clEnqueueCopyBuffer (
cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
cl_int clEnqueueCopyBufferRect (
cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, const size_t *src_origin, const size_t *dst_origin, const size_t *region,
size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch,
size_t dst_slice_pitch, cl_uint num_events_in_wait_list,
const cl_event *event_wait_list, cl_event *event)
Map Buffer Objects [5.2.4]
void * clEnqueueMapBuffer (
cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t size,
cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
cl_event *event, cl_int *errcode_ret)
map_flags: CL_MAP_{READ, WRITE}, CL_MAP_WRITE_INVALIDATE_REGION Conversions and Type Casting Examples [6.2]
OpenCL Class Diagram
The figure below describes the OpenCL specification as a class diagram using the Unified Modeling Language¹ (UML) notation. The diagram shows both nodes and edges, which are classes and their relationships. As a simplification it shows only classes, and no attributes or operations.
Annotations
1 Unified Modeling Language (http://www.uml.org/) is a trademark of Object Management Group (OMG).
OpenCL Device Architecture Diagram
The table below shows memory regions with allocation and memory access capabilities. R=Read, W=Write
This conceptual OpenCL device architecture diagram shows processing elements (PE), compute units (CU), and devices. The host is not shown.
A pipe is a memory object that stores data organized as a FIFO. Pipe objects can only be accessed using built-in functions that read from and write to a pipe. Pipe objects are not accessible from the host.
Create Pipe Objects [5.4.1]
cl_mem clCreatePipe (cl_context context, cl_mem_flags flags, cl_uint pipe_packet_size,
cl_uint pipe_max_packets, const cl_pipe_properties *properties, cl_int *errcode_ret)
flags: 0 or CL_MEM_READ_WRITE, CL_MEM_{READ, WRITE}_ONLY, CL_MEM_HOST_NO_ACCESS
Pipe Object Queries [5.4.2]
cl_int clGetPipeInfo (cl_mem pipe, cl_pipe_info param_name, size_t param_value_size,
void *param_value, size_t *param_value_size_ret) param_name:
CL_PIPE_PACKET_SIZE, CL_PIPE_MAX_PACKETS
Shared Virtual Memory
Shared Virtual Memory (SVM) allows the host and kernels executing on devices to directly share complex, pointer-containing data structures such as trees and linked lists.
SVM Sharing Granularity [5.6.1]
void* clSVMAlloc (cl_context context, cl_svm_mem_flags flags, size_t size,
unsigned int alignment)
flags: [Table 5.13]
CL_MEM_READ_WRITE, CL_MEM_{WRITE, READ}_ONLY, CL_MEM_SVM_FINE_GRAIN_BUFFER, CL_MEM_SVM_ATOMICS
void clSVMFree (cl_context context, void *svm_pointer) Enqueuing SVM Operations [5.6.2]
cl_int clEnqueueSVMFree (
cl_command_queue command_queue,
cl_uint num_svm_pointers, void *svm_pointers[], void (CL_CALLBACK*pfn_free_func)(
cl_command_queue command_queue, cl_uint num_svm_pointers,
void *svm_pointers[], void *user_data),
void *user_data, cl_uint num_events_in_wait_list,
T a = (T)b; // Scalar to scalar, or scalar to vector
T a = convert_T(b);
T a = convert_T_R(b);
T a = as_T(b);
T a = convert_T_sat_R(b);
R: one of the following rounding modes:
Memory Objects
_rte: to nearest even; _rtz: toward zero; _rtp: toward +infinity; _rtn: toward -infinity
A memory object is a handle to a reference counted region of global memory. Includes Buffer Objects, Image Objects, and Pipe Objects. Items in blue apply when the appropriate extension is supported.
Memory Objects [5.5.1, 5.5.2]
cl_int clRetainMemObject (cl_mem memobj)
cl_int clReleaseMemObject (cl_mem memobj)
cl_int clSetMemObjectDestructorCallback (cl_mem memobj, void (CL_CALLBACK *pfn_notify)
(cl_mem memobj, void *user_data), void *user_data)
cl_int clEnqueueUnmapMemObject (cl_command_queue command_queue, cl_mem memobj, void *mapped_ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
Migrate Memory Objects [5.5.4]
cl_int clEnqueueMigrateMemObjects (cl_command_queue command_queue, cl_uint num_mem_objects, const cl_mem *mem_objects, cl_mem_migration_flags flags, cl_uint num_events_in_wait_list,
const cl_event *event_wait_list, cl_event *event)
flags: CL_MIGRATE_MEM_OBJECT_HOST, CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED
Query Memory Object [5.5.5]
cl_int clGetMemObjectInfo (cl_mem memobj, cl_mem_info param_name,
size_t param_value_size, void *param_value, size_t *param_value_size_ret)
param_name: CL_MEM_{TYPE, FLAGS, SIZE, HOST_PTR}, CL_MEM_OFFSET, CL_MEM_{MAP, REFERENCE}_COUNT, CL_MEM_ASSOCIATED_MEMOBJECT, CL_MEM_CONTEXT, CL_MEM_USES_SVM_POINTER,
CL_MEM_{D3D10, D3D11}_RESOURCE_KHR, CL_MEM_DX9_MEDIA_{ADAPTER_TYPE, SURFACE_INFO}_KHR [Table 5.12]
const cl_event *event_wait_list, cl_event *event)
©2015 Khronos Group – Rev. 1118
(Continued on next page >)
www.khronos.org/opencl
OpenCL API
OpenCL 2.0 Reference Guide
Shared Virtual Memory (continued)
cl_int clEnqueueSVMMemcpy ( cl_command_queue command_queue,
cl_bool blocking_copy, void *dst_ptr,
const void *src_ptr, size_t size,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list, cl_event *event)
cl_int clEnqueueSVMMemFill ( cl_command_queue command_queue,
void *svm_ptr, const void *pattern,
size_t pattern_size, size_t size,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list, cl_event *event)
cl_int clEnqueueSVMMap (
cl_command_queue command_queue,
cl_bool blocking_map, cl_map_flags map_flags, void *svm_ptr, size_t size,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list, cl_event *event)
cl_int clEnqueueSVMUnmap (
cl_command_queue command_queue,
void *svm_ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
Kernel Objects
A kernel is a function declared in a program, identified by the __kernel qualifier. A kernel object encapsulates the specific __kernel function and the argument values to be used when executing it. Items in blue apply when the appropriate extension is supported.
Create Kernel Objects [5.9.1]
cl_kernel clCreateKernel (cl_program program,
const char *kernel_name, cl_int *errcode_ret)
cl_int clCreateKernelsInProgram (cl_program program,
cl_uint num_kernels, cl_kernel *kernels, cl_uint *num_kernels_ret)
cl_int clRetainKernel (cl_kernel kernel) cl_int clReleaseKernel (cl_kernel kernel)
Kernel Arguments and Queries [5.9.2, 5.9.3]
cl_int clSetKernelArg (cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value)
cl_int clSetKernelArgSVMPointer (cl_kernel kernel, cl_uint arg_index, const void *arg_value)
cl_int clSetKernelExecInfo (cl_kernel kernel, cl_kernel_exec_info param_name,
size_t param_value_size, const void *param_value)
param_name: CL_KERNEL_EXEC_INFO_SVM_PTRS, CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM
cl_int clGetKernelInfo (cl_kernel kernel, cl_kernel_info param_name,
size_t param_value_size, void *param_value, size_t *param_value_size_ret)
param_name: [Table 5.19] CL_KERNEL_FUNCTION_NAME, CL_KERNEL_NUM_ARGS, CL_KERNEL_REFERENCE_COUNT, CL_KERNEL_{ATTRIBUTES, CONTEXT, PROGRAM}
cl_int clGetKernelWorkGroupInfo (cl_kernel kernel, cl_device_id device,
cl_kernel_work_group_info param_name,
size_t param_value_size, void *param_value, size_t *param_value_size_ret)
param_name: CL_KERNEL_GLOBAL_WORK_SIZE, CL_KERNEL_[COMPILE_]WORK_GROUP_SIZE, CL_KERNEL_{LOCAL, PRIVATE}_MEM_SIZE, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE [Table 5.20]
cl_int clGetKernelArgInfo (cl_kernel kernel,
cl_uint arg_indx, cl_kernel_arg_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
param_name:
CL_KERNEL_ARG_{ACCESS, ADDRESS}_QUALIFIER, CL_KERNEL_ARG_NAME, CL_KERNEL_ARG_TYPE_{NAME, QUALIFIER} [Table 5.21]
Program Objects
An OpenCL program consists of a set of kernels that are identified as functions declared with the __kernel qualifier in the program source.
Create Program Objects [5.8.1]
cl_program clCreateProgramWithSource ( cl_context context, cl_uint count,
const char **strings, const size_t *lengths, cl_int *errcode_ret)
cl_program clCreateProgramWithBinary (
cl_context context, cl_uint num_devices,
const cl_device_id *device_list, const size_t *lengths, const unsigned char **binaries,
cl_int *binary_status, cl_int *errcode_ret)
cl_program clCreateProgramWithBuiltInKernels ( cl_context context, cl_uint num_devices,
const cl_device_id *device_list,
const char *kernel_names, cl_int *errcode_ret)
cl_int clRetainProgram (cl_program program) cl_int clReleaseProgram (cl_program program)
Building Program Executables [5.8.2]
cl_int clBuildProgram (cl_program program,
cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK*pfn_notify)
(cl_program program, void *user_data), void *user_data)
Separate Compilation and Linking [5.8.3]
cl_int clCompileProgram (cl_program program,
cl_uint num_devices, const cl_device_id *device_list, const char *options, cl_uint num_input_headers, const cl_program *input_headers,
const char **header_include_names,
void (CL_CALLBACK*pfn_notify)
(cl_program program, void *user_data), void *user_data)
cl_int clGetKernelSubGroupInfoKHR
(cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info param_name,
size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
param_name: CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE
Execute Kernels [5.10]
cl_int clEnqueueNDRangeKernel (
cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size,
const size_t *local_work_size,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list, cl_event *event)
cl_int clEnqueueNativeKernel (
程序代写 CS代考 加微信: powcoder QQ: 1823890830 Email: powcoder@163.com