// NOTE: scraped web-page title ("代写代考 #include - PowCoder代写") stood here; it is not part of the program.

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#include <cuda_runtime.h>

#include "helper_cuda.h"

// kernel routine

// Each thread stores its own threadIdx.x, converted to float, into x at the
// thread's flat global index.
//
// Launch layout: 1D grid of 1D blocks. There is no bounds check, so x must
// hold at least gridDim.x * blockDim.x floats.
__global__ void my_first_kernel(float *x)
{
  // flat global index of this thread across the whole grid
  int tid = threadIdx.x + blockDim.x*blockIdx.x;

  x[tid] = (float)threadIdx.x;
}

// main code

// Host driver: selects a device, allocates a managed float array with one
// element per launched thread, runs my_first_kernel over it, then prints
// every element from the host.
//
// Returns 0 on completion; CUDA failures are reported through the
// checkCudaErrors / getLastCudaError helpers from helper_cuda.h.
int main(int argc, const char **argv)
{
  float *x = NULL;  // managed array: written by the kernel, read by the host

  // initialise card
  findCudaDevice(argc, argv);

  // set number of blocks, and threads per block
  int nblocks  = 2;
  int nthreads = 8;
  int nsize    = nblocks*nthreads;  // exactly one element per thread

  // allocate memory for array
  checkCudaErrors(cudaMallocManaged(&x, nsize*sizeof(float)));

  // execute kernel; the launch covers exactly nsize threads, matching the
  // kernel's unchecked indexing
  my_first_kernel<<<nblocks,nthreads>>>(x);
  getLastCudaError("my_first_kernel execution failed\n");

  // synchronize to wait for kernel to finish, and data copied back;
  // checking the result also surfaces errors raised during kernel execution
  checkCudaErrors(cudaDeviceSynchronize());

  // NOTE(review): the loop body was lost in the extracted source; this
  // print statement is a reconstruction -- confirm the intended format.
  for (int n=0; n<nsize; n++) printf(" n,  x  =  %d  %f \n", n, x[n]);

  // free memory
  checkCudaErrors(cudaFree(x));

  return 0;
}

// (scraped web-page footer: "Related Posts" -- not part of the program)