#include

#include
#include
#include
#include

#include

#include "helper_cuda.h"

// kernel routine

// Stores each thread's index within its block into the output array.
//
// Every thread derives a flat global index from its x-dimension block and
// thread coordinates and writes (float)threadIdx.x at that position.
//
// x: array the kernel writes to. There is no bounds check, so it must hold
//    at least gridDim.x * blockDim.x floats for the launch that is used.
__global__ void my_first_kernel(float *x)
{
  // flat global index of this thread across the 1D grid
  int tid = threadIdx.x + blockDim.x*blockIdx.x;

  x[tid] = (float)threadIdx.x;
}

// main code

int main(int argc, const char **argv)
// initialise card
findCudaDevice(argc, argv);

// set number of blocks, and threads per block
int nblocks = 2;
int nthreads = 8;
int nsize = nblocks*nthreads ;

// allocate memory for array
float *x_d;
checkCudaErrors(cudaMalloc((void **)&x_d, nsize*sizeof(float)));
std::vector x_h(nsize);

// execute kernel
my_first_kernel<<>>(x_d);
getLastCudaError("my_first_kernel execution failed\n");

// copy back results and print them out
checkCudaErrors(cudaMemcpy(&x_h[0], x_d, nsize*sizeof(float),
cudaMemcpyDeviceToHost));

for (int n=0;nCS代考 加微信: powcoder QQ: 1823890830 Email: powcoder@163.com