// __global__ void ArrayMul( float *A, float *B, float *C )

// ArrayMul: multiplies A and B element-wise and reduces the products of each
// thread block to a single sum.
//
// Every thread computes A[gid] * B[gid] for its global index gid and stores it
// in the block's shared array prods[]. The block then adds those products
// together pairwise in shared memory, and thread 0 writes the block's total to
// C[blockIdx.x].
//
// Parameters:
//   A, B - input arrays, read at index blockIdx.x*blockDim.x + threadIdx.x.
//   C    - output array, one element written per block (index blockIdx.x).
//
// Preconditions (not checked here, the kernel receives no element count):
//   - blockDim.x must not exceed BLOCKSIZE, the size of the shared array.
//   - A and B must hold at least gridDim.x * blockDim.x elements, and C at
//     least gridDim.x elements -- NOTE(review): confirm against the launch code.
//   - Indexing uses only the .x components, so a 1D launch is presumably
//     intended -- verify against the caller.
__global__ void ArrayMul( float *A, float *B, float *C )
{
	__shared__ float prods[BLOCKSIZE];

	unsigned int numItems = blockDim.x;
	unsigned int tnum = threadIdx.x;
	unsigned int wgNum = blockIdx.x;
	unsigned int gid = blockIdx.x*blockDim.x + threadIdx.x;

	// One product per thread, staged in shared memory for the reduction below.
	prods[tnum] = A[gid] * B[gid];

	// Pairwise reduction: on each pass the threads whose index is a multiple of
	// 2*offset add in the partial sum held 'offset' slots above them.
	for (unsigned int offset = 1; offset < numItems; offset *= 2)
	{
		unsigned int mask = 2 * offset - 1;

		// Every thread reaches this barrier (it is outside the conditional), so
		// the writes from the previous pass are visible before they are read.
		__syncthreads();

		// The upper-bound check keeps the read inside the elements this block
		// actually filled when blockDim.x is not a power of two.
		if ((tnum & mask) == 0 && tnum + offset < numItems)
		{
			prods[tnum] += prods[tnum + offset];
		}
	}

	__syncthreads();

	// prods[0] now holds the sum of this block's products.
	if (tnum == 0)
	{
		C[wgNum] = prods[0];
	}
}