CUDA实验两则 wu-kan

HelloWorld

完成 CUDA 的「Hello world」程序,以 grid(2, 4)、block(8, 16) 的启动配置编译并运行,给出输出结果文件。

源代码HelloWorld.cu

#include <stdio.h>
/*
 * Hello-world kernel: every thread prints one line containing its own
 * block index and thread index (x, y and z components of each).
 *
 * Takes no arguments and touches no memory. Device-side printf is
 * buffered, so the order in which lines appear is not guaranteed.
 */
__global__ void kernel()
{
	// The components of blockIdx/threadIdx are unsigned int, so %u is the
	// matching conversion specifier.
	printf("Hello world from block(%u,%u,%u) thread(%u,%u,%u).\n",
		   blockIdx.x, blockIdx.y, blockIdx.z,
		   threadIdx.x, threadIdx.y, threadIdx.z);
}
/*
 * Host entry point: launches `kernel` on a 2 x 4 grid of 8 x 16 thread
 * blocks and waits for it to finish.
 *
 * Returns 0 on success, 1 if the launch or the kernel execution failed
 * (the CUDA error string is written to stderr in that case).
 */
int main()
{
	// Only x and y are given for each dim3; the omitted z extent defaults to 1.
	dim3 grid(2, 4), block(8, 16);
	kernel<<<grid, block>>>();

	// A kernel launch has no return value; launch failures are reported
	// through cudaGetLastError().
	cudaError_t status = cudaGetLastError();
	if (status != cudaSuccess)
	{
		fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(status));
		return 1;
	}

	// Block until the kernel has finished so that its printf output is
	// delivered before the process exits; errors raised during execution
	// surface here.
	status = cudaDeviceSynchronize();
	if (status != cudaSuccess)
	{
		fprintf(stderr, "kernel execution failed: %s\n", cudaGetErrorString(status));
		return 1;
	}
	return 0;
}

调度脚本HelloWorld.pbs

# PBS batch job: builds HelloWorld.cu with nvcc and runs the resulting binary.
# Job name.
#PBS -N HelloWorld
# Resources: one node, 32 processors per node, one GPU.
#PBS -l nodes=1:ppn=32:gpus=1
# Join stderr into the stdout stream so a single output file is produced.
#PBS -j oe
# Submit to the queue named "gpu".
#PBS -q gpu

# Site-specific environment script; presumably puts the CUDA 10.0 toolchain
# (nvcc) on PATH -- confirm against the cluster documentation.
source /public/software/profile.d/cuda10.0.sh
# Switch to the directory the job was submitted from; stop if that fails
# rather than compiling in the wrong place.
cd "$PBS_O_WORKDIR" || exit 1
# Stop on a compile error so a stale binary from an earlier build is not run.
nvcc HelloWorld.cu -o HelloWorld || exit 1
./HelloWorld

运行结果HelloWorld.o12142

Hello world from block(1,2,0) thread(0,4,0).
Hello world from block(1,2,0) thread(1,4,0).
Hello world from block(1,2,0) thread(2,4,0).
Hello world from block(1,2,0) thread(3,4,0).
Hello world from block(1,2,0) thread(4,4,0).
Hello world from block(1,2,0) thread(5,4,0).
Hello world from block(1,2,0) thread(6,4,0).
Hello world from block(1,2,0) thread(7,4,0).
Hello world from block(1,2,0) thread(0,5,0).
Hello world from block(1,2,0) thread(1,5,0).
Hello world from block(1,2,0) thread(2,5,0).
Hello world from block(1,2,0) thread(3,5,0).
Hello world from block(1,2,0) thread(4,5,0).
Hello world from block(1,2,0) thread(5,5,0).
Hello world from block(1,2,0) thread(6,5,0).
Hello world from block(1,2,0) thread(7,5,0).
Hello world from block(1,2,0) thread(0,6,0).
Hello world from block(1,2,0) thread(1,6,0).
Hello world from block(1,2,0) thread(2,6,0).
Hello world from block(1,2,0) thread(3,6,0).
Hello world from block(1,2,0) thread(4,6,0).
Hello world from block(1,2,0) thread(5,6,0).
Hello world from block(1,2,0) thread(6,6,0).
Hello world from block(1,2,0) thread(7,6,0).
Hello world from block(1,2,0) thread(0,7,0).
Hello world from block(1,2,0) thread(1,7,0).
Hello world from block(1,2,0) thread(2,7,0).
Hello world from block(1,2,0) thread(3,7,0).
Hello world from block(1,2,0) thread(4,7,0).
Hello world from block(1,2,0) thread(5,7,0).
Hello world from block(1,2,0) thread(6,7,0).
Hello world from block(1,2,0) thread(7,7,0).
Hello world from block(1,2,0) thread(0,0,0).
Hello world from block(1,2,0) thread(1,0,0).
Hello world from block(1,2,0) thread(2,0,0).
Hello world from block(1,2,0) thread(3,0,0).
Hello world from block(1,2,0) thread(4,0,0).
Hello world from block(1,2,0) thread(5,0,0).
Hello world from block(1,2,0) thread(6,0,0).
Hello world from block(1,2,0) thread(7,0,0).
Hello world from block(1,2,0) thread(0,1,0).
Hello world from block(1,2,0) thread(1,1,0).
Hello world from block(1,2,0) thread(2,1,0).
Hello world from block(1,2,0) thread(3,1,0).
Hello world from block(1,2,0) thread(4,1,0).
Hello world from block(1,2,0) thread(5,1,0).
Hello world from block(1,2,0) thread(6,1,0).
Hello world from block(1,2,0) thread(7,1,0).
Hello world from block(1,2,0) thread(0,2,0).
Hello world from block(1,2,0) thread(1,2,0).
Hello world from block(1,2,0) thread(2,2,0).
Hello world from block(1,2,0) thread(3,2,0).
Hello world from block(1,2,0) thread(4,2,0).
Hello world from block(1,2,0) thread(5,2,0).
Hello world from block(1,2,0) thread(6,2,0).
Hello world from block(1,2,0) thread(7,2,0).
Hello world from block(1,2,0) thread(0,3,0).
Hello world from block(1,2,0) thread(1,3,0).
Hello world from block(1,2,0) thread(2,3,0).
Hello world from block(1,2,0) thread(3,3,0).
Hello world from block(1,2,0) thread(4,3,0).
Hello world from block(1,2,0) thread(5,3,0).
Hello world from block(1,2,0) thread(6,3,0).
Hello world from block(1,2,0) thread(7,3,0).
Hello world from block(1,2,0) thread(0,8,0).
Hello world from block(1,2,0) thread(1,8,0).
Hello world from block(1,2,0) thread(2,8,0).
Hello world from block(1,2,0) thread(3,8,0).
Hello world from block(1,2,0) thread(4,8,0).
Hello world from block(1,2,0) thread(5,8,0).
Hello world from block(1,2,0) thread(6,8,0).
Hello world from block(1,2,0) thread(7,8,0).
Hello world from block(1,2,0) thread(0,9,0).
Hello world from block(1,2,0) thread(1,9,0).
Hello world from block(1,2,0) thread(2,9,0).
Hello world from block(1,2,0) thread(3,9,0).
Hello world from block(1,2,0) thread(4,9,0).
Hello world from block(1,2,0) thread(5,9,0).
Hello world from block(1,2,0) thread(6,9,0).
Hello world from block(1,2,0) thread(7,9,0).
Hello world from block(1,2,0) thread(0,10,0).
Hello world from block(1,2,0) thread(1,10,0).
Hello world from block(1,2,0) thread(2,10,0).
Hello world from block(1,2,0) thread(3,10,0).
Hello world from block(1,2,0) thread(4,10,0).
Hello world from block(1,2,0) thread(5,10,0).
Hello world from block(1,2,0) thread(6,10,0).
Hello world from block(1,2,0) thread(7,10,0).
Hello world from block(1,2,0) thread(0,11,0).
Hello world from block(1,2,0) thread(1,11,0).
Hello world from block(1,2,0) thread(2,11,0).
Hello world from block(1,2,0) thread(3,11,0).
Hello world from block(1,2,0) thread(4,11,0).
Hello world from block(1,2,0) thread(5,11,0).
Hello world from block(1,2,0) thread(6,11,0).
Hello world from block(1,2,0) thread(7,11,0).
Hello world from block(1,2,0) thread(0,12,0).
Hello world from block(1,2,0) thread(1,12,0).
Hello world from block(1,2,0) thread(2,12,0).
Hello world from block(1,2,0) thread(3,12,0).
Hello world from block(1,2,0) thread(4,12,0).
Hello world from block(1,2,0) thread(5,12,0).
Hello world from block(1,2,0) thread(6,12,0).
Hello world from block(1,2,0) thread(7,12,0).
Hello world from block(1,2,0) thread(0,13,0).
Hello world from block(1,2,0) thread(1,13,0).
Hello world from block(1,2,0) thread(2,13,0).
Hello world from block(1,2,0) thread(3,13,0).
Hello world from block(1,2,0) thread(4,13,0).
Hello world from block(1,2,0) thread(5,13,0).
Hello world from block(1,2,0) thread(6,13,0).
Hello world from block(1,2,0) thread(7,13,0).
Hello world from block(1,2,0) thread(0,14,0).
Hello world from block(1,2,0) thread(1,14,0).
Hello world from block(1,2,0) thread(2,14,0).
Hello world from block(1,2,0) thread(3,14,0).
Hello world from block(1,2,0) thread(4,14,0).
Hello world from block(1,2,0) thread(5,14,0).
Hello world from block(1,2,0) thread(6,14,0).
Hello world from block(1,2,0) thread(7,14,0).
Hello world from block(1,2,0) thread(0,15,0).
Hello world from block(1,2,0) thread(1,15,0).
Hello world from block(1,2,0) thread(2,15,0).
Hello world from block(1,2,0) thread(3,15,0).
Hello world from block(1,2,0) thread(4,15,0).
Hello world from block(1,2,0) thread(5,15,0).
Hello world from block(1,2,0) thread(6,15,0).
Hello world from block(1,2,0) thread(7,15,0).
Hello world from block(1,3,0) thread(0,4,0).
Hello world from block(1,3,0) thread(1,4,0).
Hello world from block(1,3,0) thread(2,4,0).
Hello world from block(1,3,0) thread(3,4,0).
Hello world from block(1,3,0) thread(4,4,0).
Hello world from block(1,3,0) thread(5,4,0).
Hello world from block(1,3,0) thread(6,4,0).
Hello world from block(1,3,0) thread(7,4,0).
Hello world from block(1,3,0) thread(0,5,0).
Hello world from block(1,3,0) thread(1,5,0).
Hello world from block(1,3,0) thread(2,5,0).
Hello world from block(1,3,0) thread(3,5,0).
Hello world from block(1,3,0) thread(4,5,0).
Hello world from block(1,3,0) thread(5,5,0).
Hello world from block(1,3,0) thread(6,5,0).
Hello world from block(1,3,0) thread(7,5,0).
Hello world from block(1,3,0) thread(0,6,0).
Hello world from block(1,3,0) thread(1,6,0).
Hello world from block(1,3,0) thread(2,6,0).
Hello world from block(1,3,0) thread(3,6,0).
Hello world from block(1,3,0) thread(4,6,0).
Hello world from block(1,3,0) thread(5,6,0).
Hello world from block(1,3,0) thread(6,6,0).
Hello world from block(1,3,0) thread(7,6,0).
Hello world from block(1,3,0) thread(0,7,0).
Hello world from block(1,3,0) thread(1,7,0).
Hello world from block(1,3,0) thread(2,7,0).
Hello world from block(1,3,0) thread(3,7,0).
Hello world from block(1,3,0) thread(4,7,0).
Hello world from block(1,3,0) thread(5,7,0).
Hello world from block(1,3,0) thread(6,7,0).
Hello world from block(1,3,0) thread(7,7,0).
Hello world from block(1,0,0) thread(0,0,0).
Hello world from block(1,0,0) thread(1,0,0).
Hello world from block(1,0,0) thread(2,0,0).
Hello world from block(1,0,0) thread(3,0,0).
Hello world from block(1,0,0) thread(4,0,0).
Hello world from block(1,0,0) thread(5,0,0).
Hello world from block(1,0,0) thread(6,0,0).
Hello world from block(1,0,0) thread(7,0,0).
Hello world from block(1,0,0) thread(0,1,0).
Hello world from block(1,0,0) thread(1,1,0).
Hello world from block(1,0,0) thread(2,1,0).
Hello world from block(1,0,0) thread(3,1,0).
Hello world from block(1,0,0) thread(4,1,0).
Hello world from block(1,0,0) thread(5,1,0).
Hello world from block(1,0,0) thread(6,1,0).
Hello world from block(1,0,0) thread(7,1,0).
Hello world from block(1,0,0) thread(0,2,0).
Hello world from block(1,0,0) thread(1,2,0).
Hello world from block(1,0,0) thread(2,2,0).
Hello world from block(1,0,0) thread(3,2,0).
Hello world from block(1,0,0) thread(4,2,0).
Hello world from block(1,0,0) thread(5,2,0).
Hello world from block(1,0,0) thread(6,2,0).
Hello world from block(1,0,0) thread(7,2,0).
Hello world from block(1,0,0) thread(0,3,0).
Hello world from block(1,0,0) thread(1,3,0).
Hello world from block(1,0,0) thread(2,3,0).
Hello world from block(1,0,0) thread(3,3,0).
Hello world from block(1,0,0) thread(4,3,0).
Hello world from block(1,0,0) thread(5,3,0).
Hello world from block(1,0,0) thread(6,3,0).
Hello world from block(1,0,0) thread(7,3,0).
Hello world from block(1,3,0) thread(0,0,0).
Hello world from block(1,3,0) thread(1,0,0).
Hello world from block(1,3,0) thread(2,0,0).
Hello world from block(1,3,0) thread(3,0,0).
Hello world from block(1,3,0) thread(4,0,0).
Hello world from block(1,3,0) thread(5,0,0).
Hello world from block(1,3,0) thread(6,0,0).
Hello world from block(1,3,0) thread(7,0,0).
Hello world from block(1,3,0) thread(0,1,0).
Hello world from block(1,3,0) thread(1,1,0).
Hello world from block(1,3,0) thread(2,1,0).
Hello world from block(1,3,0) thread(3,1,0).
Hello world from block(1,3,0) thread(4,1,0).
Hello world from block(1,3,0) thread(5,1,0).
Hello world from block(1,3,0) thread(6,1,0).
Hello world from block(1,3,0) thread(7,1,0).
Hello world from block(1,3,0) thread(0,2,0).
Hello world from block(1,3,0) thread(1,2,0).
Hello world from block(1,3,0) thread(2,2,0).
Hello world from block(1,3,0) thread(3,2,0).
Hello world from block(1,3,0) thread(4,2,0).
Hello world from block(1,3,0) thread(5,2,0).
Hello world from block(1,3,0) thread(6,2,0).
Hello world from block(1,3,0) thread(7,2,0).
Hello world from block(1,3,0) thread(0,3,0).
Hello world from block(1,3,0) thread(1,3,0).
Hello world from block(1,3,0) thread(2,3,0).
Hello world from block(1,3,0) thread(3,3,0).
Hello world from block(1,3,0) thread(4,3,0).
Hello world from block(1,3,0) thread(5,3,0).
Hello world from block(1,3,0) thread(6,3,0).
Hello world from block(1,3,0) thread(7,3,0).
Hello world from block(1,0,0) thread(0,4,0).
Hello world from block(1,0,0) thread(1,4,0).
Hello world from block(1,0,0) thread(2,4,0).
Hello world from block(1,0,0) thread(3,4,0).
Hello world from block(1,0,0) thread(4,4,0).
Hello world from block(1,0,0) thread(5,4,0).
Hello world from block(1,0,0) thread(6,4,0).
Hello world from block(1,0,0) thread(7,4,0).
Hello world from block(1,0,0) thread(0,5,0).
Hello world from block(1,0,0) thread(1,5,0).
Hello world from block(1,0,0) thread(2,5,0).
Hello world from block(1,0,0) thread(3,5,0).
Hello world from block(1,0,0) thread(4,5,0).
Hello world from block(1,0,0) thread(5,5,0).
Hello world from block(1,0,0) thread(6,5,0).
Hello world from block(1,0,0) thread(7,5,0).
Hello world from block(1,0,0) thread(0,6,0).
Hello world from block(1,0,0) thread(1,6,0).
Hello world from block(1,0,0) thread(2,6,0).
Hello world from block(1,0,0) thread(3,6,0).
Hello world from block(1,0,0) thread(4,6,0).
Hello world from block(1,0,0) thread(5,6,0).
Hello world from block(1,0,0) thread(6,6,0).
Hello world from block(1,0,0) thread(7,6,0).
Hello world from block(1,0,0) thread(0,7,0).
Hello world from block(1,0,0) thread(1,7,0).
Hello world from block(1,0,0) thread(2,7,0).
Hello world from block(1,0,0) thread(3,7,0).
Hello world from block(1,0,0) thread(4,7,0).
Hello world from block(1,0,0) thread(5,7,0).
Hello world from block(1,0,0) thread(6,7,0).
Hello world from block(1,0,0) thread(7,7,0).
Hello world from block(1,3,0) thread(0,8,0).
Hello world from block(1,3,0) thread(1,8,0).
Hello world from block(1,3,0) thread(2,8,0).
Hello world from block(1,3,0) thread(3,8,0).
Hello world from block(1,3,0) thread(4,8,0).
Hello world from block(1,3,0) thread(5,8,0).
Hello world from block(1,3,0) thread(6,8,0).
Hello world from block(1,3,0) thread(7,8,0).
Hello world from block(1,3,0) thread(0,9,0).
Hello world from block(1,3,0) thread(1,9,0).
Hello world from block(1,3,0) thread(2,9,0).
Hello world from block(1,3,0) thread(3,9,0).
Hello world from block(1,3,0) thread(4,9,0).
Hello world from block(1,3,0) thread(5,9,0).
Hello world from block(1,3,0) thread(6,9,0).
Hello world from block(1,3,0) thread(7,9,0).
Hello world from block(1,3,0) thread(0,10,0).
Hello world from block(1,3,0) thread(1,10,0).
Hello world from block(1,3,0) thread(2,10,0).
Hello world from block(1,3,0) thread(3,10,0).
Hello world from block(1,3,0) thread(4,10,0).
Hello world from block(1,3,0) thread(5,10,0).
Hello world from block(1,3,0) thread(6,10,0).
Hello world from block(1,3,0) thread(7,10,0).
Hello world from block(1,3,0) thread(0,11,0).
Hello world from block(1,3,0) thread(1,11,0).
Hello world from block(1,3,0) thread(2,11,0).
Hello world from block(1,3,0) thread(3,11,0).
Hello world from block(1,3,0) thread(4,11,0).
Hello world from block(1,3,0) thread(5,11,0).
Hello world from block(1,3,0) thread(6,11,0).
Hello world from block(1,3,0) thread(7,11,0).
Hello world from block(1,0,0) thread(0,8,0).
Hello world from block(1,0,0) thread(1,8,0).
Hello world from block(1,0,0) thread(2,8,0).
Hello world from block(1,0,0) thread(3,8,0).
Hello world from block(1,0,0) thread(4,8,0).
Hello world from block(1,0,0) thread(5,8,0).
Hello world from block(1,0,0) thread(6,8,0).
Hello world from block(1,0,0) thread(7,8,0).
Hello world from block(1,0,0) thread(0,9,0).
Hello world from block(1,0,0) thread(1,9,0).
Hello world from block(1,0,0) thread(2,9,0).
Hello world from block(1,0,0) thread(3,9,0).
Hello world from block(1,0,0) thread(4,9,0).
Hello world from block(1,0,0) thread(5,9,0).
Hello world from block(1,0,0) thread(6,9,0).
Hello world from block(1,0,0) thread(7,9,0).
Hello world from block(1,0,0) thread(0,10,0).
Hello world from block(1,0,0) thread(1,10,0).
Hello world from block(1,0,0) thread(2,10,0).
Hello world from block(1,0,0) thread(3,10,0).
Hello world from block(1,0,0) thread(4,10,0).
Hello world from block(1,0,0) thread(5,10,0).
Hello world from block(1,0,0) thread(6,10,0).
Hello world from block(1,0,0) thread(7,10,0).
Hello world from block(1,0,0) thread(0,11,0).
Hello world from block(1,0,0) thread(1,11,0).
Hello world from block(1,0,0) thread(2,11,0).
Hello world from block(1,0,0) thread(3,11,0).
Hello world from block(1,0,0) thread(4,11,0).
Hello world from block(1,0,0) thread(5,11,0).
Hello world from block(1,0,0) thread(6,11,0).
Hello world from block(1,0,0) thread(7,11,0).
Hello world from block(1,3,0) thread(0,12,0).
Hello world from block(1,3,0) thread(1,12,0).
Hello world from block(1,3,0) thread(2,12,0).
Hello world from block(1,3,0) thread(3,12,0).
Hello world from block(1,3,0) thread(4,12,0).
Hello world from block(1,3,0) thread(5,12,0).
Hello world from block(1,3,0) thread(6,12,0).
Hello world from block(1,3,0) thread(7,12,0).
Hello world from block(1,3,0) thread(0,13,0).
Hello world from block(1,3,0) thread(1,13,0).
Hello world from block(1,3,0) thread(2,13,0).
Hello world from block(1,3,0) thread(3,13,0).
Hello world from block(1,3,0) thread(4,13,0).
Hello world from block(1,3,0) thread(5,13,0).
Hello world from block(1,3,0) thread(6,13,0).
Hello world from block(1,3,0) thread(7,13,0).
Hello world from block(1,3,0) thread(0,14,0).
Hello world from block(1,3,0) thread(1,14,0).
Hello world from block(1,3,0) thread(2,14,0).
Hello world from block(1,3,0) thread(3,14,0).
Hello world from block(1,3,0) thread(4,14,0).
Hello world from block(1,3,0) thread(5,14,0).
Hello world from block(1,3,0) thread(6,14,0).
Hello world from block(1,3,0) thread(7,14,0).
Hello world from block(1,3,0) thread(0,15,0).
Hello world from block(1,3,0) thread(1,15,0).
Hello world from block(1,3,0) thread(2,15,0).
Hello world from block(1,3,0) thread(3,15,0).
Hello world from block(1,3,0) thread(4,15,0).
Hello world from block(1,3,0) thread(5,15,0).
Hello world from block(1,3,0) thread(6,15,0).
Hello world from block(1,3,0) thread(7,15,0).
Hello world from block(1,0,0) thread(0,12,0).
Hello world from block(1,0,0) thread(1,12,0).
Hello world from block(1,0,0) thread(2,12,0).
Hello world from block(1,0,0) thread(3,12,0).
Hello world from block(1,0,0) thread(4,12,0).
Hello world from block(1,0,0) thread(5,12,0).
Hello world from block(1,0,0) thread(6,12,0).
Hello world from block(1,0,0) thread(7,12,0).
Hello world from block(1,0,0) thread(0,13,0).
Hello world from block(1,0,0) thread(1,13,0).
Hello world from block(1,0,0) thread(2,13,0).
Hello world from block(1,0,0) thread(3,13,0).
Hello world from block(1,0,0) thread(4,13,0).
Hello world from block(1,0,0) thread(5,13,0).
Hello world from block(1,0,0) thread(6,13,0).
Hello world from block(1,0,0) thread(7,13,0).
Hello world from block(1,0,0) thread(0,14,0).
Hello world from block(1,0,0) thread(1,14,0).
Hello world from block(1,0,0) thread(2,14,0).
Hello world from block(1,0,0) thread(3,14,0).
Hello world from block(1,0,0) thread(4,14,0).
Hello world from block(1,0,0) thread(5,14,0).
Hello world from block(1,0,0) thread(6,14,0).
Hello world from block(1,0,0) thread(7,14,0).
Hello world from block(1,0,0) thread(0,15,0).
Hello world from block(1,0,0) thread(1,15,0).
Hello world from block(1,0,0) thread(2,15,0).
Hello world from block(1,0,0) thread(3,15,0).
Hello world from block(1,0,0) thread(4,15,0).
Hello world from block(1,0,0) thread(5,15,0).
Hello world from block(1,0,0) thread(6,15,0).
Hello world from block(1,0,0) thread(7,15,0).
Hello world from block(1,1,0) thread(0,4,0).
Hello world from block(1,1,0) thread(1,4,0).
Hello world from block(1,1,0) thread(2,4,0).
Hello world from block(1,1,0) thread(3,4,0).
Hello world from block(1,1,0) thread(4,4,0).
Hello world from block(1,1,0) thread(5,4,0).
Hello world from block(1,1,0) thread(6,4,0).
Hello world from block(1,1,0) thread(7,4,0).
Hello world from block(1,1,0) thread(0,5,0).
Hello world from block(1,1,0) thread(1,5,0).
Hello world from block(1,1,0) thread(2,5,0).
Hello world from block(1,1,0) thread(3,5,0).
Hello world from block(1,1,0) thread(4,5,0).
Hello world from block(1,1,0) thread(5,5,0).
Hello world from block(1,1,0) thread(6,5,0).
Hello world from block(1,1,0) thread(7,5,0).
Hello world from block(1,1,0) thread(0,6,0).
Hello world from block(1,1,0) thread(1,6,0).
Hello world from block(1,1,0) thread(2,6,0).
Hello world from block(1,1,0) thread(3,6,0).
Hello world from block(1,1,0) thread(4,6,0).
Hello world from block(1,1,0) thread(5,6,0).
Hello world from block(1,1,0) thread(6,6,0).
Hello world from block(1,1,0) thread(7,6,0).
Hello world from block(1,1,0) thread(0,7,0).
Hello world from block(1,1,0) thread(1,7,0).
Hello world from block(1,1,0) thread(2,7,0).
Hello world from block(1,1,0) thread(3,7,0).
Hello world from block(1,1,0) thread(4,7,0).
Hello world from block(1,1,0) thread(5,7,0).
Hello world from block(1,1,0) thread(6,7,0).
Hello world from block(1,1,0) thread(7,7,0).
Hello world from block(1,1,0) thread(0,0,0).
Hello world from block(1,1,0) thread(1,0,0).
Hello world from block(1,1,0) thread(2,0,0).
Hello world from block(1,1,0) thread(3,0,0).
Hello world from block(1,1,0) thread(4,0,0).
Hello world from block(1,1,0) thread(5,0,0).
Hello world from block(1,1,0) thread(6,0,0).
Hello world from block(1,1,0) thread(7,0,0).
Hello world from block(1,1,0) thread(0,1,0).
Hello world from block(1,1,0) thread(1,1,0).
Hello world from block(1,1,0) thread(2,1,0).
Hello world from block(1,1,0) thread(3,1,0).
Hello world from block(1,1,0) thread(4,1,0).
Hello world from block(1,1,0) thread(5,1,0).
Hello world from block(1,1,0) thread(6,1,0).
Hello world from block(1,1,0) thread(7,1,0).
Hello world from block(1,1,0) thread(0,2,0).
Hello world from block(1,1,0) thread(1,2,0).
Hello world from block(1,1,0) thread(2,2,0).
Hello world from block(1,1,0) thread(3,2,0).
Hello world from block(1,1,0) thread(4,2,0).
Hello world from block(1,1,0) thread(5,2,0).
Hello world from block(1,1,0) thread(6,2,0).
Hello world from block(1,1,0) thread(7,2,0).
Hello world from block(1,1,0) thread(0,3,0).
Hello world from block(1,1,0) thread(1,3,0).
Hello world from block(1,1,0) thread(2,3,0).
Hello world from block(1,1,0) thread(3,3,0).
Hello world from block(1,1,0) thread(4,3,0).
Hello world from block(1,1,0) thread(5,3,0).
Hello world from block(1,1,0) thread(6,3,0).
Hello world from block(1,1,0) thread(7,3,0).
Hello world from block(1,1,0) thread(0,8,0).
Hello world from block(1,1,0) thread(1,8,0).
Hello world from block(1,1,0) thread(2,8,0).
Hello world from block(1,1,0) thread(3,8,0).
Hello world from block(1,1,0) thread(4,8,0).
Hello world from block(1,1,0) thread(5,8,0).
Hello world from block(1,1,0) thread(6,8,0).
Hello world from block(1,1,0) thread(7,8,0).
Hello world from block(1,1,0) thread(0,9,0).
Hello world from block(1,1,0) thread(1,9,0).
Hello world from block(1,1,0) thread(2,9,0).
Hello world from block(1,1,0) thread(3,9,0).
Hello world from block(1,1,0) thread(4,9,0).
Hello world from block(1,1,0) thread(5,9,0).
Hello world from block(1,1,0) thread(6,9,0).
Hello world from block(1,1,0) thread(7,9,0).
Hello world from block(1,1,0) thread(0,10,0).
Hello world from block(1,1,0) thread(1,10,0).
Hello world from block(1,1,0) thread(2,10,0).
Hello world from block(1,1,0) thread(3,10,0).
Hello world from block(1,1,0) thread(4,10,0).
Hello world from block(1,1,0) thread(5,10,0).
Hello world from block(1,1,0) thread(6,10,0).
Hello world from block(1,1,0) thread(7,10,0).
Hello world from block(1,1,0) thread(0,11,0).
Hello world from block(1,1,0) thread(1,11,0).
Hello world from block(1,1,0) thread(2,11,0).
Hello world from block(1,1,0) thread(3,11,0).
Hello world from block(1,1,0) thread(4,11,0).
Hello world from block(1,1,0) thread(5,11,0).
Hello world from block(1,1,0) thread(6,11,0).
Hello world from block(1,1,0) thread(7,11,0).
Hello world from block(1,1,0) thread(0,12,0).
Hello world from block(1,1,0) thread(1,12,0).
Hello world from block(1,1,0) thread(2,12,0).
Hello world from block(1,1,0) thread(3,12,0).
Hello world from block(1,1,0) thread(4,12,0).
Hello world from block(1,1,0) thread(5,12,0).
Hello world from block(1,1,0) thread(6,12,0).
Hello world from block(1,1,0) thread(7,12,0).
Hello world from block(1,1,0) thread(0,13,0).
Hello world from block(1,1,0) thread(1,13,0).
Hello world from block(1,1,0) thread(2,13,0).
Hello world from block(1,1,0) thread(3,13,0).
Hello world from block(1,1,0) thread(4,13,0).
Hello world from block(1,1,0) thread(5,13,0).
Hello world from block(1,1,0) thread(6,13,0).
Hello world from block(1,1,0) thread(7,13,0).
Hello world from block(1,1,0) thread(0,14,0).
Hello world from block(1,1,0) thread(1,14,0).
Hello world from block(1,1,0) thread(2,14,0).
Hello world from block(1,1,0) thread(3,14,0).
Hello world from block(1,1,0) thread(4,14,0).
Hello world from block(1,1,0) thread(5,14,0).
Hello world from block(1,1,0) thread(6,14,0).
Hello world from block(1,1,0) thread(7,14,0).
Hello world from block(1,1,0) thread(0,15,0).
Hello world from block(1,1,0) thread(1,15,0).
Hello world from block(1,1,0) thread(2,15,0).
Hello world from block(1,1,0) thread(3,15,0).
Hello world from block(1,1,0) thread(4,15,0).
Hello world from block(1,1,0) thread(5,15,0).
Hello world from block(1,1,0) thread(6,15,0).
Hello world from block(1,1,0) thread(7,15,0).
Hello world from block(0,0,0) thread(0,0,0).
Hello world from block(0,0,0) thread(1,0,0).
Hello world from block(0,0,0) thread(2,0,0).
Hello world from block(0,0,0) thread(3,0,0).
Hello world from block(0,0,0) thread(4,0,0).
Hello world from block(0,0,0) thread(5,0,0).
Hello world from block(0,0,0) thread(6,0,0).
Hello world from block(0,0,0) thread(7,0,0).
Hello world from block(0,0,0) thread(0,1,0).
Hello world from block(0,0,0) thread(1,1,0).
Hello world from block(0,0,0) thread(2,1,0).
Hello world from block(0,0,0) thread(3,1,0).
Hello world from block(0,0,0) thread(4,1,0).
Hello world from block(0,0,0) thread(5,1,0).
Hello world from block(0,0,0) thread(6,1,0).
Hello world from block(0,0,0) thread(7,1,0).
Hello world from block(0,0,0) thread(0,2,0).
Hello world from block(0,0,0) thread(1,2,0).
Hello world from block(0,0,0) thread(2,2,0).
Hello world from block(0,0,0) thread(3,2,0).
Hello world from block(0,0,0) thread(4,2,0).
Hello world from block(0,0,0) thread(5,2,0).
Hello world from block(0,0,0) thread(6,2,0).
Hello world from block(0,0,0) thread(7,2,0).
Hello world from block(0,0,0) thread(0,3,0).
Hello world from block(0,0,0) thread(1,3,0).
Hello world from block(0,0,0) thread(2,3,0).
Hello world from block(0,0,0) thread(3,3,0).
Hello world from block(0,0,0) thread(4,3,0).
Hello world from block(0,0,0) thread(5,3,0).
Hello world from block(0,0,0) thread(6,3,0).
Hello world from block(0,0,0) thread(7,3,0).
Hello world from block(0,3,0) thread(0,0,0).
Hello world from block(0,3,0) thread(1,0,0).
Hello world from block(0,3,0) thread(2,0,0).
Hello world from block(0,3,0) thread(3,0,0).
Hello world from block(0,3,0) thread(4,0,0).
Hello world from block(0,3,0) thread(5,0,0).
Hello world from block(0,3,0) thread(6,0,0).
Hello world from block(0,3,0) thread(7,0,0).
Hello world from block(0,3,0) thread(0,1,0).
Hello world from block(0,3,0) thread(1,1,0).
Hello world from block(0,3,0) thread(2,1,0).
Hello world from block(0,3,0) thread(3,1,0).
Hello world from block(0,3,0) thread(4,1,0).
Hello world from block(0,3,0) thread(5,1,0).
Hello world from block(0,3,0) thread(6,1,0).
Hello world from block(0,3,0) thread(7,1,0).
Hello world from block(0,3,0) thread(0,2,0).
Hello world from block(0,3,0) thread(1,2,0).
Hello world from block(0,3,0) thread(2,2,0).
Hello world from block(0,3,0) thread(3,2,0).
Hello world from block(0,3,0) thread(4,2,0).
Hello world from block(0,3,0) thread(5,2,0).
Hello world from block(0,3,0) thread(6,2,0).
Hello world from block(0,3,0) thread(7,2,0).
Hello world from block(0,3,0) thread(0,3,0).
Hello world from block(0,3,0) thread(1,3,0).
Hello world from block(0,3,0) thread(2,3,0).
Hello world from block(0,3,0) thread(3,3,0).
Hello world from block(0,3,0) thread(4,3,0).
Hello world from block(0,3,0) thread(5,3,0).
Hello world from block(0,3,0) thread(6,3,0).
Hello world from block(0,3,0) thread(7,3,0).
Hello world from block(0,0,0) thread(0,4,0).
Hello world from block(0,0,0) thread(1,4,0).
Hello world from block(0,0,0) thread(2,4,0).
Hello world from block(0,0,0) thread(3,4,0).
Hello world from block(0,0,0) thread(4,4,0).
Hello world from block(0,0,0) thread(5,4,0).
Hello world from block(0,0,0) thread(6,4,0).
Hello world from block(0,0,0) thread(7,4,0).
Hello world from block(0,0,0) thread(0,5,0).
Hello world from block(0,0,0) thread(1,5,0).
Hello world from block(0,0,0) thread(2,5,0).
Hello world from block(0,0,0) thread(3,5,0).
Hello world from block(0,0,0) thread(4,5,0).
Hello world from block(0,0,0) thread(5,5,0).
Hello world from block(0,0,0) thread(6,5,0).
Hello world from block(0,0,0) thread(7,5,0).
Hello world from block(0,0,0) thread(0,6,0).
Hello world from block(0,0,0) thread(1,6,0).
Hello world from block(0,0,0) thread(2,6,0).
Hello world from block(0,0,0) thread(3,6,0).
Hello world from block(0,0,0) thread(4,6,0).
Hello world from block(0,0,0) thread(5,6,0).
Hello world from block(0,0,0) thread(6,6,0).
Hello world from block(0,0,0) thread(7,6,0).
Hello world from block(0,0,0) thread(0,7,0).
Hello world from block(0,0,0) thread(1,7,0).
Hello world from block(0,0,0) thread(2,7,0).
Hello world from block(0,0,0) thread(3,7,0).
Hello world from block(0,0,0) thread(4,7,0).
Hello world from block(0,0,0) thread(5,7,0).
Hello world from block(0,0,0) thread(6,7,0).
Hello world from block(0,0,0) thread(7,7,0).
Hello world from block(0,3,0) thread(0,4,0).
Hello world from block(0,3,0) thread(1,4,0).
Hello world from block(0,3,0) thread(2,4,0).
Hello world from block(0,3,0) thread(3,4,0).
Hello world from block(0,3,0) thread(4,4,0).
Hello world from block(0,3,0) thread(5,4,0).
Hello world from block(0,3,0) thread(6,4,0).
Hello world from block(0,3,0) thread(7,4,0).
Hello world from block(0,3,0) thread(0,5,0).
Hello world from block(0,3,0) thread(1,5,0).
Hello world from block(0,3,0) thread(2,5,0).
Hello world from block(0,3,0) thread(3,5,0).
Hello world from block(0,3,0) thread(4,5,0).
Hello world from block(0,3,0) thread(5,5,0).
Hello world from block(0,3,0) thread(6,5,0).
Hello world from block(0,3,0) thread(7,5,0).
Hello world from block(0,3,0) thread(0,6,0).
Hello world from block(0,3,0) thread(1,6,0).
Hello world from block(0,3,0) thread(2,6,0).
Hello world from block(0,3,0) thread(3,6,0).
Hello world from block(0,3,0) thread(4,6,0).
Hello world from block(0,3,0) thread(5,6,0).
Hello world from block(0,3,0) thread(6,6,0).
Hello world from block(0,3,0) thread(7,6,0).
Hello world from block(0,3,0) thread(0,7,0).
Hello world from block(0,3,0) thread(1,7,0).
Hello world from block(0,3,0) thread(2,7,0).
Hello world from block(0,3,0) thread(3,7,0).
Hello world from block(0,3,0) thread(4,7,0).
Hello world from block(0,3,0) thread(5,7,0).
Hello world from block(0,3,0) thread(6,7,0).
Hello world from block(0,3,0) thread(7,7,0).
Hello world from block(0,0,0) thread(0,8,0).
Hello world from block(0,0,0) thread(1,8,0).
Hello world from block(0,0,0) thread(2,8,0).
Hello world from block(0,0,0) thread(3,8,0).
Hello world from block(0,0,0) thread(4,8,0).
Hello world from block(0,0,0) thread(5,8,0).
Hello world from block(0,0,0) thread(6,8,0).
Hello world from block(0,0,0) thread(7,8,0).
Hello world from block(0,0,0) thread(0,9,0).
Hello world from block(0,0,0) thread(1,9,0).
Hello world from block(0,0,0) thread(2,9,0).
Hello world from block(0,0,0) thread(3,9,0).
Hello world from block(0,0,0) thread(4,9,0).
Hello world from block(0,0,0) thread(5,9,0).
Hello world from block(0,0,0) thread(6,9,0).
Hello world from block(0,0,0) thread(7,9,0).
Hello world from block(0,0,0) thread(0,10,0).
Hello world from block(0,0,0) thread(1,10,0).
Hello world from block(0,0,0) thread(2,10,0).
Hello world from block(0,0,0) thread(3,10,0).
Hello world from block(0,0,0) thread(4,10,0).
Hello world from block(0,0,0) thread(5,10,0).
Hello world from block(0,0,0) thread(6,10,0).
Hello world from block(0,0,0) thread(7,10,0).
Hello world from block(0,0,0) thread(0,11,0).
Hello world from block(0,0,0) thread(1,11,0).
Hello world from block(0,0,0) thread(2,11,0).
Hello world from block(0,0,0) thread(3,11,0).
Hello world from block(0,0,0) thread(4,11,0).
Hello world from block(0,0,0) thread(5,11,0).
Hello world from block(0,0,0) thread(6,11,0).
Hello world from block(0,0,0) thread(7,11,0).
Hello world from block(0,3,0) thread(0,8,0).
Hello world from block(0,3,0) thread(1,8,0).
Hello world from block(0,3,0) thread(2,8,0).
Hello world from block(0,3,0) thread(3,8,0).
Hello world from block(0,3,0) thread(4,8,0).
Hello world from block(0,3,0) thread(5,8,0).
Hello world from block(0,3,0) thread(6,8,0).
Hello world from block(0,3,0) thread(7,8,0).
Hello world from block(0,3,0) thread(0,9,0).
Hello world from block(0,3,0) thread(1,9,0).
Hello world from block(0,3,0) thread(2,9,0).
Hello world from block(0,3,0) thread(3,9,0).
Hello world from block(0,3,0) thread(4,9,0).
Hello world from block(0,3,0) thread(5,9,0).
Hello world from block(0,3,0) thread(6,9,0).
Hello world from block(0,3,0) thread(7,9,0).
Hello world from block(0,3,0) thread(0,10,0).
Hello world from block(0,3,0) thread(1,10,0).
Hello world from block(0,3,0) thread(2,10,0).
Hello world from block(0,3,0) thread(3,10,0).
Hello world from block(0,3,0) thread(4,10,0).
Hello world from block(0,3,0) thread(5,10,0).
Hello world from block(0,3,0) thread(6,10,0).
Hello world from block(0,3,0) thread(7,10,0).
Hello world from block(0,3,0) thread(0,11,0).
Hello world from block(0,3,0) thread(1,11,0).
Hello world from block(0,3,0) thread(2,11,0).
Hello world from block(0,3,0) thread(3,11,0).
Hello world from block(0,3,0) thread(4,11,0).
Hello world from block(0,3,0) thread(5,11,0).
Hello world from block(0,3,0) thread(6,11,0).
Hello world from block(0,3,0) thread(7,11,0).
Hello world from block(0,0,0) thread(0,12,0).
Hello world from block(0,0,0) thread(1,12,0).
Hello world from block(0,0,0) thread(2,12,0).
Hello world from block(0,0,0) thread(3,12,0).
Hello world from block(0,0,0) thread(4,12,0).
Hello world from block(0,0,0) thread(5,12,0).
Hello world from block(0,0,0) thread(6,12,0).
Hello world from block(0,0,0) thread(7,12,0).
Hello world from block(0,0,0) thread(0,13,0).
Hello world from block(0,0,0) thread(1,13,0).
Hello world from block(0,0,0) thread(2,13,0).
Hello world from block(0,0,0) thread(3,13,0).
Hello world from block(0,0,0) thread(4,13,0).
Hello world from block(0,0,0) thread(5,13,0).
Hello world from block(0,0,0) thread(6,13,0).
Hello world from block(0,0,0) thread(7,13,0).
Hello world from block(0,0,0) thread(0,14,0).
Hello world from block(0,0,0) thread(1,14,0).
Hello world from block(0,0,0) thread(2,14,0).
Hello world from block(0,0,0) thread(3,14,0).
Hello world from block(0,0,0) thread(4,14,0).
Hello world from block(0,0,0) thread(5,14,0).
Hello world from block(0,0,0) thread(6,14,0).
Hello world from block(0,0,0) thread(7,14,0).
Hello world from block(0,0,0) thread(0,15,0).
Hello world from block(0,0,0) thread(1,15,0).
Hello world from block(0,0,0) thread(2,15,0).
Hello world from block(0,0,0) thread(3,15,0).
Hello world from block(0,0,0) thread(4,15,0).
Hello world from block(0,0,0) thread(5,15,0).
Hello world from block(0,0,0) thread(6,15,0).
Hello world from block(0,0,0) thread(7,15,0).
Hello world from block(0,3,0) thread(0,12,0).
Hello world from block(0,3,0) thread(1,12,0).
Hello world from block(0,3,0) thread(2,12,0).
Hello world from block(0,3,0) thread(3,12,0).
Hello world from block(0,3,0) thread(4,12,0).
Hello world from block(0,3,0) thread(5,12,0).
Hello world from block(0,3,0) thread(6,12,0).
Hello world from block(0,3,0) thread(7,12,0).
Hello world from block(0,3,0) thread(0,13,0).
Hello world from block(0,3,0) thread(1,13,0).
Hello world from block(0,3,0) thread(2,13,0).
Hello world from block(0,3,0) thread(3,13,0).
Hello world from block(0,3,0) thread(4,13,0).
Hello world from block(0,3,0) thread(5,13,0).
Hello world from block(0,3,0) thread(6,13,0).
Hello world from block(0,3,0) thread(7,13,0).
Hello world from block(0,3,0) thread(0,14,0).
Hello world from block(0,3,0) thread(1,14,0).
Hello world from block(0,3,0) thread(2,14,0).
Hello world from block(0,3,0) thread(3,14,0).
Hello world from block(0,3,0) thread(4,14,0).
Hello world from block(0,3,0) thread(5,14,0).
Hello world from block(0,3,0) thread(6,14,0).
Hello world from block(0,3,0) thread(7,14,0).
Hello world from block(0,3,0) thread(0,15,0).
Hello world from block(0,3,0) thread(1,15,0).
Hello world from block(0,3,0) thread(2,15,0).
Hello world from block(0,3,0) thread(3,15,0).
Hello world from block(0,3,0) thread(4,15,0).
Hello world from block(0,3,0) thread(5,15,0).
Hello world from block(0,3,0) thread(6,15,0).
Hello world from block(0,3,0) thread(7,15,0).
Hello world from block(0,2,0) thread(0,8,0).
Hello world from block(0,2,0) thread(1,8,0).
Hello world from block(0,2,0) thread(2,8,0).
Hello world from block(0,2,0) thread(3,8,0).
Hello world from block(0,2,0) thread(4,8,0).
Hello world from block(0,2,0) thread(5,8,0).
Hello world from block(0,2,0) thread(6,8,0).
Hello world from block(0,2,0) thread(7,8,0).
Hello world from block(0,2,0) thread(0,9,0).
Hello world from block(0,2,0) thread(1,9,0).
Hello world from block(0,2,0) thread(2,9,0).
Hello world from block(0,2,0) thread(3,9,0).
Hello world from block(0,2,0) thread(4,9,0).
Hello world from block(0,2,0) thread(5,9,0).
Hello world from block(0,2,0) thread(6,9,0).
Hello world from block(0,2,0) thread(7,9,0).
Hello world from block(0,2,0) thread(0,10,0).
Hello world from block(0,2,0) thread(1,10,0).
Hello world from block(0,2,0) thread(2,10,0).
Hello world from block(0,2,0) thread(3,10,0).
Hello world from block(0,2,0) thread(4,10,0).
Hello world from block(0,2,0) thread(5,10,0).
Hello world from block(0,2,0) thread(6,10,0).
Hello world from block(0,2,0) thread(7,10,0).
Hello world from block(0,2,0) thread(0,11,0).
Hello world from block(0,2,0) thread(1,11,0).
Hello world from block(0,2,0) thread(2,11,0).
Hello world from block(0,2,0) thread(3,11,0).
Hello world from block(0,2,0) thread(4,11,0).
Hello world from block(0,2,0) thread(5,11,0).
Hello world from block(0,2,0) thread(6,11,0).
Hello world from block(0,2,0) thread(7,11,0).
Hello world from block(0,2,0) thread(0,12,0).
Hello world from block(0,2,0) thread(1,12,0).
Hello world from block(0,2,0) thread(2,12,0).
Hello world from block(0,2,0) thread(3,12,0).
Hello world from block(0,2,0) thread(4,12,0).
Hello world from block(0,2,0) thread(5,12,0).
Hello world from block(0,2,0) thread(6,12,0).
Hello world from block(0,2,0) thread(7,12,0).
Hello world from block(0,2,0) thread(0,13,0).
Hello world from block(0,2,0) thread(1,13,0).
Hello world from block(0,2,0) thread(2,13,0).
Hello world from block(0,2,0) thread(3,13,0).
Hello world from block(0,2,0) thread(4,13,0).
Hello world from block(0,2,0) thread(5,13,0).
Hello world from block(0,2,0) thread(6,13,0).
Hello world from block(0,2,0) thread(7,13,0).
Hello world from block(0,2,0) thread(0,14,0).
Hello world from block(0,2,0) thread(1,14,0).
Hello world from block(0,2,0) thread(2,14,0).
Hello world from block(0,2,0) thread(3,14,0).
Hello world from block(0,2,0) thread(4,14,0).
Hello world from block(0,2,0) thread(5,14,0).
Hello world from block(0,2,0) thread(6,14,0).
Hello world from block(0,2,0) thread(7,14,0).
Hello world from block(0,2,0) thread(0,15,0).
Hello world from block(0,2,0) thread(1,15,0).
Hello world from block(0,2,0) thread(2,15,0).
Hello world from block(0,2,0) thread(3,15,0).
Hello world from block(0,2,0) thread(4,15,0).
Hello world from block(0,2,0) thread(5,15,0).
Hello world from block(0,2,0) thread(6,15,0).
Hello world from block(0,2,0) thread(7,15,0).
Hello world from block(0,2,0) thread(0,0,0).
Hello world from block(0,2,0) thread(1,0,0).
Hello world from block(0,2,0) thread(2,0,0).
Hello world from block(0,2,0) thread(3,0,0).
Hello world from block(0,2,0) thread(4,0,0).
Hello world from block(0,2,0) thread(5,0,0).
Hello world from block(0,2,0) thread(6,0,0).
Hello world from block(0,2,0) thread(7,0,0).
Hello world from block(0,2,0) thread(0,1,0).
Hello world from block(0,2,0) thread(1,1,0).
Hello world from block(0,2,0) thread(2,1,0).
Hello world from block(0,2,0) thread(3,1,0).
Hello world from block(0,2,0) thread(4,1,0).
Hello world from block(0,2,0) thread(5,1,0).
Hello world from block(0,2,0) thread(6,1,0).
Hello world from block(0,2,0) thread(7,1,0).
Hello world from block(0,2,0) thread(0,2,0).
Hello world from block(0,2,0) thread(1,2,0).
Hello world from block(0,2,0) thread(2,2,0).
Hello world from block(0,2,0) thread(3,2,0).
Hello world from block(0,2,0) thread(4,2,0).
Hello world from block(0,2,0) thread(5,2,0).
Hello world from block(0,2,0) thread(6,2,0).
Hello world from block(0,2,0) thread(7,2,0).
Hello world from block(0,2,0) thread(0,3,0).
Hello world from block(0,2,0) thread(1,3,0).
Hello world from block(0,2,0) thread(2,3,0).
Hello world from block(0,2,0) thread(3,3,0).
Hello world from block(0,2,0) thread(4,3,0).
Hello world from block(0,2,0) thread(5,3,0).
Hello world from block(0,2,0) thread(6,3,0).
Hello world from block(0,2,0) thread(7,3,0).
Hello world from block(0,2,0) thread(0,4,0).
Hello world from block(0,2,0) thread(1,4,0).
Hello world from block(0,2,0) thread(2,4,0).
Hello world from block(0,2,0) thread(3,4,0).
Hello world from block(0,2,0) thread(4,4,0).
Hello world from block(0,2,0) thread(5,4,0).
Hello world from block(0,2,0) thread(6,4,0).
Hello world from block(0,2,0) thread(7,4,0).
Hello world from block(0,2,0) thread(0,5,0).
Hello world from block(0,2,0) thread(1,5,0).
Hello world from block(0,2,0) thread(2,5,0).
Hello world from block(0,2,0) thread(3,5,0).
Hello world from block(0,2,0) thread(4,5,0).
Hello world from block(0,2,0) thread(5,5,0).
Hello world from block(0,2,0) thread(6,5,0).
Hello world from block(0,2,0) thread(7,5,0).
Hello world from block(0,2,0) thread(0,6,0).
Hello world from block(0,2,0) thread(1,6,0).
Hello world from block(0,2,0) thread(2,6,0).
Hello world from block(0,2,0) thread(3,6,0).
Hello world from block(0,2,0) thread(4,6,0).
Hello world from block(0,2,0) thread(5,6,0).
Hello world from block(0,2,0) thread(6,6,0).
Hello world from block(0,2,0) thread(7,6,0).
Hello world from block(0,2,0) thread(0,7,0).
Hello world from block(0,2,0) thread(1,7,0).
Hello world from block(0,2,0) thread(2,7,0).
Hello world from block(0,2,0) thread(3,7,0).
Hello world from block(0,2,0) thread(4,7,0).
Hello world from block(0,2,0) thread(5,7,0).
Hello world from block(0,2,0) thread(6,7,0).
Hello world from block(0,2,0) thread(7,7,0).
Hello world from block(0,1,0) thread(0,12,0).
Hello world from block(0,1,0) thread(1,12,0).
Hello world from block(0,1,0) thread(2,12,0).
Hello world from block(0,1,0) thread(3,12,0).
Hello world from block(0,1,0) thread(4,12,0).
Hello world from block(0,1,0) thread(5,12,0).
Hello world from block(0,1,0) thread(6,12,0).
Hello world from block(0,1,0) thread(7,12,0).
Hello world from block(0,1,0) thread(0,13,0).
Hello world from block(0,1,0) thread(1,13,0).
Hello world from block(0,1,0) thread(2,13,0).
Hello world from block(0,1,0) thread(3,13,0).
Hello world from block(0,1,0) thread(4,13,0).
Hello world from block(0,1,0) thread(5,13,0).
Hello world from block(0,1,0) thread(6,13,0).
Hello world from block(0,1,0) thread(7,13,0).
Hello world from block(0,1,0) thread(0,14,0).
Hello world from block(0,1,0) thread(1,14,0).
Hello world from block(0,1,0) thread(2,14,0).
Hello world from block(0,1,0) thread(3,14,0).
Hello world from block(0,1,0) thread(4,14,0).
Hello world from block(0,1,0) thread(5,14,0).
Hello world from block(0,1,0) thread(6,14,0).
Hello world from block(0,1,0) thread(7,14,0).
Hello world from block(0,1,0) thread(0,15,0).
Hello world from block(0,1,0) thread(1,15,0).
Hello world from block(0,1,0) thread(2,15,0).
Hello world from block(0,1,0) thread(3,15,0).
Hello world from block(0,1,0) thread(4,15,0).
Hello world from block(0,1,0) thread(5,15,0).
Hello world from block(0,1,0) thread(6,15,0).
Hello world from block(0,1,0) thread(7,15,0).
Hello world from block(0,1,0) thread(0,8,0).
Hello world from block(0,1,0) thread(1,8,0).
Hello world from block(0,1,0) thread(2,8,0).
Hello world from block(0,1,0) thread(3,8,0).
Hello world from block(0,1,0) thread(4,8,0).
Hello world from block(0,1,0) thread(5,8,0).
Hello world from block(0,1,0) thread(6,8,0).
Hello world from block(0,1,0) thread(7,8,0).
Hello world from block(0,1,0) thread(0,9,0).
Hello world from block(0,1,0) thread(1,9,0).
Hello world from block(0,1,0) thread(2,9,0).
Hello world from block(0,1,0) thread(3,9,0).
Hello world from block(0,1,0) thread(4,9,0).
Hello world from block(0,1,0) thread(5,9,0).
Hello world from block(0,1,0) thread(6,9,0).
Hello world from block(0,1,0) thread(7,9,0).
Hello world from block(0,1,0) thread(0,10,0).
Hello world from block(0,1,0) thread(1,10,0).
Hello world from block(0,1,0) thread(2,10,0).
Hello world from block(0,1,0) thread(3,10,0).
Hello world from block(0,1,0) thread(4,10,0).
Hello world from block(0,1,0) thread(5,10,0).
Hello world from block(0,1,0) thread(6,10,0).
Hello world from block(0,1,0) thread(7,10,0).
Hello world from block(0,1,0) thread(0,11,0).
Hello world from block(0,1,0) thread(1,11,0).
Hello world from block(0,1,0) thread(2,11,0).
Hello world from block(0,1,0) thread(3,11,0).
Hello world from block(0,1,0) thread(4,11,0).
Hello world from block(0,1,0) thread(5,11,0).
Hello world from block(0,1,0) thread(6,11,0).
Hello world from block(0,1,0) thread(7,11,0).
Hello world from block(0,1,0) thread(0,0,0).
Hello world from block(0,1,0) thread(1,0,0).
Hello world from block(0,1,0) thread(2,0,0).
Hello world from block(0,1,0) thread(3,0,0).
Hello world from block(0,1,0) thread(4,0,0).
Hello world from block(0,1,0) thread(5,0,0).
Hello world from block(0,1,0) thread(6,0,0).
Hello world from block(0,1,0) thread(7,0,0).
Hello world from block(0,1,0) thread(0,1,0).
Hello world from block(0,1,0) thread(1,1,0).
Hello world from block(0,1,0) thread(2,1,0).
Hello world from block(0,1,0) thread(3,1,0).
Hello world from block(0,1,0) thread(4,1,0).
Hello world from block(0,1,0) thread(5,1,0).
Hello world from block(0,1,0) thread(6,1,0).
Hello world from block(0,1,0) thread(7,1,0).
Hello world from block(0,1,0) thread(0,2,0).
Hello world from block(0,1,0) thread(1,2,0).
Hello world from block(0,1,0) thread(2,2,0).
Hello world from block(0,1,0) thread(3,2,0).
Hello world from block(0,1,0) thread(4,2,0).
Hello world from block(0,1,0) thread(5,2,0).
Hello world from block(0,1,0) thread(6,2,0).
Hello world from block(0,1,0) thread(7,2,0).
Hello world from block(0,1,0) thread(0,3,0).
Hello world from block(0,1,0) thread(1,3,0).
Hello world from block(0,1,0) thread(2,3,0).
Hello world from block(0,1,0) thread(3,3,0).
Hello world from block(0,1,0) thread(4,3,0).
Hello world from block(0,1,0) thread(5,3,0).
Hello world from block(0,1,0) thread(6,3,0).
Hello world from block(0,1,0) thread(7,3,0).
Hello world from block(0,1,0) thread(0,4,0).
Hello world from block(0,1,0) thread(1,4,0).
Hello world from block(0,1,0) thread(2,4,0).
Hello world from block(0,1,0) thread(3,4,0).
Hello world from block(0,1,0) thread(4,4,0).
Hello world from block(0,1,0) thread(5,4,0).
Hello world from block(0,1,0) thread(6,4,0).
Hello world from block(0,1,0) thread(7,4,0).
Hello world from block(0,1,0) thread(0,5,0).
Hello world from block(0,1,0) thread(1,5,0).
Hello world from block(0,1,0) thread(2,5,0).
Hello world from block(0,1,0) thread(3,5,0).
Hello world from block(0,1,0) thread(4,5,0).
Hello world from block(0,1,0) thread(5,5,0).
Hello world from block(0,1,0) thread(6,5,0).
Hello world from block(0,1,0) thread(7,5,0).
Hello world from block(0,1,0) thread(0,6,0).
Hello world from block(0,1,0) thread(1,6,0).
Hello world from block(0,1,0) thread(2,6,0).
Hello world from block(0,1,0) thread(3,6,0).
Hello world from block(0,1,0) thread(4,6,0).
Hello world from block(0,1,0) thread(5,6,0).
Hello world from block(0,1,0) thread(6,6,0).
Hello world from block(0,1,0) thread(7,6,0).
Hello world from block(0,1,0) thread(0,7,0).
Hello world from block(0,1,0) thread(1,7,0).
Hello world from block(0,1,0) thread(2,7,0).
Hello world from block(0,1,0) thread(3,7,0).
Hello world from block(0,1,0) thread(4,7,0).
Hello world from block(0,1,0) thread(5,7,0).
Hello world from block(0,1,0) thread(6,7,0).
Hello world from block(0,1,0) thread(7,7,0).

MatMatAdd

用 CUDA 完成两个矩阵的加法 A+B=C,其中 A、B 是$2^{13}\times 2^{13}$的方阵。假设矩阵 A 的元素为a[i][j]=i-0.1*j+1,矩阵 B 的元素为b[i][j]=0.2*j-0.1*i。

  • 比较 CPU 计算 A+B=C 的时间和 GPU 计算的时间(GPU 的时间从拷贝矩阵 A,B 到显存开始至将计算结果复制到 host 为止)
  • 比较 CPU 计算结果和 GPU 计算结果

源代码MatMatAdd.cu

矩阵加法可以直接类比于向量加法来做;使用 CUDA 在 NVIDIA 显卡上编程时,可以使用 CUDA 提供的 Event 进行程序计时。

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda_runtime.h>
// Element type used for every matrix in this program.
typedef float lf;

/*
 * MatAdd: in-place element-wise matrix addition, A[r][c] += B[r][c].
 *
 * A, B  device pointers to row-major nRow x nCol matrices; A is overwritten.
 * nRow  number of rows.
 * nCol  number of columns (also the row pitch, in elements).
 *
 * Launch layout: any 2D grid of 2D blocks. The x dimension walks columns so
 * that consecutive threadIdx.x values touch consecutive addresses, and the
 * y dimension walks rows. Both dimensions use a stride loop, so the result
 * is correct even when the grid does not exactly cover the matrix.
 * No shared memory is used.
 */
__global__ void MatAdd(lf *A, const lf *B, const int nRow, const int nCol)
{
	const int
		colStride = gridDim.x * blockDim.x,
		rowStride = gridDim.y * blockDim.y;
	for (int row = blockIdx.y * blockDim.y + threadIdx.y; row < nRow; row += rowStride)
		for (int col = blockIdx.x * blockDim.x + threadIdx.x; col < nCol; col += colStride)
		{
			// Widen before multiplying so the flat index cannot overflow int.
			const size_t idx = (size_t)row * nCol + col;
			A[idx] += B[idx];
		}
}
/* Abort with a diagnostic when a CUDA runtime call does not return cudaSuccess. */
static void checkCuda(cudaError_t status, const char *what)
{
	if (status != cudaSuccess)
	{
		fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
		exit(EXIT_FAILURE);
	}
}

/*
 * Adds two 2^13 x 2^13 matrices on the GPU and on the CPU, prints the elapsed
 * time of each, and compares the two results element by element.
 *
 * The GPU interval spans device allocation, both host-to-device copies, the
 * kernel, the device-to-host copy and the device frees.
 *
 * Returns EXIT_SUCCESS when the results agree within EPS, EXIT_FAILURE on a
 * mismatch or a failed host allocation. CUDA failures abort via checkCuda.
 */
int main()
{
	const int
		nRow = 1 << 13,
		nCol = 1 << 13,
		nElem = nRow * nCol;
	const size_t nBytes = (size_t)nElem * sizeof(lf);
	const lf EPS = 1e-3f;
	lf
		*h_A = (lf *)malloc(nBytes),
		*h_B = (lf *)malloc(nBytes),
		*h_C = (lf *)malloc(nBytes),
		*d_A = NULL,
		*d_B = NULL;
	float elapsed_time = 0.0f;
	int status = EXIT_SUCCESS;

	if (h_A == NULL || h_B == NULL || h_C == NULL)
	{
		fprintf(stderr, "Host allocation of %zu bytes failed.\n", nBytes);
		free(h_A);
		free(h_B);
		free(h_C);
		return EXIT_FAILURE;
	}

	// a[i][j] = i - 0.1*j + 1, b[i][j] = 0.2*j - 0.1*i, stored row-major.
	for (int i = 0; i < nRow; ++i)
		for (int j = 0; j < nCol; ++j)
		{
			h_A[i * nCol + j] = i - 0.1 * j + 1;
			h_B[i * nCol + j] = 0.2 * j - 0.1 * i;
		}

	// One pair of events is created here and reused for both measurements.
	cudaEvent_t beg, end;
	checkCuda(cudaEventCreate(&beg), "cudaEventCreate(beg)");
	checkCuda(cudaEventCreate(&end), "cudaEventCreate(end)");

	// ---- GPU: allocate, upload, add, download, free ----
	checkCuda(cudaEventRecord(beg, 0), "cudaEventRecord(beg)");

	checkCuda(cudaMalloc((void **)&d_A, nBytes), "cudaMalloc(d_A)");
	checkCuda(cudaMalloc((void **)&d_B, nBytes), "cudaMalloc(d_B)");
	checkCuda(cudaMemcpy(d_A, h_A, nBytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_A)");
	checkCuda(cudaMemcpy(d_B, h_B, nBytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_B)");

	// Ceil-division so the grid still covers the matrix when a dimension is
	// not a multiple of the block size.
	const dim3 threadsPerBlock(32, 32);
	const dim3 numBlocks(
		(nRow + threadsPerBlock.x - 1) / threadsPerBlock.x,
		(nCol + threadsPerBlock.y - 1) / threadsPerBlock.y);
	MatAdd<<<numBlocks, threadsPerBlock>>>(d_A, d_B, nRow, nCol);
	// A kernel launch returns no status itself; query it explicitly.
	checkCuda(cudaGetLastError(), "MatAdd launch");

	// The kernel accumulates into d_A, so the sum is read back from d_A.
	checkCuda(cudaMemcpy(h_C, d_A, nBytes, cudaMemcpyDeviceToHost), "cudaMemcpy(h_C)");
	checkCuda(cudaFree(d_A), "cudaFree(d_A)");
	checkCuda(cudaFree(d_B), "cudaFree(d_B)");

	checkCuda(cudaEventRecord(end, 0), "cudaEventRecord(end)");
	checkCuda(cudaEventSynchronize(end), "cudaEventSynchronize(end)");
	checkCuda(cudaEventElapsedTime(&elapsed_time, beg, end), "cudaEventElapsedTime");
	printf("GPU elapsed time:%fms\n", elapsed_time);

	// ---- CPU reference: same in-place addition, timed with the same events ----
	checkCuda(cudaEventRecord(beg, 0), "cudaEventRecord(beg)");

	for (int i = 0; i < nElem; ++i)
		h_A[i] += h_B[i];

	checkCuda(cudaEventRecord(end, 0), "cudaEventRecord(end)");
	checkCuda(cudaEventSynchronize(end), "cudaEventSynchronize(end)");
	checkCuda(cudaEventElapsedTime(&elapsed_time, beg, end), "cudaEventElapsedTime");
	printf("CPU elapsed time:%fms\n", elapsed_time);

	// ---- Compare: report only the first element that differs by more than EPS ----
	for (int i = 0; i < nElem; ++i)
		if (fabs(h_A[i] - h_C[i]) > EPS)
		{
			printf("Error: the result at position %d is not correct.\n", i);
			status = EXIT_FAILURE;
			break;
		}

	checkCuda(cudaEventDestroy(beg), "cudaEventDestroy(beg)");
	checkCuda(cudaEventDestroy(end), "cudaEventDestroy(end)");
	free(h_A);
	free(h_B);
	free(h_C);
	return status;
}

调度脚本MatMatAdd.pbs

#PBS -N MatMatAdd
#PBS -l nodes=1:ppn=32:gpus=1
#PBS -j oe
#PBS -q gpu

# Load the CUDA 10.0 environment, then build and run from the directory the
# job was submitted from. A failed cd or compile aborts the job so that a
# stale or missing binary is never executed.
source /public/software/profile.d/cuda10.0.sh
cd "$PBS_O_WORKDIR" || exit 1
nvcc MatMatAdd.cu -o MatMatAdd || exit 1
./MatMatAdd

运行结果MatMatAdd.o12143

可以看到,即使算上设备内存分配和拷贝的时间,GPU 上的矩阵加法还是要快于 CPU 的。

GPU elapsed time:246.043457ms
CPU elapsed time:302.463348ms