// Web-page breadcrumb left over from extraction: Home > IT Technology
#include <stdio.h>
#include <stdlib.h>
/* Number of elements in each of the int arrays processed by the kernel. */
#define N 10

/*
 * Inner structure: a pair of pointers to int arrays.
 * In this program each array holds N elements, and the kernel updates
 * element idx of both arrays from the thread with index idx.
 */
typedef struct
{
    int *A; /* first array; the kernel adds *data::m to every element  */
    int *B; /* second array; the kernel adds *data::n to every element */
} vn;
/*
 * Outer structure handed to the kernel by pointer.
 * Every member is itself a pointer, so a device-resident instance must have
 * each member pointing at memory the device can dereference.
 */
typedef struct
{
    vn  *v_n; /* nested structure holding the two arrays A and B */
    int *m;   /* single int that the kernel adds to each A[idx]  */
    int *n;   /* single int that the kernel adds to each B[idx]  */
} data;
/*
 * Evaluate a CUDA runtime call and abort with file/line and the runtime's
 * error string if it did not return cudaSuccess.
 */
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

/* malloc wrapper that terminates the program instead of returning NULL. */
static void *checkedMalloc(size_t bytes)
{
    void *p = malloc(bytes);
    if (p == NULL) {
        fprintf(stderr, "Host allocation of %lu bytes failed\n",
                (unsigned long)bytes);
        exit(EXIT_FAILURE);
    }
    return p;
}

/*
 * Adds *d->m to d->v_n->A[idx] and *d->n to d->v_n->B[idx] for every
 * global thread index idx < n.
 *
 * d : device pointer to a `data` whose members are all device pointers.
 * n : number of elements in each of the arrays A and B.
 *
 * Launch layout: 1-D grid of 1-D blocks; any configuration that supplies at
 * least n threads works, surplus threads do nothing. No shared memory is used.
 */
__global__ void kernel(data *d, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < n)
    {
        vn *arrays = d->v_n;
        arrays->A[idx] = arrays->A[idx] + *d->m;
        arrays->B[idx] = arrays->B[idx] + *d->n;
    }
}

/*
 * Builds a nested structure (data -> vn -> int arrays) on the host, mirrors
 * it in device memory, runs `kernel` on it, copies the two arrays back and
 * prints them, one array per line.
 *
 * Returns 0 on success; any CUDA or allocation failure terminates the
 * process with EXIT_FAILURE after printing a diagnostic to stderr.
 */
int main(int argc, char **argv)
{
    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    int i;

    CUDA_CHECK(cudaSetDevice(0));

    /* ---- Host-side structure and input data ---- */
    data h_input;
    h_input.m = (int *)checkedMalloc(sizeof(int));
    h_input.n = (int *)checkedMalloc(sizeof(int));
    h_input.v_n = (vn *)checkedMalloc(sizeof(vn));
    h_input.v_n->A = (int *)checkedMalloc(sizeof(int) * N);
    h_input.v_n->B = (int *)checkedMalloc(sizeof(int) * N);

    *h_input.m = 5;
    *h_input.n = 10;
    for (i = 0; i < N; ++i)
    {
        h_input.v_n->A[i] = i;
        h_input.v_n->B[i] = i;
    }

    /*
     * Build the structure in device memory using host-side staging variables
     * (`tmp`, `VN`) whose members hold device addresses. One staging variable
     * is needed per level of struct nesting.
     */
    data tmp;
    CUDA_CHECK(cudaMalloc((void **)&tmp.m, sizeof(int)));
    CUDA_CHECK(cudaMalloc((void **)&tmp.n, sizeof(int)));
    CUDA_CHECK(cudaMalloc((void **)&tmp.v_n, sizeof(vn)));

    vn VN;
    CUDA_CHECK(cudaMalloc((void **)&VN.A, sizeof(int) * N));
    CUDA_CHECK(cudaMalloc((void **)&VN.B, sizeof(int) * N));
    /* Publish the inner struct (device pointers A, B) to the device. */
    CUDA_CHECK(cudaMemcpy(tmp.v_n, &VN, sizeof(vn), cudaMemcpyHostToDevice));

    data *d_input;
    CUDA_CHECK(cudaMalloc((void **)&d_input, sizeof(data)));
    /* Publish the outer struct (device pointers v_n, m, n) to the device. */
    CUDA_CHECK(cudaMemcpy(d_input, &tmp, sizeof(data), cudaMemcpyHostToDevice));

    /* Copy the payload to the device through the staging variables. */
    CUDA_CHECK(cudaMemcpy(tmp.m, h_input.m, sizeof(int), cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(tmp.n, h_input.n, sizeof(int), cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(VN.A, h_input.v_n->A, sizeof(int) * N, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(VN.B, h_input.v_n->B, sizeof(int) * N, cudaMemcpyHostToDevice));

    kernel<<<1, N>>>(d_input, N);
    CUDA_CHECK(cudaGetLastError());      /* invalid launch configuration */
    CUDA_CHECK(cudaDeviceSynchronize()); /* faults raised while the kernel ran */

    /*
     * Copy the results back. The kernel only writes through A and B and never
     * changes the pointers stored in the structs, so the staging variable VN
     * still holds the correct device addresses; the structs themselves do not
     * need to be read back first.
     */
    CUDA_CHECK(cudaMemcpy(h_input.v_n->A, VN.A, sizeof(int) * N, cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaMemcpy(h_input.v_n->B, VN.B, sizeof(int) * N, cudaMemcpyDeviceToHost));

    for (i = 0; i < N; ++i)
        printf("%d ", h_input.v_n->A[i]);
    printf("\n");
    for (i = 0; i < N; ++i)
        printf("%d ", h_input.v_n->B[i]);
    printf("\n");

    /* ---- Release device memory, innermost allocations first ---- */
    CUDA_CHECK(cudaFree(VN.A));
    CUDA_CHECK(cudaFree(VN.B));
    CUDA_CHECK(cudaFree(tmp.v_n));
    CUDA_CHECK(cudaFree(tmp.m));
    CUDA_CHECK(cudaFree(tmp.n));
    CUDA_CHECK(cudaFree(d_input));

    /* ---- Release host memory ---- */
    free(h_input.v_n->A);
    free(h_input.v_n->B);
    free(h_input.v_n);
    free(h_input.m);
    free(h_input.n);

    return 0;
}
// Web-page footer left over from extraction: "Recommended reading"