CUDA 中结构体的赋值

/*
 * Example: passing a nested struct that contains pointers to a CUDA kernel.
 *
 * A struct holding pointers cannot be copied to the device in one cudaMemcpy,
 * because the pointers stored inside it must themselves be device addresses.
 * The approach used here is to build host-side "shadow" structs whose pointer
 * members hold device addresses, and then copy those shadows to the device.
 * One shadow is needed per level of struct nesting.
 */
#include <stdio.h>
#include <stdlib.h>

#include <cuda_runtime.h>

/* Number of elements in each of the A and B arrays. */
#define N 10

/* Abort with file/line context when a CUDA runtime call fails. */
#define CUDA_CHECK(call)                                                    \
    do {                                                                    \
        cudaError_t err_ = (call);                                          \
        if (err_ != cudaSuccess) {                                          \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,   \
                    cudaGetErrorString(err_));                              \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)

/* Inner struct: two integer arrays. */
typedef struct {
    int *A;
    int *B;
} vn;

/* Outer struct: a pointer to the inner struct plus two scalar operands. */
typedef struct {
    vn *v_n;
    int *m;
    int *n;
} data;

/*
 * Adds *d->m to the first n elements of d->v_n->A and *d->n to the first n
 * elements of d->v_n->B.
 *
 * Launch layout: 1D grid of 1D blocks; each thread handles at most one
 * element and threads whose global index is >= n do nothing.
 * Preconditions: d, d->v_n, d->m, d->n, d->v_n->A and d->v_n->B are all
 * dereferenced inside the kernel, so every one of them must be a device
 * address, and A and B must each hold at least n ints.
 */
__global__ void kernel(data *d, int n)
{
    /* Global index, so the kernel is also correct with more than one block. */
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < n) {
        d->v_n->A[idx] = d->v_n->A[idx] + *d->m;
        d->v_n->B[idx] = d->v_n->B[idx] + *d->n;
    }
}

/* malloc wrapper that terminates the program instead of returning NULL. */
static void *checkedMalloc(size_t bytes)
{
    void *p = malloc(bytes);
    if (p == NULL) {
        fprintf(stderr, "host allocation of %lu bytes failed\n",
                (unsigned long)bytes);
        exit(EXIT_FAILURE);
    }
    return p;
}

/*
 * Builds the nested struct on the host, mirrors it on the device, runs the
 * kernel once and prints the updated A and B arrays, one array per line.
 * With the inputs below (A[i] = B[i] = i, m = 5, n = 10) the output is
 * 5..14 on the first line and 10..19 on the second.
 *
 * Returns 0 on success; any CUDA or allocation failure exits with
 * EXIT_FAILURE.
 */
int main(int argc, char **argv)
{
    /* The command line is not used; the legacy cutil macros
     * (CUT_DEVICE_INIT / CUT_EXIT) that consumed it are not part of the
     * CUDA toolkit, so device 0 is selected explicitly instead. */
    (void)argc;
    (void)argv;
    CUDA_CHECK(cudaSetDevice(0));

    int i;

    /* ---- Host-side data --------------------------------------------- */
    data h_input;
    h_input.m = (int *)checkedMalloc(sizeof(int));
    h_input.n = (int *)checkedMalloc(sizeof(int));
    h_input.v_n = (vn *)checkedMalloc(sizeof(vn));
    h_input.v_n->A = (int *)checkedMalloc(sizeof(int) * N);
    h_input.v_n->B = (int *)checkedMalloc(sizeof(int) * N);

    *h_input.m = 5;
    *h_input.n = 10;
    for (i = 0; i < N; ++i) {
        h_input.v_n->A[i] = i;
        h_input.v_n->B[i] = i;
    }

    /* ---- Device-side layout ----------------------------------------- */
    /* tmp is a host-resident shadow of the outer struct whose members are
     * device addresses. Each additional nesting level needs its own
     * shadow; VN plays that role for the inner struct. */
    data tmp;
    CUDA_CHECK(cudaMalloc((void **)&tmp.m, sizeof(int)));
    CUDA_CHECK(cudaMalloc((void **)&tmp.n, sizeof(int)));
    CUDA_CHECK(cudaMalloc((void **)&tmp.v_n, sizeof(vn)));

    vn VN;
    CUDA_CHECK(cudaMalloc((void **)&VN.A, sizeof(int) * N));
    CUDA_CHECK(cudaMalloc((void **)&VN.B, sizeof(int) * N));

    /* Publish the inner shadow (two device pointers) into device memory. */
    CUDA_CHECK(cudaMemcpy(tmp.v_n, &VN, sizeof(vn), cudaMemcpyHostToDevice));

    /* Publish the outer shadow; d_input is what the kernel receives. */
    data *d_input;
    CUDA_CHECK(cudaMalloc((void **)&d_input, sizeof(data)));
    CUDA_CHECK(cudaMemcpy(d_input, &tmp, sizeof(data), cudaMemcpyHostToDevice));

    /* ---- Upload the payload ----------------------------------------- */
    /* Destinations are taken from the shadows, because d_input itself
     * cannot be dereferenced on the host. */
    CUDA_CHECK(cudaMemcpy(tmp.m, h_input.m, sizeof(int),
                          cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(tmp.n, h_input.n, sizeof(int),
                          cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(VN.A, h_input.v_n->A, sizeof(int) * N,
                          cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(VN.B, h_input.v_n->B, sizeof(int) * N,
                          cudaMemcpyHostToDevice));

    /* ---- Run ---------------------------------------------------------- */
    kernel<<<1, N>>>(d_input, N);
    CUDA_CHECK(cudaGetLastError());       /* launch-configuration errors */
    CUDA_CHECK(cudaDeviceSynchronize());  /* errors raised during execution */

    /* ---- Download the results ----------------------------------------- */
    /* The shadows still hold the device addresses of A and B, so the
     * arrays can be fetched directly. Code that no longer has the shadows
     * in scope would first copy the outer struct back from d_input, then
     * the inner struct from its v_n member, and only then the arrays. */
    CUDA_CHECK(cudaMemcpy(h_input.v_n->A, VN.A, sizeof(int) * N,
                          cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaMemcpy(h_input.v_n->B, VN.B, sizeof(int) * N,
                          cudaMemcpyDeviceToHost));

    for (i = 0; i < N; ++i)
        printf("%d ", h_input.v_n->A[i]);
    printf("\n");
    for (i = 0; i < N; ++i)
        printf("%d ", h_input.v_n->B[i]);
    printf("\n");

    /* ---- Cleanup ------------------------------------------------------ */
    CUDA_CHECK(cudaFree(VN.A));
    CUDA_CHECK(cudaFree(VN.B));
    CUDA_CHECK(cudaFree(tmp.v_n));
    CUDA_CHECK(cudaFree(tmp.m));
    CUDA_CHECK(cudaFree(tmp.n));
    CUDA_CHECK(cudaFree(d_input));

    free(h_input.v_n->A);
    free(h_input.v_n->B);
    free(h_input.v_n);
    free(h_input.m);
    free(h_input.n);

    return 0;
}


    推荐阅读