// [CUDA]异构并行vector查询,CPU端vector转GPU端数组
// (Article title: "Heterogeneous parallel vector lookup; copying a CPU-side
//  std::vector into a GPU-side array". An inline image from the original
//  post was lost in scraping.)
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cstdio>   // reconstructed: the original bare '#include' line lost its header name in scraping (likely <stdio.h>)
#include <cstdlib>
#include <iostream>
#include <thread>
#include <vector>
using namespace std;
#define USE_CPU 1
#define USE_GPU 0
struct stu_info
{
char stu_num[14];
int try_seat_num;
int exam_seat_num;
};
struct select_info
{
char stu_num[14];
int try_seat_num;
int exam_seat_num;
};
vector stu;
vector select;
__device__ void gpu_strcpy(char* a, char *b)
{
for (int i = 0;
i < 14;
i++)
{
a[i] = b[i];
}
}
void cpu_strcpy(char* a, char *b)
{
for (int i = 0;
i < 14;
i++)
{
a[i] = b[i];
}
}
__global__ void gpu_select_kernel(stu_info *dev_stu,select_info *dev_select,int *n)
{
int index = threadIdx.x;
for (int i = 0;
i < *n;
i++)
{
if (dev_select[index].try_seat_num == dev_stu[i].try_seat_num)
{
gpu_strcpy(dev_select[index].stu_num, dev_stu[i].stu_num);
dev_select[index].exam_seat_num = dev_stu[i].exam_seat_num;
break;
}
}
}
void fun_select_cpu(int index, int n)
{
for (int i = 0;
i < n;
i++)
{
if (select[index].try_seat_num == stu[i].try_seat_num)
{
//cout << stu[index].stu_num << " " << stu[index].exam_seat_num<> > (dev_stu, dev_select, dev_n);
cudaMemcpy(host_stu, dev_stu, sizeof(stu_info)*n, cudaMemcpyDeviceToHost);
cudaMemcpy(host_select, dev_select, sizeof(select_info)*n, cudaMemcpyDeviceToHost);
for (int i = 0;
i < n;
i++)
{
cpu_strcpy(select[i].stu_num, host_select[i].stu_num);
select[i].exam_seat_num = host_select[i].exam_seat_num;
}
cudaFree(dev_stu);
cudaFree(dev_select);
cudaFree(dev_n);
}
int main()
{
stu_info info_temp;
select_info select_temp;
int n, m,sign;
cudaError_t cudaStatus;
cudaStatus = cudaSetDevice(0);
if (cudaStatus != cudaSuccess) {
cout << "检测到你的计算机没有支持CUDA的NVIDIA的GPU设备,程序将使用CPU并行查询" << endl;
sign = USE_CPU;
}
else
{
cout << "检测到你的计算机有支持CUDA的NVIDIA的GPU设备,程序将使用GPU并行查询" << endl;
sign = USE_GPU;
}
cin >> n;
for (int i = 0;
i < n;
i++)
{
cin >> info_temp.stu_num >> info_temp.try_seat_num >> info_temp.exam_seat_num;
stu.push_back(info_temp);
}
cin >> m;
for (int i = 0;
i < m;
i++)
{
cin >> select_temp.try_seat_num;
select.push_back(select_temp);
}
if (sign == USE_CPU)
{
thread **thread_p = new thread*[m];
int thread_id = 0;
for (thread_id;
thread_id < m;
thread_id++)
{
thread_p[thread_id] = new thread(fun_select_cpu, thread_id, n);
thread_p[thread_id]->detach();
}
delete[] thread_p;
}
else if (sign == USE_GPU)
{
fun_select_gpu(n, m, m);
}
for (int i = 0;
i < m;
i++)
{
cout << select[i].stu_num << " " << select[i].exam_seat_num << endl;
}
system("pause");
return 0;
}
// 【[CUDA]异构并行vector查询,CPU端vector转GPU端数组】转载于:https://www.cnblogs.com/lee-li/p/8560609.html
// (Scraped article footer preserved below as comments so the file stays compilable.)
// 推荐阅读
// - 为Google|为Google Cloud配置深度学习环境(CUDA、cuDNN、Tensorflow2、VScode远程ssh等)
// - Nvidia|lxd容器安装cuda11
// - 5.|5. JDK8的并行数据处理
// - 并发,并行,阻塞,非阻塞,异步,同步
// - 安装问题|win10+cuda11.1+anaconda+pytorch+pycharm配置环境
// - 我们公司是如何做到高效并行测试的()
// - 构建面向异构算力的边缘计算云平台
// - Anaconda3+CUDA10.1+CUDNN7.6+TensorFlow2.6安装(Ubuntu16)
// - Android异步(AsyncTask)并行(executeOnExecutor)|Android异步(AsyncTask)并行(executeOnExecutor) ping-ip
// - 数据并行(提升训练吞吐的高效方法 |深度学习分布式训练专题)