CUDA 编程的简单案例

发布时间: 2023-12-16 19:22:57　作者: 小丑_jk

一个简单的案例:用 CUDA 在 GPU 上对两个整型数组逐元素求和,代码分为 header.h、test.cu 和 test.cpp 三个文件。

header.h

// Host-callable wrapper around the GPU add kernel (implemented in test.cu).
// Copies `size` ints from the host arrays `a` and `b` to the device, runs the
// kernel, and copies `size` ints of results back into the host array `c`.
// The prototype uses no CUDA types, so plain C++ code (test.cpp) can include it.
void addKernel(const int* a, const int* b, int* c, int size);

 

test.cu

#include <stdio.h>

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "header.h"
// Element-wise integer addition on the device: c[i] = a[i] + b[i] for every
// i in [0, size).
//
// Launch layout: 1D grid of 1D blocks, no shared memory. Each thread starts at
// its flat global index and advances by the total number of launched threads,
// so any grid/block combination covers all `size` elements and no thread ever
// indexes past the end of the arrays.
// a, b and c must be device pointers to at least `size` ints each.
__global__ void add(const int* a, const int* b, int* c, int size)
{
    // 64-bit index so that `i += stride` cannot wrap around for large sizes.
    const long long stride = (long long)gridDim.x * blockDim.x;
    for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;
         i < size;
         i += stride)
    {
        c[i] = b[i] + a[i];
    }
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool addKernelCheck(cudaError_t status, const char* what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "addKernel: %s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Host wrapper: computes c[i] = a[i] + b[i] for i in [0, size) on GPU 0.
//
// a, b : host arrays with at least `size` readable ints.
// c    : host array with at least `size` writable ints; receives the result.
// size : number of elements; values <= 0 make the call a no-op.
//
// On any CUDA failure a message is written to stderr, the device buffers are
// released, and `c` is left unmodified.
void addKernel(const int* a, const int* b, int* c, int size)
{
    if (size <= 0)
    {
        return;  // nothing to compute; also avoids a zero-block launch
    }

    const size_t bytes = sizeof(int) * (size_t)size;
    const int threadsPerBlock = 128;
    // Ceil-division written so that it cannot overflow for large `size`.
    const int blocks = (size - 1) / threadsPerBlock + 1;

    int* dev_a = 0;
    int* dev_b = 0;
    int* dev_c = 0;

    // Short-circuit evaluation stops at the first failing call.
    // dev_c is output-only, so it is not pre-filled from the host.
    const bool ready =
        addKernelCheck(cudaSetDevice(0), "cudaSetDevice") &&
        addKernelCheck(cudaMalloc((void**)&dev_a, bytes), "cudaMalloc(dev_a)") &&
        addKernelCheck(cudaMalloc((void**)&dev_b, bytes), "cudaMalloc(dev_b)") &&
        addKernelCheck(cudaMalloc((void**)&dev_c, bytes), "cudaMalloc(dev_c)") &&
        addKernelCheck(cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(dev_a)") &&
        addKernelCheck(cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(dev_b)");

    if (ready)
    {
        add<<<blocks, threadsPerBlock>>>(dev_a, dev_b, dev_c, size);

        // A launch does not return a status itself; query it explicitly.
        if (addKernelCheck(cudaGetLastError(), "add kernel launch"))
        {
            // Blocking copy on the default stream: waits for the kernel and
            // surfaces any error raised while it was executing.
            addKernelCheck(cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(c)");
        }
    }

    // cudaFree on a null pointer is a no-op, so this single cleanup path is
    // safe no matter which step above failed.
    cudaFree(dev_a);
    cudaFree(dev_b);
    cudaFree(dev_c);
}

 

test.cpp

#include <stdio.h>  
#include <stdlib.h>  
#include "header.h"

#define N 256

/*
 * Demo driver: fills a[i] = i and b[i] = 2 * i, clears c, hands the three
 * arrays to addKernel, and prints every element of c together with its index.
 */
int main(int argc, char** argv)
{
    int a[N];
    int b[N];
    int c[N];

    /* Prepare the inputs and zero the output buffer. */
    int idx = 0;
    while (idx < N)
    {
        a[idx] = idx;
        b[idx] = idx * 2;
        c[idx] = 0;
        ++idx;
    }

    addKernel(a, b, c, N);

    /* Dump the results, one line per element. */
    for (idx = 0; idx != N; ++idx)
    {
        printf("%d is %d.\n", idx, c[idx]);
    }

    return 0;
}