Simple program for vector addition using OpenCL

OpenCL

OpenCL is a framework for parallel computing that allows you to execute code on different compute devices, such as GPUs, CPUs, and FPGAs. Below is a simple OpenCL program in C that demonstrates how to perform vector addition using OpenCL. This program adds two vectors (arrays) together on a compute device.

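Before running this code, you’ll need an OpenCL runtime and its development headers installed, plus a compatible OpenCL device such as an AMD or NVIDIA GPU. The program specifically requests a GPU device (`CL_DEVICE_TYPE_GPU`); to run it on a CPU-based OpenCL implementation instead, change that argument to `CL_DEVICE_TYPE_CPU`.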

#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>

#define NUM_ELEMENTS 1024

int main() {
    cl_int err;

    // Create data arrays
    int A[NUM_ELEMENTS];
    int B[NUM_ELEMENTS];
    int C[NUM_ELEMENTS];

    // Initialize the data arrays
    for (int i = 0; i < NUM_ELEMENTS; i++) {
        A[i] = i;
        B[i] = i * 2;
    }

    // Load the OpenCL platform
    cl_platform_id platform;
    err = clGetPlatformIDs(1, &platform, NULL);

    // Get the GPU device
    cl_device_id device;
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);

    // Create an OpenCL context
    cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);

    // Create a command queue
    cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err);

    // Create memory buffers on the device
    cl_mem bufferA = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int) * NUM_ELEMENTS, NULL, &err);
    cl_mem bufferB = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int) * NUM_ELEMENTS, NULL, &err);
    cl_mem bufferC = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int) * NUM_ELEMENTS, NULL, &err);

    // Write data to the memory buffers
    err = clEnqueueWriteBuffer(queue, bufferA, CL_TRUE, 0, sizeof(int) * NUM_ELEMENTS, A, 0, NULL, NULL);
    err = clEnqueueWriteBuffer(queue, bufferB, CL_TRUE, 0, sizeof(int) * NUM_ELEMENTS, B, 0, NULL, NULL);

    // Create the OpenCL program from source code
    const char* source = "__kernel void vectorAdd(__global const int* A, __global const int* B, __global int* C) { int i = get_global_id(0); C[i] = A[i] + B[i]; }";
    cl_program program = clCreateProgramWithSource(context, 1, &source, NULL, &err);

    // Build the program
    err = clBuildProgram(program, 1, &device, NULL, NULL, NULL);

    // Create the kernel
    cl_kernel kernel = clCreateKernel(program, "vectorAdd", &err);

    // Set kernel arguments
    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferA);
    err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferB);
    err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufferC);

    // Execute the OpenCL kernel
    size_t globalSize = NUM_ELEMENTS;
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, NULL, 0, NULL, NULL);

    // Read the result back from the device
    err = clEnqueueReadBuffer(queue, bufferC, CL_TRUE, 0, sizeof(int) * NUM_ELEMENTS, C, 0, NULL, NULL);

    // Clean up
    clReleaseMemObject(bufferA);
    clReleaseMemObject(bufferB);
    clReleaseMemObject(bufferC);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);

    // Verify the results
    for (int i = 0; i < NUM_ELEMENTS; i++) {
        if (C[i] != A[i] + B[i]) {
            printf("Error: Incorrect result at index %d\n", i);
            break;
        }
    }

    printf("Vector addition completed successfully.\n");

    return 0;
}

This code performs vector addition on an OpenCL device, such as a GPU. Make sure you have the necessary OpenCL setup and libraries installed, and adjust the code as needed to suit your specific OpenCL environment. This example provides a basic understanding of how to set up an OpenCL program for vector addition.

Leave a Reply

Your email address will not be published. Required fields are marked *