RPMのインストール

$ sudo dnf install pocl ocl-icd-devel opencl-headers clinfo

動作確認

$ clinfo

コンパイル方法

$ clang hello.c -o hello `pkg-config --libs --cflags OpenCL`

OpenCLでπを求めてみる（ライプニッツ級数 π/4 = 1 − 1/3 + 1/5 − 1/7 + … を使用）

pi_leibniz.cl

/*
 * Adds the first 10,000,000 terms of the Leibniz series
 *     1 - 1/3 + 1/5 - 1/7 + ...   (which converges to pi/4)
 * to a[0]. The caller is expected to initialise a[0] (normally to 0) and to
 * multiply the result by 4 to obtain the approximation of pi.
 *
 * The loop is sequential and does not use the work-item id, so every
 * work-item that runs this kernel performs the same update of a[0].
 */
__kernel void pi_leibniz(global float *a)
{
	/* Accumulate in a private variable and store once at the end instead of
	 * doing a read-modify-write on the global buffer in every iteration. */
	float sum = a[0];
	/* The numerator alternates +1, -1, +1, ...; flipping a sign variable is
	 * exact and avoids calling pow() ten million times. */
	float sign = 1.0f;

	for (int i = 0; i < 10000000; i++) {
		sum += sign / (2 * i + 1);
		sign = -sign;
	}

	a[0] = sum;
}

pi_leibniz.c

// clang pi_leibniz.c -o pi_leibniz `pkg-config --libs --cflags OpenCL`
#include <stdio.h>
#include <stdlib.h>

#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif

#define MAX_SOURCE_SIZE (0x100000)

void OpenCL_run(char *name, char *func)
{
	cl_device_id device_id = NULL;
	cl_context context = NULL;
	cl_command_queue command_queue = NULL;
	cl_mem memobj = NULL;
	cl_program program = NULL;
	cl_kernel kernel = NULL;
	cl_platform_id platform_id = NULL;
	cl_uint ret_num_devices;
	cl_uint ret_num_platforms;
	cl_int ret;

	char *source_str;
	size_t source_size;

	FILE *fp = fopen(name, "r");
	if (!fp) {
		fprintf(stderr, "Failed to load kernel.\n");
		exit(1);
	}
	source_str = malloc(MAX_SOURCE_SIZE);
	source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
	fclose(fp);

	ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
	ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);

	context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
	command_queue = clCreateCommandQueue(context, device_id, 0, &ret);

	float mem[100];
	mem[0] = 0;
	memobj = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(float), mem, &ret);

	program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);
	ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
	if (ret) {
		size_t len = 0;
		cl_int ret = CL_SUCCESS;
		ret = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &len);
		char *buffer = calloc(len, sizeof(char));
		ret = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, len, buffer, NULL);
		printf("\n%s\n", buffer);
	}
	free(source_str);

	kernel = clCreateKernel(program, func, &ret);
	ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj);
	ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL);

	ret = clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0, sizeof(float), mem, 0, NULL, NULL);
	printf("%lf\n", mem[0]*4);

	ret = clFlush(command_queue);
	ret = clFinish(command_queue);
	ret = clReleaseKernel(kernel);
	ret = clReleaseProgram(program);
	ret = clReleaseMemObject(memobj);
	ret = clReleaseCommandQueue(command_queue);
	ret = clReleaseContext(context);
}

/* Entry point: runs the pi_leibniz kernel from pi_leibniz.cl via OpenCL_run. */
int main(void)
{
	char kernel_file[] = "pi_leibniz.cl";
	char kernel_name[] = "pi_leibniz";

	OpenCL_run(kernel_file, kernel_name);

	return EXIT_SUCCESS;
}