2 /* Copyright (C) 2018 Federico Simmross Wattenberg,
3 * Manuel Rodríguez Cayetano,
4 * Javier Royuela del Val,
5 * Elena Martín González,
7 * Marcos Martín Fernández and
8 * Carlos Alberola López
10 * This file is part of OpenCLIPER.
12 * OpenCLIPER is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; version 3 of the License.
16 * OpenCLIPER is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with OpenCLIPER; If not, see <http://www.gnu.org/licenses/>.
27 * Federico Simmross Wattenberg
28 * E.T.S.I. Telecomunicación
29 * Universidad de Valladolid
31 * 47011 Valladolid, Spain.
34 #include <OpenCLIPER/processes/performanceTests/ArrayAddProcess.hpp>
35 #define CLASSNAME "OpenCLIPER::ArrayAddProcess"
37 namespace OpenCLIPER {
/**
 * @brief Caches the OpenCL objects this process uses when it is launched.
 *
 * Asks the application object returned by getApp() for the kernel registered
 * under the name "arrayAdd_kernel" and for its command queue, and stores them
 * in the `kernel` and `queue` members respectively.
 */
38 void ArrayAddProcess::init() {
39 kernel = getApp()->getKernel("arrayAdd_kernel");
40 queue = getApp()->getCommandQueue();
/**
 * @brief Binds the kernel arguments and enqueues the array-add kernel.
 *
 * Kernel arguments 0-2 are the device buffers of NDArray 0 of the input data,
 * of NDArray 0 of the data referenced by the launch parameters' inHandleB, and
 * of NDArray 0 of the output data. Arguments 3 and 4 are the launch
 * parameters' rows and cols. The kernel is then enqueued on `queue` once per
 * requested repetition, storing one event per enqueue in `eventsVector`.
 *
 * @param profileParameters numOfRepetitions gives the number of times the
 *        kernel is enqueued; profilingEnabled is forwarded to
 *        buildKernelProfilingInfo().
 * @throws invalid_argument if the launch parameters' inHandleB equals
 *         INVALIDDATAHANDLE.
 */
43 void ArrayAddProcess::launch(ProfileParameters profileParameters) {
44 // Set input and output OpenCL buffers on device memory
// NOTE(review): pLP is dereferenced below without a null check; if this is
// std::dynamic_pointer_cast and pLaunchParameters does not point to a
// LaunchParameters object, pLP would be empty -- confirm and guard.
45 auto pLP = dynamic_pointer_cast<LaunchParameters>(pLaunchParameters);
46 cl::Buffer* pInBufA = getInput()->getNDArray(0)->getDeviceBuffer();
// The second operand is not part of the input data set: it is looked up
// through the data handle carried in the launch parameters.
47 DataHandle inBufBDataHandle = pLP->inHandleB;
48 if (inBufBDataHandle == INVALIDDATAHANDLE) {
49 throw invalid_argument(std::string(CLASSNAME) + std::string("::launch: non-existing second array"));
52 cl::Buffer* pInBufB = getApp()->getData(inBufBDataHandle)->getNDArray(0)->getDeviceBuffer();
53 cl::Buffer* pOutBuf = getOutput()->getNDArray(0)->getDeviceBuffer();
55 // Set kernel parameters
56 kernel.setArg(0, *pInBufA);
57 kernel.setArg(1, *pInBufB);
58 kernel.setArg(2, *pOutBuf);
59 kernel.setArg(3, pLP->rows);
60 kernel.setArg(4, pLP->cols);
61 // Set kernel work items size: a single dimension of height * width items, taken from NDArray 0 of the input
62 dimIndexType height = NDARRAYHEIGHT(getInput()->getNDArray(0));
63 dimIndexType width = NDARRAYWIDTH(getInput()->getNDArray(0));
64 cerr << "number of rows: " << height << "\tnumber of columns: " << width << std::endl;
65 //cl::NDRange globalSizes = cl::NDRange(height, width);
66 cl::NDRange globalSizes = cl::NDRange(height * width);
67 cl::NDRange localSizes;
68 unsigned int blockSize = pLP->blockSize;
// NOTE(review): the three assignments below are alternatives: the maximum
// local sizes reported by the application for globalSizes, a
// default-constructed NDRange, or an explicit one-dimensional size of
// blockSize. The conditions that select among them are not visible in this
// listing -- consult the full file before changing this logic.
71 localSizes = getApp()->getMaxLocalWorkItemSizes(globalSizes);
74 localSizes = cl::NDRange();
77 localSizes = cl::NDRange(blockSize);
// NOTE(review): globalSizes is built with one dimension, yet index [1] is
// printed as well (same for localSizes); what that index yields for an unused
// dimension depends on the cl::NDRange implementation -- confirm.
79 cerr << "globalSizes: " << globalSizes[0] << ", " << globalSizes[1] << std::endl;
80 cerr << "localsizes: " << localSizes[0] << ", " << localSizes[1] << std::endl;
83 //startProfiling(profilingEnabled);
// One event slot per repetition; enqueue number i stores its event in slot i.
84 eventsVector.resize(profileParameters.numOfRepetitions);
85 for (unsigned long i = 0; i < profileParameters.numOfRepetitions; i++) {
86 queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSizes, localSizes, NULL, &eventsVector.at(i));
// NOTE(review): no explicit wait on the queue or on the events is visible in
// this function; presumably buildKernelProfilingInfo() takes care of it --
// confirm.
88 buildKernelProfilingInfo(profileParameters.profilingEnabled);
89 //stopProfiling(profilingEnabled);