1 /* Copyright (C) 2018 Federico Simmross Wattenberg,
2 * Manuel Rodríguez Cayetano,
3 * Javier Royuela del Val,
4 * Elena Martín González,
6 * Marcos Martín Fernández and
7 * Carlos Alberola López
9 * This file is part of OpenCLIPER.
11 * OpenCLIPER is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; version 3 of the License.
15 * OpenCLIPER is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with OpenCLIPER; If not, see <http://www.gnu.org/licenses/>.
26 * Federico Simmross Wattenberg
27 * E.T.S.I. Telecomunicación
28 * Universidad de Valladolid
30 * 47011 Valladolid, Spain.
33 #include <OpenCLIPER/processes/ComplexElementProd.hpp>
34 #include <OpenCLIPER/CLapp.hpp>
35 //#include <OpenCLIPER/Data.hpp>
36 #include <OpenCLIPER/XData.hpp>
37 #include <OpenCLIPER/KData.hpp>
38 #include <OpenCLIPER/SensitivityMapsData.hpp>
39 #include <LPISupport/InfoItems.hpp>
42 #define KERNELCOMPILEOPTS "-I../include/"
43 //#define KERNELCOMPILEOPTS "-cl-std=CL2.0 -I../include/ -g"
44 #define CLASSNAME "OpenCLIPER::ComplexElementProd"
46 namespace OpenCLIPER {
// Destructor of the ComplexElementProd process.
// No statements are visible in its body, only the generator's TODO note below.
48 ComplexElementProd::~ComplexElementProd() {
49 // TODO Auto-generated destructor stub
// Initialises the process: asks the application object (getApp()) for the
// kernel named "complexElementProd_kernel" and stores it in `kernel`, which
// launch() later configures with setArg() and enqueues.
52 void ComplexElementProd::init() {
53 kernel=getApp()->getKernel("complexElementProd_kernel");
// Configures and enqueues the "complexElementProd_kernel" kernel on the
// process input, the sensitivity maps and the process output.
//
// Steps visible in this function:
//  1. Check that input and output are KData or XData, that at least one of
//     them is KData, that the input holds data and that a sensitivity maps
//     handle was supplied in the launch parameters.
//  2. Fetch the device buffers (data, dims and strides) of input, sensitivity
//     maps and output and bind them, together with the conjugate flag, as
//     kernel arguments 0..9.
//  3. Build a 3-D global range (spatial size x coils x frames) and enqueue the
//     kernel on the application's command queue.
//  4. Stop profiling and, if enabled and supported, report kernel times.
//
// @param profileParameters  its profilingEnabled flag is forwarded to
//                           startProfiling() / stopProfiling() and gates the
//                           execution time report.
// @throws std::invalid_argument  raised by the validation checks of step 1;
//                                none of the catch clauses visible at the end
//                                of this function handles that type.
56 void ComplexElementProd::launch(ProfileParameters profileParameters) {
// Launch parameters specific to this process (sensitivity maps handle and
// conjugate flag are read from it below).
// NOTE(review): no null check on pLP is visible before it is dereferenced;
// confirm that checkCommonLaunchParameters() or the caller guarantees the type.
57 auto pLP=dynamic_pointer_cast<LaunchParameters>(pLaunchParameters);
60 cl::Device selected_device;
61 cl::CommandQueue queue;
62 checkCommonLaunchParameters();
63 infoItems.addInfoItem("Title", "ComplexElementProd info");
65 startProfiling(profileParameters.profilingEnabled);
// Event list later handed to getKernelGroupExecutionTimes().
67 std::vector<cl::Event> kernelsExecEventList;
// Device, context and command queue all come from the application object.
68 selected_device = getApp()->getDevice();
69 cl::Context context = getApp()->getContext();
70 queue = getApp()->getCommandQueue();
71 const Data* pSensitivityMapsData;
72 cl::Buffer* pInputBuffer;
73 cl::Buffer* pSensitivityMapsBuffer;
74 cl::Buffer* pOutputBuffer;
// Flags recording whether input / output turned out to be KData.
75 bool inputIsKData=false, outputIsKData=false;
76 cl_ulong max_work_group_size = selected_device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
78 //CERR("getInput()->getData()->size: " << getInput()->getData()->size() << std::endl);
// Input type detection: try KData first, then XData; anything else is rejected.
80 shared_ptr<Data> pTypedInputData, pTypedOutputData;
81 pTypedInputData = std::dynamic_pointer_cast<KData>(getInput());
82 if (pTypedInputData != nullptr) {
85 pTypedInputData = std::dynamic_pointer_cast<XData>(getInput());
86 if (pTypedInputData != nullptr) {
// NOTE(review): unlike the other messages in this function, this one is not
// prefixed with CLASSNAME and "::launch: ".
89 throw std::invalid_argument("inputData should be of type KData or XData");
// Output type detection, same scheme as for the input.
92 pTypedOutputData = std::dynamic_pointer_cast<KData>(getOutput());
93 if (pTypedOutputData != nullptr) {
96 pTypedOutputData = std::dynamic_pointer_cast<XData>(getOutput());
97 if (pTypedOutputData != nullptr) {
98 outputIsKData = false;
100 throw std::invalid_argument(std::string(CLASSNAME) + std::string("::launch: outputData should be of type KData or XData"));
// At least one side must be KData: the coil count is read from a KData object
// further down.
103 if ((inputIsKData == false) && (outputIsKData == false)) {
104 throw invalid_argument(std::string(CLASSNAME) +
105 std::string("::launch: input or output data should be of type KData (including valid Sensitivity Maps)"));
// Reject an input that holds no data.
108 if (getInput()->getData()->size() == 0) {
109 throw invalid_argument(std::string(CLASSNAME) + std::string("::launch: inputData size is 0"));
// Reject a missing sensitivity maps handle.
112 if (pLP->sensitivityMapsDataHandle == INVALIDDATAHANDLE) {
113 throw invalid_argument(std::string(CLASSNAME) + std::string("::launch: non-existing SensitivityMaps"));
// Device buffers holding the element data of input, sensitivity maps and output.
116 pInputBuffer = getInput()->getContiguousMemoryDeviceBuffer();
// The sensitivity maps object is looked up in the application by its handle.
// NOTE(review): C-style cast on the raw pointer obtained from .get().
117 pSensitivityMapsData = (const Data*) (getApp()->getData(pLP->sensitivityMapsDataHandle).get());
118 pSensitivityMapsBuffer = pSensitivityMapsData->getContiguousMemoryDeviceBuffer();
119 pOutputBuffer = getOutput()->getContiguousMemoryDeviceBuffer();
// NOTE(review): re-reads the same device property already stored in
// max_work_group_size at its declaration above.
121 max_work_group_size = selected_device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
// Device buffers describing dimensions and strides of each data object.
123 cl::Buffer *pInputDataDims, *pOutputDataDims;
124 cl::Buffer *pSensitivityMapsDataDims;
125 cl::Buffer *pInputDataStrides, *pOutputDataStrides, *pSensitivityMapsDataStrides;
127 pInputDataDims = getInput()->getDataDimsDeviceBuffer();
128 pInputDataStrides = getInput()->getDataStridesDeviceBuffer();
129 pSensitivityMapsDataDims = pSensitivityMapsData->getDataDimsDeviceBuffer();
130 pSensitivityMapsDataStrides = pSensitivityMapsData->getDataStridesDeviceBuffer();
132 pOutputDataDims = getOutput()->getDataDimsDeviceBuffer();
133 pOutputDataStrides = getOutput()->getDataStridesDeviceBuffer();
// Debug-only dump of the dims and strides vectors.
135 #ifdef ComplexElementProd_DEBUG
136 CERR("NSD AllSE NCoils NTD, TD(0) ... TD(NTD-1) SD(0) ... SD(NSD-1)" << std::endl);
137 PRINTVECTOR("inputDataDims", *getInput()->getDataDimsVector(), uint);
138 PRINTVECTOR("sensitivityMapsDataDims", *pSensitivityMapsData->getDataDimsVector(), uint);
139 PRINTVECTOR("outputDataDims", *getOutput()->getDataDimsVector(), uint);
140 PRINTVECTOR("inputDataStrides", *getInput()->getDataStridesVector(), uint);
141 PRINTVECTOR("sensitivityMapsDataStrides", *pSensitivityMapsData->getDataStridesVector(), uint);
142 PRINTVECTOR("outputDataStrides", *getOutput()->getDataStridesVector(), uint);
// Kernel arguments: 0-2 data buffers, 3 conjugate flag (passed as ushort),
// 4-6 dims buffers, 7-9 strides buffers.
144 kernel.setArg(0, *pInputBuffer);
145 kernel.setArg(1, *pSensitivityMapsBuffer);
146 kernel.setArg(2, *pOutputBuffer);
147 kernel.setArg(3, (ushort) pLP->conjugateSensMap);
148 kernel.setArg(4, *(pInputDataDims));
149 kernel.setArg(5, *(pSensitivityMapsDataDims));
150 kernel.setArg(6, *(pOutputDataDims));
151 kernel.setArg(7, *(pInputDataStrides)); // input data strides buffer
152 kernel.setArg(8, *(pSensitivityMapsDataStrides)); // sensitivity maps strides buffer
153 kernel.setArg(9, *(pOutputDataStrides)); // output data strides buffer
// Coil count comes from whichever side is KData; frame count from the input.
// NOTE(review): the condition selecting between the two assignments below is
// not visible here; presumably it tests inputIsKData - confirm.
155 cl_uint numCoils, numFrames;
157 numCoils = (std::dynamic_pointer_cast<KData>(getInput()))->getNCoils();
158 } else { // if input is not KData, output must be KData
159 numCoils = (std::dynamic_pointer_cast<KData>(getOutput()))->getNCoils();
161 numFrames = getInput()->getDynDimsTotalSize();
// Global range: dimension 0 is width * height * depth of the first input
// NDArray, dimension 1 the number of coils, dimension 2 the number of frames.
163 cl::NDRange globalSizes = {NDARRAYWIDTH(getInput()->getData()->at(0)) * NDARRAYHEIGHT(getInput()->getData()->at(0)) * NDARRAYDEPTH(getInput()->getData()->at(0)),
164 numCoils, numFrames};
166 //cl::NDRange globalSizes = {1, 1, 1};
168 //cl::NDRange localSizes = {min(max_work_group_size, min(min(globalSizes[0], globalSizes[1]),globalSizes[2]))};
// NOTE(review): two declarations of localSizes appear below (a per-dimension
// clamp to maxGroupSizePerDim, and a default-constructed cl::NDRange); which
// one is active cannot be determined from the lines visible here.
170 cl::NDRange localSizes = {min(globalSizes[0],maxGroupSizePerDim), min(globalSizes[1],maxGroupSizePerDim),
171 min(globalSizes[2],maxGroupSizePerDim)};
173 //cl::NDRange localSizes = {1,1,min(globalSizes[2],max_work_group_size)};
175 //cl::NDRange localSizes = getApp()->getMaxLocalWorkItemSizes(globalSizes);
176 cl::NDRange localSizes = cl::NDRange();
178 //cl::NDRange localSizes = {1, 1, 1};
// Cube root of the maximum work-group size, converted to unsigned long.
180 unsigned long maxGroupSizePerDim = cbrtl(max_work_group_size);
181 addGlobalAndLocalWorkItemSizeInfo(globalSizes, localSizes, profileParameters.profilingEnabled);
182 #ifdef ComplexElementProd_DEBUG
183 CERR("max_work_group_size: " << max_work_group_size << " " << "maxGroupSizePerDim: " << maxGroupSizePerDim << std::endl);
184 CERR("localSizes: " << localSizes[0] << " " << localSizes[1] << " " << localSizes[2] << std::endl);
// NOTE(review): inputDimsVector is not referenced again in the visible code.
186 const vector<uint>* inputDimsVector = (getInput()->getDataDimsVector());
187 //const uint* inputDims = inputDimsVector->data();
// Host-side dims buffer; only read by the debug trace below in the visible code.
188 const uint* inputDims = (uint *) getInput()->getDataDimsHostBuffer();
190 #ifdef ComplexElementProd_DEBUG
191 CERR("In ComplexElementProd process launch, NSD: " << inputDims[NumSpatialDimsPos]
192 << "\tAllsizesEqual: " << inputDims[AllSizesEqualPos] << "\tNCoils: " << inputDims[NumCoilsPos]
193 << "\tNTD: " << inputDims[NumTemporalDimsPos] << std::endl);
194 CERR("enqueueNDRangeKernel " << kernelName << "..." << std::endl);
// Enqueue the kernel with no global offset and no wait list; the completion
// event is written to `event`.
196 queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSizes, localSizes, NULL, &event);
198 #ifdef ComplexElementProd_DEBUG
199 CERR("done." << std::endl);
// Profiling epilogue: times are only reported when profiling was requested
// and profilingSupported is set.
202 stopProfiling(profileParameters.profilingEnabled);
203 if (profileParameters.profilingEnabled) {
204 if (profilingSupported) {
205 getKernelGroupExecutionTimes(kernelsExecEventList, "OpenCLIPER::ComplexElementProd::launch kernel",
206 "OpenCLIPER::ComplexElementProd::launch group of kernels");
// NOTE(review): the exception is caught by value here; catching by const
// reference avoids the copy and is the usual C++ practice.
209 } catch (cl::Error err) {
210 /////////////////////////////////////////////////////////////////
211 // Catch OpenCL errors and print log if it is a build error
212 /////////////////////////////////////////////////////////////////
213 std::cerr << "ERROR: " << err.what() << " (" << err.err() << ", "
214 << OpenCLIPER::CLapp::getOpenCLErrorCodeStr(err.err()) << ")"
215 << "\tfile: " << __FILE__ << "\tline: " << __LINE__
// For build failures or an invalid kernel, also print the build log of
// `program` for the selected device.
217 if ((err.err() == CL_BUILD_PROGRAM_FAILURE) || (err.err() == CL_INVALID_KERNEL)) {
218 std::cerr << "Extended info: ";
219 std::string str = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(selected_device);
220 std::cerr << "Program Info: " << str << std::endl << std::flush;
// NOTE(review): std::string is also caught by value; see the note on the
// cl::Error handler above.
223 } catch (std::string msg) {
224 std::cerr << "Exception caught in ComplexElementProd(): " << msg << std::endl;
229 } /* namespace OpenCLIPER */