Initial public release.
[OpenCLIPER] / src / processes / ComplexElementProd.cpp
1 /* Copyright (C) 2018 Federico Simmross Wattenberg,
2  *                    Manuel Rodríguez Cayetano,
3  *                    Javier Royuela del Val,
4  *                    Elena Martín González,
5  *                    Elisa Moya Sáez,
6  *                    Marcos Martín Fernández and
7  *                    Carlos Alberola López
8  *
9  * This file is part of OpenCLIPER.
10  *
11  * OpenCLIPER is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; version 3 of the License.
14  *
15  * OpenCLIPER is distributed in the hope that it will be useful, but
16  * WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with OpenCLIPER; If not, see <http://www.gnu.org/licenses/>.
22  *
23  *
24  *  Contact:
25  *
26  *  Federico Simmross Wattenberg
27  *  E.T.S.I. Telecomunicación
28  *  Universidad de Valladolid
29  *  Paseo de Belén 15
30  *  47011 Valladolid, Spain.
31  *  fedsim@tel.uva.es
32  */
33 #include <OpenCLIPER/processes/ComplexElementProd.hpp>
34 #include <OpenCLIPER/CLapp.hpp>
35 //#include <OpenCLIPER/Data.hpp>
36 #include <OpenCLIPER/XData.hpp>
37 #include <OpenCLIPER/KData.hpp>
38 #include <OpenCLIPER/SensitivityMapsData.hpp>
39 #include <LPISupport/InfoItems.hpp>
40 #include <iostream>
41
42 #define KERNELCOMPILEOPTS "-I../include/"
43 //#define KERNELCOMPILEOPTS "-cl-std=CL2.0 -I../include/ -g"
44 #define CLASSNAME "OpenCLIPER::ComplexElementProd"
45
46 namespace OpenCLIPER {
47
48 ComplexElementProd::~ComplexElementProd() {
49     // TODO Auto-generated destructor stub
50 }
51
52 void ComplexElementProd::init() {
53     kernel=getApp()->getKernel("complexElementProd_kernel");
54 }
55
56 void ComplexElementProd::launch(ProfileParameters profileParameters) {
57     auto pLP=dynamic_pointer_cast<LaunchParameters>(pLaunchParameters);
58
59     cl::Program program;
60     cl::Device selected_device;
61     cl::CommandQueue queue;
62     checkCommonLaunchParameters();
63     infoItems.addInfoItem("Title", "ComplexElementProd info");
64
65     startProfiling(profileParameters.profilingEnabled);
66     try {
67         std::vector<cl::Event> kernelsExecEventList;
68         selected_device = getApp()->getDevice();
69         cl::Context context = getApp()->getContext();
70         queue = getApp()->getCommandQueue();
71         const Data* pSensitivityMapsData;
72         cl::Buffer* pInputBuffer;
73         cl::Buffer* pSensitivityMapsBuffer;
74         cl::Buffer* pOutputBuffer;
75         bool inputIsKData=false, outputIsKData=false;
76         cl_ulong max_work_group_size = selected_device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
77         cl::Event event;
78         //CERR("getInput()->getData()->size: " << getInput()->getData()->size() << std::endl);
79
80         shared_ptr<Data> pTypedInputData, pTypedOutputData;
81         pTypedInputData = std::dynamic_pointer_cast<KData>(getInput());
82         if (pTypedInputData != nullptr) {
83             inputIsKData = true;
84         } else {
85             pTypedInputData = std::dynamic_pointer_cast<XData>(getInput());
86             if (pTypedInputData != nullptr) {
87                 inputIsKData = false;
88             } else {
89                 throw std::invalid_argument("inputData should be of type KData or XData");
90             }
91         }
92         pTypedOutputData = std::dynamic_pointer_cast<KData>(getOutput());
93         if (pTypedOutputData != nullptr) {
94             outputIsKData = true;
95         } else {
96             pTypedOutputData = std::dynamic_pointer_cast<XData>(getOutput());
97             if (pTypedOutputData != nullptr) {
98                 outputIsKData = false;
99             } else {
100                 throw std::invalid_argument(std::string(CLASSNAME) + std::string("::launch: outputData should be of type KData or XData"));
101             }
102         }
103         if ((inputIsKData == false) && (outputIsKData == false)) {
104             throw invalid_argument(std::string(CLASSNAME) +
105                 std::string("::launch: input or output data should be of type KData (including valid Sensitivity Maps)"));
106         }
107
108         if (getInput()->getData()->size() == 0) {
109             throw invalid_argument(std::string(CLASSNAME) + std::string("::launch: inputData size is 0"));
110         }
111
112         if (pLP->sensitivityMapsDataHandle == INVALIDDATAHANDLE) {
113             throw invalid_argument(std::string(CLASSNAME) + std::string("::launch: non-existing SensitivityMaps"));
114         }
115
116         pInputBuffer = getInput()->getContiguousMemoryDeviceBuffer();
117         pSensitivityMapsData = (const Data*) (getApp()->getData(pLP->sensitivityMapsDataHandle).get());
118         pSensitivityMapsBuffer = pSensitivityMapsData->getContiguousMemoryDeviceBuffer();
119         pOutputBuffer = getOutput()->getContiguousMemoryDeviceBuffer();
120
121         max_work_group_size = selected_device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
122
123         cl::Buffer *pInputDataDims, *pOutputDataDims;
124         cl::Buffer *pSensitivityMapsDataDims;
125         cl::Buffer *pInputDataStrides, *pOutputDataStrides, *pSensitivityMapsDataStrides;
126
127         pInputDataDims = getInput()->getDataDimsDeviceBuffer();
128         pInputDataStrides = getInput()->getDataStridesDeviceBuffer();
129         pSensitivityMapsDataDims = pSensitivityMapsData->getDataDimsDeviceBuffer();
130         pSensitivityMapsDataStrides = pSensitivityMapsData->getDataStridesDeviceBuffer();
131
132         pOutputDataDims = getOutput()->getDataDimsDeviceBuffer();
133         pOutputDataStrides = getOutput()->getDataStridesDeviceBuffer();
134
135 #ifdef ComplexElementProd_DEBUG
136         CERR("NSD AllSE NCoils NTD, TD(0) ... TD(NTD-1) SD(0) ... SD(NSD-1)" << std::endl);
137         PRINTVECTOR("inputDataDims", *getInput()->getDataDimsVector(), uint);
138         PRINTVECTOR("sensitivityMapsDataDims", *pSensitivityMapsData->getDataDimsVector(), uint);
139         PRINTVECTOR("outputDataDims", *getOutput()->getDataDimsVector(), uint);
140         PRINTVECTOR("inputDataStrides", *getInput()->getDataStridesVector(), uint);
141         PRINTVECTOR("sensitivityMapsDataStrides", *pSensitivityMapsData->getDataStridesVector(), uint);
142         PRINTVECTOR("outputDataStrides", *getOutput()->getDataStridesVector(), uint);
143 #endif
144         kernel.setArg(0, *pInputBuffer);
145         kernel.setArg(1, *pSensitivityMapsBuffer);
146         kernel.setArg(2, *pOutputBuffer);
147         kernel.setArg(3, (ushort) pLP->conjugateSensMap);
148         kernel.setArg(4, *(pInputDataDims));
149         kernel.setArg(5, *(pSensitivityMapsDataDims));
150         kernel.setArg(6, *(pOutputDataDims));
151         kernel.setArg(7, *(pInputDataStrides)); // numRows
152         kernel.setArg(8, *(pSensitivityMapsDataStrides)); // numRows
153         kernel.setArg(9, *(pOutputDataStrides)); // numRows
154
155         cl_uint numCoils, numFrames;
156         if (inputIsKData) {
157             numCoils = (std::dynamic_pointer_cast<KData>(getInput()))->getNCoils();
158         } else { // if input is not KData, output must be KData
159             numCoils = (std::dynamic_pointer_cast<KData>(getOutput()))->getNCoils();
160         }
161         numFrames = getInput()->getDynDimsTotalSize();
162         ///*
163         cl::NDRange globalSizes = {NDARRAYWIDTH(getInput()->getData()->at(0)) * NDARRAYHEIGHT(getInput()->getData()->at(0)) * NDARRAYDEPTH(getInput()->getData()->at(0)),
164                                    numCoils, numFrames};
165         //*/
166         //cl::NDRange globalSizes = {1, 1, 1};
167
168         //cl::NDRange localSizes = {min(max_work_group_size, min(min(globalSizes[0], globalSizes[1]),globalSizes[2]))};
169         /*
170         cl::NDRange localSizes = {min(globalSizes[0],maxGroupSizePerDim), min(globalSizes[1],maxGroupSizePerDim),
171             min(globalSizes[2],maxGroupSizePerDim)};
172         */
173         //cl::NDRange localSizes = {1,1,min(globalSizes[2],max_work_group_size)};
174
175         //cl::NDRange localSizes = getApp()->getMaxLocalWorkItemSizes(globalSizes);
176         cl::NDRange localSizes = cl::NDRange();
177
178         //cl::NDRange localSizes = {1, 1, 1};
179
180         unsigned long maxGroupSizePerDim = cbrtl(max_work_group_size);
181         addGlobalAndLocalWorkItemSizeInfo(globalSizes, localSizes, profileParameters.profilingEnabled);
182 #ifdef ComplexElementProd_DEBUG
183         CERR("max_work_group_size: " << max_work_group_size << " " << "maxGroupSizePerDim: " << maxGroupSizePerDim << std::endl);
184         CERR("localSizes: " << localSizes[0] << " " << localSizes[1] << " " << localSizes[2] << std::endl);
185 #endif
186         const vector<uint>* inputDimsVector = (getInput()->getDataDimsVector());
187         //const uint* inputDims = inputDimsVector->data();
188         const uint* inputDims = (uint *) getInput()->getDataDimsHostBuffer();
189
190 #ifdef ComplexElementProd_DEBUG
191         CERR("In ComplexElementProd process  launch, NSD: " << inputDims[NumSpatialDimsPos] 
192             << "\tAllsizesEqual: " << inputDims[AllSizesEqualPos] << "\tNCoils: " << inputDims[NumCoilsPos] 
193             << "\tNTD: " << inputDims[NumTemporalDimsPos] << std::endl);
194         CERR("enqueueNDRangeKernel " << kernelName << "..." << std::endl);
195 #endif
196         queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSizes, localSizes, NULL, &event);
197
198 #ifdef ComplexElementProd_DEBUG
199         CERR("done." << std::endl);
200 #endif
201
202         stopProfiling(profileParameters.profilingEnabled);
203         if (profileParameters.profilingEnabled) {
204             if (profilingSupported) {
205                 getKernelGroupExecutionTimes(kernelsExecEventList, "OpenCLIPER::ComplexElementProd::launch kernel",
206                                              "OpenCLIPER::ComplexElementProd::launch group of kernels");
207             }
208         }
209     } catch (cl::Error err) {
210         /////////////////////////////////////////////////////////////////
211         // Catch OpenCL errors and print log if it is a build error
212         /////////////////////////////////////////////////////////////////
213         std::cerr << "ERROR: " << err.what() << " (" << err.err() << ", "
214                     << OpenCLIPER::CLapp::getOpenCLErrorCodeStr(err.err()) << ")"
215                     << "\tfile: " << __FILE__ << "\tline: " << __LINE__
216                     << std::endl;
217         if ((err.err() == CL_BUILD_PROGRAM_FAILURE)     || (err.err() == CL_INVALID_KERNEL)) {
218                     std::cerr << "Extended info: ";
219                     std::string str = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(selected_device);
220                     std::cerr << "Program Info: " << str << std::endl << std::flush;
221         }
222         throw;
223     } catch (std::string msg) {
224         std::cerr << "Exception caught in ComplexElementProd(): " << msg << std::endl;
225         throw;
226     }
227 }
228
229 } /* namespace OpenCLIPER */