1 /* Copyright (C) 2018 Federico Simmross Wattenberg,
2 * Manuel Rodríguez Cayetano,
3 * Javier Royuela del Val,
4 * Elena Martín González,
6 * Marcos Martín Fernández and
7 * Carlos Alberola López
9 * This file is part of OpenCLIPER.
11 * OpenCLIPER is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; version 3 of the License.
15 * OpenCLIPER is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with OpenCLIPER; If not, see <http://www.gnu.org/licenses/>.
26 * Federico Simmross Wattenberg
27 * E.T.S.I. Telecomunicación
28 * Universidad de Valladolid
30 * 47011 Valladolid, Spain.
34 * RCS/CVS version control info
35 * $Id: reduce_kernel.cl,v 1.2 2016/11/02 12:34:19 manrod Exp $
37 * $Date: 2016/11/02 12:34:19 $
42 #include <OpenCLIPER/OpenCLIPERDataModelCommonDefs.hpp>
43 #include <OpenCLIPER/kernels/hostKernelFunctions.h>
45 //#pragma OPENCL_EXTENSION cl_amd_printf
46 //#pragma OPENCL EXTENSION cl_amd_printf : enable
47 //#define KERNEL_DEBUG
48 #define complexMul(p1,p2) (float2) ((p1).s0*(p2).s0-(p1).s1*(p2).s1, (p1).s0*(p2).s1+(p1).s1*(p2).s0)
50 #define VECTORDATATYPESIZE 16
51 #define VECTORDATATYPEMACRO(baseType,size) {baseType ## size}
52 #define VECTORDATATYPE float16
53 #define VECTORDATATYPEHALFSIZE (VECTORDATATYPESIZE)/2
54 #define HALFVECTORDATATYPE float8
55 #define VLOADN vload16
56 #define VSTOREN vstore16
58 //#define DEBUGKERNEL 1
61 #define PRINTVECTOR(name, vector, numberOfElements) do {printVector(name, vector, numberOfElements);} while (0)
63 #define PRINTVECTOR(name, vector, numberOfElements)
66 __kernel void xImageSum_kernel(__global realType* pInputBuffer, __global realType* pOutputBuffer,
67 __global uint* inputDims, __global uint* outputDims,
68 __global uint* inputStrides, __global uint* outputStrides,
69 __global realType* pInputBufferFirstNDArray,
70 __global realType* pInputBufferSecondNDArray,
71 __global realType* pOutputBufferFirstNDArray) {
73 for (uint i = 0; i < dims.deviceMemBaseAddrAlign; i++) {
74 //PRINTF(("pInputBuffer[%d]: %f\n", i, pInputBuffer[i]));
77 //PRINTF(("numRows: %d\tnumColumns: %d\tnumCoils: %d\tnumFrames: %d\n", inputDims.height, inputDims.width, inputDims.numCoils, inputDims.numFrames));
78 //PRINTF(("kernel deviceMemBaseAddrAlign: %d\n", inputDims.deviceMemBaseAddrAlignInBytes));
79 //PRINTF(("pInputBuffer: %p\n", pInputBuffer));
80 //PRINTF(("pInputBuffer: %p\npInputBufferFirstNDArray: %p\npInputBufferSecondNDArray: %p\n", pInputBuffer, pInputBufferFirstNDArray, pInputBufferSecondNDArray));
81 //PRINTF(("*pInputBuffer[0]: %f\n", pInputBuffer[0]));
82 //PRINTF(("*pInputBuffer[1]: %f\n", pInputBuffer[1]));
83 //PRINTF(("get_global_size(0): %d\tget_global_size(1): %d\tget_global_size(2): %d\n", get_global_size(0), get_global_size(1), get_global_size(2)));
85 //stridesInfo_t dataOffsets;
86 //get1DArrayOffsetsFrom4DimsImageData(dims, &dataOffsets);
88 uint offsetCoilId, inputOffsetRowAndFrameId, outputOffsetRowAndFrameId;
89 uint inputIndexRealPartElement, outputIndexRealPartElement;
90 float acumRealPart = 0.0, acumImagPart = 0.0;
91 uint frameId = get_global_id(0);
93 uint numFrames = getTemporalDimSize(inputDims, 0);
94 uint numCoils = getNumCoils(inputDims);
95 uint numColumns, numRows;
96 uint frameDimIndex = 0;
97 if ((get_global_id(0) == 0) && (get_global_id(1) == 0) && (get_global_id(2) == 0)) {
98 printf("pointer to input contiguous memory device buffer: %x\n", pInputBuffer);
99 printf("pointer to input device subbuffer of NDArray(0): %x\n", pInputBufferFirstNDArray);
100 printf("pointer to output contiguous memory device buffer: %x\n", pOutputBuffer);
101 printf("pointer to output device subbuffer of NDArray(0): %x\n", pOutputBufferFirstNDArray);
103 PRINTF(("numFrames: %d\tnumCoils: %d\tnumRows: %d\tnumColumns: %d\n", numFrames, numCoils, getSpatialDimSize(inputDims, ROWS, 0),
104 getSpatialDimSize(inputDims, COLUMNS, 0)));
105 while (frameId < numFrames) {
106 rowId = get_global_id(1);
107 numRows = getSpatialDimSize(inputDims, ROWS, 0);
108 while (rowId < numRows) {
109 inputOffsetRowAndFrameId = rowId * getSpatialDimStride(inputDims, inputStrides, ROWS, 0) + frameId * getTemporalDimStride(inputDims, inputStrides, frameDimIndex, 0);
110 outputOffsetRowAndFrameId = rowId * getSpatialDimStride(outputDims, outputStrides, ROWS, 0) + frameId * getTemporalDimStride(outputDims, outputStrides, frameDimIndex, 0);
111 columnId = get_global_id(2);
112 numColumns = getSpatialDimSize(inputDims, COLUMNS, 0);
113 while (columnId < numColumns) {
114 //for (uint columnId = 0; columnId < numColumns; columnId++) {
116 for (uint coilId = 0; coilId < numCoils; coilId++) {
117 PRINTF(("global_id(0): %d\t", get_global_id(0)));
118 PRINTF(("global_id(1): %d\t", get_global_id(1)));
119 PRINTF(("frameId: %d\t", frameId));
120 PRINTF(("rowId: %d\t", rowId));
121 PRINTF(("columnId: %d\t", columnId));
122 PRINTF(("coilId: %d\n", coilId));
123 inputIndexRealPartElement = inputOffsetRowAndFrameId + offsetCoilId + columnId * getSpatialDimStride(inputDims, inputStrides, COLUMNS, 0);
124 acumRealPart = acumRealPart + pInputBuffer[inputIndexRealPartElement];
125 acumImagPart = acumImagPart + pInputBuffer[inputIndexRealPartElement + 1];
126 PRINTF(("inputIndexRealPartElement: %d\n", inputIndexRealPartElement));
127 PRINTF(("pInputBuffer[%d]: %f\n", inputIndexRealPartElement, pInputBuffer[inputIndexRealPartElement]));
128 PRINTF(("acumRealPart: %f\n", acumRealPart));
129 PRINTF(("inputIndexImagPartElement: %d\n", inputIndexRealPartElement + 1));
130 PRINTF(("pInputBuffer[%d]: %f\n", inputIndexRealPartElement + 1, pInputBuffer[inputIndexRealPartElement + 1]));
131 PRINTF(("acumImagPart: %f\n", acumImagPart));
132 offsetCoilId += getCoilStride(inputDims, inputStrides, 0);
134 outputIndexRealPartElement = outputOffsetRowAndFrameId + columnId * getSpatialDimStride(outputDims, outputStrides, COLUMNS, 0);
135 // store average of addition of elements from all coils and same frame
136 pOutputBuffer[outputIndexRealPartElement] = acumRealPart / numCoils;
137 pOutputBuffer[outputIndexRealPartElement + 1] = acumImagPart / numCoils;
138 PRINTF(("\noutputIndexRealPartElement: %d\n", outputIndexRealPartElement));
139 PRINTF(("pOutputBuffer[%d]: %f\n", outputIndexRealPartElement, pOutputBuffer[outputIndexRealPartElement]));
140 PRINTF(("outputIndexImagPartElement: %d\n", outputIndexRealPartElement + 1));
141 PRINTF(("pOutputBuffer[%d]: %f\n\n", outputIndexRealPartElement + 1, pOutputBuffer[outputIndexRealPartElement + 1]));
144 columnId += get_global_size(2);
145 PRINTF(("columnId new value: %d\n", columnId));
147 //printVector("Result: ", (float *)pOutputBuffer, numColumns * 2);
148 rowId += get_global_size(1);
149 PRINTF(("rowId new value: %d\n", rowId));
151 frameId += get_global_size(0);
152 PRINTF(("frameId new value: %d\n", frameId));