Initial public release.
[OpenCLIPER] / performanceTests / arrayAddOpenCLIPER.cpp
1
2 // Extraído y adaptado de
3 // http://developer.amd.com/tools-and-sdks/opencl-zone/opencl-resources/introductory-tutorial-to-opencl/
4
5 #include <LPISupport/InfoItems.hpp>
6 #include "vectorUtils.hpp"
7 #include "commonArrayMult.hpp"
8 #include <OpenCLIPER/processes/performanceTests/ArrayAddProcess.hpp>
9 #include <cstdio>
10 #include <cstdlib>
11 #include <fstream>
12 #include <iostream>
13 #include <string>
14 #include <iterator>
15 #include <vector>
16 #include <array>
17 #include <chrono> // Para medir tiempos de ejecución
18 using namespace std;
19 using namespace std::chrono;
20 #include <iomanip> // Para std::setprecision
21 #include <OpenCLIPER/OpenCLIPERDataModel.hpp>
22 #include "PerformanceTestArrayOpParallel.hpp"
23 #include <utility>
24 #include <omp.h>
25 //#define __NO_STD_VECTOR // Use cl::vector instead of STL version
26
27 int main (int argc, char* argv[]) {
28
29     unsigned long numberOfIterations = 1;
30     unsigned int size = 2048, blockSize = 0;
31     unsigned int RowsA;
32     unsigned int ColsA;
33     unsigned int RowsB;
34     unsigned int ColsB;
35     unsigned int RowsC;
36     unsigned int ColsC;
37     std::shared_ptr<LPISupport::SampleCollection> pSamples = std::make_shared<LPISupport::SampleCollection>("execution time");
38     
39     // Step 0: get a new OpenCLIPER app
40     std::shared_ptr<CLapp> pCLapp = std::make_shared<CLapp>();
41
42     try {
43         PerformanceTestArrayOpParallel* pPerfTest = new PerformanceTestArrayOpParallel(argc, argv);
44         auto pConfigTraits = std::dynamic_pointer_cast<PerformanceTestArrayOpParallel::ConfigTraits>(pPerfTest->getConfigTraits());
45         size = pConfigTraits->size;
46         numberOfIterations = pConfigTraits->repetitions;
47         blockSize = pConfigTraits->dimBlockOrLocalSize;
48         std::cout << "read size: " << size << std::endl;
49         std::cout << "read blockSize: " << blockSize << std::endl;
50         RowsA = size;
51         ColsA = size;
52         RowsB = ColsA;
53         ColsB = size;
54         RowsC = RowsA;
55         ColsC = ColsB;
56
57         // Número de operaciones es ColsA productos de dos números y ColsA-1 sumas
58         // de dos números por cada elemento de la matriz resultado, que tiene
59         // RowsC*ColsC elementos.
60         unsigned long numOpsPerIteration = RowsC*ColsC;
61         pConfigTraits->numOpsPerCalc = numOpsPerIteration;
62
63         // Step 1: initialize computing device
64         CLapp::PlatformTraits platformTraits;
65         CLapp::DeviceTraits deviceTraits;
66 #ifdef USE_GPU
67         deviceTraits.type=CLapp::DEVICE_TYPE_GPU;
68         if (!pConfigTraits->deviceName.empty()) {
69             deviceTraits.name = pConfigTraits->deviceName;
70         }
71 #else
72         deviceTraits.type=CLapp::DEVICE_TYPE_CPU;
73 #endif
74         deviceTraits.queueProperties = cl::QueueProperties(CL_QUEUE_PROFILING_ENABLE);
75         pCLapp->init(platformTraits,deviceTraits);
76
77         // Step 2: load OpenCL kernel(s)
78         pCLapp->loadKernels("performanceTests/arrayAdd.cl", "");
79
80         const unsigned int SHOW_SIZE = 10;
81         const unsigned int PRECISION_DIGITS = 10;
82
83         cerr << argv[0] << " performance measurement" << std::endl;
84         cout << "Starting program... " << flush;
85
86         // Step 3: load input data
87         cout << "Creating and filling arrays ... " << flush;
88         std::shared_ptr<Data> XDataA(XData::genTestXData(ColsA, RowsA, 1, type_index(typeid(realType)), XData::CONSTANT));
89         std::shared_ptr<Data> XDataB(XData::genTestXData(ColsB, RowsB, 1, type_index(typeid(realType)), XData::CONSTANT));
90
91         // Step 4: create output with same size as input
92         std::vector<dimIndexType>* pArrayDims = new std::vector<dimIndexType>({ColsC, RowsC});
93         std::vector<std::vector<dimIndexType>*>* pArraysDims = new std::vector<std::vector<dimIndexType>*>;
94         pArraysDims->push_back(pArrayDims);
95         std::vector<dimIndexType>* pDynDims = new std::vector<dimIndexType>(); // Constructor value only sets vector size not value, that is 0 by default
96         pDynDims->push_back(1); // Sets value
97         std::shared_ptr<Data> XDataC(new XData(pArraysDims, pDynDims, type_index(typeid(realType))));
98         //std::shared_ptr<Data> XDataC(new XData((dynamic_pointer_cast<XData>(XDataA)), false));
99
100         // Set 5: register input and output in our CL app
101         DataHandle inHandleA = pCLapp->addData(XDataA);
102         DataHandle inHandleB = pCLapp->addData(XDataB);
103         DataHandle outHandle = pCLapp->addData(XDataC);
104
105         cout << "Done." << endl;
106
107         /*
108         cout << XDataA->getData()->at(0)->hostDataToString("a");
109         cout << XDataB->getData()->at(0)->hostDataToString("b");
110         cout << endl;
111         */
112
113         // Step 6: create new process bound to our CL app
114         // and set its input/output data sets
115         std::unique_ptr<Process> pProcess(new ArrayAddProcess(pCLapp));
116         pProcess->setInHandle(inHandleA);
117         pProcess->setOutHandle(outHandle);
118
119         // Set parameters: handle of second array to be added
120         auto launchParamsArrayAddProcess = make_shared<ArrayAddProcess::LaunchParameters>(inHandleB, RowsA, ColsA, blockSize);
121         pProcess->setLaunchParameters(launchParamsArrayAddProcess);
122
123         // Step 7: initialize process
124         pProcess->init();
125
126         cerr << "Executing " << numberOfIterations << " iteration(s)\n";
127         cerr << "- Matrix [1 .. " << size << ", 1 .. " << size << "]" << endl;
128         LPISupport::InfoItems infoItems;
129         cerr << pCLapp->getHWSWInfo().to_string(pConfigTraits->outputFormat);
130         //TIME_DIFF_TYPE diffT2T1 = 0;
131         std::stringstream strstr;
132         strstr << setprecision(PRECISION_DIGITS);
133         cout << "Starting product... " << endl;
134         /*
135          for (unsigned long iteration = 0; iteration < numberOfIterations; iteration++) {
136             cout << "Iteration #" << iteration << std::endl;
137             BEGIN_TIME(t1);
138         */
139             // Step 7.2 launch process
140             Process::ProfileParameters profileParameters;
141             // Enable gpu profiling
142             profileParameters.profilingEnabled = true;
143             profileParameters.numOfRepetitions = numberOfIterations;
144             pProcess->launch(profileParameters);
145
146         /*
147             END_TIME(t2);
148             TIME_DIFF(diffT2T1, t1, t2);
149             pSamples->appendSample(diffT2T1);
150         }
151         */
152             // Step 8: get data back from computing device
153             pCLapp->device2Host(outHandle, SyncSource::BUFFER_ONLY);
154         cout << "Product finished." << endl;
155         if (size <= SHOW_SIZE) {
156             cout << XDataC->getData()->at(0)->hostBufferToString("c");
157         }
158
159         LPISupport::InfoItems infoItemsProfilingGPU;
160         infoItemsProfilingGPU.append(pProcess->getSamplesGPUExecTime()->to_infoItems(PRECISION_DIGITS));
161         /*
162          for (unsigned int i = 0; i < infoItemsProfilingGPU.size(); i++) {
163             infoItems.push_back(infoItemsProfilingGPU.at(i));
164         }
165         */
166         cerr << infoItemsProfilingGPU.to_string(LPISupport::InfoItems::OutputFormat::HUMAN);
167
168         LPISupport::InfoItems infoItemsProfilingGPUAndCPU;
169         infoItemsProfilingGPUAndCPU.append(pProcess->getSamplesGPU_CPUExecTime()->to_infoItems(PRECISION_DIGITS));
170         cerr << infoItemsProfilingGPUAndCPU.to_string(LPISupport::InfoItems::OutputFormat::HUMAN);
171
172         pConfigTraits->deviceType = pCLapp->getDeviceTypeAsString();
173         pConfigTraits->deviceName = pCLapp->getDeviceVendor() +  " " + pCLapp->getDeviceName();
174 #if USE_GPU
175         pPerfTest->buildTestInfo(pProcess->getSamplesGPUExecTime());
176 #else
177         //pPerfTest->buildTestInfo(pProcess->getSamplesGPU_CPUExecTime());
178         pPerfTest->buildTestInfo(pProcess->getSamplesGPUExecTime());
179 #endif
180         pPerfTest->saveOrPrint();
181
182         // Step 10: clean up
183         pProcess.reset(nullptr);
184         pCLapp->delData(inHandleA);
185         pCLapp->delData(inHandleB);
186         pCLapp->delData(outHandle);
187         pCLapp = nullptr;
188         pSamples = nullptr;
189
190
191     } catch(string msg) {
192         cerr << "Exception caught in main(): " << msg << endl;
193         //cleanupHost();
194     }
195 }