2 // Extraído y adaptado de
3 // http://developer.amd.com/tools-and-sdks/opencl-zone/opencl-resources/introductory-tutorial-to-opencl/
5 #include <LPISupport/InfoItems.hpp>
6 #include "vectorUtils.hpp"
7 #include "commonArrayMult.hpp"
8 #include <OpenCLIPER/processes/performanceTests/ArrayAddProcess.hpp>
17 #include <chrono> // Para medir tiempos de ejecución
19 using namespace std::chrono;
20 #include <iomanip> // Para std::setprecision
21 #include <OpenCLIPER/OpenCLIPERDataModel.hpp>
22 #include "PerformanceTestArrayOpParallel.hpp"
25 //#define __NO_STD_VECTOR // Use cl::vector instead of STL version
27 int main (int argc, char* argv[]) {
29 unsigned long numberOfIterations = 1;
30 unsigned int size = 2048, blockSize = 0;
37 std::shared_ptr<LPISupport::SampleCollection> pSamples = std::make_shared<LPISupport::SampleCollection>("execution time");
39 // Step 0: get a new OpenCLIPER app
40 std::shared_ptr<CLapp> pCLapp = std::make_shared<CLapp>();
43 PerformanceTestArrayOpParallel* pPerfTest = new PerformanceTestArrayOpParallel(argc, argv);
44 auto pConfigTraits = std::dynamic_pointer_cast<PerformanceTestArrayOpParallel::ConfigTraits>(pPerfTest->getConfigTraits());
45 size = pConfigTraits->size;
46 numberOfIterations = pConfigTraits->repetitions;
47 blockSize = pConfigTraits->dimBlockOrLocalSize;
48 std::cout << "read size: " << size << std::endl;
49 std::cout << "read blockSize: " << blockSize << std::endl;
57 // The number of operations per iteration is one addition of two numbers
58 // for each element of the result array, which has
59 // RowsC*ColsC elements.
60 unsigned long numOpsPerIteration = RowsC*ColsC;
61 pConfigTraits->numOpsPerCalc = numOpsPerIteration;
63 // Step 1: initialize computing device
64 CLapp::PlatformTraits platformTraits;
65 CLapp::DeviceTraits deviceTraits;
67 deviceTraits.type=CLapp::DEVICE_TYPE_GPU;
68 if (!pConfigTraits->deviceName.empty()) {
69 deviceTraits.name = pConfigTraits->deviceName;
72 deviceTraits.type=CLapp::DEVICE_TYPE_CPU;
74 deviceTraits.queueProperties = cl::QueueProperties(CL_QUEUE_PROFILING_ENABLE);
75 pCLapp->init(platformTraits,deviceTraits);
77 // Step 2: load OpenCL kernel(s)
78 pCLapp->loadKernels("performanceTests/arrayAdd.cl", "");
80 const unsigned int SHOW_SIZE = 10;
81 const unsigned int PRECISION_DIGITS = 10;
83 cerr << argv[0] << " performance measurement" << std::endl;
84 cout << "Starting program... " << flush;
86 // Step 3: load input data
87 cout << "Creating and filling arrays ... " << flush;
88 std::shared_ptr<Data> XDataA(XData::genTestXData(ColsA, RowsA, 1, type_index(typeid(realType)), XData::CONSTANT));
89 std::shared_ptr<Data> XDataB(XData::genTestXData(ColsB, RowsB, 1, type_index(typeid(realType)), XData::CONSTANT));
91 // Step 4: create output with same size as input
92 std::vector<dimIndexType>* pArrayDims = new std::vector<dimIndexType>({ColsC, RowsC});
93 std::vector<std::vector<dimIndexType>*>* pArraysDims = new std::vector<std::vector<dimIndexType>*>;
94 pArraysDims->push_back(pArrayDims);
95 std::vector<dimIndexType>* pDynDims = new std::vector<dimIndexType>(); // Constructor value only sets vector size not value, that is 0 by default
96 pDynDims->push_back(1); // Sets value
97 std::shared_ptr<Data> XDataC(new XData(pArraysDims, pDynDims, type_index(typeid(realType))));
98 //std::shared_ptr<Data> XDataC(new XData((dynamic_pointer_cast<XData>(XDataA)), false));
100 // Step 5: register input and output in our CL app
101 DataHandle inHandleA = pCLapp->addData(XDataA);
102 DataHandle inHandleB = pCLapp->addData(XDataB);
103 DataHandle outHandle = pCLapp->addData(XDataC);
105 cout << "Done." << endl;
108 cout << XDataA->getData()->at(0)->hostDataToString("a");
109 cout << XDataB->getData()->at(0)->hostDataToString("b");
113 // Step 6: create new process bound to our CL app
114 // and set its input/output data sets
115 std::unique_ptr<Process> pProcess(new ArrayAddProcess(pCLapp));
116 pProcess->setInHandle(inHandleA);
117 pProcess->setOutHandle(outHandle);
119 // Set parameters: handle of second array to be added
120 auto launchParamsArrayAddProcess = make_shared<ArrayAddProcess::LaunchParameters>(inHandleB, RowsA, ColsA, blockSize);
121 pProcess->setLaunchParameters(launchParamsArrayAddProcess);
123 // Step 7: initialize process
126 cerr << "Executing " << numberOfIterations << " iteration(s)\n";
127 cerr << "- Matrix [1 .. " << size << ", 1 .. " << size << "]" << endl;
128 LPISupport::InfoItems infoItems;
129 cerr << pCLapp->getHWSWInfo().to_string(pConfigTraits->outputFormat);
130 //TIME_DIFF_TYPE diffT2T1 = 0;
131 std::stringstream strstr;
132 strstr << setprecision(PRECISION_DIGITS);
133 cout << "Starting product... " << endl;
135 for (unsigned long iteration = 0; iteration < numberOfIterations; iteration++) {
136 cout << "Iteration #" << iteration << std::endl;
139 // Step 7.2 launch process
140 Process::ProfileParameters profileParameters;
141 // Enable gpu profiling
142 profileParameters.profilingEnabled = true;
143 profileParameters.numOfRepetitions = numberOfIterations;
144 pProcess->launch(profileParameters);
148 TIME_DIFF(diffT2T1, t1, t2);
149 pSamples->appendSample(diffT2T1);
152 // Step 8: get data back from computing device
153 pCLapp->device2Host(outHandle, SyncSource::BUFFER_ONLY);
154 cout << "Product finished." << endl;
155 if (size <= SHOW_SIZE) {
156 cout << XDataC->getData()->at(0)->hostBufferToString("c");
159 LPISupport::InfoItems infoItemsProfilingGPU;
160 infoItemsProfilingGPU.append(pProcess->getSamplesGPUExecTime()->to_infoItems(PRECISION_DIGITS));
162 for (unsigned int i = 0; i < infoItemsProfilingGPU.size(); i++) {
163 infoItems.push_back(infoItemsProfilingGPU.at(i));
166 cerr << infoItemsProfilingGPU.to_string(LPISupport::InfoItems::OutputFormat::HUMAN);
168 LPISupport::InfoItems infoItemsProfilingGPUAndCPU;
169 infoItemsProfilingGPUAndCPU.append(pProcess->getSamplesGPU_CPUExecTime()->to_infoItems(PRECISION_DIGITS));
170 cerr << infoItemsProfilingGPUAndCPU.to_string(LPISupport::InfoItems::OutputFormat::HUMAN);
172 pConfigTraits->deviceType = pCLapp->getDeviceTypeAsString();
173 pConfigTraits->deviceName = pCLapp->getDeviceVendor() + " " + pCLapp->getDeviceName();
175 pPerfTest->buildTestInfo(pProcess->getSamplesGPUExecTime());
177 //pPerfTest->buildTestInfo(pProcess->getSamplesGPU_CPUExecTime());
178 pPerfTest->buildTestInfo(pProcess->getSamplesGPUExecTime());
180 pPerfTest->saveOrPrint();
183 pProcess.reset(nullptr);
184 pCLapp->delData(inHandleA);
185 pCLapp->delData(inHandleB);
186 pCLapp->delData(outHandle);
191 } catch(string msg) {
192 cerr << "Exception caught in main(): " << msg << endl;