opencliper.lpi.tel.uva.es Git - OpenCLIPER/blob - performanceTests/arrayAdd.cpp

   1
   2 // Extraído y adaptado de
   3 // http://developer.amd.com/tools-and-sdks/opencl-zone/opencl-resources/introductory-tutorial-to-opencl/
   4
#include "vectorUtils.hpp"
#include "commonArrayMult.hpp"
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <fstream>
#include <iostream>
#include <string>
#include <iterator>
#include <memory>
#include <vector>
#include <array>
#include <chrono> // For measuring execution times
using namespace std;
using namespace std::chrono;
#include <iomanip> // For std::setprecision
#include <utility>
#include <omp.h>
#include "PerformanceTestArrayOpParallel.hpp"
  22 //#define __NO_STD_VECTOR // Use cl::vector instead of STL version
  23
  24 int main (int argc, char* argv[]) {
  25
  26     float *A;
  27     float *B;
  28     float *C;
  29     unsigned long numberOfIterations = 1;
  30     unsigned int size = 2048;
  31     unsigned int RowsA;
  32     unsigned int ColsA;
  33     unsigned int RowsB;
  34     unsigned int ColsB;
  35     unsigned int RowsC;
  36     unsigned int ColsC;
  37     string outputFileName = "", deviceName;
  38     std::shared_ptr<LPISupport::SampleCollection> pSamples = make_shared<LPISupport::SampleCollection>("execution time");
  39
  40     try {
  41         PerformanceTestArrayOpParallel* pPerfTest = new PerformanceTestArrayOpParallel(argc, argv);
  42         auto pConfigTraits = std::dynamic_pointer_cast<PerformanceTestArrayOpParallel::ConfigTraits>(pPerfTest->getConfigTraits());
  43         size = pConfigTraits->size;
  44         numberOfIterations = pConfigTraits->repetitions;
  45         std::cout << "read size: " << size << std::endl;
  46         RowsA = size;
  47         ColsA = size;
  48         RowsB = ColsA;
  49         ColsB = size;
  50         RowsC = RowsA;
  51         ColsC = ColsB;
  52
  53         const unsigned int SHOW_SIZE = 10;
  54         // Número de operaciones es ColsA productos de dos números y ColsA-1 sumas
  55         // de dos números por cada elemento de la matriz resultado, que tiene
  56         // RowsC*ColsC elementos.
  57         unsigned long numOpsPerIteration = RowsC*ColsC;
  58         pConfigTraits->numOpsPerCalc = numOpsPerIteration;
  59
  60         cerr << argv[0] << " performance measurement" << std::endl;
  61         cout << "Starting program... " << flush;
  62         cout << "Creating and filling arrays ... " << flush;
  63         initArray(A, RowsA, ColsA, 2.0);
  64         initArray(B, RowsB, ColsB, 2.0);
  65         initArray(C, RowsC, ColsC, 0.0);
  66         cout << "Done." << endl;
  67         print_array("a", A, RowsA, ColsA, SHOW_SIZE);
  68         print_array("b", B, RowsB, ColsB, SHOW_SIZE);
  69         cout << endl;
  70
  71         cerr << "Executing " << numberOfIterations << " iteration(s)\n";
  72         cerr << "- Matrix [1 .. " << size << ", 1 .. " << size << "]" << endl;
  73
  74         TIME_DIFF_TYPE diffT2T1 = 0;
  75         cout << "Starting product... " << endl;
  76         for (unsigned int iteration = 0; iteration < numberOfIterations; iteration++) {
  77             cout << "Iteration #" << iteration << std::endl;
  78             BEGIN_TIME(t1);
  79             unsigned int row;
  80 #ifdef USE_OPENMP_GPU
  81             cerr << "OpenMP enabled" << std::endl;
  82             cerr << "OpenMP execution on GPU enabled" << std::endl;
  83             //#pragma omp target map(to:A[0:RowsA*ColsA],B[0:RowsB*ColsB]) map(from:C[0:RowsC*ColsC])
  84             #pragma omp target teams distribute
  85 #endif //USE_OPENMP_GPU
  86 #ifdef USE_OPENMP_CPU
  87             cerr << "OpenMP execution on GPU disabled" << std::endl;
  88             cerr << "Number of threads for CPU: " << omp_get_max_threads() << std::endl;
  89             #pragma omp parallel for
  90 #endif /* USE_OPENMP_CPU */
  91 #ifdef USE_OPENACC_GPU
  92             cerr << "OpenACC enabled" << std::endl;
  93             cerr << "OpenACC execution on GPU enabled" << std::endl;
  94             #pragma acc target teams distribute parallel for
  95 #endif
  96 #ifdef USE_OPENACC_CPU
  97             cerr << "OpenACC execution on GPU disabled" << std::endl;
  98             cerr << "Number of threads for CPU: " << omp_get_max_threads() << std::endl;
  99             //#pragma acc kernels
 100             //{
 101             #pragma acc loop
 102 //#pragma acc parallel  //#pragma acc kernels
 103 #endif //USE_OPENACC_CPU
 104             for (row = 0; row < RowsA; row ++) {
 105                 unsigned col;
 106
 107 #ifdef USE_OPENMP_GPU /* OpenMP on GPU */
 108                 #pragma omp target parallel for
 109 #endif // USE_OPENMP_GPU
 110 #ifdef USE_OPENMP_CPU /* OpenMP on CPU */
 111                 #pragma omp parallel for
 112 #endif //USE_OPENMP_GPU
 113 #ifdef USE_OPENACC_GPU /* OpenACC on GPU */
 114                 #pragma acc target teams distribute parallel for
 115 #endif // OpenACC on GPU
 116 #ifdef USE_OPENACC_CPU
 117         //#pragma acc kernels
 118         //#pragma acc parallel loop gang vector //#pragma acc kernels
 119                 #pragma acc loop
 120 #endif //USE_OPENACC_CPU
 121                 for (col = 0; col < ColsA; col ++) {
 122                     float res = 0.0;
 123                     C[row*ColsC+col] = A[row*ColsC+col] + B[row*ColsC+col];
 124                 }
 125             }
 126 #ifdef USE_OPENACC_CPU
 127             //}
 128 #endif
 129             END_TIME(t2);
 130             TIME_DIFF(diffT2T1, t1, t2);
 131             pSamples->appendSample(diffT2T1);
 132         }
 133         cout << "Product finished." << endl;
 134         print_array<float>("c", C, RowsC, ColsC, SHOW_SIZE);
 135
 136 #if defined(USE_OPENMP_GPU) || defined(USE_OPENACC_GPU)
 137         pConfigTraits->deviceType = "GPU";
 138 #else
 139         pConfigTraits->deviceType = "CPU";
 140 #endif
 141         pPerfTest->buildTestInfo(pSamples);
 142         pPerfTest->saveOrPrint();
 143         freeArrays(A,B,C);
 144         pSamples = nullptr;
 145     } catch(string msg) {
 146         cerr << "Exception caught in main(): " << msg << endl;
 147         //cleanupHost();
 148     }
 149 }