Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
HostScaling.cpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41
42#include <string>
43#include <iostream>
44#include <cstdlib>
45
46#include "Kokkos_Core.hpp"
47
48#include "Teuchos_CommandLineProcessor.hpp"
49#include "Teuchos_StandardCatchMacros.hpp"
50
51#include "TestStochastic.hpp"
52
54
// Stochastic Galerkin mat-vec algorithm tables.
//
// num_sg_alg is the number of selectable algorithms; it is handed to the
// command-line processor together with the value and name tables when the
// "alg" option is registered in main().
const int num_sg_alg = 2;

// Human-readable algorithm names. main() indexes this table with the selected
// SG_Alg value to label the output columns, so the entry order has to follow
// the enumerator order (presumably ORIG_MAT_FREE first, then PROD_CRS; the
// enum definition is not visible here, verify before reordering).
const char *sg_alg_names[] = {
  "Original Matrix-Free",
  "Product CRS"
};
60
61std::vector<double>
62run_test(const size_t num_cpu, const size_t num_core_per_cpu,
63 const size_t num_threads_per_core,
64 const size_t p, const size_t d, const size_t nGrid, const size_t nIter,
65 const bool symmetric, SG_Alg sg_alg,
66 const std::vector<double>& perf1 = std::vector<double>())
67{
68 typedef double Scalar;
69 typedef Kokkos::Threads Device;
70 const size_t team_count = num_cpu * num_core_per_cpu;
71 const size_t threads_per_team = num_threads_per_core;
72 Kokkos::InitializationSettings init_args;
73 init_args.set_num_threads(team_count*threads_per_team);
74 Kokkos::initialize( init_args );
75
76 std::vector<int> var_degree( d , p );
77
78 std::vector<double> perf;
79 if (sg_alg == PROD_CRS)
80 perf =
81 unit_test::test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(var_degree , nGrid , nIter , symmetric );
82 else if (sg_alg == ORIG_MAT_FREE)
83 perf =
84 unit_test::test_original_matrix_free_vec<Scalar,Device,Stokhos::DefaultMultiply>(
85 var_degree , nGrid , nIter , true , symmetric );
86
87 Kokkos::finalize();
88
89 double speed_up;
90 if (perf1.size() > 0)
91 speed_up = perf1[1] / perf[1];
92 else
93 speed_up = perf[1] / perf[1];
94 double efficiency = speed_up / team_count;
95
96 std::cout << team_count << " , "
97 << nGrid << " , "
98 << d << " , "
99 << p << " , "
100 << perf[1] << " , "
101 << perf[2] << " , "
102 << speed_up << " , "
103 << 100.0 * efficiency << " , "
104 << std::endl;
105
106 return perf;
107}
108
109int main(int argc, char *argv[])
110{
111 bool success = true;
112
113 try {
114 // Setup command line options
115 Teuchos::CommandLineProcessor CLP;
116 int p = 3;
117 CLP.setOption("p", &p, "Polynomial order");
118 int d = 4;
119 CLP.setOption("d", &d, "Stochastic dimension");
120 int nGrid = 64;
121 CLP.setOption("n", &nGrid, "Number of spatial grid points in each dimension");
122 int nIter = 1;
123 CLP.setOption("niter", &nIter, "Number of iterations");
124 int n_thread_per_core = 1;
125 CLP.setOption("nthread", &n_thread_per_core, "Number of threads per core to use");
126 int n_hyperthreads = 2;
127 CLP.setOption("nht", &n_hyperthreads, "Number of hyperthreads per core available");
128 SG_Alg sg_alg = PROD_CRS;
129 CLP.setOption("alg", &sg_alg, num_sg_alg, sg_alg_values, sg_alg_names,
130 "SG Mat-Vec Algorithm");
131 bool symmetric = true;
132 CLP.setOption("symmetric", "asymmetric", &symmetric, "Use symmetric PDF");
133 CLP.parse( argc, argv );
134
135 // Detect number of CPUs and number of cores
136 const size_t num_cpu = Kokkos::hwloc::get_available_numa_count();
137 const size_t num_core_per_cpu = Kokkos::hwloc::get_available_cores_per_numa();
138 const size_t core_capacity = Kokkos::hwloc::get_available_threads_per_core();
139 if (static_cast<size_t>(n_thread_per_core) > core_capacity )
140 n_thread_per_core = core_capacity;
141
142 // Print header
143 std::cout << std::endl
144 << "\"#nCore\" , "
145 << "\"#nGrid\" , "
146 << "\"#Variable\" , "
147 << "\"PolyDegree\" , "
148 << "\"" << sg_alg_names[sg_alg] << " MXV Time\" , "
149 << "\"" << sg_alg_names[sg_alg] << " MXV GFLOPS\" , "
150 << "\"" << sg_alg_names[sg_alg] << " MXV Speedup\" , "
151 << "\"" << sg_alg_names[sg_alg] << " MXV Efficiency\" , "
152 << std::endl ;
153
154 // Do a serial run to base speedup & efficiency from
155 const std::vector<double> perf1 =
156 run_test(1, 1, 1, p, d, nGrid, nIter, symmetric, sg_alg);
157
158 // First do 1 core per cpu
159 for (size_t n=2; n<=num_cpu; ++n) {
160 const std::vector<double> perf =
161 run_test(n, 1, 1, p, d, nGrid, nIter, symmetric, sg_alg, perf1);
162 }
163
164 // Now do all cpus, increasing number of cores
165 for (size_t n=2; n<=num_core_per_cpu; ++n) {
166 const std::vector<double> perf =
167 run_test(num_cpu, n, 1, p, d, nGrid, nIter, symmetric, sg_alg, perf1);
168 }
169
170 // Now do all cpus, all cores, with nthreads/core
171 const std::vector<double> perf =
172 run_test(num_cpu, num_core_per_cpu, n_thread_per_core, p, d, nGrid,
173 nIter, symmetric, sg_alg, perf1);
174
175
176 }
177 TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);
178
179 if (!success)
180 return -1;
181 return 0 ;
182}
int main(int argc, char *argv[])
const SG_Alg sg_alg_values[]
const char * sg_alg_names[]
const int num_sg_alg
std::vector< double > run_test(const size_t num_cpu, const size_t num_core_per_cpu, const size_t num_threads_per_core, const size_t p, const size_t d, const size_t nGrid, const size_t nIter, const bool symmetric, SG_Alg sg_alg, const std::vector< double > &perf1=std::vector< double >())
SG_Alg
@ ORIG_MAT_FREE
@ PROD_CRS