73 using ExecutionSpace =
typename DeviceType::execution_space;
74 using ScratchSpace =
typename ExecutionSpace::scratch_memory_space;
75 using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
76 using OutputScratchView2D = Kokkos::View<OutputScalar**,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
77 using PointScratchView = Kokkos::View<PointScalar*, ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
79 using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
80 using TeamMember =
typename TeamPolicy::member_type;
84 OutputFieldType output_;
85 InputPointsType inputPoints_;
88 bool defineVertexFunctions_;
89 int numFields_, numPoints_;
91 size_t fad_size_output_;
93 static const int numVertices = 4;
94 static const int numEdges = 6;
96 const int edge_start_[numEdges] = {0,1,0,0,1,2};
97 const int edge_end_[numEdges] = {1,2,2,3,3,3};
99 static const int numFaces = 4;
100 const int face_vertex_0[numFaces] = {0,0,1,0};
101 const int face_vertex_1[numFaces] = {1,1,2,2};
102 const int face_vertex_2[numFaces] = {2,3,3,3};
106 const int face_ordinal_of_first_edge[numFaces] = {0,0,1,2};
109 int polyOrder,
bool defineVertexFunctions)
110 : opType_(opType), output_(output), inputPoints_(inputPoints),
111 polyOrder_(polyOrder), defineVertexFunctions_(defineVertexFunctions),
114 numFields_ = output.extent_int(0);
115 numPoints_ = output.extent_int(1);
116 INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != inputPoints.extent_int(0), std::invalid_argument,
"point counts need to match!");
117 INTREPID2_TEST_FOR_EXCEPTION(numFields_ != (polyOrder_+1)*(polyOrder_+2)*(polyOrder_+3)/6, std::invalid_argument,
"output field size does not match basis cardinality");
120 KOKKOS_INLINE_FUNCTION
121 void operator()(
const TeamMember & teamMember )
const
123 const int numFaceBasisFunctionsPerFace = (polyOrder_-2) * (polyOrder_-1) / 2;
124 const int numInteriorBasisFunctions = (polyOrder_-3) * (polyOrder_-2) * (polyOrder_-1) / 6;
126 auto pointOrdinal = teamMember.league_rank();
127 OutputScratchView legendre_values1_at_point, legendre_values2_at_point;
128 OutputScratchView2D jacobi_values1_at_point, jacobi_values2_at_point, jacobi_values3_at_point;
129 const int numAlphaValues = (polyOrder_-1 > 1) ? (polyOrder_-1) : 1;
130 if (fad_size_output_ > 0) {
131 legendre_values1_at_point = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
132 legendre_values2_at_point = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
133 jacobi_values1_at_point = OutputScratchView2D(teamMember.team_shmem(), numAlphaValues, polyOrder_ + 1, fad_size_output_);
134 jacobi_values2_at_point = OutputScratchView2D(teamMember.team_shmem(), numAlphaValues, polyOrder_ + 1, fad_size_output_);
135 jacobi_values3_at_point = OutputScratchView2D(teamMember.team_shmem(), numAlphaValues, polyOrder_ + 1, fad_size_output_);
138 legendre_values1_at_point = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
139 legendre_values2_at_point = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
140 jacobi_values1_at_point = OutputScratchView2D(teamMember.team_shmem(), numAlphaValues, polyOrder_ + 1);
141 jacobi_values2_at_point = OutputScratchView2D(teamMember.team_shmem(), numAlphaValues, polyOrder_ + 1);
142 jacobi_values3_at_point = OutputScratchView2D(teamMember.team_shmem(), numAlphaValues, polyOrder_ + 1);
145 const auto & x = inputPoints_(pointOrdinal,0);
146 const auto & y = inputPoints_(pointOrdinal,1);
147 const auto & z = inputPoints_(pointOrdinal,2);
150 const PointScalar lambda[numVertices] = {1. - x - y - z, x, y, z};
151 const PointScalar lambda_dx[numVertices] = {-1., 1., 0., 0.};
152 const PointScalar lambda_dy[numVertices] = {-1., 0., 1., 0.};
153 const PointScalar lambda_dz[numVertices] = {-1., 0., 0., 1.};
155 const int num1DEdgeFunctions = polyOrder_ - 1;
162 for (
int vertexOrdinal=0; vertexOrdinal<numVertices; vertexOrdinal++)
164 output_(vertexOrdinal,pointOrdinal) = lambda[vertexOrdinal];
166 if (!defineVertexFunctions_)
170 output_(0,pointOrdinal) = 1.0;
174 int fieldOrdinalOffset = numVertices;
175 for (
int edgeOrdinal=0; edgeOrdinal<numEdges; edgeOrdinal++)
177 const auto & s0 = lambda[edge_start_[edgeOrdinal]];
178 const auto & s1 = lambda[ edge_end_[edgeOrdinal]];
180 Polynomials::shiftedScaledIntegratedLegendreValues(legendre_values1_at_point, polyOrder_, PointScalar(s1), PointScalar(s0+s1));
181 for (
int edgeFunctionOrdinal=0; edgeFunctionOrdinal<num1DEdgeFunctions; edgeFunctionOrdinal++)
184 output_(edgeFunctionOrdinal+fieldOrdinalOffset,pointOrdinal) = legendre_values1_at_point(edgeFunctionOrdinal+2);
186 fieldOrdinalOffset += num1DEdgeFunctions;
192 for (
int faceOrdinal=0; faceOrdinal<numFaces; faceOrdinal++)
194 const auto & s0 = lambda[face_vertex_0[faceOrdinal]];
195 const auto & s1 = lambda[face_vertex_1[faceOrdinal]];
196 const auto & s2 = lambda[face_vertex_2[faceOrdinal]];
197 const PointScalar jacobiScaling = s0 + s1 + s2;
200 for (
int n=2; n<=polyOrder_; n++)
202 const double alpha = n*2;
203 const int alphaOrdinal = n-2;
204 using Kokkos::subview;
206 auto jacobi_alpha = subview(jacobi_values1_at_point, alphaOrdinal, ALL);
207 Polynomials::shiftedScaledIntegratedJacobiValues(jacobi_alpha, alpha, polyOrder_-2, s2, jacobiScaling);
210 const int edgeOrdinal = face_ordinal_of_first_edge[faceOrdinal];
211 int localFaceBasisOrdinal = 0;
212 for (
int totalPolyOrder=3; totalPolyOrder<=polyOrder_; totalPolyOrder++)
214 for (
int i=2; i<totalPolyOrder; i++)
216 const int edgeBasisOrdinal = edgeOrdinal*num1DEdgeFunctions + i-2 + numVertices;
217 const auto & edgeValue = output_(edgeBasisOrdinal,pointOrdinal);
218 const int alphaOrdinal = i-2;
220 const int j = totalPolyOrder - i;
221 const auto & jacobiValue = jacobi_values1_at_point(alphaOrdinal,j);
222 const int fieldOrdinal = fieldOrdinalOffset + localFaceBasisOrdinal;
223 output_(fieldOrdinal,pointOrdinal) = edgeValue * jacobiValue;
225 localFaceBasisOrdinal++;
228 fieldOrdinalOffset += numFaceBasisFunctionsPerFace;
232 for (
int n=3; n<=polyOrder_; n++)
234 const double alpha = n*2;
235 const double jacobiScaling = 1.0;
236 const int alphaOrdinal = n-3;
237 using Kokkos::subview;
239 auto jacobi_alpha = subview(jacobi_values1_at_point, alphaOrdinal, ALL);
240 Polynomials::shiftedScaledIntegratedJacobiValues(jacobi_alpha, alpha, polyOrder_-3, lambda[3], jacobiScaling);
245 const int min_ij = min_i + min_j;
246 const int min_ijk = min_ij + min_k;
247 int localInteriorBasisOrdinal = 0;
248 for (
int totalPolyOrder_ijk=min_ijk; totalPolyOrder_ijk <= polyOrder_; totalPolyOrder_ijk++)
250 int localFaceBasisOrdinal = 0;
251 for (
int totalPolyOrder_ij=min_ij; totalPolyOrder_ij <= totalPolyOrder_ijk-min_j; totalPolyOrder_ij++)
253 for (
int i=2; i <= totalPolyOrder_ij-min_j; i++)
255 const int j = totalPolyOrder_ij - i;
256 const int k = totalPolyOrder_ijk - totalPolyOrder_ij;
257 const int faceBasisOrdinal = numEdges*num1DEdgeFunctions + numVertices + localFaceBasisOrdinal;
258 const auto & faceValue = output_(faceBasisOrdinal,pointOrdinal);
259 const int alphaOrdinal = (i+j)-3;
260 localFaceBasisOrdinal++;
262 const int fieldOrdinal = fieldOrdinalOffset + localInteriorBasisOrdinal;
263 const auto & jacobiValue = jacobi_values1_at_point(alphaOrdinal,k);
264 output_(fieldOrdinal,pointOrdinal) = faceValue * jacobiValue;
265 localInteriorBasisOrdinal++;
269 fieldOrdinalOffset += numInteriorBasisFunctions;
276 if (defineVertexFunctions_)
280 output_(0,pointOrdinal,0) = -1.0;
281 output_(0,pointOrdinal,1) = -1.0;
282 output_(0,pointOrdinal,2) = -1.0;
288 output_(0,pointOrdinal,0) = 0.0;
289 output_(0,pointOrdinal,1) = 0.0;
290 output_(0,pointOrdinal,2) = 0.0;
293 output_(1,pointOrdinal,0) = 1.0;
294 output_(1,pointOrdinal,1) = 0.0;
295 output_(1,pointOrdinal,2) = 0.0;
297 output_(2,pointOrdinal,0) = 0.0;
298 output_(2,pointOrdinal,1) = 1.0;
299 output_(2,pointOrdinal,2) = 0.0;
301 output_(3,pointOrdinal,0) = 0.0;
302 output_(3,pointOrdinal,1) = 0.0;
303 output_(3,pointOrdinal,2) = 1.0;
306 int fieldOrdinalOffset = numVertices;
318 auto & P_i_minus_1 = legendre_values1_at_point;
319 auto & L_i_dt = legendre_values2_at_point;
320 for (
int edgeOrdinal=0; edgeOrdinal<numEdges; edgeOrdinal++)
322 const auto & s0 = lambda[edge_start_[edgeOrdinal]];
323 const auto & s1 = lambda[ edge_end_[edgeOrdinal]];
325 const auto & s0_dx = lambda_dx[edge_start_[edgeOrdinal]];
326 const auto & s0_dy = lambda_dy[edge_start_[edgeOrdinal]];
327 const auto & s0_dz = lambda_dz[edge_start_[edgeOrdinal]];
328 const auto & s1_dx = lambda_dx[ edge_end_[edgeOrdinal]];
329 const auto & s1_dy = lambda_dy[ edge_end_[edgeOrdinal]];
330 const auto & s1_dz = lambda_dz[ edge_end_[edgeOrdinal]];
332 Polynomials::shiftedScaledLegendreValues (P_i_minus_1, polyOrder_-1, PointScalar(s1), PointScalar(s0+s1));
333 Polynomials::shiftedScaledIntegratedLegendreValues_dt(L_i_dt, polyOrder_, PointScalar(s1), PointScalar(s0+s1));
334 for (
int edgeFunctionOrdinal=0; edgeFunctionOrdinal<num1DEdgeFunctions; edgeFunctionOrdinal++)
337 const int i = edgeFunctionOrdinal+2;
338 output_(edgeFunctionOrdinal+fieldOrdinalOffset,pointOrdinal,0) = P_i_minus_1(i-1) * s1_dx + L_i_dt(i) * (s1_dx + s0_dx);
339 output_(edgeFunctionOrdinal+fieldOrdinalOffset,pointOrdinal,1) = P_i_minus_1(i-1) * s1_dy + L_i_dt(i) * (s1_dy + s0_dy);
340 output_(edgeFunctionOrdinal+fieldOrdinalOffset,pointOrdinal,2) = P_i_minus_1(i-1) * s1_dz + L_i_dt(i) * (s1_dz + s0_dz);
342 fieldOrdinalOffset += num1DEdgeFunctions;
363 auto & L_i = legendre_values2_at_point;
364 auto & L_2i_j_dt = jacobi_values1_at_point;
365 auto & L_2i_j = jacobi_values2_at_point;
366 auto & P_2i_j_minus_1 = jacobi_values3_at_point;
368 for (
int faceOrdinal=0; faceOrdinal<numFaces; faceOrdinal++)
370 const auto & s0 = lambda[face_vertex_0[faceOrdinal]];
371 const auto & s1 = lambda[face_vertex_1[faceOrdinal]];
372 const auto & s2 = lambda[face_vertex_2[faceOrdinal]];
373 Polynomials::shiftedScaledIntegratedLegendreValues(L_i, polyOrder_, s1, s0+s1);
375 const PointScalar jacobiScaling = s0 + s1 + s2;
378 for (
int n=2; n<=polyOrder_; n++)
380 const double alpha = n*2;
381 const int alphaOrdinal = n-2;
382 using Kokkos::subview;
384 auto L_2i_j_dt_alpha = subview(L_2i_j_dt, alphaOrdinal, ALL);
385 auto L_2i_j_alpha = subview(L_2i_j, alphaOrdinal, ALL);
386 auto P_2i_j_minus_1_alpha = subview(P_2i_j_minus_1, alphaOrdinal, ALL);
387 Polynomials::shiftedScaledIntegratedJacobiValues_dt(L_2i_j_dt_alpha, alpha, polyOrder_-2, s2, jacobiScaling);
388 Polynomials::shiftedScaledIntegratedJacobiValues (L_2i_j_alpha, alpha, polyOrder_-2, s2, jacobiScaling);
389 Polynomials::shiftedScaledJacobiValues(P_2i_j_minus_1_alpha, alpha, polyOrder_-1, s2, jacobiScaling);
392 const int edgeOrdinal = face_ordinal_of_first_edge[faceOrdinal];
393 int localFaceOrdinal = 0;
394 for (
int totalPolyOrder=3; totalPolyOrder<=polyOrder_; totalPolyOrder++)
396 for (
int i=2; i<totalPolyOrder; i++)
398 const int edgeBasisOrdinal = edgeOrdinal*num1DEdgeFunctions + i-2 + numVertices;
399 const auto & grad_L_i_dx = output_(edgeBasisOrdinal,pointOrdinal,0);
400 const auto & grad_L_i_dy = output_(edgeBasisOrdinal,pointOrdinal,1);
401 const auto & grad_L_i_dz = output_(edgeBasisOrdinal,pointOrdinal,2);
403 const int alphaOrdinal = i-2;
405 const auto & s0_dx = lambda_dx[face_vertex_0[faceOrdinal]];
406 const auto & s0_dy = lambda_dy[face_vertex_0[faceOrdinal]];
407 const auto & s0_dz = lambda_dz[face_vertex_0[faceOrdinal]];
408 const auto & s1_dx = lambda_dx[face_vertex_1[faceOrdinal]];
409 const auto & s1_dy = lambda_dy[face_vertex_1[faceOrdinal]];
410 const auto & s1_dz = lambda_dz[face_vertex_1[faceOrdinal]];
411 const auto & s2_dx = lambda_dx[face_vertex_2[faceOrdinal]];
412 const auto & s2_dy = lambda_dy[face_vertex_2[faceOrdinal]];
413 const auto & s2_dz = lambda_dz[face_vertex_2[faceOrdinal]];
415 int j = totalPolyOrder - i;
418 auto & l_2i_j = L_2i_j(alphaOrdinal,j);
420 auto & l_2i_j_dt = L_2i_j_dt(alphaOrdinal,j);
421 auto & p_2i_j_minus_1 = P_2i_j_minus_1(alphaOrdinal,j-1);
423 const OutputScalar basisValue_dx = l_2i_j * grad_L_i_dx + l_i * (p_2i_j_minus_1 * s2_dx + l_2i_j_dt * (s0_dx + s1_dx + s2_dx));
424 const OutputScalar basisValue_dy = l_2i_j * grad_L_i_dy + l_i * (p_2i_j_minus_1 * s2_dy + l_2i_j_dt * (s0_dy + s1_dy + s2_dy));
425 const OutputScalar basisValue_dz = l_2i_j * grad_L_i_dz + l_i * (p_2i_j_minus_1 * s2_dz + l_2i_j_dt * (s0_dz + s1_dz + s2_dz));
427 const int fieldOrdinal = fieldOrdinalOffset + localFaceOrdinal;
429 output_(fieldOrdinal,pointOrdinal,0) = basisValue_dx;
430 output_(fieldOrdinal,pointOrdinal,1) = basisValue_dy;
431 output_(fieldOrdinal,pointOrdinal,2) = basisValue_dz;
436 fieldOrdinalOffset += numFaceBasisFunctionsPerFace;
455 auto & L_alpha = jacobi_values1_at_point;
456 auto & P_alpha = jacobi_values2_at_point;
460 const auto & s0 = lambda[0];
461 const auto & s1 = lambda[1];
462 const auto & s2 = lambda[2];
464 Polynomials::shiftedScaledIntegratedLegendreValues(legendre_values1_at_point, polyOrder_, PointScalar(s1), PointScalar(s0+s1));
467 const PointScalar jacobiScaling = s0 + s1 + s2;
468 for (
int n=2; n<=polyOrder_; n++)
470 const double alpha = n*2;
471 const int alphaOrdinal = n-2;
472 using Kokkos::subview;
474 auto jacobi_alpha = subview(jacobi_values3_at_point, alphaOrdinal, ALL);
475 Polynomials::shiftedScaledIntegratedJacobiValues(jacobi_alpha, alpha, polyOrder_-2, s2, jacobiScaling);
480 for (
int n=3; n<=polyOrder_; n++)
482 const double alpha = n*2;
483 const double jacobiScaling = 1.0;
484 const int alphaOrdinal = n-3;
485 using Kokkos::subview;
489 auto L = subview(L_alpha, alphaOrdinal, ALL);
490 auto P = subview(P_alpha, alphaOrdinal, ALL);
491 Polynomials::shiftedScaledIntegratedJacobiValues(L, alpha, polyOrder_-3, lambda[3], jacobiScaling);
492 Polynomials::shiftedScaledJacobiValues (P, alpha, polyOrder_-3, lambda[3], jacobiScaling);
498 const int min_ij = min_i + min_j;
499 const int min_ijk = min_ij + min_k;
500 int localInteriorBasisOrdinal = 0;
501 for (
int totalPolyOrder_ijk=min_ijk; totalPolyOrder_ijk <= polyOrder_; totalPolyOrder_ijk++)
503 int localFaceBasisOrdinal = 0;
504 for (
int totalPolyOrder_ij=min_ij; totalPolyOrder_ij <= totalPolyOrder_ijk-min_j; totalPolyOrder_ij++)
506 for (
int i=2; i <= totalPolyOrder_ij-min_j; i++)
508 const int j = totalPolyOrder_ij - i;
509 const int k = totalPolyOrder_ijk - totalPolyOrder_ij;
511 const int faceBasisOrdinal = numEdges*num1DEdgeFunctions + numVertices + localFaceBasisOrdinal;
513 const auto & faceValue_dx = output_(faceBasisOrdinal,pointOrdinal,0);
514 const auto & faceValue_dy = output_(faceBasisOrdinal,pointOrdinal,1);
515 const auto & faceValue_dz = output_(faceBasisOrdinal,pointOrdinal,2);
518 OutputScalar faceValue;
520 const auto & edgeValue = legendre_values1_at_point(i);
521 const int alphaOrdinal = i-2;
522 const auto & jacobiValue = jacobi_values3_at_point(alphaOrdinal,j);
523 faceValue = edgeValue * jacobiValue;
525 localFaceBasisOrdinal++;
527 const int alphaOrdinal = (i+j)-3;
529 const int fieldOrdinal = fieldOrdinalOffset + localInteriorBasisOrdinal;
530 const auto & integratedJacobiValue = L_alpha(alphaOrdinal,k);
531 const auto & jacobiValue = P_alpha(alphaOrdinal,k-1);
532 output_(fieldOrdinal,pointOrdinal,0) = integratedJacobiValue * faceValue_dx + faceValue * jacobiValue * lambda_dx[3];
533 output_(fieldOrdinal,pointOrdinal,1) = integratedJacobiValue * faceValue_dy + faceValue * jacobiValue * lambda_dy[3];
534 output_(fieldOrdinal,pointOrdinal,2) = integratedJacobiValue * faceValue_dz + faceValue * jacobiValue * lambda_dz[3];
536 localInteriorBasisOrdinal++;
540 fieldOrdinalOffset += numInteriorBasisFunctions;
552 INTREPID2_TEST_FOR_ABORT(
true,
553 ">>> ERROR: (Intrepid2::Basis_HGRAD_TET_Cn_FEM_ORTH::OrthPolynomialTri) Computing of second and higher-order derivatives is not currently supported");
563 size_t team_shmem_size (
int team_size)
const
570 const int numAlphaValues = std::max(polyOrder_-1, 1);
571 size_t shmem_size = 0;
572 if (fad_size_output_ > 0)
575 shmem_size += 2 * OutputScratchView::shmem_size(polyOrder_ + 1, fad_size_output_);
577 shmem_size += 3 * OutputScratchView2D::shmem_size(numAlphaValues, polyOrder_ + 1, fad_size_output_);
582 shmem_size += 2 * OutputScratchView::shmem_size(polyOrder_ + 1);
584 shmem_size += 3 * OutputScratchView2D::shmem_size(numAlphaValues, polyOrder_ + 1);