Please help us learn more about our user community by answering this short survey: https://forms.gle/wpyrxWi18ox9Z5ae9
Eigen  3.4.0
 
Loading...
Searching...
No Matches
GPU/TypeCasting.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_TYPE_CASTING_GPU_H
11#define EIGEN_TYPE_CASTING_GPU_H
12
13namespace Eigen {
14
15namespace internal {
16
17#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
18 (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
19
20
21template <>
22struct type_casting_traits<Eigen::half, float> {
23 enum {
24 VectorizedCast = 1,
25 SrcCoeffRatio = 1,
26 TgtCoeffRatio = 2
27 };
28};
29
30template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
31 float2 r1 = __half22float2(a);
32 float2 r2 = __half22float2(b);
33 return make_float4(r1.x, r1.y, r2.x, r2.y);
34}
35
36
37template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcast<float4, Packet4h2>(const float4& a, const float4& b) {
38 Packet4h2 r;
39 half2* r_alias=reinterpret_cast<half2*>(&r);
40 r_alias[0]=__floats2half2_rn(a.x,a.y);
41 r_alias[1]=__floats2half2_rn(a.z,a.w);
42 r_alias[2]=__floats2half2_rn(b.x,b.y);
43 r_alias[3]=__floats2half2_rn(b.z,b.w);
44 return r;
45}
46
47template <>
48struct type_casting_traits<float, Eigen::half> {
49 enum {
50 VectorizedCast = 1,
51 SrcCoeffRatio = 2,
52 TgtCoeffRatio = 1
53 };
54};
55
56template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<Packet4h2, float4>(const Packet4h2& a) {
57 // Simply discard the second half of the input
58 float4 r;
59 const half2* a_alias=reinterpret_cast<const half2*>(&a);
60 float2 r1 = __half22float2(a_alias[0]);
61 float2 r2 = __half22float2(a_alias[1]);
62 r.x=static_cast<float>(r1.x);
63 r.y=static_cast<float>(r1.y);
64 r.z=static_cast<float>(r2.x);
65 r.w=static_cast<float>(r2.y);
66 return r;
67}
68
69template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
70 // Simply discard the second half of the input
71 return __floats2half2_rn(a.x, a.y);
72}
73
74#endif
75
76} // end namespace internal
77
78} // end namespace Eigen
79
80#endif // EIGEN_TYPE_CASTING_GPU_H
Namespace containing all symbols from the Eigen library.
Definition: Core:141