Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_32f_64f_multiply_64f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2018 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
71#ifndef INCLUDED_volk_32f_64f_multiply_64f_H
72#define INCLUDED_volk_32f_64f_multiply_64f_H
73
74#include <inttypes.h>
75
76
77#ifdef LV_HAVE_GENERIC
78
/*
 * Portable scalar kernel: element-wise product of a float vector and a
 * double vector, written to a double vector.
 *
 * cVector    - output buffer; receives num_points doubles
 * aVector    - float input; each element is widened to double before
 *              the multiplication
 * bVector    - double input
 * num_points - number of elements to process (0 leaves cVector untouched)
 */
static inline void volk_32f_64f_multiply_64f_generic(double* cVector,
                                                     const float* aVector,
                                                     const double* bVector,
                                                     unsigned int num_points)
{
    unsigned int i;

    for (i = 0; i < num_points; i++) {
        /* Promote the float operand first so the product is formed in double. */
        const double widened = (double)aVector[i];
        cVector[i] = widened * bVector[i];
    }
}
93
94#endif /* LV_HAVE_GENERIC */
95
96/*
97 * Unaligned versions
98 */
99
100
101#ifdef LV_HAVE_AVX
102
103#include <immintrin.h>
104#include <xmmintrin.h>
105
/*
 * AVX kernel using unaligned loads/stores: element-wise product of a float
 * vector and a double vector, written to a double vector.
 *
 * Eight elements are handled per vector iteration: one 256-bit load of eight
 * floats is split into two 128-bit halves, each half is widened to four
 * doubles, and the two halves are multiplied against two 256-bit loads of
 * bVector. Elements left over after the last full group of eight are
 * processed by a scalar loop.
 *
 * cVector    - output buffer; receives num_points doubles
 * aVector    - float input
 * bVector    - double input
 * num_points - number of elements to process
 */
static inline void volk_32f_64f_multiply_64f_u_avx(double* cVector,
                                                   const float* aVector,
                                                   const double* bVector,
                                                   unsigned int num_points)
{
    /* Largest multiple of 8 that does not exceed num_points. */
    const unsigned int vector_end = (num_points / 8) * 8;
    unsigned int i;

    for (i = 0; i < vector_end; i += 8) {
        /* All inputs for this group are read before anything is written. */
        const __m256 a_floats = _mm256_loadu_ps(aVector + i);
        const __m256d b_low = _mm256_loadu_pd(bVector + i);
        const __m256d b_high = _mm256_loadu_pd(bVector + i + 4);

        /* Lower and upper four floats, each converted to four doubles. */
        const __m256d a_low = _mm256_cvtps_pd(_mm256_extractf128_ps(a_floats, 0));
        const __m256d a_high = _mm256_cvtps_pd(_mm256_extractf128_ps(a_floats, 1));

        const __m256d prod_low = _mm256_mul_pd(a_low, b_low);
        const __m256d prod_high = _mm256_mul_pd(a_high, b_high);

        _mm256_storeu_pd(cVector + i, prod_low);
        _mm256_storeu_pd(cVector + i + 4, prod_high);
    }

    /* Scalar tail for the remaining (num_points % 8) elements. */
    for (; i < num_points; i++) {
        cVector[i] = (double)aVector[i] * bVector[i];
    }
}
149
150#endif /* LV_HAVE_AVX */
151
152
153#ifdef LV_HAVE_AVX
154
155#include <immintrin.h>
156#include <xmmintrin.h>
157
/*
 * AVX kernel using the aligned load/store intrinsics: element-wise product
 * of a float vector and a double vector, written to a double vector.
 *
 * Eight elements are handled per vector iteration: one 256-bit load of eight
 * floats is split into two 128-bit halves, each half is widened to four
 * doubles, and the two halves are multiplied against two 256-bit loads of
 * bVector. Elements left over after the last full group of eight are
 * processed by a scalar loop.
 *
 * The vector loop uses _mm256_load_ps / _mm256_load_pd / _mm256_store_pd, so
 * the buffers must meet the alignment those intrinsics expect (32 bytes per
 * the Intel intrinsics documentation).
 *
 * cVector    - output buffer; receives num_points doubles
 * aVector    - float input
 * bVector    - double input
 * num_points - number of elements to process
 */
static inline void volk_32f_64f_multiply_64f_a_avx(double* cVector,
                                                   const float* aVector,
                                                   const double* bVector,
                                                   unsigned int num_points)
{
    /* Largest multiple of 8 that does not exceed num_points. */
    const unsigned int vector_end = (num_points / 8) * 8;
    unsigned int i;

    for (i = 0; i < vector_end; i += 8) {
        /* All inputs for this group are read before anything is written. */
        const __m256 a_floats = _mm256_load_ps(aVector + i);
        const __m256d b_low = _mm256_load_pd(bVector + i);
        const __m256d b_high = _mm256_load_pd(bVector + i + 4);

        /* Lower and upper four floats, each converted to four doubles. */
        const __m256d a_low = _mm256_cvtps_pd(_mm256_extractf128_ps(a_floats, 0));
        const __m256d a_high = _mm256_cvtps_pd(_mm256_extractf128_ps(a_floats, 1));

        const __m256d prod_low = _mm256_mul_pd(a_low, b_low);
        const __m256d prod_high = _mm256_mul_pd(a_high, b_high);

        _mm256_store_pd(cVector + i, prod_low);
        _mm256_store_pd(cVector + i + 4, prod_high);
    }

    /* Scalar tail for the remaining (num_points % 8) elements. */
    for (; i < num_points; i++) {
        cVector[i] = (double)aVector[i] * bVector[i];
    }
}
201
202#endif /* LV_HAVE_AVX */
203
204
205#endif /* INCLUDED_volk_32f_64f_multiply_64f_H */
static void volk_32f_64f_multiply_64f_generic(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
Definition: volk_32f_64f_multiply_64f.h:79
static void volk_32f_64f_multiply_64f_u_avx(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
Definition: volk_32f_64f_multiply_64f.h:106
static void volk_32f_64f_multiply_64f_a_avx(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
Definition: volk_32f_64f_multiply_64f.h:158