Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_32fc_deinterleave_real_64f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
71#ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a_H
72#define INCLUDED_volk_32fc_deinterleave_real_64f_a_H
73
74#include <inttypes.h>
75#include <stdio.h>
76
77#ifdef LV_HAVE_AVX2
78#include <immintrin.h>
79
80static inline void volk_32fc_deinterleave_real_64f_a_avx2(double* iBuffer,
81 const lv_32fc_t* complexVector,
82 unsigned int num_points)
83{
84 unsigned int number = 0;
85
86 const float* complexVectorPtr = (float*)complexVector;
87 double* iBufferPtr = iBuffer;
88
89 const unsigned int quarterPoints = num_points / 4;
90 __m256 cplxValue;
91 __m128 fVal;
92 __m256d dVal;
93 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0);
94 for (; number < quarterPoints; number++) {
95
96 cplxValue = _mm256_load_ps(complexVectorPtr);
97 complexVectorPtr += 8;
98
99 // Arrange in i1i2i1i2 format
100 cplxValue = _mm256_permutevar8x32_ps(cplxValue, idx);
101 fVal = _mm256_extractf128_ps(cplxValue, 0);
102 dVal = _mm256_cvtps_pd(fVal);
103 _mm256_store_pd(iBufferPtr, dVal);
104
105 iBufferPtr += 4;
106 }
107
108 number = quarterPoints * 4;
109 for (; number < num_points; number++) {
110 *iBufferPtr++ = (double)*complexVectorPtr++;
111 complexVectorPtr++;
112 }
113}
114#endif /* LV_HAVE_AVX2 */
115
116#ifdef LV_HAVE_SSE2
117#include <emmintrin.h>
118
119static inline void volk_32fc_deinterleave_real_64f_a_sse2(double* iBuffer,
120 const lv_32fc_t* complexVector,
121 unsigned int num_points)
122{
123 unsigned int number = 0;
124
125 const float* complexVectorPtr = (float*)complexVector;
126 double* iBufferPtr = iBuffer;
127
128 const unsigned int halfPoints = num_points / 2;
129 __m128 cplxValue, fVal;
130 __m128d dVal;
131 for (; number < halfPoints; number++) {
132
133 cplxValue = _mm_load_ps(complexVectorPtr);
134 complexVectorPtr += 4;
135
136 // Arrange in i1i2i1i2 format
137 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
138 dVal = _mm_cvtps_pd(fVal);
139 _mm_store_pd(iBufferPtr, dVal);
140
141 iBufferPtr += 2;
142 }
143
144 number = halfPoints * 2;
145 for (; number < num_points; number++) {
146 *iBufferPtr++ = (double)*complexVectorPtr++;
147 complexVectorPtr++;
148 }
149}
150#endif /* LV_HAVE_SSE2 */
151
152#ifdef LV_HAVE_GENERIC
153
154static inline void volk_32fc_deinterleave_real_64f_generic(double* iBuffer,
155 const lv_32fc_t* complexVector,
156 unsigned int num_points)
157{
158 unsigned int number = 0;
159 const float* complexVectorPtr = (float*)complexVector;
160 double* iBufferPtr = iBuffer;
161 for (number = 0; number < num_points; number++) {
162 *iBufferPtr++ = (double)*complexVectorPtr++;
163 complexVectorPtr++;
164 }
165}
166#endif /* LV_HAVE_GENERIC */
167
168#ifdef LV_HAVE_NEONV8
169#include <arm_neon.h>
170
/*
 * Copies the real part of every complex float sample into iBuffer, widened
 * to double precision. ARMv8 NEON variant.
 *
 * iBuffer       - output array; receives num_points doubles.
 * complexVector - input array of num_points interleaved (real, imag) float
 *                 pairs.
 * num_points    - number of complex samples to convert.
 */
static inline void volk_32fc_deinterleave_real_64f_neon(double* iBuffer,
                                                        const lv_32fc_t* complexVector,
                                                        unsigned int num_points)
{
    /* The input is only read, so keep its const qualifier through the cast. */
    const float* in = (const float*)complexVector;
    double* out = iBuffer;

    const unsigned int quarter_points = num_points / 4;

    /* Vector body: four complex samples (eight floats) per iteration. */
    for (unsigned int block = 0; block < quarter_points; block++) {
        /*
         * vld4_f32 de-interleaves with a stride of four floats, so
         * val[0] = {re0, re2} and val[2] = {re1, re3}; val[1] and val[3]
         * hold the imaginary parts and are ignored.
         */
        const float32x2x4_t samples = vld4_f32(in);

        /* Widen both real-part vectors from float to double. */
        float64x2x2_t reals;
        reals.val[0] = vcvt_f64_f32(samples.val[0]);
        reals.val[1] = vcvt_f64_f32(samples.val[2]);

        /*
         * vst2q_f64 interleaves val[0] and val[1] on store, which restores
         * the original order re0, re1, re2, re3.
         */
        vst2q_f64(out, reals);

        out += 4;
        in += 8;
    }

    /* Scalar tail: the 0-3 samples that do not fill a whole vector. */
    for (unsigned int i = quarter_points * 4; i < num_points; i++) {
        *out++ = (double)*in;
        in += 2; /* skip the real just read and its imaginary partner */
    }
}
207#endif /* LV_HAVE_NEONV8 */
208
209#endif /* INCLUDED_volk_32fc_deinterleave_real_64f_a_H */
210
211#ifndef INCLUDED_volk_32fc_deinterleave_real_64f_u_H
212#define INCLUDED_volk_32fc_deinterleave_real_64f_u_H
213
214#include <inttypes.h>
215#include <stdio.h>
216
217#ifdef LV_HAVE_AVX2
218#include <immintrin.h>
219
220static inline void volk_32fc_deinterleave_real_64f_u_avx2(double* iBuffer,
221 const lv_32fc_t* complexVector,
222 unsigned int num_points)
223{
224 unsigned int number = 0;
225
226 const float* complexVectorPtr = (float*)complexVector;
227 double* iBufferPtr = iBuffer;
228
229 const unsigned int quarterPoints = num_points / 4;
230 __m256 cplxValue;
231 __m128 fVal;
232 __m256d dVal;
233 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0);
234 for (; number < quarterPoints; number++) {
235
236 cplxValue = _mm256_loadu_ps(complexVectorPtr);
237 complexVectorPtr += 8;
238
239 // Arrange in i1i2i1i2 format
240 cplxValue = _mm256_permutevar8x32_ps(cplxValue, idx);
241 fVal = _mm256_extractf128_ps(cplxValue, 0);
242 dVal = _mm256_cvtps_pd(fVal);
243 _mm256_storeu_pd(iBufferPtr, dVal);
244
245 iBufferPtr += 4;
246 }
247
248 number = quarterPoints * 4;
249 for (; number < num_points; number++) {
250 *iBufferPtr++ = (double)*complexVectorPtr++;
251 complexVectorPtr++;
252 }
253}
254#endif /* LV_HAVE_AVX2 */
255
256#endif /* INCLUDED_volk_32fc_deinterleave_real_64f_u_H */
static void volk_32fc_deinterleave_real_64f_a_sse2(double *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_64f.h:119
static void volk_32fc_deinterleave_real_64f_generic(double *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_64f.h:154
float complex lv_32fc_t
Definition: volk_complex.h:65