Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_32fc_deinterleave_real_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
70#ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a_H
71#define INCLUDED_volk_32fc_deinterleave_real_32f_a_H
72
73#include <inttypes.h>
74#include <stdio.h>
75
76#ifdef LV_HAVE_AVX2
77#include <immintrin.h>
78
79static inline void volk_32fc_deinterleave_real_32f_a_avx2(float* iBuffer,
80 const lv_32fc_t* complexVector,
81 unsigned int num_points)
82{
83 unsigned int number = 0;
84 const unsigned int eighthPoints = num_points / 8;
85
86 const float* complexVectorPtr = (const float*)complexVector;
87 float* iBufferPtr = iBuffer;
88
89 __m256 cplxValue1, cplxValue2;
90 __m256 iValue;
91 __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
92 for (; number < eighthPoints; number++) {
93
94 cplxValue1 = _mm256_load_ps(complexVectorPtr);
95 complexVectorPtr += 8;
96
97 cplxValue2 = _mm256_load_ps(complexVectorPtr);
98 complexVectorPtr += 8;
99
100 // Arrange in i1i2i3i4 format
101 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
102 iValue = _mm256_permutevar8x32_ps(iValue, idx);
103
104 _mm256_store_ps(iBufferPtr, iValue);
105
106 iBufferPtr += 8;
107 }
108
109 number = eighthPoints * 8;
110 for (; number < num_points; number++) {
111 *iBufferPtr++ = *complexVectorPtr++;
112 complexVectorPtr++;
113 }
114}
115#endif /* LV_HAVE_AVX2 */
116
117#ifdef LV_HAVE_SSE
118#include <xmmintrin.h>
119
120static inline void volk_32fc_deinterleave_real_32f_a_sse(float* iBuffer,
121 const lv_32fc_t* complexVector,
122 unsigned int num_points)
123{
124 unsigned int number = 0;
125 const unsigned int quarterPoints = num_points / 4;
126
127 const float* complexVectorPtr = (const float*)complexVector;
128 float* iBufferPtr = iBuffer;
129
130 __m128 cplxValue1, cplxValue2, iValue;
131 for (; number < quarterPoints; number++) {
132
133 cplxValue1 = _mm_load_ps(complexVectorPtr);
134 complexVectorPtr += 4;
135
136 cplxValue2 = _mm_load_ps(complexVectorPtr);
137 complexVectorPtr += 4;
138
139 // Arrange in i1i2i3i4 format
140 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
141
142 _mm_store_ps(iBufferPtr, iValue);
143
144 iBufferPtr += 4;
145 }
146
147 number = quarterPoints * 4;
148 for (; number < num_points; number++) {
149 *iBufferPtr++ = *complexVectorPtr++;
150 complexVectorPtr++;
151 }
152}
153#endif /* LV_HAVE_SSE */
154
155
156#ifdef LV_HAVE_GENERIC
157
158static inline void volk_32fc_deinterleave_real_32f_generic(float* iBuffer,
159 const lv_32fc_t* complexVector,
160 unsigned int num_points)
161{
162 unsigned int number = 0;
163 const float* complexVectorPtr = (float*)complexVector;
164 float* iBufferPtr = iBuffer;
165 for (number = 0; number < num_points; number++) {
166 *iBufferPtr++ = *complexVectorPtr++;
167 complexVectorPtr++;
168 }
169}
170#endif /* LV_HAVE_GENERIC */
171
172
173#ifdef LV_HAVE_NEON
174#include <arm_neon.h>
175
/*
 * NEON kernel: writes the real part of each complex sample in complexVector
 * to iBuffer.
 *
 * iBuffer       - output, receives num_points floats.
 * complexVector - input, num_points complex samples viewed as interleaved
 *                 (real, imaginary) float pairs.
 * num_points    - number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_real_32f_neon(float* iBuffer,
                                                        const lv_32fc_t* complexVector,
                                                        unsigned int num_points)
{
    const unsigned int quarter_points = num_points / 4;
    /* The input is read-only, so the cast keeps its const qualifier. */
    const float* complexVectorPtr = (const float*)complexVector;
    float* iBufferPtr = iBuffer;
    float32x4x2_t complexInput;
    unsigned int number;

    /* Four complex samples per iteration: vld2q_f32 de-interleaves eight
     * floats so that val[0] holds the even-indexed ones (the real parts). */
    for (number = 0; number < quarter_points; number++) {
        complexInput = vld2q_f32(complexVectorPtr);
        vst1q_f32(iBufferPtr, complexInput.val[0]);
        complexVectorPtr += 8;
        iBufferPtr += 4;
    }

    /* Scalar tail for the remaining (num_points % 4) samples. */
    for (number = quarter_points * 4; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr; /* real part */
        complexVectorPtr += 2;             /* step over the imaginary part */
    }
}
198#endif /* LV_HAVE_NEON */
199
200#endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a_H */
201
202
203#ifndef INCLUDED_volk_32fc_deinterleave_real_32f_u_H
204#define INCLUDED_volk_32fc_deinterleave_real_32f_u_H
205
206#include <inttypes.h>
207#include <stdio.h>
208
209#ifdef LV_HAVE_AVX2
210#include <immintrin.h>
211
212static inline void volk_32fc_deinterleave_real_32f_u_avx2(float* iBuffer,
213 const lv_32fc_t* complexVector,
214 unsigned int num_points)
215{
216 unsigned int number = 0;
217 const unsigned int eighthPoints = num_points / 8;
218
219 const float* complexVectorPtr = (const float*)complexVector;
220 float* iBufferPtr = iBuffer;
221
222 __m256 cplxValue1, cplxValue2;
223 __m256 iValue;
224 __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
225 for (; number < eighthPoints; number++) {
226
227 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
228 complexVectorPtr += 8;
229
230 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
231 complexVectorPtr += 8;
232
233 // Arrange in i1i2i3i4 format
234 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
235 iValue = _mm256_permutevar8x32_ps(iValue, idx);
236
237 _mm256_storeu_ps(iBufferPtr, iValue);
238
239 iBufferPtr += 8;
240 }
241
242 number = eighthPoints * 8;
243 for (; number < num_points; number++) {
244 *iBufferPtr++ = *complexVectorPtr++;
245 complexVectorPtr++;
246 }
247}
248#endif /* LV_HAVE_AVX2 */
249
250#endif /* INCLUDED_volk_32fc_deinterleave_real_32f_u_H */
static void volk_32fc_deinterleave_real_32f_generic(float *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_32f.h:158
static void volk_32fc_deinterleave_real_32f_a_sse(float *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_32f.h:120
static void volk_32fc_deinterleave_real_32f_neon(float *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_32f.h:176
float complex lv_32fc_t
Definition: volk_complex.h:65