70#ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a_H
71#define INCLUDED_volk_32fc_deinterleave_real_32f_a_H
#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*!
 * \brief Copies the real component of every complex sample into a float
 *        buffer (AVX2, aligned loads and stores).
 *
 * The complex input is read as a flat float array in which each sample
 * occupies two consecutive floats, real part first.
 *
 * \param iBuffer       Output buffer receiving num_points real values. Must be
 *                      32-byte aligned because _mm256_store_ps is used.
 * \param complexVector Input buffer of num_points complex samples. Must be
 *                      32-byte aligned because _mm256_load_ps is used.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_32fc_deinterleave_real_32f_a_avx2(float* iBuffer,
                                       const lv_32fc_t* complexVector,
                                       unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    const float* complexVectorPtr = (const float*)complexVector;
    float* iBufferPtr = iBuffer;

    __m256 cplxValue1, cplxValue2;
    __m256 iValue;

    /* The in-lane shuffle below yields r0 r1 r4 r5 | r2 r3 r6 r7; this index
     * vector swaps the two middle pairs to restore r0..r7 order. */
    const __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);

    for (; number < eighthPoints; number++) {
        /* Two loads cover eight complex samples (sixteen floats). */
        cplxValue1 = _mm256_load_ps(complexVectorPtr);
        complexVectorPtr += 8;

        cplxValue2 = _mm256_load_ps(complexVectorPtr);
        complexVectorPtr += 8;

        /* Keep floats 0 and 2 of each 128-bit lane, i.e. the real parts. */
        iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
        iValue = _mm256_permutevar8x32_ps(iValue, idx);

        _mm256_store_ps(iBufferPtr, iValue);
        iBufferPtr += 8;
    }

    /* Scalar tail for the remaining (num_points % 8) samples. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr++;
        complexVectorPtr++; /* skip the imaginary part */
    }
}
#endif /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*!
 * \brief Copies the real component of every complex sample into a float
 *        buffer (SSE, aligned loads and stores).
 *
 * The complex input is read as a flat float array in which each sample
 * occupies two consecutive floats, real part first.
 *
 * \param iBuffer       Output buffer receiving num_points real values. Must be
 *                      16-byte aligned because _mm_store_ps is used.
 * \param complexVector Input buffer of num_points complex samples. Must be
 *                      16-byte aligned because _mm_load_ps is used.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_32fc_deinterleave_real_32f_a_sse(float* iBuffer,
                                      const lv_32fc_t* complexVector,
                                      unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    const float* complexVectorPtr = (const float*)complexVector;
    float* iBufferPtr = iBuffer;

    __m128 cplxValue1, cplxValue2, iValue;

    for (; number < quarterPoints; number++) {
        /* Two loads cover four complex samples (eight floats). */
        cplxValue1 = _mm_load_ps(complexVectorPtr);
        complexVectorPtr += 4;

        cplxValue2 = _mm_load_ps(complexVectorPtr);
        complexVectorPtr += 4;

        /* Pick floats 0 and 2 of each register: r0 r1 r2 r3. */
        iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));

        _mm_store_ps(iBufferPtr, iValue);
        iBufferPtr += 4;
    }

    /* Scalar tail for the remaining (num_points % 4) samples. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr++;
        complexVectorPtr++; /* skip the imaginary part */
    }
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC

/*!
 * \brief Copies the real component of every complex sample into a float
 *        buffer (portable scalar implementation).
 *
 * The complex input is read as a flat float array in which each sample
 * occupies two consecutive floats, real part first.
 *
 * \param iBuffer       Output buffer receiving num_points real values.
 * \param complexVector Input buffer of num_points complex samples.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_32fc_deinterleave_real_32f_generic(float* iBuffer,
                                        const lv_32fc_t* complexVector,
                                        unsigned int num_points)
{
    unsigned int number = 0;
    const float* complexVectorPtr = (const float*)complexVector;
    float* iBufferPtr = iBuffer;

    for (number = 0; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr++;
        complexVectorPtr++; /* skip the imaginary part */
    }
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_NEON
#include <arm_neon.h>

/*!
 * \brief Copies the real component of every complex sample into a float
 *        buffer (ARM NEON).
 *
 * The complex input is read as a flat float array in which each sample
 * occupies two consecutive floats, real part first.
 *
 * \param iBuffer       Output buffer receiving num_points real values.
 * \param complexVector Input buffer of num_points complex samples.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_32fc_deinterleave_real_32f_neon(float* iBuffer,
                                     const lv_32fc_t* complexVector,
                                     unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarter_points = num_points / 4;
    const float* complexVectorPtr = (const float*)complexVector;
    float* iBufferPtr = iBuffer;
    float32x4x2_t complexInput;

    for (number = 0; number < quarter_points; number++) {
        /* vld2q_f32 de-interleaves eight floats: val[0] receives the
         * even-indexed floats (real parts), val[1] the odd-indexed ones. */
        complexInput = vld2q_f32(complexVectorPtr);
        vst1q_f32(iBufferPtr, complexInput.val[0]);
        complexVectorPtr += 8;
        iBufferPtr += 4;
    }

    /* Scalar tail for the remaining (num_points % 4) samples. */
    for (number = quarter_points * 4; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr++;
        complexVectorPtr++; /* skip the imaginary part */
    }
}
#endif /* LV_HAVE_NEON */
203#ifndef INCLUDED_volk_32fc_deinterleave_real_32f_u_H
204#define INCLUDED_volk_32fc_deinterleave_real_32f_u_H
#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*!
 * \brief Copies the real component of every complex sample into a float
 *        buffer (AVX2, unaligned loads and stores).
 *
 * The complex input is read as a flat float array in which each sample
 * occupies two consecutive floats, real part first. Unaligned load/store
 * intrinsics are used, so neither buffer needs any particular alignment.
 *
 * \param iBuffer       Output buffer receiving num_points real values.
 * \param complexVector Input buffer of num_points complex samples.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_32fc_deinterleave_real_32f_u_avx2(float* iBuffer,
                                       const lv_32fc_t* complexVector,
                                       unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    const float* complexVectorPtr = (const float*)complexVector;
    float* iBufferPtr = iBuffer;

    __m256 cplxValue1, cplxValue2;
    __m256 iValue;

    /* The in-lane shuffle below yields r0 r1 r4 r5 | r2 r3 r6 r7; this index
     * vector swaps the two middle pairs to restore r0..r7 order. */
    const __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);

    for (; number < eighthPoints; number++) {
        /* Two loads cover eight complex samples (sixteen floats). */
        cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
        complexVectorPtr += 8;

        cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
        complexVectorPtr += 8;

        /* Keep floats 0 and 2 of each 128-bit lane, i.e. the real parts. */
        iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
        iValue = _mm256_permutevar8x32_ps(iValue, idx);

        _mm256_storeu_ps(iBufferPtr, iValue);
        iBufferPtr += 8;
    }

    /* Scalar tail for the remaining (num_points % 8) samples. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr++;
        complexVectorPtr++; /* skip the imaginary part */
    }
}
#endif /* LV_HAVE_AVX2 */
static void volk_32fc_deinterleave_real_32f_generic(float *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_32f.h:158
static void volk_32fc_deinterleave_real_32f_a_sse(float *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_32f.h:120
static void volk_32fc_deinterleave_real_32f_neon(float *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_32f.h:176
float complex lv_32fc_t
Definition: volk_complex.h:65