Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_32fc_deinterleave_imag_32f.h
Go to the documentation of this file.
/* -*- c++ -*- */
/*
 * Copyright 2012, 2014 Free Software Foundation, Inc.
 *
 * This file is part of GNU Radio
 *
 * GNU Radio is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3, or (at your option)
 * any later version.
 *
 * GNU Radio is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU Radio; see the file COPYING. If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street,
 * Boston, MA 02110-1301, USA.
 */
22
70#ifndef INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
71#define INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
72
73#include <inttypes.h>
74#include <stdio.h>
75
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*
 * Copies the second float of every interleaved (first, second) pair in
 * complexVector into qBuffer, eight pairs per iteration using AVX.
 *
 * qBuffer       - destination; receives num_points floats.
 * complexVector - source; read as 2 * num_points consecutive floats.
 * num_points    - number of complex samples to process.
 *
 * The main loop uses the aligned load/store intrinsics (_mm256_load_ps /
 * _mm256_store_ps), so both buffers are expected to satisfy their 32-byte
 * alignment requirement. Samples left over after the last full group of
 * eight are handled by a scalar loop.
 */
static inline void volk_32fc_deinterleave_imag_32f_a_avx(float* qBuffer,
                                                         const lv_32fc_t* complexVector,
                                                         unsigned int num_points)
{
    const unsigned int simdBlocks = num_points / 8;
    const float* in = (const float*)complexVector;
    float* out = qBuffer;
    unsigned int block;
    unsigned int idx;

    for (block = 0; block < simdBlocks; ++block) {
        /* Sixteen floats = eight interleaved pairs. */
        const __m256 lowerHalf = _mm256_load_ps(in);
        const __m256 upperHalf = _mm256_load_ps(in + 8);

        /* 0x20: low 128-bit lanes of both inputs; 0x31: high lanes of both. */
        const __m256 lowLanes = _mm256_permute2f128_ps(lowerHalf, upperHalf, 0x20);
        const __m256 highLanes = _mm256_permute2f128_ps(lowerHalf, upperHalf, 0x31);

        /* 0xdd picks elements 1 and 3 of each lane from both operands,
         * i.e. the odd-indexed floats, already in sample order. */
        _mm256_store_ps(out, _mm256_shuffle_ps(lowLanes, highLanes, 0xdd));

        in += 16;
        out += 8;
    }

    /* Scalar tail for the remaining num_points % 8 samples. */
    for (idx = simdBlocks * 8; idx < num_points; ++idx) {
        *out++ = in[1];
        in += 2;
    }
}
#endif /* LV_HAVE_AVX */
115
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*
 * Copies the second float of every interleaved (first, second) pair in
 * complexVector into qBuffer, four pairs per iteration using SSE.
 *
 * qBuffer       - destination; receives num_points floats.
 * complexVector - source; read as 2 * num_points consecutive floats.
 * num_points    - number of complex samples to process.
 *
 * The main loop uses the aligned load/store intrinsics (_mm_load_ps /
 * _mm_store_ps), so both buffers are expected to satisfy their 16-byte
 * alignment requirement. Samples left over after the last full group of
 * four are handled by a scalar loop.
 */
static inline void volk_32fc_deinterleave_imag_32f_a_sse(float* qBuffer,
                                                         const lv_32fc_t* complexVector,
                                                         unsigned int num_points)
{
    const unsigned int simdBlocks = num_points / 4;
    const float* in = (const float*)complexVector;
    float* out = qBuffer;
    unsigned int block;
    unsigned int idx;

    for (block = 0; block < simdBlocks; ++block) {
        /* Eight floats = four interleaved pairs. */
        const __m128 firstPairs = _mm_load_ps(in);
        const __m128 secondPairs = _mm_load_ps(in + 4);

        /* Elements 1 and 3 of each operand are the odd-indexed floats. */
        const __m128 qValue =
            _mm_shuffle_ps(firstPairs, secondPairs, _MM_SHUFFLE(3, 1, 3, 1));

        _mm_store_ps(out, qValue);

        in += 8;
        out += 4;
    }

    /* Scalar tail for the remaining num_points % 4 samples. */
    for (idx = simdBlocks * 4; idx < num_points; ++idx) {
        *out++ = in[1];
        in += 2;
    }
}
#endif /* LV_HAVE_SSE */
153
#ifdef LV_HAVE_NEON
#include <arm_neon.h>

/*
 * Copies the second float of every interleaved (first, second) pair in
 * complexVector into qBuffer, four pairs per iteration using NEON.
 *
 * qBuffer       - destination; receives num_points floats.
 * complexVector - source; read as 2 * num_points consecutive floats.
 * num_points    - number of complex samples to process.
 *
 * Samples left over after the last full group of four are handled by a
 * scalar loop.
 */
static inline void volk_32fc_deinterleave_imag_32f_neon(float* qBuffer,
                                                        const lv_32fc_t* complexVector,
                                                        unsigned int num_points)
{
    unsigned int number;
    const unsigned int quarter_points = num_points / 4;
    /* Keep the const qualifier of the input when reinterpreting it as floats. */
    const float* complexVectorPtr = (const float*)complexVector;
    float* qBufferPtr = qBuffer;
    float32x4x2_t complexInput;

    for (number = 0; number < quarter_points; number++) {
        /* vld2q_f32 de-interleaves on load: val[0] gets the even-indexed
         * floats and val[1] the odd-indexed ones, which are the values
         * this kernel outputs. */
        complexInput = vld2q_f32(complexVectorPtr);
        vst1q_f32(qBufferPtr, complexInput.val[1]);
        complexVectorPtr += 8;
        qBufferPtr += 4;
    }

    /* Scalar tail for the remaining num_points % 4 samples. */
    for (number = quarter_points * 4; number < num_points; number++) {
        complexVectorPtr++; /* skip the first float of the pair */
        *qBufferPtr++ = *complexVectorPtr++;
    }
}
#endif /* LV_HAVE_NEON */
180
#ifdef LV_HAVE_GENERIC

/*
 * Portable scalar version: copies the second float of every interleaved
 * (first, second) pair in complexVector into qBuffer.
 *
 * qBuffer       - destination; receives num_points floats.
 * complexVector - source; read as 2 * num_points consecutive floats.
 * num_points    - number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_imag_32f_generic(float* qBuffer,
                                                           const lv_32fc_t* complexVector,
                                                           unsigned int num_points)
{
    unsigned int number;
    /* Keep the const qualifier of the input when reinterpreting it as floats. */
    const float* complexVectorPtr = (const float*)complexVector;
    float* qBufferPtr = qBuffer;

    for (number = 0; number < num_points; number++) {
        complexVectorPtr++; /* skip the first float of the pair */
        *qBufferPtr++ = *complexVectorPtr++;
    }
}
#endif /* LV_HAVE_GENERIC */
196
197
198#endif /* INCLUDED_volk_32fc_deinterleave_imag_32f_a_H */
199
200#ifndef INCLUDED_volk_32fc_deinterleave_imag_32f_u_H
201#define INCLUDED_volk_32fc_deinterleave_imag_32f_u_H
202
203#include <inttypes.h>
204#include <stdio.h>
205
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*
 * Copies the second float of every interleaved (first, second) pair in
 * complexVector into qBuffer, eight pairs per iteration using AVX.
 *
 * qBuffer       - destination; receives num_points floats.
 * complexVector - source; read as 2 * num_points consecutive floats.
 * num_points    - number of complex samples to process.
 *
 * This variant uses the unaligned load/store intrinsics (_mm256_loadu_ps /
 * _mm256_storeu_ps). Samples left over after the last full group of eight
 * are handled by a scalar loop.
 */
static inline void volk_32fc_deinterleave_imag_32f_u_avx(float* qBuffer,
                                                         const lv_32fc_t* complexVector,
                                                         unsigned int num_points)
{
    const unsigned int simdBlocks = num_points / 8;
    const float* src = (const float*)complexVector;
    float* dst = qBuffer;
    unsigned int remaining = simdBlocks;
    unsigned int idx;

    while (remaining != 0) {
        /* Sixteen floats = eight interleaved pairs. */
        const __m256 lowerHalf = _mm256_loadu_ps(src);
        const __m256 upperHalf = _mm256_loadu_ps(src + 8);

        /* 0x20: low 128-bit lanes of both inputs; 0x31: high lanes of both. */
        const __m256 lowLanes = _mm256_permute2f128_ps(lowerHalf, upperHalf, 0x20);
        const __m256 highLanes = _mm256_permute2f128_ps(lowerHalf, upperHalf, 0x31);

        /* 0xdd picks elements 1 and 3 of each lane from both operands,
         * i.e. the odd-indexed floats, already in sample order. */
        _mm256_storeu_ps(dst, _mm256_shuffle_ps(lowLanes, highLanes, 0xdd));

        src += 16;
        dst += 8;
        --remaining;
    }

    /* Scalar tail for the remaining num_points % 8 samples. */
    for (idx = simdBlocks * 8; idx < num_points; ++idx) {
        *dst++ = src[1];
        src += 2;
    }
}
#endif /* LV_HAVE_AVX */
245#endif /* INCLUDED_volk_32fc_deinterleave_imag_32f_u_H */
static void volk_32fc_deinterleave_imag_32f_a_sse(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:119
static void volk_32fc_deinterleave_imag_32f_neon(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:157
static void volk_32fc_deinterleave_imag_32f_u_avx(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:209
static void volk_32fc_deinterleave_imag_32f_a_avx(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:79
static void volk_32fc_deinterleave_imag_32f_generic(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:183
float complex lv_32fc_t
Definition: volk_complex.h:65