Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_32fc_s32f_deinterleave_real_16i.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
73#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
74#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
75
76#include <inttypes.h>
77#include <stdio.h>
78#include <volk/volk_common.h>
79
80
81#ifdef LV_HAVE_AVX2
82#include <immintrin.h>
83
84static inline void
85volk_32fc_s32f_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
86 const lv_32fc_t* complexVector,
87 const float scalar,
88 unsigned int num_points)
89{
90 unsigned int number = 0;
91 const unsigned int eighthPoints = num_points / 8;
92
93 const float* complexVectorPtr = (float*)complexVector;
94 int16_t* iBufferPtr = iBuffer;
95
96 __m256 vScalar = _mm256_set1_ps(scalar);
97
98 __m256 cplxValue1, cplxValue2, iValue;
99 __m256i a;
100 __m128i b;
101
102 __m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);
103
104 for (; number < eighthPoints; number++) {
105 cplxValue1 = _mm256_load_ps(complexVectorPtr);
106 complexVectorPtr += 8;
107
108 cplxValue2 = _mm256_load_ps(complexVectorPtr);
109 complexVectorPtr += 8;
110
111 // Arrange in i1i2i3i4 format
112 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
113
114 iValue = _mm256_mul_ps(iValue, vScalar);
115
116 iValue = _mm256_round_ps(iValue, _MM_FROUND_TO_ZERO);
117 a = _mm256_cvtps_epi32(iValue);
118 a = _mm256_packs_epi32(a, a);
119 a = _mm256_permutevar8x32_epi32(a, idx);
120 b = _mm256_extracti128_si256(a, 0);
121
122 _mm_store_si128((__m128i*)iBufferPtr, b);
123 iBufferPtr += 8;
124 }
125
126 number = eighthPoints * 8;
127 iBufferPtr = &iBuffer[number];
128 for (; number < num_points; number++) {
129 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
130 complexVectorPtr++;
131 }
132}
133
134
135#endif /* LV_HAVE_AVX2 */
136
137#ifdef LV_HAVE_SSE
138#include <xmmintrin.h>
139
140static inline void
142 const lv_32fc_t* complexVector,
143 const float scalar,
144 unsigned int num_points)
145{
146 unsigned int number = 0;
147 const unsigned int quarterPoints = num_points / 4;
148
149 const float* complexVectorPtr = (float*)complexVector;
150 int16_t* iBufferPtr = iBuffer;
151
152 __m128 vScalar = _mm_set_ps1(scalar);
153
154 __m128 cplxValue1, cplxValue2, iValue;
155
156 __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
157
158 for (; number < quarterPoints; number++) {
159 cplxValue1 = _mm_load_ps(complexVectorPtr);
160 complexVectorPtr += 4;
161
162 cplxValue2 = _mm_load_ps(complexVectorPtr);
163 complexVectorPtr += 4;
164
165 // Arrange in i1i2i3i4 format
166 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
167
168 iValue = _mm_mul_ps(iValue, vScalar);
169
170 _mm_store_ps(floatBuffer, iValue);
171 *iBufferPtr++ = (int16_t)(floatBuffer[0]);
172 *iBufferPtr++ = (int16_t)(floatBuffer[1]);
173 *iBufferPtr++ = (int16_t)(floatBuffer[2]);
174 *iBufferPtr++ = (int16_t)(floatBuffer[3]);
175 }
176
177 number = quarterPoints * 4;
178 iBufferPtr = &iBuffer[number];
179 for (; number < num_points; number++) {
180 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
181 complexVectorPtr++;
182 }
183}
184
185#endif /* LV_HAVE_SSE */
186
187
188#ifdef LV_HAVE_GENERIC
189
190static inline void
192 const lv_32fc_t* complexVector,
193 const float scalar,
194 unsigned int num_points)
195{
196 const float* complexVectorPtr = (float*)complexVector;
197 int16_t* iBufferPtr = iBuffer;
198 unsigned int number = 0;
199 for (number = 0; number < num_points; number++) {
200 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
201 complexVectorPtr++;
202 }
203}
204
205#endif /* LV_HAVE_GENERIC */
206
207#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H */
208
209#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
210#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
211
212#include <inttypes.h>
213#include <stdio.h>
214#include <volk/volk_common.h>
215
216#ifdef LV_HAVE_AVX2
217#include <immintrin.h>
218
219static inline void
220volk_32fc_s32f_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
221 const lv_32fc_t* complexVector,
222 const float scalar,
223 unsigned int num_points)
224{
225 unsigned int number = 0;
226 const unsigned int eighthPoints = num_points / 8;
227
228 const float* complexVectorPtr = (float*)complexVector;
229 int16_t* iBufferPtr = iBuffer;
230
231 __m256 vScalar = _mm256_set1_ps(scalar);
232
233 __m256 cplxValue1, cplxValue2, iValue;
234 __m256i a;
235 __m128i b;
236
237 __m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);
238
239 for (; number < eighthPoints; number++) {
240 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
241 complexVectorPtr += 8;
242
243 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
244 complexVectorPtr += 8;
245
246 // Arrange in i1i2i3i4 format
247 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
248
249 iValue = _mm256_mul_ps(iValue, vScalar);
250
251 iValue = _mm256_round_ps(iValue, _MM_FROUND_TO_ZERO);
252 a = _mm256_cvtps_epi32(iValue);
253 a = _mm256_packs_epi32(a, a);
254 a = _mm256_permutevar8x32_epi32(a, idx);
255 b = _mm256_extracti128_si256(a, 0);
256
257 _mm_storeu_si128((__m128i*)iBufferPtr, b);
258 iBufferPtr += 8;
259 }
260
261 number = eighthPoints * 8;
262 iBufferPtr = &iBuffer[number];
263 for (; number < num_points; number++) {
264 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
265 complexVectorPtr++;
266 }
267}
268
269#endif /* LV_HAVE_AVX2 */
270
271#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H */
static void volk_32fc_s32f_deinterleave_real_16i_generic(int16_t *iBuffer, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_deinterleave_real_16i.h:191
static void volk_32fc_s32f_deinterleave_real_16i_a_sse(int16_t *iBuffer, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_deinterleave_real_16i.h:141
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56
float complex lv_32fc_t
Definition: volk_complex.h:65