Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_8ic_deinterleave_real_16i.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
53#ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a_H
54#define INCLUDED_volk_8ic_deinterleave_real_16i_a_H
55
56#include <inttypes.h>
57#include <stdio.h>
58
59
60#ifdef LV_HAVE_AVX2
61#include <immintrin.h>
62
63static inline void volk_8ic_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
64 const lv_8sc_t* complexVector,
65 unsigned int num_points)
66{
67 unsigned int number = 0;
68 const int8_t* complexVectorPtr = (int8_t*)complexVector;
69 int16_t* iBufferPtr = iBuffer;
70 __m256i moveMask = _mm256_set_epi8(0x80,
71 0x80,
72 0x80,
73 0x80,
74 0x80,
75 0x80,
76 0x80,
77 0x80,
78 14,
79 12,
80 10,
81 8,
82 6,
83 4,
84 2,
85 0,
86 0x80,
87 0x80,
88 0x80,
89 0x80,
90 0x80,
91 0x80,
92 0x80,
93 0x80,
94 14,
95 12,
96 10,
97 8,
98 6,
99 4,
100 2,
101 0);
102 __m256i complexVal, outputVal;
103 __m128i outputVal0;
104
105 unsigned int sixteenthPoints = num_points / 16;
106
107 for (number = 0; number < sixteenthPoints; number++) {
108 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
109 complexVectorPtr += 32;
110
111 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
112 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
113
114 outputVal0 = _mm256_extractf128_si256(complexVal, 0);
115
116 outputVal = _mm256_cvtepi8_epi16(outputVal0);
117 outputVal = _mm256_slli_epi16(outputVal, 7);
118
119 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
120
121 iBufferPtr += 16;
122 }
123
124 number = sixteenthPoints * 16;
125 for (; number < num_points; number++) {
126 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
127 complexVectorPtr++;
128 }
129}
130#endif /* LV_HAVE_AVX2 */
131
132#ifdef LV_HAVE_SSE4_1
133#include <smmintrin.h>
134
135static inline void volk_8ic_deinterleave_real_16i_a_sse4_1(int16_t* iBuffer,
136 const lv_8sc_t* complexVector,
137 unsigned int num_points)
138{
139 unsigned int number = 0;
140 const int8_t* complexVectorPtr = (int8_t*)complexVector;
141 int16_t* iBufferPtr = iBuffer;
142 __m128i moveMask = _mm_set_epi8(
143 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
144 __m128i complexVal, outputVal;
145
146 unsigned int eighthPoints = num_points / 8;
147
148 for (number = 0; number < eighthPoints; number++) {
149 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
150 complexVectorPtr += 16;
151
152 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
153
154 outputVal = _mm_cvtepi8_epi16(complexVal);
155 outputVal = _mm_slli_epi16(outputVal, 7);
156
157 _mm_store_si128((__m128i*)iBufferPtr, outputVal);
158 iBufferPtr += 8;
159 }
160
161 number = eighthPoints * 8;
162 for (; number < num_points; number++) {
163 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
164 complexVectorPtr++;
165 }
166}
167#endif /* LV_HAVE_SSE4_1 */
168
169
170#ifdef LV_HAVE_AVX
171#include <immintrin.h>
172
173static inline void volk_8ic_deinterleave_real_16i_a_avx(int16_t* iBuffer,
174 const lv_8sc_t* complexVector,
175 unsigned int num_points)
176{
177 unsigned int number = 0;
178 const int8_t* complexVectorPtr = (int8_t*)complexVector;
179 int16_t* iBufferPtr = iBuffer;
180 __m128i moveMask = _mm_set_epi8(
181 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
182 __m256i complexVal, outputVal;
183 __m128i complexVal1, complexVal0, outputVal1, outputVal0;
184
185 unsigned int sixteenthPoints = num_points / 16;
186
187 for (number = 0; number < sixteenthPoints; number++) {
188 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
189 complexVectorPtr += 32;
190
191 complexVal1 = _mm256_extractf128_si256(complexVal, 1);
192 complexVal0 = _mm256_extractf128_si256(complexVal, 0);
193
194 outputVal1 = _mm_shuffle_epi8(complexVal1, moveMask);
195 outputVal0 = _mm_shuffle_epi8(complexVal0, moveMask);
196
197 outputVal1 = _mm_cvtepi8_epi16(outputVal1);
198 outputVal1 = _mm_slli_epi16(outputVal1, 7);
199 outputVal0 = _mm_cvtepi8_epi16(outputVal0);
200 outputVal0 = _mm_slli_epi16(outputVal0, 7);
201
202 __m256i dummy = _mm256_setzero_si256();
203 outputVal = _mm256_insertf128_si256(dummy, outputVal0, 0);
204 outputVal = _mm256_insertf128_si256(outputVal, outputVal1, 1);
205 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
206
207 iBufferPtr += 16;
208 }
209
210 number = sixteenthPoints * 16;
211 for (; number < num_points; number++) {
212 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
213 complexVectorPtr++;
214 }
215}
216#endif /* LV_HAVE_AVX */
217
218
219#ifdef LV_HAVE_GENERIC
220
221static inline void volk_8ic_deinterleave_real_16i_generic(int16_t* iBuffer,
222 const lv_8sc_t* complexVector,
223 unsigned int num_points)
224{
225 unsigned int number = 0;
226 const int8_t* complexVectorPtr = (const int8_t*)complexVector;
227 int16_t* iBufferPtr = iBuffer;
228 for (number = 0; number < num_points; number++) {
229 *iBufferPtr++ = ((int16_t)(*complexVectorPtr++)) * 128;
230 complexVectorPtr++;
231 }
232}
233#endif /* LV_HAVE_GENERIC */
234
235
236#endif /* INCLUDED_volk_8ic_deinterleave_real_16i_a_H */
237
238#ifndef INCLUDED_volk_8ic_deinterleave_real_16i_u_H
239#define INCLUDED_volk_8ic_deinterleave_real_16i_u_H
240
241#include <inttypes.h>
242#include <stdio.h>
243
244
245#ifdef LV_HAVE_AVX2
246#include <immintrin.h>
247
248static inline void volk_8ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
249 const lv_8sc_t* complexVector,
250 unsigned int num_points)
251{
252 unsigned int number = 0;
253 const int8_t* complexVectorPtr = (int8_t*)complexVector;
254 int16_t* iBufferPtr = iBuffer;
255 __m256i moveMask = _mm256_set_epi8(0x80,
256 0x80,
257 0x80,
258 0x80,
259 0x80,
260 0x80,
261 0x80,
262 0x80,
263 14,
264 12,
265 10,
266 8,
267 6,
268 4,
269 2,
270 0,
271 0x80,
272 0x80,
273 0x80,
274 0x80,
275 0x80,
276 0x80,
277 0x80,
278 0x80,
279 14,
280 12,
281 10,
282 8,
283 6,
284 4,
285 2,
286 0);
287 __m256i complexVal, outputVal;
288 __m128i outputVal0;
289
290 unsigned int sixteenthPoints = num_points / 16;
291
292 for (number = 0; number < sixteenthPoints; number++) {
293 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
294 complexVectorPtr += 32;
295
296 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
297 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
298
299 outputVal0 = _mm256_extractf128_si256(complexVal, 0);
300
301 outputVal = _mm256_cvtepi8_epi16(outputVal0);
302 outputVal = _mm256_slli_epi16(outputVal, 7);
303
304 _mm256_storeu_si256((__m256i*)iBufferPtr, outputVal);
305
306 iBufferPtr += 16;
307 }
308
309 number = sixteenthPoints * 16;
310 for (; number < num_points; number++) {
311 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
312 complexVectorPtr++;
313 }
314}
315#endif /* LV_HAVE_AVX2 */
316#endif /* INCLUDED_volk_8ic_deinterleave_real_16i_u_H */
static void volk_8ic_deinterleave_real_16i_a_avx(int16_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_16i.h:173
static void volk_8ic_deinterleave_real_16i_generic(int16_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_16i.h:221
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:61