Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_32f_binary_slicer_32i.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
70#ifndef INCLUDED_volk_32f_binary_slicer_32i_H
71#define INCLUDED_volk_32f_binary_slicer_32i_H
72
73
74#ifdef LV_HAVE_GENERIC
75
/*!
 * \brief Hard-slices a float stream into binary decisions (branching reference kernel).
 *
 * Each output element is 1 when the matching input sample compares >= 0 and
 * 0 otherwise.
 *
 * \param cVector    Output buffer; the first num_points ints are written.
 * \param aVector    Input buffer; the first num_points floats are read.
 * \param num_points Number of samples to process.
 */
static inline void volk_32f_binary_slicer_32i_generic(int* cVector,
                                                      const float* aVector,
                                                      unsigned int num_points)
{
    unsigned int idx;

    for (idx = 0; idx < num_points; idx++) {
        /* Start from the "negative" decision and flip it for samples >= 0. */
        int decision = 0;
        if (aVector[idx] >= 0) {
            decision = 1;
        }
        cVector[idx] = decision;
    }
}
92#endif /* LV_HAVE_GENERIC */
93
94
95#ifdef LV_HAVE_GENERIC
96
/*!
 * \brief Hard-slices a float stream into binary decisions without an if/else.
 *
 * The result of the comparison (sample >= 0), which is 1 or 0, is stored
 * directly as the output value.
 *
 * \param cVector    Output buffer; the first num_points ints are written.
 * \param aVector    Input buffer; the first num_points floats are read.
 * \param num_points Number of samples to process.
 */
static inline void volk_32f_binary_slicer_32i_generic_branchless(int* cVector,
                                                                 const float* aVector,
                                                                 unsigned int num_points)
{
    const float* in = aVector;
    int* out = cVector;
    unsigned int remaining;

    for (remaining = num_points; remaining != 0; --remaining) {
        const float sample = *in++;
        /* A relational expression already evaluates to exactly 0 or 1. */
        *out++ = (sample >= 0);
    }
}
109#endif /* LV_HAVE_GENERIC */
110
111
112#ifdef LV_HAVE_SSE2
113#include <emmintrin.h>
114
/*!
 * \brief SSE2 binary slicer for aligned buffers: writes 1 where the input is >= 0, else 0.
 *
 * Four samples are handled per iteration with aligned loads/stores
 * (_mm_load_ps / _mm_store_si128), so both buffers must satisfy the 16-byte
 * alignment those intrinsics require. Any remaining 1-3 samples are handled
 * by a scalar loop.
 *
 * \param cVector    Output buffer; the first num_points ints are written.
 * \param aVector    Input buffer; the first num_points floats are read.
 * \param num_points Number of samples to process.
 */
static inline void volk_32f_binary_slicer_32i_a_sse2(int* cVector,
                                                     const float* aVector,
                                                     unsigned int num_points)
{
    int* cPtr = cVector;
    const float* aPtr = aVector;
    unsigned int number = 0;

    const unsigned int quarter_points = num_points / 4;
    const __m128 zero_val = _mm_set1_ps(0.0f);

    for (number = 0; number < quarter_points; number++) {
        const __m128 a_val = _mm_load_ps(aPtr);

        /* Each lane becomes all-ones (true) or all-zeros (false). */
        const __m128 mask_f = _mm_cmpge_ps(a_val, zero_val);

        /*
         * Reinterpret the mask bits as integers instead of converting them:
         * an all-ones lane is a NaN bit pattern, and a float->int conversion
         * of it raises the invalid-operation exception. The bit cast is
         * exception-free, and shifting the top bit down yields 1 or 0.
         */
        const __m128i binary_i = _mm_srli_epi32(_mm_castps_si128(mask_f), 31);

        _mm_store_si128((__m128i*)cPtr, binary_i);

        cPtr += 4;
        aPtr += 4;
    }

    /* Scalar tail for the samples that do not fill a whole vector. */
    for (number = quarter_points * 4; number < num_points; number++) {
        if (*aPtr++ >= 0) {
            *cPtr++ = 1;
        } else {
            *cPtr++ = 0;
        }
    }
}
150#endif /* LV_HAVE_SSE2 */
151
152
153#ifdef LV_HAVE_AVX
154#include <immintrin.h>
155
/*!
 * \brief AVX binary slicer for aligned buffers: writes 1 where the input is >= 0, else 0.
 *
 * Eight samples are handled per iteration with aligned loads/stores
 * (_mm256_load_ps / _mm256_store_si256), so both buffers must satisfy the
 * 32-byte alignment those intrinsics require. Any remaining 1-7 samples are
 * handled by a scalar loop.
 *
 * \param cVector    Output buffer; the first num_points ints are written.
 * \param aVector    Input buffer; the first num_points floats are read.
 * \param num_points Number of samples to process.
 */
static inline void volk_32f_binary_slicer_32i_a_avx(int* cVector,
                                                    const float* aVector,
                                                    unsigned int num_points)
{
    const unsigned int eighth_points = num_points / 8;
    const __m256 zeros = _mm256_set1_ps(0.0f);
    const __m256 ones = _mm256_set1_ps(1.0f);
    const float* in = aVector;
    int* out = cVector;
    unsigned int i;

    for (i = 0; i < eighth_points; i++, in += 8, out += 8) {
        const __m256 samples = _mm256_load_ps(in);

        /* All-ones lanes where sample >= 0, all-zeros lanes elsewhere. */
        const __m256 ge_mask = _mm256_cmp_ps(samples, zeros, _CMP_GE_OS);

        /* Masking 1.0f leaves 1.0f in true lanes and 0.0f in false lanes. */
        const __m256 one_or_zero = _mm256_and_ps(ge_mask, ones);

        _mm256_store_si256((__m256i*)out, _mm256_cvtps_epi32(one_or_zero));
    }

    /* Scalar tail for the samples that do not fill a whole vector. */
    for (i = eighth_points * 8; i < num_points; i++) {
        *out++ = (*in++ >= 0) ? 1 : 0;
    }
}
192#endif /* LV_HAVE_AVX */
193
194
195#ifdef LV_HAVE_SSE2
196#include <emmintrin.h>
197
/*!
 * \brief SSE2 binary slicer for unaligned buffers: writes 1 where the input is >= 0, else 0.
 *
 * Four samples are handled per iteration with unaligned loads/stores
 * (_mm_loadu_ps / _mm_storeu_si128). Any remaining 1-3 samples are handled
 * by a scalar loop.
 *
 * \param cVector    Output buffer; the first num_points ints are written.
 * \param aVector    Input buffer; the first num_points floats are read.
 * \param num_points Number of samples to process.
 */
static inline void volk_32f_binary_slicer_32i_u_sse2(int* cVector,
                                                     const float* aVector,
                                                     unsigned int num_points)
{
    int* cPtr = cVector;
    const float* aPtr = aVector;
    unsigned int number = 0;

    const unsigned int quarter_points = num_points / 4;
    const __m128 zero_val = _mm_set1_ps(0.0f);

    for (number = 0; number < quarter_points; number++) {
        const __m128 a_val = _mm_loadu_ps(aPtr);

        /* Each lane becomes all-ones (true) or all-zeros (false). */
        const __m128 mask_f = _mm_cmpge_ps(a_val, zero_val);

        /*
         * Reinterpret the mask bits as integers instead of converting them:
         * an all-ones lane is a NaN bit pattern, and a float->int conversion
         * of it raises the invalid-operation exception. The bit cast is
         * exception-free, and shifting the top bit down yields 1 or 0.
         */
        const __m128i binary_i = _mm_srli_epi32(_mm_castps_si128(mask_f), 31);

        _mm_storeu_si128((__m128i*)cPtr, binary_i);

        cPtr += 4;
        aPtr += 4;
    }

    /* Scalar tail for the samples that do not fill a whole vector. */
    for (number = quarter_points * 4; number < num_points; number++) {
        if (*aPtr++ >= 0) {
            *cPtr++ = 1;
        } else {
            *cPtr++ = 0;
        }
    }
}
233#endif /* LV_HAVE_SSE2 */
234
235
236#ifdef LV_HAVE_AVX
237#include <immintrin.h>
238
/*!
 * \brief AVX binary slicer for unaligned buffers: writes 1 where the input is >= 0, else 0.
 *
 * Eight samples are handled per iteration with unaligned loads/stores
 * (_mm256_loadu_ps / _mm256_storeu_si256). Any remaining 1-7 samples are
 * handled by a scalar loop.
 *
 * \param cVector    Output buffer; the first num_points ints are written.
 * \param aVector    Input buffer; the first num_points floats are read.
 * \param num_points Number of samples to process.
 */
static inline void volk_32f_binary_slicer_32i_u_avx(int* cVector,
                                                    const float* aVector,
                                                    unsigned int num_points)
{
    const unsigned int eighth_points = num_points / 8;
    const __m256 zeros = _mm256_set1_ps(0.0f);
    const __m256 ones = _mm256_set1_ps(1.0f);
    const float* in = aVector;
    int* out = cVector;
    unsigned int i;

    for (i = 0; i < eighth_points; i++, in += 8, out += 8) {
        const __m256 samples = _mm256_loadu_ps(in);

        /* All-ones lanes where sample >= 0, all-zeros lanes elsewhere. */
        const __m256 ge_mask = _mm256_cmp_ps(samples, zeros, _CMP_GE_OS);

        /* Masking 1.0f leaves 1.0f in true lanes and 0.0f in false lanes. */
        const __m256 one_or_zero = _mm256_and_ps(ge_mask, ones);

        _mm256_storeu_si256((__m256i*)out, _mm256_cvtps_epi32(one_or_zero));
    }

    /* Scalar tail for the samples that do not fill a whole vector. */
    for (i = eighth_points * 8; i < num_points; i++) {
        *out++ = (*in++ >= 0) ? 1 : 0;
    }
}
275#endif /* LV_HAVE_AVX */
276
277
278#endif /* INCLUDED_volk_32f_binary_slicer_32i_H */
static void volk_32f_binary_slicer_32i_generic(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:76
static void volk_32f_binary_slicer_32i_generic_branchless(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:97
static void volk_32f_binary_slicer_32i_u_sse2(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:198
static void volk_32f_binary_slicer_32i_a_avx(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:156
static void volk_32f_binary_slicer_32i_a_sse2(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:115
static void volk_32f_binary_slicer_32i_u_avx(int *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_binary_slicer_32i.h:239