69#ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H
70#define INCLUDED_volk_32f_s32f_multiply_32f_u_H
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*!
 * \brief Multiplies each float in aVector by scalar and writes it to cVector
 *        (SSE, unaligned loads/stores).
 *
 * \param cVector    Output buffer; receives num_points products.
 * \param aVector    Input buffer; num_points floats are read.
 * \param scalar     Factor applied to every input element.
 * \param num_points Number of floats to process.
 */
static inline void volk_32f_s32f_multiply_32f_u_sse(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    __m128 aVal, bVal, cVal;
    bVal = _mm_set_ps1(scalar); /* broadcast scalar into all four lanes */
    for (; number < quarterPoints; number++) {
        aVal = _mm_loadu_ps(aPtr);

        cVal = _mm_mul_ps(aVal, bVal);

        _mm_storeu_ps(cPtr, cVal);

        /* Step both cursors to the next group of four floats. */
        aPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the 0-3 elements that do not fill a whole vector. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * scalar;
    }
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
 * \brief Multiplies each float in aVector by scalar and writes it to cVector
 *        (AVX, unaligned loads/stores).
 *
 * \param cVector    Output buffer; receives num_points products.
 * \param aVector    Input buffer; num_points floats are read.
 * \param scalar     Factor applied to every input element.
 * \param num_points Number of floats to process.
 */
static inline void volk_32f_s32f_multiply_32f_u_avx(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    __m256 aVal, bVal, cVal;
    bVal = _mm256_set1_ps(scalar); /* broadcast scalar into all eight lanes */
    for (; number < eighthPoints; number++) {
        aVal = _mm256_loadu_ps(aPtr);

        cVal = _mm256_mul_ps(aVal, bVal);

        _mm256_storeu_ps(cPtr, cVal);

        /* Step both cursors to the next group of eight floats. */
        aPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail: the 0-7 elements that do not fill a whole vector. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * scalar;
    }
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_GENERIC

/*!
 * \brief Multiplies each float in aVector by scalar and writes it to cVector
 *        using plain C, one element per iteration.
 *
 * \param cVector    Output buffer; receives num_points products.
 * \param aVector    Input buffer; num_points floats are read.
 * \param scalar     Factor applied to every input element.
 * \param num_points Number of floats to process.
 */
static inline void volk_32f_s32f_multiply_32f_generic(float* cVector,
                                                      const float* aVector,
                                                      const float scalar,
                                                      unsigned int num_points)
{
    unsigned int number = 0;
    const float* inputPtr = aVector;
    float* outputPtr = cVector;
    for (number = 0; number < num_points; number++) {
        *outputPtr = (*inputPtr) * scalar;
        /* Advance both cursors so each iteration handles a new element. */
        inputPtr++;
        outputPtr++;
    }
}
#endif /* LV_HAVE_GENERIC */
165#ifndef INCLUDED_volk_32f_s32f_multiply_32f_a_H
166#define INCLUDED_volk_32f_s32f_multiply_32f_a_H
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*!
 * \brief Multiplies each float in aVector by scalar and writes it to cVector
 *        (SSE, aligned loads/stores).
 *
 * The vector loop uses _mm_load_ps / _mm_store_ps, so both buffers must be
 * 16-byte aligned.
 *
 * \param cVector    Output buffer; receives num_points products.
 * \param aVector    Input buffer; num_points floats are read.
 * \param scalar     Factor applied to every input element.
 * \param num_points Number of floats to process.
 */
static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    __m128 aVal, bVal, cVal;
    bVal = _mm_set_ps1(scalar); /* broadcast scalar into all four lanes */
    for (; number < quarterPoints; number++) {
        aVal = _mm_load_ps(aPtr);

        cVal = _mm_mul_ps(aVal, bVal);

        _mm_store_ps(cPtr, cVal);

        /* Step both cursors to the next group of four floats. */
        aPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the 0-3 elements that do not fill a whole vector. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * scalar;
    }
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
 * \brief Multiplies each float in aVector by scalar and writes it to cVector
 *        (AVX, aligned loads/stores).
 *
 * The vector loop uses _mm256_load_ps / _mm256_store_ps, so both buffers must
 * be 32-byte aligned.
 *
 * \param cVector    Output buffer; receives num_points products.
 * \param aVector    Input buffer; num_points floats are read.
 * \param scalar     Factor applied to every input element.
 * \param num_points Number of floats to process.
 */
static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    __m256 aVal, bVal, cVal;
    bVal = _mm256_set1_ps(scalar); /* broadcast scalar into all eight lanes */
    for (; number < eighthPoints; number++) {
        aVal = _mm256_load_ps(aPtr);

        cVal = _mm256_mul_ps(aVal, bVal);

        _mm256_store_ps(cPtr, cVal);

        /* Step both cursors to the next group of eight floats. */
        aPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail: the 0-7 elements that do not fill a whole vector. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * scalar;
    }
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_NEON
#include <arm_neon.h>

/*!
 * \brief Multiplies each float in aVector by scalar and writes it to cVector
 *        (NEON, four floats per vector iteration).
 *
 * \param cVector    Output buffer; receives num_points products.
 * \param aVector    Input buffer; num_points floats are read.
 * \param scalar     Factor applied to every input element.
 * \param num_points Number of floats to process.
 */
static inline void volk_32f_s32f_multiply_32f_u_neon(float* cVector,
                                                     const float* aVector,
                                                     const float scalar,
                                                     unsigned int num_points)
{
    unsigned int number = 0;
    const float* inputPtr = aVector;
    float* outputPtr = cVector;
    const unsigned int quarterPoints = num_points / 4;

    float32x4_t aVal, cVal;

    for (number = 0; number < quarterPoints; number++) {
        aVal = vld1q_f32(inputPtr);
        cVal = vmulq_n_f32(aVal, scalar);
        vst1q_f32(outputPtr, cVal);
        /* Step both cursors to the next group of four floats. */
        inputPtr += 4;
        outputPtr += 4;
    }

    /* Scalar tail: the 0-3 elements that do not fill a whole vector. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        *outputPtr++ = (*inputPtr++) * scalar;
    }
}
#endif /* LV_HAVE_NEON */
#ifdef LV_HAVE_GENERIC

/*!
 * \brief Multiplies each float in aVector by scalar and writes it to cVector
 *        using plain C, one element per iteration.
 *
 * \param cVector    Output buffer; receives num_points products.
 * \param aVector    Input buffer; num_points floats are read.
 * \param scalar     Factor applied to every input element.
 * \param num_points Number of floats to process.
 */
static inline void volk_32f_s32f_multiply_32f_a_generic(float* cVector,
                                                        const float* aVector,
                                                        const float scalar,
                                                        unsigned int num_points)
{
    unsigned int number = 0;
    const float* inputPtr = aVector;
    float* outputPtr = cVector;
    for (number = 0; number < num_points; number++) {
        *outputPtr = (*inputPtr) * scalar;
        /* Advance both cursors so each iteration handles a new element. */
        inputPtr++;
        outputPtr++;
    }
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC

/* Kernel implemented outside this header; only its prototype is visible here. */
extern void volk_32f_s32f_multiply_32f_a_orc_impl(float* dst,
                                                  const float* src,
                                                  const float scalar,
                                                  unsigned int num_points);

/*!
 * \brief Forwards its arguments unchanged to
 *        volk_32f_s32f_multiply_32f_a_orc_impl.
 *
 * \param cVector    Output buffer, passed as the implementation's dst.
 * \param aVector    Input buffer, passed as the implementation's src.
 * \param scalar     Scalar factor handed through to the implementation.
 * \param num_points Element count handed through to the implementation.
 */
static inline void volk_32f_s32f_multiply_32f_u_orc(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    volk_32f_s32f_multiply_32f_a_orc_impl(cVector, aVector, scalar, num_points);
}
#endif /* LV_HAVE_ORC */
static void volk_32f_s32f_multiply_32f_a_avx(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:208
static void volk_32f_s32f_multiply_32f_a_generic(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:270
static void volk_32f_s32f_multiply_32f_u_sse(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:78
static void volk_32f_s32f_multiply_32f_u_avx(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:112
static void volk_32f_s32f_multiply_32f_a_sse(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:174
static void volk_32f_s32f_multiply_32f_generic(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:146
static void volk_32f_s32f_multiply_32f_u_neon(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:242