Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_32fc_s32f_x2_power_spectral_density_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
55#ifndef INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H
56#define INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H
57
58#include <inttypes.h>
59#include <math.h>
60#include <stdio.h>
61
62#ifdef LV_HAVE_AVX
63#include <immintrin.h>
64
65#ifdef LV_HAVE_LIB_SIMDMATH
66#include <simdmath.h>
67#endif /* LV_HAVE_LIB_SIMDMATH */
68
69static inline void
71 const lv_32fc_t* complexFFTInput,
72 const float normalizationFactor,
73 const float rbw,
74 unsigned int num_points)
75{
76 const float* inputPtr = (const float*)complexFFTInput;
77 float* destPtr = logPowerOutput;
78 uint64_t number = 0;
79 const float iRBW = 1.0 / rbw;
80 const float iNormalizationFactor = 1.0 / normalizationFactor;
81
82#ifdef LV_HAVE_LIB_SIMDMATH
83 __m256 magScalar = _mm256_set1_ps(10.0);
84 magScalar = _mm256_div_ps(magScalar, logf4(magScalar));
85
86 __m256 invRBW = _mm256_set1_ps(iRBW);
87
88 __m256 invNormalizationFactor = _mm256_set1_ps(iNormalizationFactor);
89
90 __m256 power;
91 __m256 input1, input2;
92 const uint64_t eighthPoints = num_points / 8;
93 for (; number < eighthPoints; number++) {
94 // Load the complex values
95 input1 = _mm256_load_ps(inputPtr);
96 inputPtr += 8;
97 input2 = _mm256_load_ps(inputPtr);
98 inputPtr += 8;
99
100 // Apply the normalization factor
101 input1 = _mm256_mul_ps(input1, invNormalizationFactor);
102 input2 = _mm256_mul_ps(input2, invNormalizationFactor);
103
104 // Multiply each value by itself
105 // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
106 input1 = _mm256_mul_ps(input1, input1);
107 // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
108 input2 = _mm256_mul_ps(input2, input2);
109
110 // Horizontal add, to add (r*r) + (i*i) for each complex value
111 // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
112 inputVal1 = _mm256_permute2f128_ps(input1, input2, 0x20);
113 inputVal2 = _mm256_permute2f128_ps(input1, input2, 0x31);
114
115 power = _mm256_hadd_ps(inputVal1, inputVal2);
116
117 // Divide by the rbw
118 power = _mm256_mul_ps(power, invRBW);
119
120 // Calculate the natural log power
121 power = logf4(power);
122
123 // Convert to log10 and multiply by 10.0
124 power = _mm256_mul_ps(power, magScalar);
125
126 // Store the floating point results
127 _mm256_store_ps(destPtr, power);
128
129 destPtr += 8;
130 }
131
132 number = eighthPoints * 8;
133#endif /* LV_HAVE_LIB_SIMDMATH */
134 // Calculate the FFT for any remaining points
135 for (; number < num_points; number++) {
136 // Calculate dBm
137 // 50 ohm load assumption
138 // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
139 // 75 ohm load assumption
140 // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
141
142 const float real = *inputPtr++ * iNormalizationFactor;
143 const float imag = *inputPtr++ * iNormalizationFactor;
144
145 *destPtr = volk_log2to10factor *
146 log2f_non_ieee((((real * real) + (imag * imag))) * iRBW);
147 destPtr++;
148 }
149}
150#endif /* LV_HAVE_AVX */
151
152#ifdef LV_HAVE_SSE3
153#include <pmmintrin.h>
154
155
156#ifdef LV_HAVE_LIB_SIMDMATH
157#include <simdmath.h>
158#endif /* LV_HAVE_LIB_SIMDMATH */
159
160static inline void
162 const lv_32fc_t* complexFFTInput,
163 const float normalizationFactor,
164 const float rbw,
165 unsigned int num_points)
166{
167 const float* inputPtr = (const float*)complexFFTInput;
168 float* destPtr = logPowerOutput;
169 uint64_t number = 0;
170 const float iRBW = 1.0 / rbw;
171 const float iNormalizationFactor = 1.0 / normalizationFactor;
172
173#ifdef LV_HAVE_LIB_SIMDMATH
174 __m128 magScalar = _mm_set_ps1(10.0);
175 magScalar = _mm_div_ps(magScalar, logf4(magScalar));
176
177 __m128 invRBW = _mm_set_ps1(iRBW);
178
179 __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor);
180
181 __m128 power;
182 __m128 input1, input2;
183 const uint64_t quarterPoints = num_points / 4;
184 for (; number < quarterPoints; number++) {
185 // Load the complex values
186 input1 = _mm_load_ps(inputPtr);
187 inputPtr += 4;
188 input2 = _mm_load_ps(inputPtr);
189 inputPtr += 4;
190
191 // Apply the normalization factor
192 input1 = _mm_mul_ps(input1, invNormalizationFactor);
193 input2 = _mm_mul_ps(input2, invNormalizationFactor);
194
195 // Multiply each value by itself
196 // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
197 input1 = _mm_mul_ps(input1, input1);
198 // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
199 input2 = _mm_mul_ps(input2, input2);
200
201 // Horizontal add, to add (r*r) + (i*i) for each complex value
202 // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
203 power = _mm_hadd_ps(input1, input2);
204
205 // Divide by the rbw
206 power = _mm_mul_ps(power, invRBW);
207
208 // Calculate the natural log power
209 power = logf4(power);
210
211 // Convert to log10 and multiply by 10.0
212 power = _mm_mul_ps(power, magScalar);
213
214 // Store the floating point results
215 _mm_store_ps(destPtr, power);
216
217 destPtr += 4;
218 }
219
220 number = quarterPoints * 4;
221#endif /* LV_HAVE_LIB_SIMDMATH */
222 // Calculate the FFT for any remaining points
223 for (; number < num_points; number++) {
224 // Calculate dBm
225 // 50 ohm load assumption
226 // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
227 // 75 ohm load assumption
228 // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
229
230 const float real = *inputPtr++ * iNormalizationFactor;
231 const float imag = *inputPtr++ * iNormalizationFactor;
232
233 *destPtr = volk_log2to10factor *
234 log2f_non_ieee((((real * real) + (imag * imag))) * iRBW);
235 destPtr++;
236 }
237}
238#endif /* LV_HAVE_SSE3 */
239
240
241#ifdef LV_HAVE_GENERIC
242
243static inline void
245 const lv_32fc_t* complexFFTInput,
246 const float normalizationFactor,
247 const float rbw,
248 unsigned int num_points)
249{
250 if (rbw != 1.0)
251 volk_32fc_s32f_power_spectrum_32f(
252 logPowerOutput, complexFFTInput, normalizationFactor * sqrt(rbw), num_points);
253 else
254 volk_32fc_s32f_power_spectrum_32f(
255 logPowerOutput, complexFFTInput, normalizationFactor, num_points);
256}
257
258#endif /* LV_HAVE_GENERIC */
259
260#endif /* INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H */
static void volk_32fc_s32f_x2_power_spectral_density_32f_generic(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points)
Definition: volk_32fc_s32f_x2_power_spectral_density_32f.h:244
static void volk_32fc_s32f_x2_power_spectral_density_32f_a_avx(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points)
Definition: volk_32fc_s32f_x2_power_spectral_density_32f.h:70
static void volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, const float rbw, unsigned int num_points)
Definition: volk_32fc_s32f_x2_power_spectral_density_32f.h:161
#define volk_log2to10factor
Definition: volk_common.h:160
static float log2f_non_ieee(float f)
Definition: volk_common.h:150
float complex lv_32fc_t
Definition: volk_complex.h:65