Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_32fc_s32f_power_32fc.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
55#ifndef INCLUDED_volk_32fc_s32f_power_32fc_a_H
56#define INCLUDED_volk_32fc_s32f_power_32fc_a_H
57
58#include <inttypes.h>
59#include <math.h>
60#include <stdio.h>
61
64 const float power)
65{
66 const float arg = power * atan2f(lv_creal(exp), lv_cimag(exp));
67 const float mag =
68 powf(lv_creal(exp) * lv_creal(exp) + lv_cimag(exp) * lv_cimag(exp), power / 2);
69 return mag * lv_cmake(-cosf(arg), sinf(arg));
70}
71
72#ifdef LV_HAVE_SSE
73#include <xmmintrin.h>
74
75#ifdef LV_HAVE_LIB_SIMDMATH
76#include <simdmath.h>
77#endif /* LV_HAVE_LIB_SIMDMATH */
78
79static inline void volk_32fc_s32f_power_32fc_a_sse(lv_32fc_t* cVector,
80 const lv_32fc_t* aVector,
81 const float power,
82 unsigned int num_points)
83{
84 unsigned int number = 0;
85
86 lv_32fc_t* cPtr = cVector;
87 const lv_32fc_t* aPtr = aVector;
88
89#ifdef LV_HAVE_LIB_SIMDMATH
90 const unsigned int quarterPoints = num_points / 4;
91 __m128 vPower = _mm_set_ps1(power);
92
93 __m128 cplxValue1, cplxValue2, magnitude, phase, iValue, qValue;
94 for (; number < quarterPoints; number++) {
95
96 cplxValue1 = _mm_load_ps((float*)aPtr);
97 aPtr += 2;
98
99 cplxValue2 = _mm_load_ps((float*)aPtr);
100 aPtr += 2;
101
102 // Convert to polar coordinates
103
104 // Arrange in i1i2i3i4 format
105 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
106 // Arrange in q1q2q3q4 format
107 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
108
109 phase = atan2f4(qValue, iValue); // Calculate the Phase
110
111 magnitude = _mm_sqrt_ps(
112 _mm_add_ps(_mm_mul_ps(iValue, iValue),
113 _mm_mul_ps(qValue, qValue))); // Calculate the magnitude by square
114 // rooting the added I2 and Q2 values
115
116 // Now calculate the power of the polar coordinate data
117 magnitude = powf4(magnitude, vPower); // Take the magnitude to the specified power
118
119 phase = _mm_mul_ps(phase, vPower); // Multiply the phase by the specified power
120
121 // Convert back to cartesian coordinates
122 iValue = _mm_mul_ps(cosf4(phase),
123 magnitude); // Multiply the cos of the phase by the magnitude
124 qValue = _mm_mul_ps(sinf4(phase),
125 magnitude); // Multiply the sin of the phase by the magnitude
126
127 cplxValue1 =
128 _mm_unpacklo_ps(iValue, qValue); // Interleave the lower two i & q values
129 cplxValue2 =
130 _mm_unpackhi_ps(iValue, qValue); // Interleave the upper two i & q values
131
132 _mm_store_ps((float*)cPtr,
133 cplxValue1); // Store the results back into the C container
134
135 cPtr += 2;
136
137 _mm_store_ps((float*)cPtr,
138 cplxValue2); // Store the results back into the C container
139
140 cPtr += 2;
141 }
142
143 number = quarterPoints * 4;
144#endif /* LV_HAVE_LIB_SIMDMATH */
145
146 for (; number < num_points; number++) {
147 *cPtr++ = __volk_s32fc_s32f_power_s32fc_a((*aPtr++), power);
148 }
149}
150#endif /* LV_HAVE_SSE */
151
152
153#ifdef LV_HAVE_GENERIC
154
156 const lv_32fc_t* aVector,
157 const float power,
158 unsigned int num_points)
159{
160 lv_32fc_t* cPtr = cVector;
161 const lv_32fc_t* aPtr = aVector;
162 unsigned int number = 0;
163
164 for (number = 0; number < num_points; number++) {
165 *cPtr++ = __volk_s32fc_s32f_power_s32fc_a((*aPtr++), power);
166 }
167}
168
169#endif /* LV_HAVE_GENERIC */
170
171
172#endif /* INCLUDED_volk_32fc_s32f_power_32fc_a_H */
static void volk_32fc_s32f_power_32fc_generic(lv_32fc_t *cVector, const lv_32fc_t *aVector, const float power, unsigned int num_points)
Definition: volk_32fc_s32f_power_32fc.h:155
static lv_32fc_t __volk_s32fc_s32f_power_s32fc_a(const lv_32fc_t exp, const float power)
raise a complex float to a real float power
Definition: volk_32fc_s32f_power_32fc.h:63
static void volk_32fc_s32f_power_32fc_a_sse(lv_32fc_t *cVector, const lv_32fc_t *aVector, const float power, unsigned int num_points)
Definition: volk_32fc_s32f_power_32fc.h:79
#define lv_cimag(x)
Definition: volk_complex.h:89
#define lv_cmake(r, i)
Definition: volk_complex.h:68
#define lv_creal(x)
Definition: volk_complex.h:87
float complex lv_32fc_t
Definition: volk_complex.h:65