Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_8ic_x2_multiply_conjugate_16ic.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
23#ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
24#define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
25
26#include <inttypes.h>
27#include <stdio.h>
28#include <volk/volk_complex.h>
29
30#ifdef LV_HAVE_AVX2
31#include <immintrin.h>
40static inline void volk_8ic_x2_multiply_conjugate_16ic_a_avx2(lv_16sc_t* cVector,
41 const lv_8sc_t* aVector,
42 const lv_8sc_t* bVector,
43 unsigned int num_points)
44{
45 unsigned int number = 0;
46 const unsigned int quarterPoints = num_points / 8;
47
48 __m256i x, y, realz, imagz;
49 lv_16sc_t* c = cVector;
50 const lv_8sc_t* a = aVector;
51 const lv_8sc_t* b = bVector;
52 __m256i conjugateSign =
53 _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
54
55 for (; number < quarterPoints; number++) {
56 // Convert 8 bit values into 16 bit values
57 x = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)a));
58 y = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)b));
59
60 // Calculate the ar*cr - ai*(-ci) portions
61 realz = _mm256_madd_epi16(x, y);
62
63 // Calculate the complex conjugate of the cr + ci j values
64 y = _mm256_sign_epi16(y, conjugateSign);
65
66 // Shift the order of the cr and ci values
67 y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)),
68 _MM_SHUFFLE(2, 3, 0, 1));
69
70 // Calculate the ar*(-ci) + cr*(ai)
71 imagz = _mm256_madd_epi16(x, y);
72
73 // Perform the addition of products
74
75 _mm256_store_si256((__m256i*)c,
76 _mm256_packs_epi32(_mm256_unpacklo_epi32(realz, imagz),
77 _mm256_unpackhi_epi32(realz, imagz)));
78
79 a += 8;
80 b += 8;
81 c += 8;
82 }
83
84 number = quarterPoints * 8;
85 int16_t* c16Ptr = (int16_t*)&cVector[number];
86 int8_t* a8Ptr = (int8_t*)&aVector[number];
87 int8_t* b8Ptr = (int8_t*)&bVector[number];
88 for (; number < num_points; number++) {
89 float aReal = (float)*a8Ptr++;
90 float aImag = (float)*a8Ptr++;
91 lv_32fc_t aVal = lv_cmake(aReal, aImag);
92 float bReal = (float)*b8Ptr++;
93 float bImag = (float)*b8Ptr++;
94 lv_32fc_t bVal = lv_cmake(bReal, -bImag);
95 lv_32fc_t temp = aVal * bVal;
96
97 *c16Ptr++ = (int16_t)lv_creal(temp);
98 *c16Ptr++ = (int16_t)lv_cimag(temp);
99 }
100}
101#endif /* LV_HAVE_AVX2 */
102
103
104#ifdef LV_HAVE_SSE4_1
105#include <smmintrin.h>
114static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* cVector,
115 const lv_8sc_t* aVector,
116 const lv_8sc_t* bVector,
117 unsigned int num_points)
118{
119 unsigned int number = 0;
120 const unsigned int quarterPoints = num_points / 4;
121
122 __m128i x, y, realz, imagz;
123 lv_16sc_t* c = cVector;
124 const lv_8sc_t* a = aVector;
125 const lv_8sc_t* b = bVector;
126 __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
127
128 for (; number < quarterPoints; number++) {
129 // Convert into 8 bit values into 16 bit values
130 x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
131 y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
132
133 // Calculate the ar*cr - ai*(-ci) portions
134 realz = _mm_madd_epi16(x, y);
135
136 // Calculate the complex conjugate of the cr + ci j values
137 y = _mm_sign_epi16(y, conjugateSign);
138
139 // Shift the order of the cr and ci values
140 y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)),
141 _MM_SHUFFLE(2, 3, 0, 1));
142
143 // Calculate the ar*(-ci) + cr*(ai)
144 imagz = _mm_madd_epi16(x, y);
145
146 _mm_store_si128((__m128i*)c,
147 _mm_packs_epi32(_mm_unpacklo_epi32(realz, imagz),
148 _mm_unpackhi_epi32(realz, imagz)));
149
150 a += 4;
151 b += 4;
152 c += 4;
153 }
154
155 number = quarterPoints * 4;
156 int16_t* c16Ptr = (int16_t*)&cVector[number];
157 int8_t* a8Ptr = (int8_t*)&aVector[number];
158 int8_t* b8Ptr = (int8_t*)&bVector[number];
159 for (; number < num_points; number++) {
160 float aReal = (float)*a8Ptr++;
161 float aImag = (float)*a8Ptr++;
162 lv_32fc_t aVal = lv_cmake(aReal, aImag);
163 float bReal = (float)*b8Ptr++;
164 float bImag = (float)*b8Ptr++;
165 lv_32fc_t bVal = lv_cmake(bReal, -bImag);
166 lv_32fc_t temp = aVal * bVal;
167
168 *c16Ptr++ = (int16_t)lv_creal(temp);
169 *c16Ptr++ = (int16_t)lv_cimag(temp);
170 }
171}
172#endif /* LV_HAVE_SSE4_1 */
173
174#ifdef LV_HAVE_GENERIC
184 const lv_8sc_t* aVector,
185 const lv_8sc_t* bVector,
186 unsigned int num_points)
187{
188 unsigned int number = 0;
189 int16_t* c16Ptr = (int16_t*)cVector;
190 int8_t* a8Ptr = (int8_t*)aVector;
191 int8_t* b8Ptr = (int8_t*)bVector;
192 for (number = 0; number < num_points; number++) {
193 float aReal = (float)*a8Ptr++;
194 float aImag = (float)*a8Ptr++;
195 lv_32fc_t aVal = lv_cmake(aReal, aImag);
196 float bReal = (float)*b8Ptr++;
197 float bImag = (float)*b8Ptr++;
198 lv_32fc_t bVal = lv_cmake(bReal, -bImag);
199 lv_32fc_t temp = aVal * bVal;
200
201 *c16Ptr++ = (int16_t)lv_creal(temp);
202 *c16Ptr++ = (int16_t)lv_cimag(temp);
203 }
204}
205#endif /* LV_HAVE_GENERIC */
206
207#endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H */
208
209#ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H
210#define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H
211
212#include <inttypes.h>
213#include <stdio.h>
214#include <volk/volk_complex.h>
215
216#ifdef LV_HAVE_AVX2
217#include <immintrin.h>
226static inline void volk_8ic_x2_multiply_conjugate_16ic_u_avx2(lv_16sc_t* cVector,
227 const lv_8sc_t* aVector,
228 const lv_8sc_t* bVector,
229 unsigned int num_points)
230{
231 unsigned int number = 0;
232 const unsigned int oneEigthPoints = num_points / 8;
233
234 __m256i x, y, realz, imagz;
235 lv_16sc_t* c = cVector;
236 const lv_8sc_t* a = aVector;
237 const lv_8sc_t* b = bVector;
238 __m256i conjugateSign =
239 _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
240
241 for (; number < oneEigthPoints; number++) {
242 // Convert 8 bit values into 16 bit values
243 x = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)a));
244 y = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)b));
245
246 // Calculate the ar*cr - ai*(-ci) portions
247 realz = _mm256_madd_epi16(x, y);
248
249 // Calculate the complex conjugate of the cr + ci j values
250 y = _mm256_sign_epi16(y, conjugateSign);
251
252 // Shift the order of the cr and ci values
253 y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)),
254 _MM_SHUFFLE(2, 3, 0, 1));
255
256 // Calculate the ar*(-ci) + cr*(ai)
257 imagz = _mm256_madd_epi16(x, y);
258
259 // Perform the addition of products
260
261 _mm256_storeu_si256((__m256i*)c,
262 _mm256_packs_epi32(_mm256_unpacklo_epi32(realz, imagz),
263 _mm256_unpackhi_epi32(realz, imagz)));
264
265 a += 8;
266 b += 8;
267 c += 8;
268 }
269
270 number = oneEigthPoints * 8;
271 int16_t* c16Ptr = (int16_t*)&cVector[number];
272 int8_t* a8Ptr = (int8_t*)&aVector[number];
273 int8_t* b8Ptr = (int8_t*)&bVector[number];
274 for (; number < num_points; number++) {
275 float aReal = (float)*a8Ptr++;
276 float aImag = (float)*a8Ptr++;
277 lv_32fc_t aVal = lv_cmake(aReal, aImag);
278 float bReal = (float)*b8Ptr++;
279 float bImag = (float)*b8Ptr++;
280 lv_32fc_t bVal = lv_cmake(bReal, -bImag);
281 lv_32fc_t temp = aVal * bVal;
282
283 *c16Ptr++ = (int16_t)lv_creal(temp);
284 *c16Ptr++ = (int16_t)lv_cimag(temp);
285 }
286}
287#endif /* LV_HAVE_AVX2 */
288
289#endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H */
static void volk_8ic_x2_multiply_conjugate_16ic_generic(lv_16sc_t *cVector, const lv_8sc_t *aVector, const lv_8sc_t *bVector, unsigned int num_points)
Multiplies one complex vector with the complex conjugate of the second complex vector and stores t...
Definition: volk_8ic_x2_multiply_conjugate_16ic.h:183
#define lv_cimag(x)
Definition: volk_complex.h:89
#define lv_cmake(r, i)
Definition: volk_complex.h:68
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:61
#define lv_creal(x)
Definition: volk_complex.h:87
float complex lv_32fc_t
Definition: volk_complex.h:65
short complex lv_16sc_t
Definition: volk_complex.h:62