Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_32f_convert_64f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
66#ifndef INCLUDED_volk_32f_convert_64f_u_H
67#define INCLUDED_volk_32f_convert_64f_u_H
68
69#include <inttypes.h>
70#include <stdio.h>
71
72#ifdef LV_HAVE_AVX
73#include <immintrin.h>
74
/*!
 * \brief Converts floats to doubles with AVX, using unaligned loads/stores.
 *
 * Processes four elements per iteration (one 128-bit float load widened to
 * one 256-bit double store) and finishes any remaining elements with scalar
 * code.
 *
 * \param outputVector destination buffer; num_points doubles are written
 * \param inputVector  source buffer; num_points floats are read
 * \param num_points   number of elements to convert
 */
static inline void volk_32f_convert_64f_u_avx(double* outputVector,
                                              const float* inputVector,
                                              unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    const float* inputVectorPtr = inputVector;
    double* outputVectorPtr = outputVector;
    unsigned int number;

    for (number = 0; number < quarterPoints; number++) {
        /* loadu/storeu: no alignment is assumed for either buffer. */
        const __m128 inputVal = _mm_loadu_ps(inputVectorPtr);
        const __m256d ret = _mm256_cvtps_pd(inputVal);
        _mm256_storeu_pd(outputVectorPtr, ret);

        inputVectorPtr += 4;
        outputVectorPtr += 4;
    }

    /* Scalar tail: the last num_points % 4 elements. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        outputVector[number] = (double)(inputVector[number]);
    }
}
103
104#endif /* LV_HAVE_AVX */
105
106#ifdef LV_HAVE_SSE2
107#include <emmintrin.h>
108
/*!
 * \brief Converts floats to doubles with SSE2, using unaligned loads/stores.
 *
 * Processes four floats per iteration. _mm_cvtps_pd only converts the two
 * low lanes of its argument, so each group of four is converted in two
 * halves. Remaining elements are handled with scalar code.
 *
 * \param outputVector destination buffer; num_points doubles are written
 * \param inputVector  source buffer; num_points floats are read
 * \param num_points   number of elements to convert
 */
static inline void volk_32f_convert_64f_u_sse2(double* outputVector,
                                               const float* inputVector,
                                               unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    const float* inputVectorPtr = inputVector;
    double* outputVectorPtr = outputVector;
    unsigned int number;

    for (number = 0; number < quarterPoints; number++) {
        const __m128 lowPair = _mm_loadu_ps(inputVectorPtr);
        /* Bring lanes 2 and 3 down into lanes 0 and 1 for the second convert. */
        const __m128 highPair = _mm_movehl_ps(lowPair, lowPair);

        _mm_storeu_pd(outputVectorPtr, _mm_cvtps_pd(lowPair));
        _mm_storeu_pd(outputVectorPtr + 2, _mm_cvtps_pd(highPair));

        inputVectorPtr += 4;
        outputVectorPtr += 4;
    }

    /* Scalar tail: the last num_points % 4 elements. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        outputVector[number] = (double)(inputVector[number]);
    }
}
144#endif /* LV_HAVE_SSE2 */
145
146
147#ifdef LV_HAVE_GENERIC
148
/*!
 * \brief Portable scalar conversion of floats to doubles.
 *
 * \param outputVector destination buffer; num_points doubles are written
 * \param inputVector  source buffer; num_points floats are read
 * \param num_points   number of elements to convert
 */
static inline void volk_32f_convert_64f_generic(double* outputVector,
                                                const float* inputVector,
                                                unsigned int num_points)
{
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        outputVector[number] = (double)(inputVector[number]);
    }
}
161#endif /* LV_HAVE_GENERIC */
162
163
164#endif /* INCLUDED_volk_32f_convert_64f_u_H */
165
166
167#ifndef INCLUDED_volk_32f_convert_64f_a_H
168#define INCLUDED_volk_32f_convert_64f_a_H
169
170#include <inttypes.h>
171#include <stdio.h>
172
173#ifdef LV_HAVE_AVX
174#include <immintrin.h>
175
/*!
 * \brief Converts floats to doubles with AVX, using aligned loads/stores.
 *
 * Processes four elements per iteration (one 128-bit float load widened to
 * one 256-bit double store) and finishes any remaining elements with scalar
 * code.
 *
 * The vector loop uses _mm_load_ps and _mm256_store_pd, which require
 * inputVector to be 16-byte aligned and outputVector to be 32-byte aligned.
 *
 * \param outputVector destination buffer; num_points doubles are written
 * \param inputVector  source buffer; num_points floats are read
 * \param num_points   number of elements to convert
 */
static inline void volk_32f_convert_64f_a_avx(double* outputVector,
                                              const float* inputVector,
                                              unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    const float* inputVectorPtr = inputVector;
    double* outputVectorPtr = outputVector;
    unsigned int number;

    for (number = 0; number < quarterPoints; number++) {
        const __m128 inputVal = _mm_load_ps(inputVectorPtr);
        const __m256d ret = _mm256_cvtps_pd(inputVal);
        _mm256_store_pd(outputVectorPtr, ret);

        inputVectorPtr += 4;
        outputVectorPtr += 4;
    }

    /* Scalar tail: the last num_points % 4 elements. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        outputVector[number] = (double)(inputVector[number]);
    }
}
204#endif /* LV_HAVE_AVX */
205
206#ifdef LV_HAVE_SSE2
207#include <emmintrin.h>
208
/*!
 * \brief Converts floats to doubles with SSE2, using aligned loads/stores.
 *
 * Processes four floats per iteration. _mm_cvtps_pd only converts the two
 * low lanes of its argument, so each group of four is converted in two
 * halves. Remaining elements are handled with scalar code.
 *
 * The vector loop uses _mm_load_ps and _mm_store_pd, which require both
 * inputVector and outputVector to be 16-byte aligned.
 *
 * \param outputVector destination buffer; num_points doubles are written
 * \param inputVector  source buffer; num_points floats are read
 * \param num_points   number of elements to convert
 */
static inline void volk_32f_convert_64f_a_sse2(double* outputVector,
                                               const float* inputVector,
                                               unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    const float* inputVectorPtr = inputVector;
    double* outputVectorPtr = outputVector;
    unsigned int number;

    for (number = 0; number < quarterPoints; number++) {
        const __m128 lowPair = _mm_load_ps(inputVectorPtr);
        /* Bring lanes 2 and 3 down into lanes 0 and 1 for the second convert. */
        const __m128 highPair = _mm_movehl_ps(lowPair, lowPair);

        /* Each store covers two doubles (16 bytes), so alignment is kept. */
        _mm_store_pd(outputVectorPtr, _mm_cvtps_pd(lowPair));
        _mm_store_pd(outputVectorPtr + 2, _mm_cvtps_pd(highPair));

        inputVectorPtr += 4;
        outputVectorPtr += 4;
    }

    /* Scalar tail: the last num_points % 4 elements. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        outputVector[number] = (double)(inputVector[number]);
    }
}
244#endif /* LV_HAVE_SSE2 */
245
246
247#ifdef LV_HAVE_GENERIC
248
/*!
 * \brief Portable scalar conversion of floats to doubles (aligned-variant
 *        entry point).
 *
 * The body uses only plain scalar loads and stores.
 *
 * \param outputVector destination buffer; num_points doubles are written
 * \param inputVector  source buffer; num_points floats are read
 * \param num_points   number of elements to convert
 */
static inline void volk_32f_convert_64f_a_generic(double* outputVector,
                                                  const float* inputVector,
                                                  unsigned int num_points)
{
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        outputVector[number] = (double)(inputVector[number]);
    }
}
261#endif /* LV_HAVE_GENERIC */
262
263
264#endif /* INCLUDED_volk_32f_convert_64f_a_H */
static void volk_32f_convert_64f_a_generic(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:249
static void volk_32f_convert_64f_u_sse2(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:109
static void volk_32f_convert_64f_a_avx(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:176
static void volk_32f_convert_64f_a_sse2(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:209
static void volk_32f_convert_64f_generic(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:149
static void volk_32f_convert_64f_u_avx(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:75