60#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
61#define INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
/*
 * Scalar magnitude kernel (fragment: the start of the signature and the
 * closing braces are not part of this excerpt).
 *
 * For each of num_points input points it reads two consecutive floats
 * (real, imag), computes scalar * sqrt(real^2 + imag^2), rounds the value
 * with rintf() and stores it as int16_t through magnitudeVector.
 */
73 unsigned int num_points)
/*
 * NOTE(review): the cast below is to non-const float*, while the SIMD
 * variants in this file cast to const float*. The result is stored in a
 * const float* either way, so only the cast itself is inconsistent.
 */
75 const float* complexVectorPtr = (
float*)complexVector;
76 int16_t* magnitudeVectorPtr = magnitudeVector;
77 unsigned int number = 0;
78 for (number = 0; number < num_points; number++) {
/* One input point = two consecutive floats: real part, then imaginary. */
79 float real = *complexVectorPtr++;
80 float imag = *complexVectorPtr++;
/*
 * Scale the magnitude, round with rintf(), then narrow to int16_t.
 * NOTE(review): nothing here clamps the value; converting a float that is
 * outside the int16_t range is undefined behaviour in C.
 */
81 *magnitudeVectorPtr++ =
82 (int16_t)rintf(scalar * sqrtf((real * real) + (imag * imag)));
/*
 * AVX2 magnitude kernel using aligned loads (fragment: parts of the
 * signature, the braces, the declarations of resultInt/resultShort and the
 * output store are not part of this excerpt).
 *
 * Processes the input in groups of 8 points (16 floats). For each group it
 * computes scalar * sqrt(re^2 + im^2) per point, converts to integers and
 * narrows to 16 bits. The leftover num_points % 8 points are passed to a
 * follow-up call whose callee name is not visible here.
 */
90static inline void volk_32fc_s32f_magnitude_16i_a_avx2(int16_t* magnitudeVector,
93 unsigned int num_points)
95 unsigned int number = 0;
/* Number of full 8-point groups handled by the vector loop. */
96 const unsigned int eighthPoints = num_points / 8;
98 const float* complexVectorPtr = (
const float*)complexVector;
99 int16_t* magnitudeVectorPtr = magnitudeVector;
/* scalar broadcast to all 8 float lanes. */
101 __m256 vScalar = _mm256_set1_ps(scalar);
/*
 * 32-bit element indices; _mm256_set_epi32 lists elements from highest to
 * lowest, so elements 0..3 are 0, 4, 1, 5. Presumably the index operand of
 * the permute in the loop (its argument line is not in this excerpt).
 */
102 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
103 __m256 cplxValue1, cplxValue2, result;
107 for (; number < eighthPoints; number++) {
/* Aligned loads: _mm256_load_ps requires a 32-byte aligned address. */
108 cplxValue1 = _mm256_load_ps(complexVectorPtr);
109 complexVectorPtr += 8;
111 cplxValue2 = _mm256_load_ps(complexVectorPtr);
112 complexVectorPtr += 8;
/* Square every float (re^2 and im^2 for each loaded point). */
114 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
115 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
/*
 * Horizontal add sums adjacent pairs (re^2 + im^2). It works per 128-bit
 * lane, so the eight sums come out lane-interleaved, not in input order.
 */
117 result = _mm256_hadd_ps(cplxValue1, cplxValue2);
119 result = _mm256_sqrt_ps(result);
121 result = _mm256_mul_ps(result, vScalar);
/* Float -> 32-bit int, then pack to 16-bit with signed saturation. */
123 resultInt = _mm256_cvtps_epi32(result);
124 resultInt = _mm256_packs_epi32(resultInt, resultInt);
/*
 * The permute's arguments are on a line missing from this excerpt;
 * presumably (resultInt, idx), which would put the 8 values back in input
 * order in the low 128 bits - TODO confirm against the full file.
 */
125 resultInt = _mm256_permutevar8x32_epi32(
/* Take the low 128 bits (eight 16-bit values). */
127 resultShort = _mm256_extracti128_si256(resultInt, 0);
/* The store of resultShort to magnitudeVectorPtr is not visible here. */
129 magnitudeVectorPtr += 8;
/* Index of the first point not covered by the vector loop. */
132 number = eighthPoints * 8;
/*
 * Arguments of the tail call (callee on a line not shown): output and input
 * advanced by number, and the remaining num_points - number points.
 */
134 magnitudeVector + number, complexVector + number, scalar, num_points - number);
139#include <pmmintrin.h>
/*
 * 128-bit SIMD magnitude kernel (fragment: the function name, the loads,
 * the add/sqrt/scale steps, the store into floatBuffer and the braces are
 * not part of this excerpt). It follows the <pmmintrin.h> include, so it is
 * presumably the SSE3 variant - TODO confirm against the full file.
 *
 * Processes 4 points (8 floats) per loop iteration and writes 4 int16_t
 * results; the leftover num_points % 4 points go to a follow-up call whose
 * callee name is not visible here.
 */
144 unsigned int num_points)
146 unsigned int number = 0;
/* Number of full 4-point groups handled by the vector loop. */
147 const unsigned int quarterPoints = num_points / 4;
149 const float* complexVectorPtr = (
const float*)complexVector;
150 int16_t* magnitudeVectorPtr = magnitudeVector;
154 __m128 cplxValue1, cplxValue2, result;
158 for (; number < quarterPoints; number++) {
/*
 * The input pointer advances by 4 floats twice per iteration; the matching
 * loads into cplxValue1 / cplxValue2 are on lines missing from this excerpt.
 */
160 complexVectorPtr += 4;
163 complexVectorPtr += 4;
/* Square every float in both vectors. */
165 cplxValue1 =
_mm_mul_ps(cplxValue1, cplxValue1);
166 cplxValue2 =
_mm_mul_ps(cplxValue2, cplxValue2);
/*
 * Round the four values in floatBuffer with rintf() and narrow each to
 * int16_t. How floatBuffer is filled is not visible here.
 */
175 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
176 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
177 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
178 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
/* Index of the first point not covered by the vector loop. */
181 number = quarterPoints * 4;
/*
 * Arguments of the tail call (callee on a line not shown): output and input
 * advanced by number, and the remaining num_points - number points.
 */
183 magnitudeVector + number, complexVector + number, scalar, num_points - number);
189#include <xmmintrin.h>
/*
 * 128-bit SIMD magnitude kernel (fragment: the function name, every vector
 * operation inside the loop, the store into floatBuffer and the braces are
 * not part of this excerpt). It follows the <xmmintrin.h> include, so it is
 * presumably the SSE variant - TODO confirm against the full file.
 *
 * Processes 4 points (8 floats) per loop iteration and writes 4 int16_t
 * results; the leftover num_points % 4 points go to a follow-up call whose
 * callee name is not visible here.
 */
194 unsigned int num_points)
196 unsigned int number = 0;
/* Number of full 4-point groups handled by the vector loop. */
197 const unsigned int quarterPoints = num_points / 4;
199 const float* complexVectorPtr = (
const float*)complexVector;
200 int16_t* magnitudeVectorPtr = magnitudeVector;
204 __m128 cplxValue1, cplxValue2, result;
209 for (; number < quarterPoints; number++) {
/*
 * The input pointer advances by 4 floats twice per iteration; the loads and
 * the arithmetic between them and the stores below are not visible here.
 */
211 complexVectorPtr += 4;
214 complexVectorPtr += 4;
/*
 * Round the four values in floatBuffer with rintf() and narrow each to
 * int16_t. How floatBuffer is filled is not visible here.
 */
231 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
232 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
233 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
234 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
/* Index of the first point not covered by the vector loop. */
237 number = quarterPoints * 4;
/*
 * Arguments of the tail call (callee on a line not shown): output and input
 * advanced by number, and the remaining num_points - number points.
 */
239 magnitudeVector + number, complexVector + number, scalar, num_points - number);
246#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_u_H
247#define INCLUDED_volk_32fc_s32f_magnitude_16i_u_H
255#include <immintrin.h>
/*
 * AVX2 magnitude kernel using unaligned loads (fragment: parts of the
 * signature, the braces, the declarations of resultInt/resultShort and the
 * output store are not part of this excerpt).
 *
 * Processes the input in groups of 8 points (16 floats). For each group it
 * computes scalar * sqrt(re^2 + im^2) per point, converts to integers and
 * narrows to 16 bits. The leftover num_points % 8 points are passed to a
 * follow-up call whose callee name is not visible here.
 */
257static inline void volk_32fc_s32f_magnitude_16i_u_avx2(int16_t* magnitudeVector,
260 unsigned int num_points)
262 unsigned int number = 0;
/* Number of full 8-point groups handled by the vector loop. */
263 const unsigned int eighthPoints = num_points / 8;
265 const float* complexVectorPtr = (
const float*)complexVector;
266 int16_t* magnitudeVectorPtr = magnitudeVector;
/* scalar broadcast to all 8 float lanes. */
268 __m256 vScalar = _mm256_set1_ps(scalar);
/*
 * 32-bit element indices; _mm256_set_epi32 lists elements from highest to
 * lowest, so elements 0..3 are 0, 4, 1, 5. Presumably the index operand of
 * the permute in the loop (its argument line is not in this excerpt).
 */
269 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
270 __m256 cplxValue1, cplxValue2, result;
274 for (; number < eighthPoints; number++) {
/* Unaligned loads: _mm256_loadu_ps has no alignment requirement. */
275 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
276 complexVectorPtr += 8;
278 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
279 complexVectorPtr += 8;
/* Square every float (re^2 and im^2 for each loaded point). */
281 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
282 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
/*
 * Horizontal add sums adjacent pairs (re^2 + im^2). It works per 128-bit
 * lane, so the eight sums come out lane-interleaved, not in input order.
 */
284 result = _mm256_hadd_ps(cplxValue1, cplxValue2);
286 result = _mm256_sqrt_ps(result);
288 result = _mm256_mul_ps(result, vScalar);
/* Float -> 32-bit int, then pack to 16-bit with signed saturation. */
290 resultInt = _mm256_cvtps_epi32(result);
291 resultInt = _mm256_packs_epi32(resultInt, resultInt);
/*
 * The permute's arguments are on a line missing from this excerpt;
 * presumably (resultInt, idx), which would put the 8 values back in input
 * order in the low 128 bits - TODO confirm against the full file.
 */
292 resultInt = _mm256_permutevar8x32_epi32(
/* Take the low 128 bits (eight 16-bit values). */
294 resultShort = _mm256_extracti128_si256(resultInt, 0);
/* The store of resultShort to magnitudeVectorPtr is not visible here. */
296 magnitudeVectorPtr += 8;
/* Index of the first point not covered by the vector loop. */
299 number = eighthPoints * 8;
/*
 * Arguments of the tail call (callee on a line not shown): output and input
 * advanced by number, and the remaining num_points - number points.
 */
301 magnitudeVector + number, complexVector + number, scalar, num_points - number);