10#ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
11#define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
/*
 * AVX2 kernel, "_a" variant: each vector result is written with
 * _mm256_store_si256, so the destination address used for the store must be
 * 32-byte aligned.
 *
 * NOTE(review): only part of this definition is visible in this excerpt. The
 * input parameters, the operand loads, the pointer updates and the end of the
 * scalar tail are not shown; comments below describe the visible lines only.
 */
27static inline void volk_8ic_x2_multiply_conjugate_16ic_a_avx2(
lv_16sc_t* cVector,
30 unsigned int num_points)
32 unsigned int number = 0;
/* NOTE(review): despite the name, this is the number of 8-point groups
 * (num_points / 8), not a quarter of the points. */
33 const unsigned int quarterPoints = num_points / 8;
35 __m256i x, y, realz, imagz;
/* _mm256_set_epi16 lists lanes from highest to lowest, so every odd 16-bit
 * lane holds -1 and every even lane holds +1. Presumably the odd lanes are
 * the imaginary components of interleaved (re, im) data -- TODO confirm
 * against the elided loads. */
39 __m256i conjugateSign =
40 _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
42 for (; number < quarterPoints; number++) {
/* madd multiplies matching 16-bit lanes and sums each adjacent pair into one
 * 32-bit lane; with (re, im) pairs that is x.re*y.re + x.im*y.im. */
48 realz = _mm256_madd_epi16(x, y);
/* Negate the odd 16-bit lanes of y (lanes where conjugateSign is -1). */
51 y = _mm256_sign_epi16(y, conjugateSign);
/* _MM_SHUFFLE(2, 3, 0, 1) swaps the two 16-bit lanes of each adjacent pair;
 * the immediate passed to the outer shufflehi is not visible in this excerpt. */
54 y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y,
_MM_SHUFFLE(2, 3, 0, 1)),
/* With y negated and pair-swapped this sums x.re*(-y.im) + x.im*y.re,
 * assuming the (re, im) layout noted above. */
58 imagz = _mm256_madd_epi16(x, y);
/* Interleave the 32-bit real and imaginary sums, narrow them to 16 bits with
 * signed saturation, and write 32 bytes with an aligned store. */
62 _mm256_store_si256((__m256i*)c,
63 _mm256_packs_epi32(_mm256_unpacklo_epi32(realz, imagz),
64 _mm256_unpackhi_epi32(realz, imagz)));
/* Resume at index quarterPoints * 8 so the scalar loop below covers the
 * remaining num_points % 8 points one at a time. */
71 number = quarterPoints * 8;
72 int16_t* c16Ptr = (int16_t*)&cVector[number];
73 int8_t* a8Ptr = (int8_t*)&aVector[number];
74 int8_t* b8Ptr = (int8_t*)&bVector[number];
75 for (; number < num_points; number++) {
/* Each input contributes two consecutive int8_t values per point, widened
 * to float; how they are combined is not visible in this excerpt. */
76 float aReal = (float)*a8Ptr++;
77 float aImag = (float)*a8Ptr++;
79 float bReal = (float)*b8Ptr++;
80 float bImag = (float)*b8Ptr++;
/*
 * SSE4.1 kernel, "_a" variant (per its name).
 *
 * NOTE(review): the body of the vector loop, the input parameters and the
 * definition of `temp` are not visible in this excerpt; comments below
 * describe the visible lines only.
 */
101static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(
lv_16sc_t* cVector,
104 unsigned int num_points)
106 unsigned int number = 0;
/* Number of 4-point groups handed to the vector loop. */
107 const unsigned int quarterPoints = num_points / 4;
115 for (; number < quarterPoints; number++) {
/* Resume at index quarterPoints * 4 so the scalar loop below covers the
 * remaining num_points % 4 points one at a time. */
142 number = quarterPoints * 4;
143 int16_t* c16Ptr = (int16_t*)&cVector[number];
144 int8_t* a8Ptr = (int8_t*)&aVector[number];
145 int8_t* b8Ptr = (int8_t*)&bVector[number];
146 for (; number < num_points; number++) {
/* Read two consecutive int8_t values from each input and widen to float. */
147 float aReal = (float)*a8Ptr++;
148 float aImag = (float)*a8Ptr++;
150 float bReal = (float)*b8Ptr++;
151 float bImag = (float)*b8Ptr++;
/* Write the real part and then the imaginary part of `temp` to consecutive
 * int16_t output slots. `temp` is computed on a line not shown here;
 * presumably it is a * conj(b) built from the four floats -- TODO confirm. */
155 *c16Ptr++ = (int16_t)
lv_creal(temp);
156 *c16Ptr++ = (int16_t)
lv_cimag(temp);
161#ifdef LV_HAVE_GENERIC
173 unsigned int num_points)
/*
 * Scalar loop over all num_points points with no vector pre-pass.
 *
 * NOTE(review): the start of this definition (name and leading parameters)
 * and the computation of `temp` are not visible in this excerpt; it sits
 * under the LV_HAVE_GENERIC guard.
 */
175 unsigned int number = 0;
/* View the output as a flat int16_t array and the inputs as flat int8_t
 * arrays so components can be walked one scalar at a time. */
176 int16_t* c16Ptr = (int16_t*)cVector;
177 int8_t* a8Ptr = (int8_t*)aVector;
178 int8_t* b8Ptr = (int8_t*)bVector;
179 for (number = 0; number < num_points; number++) {
/* Read two consecutive int8_t values from each input and widen to float. */
180 float aReal = (float)*a8Ptr++;
181 float aImag = (float)*a8Ptr++;
183 float bReal = (float)*b8Ptr++;
184 float bImag = (float)*b8Ptr++;
/* Write the real part and then the imaginary part of `temp` to consecutive
 * int16_t output slots. `temp` is computed on a line not shown here;
 * presumably it is a * conj(b) built from the four floats -- TODO confirm. */
188 *c16Ptr++ = (int16_t)
lv_creal(temp);
189 *c16Ptr++ = (int16_t)
lv_cimag(temp);
196#ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H
197#define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H
204#include <immintrin.h>
/*
 * AVX2 kernel, "_u" variant: each vector result is written with
 * _mm256_storeu_si256, which places no alignment requirement on the
 * destination address.
 *
 * NOTE(review): only part of this definition is visible in this excerpt. The
 * input parameters, the operand loads, the pointer updates, the definition of
 * `temp` and the closing lines are not shown; comments below describe the
 * visible lines only.
 */
213static inline void volk_8ic_x2_multiply_conjugate_16ic_u_avx2(
lv_16sc_t* cVector,
216 unsigned int num_points)
218 unsigned int number = 0;
/* Number of 8-point groups handed to the vector loop.
 * NOTE(review): the identifier is spelled "Eigth"; "Eighth" is intended. */
219 const unsigned int oneEigthPoints = num_points / 8;
221 __m256i x, y, realz, imagz;
/* _mm256_set_epi16 lists lanes from highest to lowest, so every odd 16-bit
 * lane holds -1 and every even lane holds +1. Presumably the odd lanes are
 * the imaginary components of interleaved (re, im) data -- TODO confirm
 * against the elided loads. */
225 __m256i conjugateSign =
226 _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
228 for (; number < oneEigthPoints; number++) {
/* madd multiplies matching 16-bit lanes and sums each adjacent pair into one
 * 32-bit lane; with (re, im) pairs that is x.re*y.re + x.im*y.im. */
234 realz = _mm256_madd_epi16(x, y);
/* Negate the odd 16-bit lanes of y (lanes where conjugateSign is -1). */
237 y = _mm256_sign_epi16(y, conjugateSign);
/* _MM_SHUFFLE(2, 3, 0, 1) swaps the two 16-bit lanes of each adjacent pair;
 * the immediate passed to the outer shufflehi is not visible in this excerpt. */
240 y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y,
_MM_SHUFFLE(2, 3, 0, 1)),
/* With y negated and pair-swapped this sums x.re*(-y.im) + x.im*y.re,
 * assuming the (re, im) layout noted above. */
244 imagz = _mm256_madd_epi16(x, y);
/* Interleave the 32-bit real and imaginary sums, narrow them to 16 bits with
 * signed saturation, and write 32 bytes with an unaligned store. */
248 _mm256_storeu_si256((__m256i*)c,
249 _mm256_packs_epi32(_mm256_unpacklo_epi32(realz, imagz),
250 _mm256_unpackhi_epi32(realz, imagz)));
/* Resume at index oneEigthPoints * 8 so the scalar loop below covers the
 * remaining num_points % 8 points one at a time. */
257 number = oneEigthPoints * 8;
258 int16_t* c16Ptr = (int16_t*)&cVector[number];
259 int8_t* a8Ptr = (int8_t*)&aVector[number];
260 int8_t* b8Ptr = (int8_t*)&bVector[number];
261 for (; number < num_points; number++) {
/* Read two consecutive int8_t values from each input and widen to float. */
262 float aReal = (float)*a8Ptr++;
263 float aImag = (float)*a8Ptr++;
265 float bReal = (float)*b8Ptr++;
266 float bImag = (float)*b8Ptr++;
/* Write the real part and then the imaginary part of `temp` to consecutive
 * int16_t output slots. `temp` is computed on a line not shown here;
 * presumably it is a * conj(b) built from the four floats -- TODO confirm. */
270 *c16Ptr++ = (int16_t)
lv_creal(temp);
271 *c16Ptr++ = (int16_t)
lv_cimag(temp);