154 unsigned int num_points)
156 unsigned int number = 0;
157 const int8_t* complexVectorPtr = (int8_t*)complexVector;
158 int8_t* iBufferPtr = iBuffer;
160 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
162 14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
163 __m128i complexVal1, complexVal2, outputVal;
165 unsigned int sixteenthPoints = num_points / 16;
167 for (number = 0; number < sixteenthPoints; number++) {
169 complexVectorPtr += 16;
171 complexVectorPtr += 16;
182 number = sixteenthPoints * 16;
183 for (; number < num_points; number++) {
184 *iBufferPtr++ = *complexVectorPtr++;
196 unsigned int num_points)
198 unsigned int number = 0;
199 const int8_t* complexVectorPtr = (int8_t*)complexVector;
200 int8_t* iBufferPtr = iBuffer;
202 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
204 14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
205 __m256i complexVal1, complexVal2, outputVal;
206 __m128i complexVal1H, complexVal1L, complexVal2H, complexVal2L, outputVal1,
209 unsigned int thirtysecondPoints = num_points / 32;
211 for (number = 0; number < thirtysecondPoints; number++) {
213 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
214 complexVectorPtr += 32;
215 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
216 complexVectorPtr += 32;
218 complexVal1H = _mm256_extractf128_si256(complexVal1, 1);
219 complexVal1L = _mm256_extractf128_si256(complexVal1, 0);
220 complexVal2H = _mm256_extractf128_si256(complexVal2, 1);
221 complexVal2L = _mm256_extractf128_si256(complexVal2, 0);
232 __m256i dummy = _mm256_setzero_si256();
233 outputVal = _mm256_insertf128_si256(dummy, outputVal1, 0);
234 outputVal = _mm256_insertf128_si256(outputVal, outputVal2, 1);
237 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
241 number = thirtysecondPoints * 32;
242 for (; number < num_points; number++) {
243 *iBufferPtr++ = *complexVectorPtr++;
272 unsigned int num_points)
275 unsigned int sixteenth_points = num_points / 16;
277 int8x16x2_t input_vector;
278 for (number = 0; number < sixteenth_points; ++number) {
279 input_vector = vld2q_s8((int8_t*)complexVector);
280 vst1q_s8(iBuffer, input_vector.val[0]);
285 const int8_t* complexVectorPtr = (int8_t*)complexVector;
286 int8_t* iBufferPtr = iBuffer;
287 for (number = sixteenth_points * 16; number < num_points; number++) {
288 *iBufferPtr++ = *complexVectorPtr++;