Vector Optimized Library of Kernels 3.0.0
Architecture-tuned implementations of math kernels
volk_8u_conv_k7_r2puppet_8u.h
Go to the documentation of this file.
/* -*- c++ -*- */
/*
 * Copyright 2014 Free Software Foundation, Inc.
 *
 * This file is part of VOLK
 *
 * SPDX-License-Identifier: LGPL-3.0-or-later
 */
9
10#ifndef INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
11#define INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
12
13#include <string.h>
14#include <volk/volk.h>
16
/*
 * Two views of the same address inside a decision buffer: byte-wise (t) for
 * pointer arithmetic and word-wise (w) for extracting individual decision bits.
 */
typedef union {
    // decision_t is a BIT vector
    unsigned char* t;
    unsigned int* w;
} p_decision_t;
22
/*
 * Return the bit parity of x (1 when an odd number of bits is set, else 0).
 *
 * x      - value whose parity is wanted; it is treated as an unsigned word of
 *          up to 32 bits.
 * Partab - 256-entry lookup table; Partab[b] must hold the parity of byte b.
 *
 * The word is folded onto its low byte with two XOR/shift steps and that byte
 * is then looked up in Partab.
 */
static inline int parity(int x, unsigned char* Partab)
{
    /* Fold in unsigned arithmetic: right-shifting a negative int is
     * implementation-defined. */
    unsigned int folded = (unsigned int)x;

    folded ^= folded >> 16;
    folded ^= folded >> 8;

    /* Only the low byte holds the folded parity. The upper bytes still carry
     * left-over bits, so mask them off to stay inside the 256-entry table. */
    return Partab[folded & 0xffu];
}
29
/*
 * Trace back through stored Viterbi decisions and write out the decoded bits.
 *
 * data      - output buffer of at least nbits bytes; each decoded bit (0 or 1)
 *             is stored in its own byte at index (step + tailsize - 6) % nbits.
 * nbits     - number of trellis steps to trace back (= decoded bits produced).
 * endstate  - state the traceback starts from; reduced modulo 64 first.
 * tailsize  - number of leading decision records in `decisions` to skip.
 * decisions - decision records, 8 bytes (one bit per state, 64 states) per
 *             trellis step, read as native unsigned int words of 32 bits.
 *
 * Returns the state reached after the first K - 1 = 6 traceback steps. When
 * nbits < 6 those steps do not exist and the reduced starting state is
 * returned (the original code returned an uninitialized value in that case).
 */
static inline int chainback_viterbi(unsigned char* data,
                                    unsigned int nbits,
                                    unsigned int endstate,
                                    unsigned int tailsize,
                                    unsigned char* decisions)
{
    enum { K = 7, NUM_STATES = 1 << (K - 1), RECORD_BYTES = NUM_STATES / 8 };

    const unsigned int flush_steps = K - 1;
    const unsigned int framebits = nbits;
    /* Deliberately unsigned: the output index is computed modulo 2^N and then
     * modulo framebits, exactly as the mixed int/unsigned original did. */
    const unsigned int dif = tailsize - flush_steps;
    /* Look past the tail records. */
    const unsigned char* trellis = decisions + (size_t)tailsize * RECORD_BYTES;

    unsigned int state = endstate % NUM_STATES;
    int retval = (int)state;

    for (unsigned int step = nbits; step-- != 0;) {
        const unsigned char* record = trellis + (size_t)step * RECORD_BYTES;
        unsigned int word;
        unsigned int k;

        /* Copy the word out instead of dereferencing an unsigned int* into a
         * byte buffer: same bytes, no alignment or aliasing assumptions. */
        memcpy(&word, record + (state / 32) * sizeof word, sizeof word);
        k = (word >> (state % 32)) & 1u;

        state = (state >> 1) | (k << (K - 2));
        data[(step + dif) % framebits] = (unsigned char)k;

        /* Remember the state once the last K - 1 steps have been unwound. */
        if (nbits >= flush_steps && step == nbits - flush_steps) {
            retval = (int)state;
        }
    }

    return retval;
}
90
91
92#if LV_HAVE_SSE3
93
94#include <emmintrin.h>
95#include <mmintrin.h>
96#include <pmmintrin.h>
97#include <stdio.h>
98#include <xmmintrin.h>
99
/*
 * Puppet wrapper that drives the SSE3 "spiral" K=7, rate-2 convolutional
 * decoder kernel through a plain (syms, dec, framebits) interface.
 *
 * syms      - symbol buffer handed to the kernel unchanged.
 * dec       - output; receives framebits / 2 - 6 decoded bits, one per byte.
 * framebits - frame size; the kernel is run on framebits / 2 - 6 bits plus 6
 *             excess bits. Calls with framebits < 12 return without touching
 *             dec, because the unsigned bit count would otherwise wrap.
 *
 * Work buffers are function-static, allocated on first use and kept for later
 * calls; nothing here synchronizes access to them. The decision buffer grows
 * when a later call passes a larger framebits. If an allocation fails the
 * function returns without writing dec.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char* syms,
                                                      unsigned char* dec,
                                                      unsigned int framebits)
{
    enum { NUM_STATES = 1 << 6, RATE = 2, EXCESS = 6 };
    static const int polys[RATE] = { 79, 109 };

    static int once = 1;
    static unsigned char* X; /* metrics handed to the kernel */
    static unsigned char* Y; /* second half of the X allocation */
    static unsigned char* Branchtab;
    static unsigned char* D; /* decision buffer */
    static size_t D_bytes;   /* current capacity of D */
    static unsigned char Partab[256];

    if (framebits < 2 * EXCESS) {
        return;
    }

    if (once) {
        unsigned char* metrics =
            (unsigned char*)volk_malloc(2 * NUM_STATES, volk_get_alignment());
        unsigned char* branches =
            (unsigned char*)volk_malloc(NUM_STATES / 2 * RATE, volk_get_alignment());

        if (metrics == NULL || branches == NULL) {
            if (metrics != NULL) {
                volk_free(metrics);
            }
            if (branches != NULL) {
                volk_free(branches);
            }
            return;
        }
        X = metrics;
        Y = X + NUM_STATES;
        Branchtab = branches;

        /* Initialize parity lookup table */
        for (int value = 0; value < 256; value++) {
            int ones = 0;
            for (int rest = value; rest != 0; rest >>= 1) {
                ones += rest & 1;
            }
            Partab[value] = (unsigned char)(ones & 1);
        }

        /* Initialize the branch table */
        for (int state = 0; state < NUM_STATES / 2; state++) {
            for (int p = 0; p < RATE; p++) {
                Branchtab[p * NUM_STATES / 2 + state] =
                    parity((2 * state) & polys[p], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* The first call's frame size must not cap later calls: grow D on demand. */
    const size_t needed = (size_t)(NUM_STATES / 8) * ((size_t)framebits + EXCESS);
    if (needed > D_bytes) {
        unsigned char* grown = (unsigned char*)volk_malloc(needed, volk_get_alignment());
        if (grown == NULL) {
            return;
        }
        if (D != NULL) {
            volk_free(D);
        }
        D = grown;
        D_bytes = needed;
    }

    // unbias the old_metrics
    memset(X, 31, NUM_STATES);

    // initialize decisions
    memset(D, 0, needed);

    volk_8u_x4_conv_k7_r2_8u_spiral(
        Y, X, syms, D, framebits / 2 - EXCESS, EXCESS, Branchtab);

    /* Start the traceback from the state with the smallest value in X
     * (first one wins on ties). */
    unsigned int min = X[0];
    int best = 0;
    for (int s = 0; s < NUM_STATES; ++s) {
        if (X[s] < min) {
            min = X[s];
            best = s;
        }
    }

    chainback_viterbi(dec, framebits / 2 - EXCESS, best, EXCESS, D);
}
174
175#endif /*LV_HAVE_SSE3*/
176
177
178#if LV_HAVE_NEON
179
180#include "volk/sse2neon.h"
181
/*
 * Puppet wrapper that drives the NEON "spiral" K=7, rate-2 convolutional
 * decoder kernel through a plain (syms, dec, framebits) interface.
 *
 * syms      - symbol buffer handed to the kernel unchanged.
 * dec       - output; receives framebits / 2 - 6 decoded bits, one per byte.
 * framebits - frame size; the kernel is run on framebits / 2 - 6 bits plus 6
 *             excess bits. Calls with framebits < 12 return without touching
 *             dec, because the unsigned bit count would otherwise wrap.
 *
 * Work buffers are function-static, allocated on first use and kept for later
 * calls; nothing here synchronizes access to them. The decision buffer grows
 * when a later call passes a larger framebits. If an allocation fails the
 * function returns without writing dec.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_neonspiral(unsigned char* syms,
                                                          unsigned char* dec,
                                                          unsigned int framebits)
{
    enum { NUM_STATES = 1 << 6, RATE = 2, EXCESS = 6 };
    static const int polys[RATE] = { 79, 109 };

    static int once = 1;
    static unsigned char* X; /* metrics handed to the kernel */
    static unsigned char* Y; /* second half of the X allocation */
    static unsigned char* Branchtab;
    static unsigned char* D; /* decision buffer */
    static size_t D_bytes;   /* current capacity of D */
    static unsigned char Partab[256];

    if (framebits < 2 * EXCESS) {
        return;
    }

    if (once) {
        unsigned char* metrics =
            (unsigned char*)volk_malloc(2 * NUM_STATES, volk_get_alignment());
        unsigned char* branches =
            (unsigned char*)volk_malloc(NUM_STATES / 2 * RATE, volk_get_alignment());

        if (metrics == NULL || branches == NULL) {
            if (metrics != NULL) {
                volk_free(metrics);
            }
            if (branches != NULL) {
                volk_free(branches);
            }
            return;
        }
        X = metrics;
        Y = X + NUM_STATES;
        Branchtab = branches;

        /* Initialize parity lookup table */
        for (int value = 0; value < 256; value++) {
            int ones = 0;
            for (int rest = value; rest != 0; rest >>= 1) {
                ones += rest & 1;
            }
            Partab[value] = (unsigned char)(ones & 1);
        }

        /* Initialize the branch table */
        for (int state = 0; state < NUM_STATES / 2; state++) {
            for (int p = 0; p < RATE; p++) {
                Branchtab[p * NUM_STATES / 2 + state] =
                    parity((2 * state) & polys[p], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* The first call's frame size must not cap later calls: grow D on demand. */
    const size_t needed = (size_t)(NUM_STATES / 8) * ((size_t)framebits + EXCESS);
    if (needed > D_bytes) {
        unsigned char* grown = (unsigned char*)volk_malloc(needed, volk_get_alignment());
        if (grown == NULL) {
            return;
        }
        if (D != NULL) {
            volk_free(D);
        }
        D = grown;
        D_bytes = needed;
    }

    // unbias the old_metrics
    memset(X, 31, NUM_STATES);

    // initialize decisions
    memset(D, 0, needed);

    volk_8u_x4_conv_k7_r2_8u_neonspiral(
        Y, X, syms, D, framebits / 2 - EXCESS, EXCESS, Branchtab);

    /* Start the traceback from the state with the smallest value in X
     * (first one wins on ties). */
    unsigned int min = X[0];
    int best = 0;
    for (int s = 0; s < NUM_STATES; ++s) {
        if (X[s] < min) {
            min = X[s];
            best = s;
        }
    }

    chainback_viterbi(dec, framebits / 2 - EXCESS, best, EXCESS, D);
}
256
257#endif /*LV_HAVE_NEON*/
258
259
260//#if LV_HAVE_AVX2
261//
262//#include <immintrin.h>
263//#include <stdio.h>
264//
265// static inline void volk_8u_conv_k7_r2puppet_8u_avx2(unsigned char* syms,
266// unsigned char* dec,
267// unsigned int framebits)
268//{
269//
270//
271// static int once = 1;
272// int d_numstates = (1 << 6);
273// int rate = 2;
274// static unsigned char* D;
275// static unsigned char* Y;
276// static unsigned char* X;
277// static unsigned int excess = 6;
278// static unsigned char* Branchtab;
279// static unsigned char Partab[256];
280//
281// int d_polys[2] = { 79, 109 };
282//
283//
284// if (once) {
285//
286// X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
287// Y = X + d_numstates;
288// Branchtab =
289// (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
290// D = (unsigned char*)volk_malloc((d_numstates / 8) * (framebits + 6),
291// volk_get_alignment());
292// int state, i;
293// int cnt, ti;
294//
295// /* Initialize parity lookup table */
296// for (i = 0; i < 256; i++) {
297// cnt = 0;
298// ti = i;
299// while (ti) {
300// if (ti & 1)
301// cnt++;
302// ti >>= 1;
303// }
304// Partab[i] = cnt & 1;
305// }
306// /* Initialize the branch table */
307// for (state = 0; state < d_numstates / 2; state++) {
308// for (i = 0; i < rate; i++) {
309// Branchtab[i * d_numstates / 2 + state] =
310// parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
311// }
312// }
313//
314// once = 0;
315// }
316//
317// // unbias the old_metrics
318// memset(X, 31, d_numstates);
319//
320// // initialize decisions
321// memset(D, 0, (d_numstates / 8) * (framebits + 6));
322//
323// volk_8u_x4_conv_k7_r2_8u_avx2(
324// Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);
325//
326// unsigned int min = X[0];
327// int i = 0, state = 0;
328// for (i = 0; i < (d_numstates); ++i) {
329// if (X[i] < min) {
330// min = X[i];
331// state = i;
332// }
333// }
334//
335// chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);
336//
337// return;
338//}
339//
340//#endif /*LV_HAVE_AVX2*/
341
342
343#if LV_HAVE_GENERIC
344
345
/*
 * Puppet wrapper that drives the generic (portable C) K=7, rate-2
 * convolutional decoder kernel through a plain (syms, dec, framebits)
 * interface.
 *
 * syms      - symbol buffer handed to the kernel unchanged.
 * dec       - output; receives framebits / 2 - 6 decoded bits, one per byte.
 * framebits - frame size; the kernel is run on framebits / 2 - 6 bits plus 6
 *             excess bits. Calls with framebits < 12 return without touching
 *             dec, because the unsigned bit count would otherwise wrap.
 *
 * Work buffers are function-static, allocated on first use and kept for later
 * calls; nothing here synchronizes access to them. The decision buffer grows
 * when a later call passes a larger framebits. If an allocation fails the
 * function returns without writing dec.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char* syms,
                                                       unsigned char* dec,
                                                       unsigned int framebits)
{
    enum { NUM_STATES = 1 << 6, RATE = 2, EXCESS = 6 };
    static const int polys[RATE] = { 79, 109 };

    static int once = 1;
    static unsigned char* X; /* metrics handed to the kernel */
    static unsigned char* Y; /* second half of the X allocation */
    static unsigned char* Branchtab;
    static unsigned char* D; /* decision buffer */
    static size_t D_bytes;   /* current capacity of D */
    static unsigned char Partab[256];

    if (framebits < 2 * EXCESS) {
        return;
    }

    if (once) {
        unsigned char* metrics =
            (unsigned char*)volk_malloc(2 * NUM_STATES, volk_get_alignment());
        unsigned char* branches =
            (unsigned char*)volk_malloc(NUM_STATES / 2 * RATE, volk_get_alignment());

        if (metrics == NULL || branches == NULL) {
            if (metrics != NULL) {
                volk_free(metrics);
            }
            if (branches != NULL) {
                volk_free(branches);
            }
            return;
        }
        X = metrics;
        Y = X + NUM_STATES;
        Branchtab = branches;

        /* Initialize parity lookup table */
        for (int value = 0; value < 256; value++) {
            int ones = 0;
            for (int rest = value; rest != 0; rest >>= 1) {
                ones += rest & 1;
            }
            Partab[value] = (unsigned char)(ones & 1);
        }

        /* Initialize the branch table */
        for (int state = 0; state < NUM_STATES / 2; state++) {
            for (int p = 0; p < RATE; p++) {
                Branchtab[p * NUM_STATES / 2 + state] =
                    parity((2 * state) & polys[p], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* The first call's frame size must not cap later calls: grow D on demand. */
    const size_t needed = (size_t)(NUM_STATES / 8) * ((size_t)framebits + EXCESS);
    if (needed > D_bytes) {
        unsigned char* grown = (unsigned char*)volk_malloc(needed, volk_get_alignment());
        if (grown == NULL) {
            return;
        }
        if (D != NULL) {
            volk_free(D);
        }
        D = grown;
        D_bytes = needed;
    }

    // unbias the old_metrics
    memset(X, 31, NUM_STATES);

    // initialize decisions
    memset(D, 0, needed);

    volk_8u_x4_conv_k7_r2_8u_generic(
        Y, X, syms, D, framebits / 2 - EXCESS, EXCESS, Branchtab);

    /* Start the traceback from the state with the smallest value in X
     * (first one wins on ties). */
    unsigned int min = X[0];
    int best = 0;
    for (int s = 0; s < NUM_STATES; ++s) {
        if (X[s] < min) {
            min = X[s];
            best = s;
        }
    }

    chainback_viterbi(dec, framebits / 2 - EXCESS, best, EXCESS, D);
}
421
422#endif /* LV_HAVE_GENERIC */
423
424#endif /*INCLUDED_volk_8u_conv_k7_r2puppet_8u_H*/