blob: ce456d768ea8fd057663361d7731ff17e815c15b [file] [log] [blame]
# mach: bfin

// GENERIC CONVOLUTIONAL ENCODER
// This is a generic rate 1/n convolutional encoder. It computes n output
// bits for each input bit, based on n generic polynomials.
// It uses the set of BXOR_CC instructions to compute the bit XOR
// reduction of a state masked by a polynomial. For an alternate
// solution based on assembling several partial words, as in
// the BDT benchmark, see file conv_enc.c. The solution presented
// here is slower than conv_enc.c, but more generic.
//
// Forward Shift Register
// -----------------------
// This solution implements the XOR function by shifting the state
// left by one, applying a mask to the state, and reducing
// the result with a bit XOR reduction function.
//
//              -----  XOR ------------>  G0
//              |       |      |     |
//   +------------------------------+
//   | b0 b1 b2 b3          b14 b15 | <- in
//   +------------------------------+
//         |      |     |    |     |
//         -----  XOR ------------>  G1
//
// Instruction BXOR computes the bit G0 or G1 and stores it into CC
// and also into a destination reg half. Here, we take CC and rotate it
// into an output register.
// However, one can also store the output bit directly by storing
// the register half where this bit is placed. This would result
// in an output structure similar to the one in the original function
// Convolutional_Encode(), where an entire half word holds a bit.
// The resulting execution speed would be roughly twice as fast,
// since there is no need to rotate the output bit via CC.
.include "testutils.inc"
    start

    // ------------------------------------------------------------------
    // Rate 1/2 convolutional encoder test.
    //
    // Two 16-bit words are read from `input`.  Every input bit yields two
    // output bits (one per generator polynomial), so each input word
    // produces two 16-bit words in `output`: four words in total, which
    // are compared against known-good values at the end.
    //
    // Register roles:
    //   P0    read pointer into `input`
    //   P1    write pointer into `output`
    //   P4    number of input words still to encode
    //   P5    scratch used to seed the hardware loop counters
    //   A0    shift register: encoder state above the pending input bits
    //   R1    output collector; each ROT shifts one more bit in via CC
    //   R2    mask for generator polynomial 0 (upper half)
    //   R3    mask for generator polynomial 1 (upper half)
    //   R4.L  register-half destination required by BXORSHIFT/BXOR;
    //         never read back in this test
    //   I2    read pointer used while checking `output`
    //
    // `start`, `loadsym` and `pass` are not defined here; presumably they
    // are macros supplied by testutils.inc.
    // ------------------------------------------------------------------

    loadsym P0, input;
    loadsym P1, output;

    // Clear the output collector and build the two polynomial masks.
    R1 = 0;
    R2 = 0;
    R3 = 0;
    R2.L = 0;
    R2.H = 0xa01d;                      // generator polynomial 0
    R3.L = 0;
    R3.H = 0x12f4;                      // generator polynomial 1

    // Load the initial encoder state (all zeros here) and move it into
    // the upper half of A0, leaving the lower half free for input bits.
    A1 = A0 = 0;
    R0 = 0x0000;
    A0.w = R0;
    A0 = A0 << 16;

    // Outer loop counter: one pass per 16-bit input word.
    P4 = 2 (Z);

    // ===== outer loop: one input word per pass ========================
enc_word:

    // Fetch the next 16 input bits into the lower half of A0 and
    // advance the input pointer.
    R0 = W [ P0 ++ ] (Z);
    A0.L = R0.L;

    // ----- middle loop (LC0): two output words per input word ---------
    P5 = 2 (Z);
    LSETUP ( enc_half , enc_half_end ) LC0 = P5;
enc_half:

    // ----- inner loop (LC1): 8 input bits -> 16 output bits -----------
    P5 = 8 (Z);
    LSETUP ( enc_bit , enc_bit_end ) LC1 = P5;
enc_bit:
    // Shift the state left by one, mask it with polynomial 0 and
    // XOR-reduce the result into CC.
    R4.L = CC = BXORSHIFT( A0 , R2 );
    R1 = ROT R1 BY 1;                   // rotate CC into R1

    // Same reduction with polynomial 1, on the state as it now stands
    // (this form does not shift A0 again).
    R4.L = CC = BXOR( A0 , R3 );
enc_bit_end:
    R1 = ROT R1 BY 1;                   // rotate CC into R1; last insn of inner loop

    // Store the 16 freshly collected output bits (low half of R1).
    // This store is the last instruction of the middle loop.
enc_half_end:
    W [ P1 ++ ] = R1;

    // One input word done; repeat until the counter reaches zero.
    P4 += -1;
    CC = P4 == 0;
    IF !CC JUMP enc_word;
    // ===== end of outer loop ==========================================

    // Check the four encoded words against the expected values.
    loadsym I2, output;

    R0.L = W [ I2 ++ ];
    DBGA ( R0.L , 0x8c62 );

    R0.L = W [ I2 ++ ];
    DBGA ( R0.L , 0x262e );

    R0.L = W [ I2 ++ ];
    DBGA ( R0.L , 0x5b4d );

    R0.L = W [ I2 ++ ];
    DBGA ( R0.L , 0x834f );

    pass

    .data

// Two 16-bit words of encoder input.
input:
    .dw 0x999f
    .dw 0x1999

// Four 16-bit result slots, zeroed; filled in by the encoder loop.
output:
    .dw 0x0000
    .dw 0x0000
    .dw 0x0000
    .dw 0x0000