blob: f7ea41edbd1519590eb516b17dc912348150baa9 [file] [log] [blame]
/* Vector Dot Product
* This program computes a simple vector dot product using hard-wired
* input buffers of 128 samples each. The samples are 16-bit values in
* signed 1.15 fractional format.
*/
# mach: bfin
.include "testutils.inc"
start
// Dot product of the two 128-sample buffers data0 and data1 using both
// MACs: A1 accumulates the products of the high halves, A0 the products
// of the low halves, so every 32-bit load supplies two 16-bit samples
// from each buffer.
// NOTE(review): start, loadsym and pass are presumably macros from
// testutils.inc, which is not visible here.
// load buffer base addresses into index registers I0/I1
loadsym I0, data0;
loadsym I1, data1;
// loop control
// The one-instruction hardware loop at loop1 runs 63 times (LC0 = P4).
// Together with the dual MAC that follows the loop this gives 64 dual
// MACs, i.e. 2 * 64 = 128 samples per buffer.
P4 = 63;
LSETUP ( loop1 , loop1 ) LC0 = P4;
// clear both accumulators before the first MAC
A1 = A0 = 0;
// Prime R0/R1 with the first sample pair of each buffer. These two 32b
// loads are issued serially; inside the loop the loads for the next
// iteration are issued in parallel with the dual MAC.
R0 = [ I0 ++ ]; R1 = [ I1 ++ ];
// Loop body: accumulate the current sample pairs, then fetch the next ones.
loop1: A1 += R0.H * R1.H, A0 += R0.L * R1.L || R0 = [ I0 ++ ] || R1 = [ I1 ++ ];
// Drain: accumulate the pairs fetched by the last loop iteration. No load
// here, so exactly 64 words (256 bytes) are read from each buffer.
A1 += R0.H * R1.H, A0 += R0.L * R1.L;
// extract two partial results from accumulators
// and do final addition
R0 = ( A0 += A1 );
// Expected sum is 0x00545600 (about 0.002574 as a 1.31 fraction), i.e.
// 2 * sum((2k)^2) for k = 0..127; DBGA checks each half of R0 against it.
DBGA ( R0.L , 0x5600 ); // low half of 0x00545600
DBGA ( R0.H , 0x0054 ); // high half of 0x00545600
pass
.data
// data0: first input vector of the dot product.
// 128 16-bit samples forming the even ramp 0x0000, 0x0002, ..., 0x00FE
// (256 bytes in total).
// The buffer is read two samples at a time with 32-bit loads, so it is
// explicitly aligned to 4 bytes instead of relying on where the section
// happens to start.
.balign 4
data0:
// data0_sample is an assembly-time counter: emit it, then step it by 2.
.set data0_sample, 0
.rept 128
.dw data0_sample
.set data0_sample, data0_sample + 2
.endr
// data1: second input vector of the dot product.
// 128 16-bit samples forming the even ramp 0x0000, 0x0002, ..., 0x00FE
// (256 bytes in total).
// The buffer is read two samples at a time with 32-bit loads, so it is
// explicitly aligned to 4 bytes instead of relying on the size of whatever
// precedes it in the section.
.balign 4
data1:
// data1_sample is an assembly-time counter: emit it, then step it by 2.
.set data1_sample, 0
.rept 128
.dw data1_sample
.set data1_sample, data1_sample + 2
.endr