blob: 11a471183193f64289bb4ea9b01a83a35036ce8b [file] [log] [blame]
// Written in the D programming language.
/**
* Builtin SIMD intrinsics
*
* Source: $(DRUNTIMESRC core/_simd.d)
*
* Copyright: Copyright Digital Mars 2012-2020
* License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
* Authors: $(HTTP digitalmars.com, Walter Bright)
*/
module core.simd;
pure:
nothrow:
@safe:
@nogc:
/*******************************
 * Create a vector type.
 *
 * Params:
 *      T = one of double[2], float[4], void[16], byte[16], ubyte[16],
 *          short[8], ushort[8], int[4], uint[4], long[2], ulong[2].
 *          For 256 bit vectors,
 *          one of double[4], float[8], void[32], byte[32], ubyte[32],
 *          short[16], ushort[16], int[8], uint[8], long[4], ulong[4]
 */
template Vector(T)
{
    // `__vector` is compiler magic; this eponymous template merely gives it a
    // library-level name. Any T that the compiler cannot turn into a vector
    // is rejected by the compiler itself.
    alias Vector = __vector(T);
}
/* Convenience names for the vector types.
 * Each alias is only declared when the compiler accepts the
 * corresponding Vector instantiation for the current target.
 */

// 8-byte (64 bit) vectors
static if (is(Vector!(void[8]))) alias void8 = Vector!(void[8]); ///
static if (is(Vector!(double[1]))) alias double1 = Vector!(double[1]); ///
static if (is(Vector!(float[2]))) alias float2 = Vector!(float[2]); ///
static if (is(Vector!(byte[8]))) alias byte8 = Vector!(byte[8]); ///
static if (is(Vector!(ubyte[8]))) alias ubyte8 = Vector!(ubyte[8]); ///
static if (is(Vector!(short[4]))) alias short4 = Vector!(short[4]); ///
static if (is(Vector!(ushort[4]))) alias ushort4 = Vector!(ushort[4]); ///
static if (is(Vector!(int[2]))) alias int2 = Vector!(int[2]); ///
static if (is(Vector!(uint[2]))) alias uint2 = Vector!(uint[2]); ///
static if (is(Vector!(long[1]))) alias long1 = Vector!(long[1]); ///
static if (is(Vector!(ulong[1]))) alias ulong1 = Vector!(ulong[1]); ///

// 16-byte (128 bit) vectors
static if (is(Vector!(void[16]))) alias void16 = Vector!(void[16]); ///
static if (is(Vector!(double[2]))) alias double2 = Vector!(double[2]); ///
static if (is(Vector!(float[4]))) alias float4 = Vector!(float[4]); ///
static if (is(Vector!(byte[16]))) alias byte16 = Vector!(byte[16]); ///
static if (is(Vector!(ubyte[16]))) alias ubyte16 = Vector!(ubyte[16]); ///
static if (is(Vector!(short[8]))) alias short8 = Vector!(short[8]); ///
static if (is(Vector!(ushort[8]))) alias ushort8 = Vector!(ushort[8]); ///
static if (is(Vector!(int[4]))) alias int4 = Vector!(int[4]); ///
static if (is(Vector!(uint[4]))) alias uint4 = Vector!(uint[4]); ///
static if (is(Vector!(long[2]))) alias long2 = Vector!(long[2]); ///
static if (is(Vector!(ulong[2]))) alias ulong2 = Vector!(ulong[2]); ///

// 32-byte (256 bit) vectors
static if (is(Vector!(void[32]))) alias void32 = Vector!(void[32]); ///
static if (is(Vector!(double[4]))) alias double4 = Vector!(double[4]); ///
static if (is(Vector!(float[8]))) alias float8 = Vector!(float[8]); ///
static if (is(Vector!(byte[32]))) alias byte32 = Vector!(byte[32]); ///
static if (is(Vector!(ubyte[32]))) alias ubyte32 = Vector!(ubyte[32]); ///
static if (is(Vector!(short[16]))) alias short16 = Vector!(short[16]); ///
static if (is(Vector!(ushort[16]))) alias ushort16 = Vector!(ushort[16]); ///
static if (is(Vector!(int[8]))) alias int8 = Vector!(int[8]); ///
static if (is(Vector!(uint[8]))) alias uint8 = Vector!(uint[8]); ///
static if (is(Vector!(long[4]))) alias long4 = Vector!(long[4]); ///
static if (is(Vector!(ulong[4]))) alias ulong4 = Vector!(ulong[4]); ///

// 64-byte (512 bit) vectors
static if (is(Vector!(void[64]))) alias void64 = Vector!(void[64]); ///
static if (is(Vector!(double[8]))) alias double8 = Vector!(double[8]); ///
static if (is(Vector!(float[16]))) alias float16 = Vector!(float[16]); ///
static if (is(Vector!(byte[64]))) alias byte64 = Vector!(byte[64]); ///
static if (is(Vector!(ubyte[64]))) alias ubyte64 = Vector!(ubyte[64]); ///
static if (is(Vector!(short[32]))) alias short32 = Vector!(short[32]); ///
static if (is(Vector!(ushort[32]))) alias ushort32 = Vector!(ushort[32]); ///
static if (is(Vector!(int[16]))) alias int16 = Vector!(int[16]); ///
static if (is(Vector!(uint[16]))) alias uint16 = Vector!(uint[16]); ///
static if (is(Vector!(long[8]))) alias long8 = Vector!(long[8]); ///
static if (is(Vector!(ulong[8]))) alias ulong8 = Vector!(ulong[8]); ///
version (D_SIMD)
{
/** XMM opcodes that conform to the following:
*
* opcode xmm1,xmm2/mem
*
* and do not have side effects (i.e. do not write to memory).
*
* Each value packs the instruction's prefix and opcode bytes into a single
* integer, most significant byte first (e.g. STOD = 0x660F7E corresponds to
* the byte sequence 66 0F 7E).
*
* NOTE(review): the STO* and MOVNT* members below have a memory destination
* according to their own comments, so the "do not write to memory" wording
* presumably predates them - confirm which members are meant for __simd and
* which for __simd_sto.
*/
enum XMM
{
// Addition: scalar/packed floating point, then packed integer
ADDSS = 0xF30F58,
ADDSD = 0xF20F58,
ADDPS = 0x000F58,
ADDPD = 0x660F58,
PADDB = 0x660FFC,
PADDW = 0x660FFD,
PADDD = 0x660FFE,
PADDQ = 0x660FD4,
// Subtraction
SUBSS = 0xF30F5C,
SUBSD = 0xF20F5C,
SUBPS = 0x000F5C,
SUBPD = 0x660F5C,
PSUBB = 0x660FF8,
PSUBW = 0x660FF9,
PSUBD = 0x660FFA,
PSUBQ = 0x660FFB,
// Multiplication
MULSS = 0xF30F59,
MULSD = 0xF20F59,
MULPS = 0x000F59,
MULPD = 0x660F59,
PMULLW = 0x660FD5,
// Division
DIVSS = 0xF30F5E,
DIVSD = 0xF20F5E,
DIVPS = 0x000F5E,
DIVPD = 0x660F5E,
// Bitwise operations and unordered compares
PAND = 0x660FDB,
POR = 0x660FEB,
UCOMISS = 0x000F2E,
UCOMISD = 0x660F2E,
XORPS = 0x000F57,
XORPD = 0x660F57,
// Use STO and LOD instead of MOV to distinguish the direction
// (Destination is first operand, Source is second operand)
STOSS = 0xF30F11, /// MOVSS xmm1/m32, xmm2
STOSD = 0xF20F11, /// MOVSD xmm1/m64, xmm2
STOAPS = 0x000F29, /// MOVAPS xmm2/m128, xmm1
STOAPD = 0x660F29, /// MOVAPD xmm2/m128, xmm1
STODQA = 0x660F7F, /// MOVDQA xmm2/m128, xmm1
STOD = 0x660F7E, /// MOVD reg/mem64, xmm 66 0F 7E /r
STOQ = 0x660FD6, /// MOVQ xmm2/m64, xmm1
LODSS = 0xF30F10, /// MOVSS xmm1, xmm2/m32
LODSD = 0xF20F10, /// MOVSD xmm1, xmm2/m64
LODAPS = 0x000F28, /// MOVAPS xmm1, xmm2/m128
LODAPD = 0x660F28, /// MOVAPD xmm1, xmm2/m128
LODDQA = 0x660F6F, /// MOVDQA xmm1, xmm2/m128
LODD = 0x660F6E, /// MOVD xmm, reg/mem64 66 0F 6E /r
LODQ = 0xF30F7E, /// MOVQ xmm1, xmm2/m64
LODDQU = 0xF30F6F, /// MOVDQU xmm1, xmm2/mem128 F3 0F 6F /r
STODQU = 0xF30F7F, /// MOVDQU xmm1/mem128, xmm2 F3 0F 7F /r
MOVDQ2Q = 0xF20FD6, /// MOVDQ2Q mmx, xmm F2 0F D6 /r
// Note: MOVHLPS/LODLPS and LODHPS/MOVLHPS share a value in this table;
// per their comments one is the register form and the other the memory form.
MOVHLPS = 0x0F12, /// MOVHLPS xmm1, xmm2 0F 12 /r
LODHPD = 0x660F16, /// MOVHPD xmm1, m64
STOHPD = 0x660F17, /// MOVHPD mem64, xmm1 66 0F 17 /r
LODHPS = 0x0F16, /// MOVHPS xmm1, m64
STOHPS = 0x0F17, /// MOVHPS m64, xmm1
MOVLHPS = 0x0F16, /// MOVLHPS xmm1, xmm2
LODLPD = 0x660F12, /// MOVLPD xmm1, m64
STOLPD = 0x660F13, /// MOVLPD m64, xmm1
LODLPS = 0x0F12, /// MOVLPS xmm1, m64
STOLPS = 0x0F13, /// MOVLPS m64, xmm1
MOVMSKPD = 0x660F50, /// MOVMSKPD reg, xmm
MOVMSKPS = 0x0F50, /// MOVMSKPS reg, xmm
MOVNTDQ = 0x660FE7, /// MOVNTDQ m128, xmm1
MOVNTI = 0x0FC3, /// MOVNTI m32, r32
MOVNTPD = 0x660F2B, /// MOVNTPD m128, xmm1
MOVNTPS = 0x0F2B, /// MOVNTPS m128, xmm1
MOVNTQ = 0x0FE7, /// MOVNTQ m64, mm
MOVQ2DQ = 0xF30FD6, /// MOVQ2DQ
LODUPD = 0x660F10, /// MOVUPD xmm1, xmm2/m128
STOUPD = 0x660F11, /// MOVUPD xmm2/m128, xmm1
LODUPS = 0x0F10, /// MOVUPS xmm1, xmm2/m128
STOUPS = 0x0F11, /// MOVUPS xmm2/m128, xmm1
// Packed integer pack / saturating arithmetic / compare / shift / unpack
PACKSSDW = 0x660F6B,
PACKSSWB = 0x660F63,
PACKUSWB = 0x660F67,
PADDSB = 0x660FEC,
PADDSW = 0x660FED,
PADDUSB = 0x660FDC,
PADDUSW = 0x660FDD,
PANDN = 0x660FDF,
PCMPEQB = 0x660F74,
PCMPEQD = 0x660F76,
PCMPEQW = 0x660F75,
PCMPGTB = 0x660F64,
PCMPGTD = 0x660F66,
PCMPGTW = 0x660F65,
PMADDWD = 0x660FF5,
PSLLW = 0x660FF1,
PSLLD = 0x660FF2,
PSLLQ = 0x660FF3,
PSRAW = 0x660FE1,
PSRAD = 0x660FE2,
PSRLW = 0x660FD1,
PSRLD = 0x660FD2,
PSRLQ = 0x660FD3,
PSUBSB = 0x660FE8,
PSUBSW = 0x660FE9,
PSUBUSB = 0x660FD8,
PSUBUSW = 0x660FD9,
PUNPCKHBW = 0x660F68,
PUNPCKHDQ = 0x660F6A,
PUNPCKHWD = 0x660F69,
PUNPCKLBW = 0x660F60,
PUNPCKLDQ = 0x660F62,
PUNPCKLWD = 0x660F61,
PXOR = 0x660FEF,
// Floating point logic, compare and conversion
ANDPD = 0x660F54,
ANDPS = 0x0F54,
ANDNPD = 0x660F55,
ANDNPS = 0x0F55,
CMPPS = 0x0FC2,
CMPPD = 0x660FC2,
CMPSD = 0xF20FC2,
CMPSS = 0xF30FC2,
COMISD = 0x660F2F,
COMISS = 0x0F2F,
CVTDQ2PD = 0xF30FE6,
CVTDQ2PS = 0x0F5B,
CVTPD2DQ = 0xF20FE6,
CVTPD2PI = 0x660F2D,
CVTPD2PS = 0x660F5A,
CVTPI2PD = 0x660F2A,
CVTPI2PS = 0x0F2A,
CVTPS2DQ = 0x660F5B,
CVTPS2PD = 0x0F5A,
CVTPS2PI = 0x0F2D,
CVTSD2SI = 0xF20F2D,
CVTSD2SS = 0xF20F5A,
CVTSI2SD = 0xF20F2A,
CVTSI2SS = 0xF30F2A,
CVTSS2SD = 0xF30F5A,
CVTSS2SI = 0xF30F2D,
CVTTPD2PI = 0x660F2C,
CVTTPD2DQ = 0x660FE6,
CVTTPS2DQ = 0xF30F5B,
CVTTPS2PI = 0x0F2C,
CVTTSD2SI = 0xF20F2C,
CVTTSS2SI = 0xF30F2C,
MASKMOVDQU = 0x660FF7,
MASKMOVQ = 0x0FF7,
// Min / max / or / average and miscellaneous packed integer ops
MAXPD = 0x660F5F,
MAXPS = 0x0F5F,
MAXSD = 0xF20F5F,
MAXSS = 0xF30F5F,
MINPD = 0x660F5D,
MINPS = 0x0F5D,
MINSD = 0xF20F5D,
MINSS = 0xF30F5D,
ORPD = 0x660F56,
ORPS = 0x0F56,
PAVGB = 0x660FE0,
PAVGW = 0x660FE3,
PMAXSW = 0x660FEE,
//PINSRW = 0x660FC4,
PMAXUB = 0x660FDE,
PMINSW = 0x660FEA,
PMINUB = 0x660FDA,
//PMOVMSKB = 0x660FD7,
PMULHUW = 0x660FE4,
PMULHW = 0x660FE5,
PMULUDQ = 0x660FF4,
PSADBW = 0x660FF6,
PUNPCKHQDQ = 0x660F6D,
PUNPCKLQDQ = 0x660F6C,
// Reciprocal, square root, shuffle and unpack (floating point)
RCPPS = 0x0F53,
RCPSS = 0xF30F53,
RSQRTPS = 0x0F52,
RSQRTSS = 0xF30F52,
SQRTPD = 0x660F51,
SHUFPD = 0x660FC6,
SHUFPS = 0x0FC6,
SQRTPS = 0x0F51,
SQRTSD = 0xF20F51,
SQRTSS = 0xF30F51,
UNPCKHPD = 0x660F15,
UNPCKHPS = 0x0F15,
UNPCKLPD = 0x660F14,
UNPCKLPS = 0x0F14,
PSHUFD = 0x660F70,
PSHUFHW = 0xF30F70,
PSHUFLW = 0xF20F70,
PSHUFW = 0x0F70,
// PSLLDQ and PSRLDQ share the bytes 66 0F 73; the extra leading byte
// (07 / 03) presumably carries the ModRM /digit that selects the
// operation - TODO confirm against the code generator.
PSLLDQ = 0x07660F73,
PSRLDQ = 0x03660F73,
//PREFETCH = 0x0F18,
// SSE3 Pentium 4 (Prescott)
ADDSUBPD = 0x660FD0,
ADDSUBPS = 0xF20FD0,
HADDPD = 0x660F7C,
HADDPS = 0xF20F7C,
HSUBPD = 0x660F7D,
HSUBPS = 0xF20F7D,
MOVDDUP = 0xF20F12,
MOVSHDUP = 0xF30F16,
MOVSLDUP = 0xF30F12,
LDDQU = 0xF20FF0,
MONITOR = 0x0F01C8,
MWAIT = 0x0F01C9,
// SSSE3
PALIGNR = 0x660F3A0F,
PHADDD = 0x660F3802,
PHADDW = 0x660F3801,
PHADDSW = 0x660F3803,
PABSB = 0x660F381C,
PABSD = 0x660F381E,
PABSW = 0x660F381D,
PSIGNB = 0x660F3808,
PSIGND = 0x660F380A,
PSIGNW = 0x660F3809,
PSHUFB = 0x660F3800,
PMADDUBSW = 0x660F3804,
PMULHRSW = 0x660F380B,
PHSUBD = 0x660F3806,
PHSUBW = 0x660F3805,
PHSUBSW = 0x660F3807,
// SSE4.1
BLENDPD = 0x660F3A0D,
BLENDPS = 0x660F3A0C,
BLENDVPD = 0x660F3815,
BLENDVPS = 0x660F3814,
DPPD = 0x660F3A41,
DPPS = 0x660F3A40,
EXTRACTPS = 0x660F3A17,
INSERTPS = 0x660F3A21,
MPSADBW = 0x660F3A42,
PBLENDVB = 0x660F3810,
PBLENDW = 0x660F3A0E,
// PEXTRD/PEXTRQ (and PINSRD/PINSRQ below) share one value in this table.
PEXTRD = 0x660F3A16,
PEXTRQ = 0x660F3A16,
PINSRB = 0x660F3A20,
PINSRD = 0x660F3A22,
PINSRQ = 0x660F3A22,
MOVNTDQA = 0x660F382A,
PACKUSDW = 0x660F382B,
PCMPEQQ = 0x660F3829,
PEXTRB = 0x660F3A14,
PHMINPOSUW = 0x660F3841,
PMAXSB = 0x660F383C,
PMAXSD = 0x660F383D,
PMAXUD = 0x660F383F,
PMAXUW = 0x660F383E,
PMINSB = 0x660F3838,
PMINSD = 0x660F3839,
PMINUD = 0x660F383B,
PMINUW = 0x660F383A,
PMOVSXBW = 0x660F3820,
PMOVSXBD = 0x660F3821,
PMOVSXBQ = 0x660F3822,
PMOVSXWD = 0x660F3823,
PMOVSXWQ = 0x660F3824,
PMOVSXDQ = 0x660F3825,
PMOVZXBW = 0x660F3830,
PMOVZXBD = 0x660F3831,
PMOVZXBQ = 0x660F3832,
PMOVZXWD = 0x660F3833,
PMOVZXWQ = 0x660F3834,
PMOVZXDQ = 0x660F3835,
PMULDQ = 0x660F3828,
PMULLD = 0x660F3840,
PTEST = 0x660F3817,
ROUNDPD = 0x660F3A09,
ROUNDPS = 0x660F3A08,
ROUNDSD = 0x660F3A0B,
ROUNDSS = 0x660F3A0A,
// SSE4.2
PCMPESTRI = 0x660F3A61,
PCMPESTRM = 0x660F3A60,
PCMPISTRI = 0x660F3A63,
PCMPISTRM = 0x660F3A62,
PCMPGTQ = 0x660F3837,
//CRC32
// SSE4a (AMD only)
// EXTRQ,INSERTQ,MOVNTSD,MOVNTSS
// POPCNT and LZCNT (have their own CPUID bits)
POPCNT = 0xF30FB8,
// LZCNT
}
/**
* Generate two operand instruction with XMM 128 bit operands.
*
* This is a compiler magic function - it doesn't behave like
* regular D functions. Only the declaration appears here; no body is
* provided in this module.
*
* Params:
* opcode = any of the XMM opcodes; it must be a compile time constant
* op1 = first operand
* op2 = second operand
* Returns:
* result of opcode
*/
pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2);
///
unittest
{
    // Two-operand form: the same vector is supplied as both operands.
    float4 v;
    v = cast(float4) __simd(XMM.PXOR, v, v);
}
/**
* Unary SIMD instructions.
*
* Overloads taking a single operand, which may be a 128 bit vector,
* a double or a float.
*
* Params:
* opcode = XMM opcode to generate; as documented for the other __simd
*          overloads it is presumably required to be a compile time constant
* op1 = the operand
* Returns:
* result of opcode
*/
pure @safe void16 __simd(XMM opcode, void16 op1);
pure @safe void16 __simd(XMM opcode, double d); ///
pure @safe void16 __simd(XMM opcode, float f); ///
///
unittest
{
    // Single-operand form.
    float4 v;
    v = cast(float4) __simd(XMM.LODSS, v);
}
/****
* Generate a two operand instruction that takes an additional
* 8 bit immediate.
*
* For instructions:
* CMPPD, CMPSS, CMPSD, CMPPS,
* PSHUFD, PSHUFHW, PSHUFLW,
* BLENDPD, BLENDPS, DPPD, DPPS,
* MPSADBW, PBLENDW,
* ROUNDPD, ROUNDPS, ROUNDSD, ROUNDSS
* Params:
* opcode = any of the above XMM opcodes; it must be a compile time constant
* op1 = first operand
* op2 = second operand
* imm8 = third operand; must be a compile time constant
* Returns:
* result of opcode
*/
pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2, ubyte imm8);
///
unittest
{
    // Two vector operands followed by the immediate byte.
    float4 v;
    v = cast(float4) __simd(XMM.CMPPD, v, v, 0x7A);
}
/***
* Generate an instruction taking one vector operand and an
* 8 bit immediate.
*
* For instructions with the imm8 version:
* PSLLD, PSLLQ, PSLLW, PSRAD, PSRAW, PSRLD, PSRLQ, PSRLW,
* PSRLDQ, PSLLDQ
* Params:
* opcode = any of the XMM opcodes; it must be a compile time constant
* op1 = first operand
* imm8 = second operand; must be a compile time constant
* Returns:
* result of opcode
*/
pure @safe void16 __simd_ib(XMM opcode, void16 op1, ubyte imm8);
///
unittest
{
    // One vector operand plus the immediate byte.
    float4 v;
    v = cast(float4) __simd_ib(XMM.PSRLQ, v, 0x7A);
}
/*****
* For "store" operations of the form:
* op1 op= op2
*
* Params:
* opcode = XMM store opcode
* op1 = destination of the store; a 128 bit vector, a double or a float
* op2 = vector value to store
* Returns:
* op2
* These cannot be marked as pure, as semantic() doesn't check them.
*
* NOTE(review): the module-level `pure:` label near the top of this file
* presumably still applies to these declarations - confirm whether that
* is intended.
*/
@safe void16 __simd_sto(XMM opcode, void16 op1, void16 op2);
@safe void16 __simd_sto(XMM opcode, double op1, void16 op2); ///
@safe void16 __simd_sto(XMM opcode, float op1, void16 op2); ///
///
unittest
{
    void16 a;
    float f = 1;
    double d = 1;
    // Vector destination: a full 128 bit unaligned store is appropriate.
    cast(void)__simd_sto(XMM.STOUPS, a, a);
    // Scalar destinations need the scalar store forms. STOUPS (MOVUPS)
    // writes 16 bytes, which would run past the end of a 4 byte float or
    // an 8 byte double; STOSS (MOVSS m32, xmm) and STOSD (MOVSD m64, xmm)
    // write only the low element.
    cast(void)__simd_sto(XMM.STOSS, f, a);
    cast(void)__simd_sto(XMM.STOSD, d, a);
}
/* Typed wrappers: overloading on the vector type keeps the operand and
 * result types consistent.
 * Compile with inlining on for best performance.
 */
pure @safe short8 pcmpeq()(short8 v1, short8 v2)
{
    // Word-wise equality compare; the raw result is re-typed as short8.
    void16 rawResult = __simd(XMM.PCMPEQW, v1, v2);
    return cast(short8) rawResult;
}
pure @safe ushort8 pcmpeq()(ushort8 v1, ushort8 v2)
{
    // Word-wise equality compare; the raw result is re-typed as ushort8.
    void16 rawResult = __simd(XMM.PCMPEQW, v1, v2);
    return cast(ushort8) rawResult;
}
/*********************
 * Emit prefetch instruction.
 * Params:
 *    address = address to be prefetched
 *    writeFetch = true for write fetch, false for read fetch
 *    locality = 0..3 (0 meaning least local, 3 meaning most local);
 *               only checked for read fetches
 * Note:
 *    The Intel mappings are:
 *    $(TABLE
 *    $(THEAD writeFetch, locality, Instruction)
 *    $(TROW false, 0, prefetchnta)
 *    $(TROW false, 1, prefetcht2)
 *    $(TROW false, 2, prefetcht1)
 *    $(TROW false, 3, prefetcht0)
 *    $(TROW true, 0, prefetchw)
 *    $(TROW true, 1, prefetchw)
 *    $(TROW true, 2, prefetchw)
 *    $(TROW true, 3, prefetchw)
 *    )
 */
void prefetch(bool writeFetch, ubyte locality)(const(void)* address)
{
    static if (writeFetch)
    {
        // Every write fetch uses encoding 4, whatever the locality.
        __prefetch(address, 4);
    }
    else
    {
        // Read fetch: reject out-of-range localities at compile time, then
        // map locality 0..3 onto encodings 3..0.
        static assert(locality < 4, "0..3 expected for locality");
        __prefetch(address, 3 - locality);
    }
}
// Bodiless declaration called by prefetch(); no implementation is visible in
// this file, so it is presumably supplied by the compiler - TODO confirm.
// prefetch() passes encoding 4 for write fetches and 3 - locality for reads.
private void __prefetch(const(void*) address, ubyte encoding);
/*************************************
 * Load unaligned vector from address.
 * This is a compiler intrinsic.
 * Params:
 *    p = pointer to vector
 * Returns:
 *    vector
 */
V loadUnaligned(V)(const V* p)
if (is(V == void16) ||
    is(V == byte16) ||
    is(V == ubyte16) ||
    is(V == short8) ||
    is(V == ushort8) ||
    is(V == int4) ||
    is(V == uint4) ||
    is(V == long2) ||
    is(V == ulong2) ||
    is(V == double2) ||
    is(V == float4))
{
    pragma(inline, true);

    // Choose the unaligned load opcode once, at compile time:
    // LODUPD for double2, LODUPS for float4, LODDQU for everything else.
    static if (is(V == double2))
        enum XMM loadOp = XMM.LODUPD;
    else static if (is(V == float4))
        enum XMM loadOp = XMM.LODUPS;
    else
        enum XMM loadOp = XMM.LODDQU;

    return cast(V) __simd(loadOp, *cast(const void16*) p);
}
@system
unittest
{
    // Source buffer. 32 bytes leaves room for a 16-byte vector starting
    // at any of the first 16 byte offsets.
    ubyte[32] data;
    foreach (idx, ref b; data)
        b = cast(ubyte) idx;

    // Exercise every starting offset 0 .. 15.
    foreach (offset; 0 .. 16)
    {
        ubyte* src = &data[offset];

        void test(T)()
        {
            // Load through the (possibly misaligned) pointer ...
            T loaded = loadUnaligned(cast(T*) src);

            // ... and compare the vector byte by byte with the buffer.
            ubyte* loadedBytes = cast(ubyte*) &loaded;
            foreach (k; 0 .. T.sizeof)
                assert(loadedBytes[k] == src[k]);
        }

        test!void16();
        test!byte16();
        test!ubyte16();
        test!short8();
        test!ushort8();
        test!int4();
        test!uint4();
        test!long2();
        test!ulong2();
        test!double2();
        test!float4();
    }
}
/*************************************
 * Store vector to unaligned address.
 * This is a compiler intrinsic.
 * Params:
 *    p = pointer to vector
 *    value = value to store
 * Returns:
 *    value
 */
V storeUnaligned(V)(V* p, V value)
if (is(V == void16) ||
    is(V == byte16) ||
    is(V == ubyte16) ||
    is(V == short8) ||
    is(V == ushort8) ||
    is(V == int4) ||
    is(V == uint4) ||
    is(V == long2) ||
    is(V == ulong2) ||
    is(V == double2) ||
    is(V == float4))
{
    pragma(inline, true);

    // Choose the unaligned store opcode once, at compile time:
    // STOUPD for double2, STOUPS for float4, STODQU for everything else.
    static if (is(V == double2))
        enum XMM storeOp = XMM.STOUPD;
    else static if (is(V == float4))
        enum XMM storeOp = XMM.STOUPS;
    else
        enum XMM storeOp = XMM.STODQU;

    return cast(V) __simd_sto(storeOp, *cast(void16*) p, value);
}
@system
unittest
{
    // Destination buffer. 32 bytes leaves room for a 16-byte vector
    // starting at any of the first 16 byte offsets.
    ubyte[32] data;

    // Exercise every starting offset 0 .. 15.
    foreach (offset; 0 .. 16)
    {
        ubyte* dst = &data[offset];

        void test(T)()
        {
            T v;

            // Fill `v` with a recognisable byte pattern.
            ubyte* vBytes = cast(ubyte*) &v;
            foreach (k; 0 .. T.sizeof)
                vBytes[k] = cast(ubyte) k;

            // Write `v` through the (possibly misaligned) pointer ...
            storeUnaligned(cast(T*) dst, v);

            // ... and check that the buffer now holds the same bytes.
            foreach (k; 0 .. T.sizeof)
                assert(vBytes[k] == dst[k]);
        }

        test!void16();
        test!byte16();
        test!ubyte16();
        test!short8();
        test!ushort8();
        test!int4();
        test!uint4();
        test!long2();
        test!ulong2();
        test!double2();
        test!float4();
    }
}
}