// blob: 88b63bfb74b3ad0daa7d853d8d60172670e55912 [file] [log] [blame]
// Module header: PTX ISA version, target architecture and address width.
// This module is written against PTX ISA version 3.1.
.version 3.1
// Code is generated for the sm_30 target architecture.
.target sm_30
// Addresses are 64 bits wide throughout the module.
.address_size 64
// delay -- kernel entry with no parameters. It runs a fixed-count loop and
// returns without writing anything outside its own local frame.
//
// All loop state lives in the 16-byte local array %farray and is accessed
// through the generic address held in %frame:
//   [%frame+0]   u32  loop index, starts at 0
//   [%frame+4]   u32  scratch counter, incremented once per iteration
//   [%frame+8]   u32  iteration limit (500000)
//   [%frame+12]       never accessed
//
// Each iteration reloads and re-stores its state through memory.
// NOTE(review): that memory traffic is presumably what provides the delay,
// so the loop is deliberately left in this unoptimized shape -- confirm
// before simplifying it.
.visible .entry delay
{
	.reg .u32 %r22;
	.reg .u32 %r23;
	.reg .u32 %r24;
	.reg .u32 %r25;
	.reg .u32 %r26;
	.reg .u32 %r27;
	.reg .u32 %r28;
	.reg .u32 %r29;
	.reg .pred %r30;
	.reg .u64 %frame;
	.local .align 8 .b8 %farray[16];

	// Convert the local-space address of the frame to a generic address.
	cvta.local.u64 %frame,%farray;

	// limit = 500000
	mov.u32 %r22,500000;
	st.u32 [%frame+8],%r22;

	// index = 0
	mov.u32 %r23,0;
	st.u32 [%frame],%r23;
	// scratch = 0, so the loop body never loads a slot that was not written.
	st.u32 [%frame+4],%r23;

	// Evaluate the loop condition before the first iteration.
	bra $L2;

$L3:
	// scratch = scratch + 1
	ld.u32 %r25,[%frame+4];
	add.u32 %r24,%r25,1;
	st.u32 [%frame+4],%r24;

	// index = index + 1
	ld.u32 %r27,[%frame];
	add.u32 %r26,%r27,1;
	st.u32 [%frame],%r26;

$L2:
	// Keep looping while index < limit (signed 32-bit comparison).
	ld.u32 %r28,[%frame];
	ld.u32 %r29,[%frame+8];
	setp.lt.s32 %r30,%r28,%r29;
	@%r30 bra $L3;

	ret;
}
// delay2 -- kernel entry that runs the same fixed-count loop as a delay and
// then performs a single 64-bit store through a caller-supplied address.
//
// Parameters:
//   %in_ar1  u64  address written by the final store (used with generic
//                 addressing, so it must be valid for an 8-byte store)
//   %in_ar2  u64  value written to that address
//
// Effect, once the loop has finished:  *(u64 *)in_ar1 = in_ar2
//
// All state lives in the 32-byte local array %farray and is accessed through
// the generic address held in %frame:
//   [%frame+0]   u32  loop index, starts at 0
//   [%frame+4]   u32  scratch counter, incremented once per iteration
//   [%frame+8]   u32  iteration limit (500000)
//   [%frame+12]       never accessed
//   [%frame+16]  u64  saved copy of in_ar1
//   [%frame+24]  u64  saved copy of in_ar2
//
// NOTE(review): the per-iteration loads and stores are presumably what
// provides the delay, so the loop is deliberately left in this unoptimized
// shape -- confirm before simplifying it.
.visible .entry delay2 (.param .u64 %in_ar1, .param .u64 %in_ar2)
{
	.reg .u64 %ar1;
	.reg .u64 %ar2;
	.reg .u64 %r22;
	.reg .u64 %r23;
	.reg .u32 %r24;
	.reg .u32 %r25;
	.reg .u32 %r26;
	.reg .u32 %r27;
	.reg .u32 %r28;
	.reg .u32 %r29;
	.reg .u32 %r30;
	.reg .u32 %r31;
	.reg .pred %r32;
	.reg .u64 %r33;
	.reg .u64 %r34;
	.reg .u64 %frame;
	.local .align 8 .b8 %farray[32];

	// Convert the local-space address of the frame to a generic address.
	cvta.local.u64 %frame,%farray;

	// Fetch both kernel parameters and save them in the frame.
	ld.param.u64 %ar1,[%in_ar1];
	ld.param.u64 %ar2,[%in_ar2];
	mov.u64 %r22,%ar1;
	st.u64 [%frame+16],%r22;
	mov.u64 %r23,%ar2;
	st.u64 [%frame+24],%r23;

	// limit = 500000
	mov.u32 %r24,500000;
	st.u32 [%frame+8],%r24;

	// index = 0
	mov.u32 %r25,0;
	st.u32 [%frame],%r25;
	// scratch = 0, so the loop body never loads a slot that was not written.
	st.u32 [%frame+4],%r25;

	// Evaluate the loop condition before the first iteration.
	bra $L5;

$L6:
	// scratch = scratch + 1
	ld.u32 %r27,[%frame+4];
	add.u32 %r26,%r27,1;
	st.u32 [%frame+4],%r26;

	// index = index + 1
	ld.u32 %r29,[%frame];
	add.u32 %r28,%r29,1;
	st.u32 [%frame],%r28;

$L5:
	// Keep looping while index < limit (signed 32-bit comparison).
	ld.u32 %r30,[%frame];
	ld.u32 %r31,[%frame+8];
	setp.lt.s32 %r32,%r30,%r31;
	@%r32 bra $L6;

	// Loop finished: reload the saved parameters and store in_ar2 to *in_ar1.
	ld.u64 %r33,[%frame+16];
	ld.u64 %r34,[%frame+24];
	st.u64 [%r33],%r34;

	ret;
}