blob: 591b1dc146cab2b6fb872bdee549c5120b145b19 [file] [log] [blame]
/**
* The fiber module provides lightweight threads aka fibers.
*
* Copyright: Copyright Sean Kelly 2005 - 2012.
* License: Distributed under the
* $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0).
* (See accompanying file LICENSE)
* Authors: Sean Kelly, Walter Bright, Alex Rønne Petersen, Martin Nowak
* Source: $(DRUNTIMESRC core/thread/fiber/package.d)
*/
/* NOTE: This file has been patched from the original DMD distribution to
* work with the GDC compiler.
*/
module core.thread.fiber;
import core.thread.context;
import core.thread.fiber.base : fiber_entryPoint, FiberBase;
import core.thread.threadbase;
import core.thread.threadgroup;
import core.thread.types;
import core.memory : pageSize;
///////////////////////////////////////////////////////////////////////////////
// Fiber Platform Detection
///////////////////////////////////////////////////////////////////////////////
version (GNU)
{
import gcc.builtins;
version (GNU_StackGrowsDown)
version = StackGrowsDown;
}
else
{
// this should be true for most architectures
version = StackGrowsDown;
}
version (Windows)
{
import core.stdc.stdlib : free, malloc;
import core.sys.windows.winbase;
import core.sys.windows.winnt;
}
package
{
version (D_InlineAsm_X86)
{
version (Windows)
version = AsmX86_Windows;
else version (Posix)
version = AsmX86_Posix;
version = AlignFiberStackTo16Byte;
}
else version (D_InlineAsm_X86_64)
{
version (Windows)
{
version = AsmX86_64_Windows;
version = AlignFiberStackTo16Byte;
}
else version (Posix)
{
version = AsmX86_64_Posix;
version = AlignFiberStackTo16Byte;
}
}
else version (X86)
{
version = AlignFiberStackTo16Byte;
version (GNU_CET)
{
// fiber_switchContext does not support shadow stack from
// Intel CET. So use ucontext implementation.
}
else
{
version = AsmExternal;
version (MinGW)
version = GNU_AsmX86_Windows;
else version (OSX)
version = AsmX86_Posix;
else version (Posix)
version = AsmX86_Posix;
}
}
else version (X86_64)
{
version = AlignFiberStackTo16Byte;
version (GNU_CET)
{
// fiber_switchContext does not support shadow stack from
// Intel CET. So use ucontext implementation.
}
else version (D_X32)
{
// let X32 be handled by ucontext swapcontext
}
else
{
version = AsmExternal;
version (MinGW)
version = GNU_AsmX86_64_Windows;
else version (OSX)
version = AsmX86_64_Posix;
else version (Posix)
version = AsmX86_64_Posix;
}
}
else version (PPC)
{
version (OSX)
{
version = AsmPPC_Darwin;
version = AsmExternal;
version = AlignFiberStackTo16Byte;
}
else version (Posix)
{
version = AsmPPC_Posix;
version = AsmExternal;
}
}
else version (PPC64)
{
version (OSX)
{
version = AsmPPC_Darwin;
version = AsmExternal;
version = AlignFiberStackTo16Byte;
}
else version (Posix)
{
version = AlignFiberStackTo16Byte;
}
}
else version (MIPS_O32)
{
version (Posix)
{
version = AsmMIPS_O32_Posix;
version = AsmExternal;
}
}
else version (MIPS_N64)
{
version (Posix)
{
version = AsmMIPS_N64_Posix;
version = AsmExternal;
}
}
else version (AArch64)
{
version (Posix)
{
version = AsmAArch64_Posix;
version = AsmExternal;
version = AlignFiberStackTo16Byte;
}
}
else version (ARM)
{
version (Posix)
{
version = AsmARM_Posix;
version = AsmExternal;
}
}
else version (SPARC)
{
// NOTE: The SPARC ABI specifies only doubleword alignment.
version = AlignFiberStackTo16Byte;
}
else version (SPARC64)
{
version = AlignFiberStackTo16Byte;
}
else version (LoongArch64)
{
version (Posix)
{
version = AsmLoongArch64_Posix;
version = AsmExternal;
version = AlignFiberStackTo16Byte;
}
}
version (Posix)
{
version (AsmX86_Windows) {} else
version (AsmX86_Posix) {} else
version (AsmX86_64_Windows) {} else
version (AsmX86_64_Posix) {} else
version (AsmExternal) {} else
{
// NOTE: The ucontext implementation requires architecture specific
// data definitions to operate so testing for it must be done
// by checking for the existence of ucontext_t rather than by
// a version identifier. Please note that this is considered
// an obsolescent feature according to the POSIX spec, so a
// custom solution is still preferred.
import core.sys.posix.ucontext : getcontext, makecontext, MINSIGSTKSZ, swapcontext, ucontext_t;
}
}
}
///////////////////////////////////////////////////////////////////////////////
// Fiber Entry Point and Context Switch
///////////////////////////////////////////////////////////////////////////////
package
{
import core.atomic : atomicStore, cas, MemoryOrder;
import core.exception : onOutOfMemoryError;
import core.stdc.stdlib : abort;
// Look above the definition of 'class Fiber' for some information about the implementation of this routine
version (AsmExternal)
{
extern (C) void fiber_switchContext( void** oldp, void* newp ) nothrow @nogc;
version (AArch64)
extern (C) void fiber_trampoline() nothrow;
}
else
extern (C) void fiber_switchContext( void** oldp, void* newp ) nothrow @nogc
{
// NOTE: The data pushed and popped in this routine must match the
// default stack created by Fiber.initStack or the initial
// switch into a new context will fail.
version (AsmX86_Windows)
{
asm pure nothrow @nogc
{
naked;
// save current stack state
push EBP;
mov EBP, ESP;
push EDI;
push ESI;
push EBX;
push dword ptr FS:[0];
push dword ptr FS:[4];
push dword ptr FS:[8];
push EAX;
// store oldp again with more accurate address
mov EAX, dword ptr 8[EBP];
mov [EAX], ESP;
// load newp to begin context switch
mov ESP, dword ptr 12[EBP];
// load saved state from new stack
pop EAX;
pop dword ptr FS:[8];
pop dword ptr FS:[4];
pop dword ptr FS:[0];
pop EBX;
pop ESI;
pop EDI;
pop EBP;
// 'return' to complete switch
pop ECX;
jmp ECX;
}
}
else version (AsmX86_64_Windows)
{
asm pure nothrow @nogc
{
naked;
// save current stack state
// NOTE: When changing the layout of registers on the stack,
// make sure that the XMM registers are still aligned.
// On function entry, the stack is guaranteed to not
// be aligned to 16 bytes because of the return address
// on the stack.
push RBP;
mov RBP, RSP;
push R12;
push R13;
push R14;
push R15;
push RDI;
push RSI;
// 7 registers = 56 bytes; stack is now aligned to 16 bytes
sub RSP, 160;
movdqa [RSP + 144], XMM6;
movdqa [RSP + 128], XMM7;
movdqa [RSP + 112], XMM8;
movdqa [RSP + 96], XMM9;
movdqa [RSP + 80], XMM10;
movdqa [RSP + 64], XMM11;
movdqa [RSP + 48], XMM12;
movdqa [RSP + 32], XMM13;
movdqa [RSP + 16], XMM14;
movdqa [RSP], XMM15;
push RBX;
xor RAX,RAX;
push qword ptr GS:[RAX];
push qword ptr GS:8[RAX];
push qword ptr GS:16[RAX];
// store oldp
mov [RCX], RSP;
// load newp to begin context switch
mov RSP, RDX;
// load saved state from new stack
pop qword ptr GS:16[RAX];
pop qword ptr GS:8[RAX];
pop qword ptr GS:[RAX];
pop RBX;
movdqa XMM15, [RSP];
movdqa XMM14, [RSP + 16];
movdqa XMM13, [RSP + 32];
movdqa XMM12, [RSP + 48];
movdqa XMM11, [RSP + 64];
movdqa XMM10, [RSP + 80];
movdqa XMM9, [RSP + 96];
movdqa XMM8, [RSP + 112];
movdqa XMM7, [RSP + 128];
movdqa XMM6, [RSP + 144];
add RSP, 160;
pop RSI;
pop RDI;
pop R15;
pop R14;
pop R13;
pop R12;
pop RBP;
// 'return' to complete switch
pop RCX;
jmp RCX;
}
}
else version (AsmX86_Posix)
{
asm pure nothrow @nogc
{
naked;
// save current stack state
push EBP;
mov EBP, ESP;
push EDI;
push ESI;
push EBX;
push EAX;
// store oldp again with more accurate address
mov EAX, dword ptr 8[EBP];
mov [EAX], ESP;
// load newp to begin context switch
mov ESP, dword ptr 12[EBP];
// load saved state from new stack
pop EAX;
pop EBX;
pop ESI;
pop EDI;
pop EBP;
// 'return' to complete switch
pop ECX;
jmp ECX;
}
}
else version (AsmX86_64_Posix)
{
asm pure nothrow @nogc
{
naked;
// save current stack state
push RBP;
mov RBP, RSP;
push RBX;
push R12;
push R13;
push R14;
push R15;
// store oldp
mov [RDI], RSP;
// load newp to begin context switch
mov RSP, RSI;
// load saved state from new stack
pop R15;
pop R14;
pop R13;
pop R12;
pop RBX;
pop RBP;
// 'return' to complete switch
pop RCX;
jmp RCX;
}
}
else static if ( __traits( compiles, ucontext_t ) )
{
Fiber cfib = Fiber.getThis();
void* ucur = cfib.m_ucur;
*oldp = &ucur;
swapcontext( **(cast(ucontext_t***) oldp),
*(cast(ucontext_t**) newp) );
}
else
static assert(0, "Not implemented");
}
}
///////////////////////////////////////////////////////////////////////////////
// Fiber
///////////////////////////////////////////////////////////////////////////////
/*
* Documentation of Fiber internals:
*
* The main routines to implement when porting Fibers to new architectures are
* fiber_switchContext and initStack. Some version constants have to be defined
* for the new platform as well, search for "Fiber Platform Detection and Memory Allocation".
*
* Fibers are based on a concept called 'Context'. A Context describes the execution
* state of a Fiber or main thread which is fully described by the stack, some
* registers and a return address at which the Fiber/Thread should continue executing.
* Please note that not only each Fiber has a Context, but each thread also has got a
* Context which describes the threads stack and state. If you call Fiber fib; fib.call
* the first time in a thread you switch from Threads Context into the Fibers Context.
* If you call fib.yield in that Fiber you switch out of the Fibers context and back
* into the Thread Context. (However, this is not always the case. You can call a Fiber
* from within another Fiber, then you switch Contexts between the Fibers and the Thread
* Context is not involved)
*
* In all current implementations the registers and the return address are actually
* saved on a Contexts stack.
*
* The fiber_switchContext routine has got two parameters:
* void** a: This is the _location_ where we have to store the current stack pointer,
* the stack pointer of the currently executing Context (Fiber or Thread).
* void* b: This is the pointer to the stack of the Context which we want to switch into.
* Note that we get the same pointer here as the one we stored into the void** a
* in a previous call to fiber_switchContext.
*
* In the simplest case, a fiber_switchContext rountine looks like this:
* fiber_switchContext:
* push {return Address}
* push {registers}
* copy {stack pointer} into {location pointed to by a}
* //We have now switch to the stack of a different Context!
* copy {b} into {stack pointer}
* pop {registers}
* pop {return Address}
* jump to {return Address}
*
* The GC uses the value returned in parameter a to scan the Fibers stack. It scans from
* the stack base to that value. As the GC dislikes false pointers we can actually optimize
* this a little: By storing registers which can not contain references to memory managed
* by the GC outside of the region marked by the stack base pointer and the stack pointer
* saved in fiber_switchContext we can prevent the GC from scanning them.
* Such registers are usually floating point registers and the return address. In order to
* implement this, we return a modified stack pointer from fiber_switchContext. However,
* we have to remember that when we restore the registers from the stack!
*
* --------------------------- <= Stack Base
* | Frame | <= Many other stack frames
* | Frame |
* |-------------------------| <= The last stack frame. This one is created by fiber_switchContext
* | registers with pointers |
* | | <= Stack pointer. GC stops scanning here
* | return address |
* |floating point registers |
* --------------------------- <= Real Stack End
*
* fiber_switchContext:
* push {registers with pointers}
* copy {stack pointer} into {location pointed to by a}
* push {return Address}
* push {Floating point registers}
* //We have now switch to the stack of a different Context!
* copy {b} into {stack pointer}
* //We now have to adjust the stack pointer to point to 'Real Stack End' so we can pop
* //the FP registers
* //+ or - depends on if your stack grows downwards or upwards
* {stack pointer} = {stack pointer} +- ({FPRegisters}.sizeof + {return address}.sizeof}
* pop {Floating point registers}
* pop {return Address}
* pop {registers with pointers}
* jump to {return Address}
*
* So the question now is which registers need to be saved? This depends on the specific
* architecture ABI of course, but here are some general guidelines:
* - If a register is callee-save (if the callee modifies the register it must saved and
* restored by the callee) it needs to be saved/restored in switchContext
* - If a register is caller-save it needn't be saved/restored. (Calling fiber_switchContext
* is a function call and the compiler therefore already must save these registers before
* calling fiber_switchContext)
* - Argument registers used for passing parameters to functions needn't be saved/restored
* - The return register needn't be saved/restored (fiber_switchContext hasn't got a return type)
* - All scratch registers needn't be saved/restored
* - The link register usually needn't be saved/restored (but sometimes it must be cleared -
* see below for details)
* - The frame pointer register - if it exists - is usually callee-save
* - All current implementations do not save control registers
*
* What happens on the first switch into a Fiber? We never saved a state for this fiber before,
* but the initial state is prepared in the initStack routine. (This routine will also be called
* when a Fiber is being resetted). initStack must produce exactly the same stack layout as the
* part of fiber_switchContext which saves the registers. Pay special attention to set the stack
* pointer correctly if you use the GC optimization mentioned before. the return Address saved in
* initStack must be the address of fiber_entrypoint.
*
* There's now a small but important difference between the first context switch into a fiber and
* further context switches. On the first switch, Fiber.call is used and the returnAddress in
* fiber_switchContext will point to fiber_entrypoint. The important thing here is that this jump
* is a _function call_, we call fiber_entrypoint by jumping before it's function prologue. On later
* calls, the user used yield() in a function, and therefore the return address points into a user
* function, after the yield call. So here the jump in fiber_switchContext is a _function return_,
* not a function call!
*
* The most important result of this is that on entering a function, i.e. fiber_entrypoint, we
* would have to provide a return address / set the link register once fiber_entrypoint
* returns. Now fiber_entrypoint does never return and therefore the actual value of the return
* address / link register is never read/used and therefore doesn't matter. When fiber_switchContext
* performs a _function return_ the value in the link register doesn't matter either.
* However, the link register will still be saved to the stack in fiber_entrypoint and some
* exception handling / stack unwinding code might read it from this stack location and crash.
* The exact solution depends on your architecture, but see the ARM implementation for a way
* to deal with this issue.
*
* The ARM implementation is meant to be used as a kind of documented example implementation.
* Look there for a concrete example.
*
* FIXME: fiber_entrypoint might benefit from a @noreturn attribute, but D doesn't have one.
*/
/**
* This class provides a cooperative concurrency mechanism integrated with the
* threading and garbage collection functionality. Calling a fiber may be
* considered a blocking operation that returns when the fiber yields (via
* Fiber.yield()). Execution occurs within the context of the calling thread
* so synchronization is not necessary to guarantee memory visibility so long
* as the same thread calls the fiber each time. Please note that there is no
* requirement that a fiber be bound to one specific thread. Rather, fibers
* may be freely passed between threads so long as they are not currently
* executing. Like threads, a new fiber thread may be created using either
* derivation or composition, as in the following example.
*
* Warning:
* Status registers are not saved by the current implementations. This means
* floating point exception status bits (overflow, divide by 0), rounding mode
* and similar stuff is set per-thread, not per Fiber!
*
* Warning:
* On ARM FPU registers are not saved if druntime was compiled as ARM_SoftFloat.
* If such a build is used on a ARM_SoftFP system which actually has got a FPU
* and other libraries are using the FPU registers (other code is compiled
* as ARM_SoftFP) this can cause problems. Druntime must be compiled as
* ARM_SoftFP in this case.
*
* Authors: Based on a design by Mikola Lysenko.
*/
class Fiber : FiberBase
{
///////////////////////////////////////////////////////////////////////////
// Initialization
///////////////////////////////////////////////////////////////////////////
version (Windows)
// exception handling walks the stack, invoking DbgHelp.dll which
// needs up to 16k of stack space depending on the version of DbgHelp.dll,
// the existence of debug symbols and other conditions. Avoid causing
// stack overflows by defaulting to a larger stack size
enum defaultStackPages = 8;
else version (OSX)
{
version (X86_64)
// libunwind on macOS 11 now requires more stack space than 16k, so
// default to a larger stack size. This is only applied to X86 as
// the pageSize is still 4k, however on AArch64 it is 16k.
enum defaultStackPages = 8;
else
enum defaultStackPages = 4;
}
else
enum defaultStackPages = 4;
/**
* Initializes a fiber object which is associated with a static
* D function.
*
* Params:
* fn = The fiber function.
* sz = The stack size for this fiber.
* guardPageSize = size of the guard page to trap fiber's stack
* overflows. Beware that using this will increase
* the number of mmaped regions on platforms using mmap
* so an OS-imposed limit may be hit.
*
* In:
* fn must not be null.
*/
this( void function() fn, size_t sz = pageSize * defaultStackPages,
size_t guardPageSize = pageSize ) nothrow
{
super( fn, sz, guardPageSize );
}
/**
* Initializes a fiber object which is associated with a dynamic
* D function.
*
* Params:
* dg = The fiber function.
* sz = The stack size for this fiber.
* guardPageSize = size of the guard page to trap fiber's stack
* overflows. Beware that using this will increase
* the number of mmaped regions on platforms using mmap
* so an OS-imposed limit may be hit.
*
* In:
* dg must not be null.
*/
this( void delegate() dg, size_t sz = pageSize * defaultStackPages,
size_t guardPageSize = pageSize ) nothrow
{
super( dg, sz, guardPageSize );
}
///////////////////////////////////////////////////////////////////////////
// Fiber Accessors
///////////////////////////////////////////////////////////////////////////
/**
* Provides a reference to the calling fiber or null if no fiber is
* currently active.
*
* Returns:
* The fiber object representing the calling fiber or null if no fiber
* is currently active within this thread. The result of deleting this object is undefined.
*/
static Fiber getThis() @safe nothrow @nogc
{
return cast(Fiber) FiberBase.getThis();
}
///////////////////////////////////////////////////////////////////////////
// Static Initialization
///////////////////////////////////////////////////////////////////////////
version (Posix)
{
static this()
{
static if ( __traits( compiles, ucontext_t ) )
{
int status = getcontext( &sm_utxt );
assert( status == 0 );
}
}
}
protected:
///////////////////////////////////////////////////////////////////////////
// Stack Management
///////////////////////////////////////////////////////////////////////////
//
// Allocate a new stack for this fiber.
//
final override void allocStack( size_t sz, size_t guardPageSize ) nothrow
in
{
assert( !m_pmem && !m_ctxt );
}
do
{
// adjust alloc size to a multiple of pageSize
sz += pageSize - 1;
sz -= sz % pageSize;
// NOTE: This instance of Thread.Context is dynamic so Fiber objects
// can be collected by the GC so long as no user level references
// to the object exist. If m_ctxt were not dynamic then its
// presence in the global context list would be enough to keep
// this object alive indefinitely. An alternative to allocating
// room for this struct explicitly would be to mash it into the
// base of the stack being allocated below. However, doing so
// requires too much special logic to be worthwhile.
m_ctxt = new StackContext;
version (Windows)
{
// reserve memory for stack
m_pmem = VirtualAlloc( null,
sz + guardPageSize,
MEM_RESERVE,
PAGE_NOACCESS );
if ( !m_pmem )
onOutOfMemoryError();
version (StackGrowsDown)
{
void* stack = m_pmem + guardPageSize;
void* guard = m_pmem;
void* pbase = stack + sz;
}
else
{
void* stack = m_pmem;
void* guard = m_pmem + sz;
void* pbase = stack;
}
// allocate reserved stack segment
stack = VirtualAlloc( stack,
sz,
MEM_COMMIT,
PAGE_READWRITE );
if ( !stack )
onOutOfMemoryError();
if (guardPageSize)
{
// allocate reserved guard page
guard = VirtualAlloc( guard,
guardPageSize,
MEM_COMMIT,
PAGE_READWRITE | PAGE_GUARD );
if ( !guard )
onOutOfMemoryError();
}
m_ctxt.bstack = pbase;
m_ctxt.tstack = pbase;
m_size = sz;
}
else
{
version (Posix) import core.sys.posix.sys.mman : MAP_ANON, MAP_FAILED, MAP_PRIVATE, mmap,
mprotect, PROT_NONE, PROT_READ, PROT_WRITE;
version (OpenBSD) import core.sys.posix.sys.mman : MAP_STACK;
static if ( __traits( compiles, ucontext_t ) )
{
// Stack size must be at least the minimum allowable by the OS.
if (sz < MINSIGSTKSZ)
sz = MINSIGSTKSZ;
}
static if ( __traits( compiles, mmap ) )
{
// Allocate more for the memory guard
sz += guardPageSize;
int mmap_flags = MAP_PRIVATE | MAP_ANON;
version (OpenBSD)
mmap_flags |= MAP_STACK;
m_pmem = mmap( null,
sz,
PROT_READ | PROT_WRITE,
mmap_flags,
-1,
0 );
if ( m_pmem == MAP_FAILED )
m_pmem = null;
}
else static if ( __traits( compiles, valloc ) )
{
m_pmem = valloc( sz );
}
else
{
import core.stdc.stdlib : malloc;
m_pmem = malloc( sz );
}
if ( !m_pmem )
onOutOfMemoryError();
version (StackGrowsDown)
{
m_ctxt.bstack = m_pmem + sz;
m_ctxt.tstack = m_pmem + sz;
void* guard = m_pmem;
}
else
{
m_ctxt.bstack = m_pmem;
m_ctxt.tstack = m_pmem;
void* guard = m_pmem + sz - guardPageSize;
}
m_size = sz;
static if ( __traits( compiles, mmap ) )
{
if (guardPageSize)
{
// protect end of stack
if ( mprotect(guard, guardPageSize, PROT_NONE) == -1 )
abort();
}
}
else
{
// Supported only for mmap allocated memory - results are
// undefined if applied to memory not obtained by mmap
}
}
ThreadBase.add( m_ctxt );
}
//
// Free this fiber's stack.
//
final override void freeStack() nothrow @nogc
in(m_pmem)
in(m_ctxt)
{
// NOTE: m_ctxt is guaranteed to be alive because it is held in the
// global context list.
ThreadBase.slock.lock_nothrow();
scope(exit) ThreadBase.slock.unlock_nothrow();
ThreadBase.remove( m_ctxt );
version (Windows)
{
VirtualFree( m_pmem, 0, MEM_RELEASE );
}
else
{
import core.sys.posix.sys.mman : mmap, munmap;
static if ( __traits( compiles, mmap ) )
{
munmap( m_pmem, m_size );
}
else
{
free( m_pmem );
}
}
m_pmem = null;
m_ctxt = null;
}
//
// Initialize the allocated stack.
// Look above the definition of 'class Fiber' for some information about the implementation of this routine
//
final override void initStack() nothrow @nogc
in
{
assert( m_ctxt.tstack && m_ctxt.tstack == m_ctxt.bstack );
assert( cast(size_t) m_ctxt.bstack % (void*).sizeof == 0 );
}
do
{
void* pstack = m_ctxt.tstack;
scope( exit ) m_ctxt.tstack = pstack;
void push( size_t val ) nothrow
{
version (StackGrowsDown)
{
pstack -= size_t.sizeof;
*(cast(size_t*) pstack) = val;
}
else
{
pstack += size_t.sizeof;
*(cast(size_t*) pstack) = val;
}
}
// NOTE: On OS X the stack must be 16-byte aligned according
// to the IA-32 call spec. For x86_64 the stack also needs to
// be aligned to 16-byte according to SysV AMD64 ABI.
version (AlignFiberStackTo16Byte)
{
version (StackGrowsDown)
{
pstack = cast(void*)(cast(size_t)(pstack) - (cast(size_t)(pstack) & 0x0F));
}
else
{
pstack = cast(void*)(cast(size_t)(pstack) + (cast(size_t)(pstack) & 0x0F));
}
}
version (AsmX86_Windows)
{
version (StackGrowsDown) {} else static assert( false );
// On Windows Server 2008 and 2008 R2, an exploit mitigation
// technique known as SEHOP is activated by default. To avoid
// hijacking of the exception handler chain, the presence of a
// Windows-internal handler (ntdll.dll!FinalExceptionHandler) at
// its end is tested by RaiseException. If it is not present, all
// handlers are disregarded, and the program is thus aborted
// (see http://blogs.technet.com/b/srd/archive/2009/02/02/
// preventing-the-exploitation-of-seh-overwrites-with-sehop.aspx).
// For new threads, this handler is installed by Windows immediately
// after creation. To make exception handling work in fibers, we
// have to insert it for our new stacks manually as well.
//
// To do this, we first determine the handler by traversing the SEH
// chain of the current thread until its end, and then construct a
// registration block for the last handler on the newly created
// thread. We then continue to push all the initial register values
// for the first context switch as for the other implementations.
//
// Note that this handler is never actually invoked, as we install
// our own one on top of it in the fiber entry point function.
// Thus, it should not have any effects on OSes not implementing
// exception chain verification.
alias fp_t = void function(); // Actual signature not relevant.
static struct EXCEPTION_REGISTRATION
{
EXCEPTION_REGISTRATION* next; // sehChainEnd if last one.
fp_t handler;
}
enum sehChainEnd = cast(EXCEPTION_REGISTRATION*) 0xFFFFFFFF;
__gshared fp_t finalHandler = null;
if ( finalHandler is null )
{
static EXCEPTION_REGISTRATION* fs0() nothrow
{
asm pure nothrow @nogc
{
naked;
mov EAX, FS:[0];
ret;
}
}
auto reg = fs0();
while ( reg.next != sehChainEnd ) reg = reg.next;
// Benign races are okay here, just to avoid re-lookup on every
// fiber creation.
finalHandler = reg.handler;
}
// When linking with /safeseh (supported by LDC, but not DMD)
// the exception chain must not extend to the very top
// of the stack, otherwise the exception chain is also considered
// invalid. Reserving additional 4 bytes at the top of the stack will
// keep the EXCEPTION_REGISTRATION below that limit
size_t reserve = EXCEPTION_REGISTRATION.sizeof + 4;
pstack -= reserve;
*(cast(EXCEPTION_REGISTRATION*)pstack) =
EXCEPTION_REGISTRATION( sehChainEnd, finalHandler );
auto pChainEnd = pstack;
push( cast(size_t) &fiber_entryPoint ); // EIP
push( cast(size_t) m_ctxt.bstack - reserve ); // EBP
push( 0x00000000 ); // EDI
push( 0x00000000 ); // ESI
push( 0x00000000 ); // EBX
push( cast(size_t) pChainEnd ); // FS:[0]
push( cast(size_t) m_ctxt.bstack ); // FS:[4]
push( cast(size_t) m_ctxt.bstack - m_size ); // FS:[8]
push( 0x00000000 ); // EAX
}
else version (AsmX86_64_Windows)
{
// Using this trampoline instead of the raw fiber_entryPoint
// ensures that during context switches, source and destination
// stacks have the same alignment. Otherwise, the stack would need
// to be shifted by 8 bytes for the first call, as fiber_entryPoint
// is an actual function expecting a stack which is not aligned
// to 16 bytes.
static void trampoline()
{
asm pure nothrow @nogc
{
naked;
sub RSP, 32; // Shadow space (Win64 calling convention)
call fiber_entryPoint;
xor RCX, RCX; // This should never be reached, as
jmp RCX; // fiber_entryPoint must never return.
}
}
push( cast(size_t) &trampoline ); // RIP
push( 0x00000000_00000000 ); // RBP
push( 0x00000000_00000000 ); // R12
push( 0x00000000_00000000 ); // R13
push( 0x00000000_00000000 ); // R14
push( 0x00000000_00000000 ); // R15
push( 0x00000000_00000000 ); // RDI
push( 0x00000000_00000000 ); // RSI
push( 0x00000000_00000000 ); // XMM6 (high)
push( 0x00000000_00000000 ); // XMM6 (low)
push( 0x00000000_00000000 ); // XMM7 (high)
push( 0x00000000_00000000 ); // XMM7 (low)
push( 0x00000000_00000000 ); // XMM8 (high)
push( 0x00000000_00000000 ); // XMM8 (low)
push( 0x00000000_00000000 ); // XMM9 (high)
push( 0x00000000_00000000 ); // XMM9 (low)
push( 0x00000000_00000000 ); // XMM10 (high)
push( 0x00000000_00000000 ); // XMM10 (low)
push( 0x00000000_00000000 ); // XMM11 (high)
push( 0x00000000_00000000 ); // XMM11 (low)
push( 0x00000000_00000000 ); // XMM12 (high)
push( 0x00000000_00000000 ); // XMM12 (low)
push( 0x00000000_00000000 ); // XMM13 (high)
push( 0x00000000_00000000 ); // XMM13 (low)
push( 0x00000000_00000000 ); // XMM14 (high)
push( 0x00000000_00000000 ); // XMM14 (low)
push( 0x00000000_00000000 ); // XMM15 (high)
push( 0x00000000_00000000 ); // XMM15 (low)
push( 0x00000000_00000000 ); // RBX
push( 0xFFFFFFFF_FFFFFFFF ); // GS:[0]
version (StackGrowsDown)
{
push( cast(size_t) m_ctxt.bstack ); // GS:[8]
push( cast(size_t) m_ctxt.bstack - m_size ); // GS:[16]
}
else
{
push( cast(size_t) m_ctxt.bstack ); // GS:[8]
push( cast(size_t) m_ctxt.bstack + m_size ); // GS:[16]
}
}
else version (AsmX86_Posix)
{
push( 0x00000000 ); // Return address of fiber_entryPoint call
push( cast(size_t) &fiber_entryPoint ); // EIP
push( cast(size_t) m_ctxt.bstack ); // EBP
push( 0x00000000 ); // EDI
push( 0x00000000 ); // ESI
push( 0x00000000 ); // EBX
push( 0x00000000 ); // EAX
}
else version (AsmX86_64_Posix)
{
push( 0x00000000_00000000 ); // Return address of fiber_entryPoint call
push( cast(size_t) &fiber_entryPoint ); // RIP
push( cast(size_t) m_ctxt.bstack ); // RBP
push( 0x00000000_00000000 ); // RBX
push( 0x00000000_00000000 ); // R12
push( 0x00000000_00000000 ); // R13
push( 0x00000000_00000000 ); // R14
push( 0x00000000_00000000 ); // R15
}
else version (AsmPPC_Posix)
{
version (StackGrowsDown)
{
pstack -= int.sizeof * 5;
}
else
{
pstack += int.sizeof * 5;
}
push( cast(size_t) &fiber_entryPoint ); // link register
push( 0x00000000 ); // control register
push( 0x00000000 ); // old stack pointer
// GPR values
version (StackGrowsDown)
{
pstack -= int.sizeof * 20;
}
else
{
pstack += int.sizeof * 20;
}
assert( (cast(size_t) pstack & 0x0f) == 0 );
}
else version (AsmPPC_Darwin)
{
version (StackGrowsDown) {}
else static assert(false, "PowerPC Darwin only supports decrementing stacks");
uint wsize = size_t.sizeof;
// linkage + regs + FPRs + VRs
uint space = 8 * wsize + 20 * wsize + 18 * 8 + 12 * 16;
(cast(ubyte*)pstack - space)[0 .. space] = 0;
pstack -= wsize * 6;
*cast(size_t*)pstack = cast(size_t) &fiber_entryPoint; // LR
pstack -= wsize * 22;
// On Darwin PPC64 pthread self is in R13 (which is reserved).
// At present, it is not safe to migrate fibers between threads, but if that
// changes, then updating the value of R13 will also need to be handled.
version (PPC64)
*cast(size_t*)(pstack + wsize) = cast(size_t) ThreadBase.getThis().m_addr;
assert( (cast(size_t) pstack & 0x0f) == 0 );
}
else version (AsmMIPS_O32_Posix)
{
version (StackGrowsDown) {}
else static assert(0);
/* We keep the FP registers and the return address below
* the stack pointer, so they don't get scanned by the
* GC. The last frame before swapping the stack pointer is
* organized like the following.
*
* |-----------|<= frame pointer
* | $gp |
* | $s0-8 |
* |-----------|<= stack pointer
* | $ra |
* | align(8) |
* | $f20-30 |
* |-----------|
*
*/
enum SZ_GP = 10 * size_t.sizeof; // $gp + $s0-8
enum SZ_RA = size_t.sizeof; // $ra
version (MIPS_HardFloat)
{
enum SZ_FP = 6 * 8; // $f20-30
enum ALIGN = -(SZ_FP + SZ_RA) & (8 - 1);
}
else
{
enum SZ_FP = 0;
enum ALIGN = 0;
}
enum BELOW = SZ_FP + ALIGN + SZ_RA;
enum ABOVE = SZ_GP;
enum SZ = BELOW + ABOVE;
(cast(ubyte*)pstack - SZ)[0 .. SZ] = 0;
pstack -= ABOVE;
*cast(size_t*)(pstack - SZ_RA) = cast(size_t)&fiber_entryPoint;
}
else version (AsmMIPS_N64_Posix)
{
version (StackGrowsDown) {}
else static assert(0);
/* We keep the FP registers and the return address below
* the stack pointer, so they don't get scanned by the
* GC. The last frame before swapping the stack pointer is
* organized like the following.
*
* |-----------|<= frame pointer
* | $fp/$gp |
* | $s0-7 |
* |-----------|<= stack pointer
* | $ra |
* | $f24-31 |
* |-----------|
*
*/
enum SZ_GP = 10 * size_t.sizeof; // $fp + $gp + $s0-7
enum SZ_RA = size_t.sizeof; // $ra
version (MIPS_HardFloat)
{
enum SZ_FP = 8 * double.sizeof; // $f24-31
}
else
{
enum SZ_FP = 0;
}
enum BELOW = SZ_FP + SZ_RA;
enum ABOVE = SZ_GP;
enum SZ = BELOW + ABOVE;
(cast(ubyte*)pstack - SZ)[0 .. SZ] = 0;
pstack -= ABOVE;
*cast(size_t*)(pstack - SZ_RA) = cast(size_t)&fiber_entryPoint;
}
else version (AsmLoongArch64_Posix)
{
// Like others, FP registers and return address ($r1) are kept
// below the saved stack top (tstack) to hide from GC scanning.
// fiber_switchContext expects newp sp to look like this:
// 10: $r21 (reserved)
// 9: $r22 (frame pointer)
// 8: $r23
// ...
// 0: $r31 <-- newp tstack
// -1: $r1 (return address) [&fiber_entryPoint]
// -2: $f24
// ...
// -9: $f31
version (StackGrowsDown) {}
else
static assert(false, "Only full descending stacks supported on LoongArch64");
// Only need to set return address ($r1). Everything else is fine
// zero initialized.
pstack -= size_t.sizeof * 11; // skip past space reserved for $r21-$r31
push(cast(size_t) &fiber_entryPoint);
pstack += size_t.sizeof; // adjust sp (newp) above lr
}
else version (AsmAArch64_Posix)
{
// Like others, FP registers and return address (lr) are kept
// below the saved stack top (tstack) to hide from GC scanning.
// fiber_switchContext expects newp sp to look like this:
// 19: x19
// ...
// 9: x29 (fp) <-- newp tstack
// 8: x30 (lr) [&fiber_entryPoint]
// 7: d8
// ...
// 0: d15
version (StackGrowsDown) {}
else
static assert(false, "Only full descending stacks supported on AArch64");
// Only need to set return address (lr). Everything else is fine
// zero initialized.
pstack -= size_t.sizeof * 11; // skip past x19-x29
push(cast(size_t) &fiber_trampoline); // see threadasm.S for docs
pstack += size_t.sizeof; // adjust sp (newp) above lr
}
else version (AsmARM_Posix)
{
/* We keep the FP registers and the return address below
* the stack pointer, so they don't get scanned by the
* GC. The last frame before swapping the stack pointer is
* organized like the following.
*
* | |-----------|<= 'frame starts here'
* | | fp | (the actual frame pointer, r11 isn't
* | | r10-r4 | updated and still points to the previous frame)
* | |-----------|<= stack pointer
* | | lr |
* | | 4byte pad |
* | | d15-d8 |(if FP supported)
* | |-----------|
* Y
* stack grows down: The pointer value here is smaller than some lines above
*/
// frame pointer can be zero, r10-r4 also zero initialized
version (StackGrowsDown)
pstack -= int.sizeof * 8;
else
static assert(false, "Only full descending stacks supported on ARM");
// link register
push( cast(size_t) &fiber_entryPoint );
/*
* We do not push padding and d15-d8 as those are zero initialized anyway
* Position the stack pointer above the lr register
*/
pstack += int.sizeof * 1;
}
else version (GNU_AsmX86_Windows)
{
version (StackGrowsDown) {} else static assert( false );
// Currently, MinGW doesn't utilize SEH exceptions.
// See DMD AsmX86_Windows If this code ever becomes fails and SEH is used.
push( 0x00000000 ); // Return address of fiber_entryPoint call
push( cast(size_t) &fiber_entryPoint ); // EIP
push( 0x00000000 ); // EBP
push( 0x00000000 ); // EDI
push( 0x00000000 ); // ESI
push( 0x00000000 ); // EBX
push( 0xFFFFFFFF ); // FS:[0] - Current SEH frame
push( cast(size_t) m_ctxt.bstack ); // FS:[4] - Top of stack
push( cast(size_t) m_ctxt.bstack - m_size ); // FS:[8] - Bottom of stack
push( 0x00000000 ); // EAX
}
else version (GNU_AsmX86_64_Windows)
{
push( 0x00000000_00000000 ); // Return address of fiber_entryPoint call
push( cast(size_t) &fiber_entryPoint ); // RIP
push( 0x00000000_00000000 ); // RBP
push( 0x00000000_00000000 ); // RBX
push( 0x00000000_00000000 ); // R12
push( 0x00000000_00000000 ); // R13
push( 0x00000000_00000000 ); // R14
push( 0x00000000_00000000 ); // R15
push( 0xFFFFFFFF_FFFFFFFF ); // GS:[0] - Current SEH frame
version (StackGrowsDown)
{
push( cast(size_t) m_ctxt.bstack ); // GS:[8] - Top of stack
push( cast(size_t) m_ctxt.bstack - m_size ); // GS:[16] - Bottom of stack
}
else
{
push( cast(size_t) m_ctxt.bstack ); // GS:[8] - Top of stack
push( cast(size_t) m_ctxt.bstack + m_size ); // GS:[16] - Bottom of stack
}
}
else static if ( __traits( compiles, ucontext_t ) )
{
getcontext( &m_utxt );
m_utxt.uc_stack.ss_sp = m_pmem;
m_utxt.uc_stack.ss_size = m_size;
makecontext( &m_utxt, &fiber_entryPoint, 0 );
// NOTE: If ucontext is being used then the top of the stack will
// be a pointer to the ucontext_t struct for that fiber.
push( cast(size_t) &m_utxt );
}
else
static assert(0, "Not implemented");
}
}
version (AsmX86_64_Windows)
{
// Test Windows x64 calling convention
unittest
{
void testNonvolatileRegister(alias REG)()
{
auto zeroRegister = new Fiber(() {
mixin("asm pure nothrow @nogc { naked; xor "~REG~", "~REG~"; ret; }");
});
long after;
mixin("asm pure nothrow @nogc { mov "~REG~", 0xFFFFFFFFFFFFFFFF; }");
zeroRegister.call();
mixin("asm pure nothrow @nogc { mov after, "~REG~"; }");
assert(after == -1);
}
void testNonvolatileRegisterSSE(alias REG)()
{
auto zeroRegister = new Fiber(() {
mixin("asm pure nothrow @nogc { naked; xorpd "~REG~", "~REG~"; ret; }");
});
long[2] before = [0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF], after;
mixin("asm pure nothrow @nogc { movdqu "~REG~", before; }");
zeroRegister.call();
mixin("asm pure nothrow @nogc { movdqu after, "~REG~"; }");
assert(before == after);
}
testNonvolatileRegister!("R12")();
testNonvolatileRegister!("R13")();
testNonvolatileRegister!("R14")();
testNonvolatileRegister!("R15")();
testNonvolatileRegister!("RDI")();
testNonvolatileRegister!("RSI")();
testNonvolatileRegister!("RBX")();
testNonvolatileRegisterSSE!("XMM6")();
testNonvolatileRegisterSSE!("XMM7")();
testNonvolatileRegisterSSE!("XMM8")();
testNonvolatileRegisterSSE!("XMM9")();
testNonvolatileRegisterSSE!("XMM10")();
testNonvolatileRegisterSSE!("XMM11")();
testNonvolatileRegisterSSE!("XMM12")();
testNonvolatileRegisterSSE!("XMM13")();
testNonvolatileRegisterSSE!("XMM14")();
testNonvolatileRegisterSSE!("XMM15")();
}
}
version (D_InlineAsm_X86_64)
{
unittest
{
void testStackAlignment()
{
void* pRSP;
asm pure nothrow @nogc
{
mov pRSP, RSP;
}
assert((cast(size_t)pRSP & 0xF) == 0);
}
auto fib = new Fiber(&testStackAlignment);
fib.call();
}
}