libitm/method-ml.cc - gcc - Git at Google

 /* Copyright (C) 2012-2022 Free Software Foundation, Inc.
    Contributed by Torvald Riegel <triegel@redhat.com>.

    This file is part of the GNU Transactional Memory Library (libitm).

    Libitm is free software; you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.

    Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
    more details.

    Under Section 7 of GPL version 3, you are granted additional
    permissions described in the GCC Runtime Library Exception, version
    3.1, as published by the Free Software Foundation.

    You should have received a copy of the GNU General Public License and
    a copy of the GCC Runtime Library Exception along with this program;
    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */

 #include "libitm_i.h"

 using namespace GTM;

 namespace {

 // This group consists of all TM methods that synchronize via multiple locks
 // (or ownership records).
 struct ml_mg : public method_group
 {
   static const gtm_word LOCK_BIT = (~(gtm_word)0 >> 1) + 1;
   static const gtm_word INCARNATION_BITS = 3;
   static const gtm_word INCARNATION_MASK = 7;
   // Maximum time is all bits except the lock bit, the overflow reserve bit,
   // and the incarnation bits).
   static const gtm_word TIME_MAX = (~(gtm_word)0 >> (2 + INCARNATION_BITS));
   // The overflow reserve bit is the MSB of the timestamp part of an orec,
   // so we can have TIME_MAX+1 pending timestamp increases before we overflow.
   static const gtm_word OVERFLOW_RESERVE = TIME_MAX + 1;

   static bool is_locked(gtm_word o) { return o & LOCK_BIT; }
   static gtm_word set_locked(gtm_thread *tx)
   {
     return ((uintptr_t)tx >> 1) | LOCK_BIT;
   }
   // Returns a time that includes the lock bit, which is required by both
   // validate() and is_more_recent_or_locked().
   static gtm_word get_time(gtm_word o) { return o >> INCARNATION_BITS; }
   static gtm_word set_time(gtm_word time) { return time << INCARNATION_BITS; }
   static bool is_more_recent_or_locked(gtm_word o, gtm_word than_time)
   {
     // LOCK_BIT is the MSB; thus, if O is locked, it is larger than TIME_MAX.
     return get_time(o) > than_time;
   }
   static bool has_incarnation_left(gtm_word o)
   {
     return (o & INCARNATION_MASK) < INCARNATION_MASK;
   }
   static gtm_word inc_incarnation(gtm_word o) { return o + 1; }

   // The shared time base.
   atomic<gtm_word> time __attribute__((aligned(HW_CACHELINE_SIZE)));

   // The array of ownership records.
   atomic<gtm_word>* orecs __attribute__((aligned(HW_CACHELINE_SIZE)));
   char tailpadding[HW_CACHELINE_SIZE - sizeof(atomic<gtm_word>*)];

   // Location-to-orec mapping.  Stripes of 32B mapped to 2^16 orecs using
   // multiplicative hashing.  See Section 5.2.2 of Torvald Riegel's PhD thesis
   // for the background on this choice of hash function and parameters:
   // http://nbn-resolving.de/urn:nbn:de:bsz:14-qucosa-115596
   // We pick the Mult32 hash because it works well with fewer orecs (i.e.,
   // less space overhead and just 32b multiplication).
   // We may want to check and potentially change these settings once we get
   // better or just more benchmarks.
   static const gtm_word L2O_ORECS_BITS = 16;
   static const gtm_word L2O_ORECS = 1 << L2O_ORECS_BITS;
   // An iterator over the orecs covering the region [addr,addr+len).
   struct orec_iterator
   {
     static const gtm_word L2O_SHIFT = 5;
     static const uint32_t L2O_MULT32 = 81007;
     uint32_t mult;
     size_t orec;
     size_t orec_end;
     orec_iterator (const void* addr, size_t len)
     {
       uint32_t a = (uintptr_t) addr >> L2O_SHIFT;
       uint32_t ae = ((uintptr_t) addr + len + (1 << L2O_SHIFT) - 1)
 	  >> L2O_SHIFT;
       mult = a * L2O_MULT32;
       orec = mult >> (32 - L2O_ORECS_BITS);
       // We can't really avoid this second multiplication unless we use a
       // branch instead or know more about the alignment of addr.  (We often
       // know len at compile time because of instantiations of functions
       // such as _ITM_RU* for accesses of specific lengths.
       orec_end = (ae * L2O_MULT32) >> (32 - L2O_ORECS_BITS);
     }
     size_t get() { return orec; }
     void advance()
     {
       // We cannot simply increment orec because L2O_MULT32 is larger than
       // 1 << (32 - L2O_ORECS_BITS), and thus an increase of the stripe (i.e.,
       // addr >> L2O_SHIFT) could increase the resulting orec index by more
       // than one; with the current parameters, we would roughly acquire a
       // fourth more orecs than necessary for regions covering more than orec.
       // Keeping mult around as extra state shouldn't matter much.
       mult += L2O_MULT32;
       orec = mult >> (32 - L2O_ORECS_BITS);
     }
     bool reached_end() { return orec == orec_end; }
   };

   virtual void init()
   {
     // We assume that an atomic<gtm_word> is backed by just a gtm_word, so
     // starting with zeroed memory is fine.
     orecs = (atomic<gtm_word>*) xcalloc(
         sizeof(atomic<gtm_word>) * L2O_ORECS, true);
     // This store is only executed while holding the serial lock, so relaxed
     // memory order is sufficient here.
     time.store(0, memory_order_relaxed);
   }

   virtual void fini()
   {
     free(orecs);
   }

   // We only re-initialize when our time base overflows.  Thus, only reset
   // the time base and the orecs but do not re-allocate the orec array.
   virtual void reinit()
   {
     // This store is only executed while holding the serial lock, so relaxed
     // memory order is sufficient here.  Same holds for the memset.
     time.store(0, memory_order_relaxed);
     // The memset below isn't strictly kosher because it bypasses
     // the non-trivial assignment operator defined by std::atomic.  Using
     // a local void* is enough to prevent GCC from warning for this.
     void *p = orecs;
     memset(p, 0, sizeof(atomic<gtm_word>) * L2O_ORECS);
   }
 };

 static ml_mg o_ml_mg;


 // The multiple lock, write-through TM method.
 // Maps each memory location to one of the orecs in the orec array, and then
 // acquires the associated orec eagerly before writing through.
 // Writes require undo-logging because we are dealing with several locks/orecs
 // and need to resolve deadlocks if necessary by aborting one of the
 // transactions.
 // Reads do time-based validation with snapshot time extensions.  Incarnation
 // numbers are used to decrease contention on the time base (with those,
 // aborted transactions do not need to acquire a new version number for the
 // data that has been previously written in the transaction and needs to be
 // rolled back).
 // gtm_thread::shared_state is used to store a transaction's current
 // snapshot time (or commit time). The serial lock uses ~0 for inactive
 // transactions and 0 for active ones. Thus, we always have a meaningful
 // timestamp in shared_state that can be used to implement quiescence-based
 // privatization safety.
 class ml_wt_dispatch : public abi_dispatch
 {
 protected:
   static void pre_write(gtm_thread *tx, const void *addr, size_t len)
   {
     gtm_word snapshot = tx->shared_state.load(memory_order_relaxed);
     gtm_word locked_by_tx = ml_mg::set_locked(tx);

     // Lock all orecs that cover the region.
     ml_mg::orec_iterator oi(addr, len);
     do
       {
         // Load the orec.  Relaxed memory order is sufficient here because
         // either we have acquired the orec or we will try to acquire it with
         // a CAS with stronger memory order.
         gtm_word o = o_ml_mg.orecs[oi.get()].load(memory_order_relaxed);

         // Check whether we have acquired the orec already.
         if (likely (locked_by_tx != o))
           {
             // If not, acquire.  Make sure that our snapshot time is larger or
             // equal than the orec's version to avoid masking invalidations of
             // our snapshot with our own writes.
             if (unlikely (ml_mg::is_locked(o)))
               tx->restart(RESTART_LOCKED_WRITE);

             if (unlikely (ml_mg::get_time(o) > snapshot))
               {
                 // We only need to extend the snapshot if we have indeed read
                 // from this orec before.  Given that we are an update
                 // transaction, we will have to extend anyway during commit.
                 // ??? Scan the read log instead, aborting if we have read
                 // from data covered by this orec before?
                 snapshot = extend(tx);
               }

             // We need acquire memory order here to synchronize with other
             // (ownership) releases of the orec.  We do not need acq_rel order
             // because whenever another thread reads from this CAS'
             // modification, then it will abort anyway and does not rely on
             // any further happens-before relation to be established.
             if (unlikely (!o_ml_mg.orecs[oi.get()].compare_exchange_strong(
                 o, locked_by_tx, memory_order_acquire)))
               tx->restart(RESTART_LOCKED_WRITE);

             // We use an explicit fence here to avoid having to use release
             // memory order for all subsequent data stores.  This fence will
             // synchronize with loads of the data with acquire memory order.
             // See post_load() for why this is necessary.
             // Adding require memory order to the prior CAS is not sufficient,
             // at least according to the Batty et al. formalization of the
             // memory model.
             atomic_thread_fence(memory_order_release);

             // We log the previous value here to be able to use incarnation
             // numbers when we have to roll back.
             // ??? Reserve capacity early to avoid capacity checks here?
             gtm_rwlog_entry *e = tx->writelog.push();
             e->orec = o_ml_mg.orecs + oi.get();
             e->value = o;
           }
         oi.advance();
       }
     while (!oi.reached_end());

     // Do undo logging.  We do not know which region prior writes logged
     // (even if orecs have been acquired), so just log everything.
     tx->undolog.log(addr, len);
   }

   static void pre_write(const void *addr, size_t len)
   {
     gtm_thread *tx = gtm_thr();
     pre_write(tx, addr, len);
   }

   // Returns true iff all the orecs in our read log still have the same time
   // or have been locked by the transaction itself.
   static bool validate(gtm_thread *tx)
   {
     gtm_word locked_by_tx = ml_mg::set_locked(tx);
     // ??? This might get called from pre_load() via extend().  In that case,
     // we don't really need to check the new entries that pre_load() is
     // adding.  Stop earlier?
     for (gtm_rwlog_entry *i = tx->readlog.begin(), *ie = tx->readlog.end();
         i != ie; i++)
       {
 	// Relaxed memory order is sufficient here because we do not need to
 	// establish any new synchronizes-with relationships.  We only need
 	// to read a value that is as least as current as enforced by the
 	// callers: extend() loads global time with acquire, and trycommit()
 	// increments global time with acquire.  Therefore, we will see the
 	// most recent orec updates before the global time that we load.
         gtm_word o = i->orec->load(memory_order_relaxed);
         // We compare only the time stamp and the lock bit here.  We know that
         // we have read only committed data before, so we can ignore
         // intermediate yet rolled-back updates presented by the incarnation
         // number bits.
         if (ml_mg::get_time(o) != ml_mg::get_time(i->value)
             && o != locked_by_tx)
           return false;
       }
     return true;
   }

   // Tries to extend the snapshot to a more recent time.  Returns the new
   // snapshot time and updates TX->SHARED_STATE.  If the snapshot cannot be
   // extended to the current global time, TX is restarted.
   static gtm_word extend(gtm_thread *tx)
   {
     // We read global time here, even if this isn't strictly necessary
     // because we could just return the maximum of the timestamps that
     // validate sees.  However, the potential cache miss on global time is
     // probably a reasonable price to pay for avoiding unnecessary extensions
     // in the future.
     // We need acquire memory oder because we have to synchronize with the
     // increment of global time by update transactions, whose lock
     // acquisitions we have to observe (also see trycommit()).
     gtm_word snapshot = o_ml_mg.time.load(memory_order_acquire);
     if (!validate(tx))
       tx->restart(RESTART_VALIDATE_READ);

     // Update our public snapshot time.  Probably useful to decrease waiting
     // due to quiescence-based privatization safety.
     // Use release memory order to establish synchronizes-with with the
     // privatizers; prior data loads should happen before the privatizers
     // potentially modify anything.
     tx->shared_state.store(snapshot, memory_order_release);
     return snapshot;
   }

   // First pass over orecs.  Load and check all orecs that cover the region.
   // Write to read log, extend snapshot time if necessary.
   static gtm_rwlog_entry* pre_load(gtm_thread *tx, const void* addr,
       size_t len)
   {
     // Don't obtain an iterator yet because the log might get resized.
     size_t log_start = tx->readlog.size();
     gtm_word snapshot = tx->shared_state.load(memory_order_relaxed);
     gtm_word locked_by_tx = ml_mg::set_locked(tx);

     ml_mg::orec_iterator oi(addr, len);
     do
       {
         // We need acquire memory order here so that this load will
         // synchronize with the store that releases the orec in trycommit().
         // In turn, this makes sure that subsequent data loads will read from
         // a visible sequence of side effects that starts with the most recent
         // store to the data right before the release of the orec.
         gtm_word o = o_ml_mg.orecs[oi.get()].load(memory_order_acquire);

         if (likely (!ml_mg::is_more_recent_or_locked(o, snapshot)))
           {
             success:
             gtm_rwlog_entry *e = tx->readlog.push();
             e->orec = o_ml_mg.orecs + oi.get();
             e->value = o;
           }
         else if (!ml_mg::is_locked(o))
           {
             // We cannot read this part of the region because it has been
             // updated more recently than our snapshot time.  If we can extend
             // our snapshot, then we can read.
             snapshot = extend(tx);
             goto success;
           }
         else
           {
             // If the orec is locked by us, just skip it because we can just
             // read from it.  Otherwise, restart the transaction.
             if (o != locked_by_tx)
               tx->restart(RESTART_LOCKED_READ);
           }
         oi.advance();
       }
     while (!oi.reached_end());
     return &tx->readlog[log_start];
   }

   // Second pass over orecs, verifying that the we had a consistent read.
   // Restart the transaction if any of the orecs is locked by another
   // transaction.
   static void post_load(gtm_thread *tx, gtm_rwlog_entry* log)
   {
     for (gtm_rwlog_entry *end = tx->readlog.end(); log != end; log++)
       {
         // Check that the snapshot is consistent.  We expect the previous data
         // load to have acquire memory order, or be atomic and followed by an
         // acquire fence.
         // As a result, the data load will synchronize with the release fence
         // issued by the transactions whose data updates the data load has read
         // from.  This forces the orec load to read from a visible sequence of
         // side effects that starts with the other updating transaction's
         // store that acquired the orec and set it to locked.
         // We therefore either read a value with the locked bit set (and
         // restart) or read an orec value that was written after the data had
         // been written.  Either will allow us to detect inconsistent reads
         // because it will have a higher/different value.
 	// Also note that differently to validate(), we compare the raw value
 	// of the orec here, including incarnation numbers.  We must prevent
 	// returning uncommitted data from loads (whereas when validating, we
 	// already performed a consistent load).
         gtm_word o = log->orec->load(memory_order_relaxed);
         if (log->value != o)
           tx->restart(RESTART_VALIDATE_READ);
       }
   }

   template <typename V> static V load(const V* addr, ls_modifier mod)
   {
     // Read-for-write should be unlikely, but we need to handle it or will
     // break later WaW optimizations.
     if (unlikely(mod == RfW))
       {
 	pre_write(addr, sizeof(V));
 	return *addr;
       }
     if (unlikely(mod == RaW))
       return *addr;
     // ??? Optimize for RaR?

     gtm_thread *tx = gtm_thr();
     gtm_rwlog_entry* log = pre_load(tx, addr, sizeof(V));

     // Load the data.
     // This needs to have acquire memory order (see post_load()).
     // Alternatively, we can put an acquire fence after the data load but this
     // is probably less efficient.
     // FIXME We would need an atomic load with acquire memory order here but
     // we can't just forge an atomic load for nonatomic data because this
     // might not work on all implementations of atomics.  However, we need
     // the acquire memory order and we can only establish this if we link
     // it to the matching release using a reads-from relation between atomic
     // loads.  Also, the compiler is allowed to optimize nonatomic accesses
     // differently than atomic accesses (e.g., if the load would be moved to
     // after the fence, we potentially don't synchronize properly anymore).
     // Instead of the following, just use an ordinary load followed by an
     // acquire fence, and hope that this is good enough for now:
     // V v = atomic_load_explicit((atomic<V>*)addr, memory_order_acquire);
     V v = *addr;
     atomic_thread_fence(memory_order_acquire);

     // ??? Retry the whole load if it wasn't consistent?
     post_load(tx, log);

     return v;
   }

   template <typename V> static void store(V* addr, const V value,
       ls_modifier mod)
   {
     if (likely(mod != WaW))
       pre_write(addr, sizeof(V));
     // FIXME We would need an atomic store here but we can't just forge an
     // atomic load for nonatomic data because this might not work on all
     // implementations of atomics.  However, we need this store to link the
     // release fence in pre_write() to the acquire operation in load, which
     // is only guaranteed if we have a reads-from relation between atomic
     // accesses.  Also, the compiler is allowed to optimize nonatomic accesses
     // differently than atomic accesses (e.g., if the store would be moved
     // to before the release fence in pre_write(), things could go wrong).
     // atomic_store_explicit((atomic<V>*)addr, value, memory_order_relaxed);
     *addr = value;
   }

 public:
   static void memtransfer_static(void *dst, const void* src, size_t size,
       bool may_overlap, ls_modifier dst_mod, ls_modifier src_mod)
   {
     gtm_rwlog_entry* log = 0;
     gtm_thread *tx = 0;

     if (src_mod == RfW)
       {
         tx = gtm_thr();
         pre_write(tx, src, size);
       }
     else if (src_mod != RaW && src_mod != NONTXNAL)
       {
         tx = gtm_thr();
         log = pre_load(tx, src, size);
       }
     // ??? Optimize for RaR?

     if (dst_mod != NONTXNAL && dst_mod != WaW)
       {
         if (src_mod != RfW && (src_mod == RaW || src_mod == NONTXNAL))
           tx = gtm_thr();
         pre_write(tx, dst, size);
       }

     // FIXME We should use atomics here (see store()).  Let's just hope that
     // memcpy/memmove are good enough.
     if (!may_overlap)
       ::memcpy(dst, src, size);
     else
       ::memmove(dst, src, size);

     // ??? Retry the whole memtransfer if it wasn't consistent?
     if (src_mod != RfW && src_mod != RaW && src_mod != NONTXNAL)
       {
 	// See load() for why we need the acquire fence here.
 	atomic_thread_fence(memory_order_acquire);
 	post_load(tx, log);
       }
   }

   static void memset_static(void *dst, int c, size_t size, ls_modifier mod)
   {
     if (mod != WaW)
       pre_write(dst, size);
     // FIXME We should use atomics here (see store()).  Let's just hope that
     // memset is good enough.
     ::memset(dst, c, size);
   }

   virtual gtm_restart_reason begin_or_restart()
   {
     // We don't need to do anything for nested transactions.
     gtm_thread *tx = gtm_thr();
     if (tx->parent_txns.size() > 0)
       return NO_RESTART;

     // Read the current time, which becomes our snapshot time.
     // Use acquire memory oder so that we see the lock acquisitions by update
     // transcations that incremented the global time (see trycommit()).
     gtm_word snapshot = o_ml_mg.time.load(memory_order_acquire);
     // Re-initialize method group on time overflow.
     if (snapshot >= o_ml_mg.TIME_MAX)
       return RESTART_INIT_METHOD_GROUP;

     // We don't need to enforce any ordering for the following store. There
     // are no earlier data loads in this transaction, so the store cannot
     // become visible before those (which could lead to the violation of
     // privatization safety). The store can become visible after later loads
     // but this does not matter because the previous value will have been
     // smaller or equal (the serial lock will set shared_state to zero when
     // marking the transaction as active, and restarts enforce immediate
     // visibility of a smaller or equal value with a barrier (see
     // rollback()).
     tx->shared_state.store(snapshot, memory_order_relaxed);
     return NO_RESTART;
   }

   virtual bool trycommit(gtm_word& priv_time)
   {
     gtm_thread* tx = gtm_thr();

     // If we haven't updated anything, we can commit.
     if (!tx->writelog.size())
       {
         tx->readlog.clear();
         // We still need to ensure privatization safety, unfortunately.  While
         // we cannot have privatized anything by ourselves (because we are not
         // an update transaction), we can have observed the commits of
         // another update transaction that privatized something.  Because any
         // commit happens before ensuring privatization, our snapshot and
         // commit can thus have happened before ensuring privatization safety
         // for this commit/snapshot time.  Therefore, before we can return to
         // nontransactional code that might use the privatized data, we must
         // ensure privatization safety for our snapshot time.
         // This still seems to be better than not allowing use of the
         // snapshot time before privatization safety has been ensured because
         // we at least can run transactions such as this one, and in the
         // meantime the transaction producing this commit time might have
         // finished ensuring privatization safety for it.
         priv_time = tx->shared_state.load(memory_order_relaxed);
         return true;
       }

     // Get a commit time.
     // Overflow of o_ml_mg.time is prevented in begin_or_restart().
     // We need acq_rel here because (1) the acquire part is required for our
     // own subsequent call to validate(), and the release part is necessary to
     // make other threads' validate() work as explained there and in extend().
     gtm_word ct = o_ml_mg.time.fetch_add(1, memory_order_acq_rel) + 1;

     // Extend our snapshot time to at least our commit time.
     // Note that we do not need to validate if our snapshot time is right
     // before the commit time because we are never sharing the same commit
     // time with other transactions.
     // No need to reset shared_state, which will be modified by the serial
     // lock right after our commit anyway.
     gtm_word snapshot = tx->shared_state.load(memory_order_relaxed);
     if (snapshot < ct - 1 && !validate(tx))
       return false;

     // Release orecs.
     // See pre_load() / post_load() for why we need release memory order.
     // ??? Can we use a release fence and relaxed stores?
     gtm_word v = ml_mg::set_time(ct);
     for (gtm_rwlog_entry *i = tx->writelog.begin(), *ie = tx->writelog.end();
         i != ie; i++)
       i->orec->store(v, memory_order_release);

     // We're done, clear the logs.
     tx->writelog.clear();
     tx->readlog.clear();

     // Need to ensure privatization safety. Every other transaction must
     // have a snapshot time that is at least as high as our commit time
     // (i.e., our commit must be visible to them).
     priv_time = ct;
     return true;
   }

   virtual void rollback(gtm_transaction_cp *cp)
   {
     // We don't do anything for rollbacks of nested transactions.
     // ??? We could release locks here if we snapshot writelog size.  readlog
     // is similar.  This is just a performance optimization though.  Nested
     // aborts should be rather infrequent, so the additional save/restore
     // overhead for the checkpoints could be higher.
     if (cp != 0)
       return;

     gtm_thread *tx = gtm_thr();
     gtm_word overflow_value = 0;

     // Release orecs.
     for (gtm_rwlog_entry *i = tx->writelog.begin(), *ie = tx->writelog.end();
         i != ie; i++)
       {
         // If possible, just increase the incarnation number.
         // See pre_load() / post_load() for why we need release memory order.
 	// ??? Can we use a release fence and relaxed stores?  (Same below.)
         if (ml_mg::has_incarnation_left(i->value))
           i->orec->store(ml_mg::inc_incarnation(i->value),
               memory_order_release);
         else
           {
             // We have an incarnation overflow.  Acquire a new timestamp, and
             // use it from now on as value for each orec whose incarnation
             // number cannot be increased.
             // Overflow of o_ml_mg.time is prevented in begin_or_restart().
             // See pre_load() / post_load() for why we need release memory
             // order.
             if (!overflow_value)
               // Release memory order is sufficient but required here.
               // In contrast to the increment in trycommit(), we need release
               // for the same reason but do not need the acquire because we
               // do not validate subsequently.
               overflow_value = ml_mg::set_time(
                   o_ml_mg.time.fetch_add(1, memory_order_release) + 1);
             i->orec->store(overflow_value, memory_order_release);
           }
       }

     // We need this release fence to ensure that privatizers see the
     // rolled-back original state (not any uncommitted values) when they read
     // the new snapshot time that we write in begin_or_restart().
     atomic_thread_fence(memory_order_release);

     // We're done, clear the logs.
     tx->writelog.clear();
     tx->readlog.clear();
   }

   virtual bool snapshot_most_recent()
   {
     // This is the same code as in extend() except that we do not restart
     // on failure but simply return the result, and that we don't validate
     // if our snapshot is already most recent.
     gtm_thread* tx = gtm_thr();
     gtm_word snapshot = o_ml_mg.time.load(memory_order_acquire);
     if (snapshot == tx->shared_state.load(memory_order_relaxed))
       return true;
     if (!validate(tx))
       return false;

     // Update our public snapshot time.  Necessary so that we do not prevent
     // other transactions from ensuring privatization safety.
     tx->shared_state.store(snapshot, memory_order_release);
     return true;
   }

   virtual bool supports(unsigned number_of_threads)
   {
     // Each txn can commit and fail and rollback once before checking for
     // overflow, so this bounds the number of threads that we can support.
     // In practice, this won't be a problem but we check it anyway so that
     // we never break in the occasional weird situation.
     return (number_of_threads * 2 <= ml_mg::OVERFLOW_RESERVE);
   }

   CREATE_DISPATCH_METHODS(virtual, )
   CREATE_DISPATCH_METHODS_MEM()

   ml_wt_dispatch() : abi_dispatch(false, true, false, false, 0, &o_ml_mg)
   { }
 };

 } // anon namespace

 static const ml_wt_dispatch o_ml_wt_dispatch;

 abi_dispatch *
 GTM::dispatch_ml_wt ()
 {
   return const_cast<ml_wt_dispatch *>(&o_ml_wt_dispatch);
 }