//===-- tsan_clock.h --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#ifndef TSAN_CLOCK_H
#define TSAN_CLOCK_H

#include "tsan_defs.h"
#include "tsan_dense_alloc.h"

namespace __tsan {

typedef DenseSlabAlloc<ClockBlock, 1 << 22, 1 << 10> ClockAlloc;
typedef DenseSlabAllocCache ClockCache;

// The clock that lives in sync variables (mutexes, atomics, etc).
class SyncClock {
 public:
  SyncClock();
  ~SyncClock();

  uptr size() const;

  // These are used only in tests.
  u64 get(unsigned tid) const;
  u64 get_clean(unsigned tid) const;

  void Resize(ClockCache *c, uptr nclk);
  void Reset(ClockCache *c);

  void DebugDump(int(*printf)(const char *s, ...));

  // Clock element iterator.
  // Note: it iterates only over the table without regard to dirty entries.
  class Iter {
   public:
    explicit Iter(SyncClock* parent);
    Iter& operator++();
    bool operator!=(const Iter& other);
    ClockElem &operator*();

   private:
    SyncClock *parent_;
    // [pos_, end_) is the current continuous range of clock elements.
    ClockElem *pos_;
    ClockElem *end_;
    int block_;  // Index of the current second-level block.

    NOINLINE void Next();
  };

  Iter begin();
  Iter end();
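
  // Illustrative sketch only (not part of the interface): begin()/end() allow
  // range-for iteration over the table of a SyncClock instance, here a
  // hypothetical `sc`; dirty entries are not visited (see the Iter note above):
  //   for (ClockElem &ce : sc) {
  //     // ce.epoch (and ce.reused) come from ClockElem in tsan_defs.h.
  //   }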

 private:
  friend class ThreadClock;
  friend class Iter;
  static const uptr kDirtyTids = 2;

  struct Dirty {
    u32 tid() const { return tid_ == kShortInvalidTid ? kInvalidTid : tid_; }
    void set_tid(u32 tid) {
      tid_ = tid == kInvalidTid ? kShortInvalidTid : tid;
    }
    u64 epoch : kClkBits;

   private:
    // Full kInvalidTid won't fit into Dirty::tid.
    static const u64 kShortInvalidTid = (1ull << (64 - kClkBits)) - 1;
    u64 tid_ : 64 - kClkBits;  // kInvalidTid if not active
  };
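
  // A minimal illustration of the tid narrowing above (sketch, not used by the
  // runtime): kInvalidTid does not fit into the 64 - kClkBits bitfield, so it is
  // stored as kShortInvalidTid and widened back on read:
  //   Dirty d;
  //   d.set_tid(kInvalidTid);          // stored internally as kShortInvalidTid
  //   CHECK_EQ(d.tid(), kInvalidTid);  // tid() maps it back to kInvalidTid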

  static_assert(sizeof(Dirty) == 8, "Dirty is not 64bit");

  unsigned release_store_tid_;
  unsigned release_store_reused_;
  Dirty dirty_[kDirtyTids];
  // If size_ is 0, tab_ is nullptr.
  // If size_ <= 64 (kClockCount), tab_ points to an array with
  // 64 ClockElem's (ClockBlock::clock).
  // Otherwise, tab_ points to an array with up to 127 u32 elements,
  // each pointing to a second-level 512-byte block with 64 ClockElem's.
  // Unused space in the first level ClockBlock is used to store additional
  // clock elements.
  // The last u32 element in the first level ClockBlock is always used as
  // reference counter.
  //
  // See the following scheme for details.
  // All memory blocks are 512 bytes (allocated from ClockAlloc).
  // Clock (clk) elements are 64 bits.
  // Idx and ref are 32 bits.
  //
  // tab_
  //  |
  //  \/
  //  +----------------------------------------------------+
  //  | clk128 | clk129 | ...unused... | idx1 | idx0 | ref  |
  //  +----------------------------------------------------+
  //                                      |      |
  //                                      |      \/
  //                                      |      +----------------+
  //                                      |      | clk0 ... clk63 |
  //                                      |      +----------------+
  //                                      \/
  //                                      +------------------+
  //                                      | clk64 ... clk127 |
  //                                      +------------------+
  //
  // Note: dirty entries, if active, always override what's stored in the clock.
  ClockBlock *tab_;
  u32 tab_idx_;
  u16 size_;
  u16 blocks_;  // Number of second level blocks.
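
  // Illustrative lookup sketch for the layout above (assumptions flagged inline;
  // the real accessor is elem() below and may differ in detail). Ignoring the
  // trailing elements stored directly in the first-level block (clk128, ...):
  //   uptr block = tid / ClockBlock::kClockCount;  // which second-level block
  //   uptr off = tid % ClockBlock::kClockCount;    // slot within that block
  //   // get_block(block) yields the u32 index of the second-level ClockBlock,
  //   // which is mapped through ClockAlloc to reach its clock[off] element.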

  void Unshare(ClockCache *c);
  bool IsShared() const;
  bool Cachable() const;
  void ResetImpl();
  void FlushDirty();
  uptr capacity() const;
  u32 get_block(uptr bi) const;
  void append_block(u32 idx);
  ClockElem &elem(unsigned tid) const;
};

// The clock that lives in threads.
class ThreadClock {
 public:
  typedef DenseSlabAllocCache Cache;

  explicit ThreadClock(unsigned tid, unsigned reused = 0);

  u64 get(unsigned tid) const;
  void set(ClockCache *c, unsigned tid, u64 v);
  void set(u64 v);
  void tick();
  uptr size() const;

  void acquire(ClockCache *c, SyncClock *src);
  void releaseStoreAcquire(ClockCache *c, SyncClock *src);
  void release(ClockCache *c, SyncClock *dst);
  void acq_rel(ClockCache *c, SyncClock *dst);
  void ReleaseStore(ClockCache *c, SyncClock *dst);
  void ResetCached(ClockCache *c);
  void NoteGlobalAcquire(u64 v);

  void DebugReset();
  void DebugDump(int(*printf)(const char *s, ...));
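
  // Illustrative usage sketch (not part of this header; `cache` and
  // `mutex_clock` are hypothetical names for a ClockCache and a mutex-owned
  // SyncClock):
  //   ThreadClock thr1(/*tid=*/1);
  //   thr1.tick();                         // local event on thread 1
  //   thr1.release(&cache, &mutex_clock);  // unlock: publish thr1's clock
  //   ThreadClock thr2(/*tid=*/2);
  //   thr2.acquire(&cache, &mutex_clock);  // lock: thr2 happens-after thr1's release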

 private:
  static const uptr kDirtyTids = SyncClock::kDirtyTids;
  // Index of the thread associated with the clock ("current thread").
  const unsigned tid_;
  const unsigned reused_;  // tid_ reuse count.
  // Current thread time when it acquired something from other threads.
  u64 last_acquire_;

  // Last time another thread has done a global acquire of this thread's clock.
  // It helps to avoid the problem described in:
  // https://github.com/golang/go/issues/39186
  // See test/tsan/java_finalizer2.cpp for a regression test.
  // Note the failure is _extremely_ hard to hit, so if you are trying
  // to reproduce it, you may want to run something like:
  // $ go get golang.org/x/tools/cmd/stress
  // $ stress -p=64 ./a.out
  //
  // The crux of the problem is roughly as follows.
  // A number of O(1) optimizations in the clocks algorithm assume proper
  // transitive cumulative propagation of clock values. The AcquireGlobal
  // operation may produce an inconsistent non-linearizable view of
  // thread clocks. Namely, it may acquire a later value from a thread
  // with a higher ID, but fail to acquire an earlier value from a thread
  // with a lower ID. If a thread that executed AcquireGlobal then releases
  // to a sync clock, it will spoil the sync clock with the inconsistent
  // values. If another thread later releases to the sync clock, the optimized
  // algorithm may break.
  //
  // The exact sequence of events that leads to the failure:
  // - thread 1 executes AcquireGlobal
  // - thread 1 acquires value 1 for thread 2
  // - thread 2 increments clock to 2
  // - thread 2 releases to sync object 1
  // - thread 3 at time 1
  // - thread 3 acquires from sync object 1
  // - thread 3 increments clock to 2
  // - thread 1 acquires value 2 for thread 3
  // - thread 1 releases to sync object 2
  // - sync object 2 clock has 1 for thread 2 and 2 for thread 3
  // - thread 3 releases to sync object 2
  // - thread 3 sees value 2 in the clock for itself
  //   and decides that it has already released to the clock
  //   and did not acquire anything from other threads after that
  //   (the last_acquire_ check in release operation)
  // - thread 3 does not update the value for thread 2 in the clock from 1 to 2
  // - thread 4 acquires from sync object 2
  // - thread 4 detects a false race with thread 2
  //   as it should have been synchronized with thread 2 up to time 2,
  //   but because of the broken clock it is now synchronized only up to time 1
  //
  // The global_acquire_ value helps to prevent this scenario.
  // Namely, thread 3 will not trust any own clock values up to global_acquire_
  // for the purposes of the last_acquire_ optimization.
  atomic_uint64_t global_acquire_;
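
  // A hedged sketch of how this bound is consumed (the real check lives in
  // tsan_clock.cpp and may differ in detail): during a release, the thread may
  // treat its own epoch already stored in dst as proof that nothing new needs
  // to be propagated only if that epoch exceeds both last_acquire_ and
  // global_acquire_, roughly:
  //   u64 my_epoch = dst->elem(tid_).epoch;
  //   bool fast_path_ok = my_epoch > last_acquire_ &&
  //                       my_epoch > atomic_load_relaxed(&global_acquire_);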

  // Cached SyncClock (without dirty entries and release_store_tid_).
  // We reuse it for subsequent store-release operations without intervening
  // acquire operations. Since it is shared (and thus constant), the clock value
  // for the current thread is then stored in dirty entries in the SyncClock.
  // We hold a reference to the table while it is cached here.
  u32 cached_idx_;
  u16 cached_size_;
  u16 cached_blocks_;

  // Number of active elements in the clk_ table (the rest is zeros).
  uptr nclk_;
  u64 clk_[kMaxTidInClock];  // Fixed size vector clock.

  bool IsAlreadyAcquired(const SyncClock *src) const;
  bool HasAcquiredAfterRelease(const SyncClock *dst) const;
  void UpdateCurrentThread(ClockCache *c, SyncClock *dst) const;
};

ALWAYS_INLINE u64 ThreadClock::get(unsigned tid) const {
  DCHECK_LT(tid, kMaxTidInClock);
  return clk_[tid];
}

ALWAYS_INLINE void ThreadClock::set(u64 v) {
  DCHECK_GE(v, clk_[tid_]);
  clk_[tid_] = v;
}

ALWAYS_INLINE void ThreadClock::tick() {
  clk_[tid_]++;
}

ALWAYS_INLINE uptr ThreadClock::size() const {
  return nclk_;
}

ALWAYS_INLINE void ThreadClock::NoteGlobalAcquire(u64 v) {
  // Here we rely on the fact that AcquireGlobal is protected by
  // ThreadRegistryLock, so only one thread at a time executes it,
  // and values passed to this function do not go backwards.
  CHECK_LE(atomic_load_relaxed(&global_acquire_), v);
  atomic_store_relaxed(&global_acquire_, v);
}

ALWAYS_INLINE SyncClock::Iter SyncClock::begin() {
  return Iter(this);
}

ALWAYS_INLINE SyncClock::Iter SyncClock::end() {
  return Iter(nullptr);
}

ALWAYS_INLINE uptr SyncClock::size() const {
  return size_;
}

ALWAYS_INLINE SyncClock::Iter::Iter(SyncClock* parent)
    : parent_(parent)
    , pos_(nullptr)
    , end_(nullptr)
    , block_(-1) {
  if (parent)
    Next();
}

ALWAYS_INLINE SyncClock::Iter& SyncClock::Iter::operator++() {
  pos_++;
  if (UNLIKELY(pos_ >= end_))
    Next();
  return *this;
}

ALWAYS_INLINE bool SyncClock::Iter::operator!=(const SyncClock::Iter& other) {
  return parent_ != other.parent_;
}

ALWAYS_INLINE ClockElem &SyncClock::Iter::operator*() {
  return *pos_;
}
}  // namespace __tsan

#endif  // TSAN_CLOCK_H