gcc/prime-paths.cc - gcc.git - Git at Google

 /* Find prime paths
    Copyright (C) 2024-2025 Free Software Foundation, Inc.

 This file is part of GCC.

 GCC is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
 Software Foundation; either version 3, or (at your option) any later
 version.

 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
 WARRANTY; without even the implied warranty of MERCHANTABILITY or
 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 for more details.

 You should have received a copy of the GNU General Public License
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */

 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
 #include "obstack.h"
 #include "sbitmap.h"
 #include "vec.h"
 #include "graphds.h"
 #include "selftest.h"

 namespace
 {

 /* Counter for the number of candidate paths to generate before giving up.  It
    is neater to use a global because it has to be checked deep in helper
    functions, which may also suffer under path explosion.  It is a heuristic
    guaranteed to overshoot the number of actual paths (which is difficult to
    estimate), and is intended to give up on (absurdly) large functions with
    millions of paths, not be a high fidelity rejection mechanism.  This is
    essentially an exception.  */
 size_t approx_limit;

 /* Reset the threshold to APPROX when a function is too complex and finding
    paths should give up.  */
 void
 limit_reset (size_t approx)
 {
   approx_limit = approx;
 }

 /* Record approximately APPROX new paths.  Returns true if the limit is
    exceeded and coverage should give up.  */
 bool
 limit_checked_add (size_t approx)
 {
   approx_limit -= approx < approx_limit ? approx : approx_limit;
   return approx_limit == 0;
 }

 /* Check if adding APPROX would exceed the path limit.  This is necessary when
    (pessimistically counted) trie insertions would exceed the limit and yields
    a partial result, when the path count would drop below the limit again once
    redundancies are eliminated.  */
 bool
 limit_exceed_p (size_t approx)
 {
   return approx > approx_limit;
 }

 /* A silly RAII wrapper for struct graph.  The prime_paths function has multiple
    returns, and this helps reliably clean up.  */
 struct auto_graph
 {
   auto_graph (struct graph *graph) : ptr (graph) {}
   auto_graph (const auto_graph &) = delete;
   ~auto_graph () { free_graph (ptr); }
   operator struct graph* () { return ptr; }
   struct graph* operator -> () { return ptr; }
   graph *ptr;
 };

 /* A silly RAII wrapper for an sbitmap vector.  The prime_paths function has
    multiple returns, and this helps reliably clean up.  */
 struct auto_sbitmap_vector
 {
   auto_sbitmap_vector (sbitmap *s) : ptr (s) {}
   auto_sbitmap_vector (const auto_sbitmap_vector &) = delete;
   ~auto_sbitmap_vector () { sbitmap_vector_free (ptr); }
   operator sbitmap* () { return ptr; }
   sbitmap* ptr;
 };

 /* A silly RAII wrpaper for automatically releasing a vec<vec<int>>.  */
 struct auto_vec_vec : vec<vec<int>>
 {
   auto_vec_vec () = default;
   auto_vec_vec (vec<vec<int>> v) : vec<vec<int>>(v) {}
   ~auto_vec_vec () { release_vec_vec (*this); }
 };

 /* A silly RAII wrpaper for automatically releasing a vec<vec<vec<int>>>.  */
 struct auto_vec_vec_vec : vec<vec<vec<int>>>
 {
   ~auto_vec_vec_vec ()
   {
     for (vec<vec<int>> &v : *this)
       release_vec_vec (v);
     release ();
   }
 };

 /* A trivial key/value pair for a short linear map type.  */
 struct xpair
 {
   int key;
   unsigned val;
 };

 /* A node in a trie, optimized for mid-sized alphabets possibly larger than 256
    but not much more.  Finding the prime paths ends up creating a large amount
    of these nodes so space and access costs matters a lot.

    The node does not explicitly store its own key (CFG vertex ID/basic block
    index), nor does it store pointers to its successors.  Rather, it stores the
    key+offset pairs for its successors the root trie object, and in a sense
    behaves like near pointers.  This makes the trie vertices small and
    reloctable, and removes the need for pointer chasing when releasing the trie.

    The union of near/far is essentially a short-vector optimization, switching
    to a heap-allocated vector when necessary.  This happens relatively rarely
    (usually maxes out at 1-2%), and the vertices that have more than 2 sucessors
    also tend to have more than 4.  The root vertex tends to use the dynamic
    vector because the subpaths are recorded as the successors of the root.

    Conceptually, this is a small map from vertex-id -> index and the API is
    modelled as such.  The insert and search functions are unrolled by hand when
    using the small vector.  This has a noticable performance impact on insert in
    particular, and is not too complex since we know we are limited to 2
    elements.

    Vertices are tagged with endofpath and inserted.  If endofpath is set, the
    path from the root to this vertex is a complete path.  If inserted is set
    then the vertex is a part of proper path (one given to insert) and not
    generated as a suffix.  For example:

    insert ([2 4 6])
    insert ([9 7 2 4 6])
    insert ([2 4 6 8])

    The inserted flags for [2 4 6] are not cleared, because otherwise [2 4 6 8]
    would be dropped when only following inserted vertices.  The endofpath flag
    in [2 4 6] is cleared when the suffixes of [9 7 2 4 6] are inserted.

    The node will be inserted into a vec, and should be trivial.  Instances
    should be value-initialized to zero-intialized state.  */
 struct trie_node
 {
   unsigned length () const
   { return !heaped ? len : far.length (); }

   const xpair *begin () const
   { return !heaped ? near : far.begin (); }

   const xpair *end () const
   { return !heaped ? (near + len) : far.end (); }

   /* Get the ith successor.  This is used for traversal and not lookup, and
      should only be used by the iterator.  */
   const xpair &at (unsigned i) const
   { return !heaped ? near[i] : far[i]; }

   const xpair *get (int key) const;
   void put (int key, unsigned val);

   unsigned near_lower_bound (int key) const;

   /* Experimentally I found that using a union with 2 elements in the near array
      to be faster than 4 or without the union (very slightly).  A lot of trie
      vertices will be created, and vast majority of vertices will have 1 or 2
      successors (straight line or if-then), and the cost of size and copying
      adds up.  */
   union
   {
     xpair near[2];
     vec<xpair> far;
   };
   unsigned len : 8;
   unsigned endofpath : 1;
   unsigned inserted : 1;
   unsigned heaped : 1;
 };

 /* Compare LHS.key < RHS.key, for use with vec.lower_bound.  */
 bool
 xpair_less (const xpair& lhs, const xpair& rhs)
 {
   return lhs.key < rhs.key;
 }

 /* Compare LHS.key to RHS.key, for use with vec.bsearch.  */
 int
 xpair_cmp (const void *lhs, const void *rhs)
 {
   return ((const xpair*)lhs)->key - ((const xpair*)rhs)->key;
 }

 /* Get a pointer to the element at KEY if it exists, otherwise NULL.  */
 const xpair*
 trie_node::get (int key) const
 {
   if (!heaped)
     {
       if (len == 0) return NULL;
       if (len >= 1 && key == near[0].key) return near + 0;
       if (len >= 2 && key == near[1].key) return near + 1;
       return NULL;
     }
   else
     {
       xpair kv;
       kv.key = key;
       return const_cast <vec<xpair>&> (far).bsearch (&kv, xpair_cmp);
     }
 }

 /* Put ("emplace") VAL at KEY, extending the paths that pass through this
    vertex.  This function assumes that KEY is not already a successor, and does
    not perform this check.  get () should be called and checked for NULL putting
    with this function.  Put maintains the order of the successors.  */
 void
 trie_node::put (int key, unsigned val)
 {
   xpair kv;
   kv.key = key;
   kv.val = val;
   if (!heaped)
     {
       const unsigned i = near_lower_bound (key);
       if (len < 2)
 	{
 	  near[1] = near[0];
 	  near[i] = kv;
 	  len += 1;
 	}
       else
 	{
 	  /* This insert is the 3rd element, which does not fit in the embedded
 	     storage, so we must create a vector and convert to a far node.  */
 	  vec<xpair> xs {};
 	  xs.reserve (13);
 	  xs.quick_grow (3);
 	  gcc_checking_assert (i <= 2);
 	  if (i == 0)
 	    {
 	      xs[0] = kv;
 	      xs[1] = near[0];
 	      xs[2] = near[1];
 	    }
 	  else if (i == 1)
 	    {
 	      xs[0] = near[0];
 	      xs[1] = kv;
 	      xs[2] = near[1];
 	    }
 	  else
 	    {
 	      xs[0] = near[0];
 	      xs[1] = near[1];
 	      xs[2] = kv;
 	    }

 	  far = xs;
 	  heaped = 1;
 	}
     }
   else
     {
       const unsigned i = far.lower_bound (kv, xpair_less);
       far.safe_insert (i, kv);
     }
 }

 /* Get the index to the last element that compares less than KEY, similar to
    vec.lower_bound.  This assumes the near vector is active, and is for internal
    use.  */
 unsigned
 trie_node::near_lower_bound (int key) const
 {
   gcc_checking_assert (!heaped);
   if (len == 0) return 0;
   if (len >= 1 && key < near[0].key) return 0;
   if (len >= 2 && key < near[1].key) return 1;
   return len;
 }

 /* The trie is a major workhorse for this algorithm.  It has two key properties
    - set-like subpath elimination and sorted output.

    Many evaluated paths will be non-prime, that is, a sequence of vertices that
    is also fully embedded in a longer sequence of vertices.  For example the
    path [3 4 5 8] is a subpath of both [2 3 4 5 8] and [3 4 5 8 10].  The
    insert_with_suffix function maintains this property so that inserting a
    subpath into the trie is effectively a no-op, and inserting a superpath will
    effectively remove (unmark) the subpath.  Sometimes it can be guaranteed that
    no redundant (subpaths) will be generated, in which case the insert function
    can be used.  The insert function is really only set insert, only becoming a
    no-op for identical paths, which will be a lot faster.

    Paths can be extracted with an iterator, which will output paths in
    lexicographically sorted order.  This is an important property because the
    index of a path in the sorted set will be used by the coverage to record when
    a path is taken and completed.  The iterator has different behavior than the
    standard C++ iterators, and to avoid mixups the interface is deliberately
    different.  The iterator has a (large) stack which is not cheap to copy, and
    if the stack is shallow copied it would mean iterator copies have non-local
    effects.  */
 struct trie
 {
   struct iter;
   trie ();
   trie (const trie &o);
   trie (trie &&o);
   ~trie ();

   bool insert (const vec<int>&);
   bool insert (const array_slice<const int>);
   bool hard_insert (const array_slice<const int>);
   bool insert_with_suffix (const array_slice<const int>);
   bool insert_suffix (const array_slice<const int>);

   void merge (const trie&);

   iter paths (vec<int>&) const;
   iter paths (vec<int>&, int from) const;

   vec<vec<int>> paths () const;

   size_t size () const { return len; }

   vec<trie_node> vertices;
   size_t len;

   /* An iterator for the paths of the trie.  The iterator yields all paths in
      lexicographical order.  The iterator will be invalidated on any insertion
      into the trie.  The iterator should not be constructed directly, but
      through the paths functions on the trie.  It is essentially an explicit
      stack depth-first traversal.

      The iter fills a user-provided buffer which should only be read as a when
      the iter is active.  Whenever next returns true the buffer is filled with
      the current path.  Uses will generally look like this:

      vec<int> path {};
      auto iter = trie.paths (path);
      while (iter.next ())
        use_path (path);
 */
   struct iter
   {
     iter (vec<int>&, const vec<trie_node>&);
     iter (int first, vec<int>& path, const vec<trie_node> &vertices);
     ~iter ()
     { stack.release (); }

     bool next ();
     bool next (int);
     bool next (bool);


     /* This is the analog of the stack frame when implementing a recursive
        depth-first path traversal and collecting paths to the leafs:

        for (auto successor : vertex[ix])
 	 {
 	   path.push (successor.value);
 	   collect (successor.ix, successor.begin, successor.end, path)
 	   path.pop ();
 	 }

        Using size_t + 2x unsigned helped make the frame more compact and faster
        than pointers.  */
     struct frame
     {
       /* The index of this frame's vertex, so that vertices[ix].  */
       size_t ix;
       /* The index of the current active successor of vertices[ix].  */
       unsigned itr;
       /* The end of vertices[ix] successors.  When itr == end, vertex[ix] is
 	 exhausted.  */
       unsigned end;
     };

     /* User provided buffer to fill with the paths.  */
     vec<int> &path;
     /* Direct reference to the trie vertices vector.  */
     const vec<trie_node> &vertices;
     /* The call stack.  */
     vec<frame> stack;
     /* Yield flag.  If this is true then next () is permitted to and return a
        new value.  If this is false, a value has already been yielded and next
        must first reset the state before building the next value.  */
     bool yield = true;

     iter (const iter& o) : path (o.path), vertices (o.vertices),
       stack (o.stack.copy ()), yield (o.yield)
     {
     }

     /* Delete the copy assignment as the iter stores references and would cause
        bad bugs.  It is not necessary for the iterator to work well.  To support
        these the references would need to be (explicit) pointers.  */
     iter& operator = (const iter& o) = delete;
   };
 };

 /* Construct an iterator filling BUFFER.  */
 trie::iter
 trie::paths (vec<int> &buffer) const
 {
   buffer.truncate (0);
   return iter (buffer, vertices);
 }

 /* Construct an iterator filling BUFFER for paths starting at FROM.  */
 trie::iter
 trie::paths (vec<int>& buffer, int from) const
 {
   buffer.truncate (0);
   return iter (from, buffer, vertices);
 }

 /* Default construct a new trie.  */
 trie::trie () : vertices (vec<trie_node> {}), len (0)
 {
   vertices.safe_push (trie_node {});
   vertices[0].inserted = true;
 }

 /* Copy construct a new trie.  */
 trie::trie (const trie &o) : vertices (o.vertices.copy ()), len (o.len)
 {
 }

 /* Move construct a new trie.  */
 trie::trie (trie &&o) : vertices (o.vertices), len (o.len)
 {
   o.vertices = {};
   o.len = 0;
 }

 /* Destroy a trie and release all the heaped resources.  */
 trie::~trie ()
 {
   for (trie_node &v : vertices)
     if (v.heaped)
       v.far.release ();
   vertices.release ();
 }

 /* Insert PATH into the trie.  */
 bool
 trie::insert (const vec<int>& path)
 {
   return insert (array_slice <const int> (path));
 }

 /* Insert the PATH into the trie.  Duplicate entries will not be entered twice.
    If PATH is a subpath of another path this will not be detected or if there is
    a path previously inserted that is a subpath of PATH then this redundancy
    will not be eliminated.  For that behavior, call insert_with_suffix.  */
 bool
 trie::insert (array_slice<const int> path)
 {
   size_t index = 0;
   size_t partition = 0;
   for (const int v : path)
     {
       trie_node &current = vertices[index];
       current.inserted = true;
       partition++;

       const auto *xp = current.get (v);
       if (xp)
 	{
 	  index = xp->val;
 	}
       else
 	{
 	  /* A new vertex on this path has been created, which means the rest of
 	     the path will also have to be created.  Drain the path and create
 	     the remaining vertices in a single operation.  */
 	  unsigned ix = vertices.length ();
 	  current.put (v, ix);
 	  current.endofpath = false;

 	  array_slice<const int> tail (path.begin () + partition,
 				       path.size () - partition);
 	  vertices.safe_grow_cleared (1 + ix + tail.size ());

 	  for (const int v : tail)
 	    {
 	      trie_node &last = vertices[ix];
 	      ix += 1;
 	      last.put (v, ix);
 	      last.inserted = true;
 	    }

 	  vertices.last ().endofpath = true;
 	  vertices.last ().inserted = true;
 	  len += 1;
 	  return true;
 	}
     }

   return false;
 }

 /* hard_insert is like insert, except it does not overwrite any endofpath flags,
    and records the endofpath flag even when a superpath of PATH has been
    inserted previously.  This effectively disables subpath elimination.  */
 bool
 trie::hard_insert (array_slice<const int> path)
 {
   size_t index = 0;
   size_t partition = 0;
   for (const int v : path)
     {
       trie_node &current = vertices[index];
       current.inserted = true;
       partition++;

       const auto *xp = current.get (v);
       if (xp)
 	{
 	  index = xp->val;
 	}
       else
 	{
 	  unsigned ix = vertices.length ();
 	  current.put (v, ix);

 	  array_slice<const int> tail (path.begin () + partition,
 				       path.size () - partition);
 	  vertices.safe_grow_cleared (1 + ix + tail.size ());

 	  for (const int v : tail)
 	    {
 	      trie_node &last = vertices[ix];
 	      ix += 1;
 	      last.put (v, ix);
 	      last.inserted = true;
 	    }

 	  vertices.last ().endofpath = true;
 	  vertices.last ().inserted = true;
 	  len += 1;
 	  return true;
 	}
     }

   vertices[index].endofpath = true;
   return false;
 }

 /* Insert a suffixes of PATH.  This is identical to insert except that it is
    assumed that PATH is a subpath, and that the inserted path should clear the
    inserted and endofpath flags.  This function should only be called by
    insert_with_suffix.  */
 bool
 trie::insert_suffix (array_slice<const int> path)
 {
   size_t index = 0;
   size_t partition = 0;
   for (const int v : path)
     {
       trie_node &current = vertices[index];
       current.endofpath = false;
       partition++;

       const auto *xp = current.get (v);
       if (xp)
 	{
 	  index = xp->val;
 	}
       else
 	{
 	  /* A new vertex on this path has been created, which means the rest of
 	     the path will also have to be created.  Drain the path and create
 	     the remaining vertices in a single operation.  */
 	  unsigned ix = vertices.length ();
 	  current.put (v, ix);

 	  array_slice<const int> tail (path.begin () + partition,
 				       path.size () - partition);
 	  vertices.safe_grow_cleared (1 + ix + tail.size ());

 	  for (const int v : tail)
 	    {
 	      vertices[ix].put (v, ix + 1);
 	      ix += 1;
 	    }

 	  return true;
 	}
     }

   vertices[index].endofpath = false;
   return false;
 }

 /* Insert the paths from OTHER into this trie.  */
 void
 trie::merge (const trie& other)
 {
   auto_vec<int, 32> p {};
   iter itr = other.paths (p);
   while (itr.next ())
     insert_with_suffix (p);
 }

 /* Insert PATH and all its subpaths into the trie.  This function implements the
    redundancy property of the trie - if an inserted path is either a subpath or
    superpath of some other path then only the longest will keep its inserted
    flag.  */
 bool
 trie::insert_with_suffix (array_slice<const int> path)
 {
   bool inserted = insert (path);
   path = array_slice<const int> (path.begin () + 1, path.size () - 1);
   while (inserted && !path.empty ())
     {
       inserted = insert_suffix (path);
       path = array_slice<const int> (path.begin () + 1, path.size () - 1);
     }
   return inserted;
 }

 /* Convert the paths of a trie to a vec-of-vec.  */
 vec<vec<int>>
 trie::paths () const
 {
   auto_vec<int> path {};
   vec<vec<int>> all {};
   auto iter = paths (path);
   while (iter.next ())
     all.safe_push (path.copy ());
   return all;
 }

 /* Create an iterator over VERTICES with the caller-provided buffer PATH.  */
 trie::iter::iter (vec<int> &path, const vec<trie_node> &vertices) : path (path),
   vertices (vertices), stack (vec<frame> {})
 {
   gcc_checking_assert (!vertices.is_empty ());
   stack.reserve (13);
   frame f;
   f.ix = 0;
   f.itr = 0;
   f.end = vertices[0].length ();
   stack.quick_push (f);
 }

 /* Create an iterator over VERTICES with the caller-provided buffer PATH for all
    paths and subpaths (suffixes) starting in FROM.  Note that FROM will not be
    in the output buffer PATH, mainly because non-rooted paths are used when
    splicing with paths that end in FROM.  */
 trie::iter::iter (int from, vec<int> &path, const vec<trie_node> &vertices) :
   path (path), vertices (vertices), stack (vec<frame> {})
 {
   gcc_checking_assert (!vertices.is_empty ());
   stack.reserve (13);

   auto *xp = vertices[0].get (from);
   if (!xp)
     {
       /* No paths start with FROM, so construct an iterator where next () always
 	 returns false.  */
       frame f;
       f.ix = 0;
       f.itr = 0;
       f.end = 0;
       stack.safe_push (f);
       return;
     }

   frame f;
   f.ix = xp->val;
   f.itr = 0;
   f.end = vertices[f.ix].length ();
   stack.safe_push (f);
 }

 /* Find the next complete prime path in the trie and write it to the caller's
    buffer.  Returns true if a path is written and false if the iterator is
    exhausted, in which case no path is written and the contents of the buffer is
    garbage.  */
 bool
 trie::iter::next ()
 {
   while (true)
     {
       frame &top = stack.last ();
       const trie_node &vertex = vertices[top.ix];

       if (vertex.endofpath && yield
 	  && (top.itr != top.end || vertex.length () == 0))
 	{
 	  yield = false;
 	  return true;
 	}

       yield = true;

       if (top.itr != top.end)
 	{
 	  const xpair succ = vertex.at (top.itr);
 	  const trie_node &next = vertices[succ.val];
 	  top.itr++;

 	  if (!next.inserted)
 	    continue;

 	  frame f {};
 	  f.ix = succ.val;
 	  f.itr = 0;
 	  f.end = next.length ();
 	  path.safe_push (succ.key);
 	  stack.safe_push (f);
 	}
       else
 	{
 	  stack.pop ();
 	  if (stack.is_empty ())
 	    return false;
 	  path.pop ();
 	}
     }
 }

 /* Find the next path in the trie that would continue (but does not include)
    LIMIT.  If the trie contains the paths [2 4 6 8 9] [2 4 6 8 10] and [2 4 5
    8], iter.next (8) would yield [2 4 6] and [2 4 5].  Returns true if a path is
    written and false if the iterator is exhausted, in which case no path is
    written and the contents of the buffer is garbage.  */
 bool
 trie::iter::next (int limit)
 {
   while (true)
     {
       frame &top = stack.last ();
       const trie_node &vertex = vertices[top.ix];

       if (yield && top.itr != top.end)
 	{
 	  const xpair succ = vertex.at (top.itr);
 	  const trie_node &next = vertices[succ.val];
 	  const int key = succ.key;
 	  const int val = succ.val;
 	  top.itr++;

 	  if (!next.inserted)
 	    continue;

 	  if (key == limit)
 	    {
 	      if (path.is_empty ())
 		continue;
 	      yield = false;
 	      return true;
 	    }

 	  frame f {};
 	  f.ix = val;
 	  f.itr = 0;
 	  f.end = next.length ();
 	  path.safe_push (key);
 	  stack.safe_push (f);
 	}
       else
 	{
 	  yield = true;
 	  stack.pop ();
 	  if (stack.is_empty ())
 	    return false;
 	  path.pop ();
 	}
     }
 }

 /* Find the next path in among all paths including subpaths/suffixes.  This is
    mainly useful in combination with trie.paths (from) for finding the paths
    that go through some vertex.  */
 bool
 trie::iter::next (bool)
 {
   while (true)
     {
       frame &top = stack.last ();
       const trie_node &vertex = vertices[top.ix];

       if (yield && vertex.length () == 0)
 	{
 	  yield = false;
 	  return true;
 	}

       yield = true;

       if (top.itr != top.end)
 	{
 	  const xpair succ = vertex.at (top.itr);
 	  const trie_node &next = vertices[succ.val];
 	  top.itr++;

 	  frame f {};
 	  f.ix = succ.val;
 	  f.itr = 0;
 	  f.end = next.length ();
 	  path.safe_push (succ.key);
 	  stack.safe_push (f);
 	}
       else
 	{
 	  stack.pop ();
 	  if (stack.is_empty ())
 	    return false;
 	  path.pop ();
 	}
     }
 }

 /* Return the index of NEEDLE in HAYSTACK, or (size_t)-1 if not found.  */
 template <typename T>
 size_t
 index_of (T needle, const vec <T> &haystack)
 {
   size_t len = haystack.length ();
   for (size_t i = 0; i != len; ++i)
     if (haystack[i] == needle)
       return i;
   return (size_t)-1;
 }

 /* Check if there is an edge in GRAPH from SRC to DST.  */
 bool
 edge_p (const struct graph *graph, int src, int dest)
 {
   for (struct graph_edge *e = graph->vertices[src].succ; e; e = e->succ_next)
     if (e->dest == dest)
       return true;
   return false;
 }

 /* Check if PATH is a cycle starting (and ending) with V.  */
 bool
 cycle_p (const vec<int>& path, int v)
 {
   return path[0] == v && path[path.length ()-1] == v;
 }

 /* Find the SCC entry-exit paths, the simple paths from ENTRY to EXIT, and add
    them to OUT.  PRIME_PATHS is the prime paths of the SCC.  Paths are hard
    inserted into OUT, which disables subpath eliminiation and essentially makes
    OUT a compact set.  This is important to not eliminate paths from ENTRY to
    EXIT which are traversed by other ENTRY/EXIT pairs.  Duplicated entries are
    removed.  */
 void
 scc_entry_exit_paths (const vec<vec<int>> &internal_pp, int entry, int exit,
 		      trie &out)
 {
   if (entry == exit)
     {
       out.hard_insert (array_slice <const int> (&entry, 1));
       return;
     }

   for (const vec<int> &path : internal_pp)
     {
       const size_t Ven = index_of (entry, path);
       const size_t Vex = index_of (exit, path);

       if (Ven == (size_t)-1 || Vex == (size_t)-1 || Vex <= Ven)
 	continue;

       const size_t len = (Vex + 1) - Ven;
       array_slice <const int> p (path.begin () + Ven, len);
       out.hard_insert (p);
     }
 }

 /* Find the SCC exit paths, the simple paths that starts in a non-entry vertex
    in the SCC and ends in EXIT and are not a cycles.  INTERNAL_PP are the
    internal prime paths for the SCC with EXIT as an exit vertex.

    Fazli claims the path must not be a subpath of another exit path in the SCC,
    but this is only half true: see gcov-29.c/pathcov005a.  Subpaths must survive
    if they end in a different exit vertex than the superpath, so the hard_insert
    is important.  */
 void
 scc_exit_paths (const vec<vec<int>> &prime_paths, int exit, trie &out)
 {
   trie trie;
   for (const vec<int> &path : prime_paths)
     {
       const size_t Vex = index_of (exit, path);
       if (Vex == (size_t)-1 || cycle_p (path, exit))
 	continue;
       array_slice <const int> p (path.begin (), Vex + 1);
       trie.insert_with_suffix (p);
     }

   auto_vec<int> path {};
   auto iter = trie.paths (path);
   while (iter.next ())
     out.hard_insert (path);
 }

 /* Find the SCC entry paths, the simple paths that start in the entry vertex
    ENTRY and are not cycles.  INTERNAL_PP are the internal prime paths for the
    SCC with ENTRY as an entry vertex.  The paths are inserted into OUT.  */
 void
 scc_entry_paths (const vec<vec<int>> &internal_pp, int entry, trie &trie)
 {
   for (const vec<int> &path : internal_pp)
     {
       const size_t Ven = index_of (entry, path);
       if (Ven == (size_t)-1 || cycle_p (path, entry))
 	continue;
       array_slice <const int> p (path.begin () + Ven, path.length () - Ven);
       trie.insert (p);
     }
 }

 /* Worker for cfg_complete_prime_paths.  ITR is the current id for the current
    path.  END is end of the path so that when ITR == END, the walk is completed.
    EDGES is the matrix of edges where EDGES[src][dst] is set if there is an edge
    from src to dest.  PATH is the vertices that make up this walk so far.  TRIE
    is the output trie where paths are inserted.  SCC_ENEX_PATHS are the
    entry-exit paths found by the scc_entry_exit_paths function.  */
 void
 cfg_complete_prime_paths1 (const int *itr, const int *end,
 			   const sbitmap *edges,
 			   const vec<vec<vec<int>>> &scc_enex_paths,
 			   vec<int> &path, trie &trie)
 {
   if (itr == end)
     {
       trie.insert_with_suffix (path);
       return;
     }

   const unsigned pathlen = path.length ();
   const sbitmap succs = edges[path.last ()];
   for (const vec<int> &enex : scc_enex_paths[*itr])
     {
       if (!bitmap_bit_p (succs, enex[0]))
 	continue;

       path.safe_splice (enex);
       cfg_complete_prime_paths1 (itr + 1, end, edges, scc_enex_paths,
 				 path, trie);
       path.truncate (pathlen);
       if (limit_exceed_p (trie.size ()))
 	return;
     }
 }

 /* Find the complete prime paths of the CFG, the prime paths that start in the
    entry vertex and end in the exit vertex.  */
 trie
 cfg_complete_prime_paths (const sbitmap *edges,
 			  const vec<trie> &scc_entry_exit_paths,
 			  const trie &ccfg_prime_paths)
 {
   trie trie;
   auto_vec<int, 16> path {};
   auto_vec<int, 16> cfgpp {};
   auto_vec_vec_vec scc_enex {};
   scc_enex.reserve (scc_entry_exit_paths.length ());

   for (size_t i = 0; i != scc_entry_exit_paths.length (); ++i)
     {
       scc_enex.quick_push (vec<vec<int>> {});
       auto iter = scc_entry_exit_paths[i].paths (path);
       while (iter.next ())
 	scc_enex[i].safe_push (path.copy ());
     }

   auto iter = ccfg_prime_paths.paths (cfgpp);
   while (!limit_exceed_p (trie.size ()) && iter.next ())
     for (const vec<int> &enex : scc_enex[cfgpp[0]])
       {
 	path.truncate (0);
 	path.safe_splice (enex);
 	cfg_complete_prime_paths1 (cfgpp.begin () + 1, cfgpp.end (), edges,
 				   scc_enex, path, trie);
 	if (limit_exceed_p (trie.size ()))
 	  return trie;
       }

   return trie;
 }

 /* Find the SCC exit prime paths, the prime paths that start from a strongly
    connected component and end in the end vertex.  SCCS is a vector where
    SCCS[i] = SCC (vertex_i) so that if vertex[2].component == 5 then SCCS[2] ==
    5.  SCC_EXIT_PATHS is the output of scc_exit_paths ().  COMPLETE_PRIME_PATHS
    is the output of cfg_complete_prime_paths ().

    This function can suffer under path explosion and will terminate early if
    the number of inserts in COMPLETE_PRIME_PATHS exceeds approx_limit.  */
 trie
 scc_exit_prime_paths (const struct graph *cfg, const trie &scc_exit_paths,
 		      const trie &complete_prime_paths)
 {
   trie trie;
   auto_vec<int, 8> path {};
   auto_vec<int, 8> r {};
   auto_vec<int, 8> q {};

   auto exiter = scc_exit_paths.paths (q);
   while (exiter.next ())
     {
       const int Vex = q.last ();
       auto iter = complete_prime_paths.paths (r, Vex);
       while (iter.next (true))
 	{
 	  /* There could be multiple Vex in the SCC.  Even if scc_exit_paths
 	     did not kill the subpaths, this trie probably would.  It can be
 	     assumed that all vertices in q are in the same SCC.

 	     This is an important note, as the Fazli and Afsharchi paper does
 	     not properly capture this subtlety.  */
 	  const int p0 = Vex;
 	  const int p1 = r[0];

 	  if (cfg->vertices[p0].component == cfg->vertices[p1].component)
 	    continue;

 	  path.truncate (0);
 	  path.reserve (q.length () + r.length ());
 	  path.splice (q);
 	  path.splice (r);
 	  /* This can probably insert without subpath elimination because:
 	     1. Conflicts are *really* rare (see patmatch in tree.c), but they
 		do happen.
 	     2. The output of this function is "filtered" through another trie
 		anyway so the redundant paths generated here will be eliminated
 		in the consumers at a very low extra cost.  */
 	  trie.insert (path);
 	  if (limit_exceed_p (trie.size ()))
 	    return trie;
 	}
     }

   return trie;
 }

 /* Check if PATH in CFG enters the VERTEX's SCC through VERTEX.  */
 bool
 enters_through_p (const struct graph *cfg, const vec<int> &path, int vertex)
 {
   gcc_checking_assert (!path.is_empty ());
   const int last = path.address()[path.length ()-1];
   if (cfg->vertices[last].component == cfg->vertices[vertex].component)
     return false;
   return edge_p (cfg, last, vertex);
 }

 /* Worker for scc_entry_prime_paths.  CFG is the CFG for the function,
    SCC_ENTRY_PATHS the accumulated scc_entry_paths for all the SCCs, PRIME_PATHS
    is either the result of cfg_complete_prime_paths or exit_prime_paths, and OUT
    the output trie.

    This function can suffer under path explosion and will terminate early if
    the number of inserts in OUT exceeds approx_limit.  */
 void
 scc_entry_prime_paths1 (const struct graph *cfg, const trie &scc_entry_paths,
 			const trie &prime_paths, trie &out)
 {
   auto_vec<int, 8> p {};
   auto_vec<int, 8> q {};
   auto_vec<int, 8> path {};
   auto itr = scc_entry_paths.paths (q);
   while (itr.next ())
     {
       const int Ven = q[0];
       /* TODO: This might benefit from a reversed trie lookup.  */
       auto xitr = prime_paths.paths (p);
       while (xitr.next (Ven))
 	{
 	  if (!enters_through_p (cfg, p, Ven))
 	    continue;

 	  path.truncate (0);
 	  path.reserve (p.length () + q.length ());
 	  path.splice (p);
 	  path.splice (q);
 	  out.insert_with_suffix (path);
 	  if (limit_exceed_p (out.size ()))
 	    return;
 	}
     }
 }

 /* Find the entry prime paths - the prime paths that start in the root and end
    in a strongly connected component.  CFG is the CFG for this function,
    SCC_ENTRY_PATHS the accumulated scc_entry_paths for all the SCCs,
    COMPLETE_PRIME_PATHS the result of cfg_complete_prime_paths, and
    EXIT_PRIME_PATHS result of exit_prime_paths.

    This function can suffer under path explosion and will terminate early if
    the return value grows beyond approx_limit.  */
 trie
 scc_entry_prime_paths (const struct graph *cfg,
 		       const trie &scc_entry_paths,
 		       const trie &complete_prime_paths,
 		       const trie &exit_prime_paths)
 {
   trie trie;
   scc_entry_prime_paths1 (cfg, scc_entry_paths, complete_prime_paths, trie);
   scc_entry_prime_paths1 (cfg, scc_entry_paths, exit_prime_paths, trie);
   return trie;
 }

 /* Build a new control flow graph from the strongly connected components, so
    that every node in the CCFG is a strongly connected component in the original
    CFG.  NSSC is the number of vertices in the new graph, and the return value
    of graphds_ssc.  */
 struct graph*
 build_ccfg (struct graph *cfg, int nscc)
 {
   struct graph *ccfg = new_graph (nscc);
   for (int i = 0; i != cfg->n_vertices; ++i)
     {
       struct vertex v = cfg->vertices[i];
       for (struct graph_edge *e = v.succ; e; e = e->succ_next)
 	{
 	  int src = v.component;
 	  int dest = cfg->vertices[e->dest].component;
 	  if (src != dest && !edge_p (ccfg, src, dest))
 	    add_edge (ccfg, src, dest);
 	}
     }

   return ccfg;
 }

 /* Create a new graph from CFG where the edges between strongly connected
    components removed.  */
 struct graph*
 disconnect_sccs (struct graph *cfg)
 {
   struct graph *ccfg = new_graph (cfg->n_vertices);
   const struct vertex *vertices = cfg->vertices;
   for (int i = 0; i != cfg->n_vertices; ++i)
     {
       ccfg->vertices[i].data = &cfg->vertices[i];
       for (struct graph_edge *e = vertices[i].succ; e; e = e->succ_next)
 	if (vertices[e->src].component == vertices[e->dest].component)
 	  add_edge (ccfg, e->src, e->dest)->data = e;
     }
   return ccfg;
 }

 /* Check if vertex I in CFG is the entry vertex of a strongly connected
    component.  A vertex is an entry vertex if 1) there are no predecessors
    (i.e. the root vertex is always an entry vertex) or 2) a predecessor belongs
    to a different SCC.  */
 bool
 scc_entry_vertex_p (struct graph *cfg, size_t i)
 {
   if (!cfg->vertices[i].pred)
     return true;
   const int scc = cfg->vertices[i].component;
   for (struct graph_edge *e = cfg->vertices[i].pred; e; e = e->pred_next)
     if (cfg->vertices[e->src].component != scc)
       return true;
   return false;
 }

 /* Check if vertex I in CFG is an exit vertex of a strongly connected component.
    A vertex is an exit vertex if 1) there are no successors (i.e. the sink is
    always an exit vertex) or 2) if a successor belongs to a different SCC.  */
 bool
 scc_exit_vertex_p (struct graph *cfg, size_t i)
 {
   if (!cfg->vertices[i].succ)
     return true;
   const int scc = cfg->vertices[i].component;
   for (struct graph_edge *e = cfg->vertices[i].succ; e; e = e->succ_next)
     if (cfg->vertices[e->dest].component != scc)
       return true;
   return false;
 }

 /* Worker for simple_paths.  Find all the simple paths in CFG starting at NODE
    and insert into OUT.  This is a DFS where the search stops when entering a
    vertex already in SEEN.  PATH is the sequence of ids for the vertices taken
    from the from the root to NODE.  When the number of inserts reaches LIMIT
    the function aborts and returns so the caller can report that it is giving
    up because the function is too complex.

    This function can suffer under path explosion and will terminate early if
    the number of inserts in OUT exceeds approx_limit.  */
 void
 simple_paths1 (const struct graph *cfg, int node, sbitmap seen, vec<int> &path,
 	       trie &out)
 {
   if (limit_exceed_p (out.size ()))
     return;

   if (!bitmap_set_bit (seen, node))
     {
       if (path[0] == node)
 	path.quick_push (node);
       out.insert (path);
       if (path[0] == node)
 	path.pop ();
       return;
     }
   path.quick_push (node);

   struct graph_edge *succs = cfg->vertices[node].succ;
   if (!succs)
     {
       out.insert (path);
       bitmap_clear_bit (seen, node);
       path.pop ();
       return;
     }

   for (struct graph_edge *e = succs; e; e = e->succ_next)
     simple_paths1 (cfg, e->dest, seen, path, out);

   bitmap_clear_bit (seen, node);
   path.pop ();
 }

 /* Find all the simple paths in CFG starting at ROOT and insert into OUT.  A
    simple path is a sequence of vertices without any duplicated vertices (i.e.
    no loops).  SEEN should be an sbitmap of CFG->n_vertices size.  PATH and
    SEEN will be cleared entry and is for buffer reuse between calls.  When the
    number of inserts reaches LIMIT the function aborts and returns so the
    caller can report that it is giving up because the function is too complex.
    Note that there might be fewer prime paths than inserts, but if the number
    of inserts alone is larger than LIMIT the function is very complex and would
    take too long to compile in later stages.

    This function can suffer under path explosion and will terminate early if
    the number of inserts in OUT exceeds approx_limit.  Since OUT is often
    shared between calls it is ok to use in a loop, and only check the size of
    OUT after the loop terminates.  */
 void
 simple_paths (struct graph *cfg, int root, sbitmap seen, vec<int> &path,
 	      trie &out)
 {
   bitmap_clear (seen);
   path.reserve (cfg->n_vertices);
   path.truncate (0);
   simple_paths1 (cfg, root, seen, path, out);
 }

 /* Merge the tries T1, T2, T3, and set of paths VECS into the larges trie.
    Returns a reference to the trie merged into.  Merging tries and resolving
    redundant paths is the slowest step (at least in the sense it works on the
    largest input), and merging into a partial result reduces the work
    accordingly.  For large problems this is a massive improvement, which in the
    worst cases (where all tries but one are empty or almost empty) speed up
    30-40%.  */
 trie&
 merge (trie &t1, trie &t2, trie &t3, vec<vec<vec<int>>> &vecs)
 {
   trie *dst = nullptr;
   const size_t s1 = t1.size ();
   const size_t s2 = t2.size ();
   const size_t s3 = t3.size ();

   if (s1 >= s2 && s1 >= s3)
     {
       dst = &t1;
       t1.merge (t2);
       t1.merge (t3);
     }
   else if (s2 >= s1 && s2 >= s3)
     {
       dst = &t2;
       t2.merge (t1);
       t2.merge (t3);
     }
   else
     {
       dst = &t3;
       t3.merge (t1);
       t3.merge (t2);
     }

   gcc_checking_assert (dst);
   for (const vec<vec<int>> &v2 : vecs)
     for (const vec<int> &v1 : v2)
       dst->insert_with_suffix (v1);
   return *dst;
 }

 /* Store the edges of CFG in a matrix of bitmaps so that bit_p (edges[src],
    dest) is true if there is an edge from src to dest.  This is faster and more
    convenient than walking the linked list of successors in hot loops.  The
    vector will have N bitmaps of N bits where N is the number of vertices in
    CFG.  */
 sbitmap*
 edge_matrix (const struct graph *cfg)
 {
   sbitmap *edges = sbitmap_vector_alloc (cfg->n_vertices, cfg->n_vertices);
   bitmap_vector_clear (edges, cfg->n_vertices);
   for (int i = 0; i != cfg->n_vertices; ++i)
     for (graph_edge *e = cfg->vertices[i].succ; e; e = e->succ_next)
       bitmap_set_bit (edges[e->src], e->dest);
   return edges;
 }

 } // namespace

 /* Find the prime paths for CFG.  The search gives up after approximate
    PATHLIMIT probable paths have been generated to address path explosions.
    The PATHLIMIT flag is typically controlled by -fpath-coverage-limit.  This
    function is a part of -fpath-coverage and will also be called from gcov.
    The paths are returned in lexicographical order based on node (basic block)
    ID.  If the path limit was exceeded, an empty vector is returned.

    A simple path is a path where all vertices are unique, except possibly the
    first and last.  A prime path is a maximal-length simple path which is not a
    part of any other simple path.  Prime paths strike a good balance between
    coverage thoroughness, loops (requiring them to be taken and skipped), and
    number of paths.

    The algorithm is based on Fazli & Afsharchi's "A Time and Space-Efficient
    Compositional Method for Prime and Test Paths Generation" (2019), combined
    with a suffix trie for removing duplicate or redundant paths.  An auxillary
    graph of the strongly connected components (SCCs) is built.  Then, the prime
    paths of the SCCs composes the prime paths of each SCC with the prime paths
    of this auxillary graph.  This can drastically cut the number of redundant
    paths generated compared to a naive algorithm.

    This does not work for all graphs.  Some structures, e.g. when most of the
    graph is inside a single SCC, cause the algorithm to degenerate to a naive
    one.  The same happens for functions with many SCCs that are either
    singletons or very small.  Those cases will be slower with respect to the
    number of paths, but still fast enough if the path limit is kept reasonably
    low (a few hundred thousand).  */
 vec<vec<int>>
 prime_paths (struct graph *cfg, size_t pathlimit)
 {
   const int nscc = graphds_scc (cfg, NULL);
   auto_graph disconnected (disconnect_sccs (cfg));
   auto_graph ccfg (build_ccfg (cfg, nscc));
   auto_sbitmap_vector edges (edge_matrix (cfg));

   auto_sbitmap seen (cfg->n_vertices);
   auto_vec<int, 8> pathbuf {};

   limit_reset (pathlimit);

   /* Store an SCC-ID -> vertices mapping to quickly find the vertices that
      make up a strongly connected component.  */
   auto_vec_vec sccs {};
   sccs.safe_grow_cleared (ccfg->n_vertices);
   for (int i = 0; i != cfg->n_vertices; ++i)
     sccs[cfg->vertices[i].component].safe_push (i);

   auto_vec_vec_vec scc_internal_pp {};
   scc_internal_pp.safe_grow_cleared (nscc);
   for (int i = 0; i != nscc; ++i)
     {
       trie internal_pp;
       for (int V : sccs[i])
 	simple_paths (disconnected, V, seen, pathbuf, internal_pp);
       if (limit_exceed_p (internal_pp.size ()))
 	return {};
       scc_internal_pp[i] = internal_pp.paths ();
       if (limit_checked_add (scc_internal_pp[i].length ()))
 	return {};
     }

   auto_vec<trie, 8> scc_enex_paths (nscc);
   scc_enex_paths.safe_grow_cleared (nscc);
   trie scc_en_paths;
   trie scc_ex_paths;

   for (int i = 0; i != ccfg->n_vertices; ++i)
     {
       for (int Ven : sccs[i])
 	{
 	  if (!scc_entry_vertex_p (cfg, Ven))
 	    continue;

 	  for (int Vex : sccs[i])
 	    {
 	      if (!scc_exit_vertex_p (cfg, Vex))
 		continue;
 	      scc_entry_exit_paths (scc_internal_pp[i], Ven, Vex,
 				    scc_enex_paths[i]);
 	    }
 	}
     }

   for (int i = 0; i != cfg->n_vertices; ++i)
     {
       const int scc = cfg->vertices[i].component;
       if (scc_entry_vertex_p (cfg, i))
 	scc_entry_paths (scc_internal_pp[scc], i, scc_en_paths);

       if (scc_exit_vertex_p (cfg, i))
 	scc_exit_paths (scc_internal_pp[scc], i, scc_ex_paths);
     }

   /* In the presence of abnormal edges (like longjmp) it is possible to have
      multiple "entry points" in function -- build ccfg prime paths starting at
      any vertex without predecessor.  For most graphs this will only be the
      ENTRY_BLOCK.  */
   trie ccfg_prime_paths;
   for (int i = 0; i != ccfg->n_vertices; ++i)
     if (!ccfg->vertices[i].pred)
       simple_paths (ccfg, i, seen, pathbuf, ccfg_prime_paths);
   if (limit_exceed_p (ccfg_prime_paths.size ()))
     return {};

   trie complete_prime_paths = cfg_complete_prime_paths (edges, scc_enex_paths,
 							ccfg_prime_paths);
   if (limit_checked_add (complete_prime_paths.size ()))
     return {};
   trie exit_prime_paths = scc_exit_prime_paths (cfg, scc_ex_paths,
 						complete_prime_paths);
   if (limit_checked_add (exit_prime_paths.size ()))
     return {};
   trie entry_prime_paths = scc_entry_prime_paths (cfg, scc_en_paths,
 						  complete_prime_paths,
 						  exit_prime_paths);
   if (limit_checked_add (entry_prime_paths.size ()))
     return {};

   trie &merged = merge (complete_prime_paths, entry_prime_paths,
 			exit_prime_paths, scc_internal_pp);
   if (merged.size () > pathlimit)
     return {};

   return merged.paths ();
 }

 #if CHECKING_P

 namespace selftest
 {

 /* Check if the trie contains PATH.  */
 static bool
 contains (const trie &trie, array_slice<const int> path)
 {
   size_t index = 0;
   for (int id : path)
     {
       const trie_node &current = trie.vertices[index];
       if (!current.inserted)
 	return false;
       const auto *xp = current.get (id);
       if (!xp)
 	return false;
       index = xp->val;
     }
   return trie.vertices[index].inserted && trie.vertices[index].endofpath;
 }

 static bool
 equal_p (array_slice<const int> lhs, array_slice<const int> rhs)
 {
   if (lhs.size () != rhs.size ())
     return false;

   size_t length = lhs.size ();
   for (size_t i = 0; i != length; ++i)
     if (lhs[i] != rhs[i])
       return false;
   return true;
 }

 static bool
 any_equal_p (const array_slice<const int> &needle,
 	     const vec<vec<int>> &haystack)
 {
   for (const vec<int> &x : haystack)
     if (equal_p (needle, array_slice <const int> (x)))
       return true;
   return false;
 }

 static size_t
 count (const trie &trie)
 {
   size_t n = 0;
   auto_vec<int> path {};
   auto iter = trie.paths (path);
   while (iter.next ())
     n += 1;
   return n;
 }

 static vec<vec<int>>
 simple_paths (struct graph *cfg, trie &trie, int root = 0)
 {
   auto_sbitmap seen (cfg->n_vertices);
   auto_vec<int> path;
   simple_paths (cfg, root, seen, path, trie);
   return trie.paths ();
 }

 /* Create a CFG that roughly corresponds to this program:

 int binary_search(int a[], int len, int from, int to, int key)
 {
     int low = from;
     int high = to - 1;

     while (low <= high)
     {
 	int mid = (low + high) >> 1;
 	long midVal = a[mid];

 	if (midVal < key)
 	    low = mid + 1;
 	else if (midVal > key)
 	    high = mid - 1;
 	else
 	    return mid; // key found
     }
     return -1;
 }

   This program would emit a CFG very similar to the CFG used by Fazli &
   Afsharchi (2019).  The selftest cases are built from the partial paths used
   in that paper.  */
 static struct graph*
 binary_search_cfg ()
 {
     struct graph *g = new_graph (11);
     add_edge (g, 0, 1);
     add_edge (g, 1, 2);
     add_edge (g, 2, 3);
     add_edge (g, 2, 4);
     add_edge (g, 3, 10);
     add_edge (g, 4, 5);
     add_edge (g, 4, 6);
     add_edge (g, 5, 7);
     add_edge (g, 6, 8);
     add_edge (g, 6, 9);
     add_edge (g, 7, 2);
     add_edge (g, 8, 10);
     add_edge (g, 9, 7);
     graphds_scc (g, NULL);
     return g;
 }

 /* Test a full run of the algorithm against a known graph (binary-search).  */
 static void
 test_prime_paths ()
 {
   auto_graph g (binary_search_cfg ());
   vec<vec<int>> paths = prime_paths (g, 100);
   const int p01[] = { 0, 1, 2, 3, 10 };
   const int p02[] = { 0, 1, 2, 4, 6, 8, 10 };
   const int p03[] = { 5, 7, 2, 4, 6, 9 };
   const int p04[] = { 4, 6, 9, 7, 2, 4 };
   const int p05[] = { 2, 4, 6, 9, 7, 2 };
   const int p06[] = { 6, 9, 7, 2, 4, 6 };
   const int p07[] = { 9, 7, 2, 4, 6, 9 };
   const int p08[] = { 7, 2, 4, 6, 9, 7 };
   const int p09[] = { 6, 9, 7, 2, 4, 5 };
   const int p10[] = { 4, 5, 7, 2, 4 };
   const int p11[] = { 2, 4, 5, 7, 2 };
   const int p12[] = { 5, 7, 2, 4, 5 };
   const int p13[] = { 7, 2, 4, 5, 7 };
   const int p14[] = { 4, 6, 9, 7, 2, 3, 10 };
   const int p15[] = { 5, 7, 2, 4, 6, 8, 10 };
   const int p16[] = { 9, 7, 2, 4, 6, 8, 10 };
   const int p17[] = { 4, 5, 7, 2, 3, 10 };
   const int p18[] = { 0, 1, 2, 4, 6, 9, 7 };
   const int p19[] = { 0, 1, 2, 4, 5, 7 };

   ASSERT_EQ (paths.length (), 19);
   ASSERT_TRUE (any_equal_p (p01, paths));
   ASSERT_TRUE (any_equal_p (p02, paths));
   ASSERT_TRUE (any_equal_p (p03, paths));
   ASSERT_TRUE (any_equal_p (p04, paths));
   ASSERT_TRUE (any_equal_p (p05, paths));
   ASSERT_TRUE (any_equal_p (p06, paths));
   ASSERT_TRUE (any_equal_p (p07, paths));
   ASSERT_TRUE (any_equal_p (p08, paths));
   ASSERT_TRUE (any_equal_p (p09, paths));
   ASSERT_TRUE (any_equal_p (p10, paths));
   ASSERT_TRUE (any_equal_p (p11, paths));
   ASSERT_TRUE (any_equal_p (p12, paths));
   ASSERT_TRUE (any_equal_p (p13, paths));
   ASSERT_TRUE (any_equal_p (p14, paths));
   ASSERT_TRUE (any_equal_p (p15, paths));
   ASSERT_TRUE (any_equal_p (p16, paths));
   ASSERT_TRUE (any_equal_p (p17, paths));
   ASSERT_TRUE (any_equal_p (p18, paths));
   ASSERT_TRUE (any_equal_p (p19, paths));
   release_vec_vec (paths);
 }

 /* The strongly connected component graph for binary_search looks like
     this, using the vertex numbers from the original graph:

     START
       |
       1
       |
       2 (SCC)
      / \
     3   8
      \ /
      END

   The components gets renumbered by graphds_scc, so the ccfg looks like
   this.  The actual numbers don't matter as long as the structure of the
   graph is preserved, and this test is now sensitive to the numbering set
   by graphds_scc.  It does not have to be - if that function should reverse
   the numbering this test must be updated.

       5
       |
       4
       |
       3 (SCC)
      / \
     2   1
      \ /
       0
 */
 static void
 test_build_ccfg ()
 {
   auto_graph cfg (binary_search_cfg ());
   const int nscc = graphds_scc (cfg, NULL);
   auto_graph ccfg (build_ccfg (cfg, nscc));
   ASSERT_EQ (6, nscc);

   ASSERT_TRUE (edge_p (ccfg, 5, 4));
   ASSERT_TRUE (edge_p (ccfg, 4, 3));
   ASSERT_TRUE (edge_p (ccfg, 3, 2));
   ASSERT_TRUE (edge_p (ccfg, 3, 1));
   ASSERT_TRUE (edge_p (ccfg, 2, 0));
   ASSERT_TRUE (edge_p (ccfg, 1, 0));
 }

 /* This test checks some basic assumptions on finding the strongly connected
    components and disconnecting the graph by removing all edges between SCCs.
    Creating a single auxillary graph simplifies the bookkeeping.  */
 static void
 test_split_components ()
 {
   auto_graph cfg (binary_search_cfg ());
   int nscc = graphds_scc (cfg, NULL);
   auto_graph ccfg (disconnect_sccs (cfg));

   auto_vec_vec entries {};
   auto_vec_vec exits {};
   entries.safe_grow_cleared (nscc);
   exits.safe_grow_cleared (nscc);

   for (int i = 0; i != cfg->n_vertices; ++i)
     {
       if (scc_entry_vertex_p (cfg, i))
 	entries[cfg->vertices[i].component].safe_push (i);
       if (scc_exit_vertex_p (cfg, i))
 	exits[cfg->vertices[i].component].safe_push (i);
     }

   const int p10[] = { 10 };
   const int p08[] = { 8 };
   const int p03[] = { 3 };
   const int p02[] = { 2 };
   const int p01[] = { 1 };
   const int p00[] = { 0 };
   const int p26[] = { 2, 6 };

   ASSERT_EQ (entries.length (), 6);
   ASSERT_TRUE (any_equal_p (p10, entries));
   ASSERT_TRUE (any_equal_p (p08, entries));
   ASSERT_TRUE (any_equal_p (p03, entries));
   ASSERT_TRUE (any_equal_p (p02, entries));
   ASSERT_TRUE (any_equal_p (p01, entries));
   ASSERT_TRUE (any_equal_p (p00, entries));

   ASSERT_EQ (exits.length (), 6);
   ASSERT_TRUE (any_equal_p (p10, exits));
   ASSERT_TRUE (any_equal_p (p08, exits));
   ASSERT_TRUE (any_equal_p (p03, exits));
   ASSERT_TRUE (any_equal_p (p26, exits));
   ASSERT_TRUE (any_equal_p (p01, exits));
   ASSERT_TRUE (any_equal_p (p00, exits));

   /* The result of disconnect_sccs () disconnects the graph into its strongly
      connected components.  The subgraphs are either singletons (a single
      vertex with no edges) or graphs with cycles.  The SCC internal prime
      paths can be found by running a DFS from every SCC vertex, terminating
      on a duplicated vertex.  This may create some redundant paths still,
      which must be filtered out.

      Singletons can either be detected and skipped (requires counting the
      components) or filtered after.  For this test case they are skipped
      because other graph inconsistencies are easier to detect.  */

   /* Count and check singleton components.  */
   auto_vec<int> scc_size {};
   scc_size.safe_grow_cleared (nscc);
   for (int i = 0; i != cfg->n_vertices; ++i)
     scc_size[cfg->vertices[i].component]++;
   ASSERT_EQ (nscc, 6);
   ASSERT_EQ (scc_size[0], 1);
   ASSERT_EQ (scc_size[1], 1);
   ASSERT_EQ (scc_size[2], 1);
   ASSERT_EQ (scc_size[3], 6);
   ASSERT_EQ (scc_size[4], 1);
   ASSERT_EQ (scc_size[5], 1);

   /* Manually unroll the loop finding the simple paths starting at the
      vertices in the SCCs.  In this case there is only the one SCC.  */
   trie ccfg_paths;
   auto_vec_vec (simple_paths (ccfg, ccfg_paths, 2));
   auto_vec_vec (simple_paths (ccfg, ccfg_paths, 4));
   auto_vec_vec (simple_paths (ccfg, ccfg_paths, 5));
   auto_vec_vec (simple_paths (ccfg, ccfg_paths, 6));
   auto_vec_vec (simple_paths (ccfg, ccfg_paths, 7));
   auto_vec_vec (simple_paths (ccfg, ccfg_paths, 9));
   /* Then in+out of trie.  */
   auto_vec_vec xscc_internal_pp = ccfg_paths.paths ();
   trie scc_internal_pp;
   for (auto &p : xscc_internal_pp)
     scc_internal_pp.insert_with_suffix (p);

   const int pp01[] = { 5, 7, 2, 4, 6, 9 };
   const int pp02[] = { 4, 5, 7, 2, 4 };
   const int pp03[] = { 4, 6, 9, 7, 2, 4 };
   const int pp04[] = { 2, 4, 5, 7, 2 };
   const int pp05[] = { 2, 4, 6, 9, 7, 2 };
   const int pp06[] = { 5, 7, 2, 4, 5 };
   const int pp07[] = { 6, 9, 7, 2, 4, 6 };
   const int pp08[] = { 7, 2, 4, 5, 7 };
   const int pp09[] = { 9, 7, 2, 4, 6, 9 };
   const int pp10[] = { 7, 2, 4, 6, 9, 7 };
   const int pp11[] = { 6, 9, 7, 2, 4, 5 };

   ASSERT_EQ (count (scc_internal_pp), 11);
   ASSERT_TRUE (contains (scc_internal_pp, pp01));
   ASSERT_TRUE (contains (scc_internal_pp, pp02));
   ASSERT_TRUE (contains (scc_internal_pp, pp03));
   ASSERT_TRUE (contains (scc_internal_pp, pp04));
   ASSERT_TRUE (contains (scc_internal_pp, pp05));
   ASSERT_TRUE (contains (scc_internal_pp, pp06));
   ASSERT_TRUE (contains (scc_internal_pp, pp07));
   ASSERT_TRUE (contains (scc_internal_pp, pp08));
   ASSERT_TRUE (contains (scc_internal_pp, pp09));
   ASSERT_TRUE (contains (scc_internal_pp, pp10));
   ASSERT_TRUE (contains (scc_internal_pp, pp11));
 }

 /* The remaining tests deconstructs the algorithm and runs only a single phase
    with good inputs at that point.  This makes it easier to pinpoint where
    things go wrong, and helps show in steps how the algorithm works and the
    phases relate.

    The phases and their inputs and outputs are bazed on Fazli & Afshsarchi.  */

 static void
 test_scc_internal_prime_paths ()
 {
   /* This graph has only the SCC subgraph.  The result of running prime-paths
      on it should be the SCC internal prime paths of the full graph.  */
   auto_graph scc (new_graph (11));
   add_edge (scc, 0, 2);
   add_edge (scc, 2, 4);
   add_edge (scc, 4, 5);
   add_edge (scc, 4, 6);
   add_edge (scc, 5, 7);
   add_edge (scc, 6, 9);
   add_edge (scc, 9, 7);
   add_edge (scc, 7, 2);

   auto_vec_vec paths = prime_paths (scc, 100);
   const int p01[] = { 5, 7, 2, 4, 6, 9 };
   const int p02[] = { 4, 6, 9, 7, 2, 4 };
   const int p03[] = { 2, 4, 6, 9, 7, 2 };
   const int p04[] = { 6, 9, 7, 2, 4, 6 };
   const int p05[] = { 9, 7, 2, 4, 6, 9 };
   const int p06[] = { 7, 2, 4, 6, 9, 7 };
   const int p07[] = { 6, 9, 7, 2, 4, 5 };
   const int p08[] = { 4, 5, 7, 2, 4 };
   const int p09[] = { 2, 4, 5, 7, 2 };
   const int p10[] = { 5, 7, 2, 4, 5 };
   const int p11[] = { 7, 2, 4, 5, 7 };

   ASSERT_TRUE (any_equal_p (p01, paths));
   ASSERT_TRUE (any_equal_p (p02, paths));
   ASSERT_TRUE (any_equal_p (p03, paths));
   ASSERT_TRUE (any_equal_p (p04, paths));
   ASSERT_TRUE (any_equal_p (p05, paths));
   ASSERT_TRUE (any_equal_p (p06, paths));
   ASSERT_TRUE (any_equal_p (p07, paths));
   ASSERT_TRUE (any_equal_p (p08, paths));
   ASSERT_TRUE (any_equal_p (p09, paths));
   ASSERT_TRUE (any_equal_p (p10, paths));
   ASSERT_TRUE (any_equal_p (p11, paths));
 }

 /* Test the entry/exit path helpers for the strongly connected component in
    binary_search.  The SCC has one entry (2, the loop header) and two exits (2,
    6, the loop exit and return).  */
 static void
 test_scc_entry_exit_paths ()
 {
   auto_graph scc (new_graph (11));
   add_edge (scc, 2, 4);
   add_edge (scc, 4, 5);
   add_edge (scc, 4, 6);
   add_edge (scc, 5, 7);
   add_edge (scc, 6, 9);
   add_edge (scc, 9, 7);
   add_edge (scc, 7, 2);

   trie scc_internal_trie;
   auto_vec_vec (simple_paths (scc, scc_internal_trie, 2));
   auto_vec_vec (simple_paths (scc, scc_internal_trie, 4));
   auto_vec_vec (simple_paths (scc, scc_internal_trie, 5));
   auto_vec_vec (simple_paths (scc, scc_internal_trie, 6));
   auto_vec_vec (simple_paths (scc, scc_internal_trie, 7));
   auto_vec_vec (simple_paths (scc, scc_internal_trie, 9));
   auto_vec_vec scc_prime_paths = scc_internal_trie.paths ();

   trie entry_exits {};
   scc_entry_exit_paths (scc_prime_paths, 2, 2, entry_exits);
   scc_entry_exit_paths (scc_prime_paths, 2, 6, entry_exits);

   const int p01[] = { 2 };
   const int p02[] = { 2, 4, 6 };

   ASSERT_EQ (count (entry_exits), 2);
   ASSERT_TRUE (contains (entry_exits, p01));
   ASSERT_TRUE (contains (entry_exits, p02));

   trie exits;
   scc_exit_paths (scc_prime_paths, 2, exits);
   scc_exit_paths (scc_prime_paths, 6, exits);

   const int p03[] = { 4, 6, 9, 7, 2 };
   const int p04[] = { 5, 7, 2, 4, 6 };
   const int p05[] = { 9, 7, 2, 4, 6 };
   const int p06[] = { 4, 5, 7, 2 };

   ASSERT_EQ (count (exits), 4);
   ASSERT_TRUE (contains (exits, p03));
   ASSERT_TRUE (contains (exits, p04));
   ASSERT_TRUE (contains (exits, p05));
   ASSERT_TRUE (contains (exits, p06));

   trie entries;
   scc_entry_paths (scc_prime_paths, 2, entries);

   const int p07[] = { 2, 4, 6, 9, 7 };
   const int p08[] = { 2, 4, 5, 7 };
   ASSERT_EQ (count (entries), 2);
   ASSERT_TRUE (contains (entries, p07));
   ASSERT_TRUE (contains (entries, p08));
 }

 static void
 test_complete_prime_paths ()
 {
   const int ccfgpp0[] = { 0, 1, 2, 3, 5 };
   const int ccfgpp1[] = { 0, 1, 2, 4, 5 };
   trie ccfg_prime_paths {};
   ccfg_prime_paths.insert (ccfgpp0);
   ccfg_prime_paths.insert (ccfgpp1);

   const int scc0[] = { 2 };
   const int scc1[] = { 2, 4, 6 };

   const int ccfg_single[] = { 0, 1, 3, 8, 10 };

   auto_graph cfg (binary_search_cfg ());
   auto_sbitmap_vector edges (sbitmap_vector_alloc (cfg->n_vertices,
 						   cfg->n_vertices));
   bitmap_vector_clear (edges, cfg->n_vertices);
   for (int i = 0; i != cfg->n_vertices; ++i)
     for (graph_edge *e = cfg->vertices[i].succ; e; e = e->succ_next)
       bitmap_set_bit (edges[e->src], e->dest);

   auto_vec<trie> ccfg_paths {};
   ccfg_paths.safe_grow_cleared (6);
   ccfg_paths[0].insert (array_slice <const int> (ccfg_single + 0, 1));
   ccfg_paths[1].insert (array_slice <const int> (ccfg_single + 1, 1));
   ccfg_paths[3].insert (array_slice <const int> (ccfg_single + 2, 1));
   ccfg_paths[4].insert (array_slice <const int> (ccfg_single + 3, 1));
   ccfg_paths[5].insert (array_slice <const int> (ccfg_single + 4, 1));

   /* The paths for the SCC would need to be updated in ccfg pre pass.  This
      feels like a clumsy interface and should maybe be disconnected from the
      struct graph.  */
   ccfg_paths[2].hard_insert (scc0);
   ccfg_paths[2].hard_insert (scc1);

   trie cpp = cfg_complete_prime_paths (edges, ccfg_paths, ccfg_prime_paths);

   const int p01[] = { 0, 1, 2, 3, 10 };
   const int p02[] = { 0, 1, 2, 4, 6, 8, 10 };

   ASSERT_EQ (count (cpp), 2);
   ASSERT_TRUE (contains (cpp, p01));
   ASSERT_TRUE (contains (cpp, p02));
 }

 static vec<int>
 binary_search_scc_map ()
 {
   vec<int> sccs {};
   sccs.safe_grow (11);
   sccs[0] = 0;
   sccs[1] = 1;
   sccs[2] = 2;
   sccs[3] = 3;
   sccs[4] = 2;
   sccs[5] = 2;
   sccs[6] = 2;
   sccs[7] = 2;
   sccs[8] = 4;
   sccs[9] = 2;
   sccs[10] = 5;
   return sccs;
 }

 static void
 test_exit_prime_paths ()
 {
   const int cpp01[] = { 0, 1, 2, 3, 10 };
   const int cpp02[] = { 0, 1, 2, 4, 6, 8, 10 };
   trie complete_prime_paths {};
   complete_prime_paths.insert_with_suffix (cpp01);
   complete_prime_paths.insert_with_suffix (cpp02);

   const int ep01[] = { 4, 6, 9, 7, 2 };
   const int ep02[] = { 5, 7, 2, 4, 6 };
   const int ep03[] = { 9, 7, 2, 4, 6 };
   const int ep04[] = { 4, 5, 7, 2 };
   trie exit_paths;
   exit_paths.insert (ep01);
   exit_paths.insert (ep02);
   exit_paths.insert (ep03);
   exit_paths.insert (ep04);

   auto_graph cfg (binary_search_cfg ());
   trie epp = scc_exit_prime_paths (cfg, exit_paths, complete_prime_paths);

   const int pp01[] = { 4, 6, 9, 7, 2, 3, 10 };
   const int pp02[] = { 5, 7, 2, 4, 6, 8, 10 };
   const int pp03[] = { 9, 7, 2, 4, 6, 8, 10 };
   const int pp04[] = { 4, 5, 7, 2, 3, 10 };

   ASSERT_EQ (count (epp), 4);
   ASSERT_TRUE (contains (epp, pp01));
   ASSERT_TRUE (contains (epp, pp02));
   ASSERT_TRUE (contains (epp, pp03));
   ASSERT_TRUE (contains (epp, pp04));
 }

 static void
 test_entry_prime_paths ()
 {
   auto_graph cfg (binary_search_cfg ());

   static int sccep01[] = { 2, 4, 6, 9, 7 };
   static int sccep02[] = { 2, 4, 5, 7 };
   trie scc_entry_paths;
   scc_entry_paths.insert (sccep01);
   scc_entry_paths.insert (sccep02);

   const int cpp01[] = { 0, 1, 2, 3, 10 };
   const int cpp02[] = { 0, 1, 2, 4, 6, 8, 10 };
   trie complete_prime_paths {};
   complete_prime_paths.insert (cpp01);
   complete_prime_paths.insert (cpp02);

   const int ep01[] = { 4, 6, 9, 7, 2, 3, 10 };
   const int ep02[] = { 5, 7, 2, 4, 6, 8, 10 };
   const int ep03[] = { 9, 7, 2, 4, 6, 8, 10 };
   const int ep04[] = { 4, 5, 7, 2, 3, 10 };
   trie exit_prime_paths {};
   exit_prime_paths.insert (ep01);
   exit_prime_paths.insert (ep02);
   exit_prime_paths.insert (ep03);
   exit_prime_paths.insert (ep04);

   auto_vec<int> sccs = binary_search_scc_map ();

   trie epp = scc_entry_prime_paths (cfg, scc_entry_paths,
 				    complete_prime_paths,
 				    exit_prime_paths);

   /* The 0 (start node) does not show up in the Fazli & Afsharchi paper and
      kinda, but has no real impact on the result.  The prime-paths functions
      do not care about these vertices, but the path-coverage instrumentation
      filters out the ENTRY/EXIT blocks from all the paths.  */
   const int pp01[] = { 0, 1, 2, 4, 6, 9, 7 };
   const int pp02[] = { 0, 1, 2, 4, 5, 7 };
   ASSERT_EQ (count (epp), 2);
   ASSERT_TRUE (contains (epp, pp01));
   ASSERT_TRUE (contains (epp, pp02));
 }

 /* A straight-line graph with one vertex should yield a single path of length 1
    with just the non-exit non-entry node in it.  */
 void
 test_singleton_path ()
 {
   auto_graph cfg (new_graph (3));
   add_edge (cfg, 0, 2);
   add_edge (cfg, 2, 1);
   auto_vec_vec paths = prime_paths (cfg, 100);

   ASSERT_EQ (paths.length (), 1);
   ASSERT_EQ (paths[0].length (), 3);
   ASSERT_EQ (paths[0][0], 0);
   ASSERT_EQ (paths[0][1], 2);
   ASSERT_EQ (paths[0][2], 1);
 }

 void
 path_coverage_cc_tests ()
 {
   limit_reset (250000);
   test_prime_paths ();
   test_build_ccfg ();
   test_split_components ();
   test_scc_internal_prime_paths ();
   test_scc_entry_exit_paths ();
   test_complete_prime_paths ();
   test_exit_prime_paths ();
   test_entry_prime_paths ();
   test_singleton_path ();
 }

 } // namespace selftest

 #endif /* #if CHECKING_P */