# sim/m32r/mloopx.in - binutils-gdb - Git at Google

 # Simulator main loop for m32rx. -*- C -*-
 #
 # Copyright 1996-2021 Free Software Foundation, Inc.
 #
 # This file is part of the GNU Simulators.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.

 # Syntax:
 # /bin/sh mainloop.in command
 #
 # Command is one of:
 #
 # init
 # support
 # extract-{simple,scache,pbb}
 # {full,fast}-exec-{simple,scache,pbb}
 #
 # A target need only provide a "full" version of one of simple,scache,pbb.
 # If the target wants it can also provide a fast version of same, or if
 # the slow (full featured) version is `simple', then the fast version can be
 # one of scache/pbb.
 # A target can't provide more than this.

 # ??? After a few more ports are done, revisit.
 # Will eventually need to machine generate a lot of this.

 case "x$1" in

 xsupport)

 cat <<EOF

 /* Fill in the writeback ("finisher") entry for a pair of insns that were
    emitted for parallel execution.

    SC is the scache entry that receives the writeback handler.
    SC1/ID1 describe the first insn (left slot, lower address).
    SC2/ID2 describe the second insn (right slot, higher address); they are
    only referenced by the disabled code below.

    The handler installed in SC is the one reached through ID1->par_idesc,
    and SC's fields.write.abuf is pointed at SC1's argument buffer.  */

 static INLINE void
 emit_par_finish (SIM_CPU *current_cpu, PCADDR pc, SCACHE *sc,
 		 SCACHE *sc1, const IDESC *id1, SCACHE *sc2, const IDESC *id2)
 {
   ARGBUF *wb_args = &sc->argbuf;
   const IDESC *wb_idesc = id1->par_idesc;

   /* Record which argument buffer the writeback handler works on before
      the entry itself is filled in.  */
   wb_args->fields.write.abuf = &sc1->argbuf;
   @cpu@_fill_argbuf (current_cpu, wb_args, wb_idesc, pc, 0);
   /* trace_p and profile_p are deliberately left unset here.  */
 #if 0 /* not currently needed for id2 since results written directly */
   wb_args = &sc[1].argbuf;
   wb_idesc = id2->par_idesc;
   wb_args->fields.write.abuf = &sc2->argbuf;
   @cpu@_fill_argbuf (current_cpu, wb_args, wb_idesc, pc + 2, 0);
   /* trace_p and profile_p are deliberately left unset here.  */
 #endif
 }

 /* Decode the 16 bit INSN located at PC into SC's argument buffer and fill
    that buffer in.  When PARALLEL_P is nonzero the decoded descriptor's
    par_idesc is used in place of the descriptor itself.  FAST_P is passed
    through to the fill routine.  Returns the descriptor that was used.  */

 static INLINE const IDESC *
 emit_16 (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn,
 	 SCACHE *sc, int fast_p, int parallel_p)
 {
   ARGBUF *args = &sc->argbuf;
   const IDESC *decoded = @cpu@_decode (current_cpu, pc, insn, insn, args);
   const IDESC *chosen = parallel_p ? decoded->par_idesc : decoded;

   @cpu@_fill_argbuf (current_cpu, args, chosen, pc, fast_p);
   return chosen;
 }

 /* Emit a 16 bit insn wrapped in before/after handlers.  Three consecutive
    entries starting at SC are written: sc[0] gets the before handler,
    sc[1] the insn itself and sc[2] the after handler.  TRACE_P and
    PROFILE_P are stored in the insn's argument buffer.  Returns the insn's
    descriptor.  */

 static INLINE const IDESC *
 emit_full16 (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn, SCACHE *sc,
 	     int trace_p, int profile_p)
 {
   SCACHE *insn_sc = sc + 1;
   const IDESC *insn_idesc;

   @cpu@_emit_before (current_cpu, sc, pc, 1);
   insn_idesc = emit_16 (current_cpu, pc, insn, insn_sc, 0, 0);
   @cpu@_emit_after (current_cpu, sc + 2, pc);

   /* Set the flags only after the entry has been filled in.  */
   insn_sc->argbuf.trace_p = trace_p;
   insn_sc->argbuf.profile_p = profile_p;
   return insn_idesc;
 }

 /* Emit a parallel pair held in the 32 bit word INSN.  Three consecutive
    entries starting at SC are written: the left insn (upper half of INSN,
    emitted with parallel_p set), the right insn (lower half with bit 15
    masked off, emitted as an ordinary serial insn) and a writeback
    finisher for the left insn.  Returns the left insn's descriptor.

    Executing the right insn serially means the writeback only has to deal
    with the left insn.
    ??? Revisit to handle exceptions right.
    FIXME: No need to handle this in parallel if the second insn is a nop.  */

 static INLINE const IDESC *
 emit_parallel (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn,
 	       SCACHE *sc, int fast_p)
 {
   const IDESC *left;
   const IDESC *right;

   left = emit_16 (current_cpu, pc, insn >> 16, sc, fast_p, 1);

   /* The right slot can never hold a cti: no cti's go in the S pipeline.  */
   right = emit_16 (current_cpu, pc + 2, insn & 0x7fff, sc + 1, fast_p, 0);

   /* Tell sc/snc insns how many entries to skip.  */
   if (IDESC_SKIP_P (left))
     SEM_SKIP_COMPILE (current_cpu, sc, 1);

   /* Third entry: write back the results of the left insn.  */
   emit_par_finish (current_cpu, pc, sc + 2, sc, left, sc + 1, right);

   return left;
 }

 /* Emit a parallel pair held in the 32 bit word INSN together with
    before/after handlers.  Six consecutive entries starting at SC are
    written:
      sc[0]  before handler
      sc[1]  left insn (upper half of INSN, emitted with parallel_p set)
      sc[2]  before handler
      sc[3]  right insn (lower half with bit 15 masked off, serial)
      sc[4]  writeback finisher for the left insn
      sc[5]  after handler
    TRACE_P and PROFILE_P are stored in both insns' argument buffers.
    Returns the left insn's descriptor.

    Executing the right insn serially means the writeback only has to deal
    with the left insn.
    ??? Revisit to handle exceptions right.
    FIXME: No need to handle this in parallel if the second insn is a nop.  */

 static INLINE const IDESC *
 emit_full_parallel (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn,
 		    SCACHE *sc, int trace_p, int profile_p)
 {
   SCACHE *left_sc = sc + 1;
   SCACHE *right_sc = sc + 3;
   const IDESC *left;
   const IDESC *right;

   @cpu@_emit_before (current_cpu, sc, pc, 1);

   left = emit_16 (current_cpu, pc, insn >> 16, left_sc, 0, 1);
   left_sc->argbuf.trace_p = trace_p;
   left_sc->argbuf.profile_p = profile_p;

   @cpu@_emit_before (current_cpu, sc + 2, pc, 0);

   /* The right slot can never hold a cti: no cti's go in the S pipeline.  */
   right = emit_16 (current_cpu, pc + 2, insn & 0x7fff, right_sc, 0, 0);
   right_sc->argbuf.trace_p = trace_p;
   right_sc->argbuf.profile_p = profile_p;

   /* Tell sc/snc insns how many entries to skip.  */
   if (IDESC_SKIP_P (left))
     SEM_SKIP_COMPILE (current_cpu, sc, 4);

   /* Write back the results of the left insn.  */
   emit_par_finish (current_cpu, pc, sc + 4, left_sc, left, right_sc, right);

   @cpu@_emit_after (current_cpu, sc + 5, pc);

   return left;
 }

 /* Decode the 32 bit INSN located at PC into SC's argument buffer and fill
    that buffer in.  The decoder is handed the upper 16 bits of INSN as its
    third argument and the whole word as its fourth.  FAST_P is passed
    through to the fill routine.  Returns the decoded descriptor.  */

 static INLINE const IDESC *
 emit_32 (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn,
 	 SCACHE *sc, int fast_p)
 {
   ARGBUF *args = &sc->argbuf;
   const IDESC *decoded;

   decoded = @cpu@_decode (current_cpu, pc, (USI) insn >> 16, insn, args);
   @cpu@_fill_argbuf (current_cpu, args, decoded, pc, fast_p);
   return decoded;
 }

 /* Emit a 32 bit insn wrapped in before/after handlers.  Three consecutive
    entries starting at SC are written: sc[0] gets the before handler,
    sc[1] the insn itself and sc[2] the after handler.  TRACE_P and
    PROFILE_P are stored in the insn's argument buffer.  Returns the insn's
    descriptor.  */

 static INLINE const IDESC *
 emit_full32 (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn, SCACHE *sc,
 	     int trace_p, int profile_p)
 {
   SCACHE *insn_sc = sc + 1;
   const IDESC *insn_idesc;

   @cpu@_emit_before (current_cpu, sc, pc, 1);
   insn_idesc = emit_32 (current_cpu, pc, insn, insn_sc, 0);
   @cpu@_emit_after (current_cpu, sc + 2, pc);

   /* Set the flags only after the entry has been filled in.  */
   insn_sc->argbuf.trace_p = trace_p;
   insn_sc->argbuf.profile_p = profile_p;
   return insn_idesc;
 }

 EOF

 ;;

 xinit)

 # Nothing needed.

 ;;

 xextract-pbb)

 # Inputs:  current_cpu, pc, sc, max_insns, FAST_P
 # Outputs: sc, pc
 # sc must be left pointing past the last created entry.
 # pc must be left pointing past the last created entry.
 # If the pbb is terminated by a cti insn, SET_CTI_VPC(sc) must be called
 # to record the vpc of the cti insn.
 # SET_INSN_COUNT(n) must be called to record number of real insns.

 cat <<EOF
 {
   /* Compile one pbb: starting at pc, decode insns into consecutive scache
      entries (advancing sc and pc) until a cti has been emitted or
      max_insns has been used up, then report the number of real insns
      with SET_INSN_COUNT.  max_insns is decremented by the number of
      scache entries consumed, icount by the number of real insns.  */
   const IDESC *idesc;
   int icount = 0;

   /* pc is not on a 4 byte boundary: handle the single 16 bit insn there
      first so that the loops below always fetch whole 32 bit words.  */
   if ((pc & 3) != 0)
     {
       /* This occurs when single stepping and when compiling the not-taken
 	 part of conditional branches.  */
       UHI insn = GETIMEMUHI (current_cpu, pc);
       int trace_p = PC_IN_TRACE_RANGE_P (current_cpu, pc);
       int profile_p = PC_IN_PROFILE_RANGE_P (current_cpu, pc);
       SCACHE *cti_sc; /* ??? tmp hack */

       /* A parallel insn isn't allowed here, but we don't mind nops.
 	 ??? We need to wait until the insn is executed before signalling
 	 the error, for situations where such signalling is wanted.  */
 #if 0
       if ((insn & 0x8000) != 0
 	  && (insn & 0x7fff) != 0x7000) /* parallel nops are ok */
 	sim_engine_invalid_insn (current_cpu, pc, 0);
 #endif

       /* Only emit before/after handlers if necessary.  */
       if (FAST_P || (! trace_p && ! profile_p))
 	{
 	  idesc = emit_16 (current_cpu, pc, insn & 0x7fff, sc, FAST_P, 0);
 	  cti_sc = sc;
 	  ++sc;
 	  --max_insns;
 	}
       else
 	{
 	  /* emit_full16 fills three entries (before, insn, after); the insn
 	     itself is the middle one.  */
 	  idesc = emit_full16 (current_cpu, pc, insn & 0x7fff, sc,
 			       trace_p, profile_p);
 	  cti_sc = sc + 1;
 	  sc += 3;
 	  max_insns -= 3;
 	}
       ++icount;
       pc += 2;
       if (IDESC_CTI_P (idesc))
 	{
 	  SET_CTI_VPC (cti_sc);
 	  goto Finish;
 	}
     }

   /* There are two copies of the compiler: full(!fast) and fast.
      The "full" case emits before/after handlers for each insn.
      Having two copies of this code is a tradeoff, having one copy
      seemed a bit more difficult to read (due to constantly testing
      FAST_P).  ??? On the other hand, with address ranges we'll want to
      omit before/after handlers for unwanted insns.  Having separate loops
      for FAST/!FAST avoids constantly doing the test in the loop, but
      typically FAST_P is a constant and such tests will get optimized out.  */

   if (FAST_P)
     {
       while (max_insns > 0)
 	{
 	  USI insn = GETIMEMUSI (current_cpu, pc);
 	  /* Top bit of the fetched word set.  */
 	  if ((SI) insn < 0)
 	    {
 	      /* 32 bit insn */
 	      idesc = emit_32 (current_cpu, pc, insn, sc, 1);
 	      ++sc;
 	      --max_insns;
 	      ++icount;
 	      pc += 4;
 	      if (IDESC_CTI_P (idesc))
 		{
 		  /* sc has already been advanced past the insn.  */
 		  SET_CTI_VPC (sc - 1);
 		  break;
 		}
 	    }
 	  else
 	    {
 	      if ((insn & 0x8000) != 0) /* parallel? */
 		{
 		  /* Number of scache entries used for this pair.  */
 		  int up_count;

 		  /* Left slot matches the trap pattern.  */
 		  if (((insn >> 16) & 0xfff0) == 0x10f0)
 		    {
 		      /* FIXME: No need to handle this sequentially if system
 		         calls will be able to execute after second insn in
 		         parallel. ( trap #num || insn ) */
 		      /* insn */
 		      idesc = emit_16 (current_cpu, pc + 2, insn & 0x7fff,
  				       sc, 1, 0);
 		      /* trap */
 		      emit_16 (current_cpu, pc, insn >> 16, sc + 1, 1, 0);
 		      up_count = 2;
 		    }
 		  else
 		    {
 		      /* Yep.  Here's the "interesting" [sic] part.  */
 		      idesc = emit_parallel (current_cpu, pc, insn, sc, 1);
 		      up_count = 3;
 		    }
 		  sc += up_count;
 		  max_insns -= up_count;
 		  icount += 2;
 		  pc += 4;
 		  if (IDESC_CTI_P (idesc))
 		    {
 		      /* Back up to the first entry emitted for this pair.  */
 		      SET_CTI_VPC (sc - up_count);
 		      break;
 		    }
 		}
 	      else /* 2 serial 16 bit insns */
 		{
 		  /* First insn: upper half of the word.  */
 		  idesc = emit_16 (current_cpu, pc, insn >> 16, sc, 1, 0);
 		  ++sc;
 		  --max_insns;
 		  ++icount;
 		  pc += 2;
 		  if (IDESC_CTI_P (idesc))
 		    {
 		      SET_CTI_VPC (sc - 1);
 		      break;
 		    }
 		  /* While we're guaranteed that there's room to extract the
 		     insn, when single stepping we can't; the pbb must stop
 		     after the first insn.  */
 		  if (max_insns == 0)
 		    break;
 		  /* Second insn: lower half of the word with bit 15 masked.  */
 		  idesc = emit_16 (current_cpu, pc, insn & 0x7fff, sc, 1, 0);
 		  ++sc;
 		  --max_insns;
 		  ++icount;
 		  pc += 2;
 		  if (IDESC_CTI_P (idesc))
 		    {
 		      SET_CTI_VPC (sc - 1);
 		      break;
 		    }
 		}
 	    }
 	}
     }
   else /* ! FAST_P */
     {
       while (max_insns > 0)
 	{
 	  USI insn = GETIMEMUSI (current_cpu, pc);
 	  int trace_p = PC_IN_TRACE_RANGE_P (current_cpu, pc);
 	  int profile_p = PC_IN_PROFILE_RANGE_P (current_cpu, pc);
 	  SCACHE *cti_sc; /* ??? tmp hack */
 	  /* Top bit of the fetched word set.  */
 	  if ((SI) insn < 0)
 	    {
 	      /* 32 bit insn
 		 Only emit before/after handlers if necessary.  */
 	      if (trace_p || profile_p)
 		{
 		  /* Three entries (before, insn, after); the insn is the
 		     middle one.  */
 		  idesc = emit_full32 (current_cpu, pc, insn, sc,
 				       trace_p, profile_p);
 		  cti_sc = sc + 1;
 		  sc += 3;
 		  max_insns -= 3;
 		}
 	      else
 		{
 		  idesc = emit_32 (current_cpu, pc, insn, sc, 0);
 		  cti_sc = sc;
 		  ++sc;
 		  --max_insns;
 		}
 	      ++icount;
 	      pc += 4;
 	      if (IDESC_CTI_P (idesc))
 		{
 		  SET_CTI_VPC (cti_sc);
 		  break;
 		}
 	    }
 	  else
 	    {
 	      if ((insn & 0x8000) != 0) /* parallel? */
 		{
 		  /* Yep.  Here's the "interesting" [sic] part.
 		     Only emit before/after handlers if necessary.  */
 		  if (trace_p || profile_p)
 		    {
 		      /* Left slot matches the trap pattern.  */
 		      if (((insn >> 16) & 0xfff0) == 0x10f0)
 			{
 			  /* FIXME: No need to handle this sequentially if
 			     system calls will be able to execute after second
 			     insn in parallel. ( trap #num || insn ) */
 			  /* NOTE(review): trace_p/profile_p are passed as 0
 			     to both calls although this branch is only taken
 			     when one of them is set; also idesc is the
 			     non-trap insn's descriptor, so the cti test below
 			     is made on that insn -- confirm both are
 			     intended.  */
 			  /* insn */
 			  idesc = emit_full16 (current_cpu, pc + 2,
 					       insn & 0x7fff, sc, 0, 0);
 			  /* trap */
 			  emit_full16 (current_cpu, pc, insn >> 16, sc + 3,
 				       0, 0);
 			}
 		      else
 			{
 		          idesc = emit_full_parallel (current_cpu, pc, insn,
 						      sc, trace_p, profile_p);
 			}
 		      /* Either way six entries were filled (two emit_full16
 			 calls of three entries each, or sc[0] through sc[5]
 			 in emit_full_parallel) and sc[1] holds the insn
 			 whose descriptor is in idesc.  */
 		      cti_sc = sc + 1;
 		      sc += 6;
 		      max_insns -= 6;
 		    }
 		  else
 		    {
 		      /* Number of scache entries used for this pair.  */
 		      int up_count;

 		      if (((insn >> 16) & 0xfff0) == 0x10f0)
 			{
                           /* FIXME: No need to handle this sequentially if
                              system calls will be able to execute after second
                              insn in parallel. ( trap #num || insn ) */
                           /* insn */
                           idesc = emit_16 (current_cpu, pc + 2, insn & 0x7fff,
                                            sc, 0, 0);
                           /* trap */
                           emit_16 (current_cpu, pc, insn >> 16, sc + 1, 0, 0);
                           up_count = 2;
 			}
 		      else
 			{
 		          idesc = emit_parallel (current_cpu, pc, insn, sc, 0);
                           up_count = 3;
 			}
 		      cti_sc = sc;
 		      sc += up_count;
 		      max_insns -= up_count;
 		    }
 		  icount += 2;
 		  pc += 4;
 		  if (IDESC_CTI_P (idesc))
 		    {
 		      SET_CTI_VPC (cti_sc);
 		      break;
 		    }
 		}
 	      else /* 2 serial 16 bit insns */
 		{
 		  /* Only emit before/after handlers if necessary.  */
 		  if (trace_p || profile_p)
 		    {
 		      idesc = emit_full16 (current_cpu, pc, insn >> 16, sc,
 					   trace_p, profile_p);
 		      cti_sc = sc + 1;
 		      sc += 3;
 		      max_insns -= 3;
 		    }
 		  else
 		    {
 		      idesc = emit_16 (current_cpu, pc, insn >> 16, sc, 0, 0);
 		      cti_sc = sc;
 		      ++sc;
 		      --max_insns;
 		    }
 		  ++icount;
 		  pc += 2;
 		  if (IDESC_CTI_P (idesc))
 		    {
 		      SET_CTI_VPC (cti_sc);
 		      break;
 		    }
 		  /* While we're guaranteed that there's room to extract the
 		     insn, when single stepping we can't; the pbb must stop
 		     after the first insn.  */
 		  if (max_insns <= 0)
 		    break;
 		  /* Use the same trace/profile address for the 2nd insn.
 		     Saves us having to compute it and they come in pairs
 		     anyway (e.g. can never branch to the 2nd insn).  */
 		  if (trace_p || profile_p)
 		    {
 		      idesc = emit_full16 (current_cpu, pc, insn & 0x7fff, sc,
 					   trace_p, profile_p);
 		      cti_sc = sc + 1;
 		      sc += 3;
 		      max_insns -= 3;
 		    }
 		  else
 		    {
 		      idesc = emit_16 (current_cpu, pc, insn & 0x7fff, sc, 0, 0);
 		      cti_sc = sc;
 		      ++sc;
 		      --max_insns;
 		    }
 		  ++icount;
 		  pc += 2;
 		  if (IDESC_CTI_P (idesc))
 		    {
 		      SET_CTI_VPC (cti_sc);
 		      break;
 		    }
 		}
 	    }
 	}
     }

  Finish:
   SET_INSN_COUNT (icount);
 }
 EOF

 ;;

 xfull-exec-pbb)

 # Inputs: current_cpu, vpc, FAST_P
 # Outputs: vpc
 # vpc is the virtual program counter.
 #
 # The emitted C text defines DEFINE_SWITCH and then includes semx-switch.c.
 # Presumably that file supplies the switch-based execution code when
 # DEFINE_SWITCH is defined -- confirm against semx-switch.c.

 cat <<EOF
 #define DEFINE_SWITCH
 #include "semx-switch.c"
 EOF

 ;;

 # Any other command: report it on stderr and exit with status 1.
 *)
   echo "Invalid argument to mainloop.in: $1" >&2
   exit 1
   ;;

 esac
	# Simulator main loop for m32rx. -*- C -*-
	#
	# Copyright 1996-2021 Free Software Foundation, Inc.
	#
	# This file is part of the GNU Simulators.
	#
	# This program is free software; you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation; either version 3 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program. If not, see <http://www.gnu.org/licenses/>.

	# Syntax:
	# /bin/sh mainloop.in command
	#
	# Command is one of:
	#
	# init
	# support
	# extract-{simple,scache,pbb}
	# {full,fast}-exec-{simple,scache,pbb}
	#
	# A target need only provide a "full" version of one of simple,scache,pbb.
	# If the target wants it can also provide a fast version of same, or if
	# the slow (full featured) version is `simple', then the fast version can be
	# one of scache/pbb.
	# A target can't provide more than this.

	# ??? After a few more ports are done, revisit.
	# Will eventually need to machine generate a lot of this.

	case "x$1" in

	xsupport)

	cat <<EOF

	/* Fill in the writeback ("finisher") entry for a pair of insns that were
	   emitted for parallel execution.

	   SC is the scache entry that receives the writeback handler.
	   SC1/ID1 describe the first insn (left slot, lower address).
	   SC2/ID2 describe the second insn (right slot, higher address); they
	   are only referenced by the disabled code below.

	   The handler installed in SC is the one reached through
	   ID1->par_idesc, and SC's fields.write.abuf is pointed at SC1's
	   argument buffer.  */

	static INLINE void
	emit_par_finish (SIM_CPU *current_cpu, PCADDR pc, SCACHE *sc,
	                 SCACHE *sc1, const IDESC *id1, SCACHE *sc2,
	                 const IDESC *id2)
	{
	  ARGBUF *abuf;

	  abuf = &sc->argbuf;
	  id1 = id1->par_idesc;
	  /* Record which argument buffer the writeback handler works on.  */
	  abuf->fields.write.abuf = &sc1->argbuf;
	  @cpu@_fill_argbuf (current_cpu, abuf, id1, pc, 0);
	  /* no need to set trace_p,profile_p */
	#if 0 /* not currently needed for id2 since results written directly */
	  abuf = &sc[1].argbuf;
	  id2 = id2->par_idesc;
	  abuf->fields.write.abuf = &sc2->argbuf;
	  @cpu@_fill_argbuf (current_cpu, abuf, id2, pc + 2, 0);
	  /* no need to set trace_p,profile_p */
	#endif
	}

	/* Decode the 16 bit INSN located at PC into SC's argument buffer and
	   fill that buffer in.  When PARALLEL_P is nonzero the decoded
	   descriptor's par_idesc is used in place of the descriptor itself.
	   FAST_P is passed through to the fill routine.  Returns the descriptor
	   that was used.  */

	static INLINE const IDESC *
	emit_16 (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn,
	         SCACHE *sc, int fast_p, int parallel_p)
	{
	  ARGBUF *args = &sc->argbuf;
	  const IDESC *decoded = @cpu@_decode (current_cpu, pc, insn, insn, args);
	  const IDESC *chosen = parallel_p ? decoded->par_idesc : decoded;

	  @cpu@_fill_argbuf (current_cpu, args, chosen, pc, fast_p);
	  return chosen;
	}

	/* Emit a 16 bit insn wrapped in before/after handlers.  Three
	   consecutive entries starting at SC are written: sc[0] gets the before
	   handler, sc[1] the insn itself and sc[2] the after handler.  TRACE_P
	   and PROFILE_P are stored in the insn's argument buffer.  Returns the
	   insn's descriptor.  */

	static INLINE const IDESC *
	emit_full16 (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn,
	             SCACHE *sc, int trace_p, int profile_p)
	{
	  const IDESC *id;

	  @cpu@_emit_before (current_cpu, sc, pc, 1);
	  id = emit_16 (current_cpu, pc, insn, sc + 1, 0, 0);
	  @cpu@_emit_after (current_cpu, sc + 2, pc);
	  /* Set the flags only after the entry has been filled in.  */
	  sc[1].argbuf.trace_p = trace_p;
	  sc[1].argbuf.profile_p = profile_p;
	  return id;
	}

	/* Emit a parallel pair held in the 32 bit word INSN.  Three consecutive
	   entries starting at SC are written: the left insn (upper half of
	   INSN, emitted with parallel_p set), the right insn (lower half with
	   bit 15 masked off, emitted as an ordinary serial insn) and a
	   writeback finisher for the left insn.  Returns the left insn's
	   descriptor.  */

	static INLINE const IDESC *
	emit_parallel (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn,
	               SCACHE *sc, int fast_p)
	{
	  const IDESC *id, *id2;

	  /* Emit both insns, then emit a finisher-upper.
	     We speed things up by handling the second insn serially
	     [not parallelly].  Then the writeback only has to deal
	     with the first insn.  */
	  /* ??? Revisit to handle exceptions right.  */

	  /* FIXME: No need to handle this parallely if second is nop.  */
	  id = emit_16 (current_cpu, pc, insn >> 16, sc, fast_p, 1);

	  /* Note that this can never be a cti.  No cti's go in the S pipeline.  */
	  id2 = emit_16 (current_cpu, pc + 2, insn & 0x7fff, sc + 1, fast_p, 0);

	  /* Set sc/snc insns notion of where to skip to.  */
	  if (IDESC_SKIP_P (id))
	    SEM_SKIP_COMPILE (current_cpu, sc, 1);

	  /* Emit code to finish executing the semantics
	     (write back the results).  */
	  emit_par_finish (current_cpu, pc, sc + 2, sc, id, sc + 1, id2);

	  return id;
	}

	/* Emit a parallel pair held in the 32 bit word INSN together with
	   before/after handlers.  Six consecutive entries starting at SC are
	   written:
	     sc[0]  before handler
	     sc[1]  left insn (upper half of INSN, emitted with parallel_p set)
	     sc[2]  before handler
	     sc[3]  right insn (lower half with bit 15 masked off, serial)
	     sc[4]  writeback finisher for the left insn
	     sc[5]  after handler
	   TRACE_P and PROFILE_P are stored in both insns' argument buffers.
	   Returns the left insn's descriptor.  */

	static INLINE const IDESC *
	emit_full_parallel (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn,
	                    SCACHE *sc, int trace_p, int profile_p)
	{
	  const IDESC *id, *id2;

	  /* Emit both insns, then emit a finisher-upper.
	     We speed things up by handling the second insn serially
	     [not parallelly].  Then the writeback only has to deal
	     with the first insn.  */
	  /* ??? Revisit to handle exceptions right.  */

	  @cpu@_emit_before (current_cpu, sc, pc, 1);

	  /* FIXME: No need to handle this parallelly if second is nop.  */
	  id = emit_16 (current_cpu, pc, insn >> 16, sc + 1, 0, 1);
	  sc[1].argbuf.trace_p = trace_p;
	  sc[1].argbuf.profile_p = profile_p;

	  @cpu@_emit_before (current_cpu, sc + 2, pc, 0);

	  /* Note that this can never be a cti.  No cti's go in the S pipeline.  */
	  id2 = emit_16 (current_cpu, pc + 2, insn & 0x7fff, sc + 3, 0, 0);
	  sc[3].argbuf.trace_p = trace_p;
	  sc[3].argbuf.profile_p = profile_p;

	  /* Set sc/snc insns notion of where to skip to.  */
	  if (IDESC_SKIP_P (id))
	    SEM_SKIP_COMPILE (current_cpu, sc, 4);

	  /* Emit code to finish executing the semantics
	     (write back the results).  */
	  emit_par_finish (current_cpu, pc, sc + 4, sc + 1, id, sc + 3, id2);

	  @cpu@_emit_after (current_cpu, sc + 5, pc);

	  return id;
	}

	/* Decode the 32 bit INSN located at PC into SC's argument buffer and
	   fill that buffer in.  The decoder is handed the upper 16 bits of INSN
	   as its third argument and the whole word as its fourth.  FAST_P is
	   passed through to the fill routine.  Returns the decoded descriptor.  */

	static INLINE const IDESC *
	emit_32 (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn,
	         SCACHE *sc, int fast_p)
	{
	  ARGBUF *args = &sc->argbuf;
	  const IDESC *decoded;

	  decoded = @cpu@_decode (current_cpu, pc, (USI) insn >> 16, insn, args);
	  @cpu@_fill_argbuf (current_cpu, args, decoded, pc, fast_p);
	  return decoded;
	}

	/* Emit a 32 bit insn wrapped in before/after handlers.  Three
	   consecutive entries starting at SC are written: sc[0] gets the before
	   handler, sc[1] the insn itself and sc[2] the after handler.  TRACE_P
	   and PROFILE_P are stored in the insn's argument buffer.  Returns the
	   insn's descriptor.  */

	static INLINE const IDESC *
	emit_full32 (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn,
	             SCACHE *sc, int trace_p, int profile_p)
	{
	  const IDESC *id;

	  @cpu@_emit_before (current_cpu, sc, pc, 1);
	  id = emit_32 (current_cpu, pc, insn, sc + 1, 0);
	  @cpu@_emit_after (current_cpu, sc + 2, pc);
	  /* Set the flags only after the entry has been filled in.  */
	  sc[1].argbuf.trace_p = trace_p;
	  sc[1].argbuf.profile_p = profile_p;
	  return id;
	}

	EOF

	;;

	xinit)

	# Nothing needed.

	;;

	xextract-pbb)

	# Inputs: current_cpu, pc, sc, max_insns, FAST_P
	# Outputs: sc, pc
	# sc must be left pointing past the last created entry.
	# pc must be left pointing past the last created entry.
	# If the pbb is terminated by a cti insn, SET_CTI_VPC(sc) must be called
	# to record the vpc of the cti insn.
	# SET_INSN_COUNT(n) must be called to record number of real insns.

	cat <<EOF
	{
	  /* Compile one pbb: starting at pc, decode insns into consecutive
	     scache entries (advancing sc and pc) until a cti has been emitted
	     or max_insns has been used up, then report the number of real
	     insns with SET_INSN_COUNT.  max_insns is decremented by the number
	     of scache entries consumed, icount by the number of real insns.  */
	  const IDESC *idesc;
	  int icount = 0;

	  /* pc is not on a 4 byte boundary: handle the single 16 bit insn there
	     first so that the loops below always fetch whole 32 bit words.  */
	  if ((pc & 3) != 0)
	    {
	      /* This occurs when single stepping and when compiling the
	         not-taken part of conditional branches.  */
	      UHI insn = GETIMEMUHI (current_cpu, pc);
	      int trace_p = PC_IN_TRACE_RANGE_P (current_cpu, pc);
	      int profile_p = PC_IN_PROFILE_RANGE_P (current_cpu, pc);
	      SCACHE *cti_sc; /* ??? tmp hack */

	      /* A parallel insn isn't allowed here, but we don't mind nops.
	         ??? We need to wait until the insn is executed before signalling
	         the error, for situations where such signalling is wanted.  */
	#if 0
	      if ((insn & 0x8000) != 0
	          && (insn & 0x7fff) != 0x7000) /* parallel nops are ok */
	        sim_engine_invalid_insn (current_cpu, pc, 0);
	#endif

	      /* Only emit before/after handlers if necessary.  */
	      if (FAST_P || (! trace_p && ! profile_p))
	        {
	          idesc = emit_16 (current_cpu, pc, insn & 0x7fff, sc, FAST_P, 0);
	          cti_sc = sc;
	          ++sc;
	          --max_insns;
	        }
	      else
	        {
	          /* emit_full16 fills three entries (before, insn, after); the
	             insn itself is the middle one.  */
	          idesc = emit_full16 (current_cpu, pc, insn & 0x7fff, sc,
	                               trace_p, profile_p);
	          cti_sc = sc + 1;
	          sc += 3;
	          max_insns -= 3;
	        }
	      ++icount;
	      pc += 2;
	      if (IDESC_CTI_P (idesc))
	        {
	          SET_CTI_VPC (cti_sc);
	          goto Finish;
	        }
	    }

	  /* There are two copies of the compiler: full(!fast) and fast.
	     The "full" case emits before/after handlers for each insn.
	     Having two copies of this code is a tradeoff, having one copy
	     seemed a bit more difficult to read (due to constantly testing
	     FAST_P).  ??? On the other hand, with address ranges we'll want to
	     omit before/after handlers for unwanted insns.  Having separate
	     loops for FAST/!FAST avoids constantly doing the test in the loop,
	     but typically FAST_P is a constant and such tests will get
	     optimized out.  */

	  if (FAST_P)
	    {
	      while (max_insns > 0)
	        {
	          USI insn = GETIMEMUSI (current_cpu, pc);
	          /* Top bit of the fetched word set.  */
	          if ((SI) insn < 0)
	            {
	              /* 32 bit insn */
	              idesc = emit_32 (current_cpu, pc, insn, sc, 1);
	              ++sc;
	              --max_insns;
	              ++icount;
	              pc += 4;
	              if (IDESC_CTI_P (idesc))
	                {
	                  /* sc has already been advanced past the insn.  */
	                  SET_CTI_VPC (sc - 1);
	                  break;
	                }
	            }
	          else
	            {
	              if ((insn & 0x8000) != 0) /* parallel? */
	                {
	                  /* Number of scache entries used for this pair.  */
	                  int up_count;

	                  /* Left slot matches the trap pattern.  */
	                  if (((insn >> 16) & 0xfff0) == 0x10f0)
	                    {
	                      /* FIXME: No need to handle this sequentially if
	                         system calls will be able to execute after
	                         second insn in parallel.
	                         ( trap #num || insn ) */
	                      /* insn */
	                      idesc = emit_16 (current_cpu, pc + 2, insn & 0x7fff,
	                                       sc, 1, 0);
	                      /* trap */
	                      emit_16 (current_cpu, pc, insn >> 16, sc + 1, 1, 0);
	                      up_count = 2;
	                    }
	                  else
	                    {
	                      /* Yep.  Here's the "interesting" [sic] part.  */
	                      idesc = emit_parallel (current_cpu, pc, insn, sc, 1);
	                      up_count = 3;
	                    }
	                  sc += up_count;
	                  max_insns -= up_count;
	                  icount += 2;
	                  pc += 4;
	                  if (IDESC_CTI_P (idesc))
	                    {
	                      /* Back up to the first entry emitted for this
	                         pair.  */
	                      SET_CTI_VPC (sc - up_count);
	                      break;
	                    }
	                }
	              else /* 2 serial 16 bit insns */
	                {
	                  /* First insn: upper half of the word.  */
	                  idesc = emit_16 (current_cpu, pc, insn >> 16, sc, 1, 0);
	                  ++sc;
	                  --max_insns;
	                  ++icount;
	                  pc += 2;
	                  if (IDESC_CTI_P (idesc))
	                    {
	                      SET_CTI_VPC (sc - 1);
	                      break;
	                    }
	                  /* While we're guaranteed that there's room to extract
	                     the insn, when single stepping we can't; the pbb
	                     must stop after the first insn.  */
	                  if (max_insns == 0)
	                    break;
	                  /* Second insn: lower half of the word with bit 15
	                     masked.  */
	                  idesc = emit_16 (current_cpu, pc, insn & 0x7fff, sc, 1, 0);
	                  ++sc;
	                  --max_insns;
	                  ++icount;
	                  pc += 2;
	                  if (IDESC_CTI_P (idesc))
	                    {
	                      SET_CTI_VPC (sc - 1);
	                      break;
	                    }
	                }
	            }
	        }
	    }
	  else /* ! FAST_P */
	    {
	      while (max_insns > 0)
	        {
	          USI insn = GETIMEMUSI (current_cpu, pc);
	          int trace_p = PC_IN_TRACE_RANGE_P (current_cpu, pc);
	          int profile_p = PC_IN_PROFILE_RANGE_P (current_cpu, pc);
	          SCACHE *cti_sc; /* ??? tmp hack */
	          /* Top bit of the fetched word set.  */
	          if ((SI) insn < 0)
	            {
	              /* 32 bit insn
	                 Only emit before/after handlers if necessary.  */
	              if (trace_p || profile_p)
	                {
	                  /* Three entries (before, insn, after); the insn is
	                     the middle one.  */
	                  idesc = emit_full32 (current_cpu, pc, insn, sc,
	                                       trace_p, profile_p);
	                  cti_sc = sc + 1;
	                  sc += 3;
	                  max_insns -= 3;
	                }
	              else
	                {
	                  idesc = emit_32 (current_cpu, pc, insn, sc, 0);
	                  cti_sc = sc;
	                  ++sc;
	                  --max_insns;
	                }
	              ++icount;
	              pc += 4;
	              if (IDESC_CTI_P (idesc))
	                {
	                  SET_CTI_VPC (cti_sc);
	                  break;
	                }
	            }
	          else
	            {
	              if ((insn & 0x8000) != 0) /* parallel? */
	                {
	                  /* Yep.  Here's the "interesting" [sic] part.
	                     Only emit before/after handlers if necessary.  */
	                  if (trace_p || profile_p)
	                    {
	                      /* Left slot matches the trap pattern.  */
	                      if (((insn >> 16) & 0xfff0) == 0x10f0)
	                        {
	                          /* FIXME: No need to handle this sequentially
	                             if system calls will be able to execute
	                             after second insn in parallel.
	                             ( trap #num || insn ) */
	                          /* NOTE(review): trace_p/profile_p are passed
	                             as 0 to both calls although this branch is
	                             only taken when one of them is set; also
	                             idesc is the non-trap insn's descriptor, so
	                             the cti test below is made on that insn --
	                             confirm both are intended.  */
	                          /* insn */
	                          idesc = emit_full16 (current_cpu, pc + 2,
	                                               insn & 0x7fff, sc, 0, 0);
	                          /* trap */
	                          emit_full16 (current_cpu, pc, insn >> 16, sc + 3,
	                                       0, 0);
	                        }
	                      else
	                        {
	                          idesc = emit_full_parallel (current_cpu, pc, insn,
	                                                      sc, trace_p,
	                                                      profile_p);
	                        }
	                      /* Either way six entries were filled (two
	                         emit_full16 calls of three entries each, or
	                         sc[0] through sc[5] in emit_full_parallel) and
	                         sc[1] holds the insn whose descriptor is in
	                         idesc.  */
	                      cti_sc = sc + 1;
	                      sc += 6;
	                      max_insns -= 6;
	                    }
	                  else
	                    {
	                      /* Number of scache entries used for this pair.  */
	                      int up_count;

	                      if (((insn >> 16) & 0xfff0) == 0x10f0)
	                        {
	                          /* FIXME: No need to handle this sequentially
	                             if system calls will be able to execute
	                             after second insn in parallel.
	                             ( trap #num || insn ) */
	                          /* insn */
	                          idesc = emit_16 (current_cpu, pc + 2,
	                                           insn & 0x7fff, sc, 0, 0);
	                          /* trap */
	                          emit_16 (current_cpu, pc, insn >> 16, sc + 1,
	                                   0, 0);
	                          up_count = 2;
	                        }
	                      else
	                        {
	                          idesc = emit_parallel (current_cpu, pc, insn,
	                                                 sc, 0);
	                          up_count = 3;
	                        }
	                      cti_sc = sc;
	                      sc += up_count;
	                      max_insns -= up_count;
	                    }
	                  icount += 2;
	                  pc += 4;
	                  if (IDESC_CTI_P (idesc))
	                    {
	                      SET_CTI_VPC (cti_sc);
	                      break;
	                    }
	                }
	              else /* 2 serial 16 bit insns */
	                {
	                  /* Only emit before/after handlers if necessary.  */
	                  if (trace_p || profile_p)
	                    {
	                      idesc = emit_full16 (current_cpu, pc, insn >> 16, sc,
	                                           trace_p, profile_p);
	                      cti_sc = sc + 1;
	                      sc += 3;
	                      max_insns -= 3;
	                    }
	                  else
	                    {
	                      idesc = emit_16 (current_cpu, pc, insn >> 16, sc,
	                                       0, 0);
	                      cti_sc = sc;
	                      ++sc;
	                      --max_insns;
	                    }
	                  ++icount;
	                  pc += 2;
	                  if (IDESC_CTI_P (idesc))
	                    {
	                      SET_CTI_VPC (cti_sc);
	                      break;
	                    }
	                  /* While we're guaranteed that there's room to extract
	                     the insn, when single stepping we can't; the pbb
	                     must stop after the first insn.  */
	                  if (max_insns <= 0)
	                    break;
	                  /* Use the same trace/profile address for the 2nd insn.
	                     Saves us having to compute it and they come in pairs
	                     anyway (e.g. can never branch to the 2nd insn).  */
	                  if (trace_p || profile_p)
	                    {
	                      idesc = emit_full16 (current_cpu, pc, insn & 0x7fff,
	                                           sc, trace_p, profile_p);
	                      cti_sc = sc + 1;
	                      sc += 3;
	                      max_insns -= 3;
	                    }
	                  else
	                    {
	                      idesc = emit_16 (current_cpu, pc, insn & 0x7fff, sc,
	                                       0, 0);
	                      cti_sc = sc;
	                      ++sc;
	                      --max_insns;
	                    }
	                  ++icount;
	                  pc += 2;
	                  if (IDESC_CTI_P (idesc))
	                    {
	                      SET_CTI_VPC (cti_sc);
	                      break;
	                    }
	                }
	            }
	        }
	    }

	 Finish:
	  SET_INSN_COUNT (icount);
	}
	EOF

	;;

	xfull-exec-pbb)

	# Inputs: current_cpu, vpc, FAST_P
	# Outputs: vpc
	# vpc is the virtual program counter.
	#
	# The emitted C text defines DEFINE_SWITCH and then includes semx-switch.c.
	# Presumably that file supplies the switch-based execution code when
	# DEFINE_SWITCH is defined -- confirm against semx-switch.c.

	cat <<EOF
	#define DEFINE_SWITCH
	#include "semx-switch.c"
	EOF

	;;

	# Any other command: report it on stderr and exit with status 1.
	*)
	echo "Invalid argument to mainloop.in: $1" >&2
	exit 1
	;;

	esac