gas: fold is_end_of_line[] into lex_type[]

... by way of introducing LEX_EOL and LEX_EOS. As a prerequisite, convert the remaining open-coded accesses.

The Alpha change is actually a functional one: The array slot for '!' having been set to 1 is very unlikely to have been correct. 1 means "end of line", when surely "end of statement" was always meant.

commit: 3e1457049444ad390a871f437b83640b409f6cb4 [log] [tgz]
author: Jan Beulich <jbeulich@suse.com> Fri Mar 07 08:28:59 2025 +0100
committer: Jan Beulich <jbeulich@suse.com> Fri Mar 07 08:28:59 2025 +0100
tree: 582652002edb86f03462e49b35879b63a864018c
parent: 62d9ebbd4ead93588f31dedda5e02a6702afd166 [diff]
diff --git a/gas/cond.c b/gas/cond.c
index 9213f91..c14399d 100644
--- a/gas/cond.c
+++ b/gas/cond.c

@@ -141,7 +141,7 @@
   if (current_cframe != NULL && current_cframe->ignoring)
     {
       operand.X_add_number = 0;
-      while (! is_end_of_line[(unsigned char) *input_line_pointer])
+      while (! is_end_of_stmt (*input_line_pointer))
 	++input_line_pointer;
     }
   else
@@ -201,7 +201,7 @@
       int is_eol;
 
       SKIP_WHITESPACE ();
-      is_eol = is_end_of_line[(unsigned char) *input_line_pointer];
+      is_eol = is_end_of_stmt (*input_line_pointer);
       cframe.ignoring = (test_blank == !is_eol);
     }
 
@@ -232,7 +232,7 @@
     {
       ++s;
       ++input_line_pointer;
-      while (! is_end_of_line[(unsigned char) *input_line_pointer])
+      while (! is_end_of_stmt (*input_line_pointer))
 	{
 	  *s++ = *input_line_pointer++;
 	  if (s[-1] == '\'')
@@ -247,7 +247,7 @@
   else
     {
       while (*input_line_pointer != terminator
-	     && ! is_end_of_line[(unsigned char) *input_line_pointer])
+	     && ! is_end_of_stmt (*input_line_pointer))
 	++input_line_pointer;
       s = input_line_pointer;
       while (s > ret && is_whitespace (s[-1]))
@@ -330,7 +330,7 @@
 
   if (current_cframe == NULL || current_cframe->ignoring)
     {
-      while (! is_end_of_line[(unsigned char) *input_line_pointer])
+      while (! is_end_of_stmt (*input_line_pointer))
 	++input_line_pointer;
 
       if (current_cframe == NULL)
@@ -401,7 +401,7 @@
 
   if (flag_mri)
     {
-      while (! is_end_of_line[(unsigned char) *input_line_pointer])
+      while (! is_end_of_stmt (*input_line_pointer))
 	++input_line_pointer;
     }
 
@@ -448,7 +448,7 @@
 
   if (flag_mri)
     {
-      while (! is_end_of_line[(unsigned char) *input_line_pointer])
+      while (! is_end_of_stmt (*input_line_pointer))
 	++input_line_pointer;
     }
 

diff --git a/gas/config/tc-alpha.c b/gas/config/tc-alpha.c
index b39a7c8..d6245e5 100644
--- a/gas/config/tc-alpha.c
+++ b/gas/config/tc-alpha.c

@@ -888,7 +888,7 @@
 
 #ifdef RELOC_OP_P
   /* ??? Wrest control of ! away from the regular expression parser.  */
-  is_end_of_line[(unsigned char) '!'] = 1;
+  lex_type[(unsigned char) '!'] |= LEX_EOS;
 #endif
 
   while (tok < end_tok && *input_line_pointer)
@@ -1029,21 +1029,21 @@
   debug_exp (orig_tok, ntok - (end_tok - tok));
 #endif
 #ifdef RELOC_OP_P
-  is_end_of_line[(unsigned char) '!'] = 0;
+  lex_type[(unsigned char) '!'] &= ~LEX_EOS;
 #endif
 
   return ntok - (end_tok - tok);
 
  err:
 #ifdef RELOC_OP_P
-  is_end_of_line[(unsigned char) '!'] = 0;
+  lex_type[(unsigned char) '!'] &= ~LEX_EOS;
 #endif
   input_line_pointer = old_input_line_pointer;
   return TOKENIZE_ERROR;
 
 #ifdef RELOC_OP_P
  err_report:
-  is_end_of_line[(unsigned char) '!'] = 0;
+  lex_type[(unsigned char) '!'] &= ~LEX_EOS;
 #endif
   input_line_pointer = old_input_line_pointer;
   return TOKENIZE_ERROR_REPORT;

diff --git a/gas/expr.c b/gas/expr.c
index b22346e..099be43 100644
--- a/gas/expr.c
+++ b/gas/expr.c

@@ -298,7 +298,7 @@
 #define valuesize 32
 #endif
 
-  if (is_end_of_line[(unsigned char) *input_line_pointer])
+  if (is_end_of_stmt (*input_line_pointer))
     {
       expressionP->X_op = O_absent;
       return;
@@ -803,7 +803,7 @@
   SKIP_WHITESPACE ();		/* Leading whitespace is part of operand.  */
   c = *input_line_pointer++;	/* input_line_pointer -> past char in c.  */
 
-  if (is_end_of_line[(unsigned char) c])
+  if (is_end_of_stmt (c))
     goto eol;
 
   switch (c)
@@ -946,7 +946,7 @@
 	      /* If it says "0f" and it could possibly be a floating point
 		 number, make it one.  Otherwise, make it a local label,
 		 and try to deal with parsing the rest later.  */
-	      if (!is_end_of_line[(unsigned char) input_line_pointer[1]]
+	      if (!is_end_of_stmt (input_line_pointer[1])
 		  && strchr (FLT_CHARS, 'f') != NULL)
 		{
 		  char *cp = input_line_pointer + 1;
@@ -1668,7 +1668,7 @@
   c = *input_line_pointer & 0xff;
   *num_chars = 1;
 
-  if (is_end_of_line[c])
+  if (is_end_of_stmt (c))
     return O_illegal;
 
 #ifdef md_operator

diff --git a/gas/listing.c b/gas/listing.c
index be71597..a84541e 100644
--- a/gas/listing.c
+++ b/gas/listing.c

@@ -362,8 +362,7 @@
 	  int seen_slash = 0;
 
 	  for (copy = input_line_pointer;
-	       *copy && (seen_quote
-			 || is_end_of_line [(unsigned char) *copy] != 1);
+	       seen_quote ? *copy : !is_end_of_line (*copy);
 	       copy++)
 	    {
 	      if (seen_slash)
@@ -1580,7 +1579,7 @@
     {
       if (quoted
 	  ? *input_line_pointer == '\"'
-	  : is_end_of_line[(unsigned char) *input_line_pointer])
+	  : is_end_of_stmt (*input_line_pointer))
 	{
 	  if (listing)
 	    {

diff --git a/gas/read.c b/gas/read.c
index 2939c68..e451721 100644
--- a/gas/read.c
+++ b/gas/read.c

@@ -79,7 +79,7 @@
 #ifndef CR_EOL
 #define LEX_CR LEX_WHITE
 #else
-#define LEX_CR 0
+#define LEX_CR LEX_EOL
 #endif
 
 #ifndef LEX_AT
@@ -118,7 +118,7 @@
 
 /* Used by is_... macros. our ctype[].  */
 char lex_type[256] = {
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, LEX_CR, 0, 0,	/* @ABCDEFGHIJKLMNO */
+  0x20, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0x20, 0, 0, LEX_CR, 0, 0, /* @ABCDEFGHIJKLMNO */
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* PQRSTUVWXYZ[\]^_ */
   8, 0, 0, LEX_HASH, LEX_DOLLAR, LEX_PCT, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, /* _!"#$%&'()*+,-./ */
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, LEX_QM,	/* 0123456789:;<=>? */
@@ -136,32 +136,6 @@
   3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
 };
 
-/* In: a character.
-   Out: 1 if this character ends a line.
-	2 if this character is a line separator.  */
-char is_end_of_line[256] = {
-#ifdef CR_EOL
-  1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,	/* @abcdefghijklmno */
-#else
-  1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,	/* @abcdefghijklmno */
-#endif
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* _!"#$%&'()*+,-./ */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0123456789:;<=>? */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0	/* */
-};
-
 #ifndef TC_CASE_SENSITIVE
 char original_case_string[128];
 #endif
@@ -290,8 +264,7 @@
 #endif
   /* Use machine dependent syntax.  */
   for (p = tc_line_separator_chars; *p; p++)
-    is_end_of_line[(unsigned char) *p] = 2;
-  /* Use more.  FIXME-SOMEDAY.  */
+    lex_type[(unsigned char) *p] = LEX_EOS;
 
   if (flag_mri)
     lex_type['?'] = LEX_BEGIN_NAME | LEX_NAME;
@@ -944,7 +917,8 @@
 	  /* We now have input_line_pointer->1st char of next line.
 	     If input_line_pointer [-1] == '\n' then we just
 	     scanned another line: so bump line counters.  */
-	  was_new_line = is_end_of_line[(unsigned char) input_line_pointer[-1]];
+	  was_new_line = lex_type[(unsigned char) input_line_pointer[-1]]
+			 & (LEX_EOL | LEX_EOS);
 	  if (was_new_line)
 	    {
 	      symbol_set_value_now (&dot_symbol);
@@ -993,7 +967,7 @@
 #endif
 
 	  next_char = *input_line_pointer;
-	  if (was_new_line == 1
+	  if ((was_new_line & LEX_EOL)
              && (strchr (line_comment_chars, '#')
 		  ? next_char == '#'
 		  : next_char && strchr (line_comment_chars, next_char)))
@@ -1087,7 +1061,7 @@
 			    }
 			  if (strncasecmp (rest, "MACRO", 5) == 0
 			      && (is_whitespace (rest[5])
-				  || is_end_of_line[(unsigned char) rest[5]]))
+				  || is_end_of_stmt (rest[5])))
 			    mri_line_macro = 1;
 			}
 
@@ -1343,7 +1317,7 @@
 	    }
 
 	  /* Empty statement?  */
-	  if (is_end_of_line[(unsigned char) next_char])
+	  if (is_end_of_stmt (next_char))
 	    continue;
 
 	  if ((LOCAL_LABELS_DOLLAR || LOCAL_LABELS_FB) && ISDIGIT (next_char))
@@ -1502,7 +1476,7 @@
   know (flag_m68k_mri);
 
   for (s = input_line_pointer;
-       ((!is_end_of_line[(unsigned char) *s] && !is_whitespace (*s))
+       ((!is_end_of_stmt (*s) && !is_whitespace (*s))
 	|| inquote);
        s++)
     {
@@ -1511,7 +1485,7 @@
     }
 #else
   for (s = input_line_pointer;
-       !is_end_of_line[(unsigned char) *s];
+       !is_end_of_stmt (*s);
        s++)
     ;
 #endif
@@ -1530,7 +1504,7 @@
 
   input_line_pointer = stop;
   *stop = stopc;
-  while (!is_end_of_line[(unsigned char) *input_line_pointer])
+  while (!is_end_of_stmt (*input_line_pointer))
     ++input_line_pointer;
 }
 
@@ -1562,7 +1536,7 @@
   if (flag_mri)
     stop = mri_comment_field (&stopc);
 
-  if (is_end_of_line[(unsigned char) *input_line_pointer])
+  if (is_end_of_stmt (*input_line_pointer))
     {
       if (arg < 0)
 	align = 0;
@@ -2038,7 +2012,7 @@
 	 backquote.  */
       if (flag_m68k_mri
 	  && *input_line_pointer == '\''
-	  && is_end_of_line[(unsigned char) input_line_pointer[1]])
+	  && is_end_of_stmt (input_line_pointer[1]))
 	++input_line_pointer;
 
       demand_empty_rest_of_line ();
@@ -2161,7 +2135,7 @@
 		break;
 	      }
 
-	  if (!is_end_of_line[(unsigned char)*input_line_pointer])
+	  if (!is_end_of_stmt (*input_line_pointer))
 	    file = NULL;
         }
 
@@ -2196,7 +2170,7 @@
       /* The MRI assembler permits the start symbol to follow .end,
 	 but we don't support that.  */
       SKIP_WHITESPACE ();
-      if (!is_end_of_line[(unsigned char) *input_line_pointer]
+      if (!is_end_of_stmt (*input_line_pointer)
 	  && *input_line_pointer != '*'
 	  && *input_line_pointer != '!')
 	as_warn (_("start address not supported"));
@@ -2425,7 +2399,7 @@
 	{
 	  input_line_pointer++;
 	  SKIP_WHITESPACE ();
-	  if (is_end_of_line[(unsigned char) *input_line_pointer])
+	  if (is_end_of_stmt (*input_line_pointer))
 	    c = '\n';
 	}
 
@@ -2485,7 +2459,7 @@
 
   type = LINKONCE_DISCARD;
 
-  if (!is_end_of_line[(unsigned char) *input_line_pointer])
+  if (!is_end_of_stmt (*input_line_pointer))
     {
       char *s;
       char c;
@@ -4016,7 +3990,7 @@
   SKIP_WHITESPACE ();
   if (input_line_pointer > buffer_limit)
     return;
-  if (is_end_of_line[(unsigned char) *input_line_pointer])
+  if (is_end_of_stmt (*input_line_pointer))
     input_line_pointer++;
   else
     {
@@ -5784,7 +5758,7 @@
       	{
 	  c = * input_line_pointer ++;
 
-	  if (c >= 256 || is_end_of_line [c])
+	  if (c >= 256 || is_end_of_stmt (c))
 	    {
 	      as_bad (_("end of line encountered inside .base64 string"));
 	      ignore_rest_of_line ();
@@ -6311,7 +6285,7 @@
 is_it_end_of_statement (void)
 {
   SKIP_WHITESPACE ();
-  return (is_end_of_line[(unsigned char) *input_line_pointer]);
+  return is_end_of_stmt (*input_line_pointer);
 }
 
 void
@@ -6499,7 +6473,7 @@
     {
       SKIP_WHITESPACE ();
       i = 0;
-      while (!is_end_of_line[(unsigned char) *input_line_pointer]
+      while (!is_end_of_stmt (*input_line_pointer)
 	     && !is_whitespace (*input_line_pointer))
 	{
 	  obstack_1grow (&notes, *input_line_pointer);
@@ -6509,7 +6483,7 @@
 
       obstack_1grow (&notes, '\0');
       filename = (char *) obstack_finish (&notes);
-      while (!is_end_of_line[(unsigned char) *input_line_pointer])
+      while (!is_end_of_stmt (*input_line_pointer))
 	++input_line_pointer;
     }
 
@@ -6795,7 +6769,7 @@
   char inquote = '\0';
   int inescape = 0;
 
-  while (!is_end_of_line[(unsigned char) *s]
+  while (!is_end_of_stmt (*s)
 	 || (inquote && !ISCNTRL (*s))
 	 || (inquote == '\'' && flag_mri)
 #ifdef TC_EOL_IN_INSN
@@ -6804,7 +6778,7 @@
 	 /* PR 6926:  When we are parsing the body of a macro the sequence
 	    \@ is special - it refers to the invocation count.  If the @
 	    character happens to be registered as a line-separator character
-	    by the target, then the is_end_of_line[] test above will have
+	    by the target, then the is_end_of_stmt() test above will have
 	    returned true, but we need to ignore the line separating
 	    semantics in this particular case.  */
 	 || (in_macro && inescape && *s == '@')

diff --git a/gas/read.h b/gas/read.h
index 535f028..f6d0167 100644
--- a/gas/read.h
+++ b/gas/read.h

@@ -41,6 +41,8 @@
 #define LEX_BEGIN_NAME	(2)	/* may begin a name */
 #define LEX_END_NAME	(4)	/* ends a name */
 #define LEX_WHITE	(8)	/* whitespace */
+#define LEX_EOS	(0x10)  /* end of statement */
+#define LEX_EOL	(0x20)  /* end of line */
 
 #define is_name_beginner(c) \
   ( lex_type[(unsigned char) (c)] & LEX_BEGIN_NAME )
@@ -56,12 +58,11 @@
 #undef ISBLANK
 
 /* The distinction of "line" and "statement" sadly is blurred by unhelpful
-   naming of e.g. the underlying array.  Most users really mean "end of
-   statement".  Going forward only these wrappers are supposed to be used.  */
+   naming in a few places.  Most users really mean "end of statement".  */
 #define is_end_of_stmt(c) \
-  (is_end_of_line[(unsigned char) (c)])
+  ( lex_type[(unsigned char) (c)] & (LEX_EOS | LEX_EOL) )
 #define is_end_of_line(c) \
-  (is_end_of_line[(unsigned char) (c)] == 1)
+  ( lex_type[(unsigned char) (c)] & LEX_EOL )
 
 #ifndef is_a_char
 #define CHAR_MASK	(0xff)
@@ -70,7 +71,6 @@
 #endif /* is_a_char() */
 
 extern char lex_type[];
-extern char is_end_of_line[];
 
 extern int is_it_end_of_statement (void);
 extern char *find_end_of_line (char *, int);
commit	3e1457049444ad390a871f437b83640b409f6cb4	[log] [tgz]
author	Jan Beulich <jbeulich@suse.com>	Fri Mar 07 08:28:59 2025 +0100
committer	Jan Beulich <jbeulich@suse.com>	Fri Mar 07 08:28:59 2025 +0100
tree	582652002edb86f03462e49b35879b63a864018c
parent	62d9ebbd4ead93588f31dedda5e02a6702afd166 [diff]