 /* gcc/java/lex.c - gcc - Git at Google */

 /* Language lexer for the GNU compiler for the Java(TM) language.
    Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
    Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)

 This file is part of GNU CC.

 GNU CC is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2, or (at your option)
 any later version.

 GNU CC is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with GNU CC; see the file COPYING.  If not, write to
 the Free Software Foundation, 59 Temple Place - Suite 330,
 Boston, MA 02111-1307, USA.

 Java and all Java-based marks are trademarks or registered trademarks
 of Sun Microsystems, Inc. in the United States and other countries.
 The Free Software Foundation is independent of Sun Microsystems, Inc.  */

 /* It defines java_lex (yylex) that reads a Java ASCII source file
    possibly containing Unicode escape sequence or utf8 encoded
    characters and returns a token for everything found but comments,
    white spaces and line terminators. When necessary, it also fills
    the java_lval (yylval) union. It's implemented to be called by a
    re-entrant parser generated by Bison.

    The lexical analysis conforms to the Java grammar described in "The
    Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
    Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */

 #include "keyword.h"
 #include "flags.h"
 #include "chartables.h"

 /* Function declarations.  Everything below is private to this file
    except java_new_lexer.  Each function is declared exactly once.  */
 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
 static void java_unicode_2_utf8 PARAMS ((unicode_t));
 static void java_lex_error PARAMS ((const char *, int));
 #ifndef JC1_LITE
 static int java_is_eol PARAMS ((FILE *, int));
 static tree build_wfl_node PARAMS ((tree));
 #endif
 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
 static int java_parse_escape_sequence PARAMS ((void));
 static int java_start_char_p PARAMS ((unicode_t));
 static int java_part_char_p PARAMS ((unicode_t));
 static int java_parse_doc_section PARAMS ((int));
 static void java_parse_end_comment PARAMS ((int));
 static int java_get_unicode PARAMS ((void));
 static int java_read_unicode PARAMS ((java_lexer *, int *));
 static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
 							     int *));
 static int java_read_char PARAMS ((java_lexer *));
 static void java_allocate_new_line PARAMS ((void));
 static void java_unget_unicode PARAMS ((void));
 static unicode_t java_sneak_unicode PARAMS ((void));
 #ifndef JC1_LITE
 static int utf8_cmp PARAMS ((const unsigned char *, int, const char *));
 #endif

 /* Not static: this one has external linkage.  */
 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
 #ifndef JC1_LITE
 static void error_if_numeric_overflow PARAMS ((tree));
 #endif

 #ifdef HAVE_ICONV
 /* This is nonzero if we have initialized `need_byteswap'.  It is set
    by java_new_lexer the first time an iconv-backed lexer is created,
    so the probe conversion runs at most once.  */
 static int byteswap_init = 0;

 /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
    big-endian order -- not native endian order.  We handle this by
    doing a conversion once at startup and seeing what happens.  This
    flag holds the results of this determination; each new lexer copies
    it into its own `byte_swap' field.  */
 static int need_byteswap = 0;
 #endif

 /* Prepare the lexer for a new source file.  FINPUT is the stream to
    read and ENCODING the name of its character encoding; both are
    handed to java_new_lexer.  Unless JC1_LITE is defined, this also
    creates the identifiers and WFL nodes shared by the parser the first
    time they are needed, and resets the per-file parts of the parser
    context CTXP.  */
 void
 java_init_lex (finput, encoding)
      FILE *finput;
      const char *encoding;
 {
 #ifndef JC1_LITE
   /* NOTE(review): this is an automatic variable initialized to 0, so
      the `!java_lang_imported' test below is true on every call and
      java.lang is pushed on the context's import-on-demand list each
      time.  Confirm that this is intended before making it static.  */
   int java_lang_imported = 0;

   /* Lazily create the global identifiers; they survive across calls.  */
   if (!java_lang_id)
     java_lang_id = get_identifier ("java.lang");
   if (!java_lang_cloneable)
     java_lang_cloneable = get_identifier ("java.lang.Cloneable");
   if (!java_io_serializable)
     java_io_serializable = get_identifier ("java.io.Serializable");
   if (!inst_id)
     inst_id = get_identifier ("inst$");
   if (!wpv_id)
     wpv_id = get_identifier ("write_parm_value$");

   if (!java_lang_imported)
     {
       /* Prepend java.lang to the list of on-demand imports.  */
       tree node = build_tree_list
 	(build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
       read_import_dir (TREE_PURPOSE (node));
       TREE_CHAIN (node) = ctxp->import_demand_list;
       ctxp->import_demand_list = node;
       java_lang_imported = 1;
     }

   /* Lazily create the WFL nodes and the label identifier.  */
   if (!wfl_operator)
     wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
   if (!label_id)
     label_id = get_identifier ("$L");
   if (!wfl_append)
     wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
   if (!wfl_string_buffer)
     wfl_string_buffer =
       build_expr_wfl (get_identifier (flag_emit_class_files
 				      ? "java.lang.StringBuffer"
 				      : "gnu.gcj.runtime.StringBuffer"),
 		      NULL, 0, 0);
   if (!wfl_to_string)
     wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);

   /* Clear the three initializer lists of the current context.  */
   CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
     CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;

   /* NOTE(review): 11 is presumably the number of elements of
      modifier_ctx -- confirm against its declaration.  */
   memset ((PTR) ctxp->modifier_ctx, 0, 11*sizeof (ctxp->modifier_ctx[0]));
   memset ((PTR) current_jcf, 0, sizeof (JCF));
   ctxp->current_parsed_class = NULL;
   ctxp->package = NULL_TREE;
 #endif

   /* Reset the position and line buffers, then build the lexer proper.  */
   ctxp->filename = input_filename;
   ctxp->lineno = lineno = 0;
   ctxp->p_line = NULL;
   ctxp->c_line = NULL;
   ctxp->java_error_flag = 0;
   ctxp->lexer = java_new_lexer (finput, encoding);
 }

 /* Return a printable form of character I of LINE: a `\uXXXX' escape
    when the character was written as a Unicode escape or lies outside
    the ASCII range, otherwise the character itself.  The result lives
    in a static buffer that the next call overwrites.  */
 static char *
 java_sprint_unicode (line, i)
     struct java_line *line;
     int i;
 {
   static char buffer [10];
   /* 128 (0x80) is already outside ASCII, so it has to be escaped as
      well; copying it into a plain char would emit a stray non-ASCII
      byte.  */
   if (line->unicode_escape_p [i] || line->line [i] >= 128)
     sprintf (buffer, "\\u%04x", line->line [i]);
   else
     {
       buffer [0] = line->line [i];
       buffer [1] = '\0';
     }
   return buffer;
 }

 static unicode_t
 java_sneak_unicode ()
 {
   return (ctxp->c_line->line [ctxp->c_line->current]);
 }

 static void
 java_unget_unicode ()
 {
   if (!ctxp->c_line->current)
     /* Can't unget unicode.  */
     abort ();

   ctxp->c_line->current--;
   ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
 }

 static void
 java_allocate_new_line ()
 {
   unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
   char ahead_escape_p = (ctxp->c_line ?
 			 ctxp->c_line->unicode_escape_ahead_p : 0);

   if (ctxp->c_line && !ctxp->c_line->white_space_only)
     {
       if (ctxp->p_line)
 	{
 	  free (ctxp->p_line->unicode_escape_p);
 	  free (ctxp->p_line->line);
 	  free (ctxp->p_line);
 	}
       ctxp->p_line = ctxp->c_line;
       ctxp->c_line = NULL;		/* Reallocated */
     }

   if (!ctxp->c_line)
     {
       ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
       ctxp->c_line->max = JAVA_LINE_MAX;
       ctxp->c_line->line = (unicode_t *)xmalloc
 	(sizeof (unicode_t)*ctxp->c_line->max);
       ctxp->c_line->unicode_escape_p =
 	  (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
       ctxp->c_line->white_space_only = 0;
     }

   ctxp->c_line->line [0] = ctxp->c_line->size = 0;
   ctxp->c_line->char_col = ctxp->c_line->current = 0;
   if (ahead)
     {
       ctxp->c_line->line [ctxp->c_line->size] = ahead;
       ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
       ctxp->c_line->size++;
     }
   ctxp->c_line->ahead [0] = 0;
   ctxp->c_line->unicode_escape_ahead_p = 0;
   ctxp->c_line->lineno = ++lineno;
   ctxp->c_line->white_space_only = 1;
 }

 /* Create a new lexer object reading from FINPUT, whose contents are
    encoded in ENCODING.  When iconv is available, a converter from
    ENCODING to UCS-2 is opened; if that fails (or iconv is not compiled
    in), the internal UTF-8 decoder is used, but only when ENCODING is
    DEFAULT_ENCODING.  Any other unsupported encoding is reported
    through fatal_error.  The caller releases the result with
    java_destroy_lexer.  */

 java_lexer *
 java_new_lexer (finput, encoding)
      FILE *finput;
      const char *encoding;
 {
   java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
   int enc_error = 0;

   lex->finput = finput;
   lex->bs_count = 0;
   lex->unget_value = 0;
   lex->hit_eof = 0;

 #ifdef HAVE_ICONV
   lex->handle = iconv_open ("UCS-2", encoding);
   if (lex->handle != (iconv_t) -1)
     {
       /* -1 marks both the input and the output buffer as empty.  */
       lex->first = -1;
       lex->last = -1;
       lex->out_first = -1;
       lex->out_last = -1;
       lex->read_anything = 0;
       lex->use_fallback = 0;

       /* Work around broken iconv() implementations by doing checking at
 	 runtime.  We assume that if the UTF-8 => UCS-2 encoder is broken,
 	 then all UCS-2 encoders will be broken.  Perhaps not a valid
 	 assumption.  */
       if (! byteswap_init)
 	{
 	  iconv_t handle;

 	  byteswap_init = 1;

 	  handle = iconv_open ("UCS-2", "UTF-8");
 	  if (handle != (iconv_t) -1)
 	    {
 	      unicode_t result;
 	      unsigned char in[3];
 	      char *inp, *outp;
 	      size_t inc, outc, r;

 	      /* This is the UTF-8 encoding of \ufeff.  */
 	      in[0] = 0xef;
 	      in[1] = 0xbb;
 	      in[2] = 0xbf;

 	      /* IN is unsigned char so that the byte values above fit;
 		 iconv wants a plain char pointer, hence the cast.  */
 	      inp = (char *) in;
 	      inc = 3;
 	      outp = (char *) &result;
 	      outc = 2;

 	      r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
 			 &outp, &outc);
 	      iconv_close (handle);
 	      /* Conversion must be complete for us to use the result.  */
 	      if (r != (size_t) -1 && inc == 0 && outc == 0)
 		need_byteswap = (result != 0xfeff);
 	    }
 	}

       lex->byte_swap = need_byteswap;
     }
   else
 #endif /* HAVE_ICONV */
     {
       /* If iconv failed, use the internal decoder if the default
 	 encoding was requested.  This code is used on platforms where
 	 iconv exists but is insufficient for our needs.  For
 	 instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.  */
       if (strcmp (encoding, DEFAULT_ENCODING))
 	enc_error = 1;
 #ifdef HAVE_ICONV
       else
 	lex->use_fallback = 1;
 #endif /* HAVE_ICONV */
     }

   if (enc_error)
     fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation.  If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);

   return lex;
 }

 /* Release LEX.  The iconv descriptor is closed first, unless the
    lexer was running on the internal fallback decoder.  */
 void
 java_destroy_lexer (lex)
      java_lexer *lex;
 {
 #ifdef HAVE_ICONV
   if (lex->use_fallback == 0)
     iconv_close (lex->handle);
 #endif

   free (lex);
 }

 /* Read and return the next raw character from LEX, before any
    processing of Unicode escapes.  Returns UEOF at end of input and
    also after a decoding error has been reported.

    A character pushed back through lex->unget_value is returned first.
    Otherwise, when iconv is in use, bytes are read from the file into
    lex->buffer, converted into lex->out_buffer, and handed out two
    bytes at a time; when it is not (or the lexer is in fallback mode),
    an internal decoder handles 1-, 2- and 3-byte UTF-8 sequences.  */
 static int
 java_read_char (lex)
      java_lexer *lex;
 {
   /* An unget value of 0 means "nothing pushed back", so a pushed-back
      NUL cannot be represented.  */
   if (lex->unget_value)
     {
       unicode_t r = lex->unget_value;
       lex->unget_value = 0;
       return r;
     }

 #ifdef HAVE_ICONV
   if (! lex->use_fallback)
     {
       size_t ir, inbytesleft, in_save, out_count, out_save;
       char *inp, *outp;
       unicode_t result;

       /* If there is data which has already been converted, use it.  */
       if (lex->out_first == -1 || lex->out_first >= lex->out_last)
 	{
 	  /* The output buffer is exhausted: restart it and convert
 	     until at least one character has been produced.  */
 	  lex->out_first = 0;
 	  lex->out_last = 0;

 	  while (1)
 	    {
 	      /* See if we need to read more data.  If FIRST == 0 then
 		 the previous conversion attempt ended in the middle of
 		 a character at the end of the buffer.  Otherwise we
 		 only have to read if the buffer is empty.  */
 	      if (lex->first == 0 || lex->first >= lex->last)
 		{
 		  int r;

 		  if (lex->first >= lex->last)
 		    {
 		      lex->first = 0;
 		      lex->last = 0;
 		    }
 		  if (feof (lex->finput))
 		    return UEOF;
 		  /* Append after whatever partial sequence is still
 		     sitting at the start of the buffer.  */
 		  r = fread (&lex->buffer[lex->last], 1,
 			     sizeof (lex->buffer) - lex->last,
 			     lex->finput);
 		  lex->last += r;
 		}

 	      inbytesleft = lex->last - lex->first;
 	      out_count = sizeof (lex->out_buffer) - lex->out_last;

 	      if (inbytesleft == 0)
 		{
 		  /* We've tried to read and there is nothing left.  */
 		  return UEOF;
 		}

 	      /* Remember the byte counts so that we can tell afterwards
 		 how much iconv consumed and produced.  */
 	      in_save = inbytesleft;
 	      out_save = out_count;
 	      inp = &lex->buffer[lex->first];
 	      outp = &lex->out_buffer[lex->out_last];
 	      ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
 			  &inbytesleft, &outp, &out_count);

 	      /* If we haven't read any bytes, then look to see if we
 		 have read a BOM.  */
 	      if (! lex->read_anything && out_save - out_count >= 2)
 		{
 		  /* NOTE(review): out_buffer is read through a unicode_t
 		     pointer here and below; this assumes the buffer is
 		     suitably aligned -- confirm against its declaration.  */
 		  unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
 		  if (uc == 0xfeff)
 		    {
 		      /* BOM in the expected order: skip it, no swapping.  */
 		      lex->byte_swap = 0;
 		      lex->out_first += 2;
 		    }
 		  else if (uc == 0xfffe)
 		    {
 		      /* Byte-reversed BOM: skip it and swap from now on.  */
 		      lex->byte_swap = 1;
 		      lex->out_first += 2;
 		    }
 		  lex->read_anything = 1;
 		}

 	      if (lex->byte_swap)
 		{
 		  /* Swap the two bytes of every character just produced.  */
 		  unsigned int i;
 		  for (i = 0; i < out_save - out_count; i += 2)
 		    {
 		      char t = lex->out_buffer[lex->out_last + i];
 		      lex->out_buffer[lex->out_last + i]
 			= lex->out_buffer[lex->out_last + i + 1];
 		      lex->out_buffer[lex->out_last + i + 1] = t;
 		    }
 		}

 	      /* Account for the bytes consumed and produced.  */
 	      lex->first += in_save - inbytesleft;
 	      lex->out_last += out_save - out_count;

 	      /* If we converted anything at all, move along.  */
 	      if (out_count != out_save)
 		break;

 	      if (ir == (size_t) -1)
 		{
 		  if (errno == EINVAL)
 		    {
 		      /* This is ok.  This means that the end of our buffer
 			 is in the middle of a character sequence.  We just
 			 move the valid part of the buffer to the beginning
 			 to force a read.  */
 		      memmove (&lex->buffer[0], &lex->buffer[lex->first],
 			       lex->last - lex->first);
 		      lex->last -= lex->first;
 		      lex->first = 0;
 		    }
 		  else
 		    {
 		      /* A more serious error.  */
 		      java_lex_error ("unrecognized character in input stream",
 				      0);
 		      return UEOF;
 		    }
 		}
 	    }
 	}

       /* This can still be empty when the only output was a BOM that
 	 has just been skipped.  */
       if (lex->out_first == -1 || lex->out_first >= lex->out_last)
 	{
 	  /* Don't have any data.  */
 	  return UEOF;
 	}

       /* Success.  */
       result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
       lex->out_first += 2;
       return result;
     }
   else
 #endif /* HAVE_ICONV */
     {
       /* Internal UTF-8 decoder: C is the lead byte, C1 and C2 the
 	 continuation bytes.  */
       int c, c1, c2;
       c = getc (lex->finput);

       if (c == EOF)
 	return UEOF;
       if (c < 128)
 	return (unicode_t) c;
       else
 	{
 	  if ((c & 0xe0) == 0xc0)
 	    {
 	      /* Two-byte sequence: 110xxxxx 10xxxxxx.  */
 	      c1 = getc (lex->finput);
 	      if ((c1 & 0xc0) == 0x80)
 		{
 		  unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
 		  /* Check for valid 2-byte characters.  We explicitly
 		     allow \0 because this encoding is common in the
 		     Java world.  */
 		  if (r == 0 || (r >= 0x80 && r <= 0x7ff))
 		    return r;
 		}
 	    }
 	  else if ((c & 0xf0) == 0xe0)
 	    {
 	      /* Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.  */
 	      c1 = getc (lex->finput);
 	      if ((c1 & 0xc0) == 0x80)
 		{
 		  c2 = getc (lex->finput);
 		  if ((c2 & 0xc0) == 0x80)
 		    {
 		      unicode_t r =  (unicode_t)(((c & 0xf) << 12) +
 						 (( c1 & 0x3f) << 6)
 						 + (c2 & 0x3f));
 		      /* Check for valid 3-byte characters.
 			 Don't allow surrogate, \ufffe or \uffff.  */
 		      if (r >= 0x800 && r <= 0xffff
 			  && ! (r >= 0xd800 && r <= 0xdfff)
 			  && r != 0xfffe && r != 0xffff)
 			return r;
 		    }
 		}
 	    }

 	  /* We simply don't support invalid characters.  We also
 	     don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
 	     cannot be valid Java characters.  */
 	  java_lex_error ("malformed UTF-8 character", 0);
 	}
     }

   /* We only get here on error.  */
   return UEOF;
 }

 /* Append character C to line L, recording in parallel whether it was
    written as a Unicode escape (UNICODE_ESCAPE_P).  Both arrays of L
    grow by JAVA_LINE_MAX entries whenever the line is full.  */
 static void
 java_store_unicode (l, c, unicode_escape_p)
     struct java_line *l;
     unicode_t c;
     int unicode_escape_p;
 {
   if (l->size == l->max)
     {
       /* No room left: enlarge the character and the flag array.  */
       l->max += JAVA_LINE_MAX;
       l->line = (unicode_t *) xrealloc (l->line,
 					sizeof (unicode_t) * l->max);
       l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
 					       sizeof (char) * l->max);
     }

   l->line [l->size] = c;
   l->unicode_escape_p [l->size] = unicode_escape_p;
   l->size++;
 }

 static int
 java_read_unicode (lex, unicode_escape_p)
      java_lexer *lex;
      int *unicode_escape_p;
 {
   int c;

   c = java_read_char (lex);
   *unicode_escape_p = 0;

   if (c != '\\')
     {
       lex->bs_count = 0;
       return c;
     }

   ++lex->bs_count;
   if ((lex->bs_count) % 2 == 1)
     {
       /* Odd number of \ seen.  */
       c = java_read_char (lex);
       if (c == 'u')
         {
 	  unicode_t unicode = 0;
 	  int shift = 12;

 	  /* Recognize any number of `u's in \u.  */
 	  while ((c = java_read_char (lex)) == 'u')
 	    ;

 	  /* Unget the most recent character as it is not a `u'.  */
 	  if (c == UEOF)
 	    return UEOF;
 	  lex->unget_value = c;

 	  /* Next should be 4 hex digits, otherwise it's an error.
 	     The hex value is converted into the unicode, pushed into
 	     the Unicode stream.  */
 	  for (shift = 12; shift >= 0; shift -= 4)
 	    {
 	      if ((c = java_read_char (lex)) == UEOF)
 	        return UEOF;
 	      if (hex_p (c))
 		unicode |= (unicode_t)(hex_value (c) << shift);
 	      else
 		java_lex_error ("Non hex digit in Unicode escape sequence", 0);
 	    }
 	  lex->bs_count = 0;
 	  *unicode_escape_p = 1;
 	  return unicode;
 	}
       lex->unget_value = c;
     }
   return (unicode_t) '\\';
 }

 /* Like java_read_unicode, but return every line terminator as a
    single '\n': a lone '\r' and the pair "\r\n" both yield '\n'.  */
 static int
 java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
      java_lexer *lex;
      int *unicode_escape_p;
 {
   int ch = java_read_unicode (lex, unicode_escape_p);
   int next;
   int ignored;

   if (ch != '\r')
     return ch;

   /* Look one character ahead: a '\n' right after the '\r' belongs to
      the same terminator and is swallowed, anything else is pushed
      back.  */
   next = java_read_unicode (lex, &ignored);
   if (next != '\n')
     lex->unget_value = next;

   /* Either way the caller sees a newline.  */
   return '\n';
 }

 /* Return the next character of the current line and advance the read
    position and the column counter.  When there is no current line or
    it has been used up, a whole new line (up to and including its
    '\n', or up to end of input) is read in first.  Returns UEOF once
    the input is exhausted.  */
 static int
 java_get_unicode ()
 {
   /* A fresh line is needed when there is none yet or when the current
      one has been fully consumed.  */
   if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
     {
       int ch;
       int stored_any = 0;

       if (ctxp->lexer->hit_eof)
 	return UEOF;

       java_allocate_new_line ();

       /* Nothing to read when the carried-over look-ahead already is
 	 the newline that ends this line.  */
       if (ctxp->c_line->line[0] != '\n')
 	{
 	  do
 	    {
 	      int escaped;

 	      ch = java_read_unicode_collapsing_terminators (ctxp->lexer,
 							     &escaped);
 	      if (ch == UEOF)
 		break;

 	      stored_any = 1;
 	      java_store_unicode (ctxp->c_line, ch, escaped);
 	      if (ctxp->c_line->white_space_only
 		  && !JAVA_WHITE_SPACE_P (ch)
 		  && ch != '\n')
 		ctxp->c_line->white_space_only = 0;
 	    }
 	  while (ch != '\n');

 	  /* End of input before anything was stored: remember it so
 	     that later calls return UEOF right away.  */
 	  if (ch == UEOF && ! stored_any)
 	    {
 	      ctxp->lexer->hit_eof = 1;
 	      return UEOF;
 	    }
 	}
     }

   ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
   JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
   return ctxp->c_line->line [ctxp->c_line->current++];
 }

 /* Skip the rest of a C style comment, up to and including the closing
    star-slash.  C is the first character following the '/' and '*'
    that opened the comment.  An error is reported if the input ends
    before the comment is closed.  */
 static void
 java_parse_end_comment (c)
      int c;
 {
   for (;;)
     {
       if (c == UEOF)
 	{
 	  java_lex_error ("Comment not terminated at end of input", 0);
 	  return;
 	}

       if (c == '*')
 	{
 	  c = java_get_unicode ();
 	  if (c == UEOF)
 	    {
 	      java_lex_error ("Comment not terminated at end of input", 0);
 	      return;
 	    }
 	  if (c == '/')
 	    return;
 	  /* A second '*' may itself be the start of the terminator:
 	     push it back so that it is examined again.  */
 	  if (c == '*')
 	    java_unget_unicode ();
 	}

       c = java_get_unicode ();
     }
 }

 /* Parse the beginning of a documentation comment section, starting
    with character C.  Leading white space, line terminators and `*'
    characters are skipped.  A tag is only recognized once a line
    terminator has been skipped, i.e. at the beginning of a comment
    line.  The only tag handled is @deprecated, which sets
    ctxp->deprecated.

    Returns 1 when the skipped characters ended with a `*' immediately
    followed by `/', meaning that the comment is finished and the caller
    should restart at step1.  Returns 0 otherwise, after pushing the
    last character read back.  */

 static int
 java_parse_doc_section (c)
      int c;
 {
   int at_line_start = 0;
   int star_was_last = 0;

   for (; JAVA_WHITE_SPACE_P (c) || c == '*' || c == '\n';
        c = java_get_unicode ())
     {
       if (c == '*')
 	star_was_last = 1;
       else
 	{
 	  if (c == '\n')	/* ULT */
 	    at_line_start = 1;
 	  star_was_last = 0;
 	}
     }

   if (c == UEOF)
     java_lex_error ("Comment not terminated at end of input", 0);

   /* The comment closes right here.  */
   if (star_was_last && c == '/')
     return 1;			/* Goto step1 in caller */

   /* We're parsing @deprecated */
   if (at_line_start && c == '@')
     {
       char word [11];
       int len = 0;

       /* Collect at most ten characters after the `@'; the character
 	 that stops the loop is stored as well.  */
       while (len < 10 && c != UEOF && c != ' ' && c != '\n')
 	{
 	  c = java_get_unicode ();
 	  word [len] = c;
 	  len++;
 	}

       if (c == UEOF)
 	java_lex_error ("Comment not terminated at end of input", 0);
       word [len] = '\0';

       if (strcmp (word, "deprecated") == 0)
 	ctxp->deprecated = 1;
     }

   java_unget_unicode ();
   return 0;
 }

 /* Return nonzero if C may start a Java identifier.  Callers only use
    this for C >= 128 and handle smaller values inline, but every value
    is handled here all the same.

    type_table is indexed by the high byte of C.  An entry with no bits
    set outside LETTER_PART | LETTER_START is itself the flag word for
    the whole page; any other entry is used as a pointer to a table of
    flags indexed by the low byte of C.  */
 static int
 java_start_char_p (c)
      unicode_t c;
 {
   const char *const page = type_table[c / 256];
   unsigned long encoded = (unsigned long) page;
   int flags;

   if ((encoded & ~ (LETTER_PART | LETTER_START)) == 0)
     flags = encoded;
   else
     flags = page[c & 255];

   return flags & LETTER_START;
 }

 /* Return nonzero if C may appear inside a Java identifier.  Callers
    only use this for C >= 128 and handle smaller values inline, but
    every value is handled here all the same.

    type_table is indexed by the high byte of C.  An entry with no bits
    set outside LETTER_PART | LETTER_START is itself the flag word for
    the whole page; any other entry is used as a pointer to a table of
    flags indexed by the low byte of C.  */
 static int
 java_part_char_p (c)
      unicode_t c;
 {
   const char *const page = type_table[c / 256];
   unsigned long encoded = (unsigned long) page;
   int flags;

   if ((encoded & ~ (LETTER_PART | LETTER_START)) == 0)
     flags = encoded;
   else
     flags = page[c & 255];

   return flags & LETTER_PART;
 }

 /* Parse what follows the backslash of an escape sequence in a
    character or string literal and return the character it denotes.
    Handles the single-letter escapes and octal escapes of one to three
    digits.  Anything else is reported as an error and yields
    JAVA_CHAR_ERROR.  */
 static int
 java_parse_escape_sequence ()
 {
   unicode_t char_lit;
   int c = java_get_unicode ();

   switch (c)
     {
     case 'b':
       return (unicode_t)0x8;
     case 't':
       return (unicode_t)0x9;
     case 'n':
       return (unicode_t)0xa;
     case 'f':
       return (unicode_t)0xc;
     case 'r':
       return (unicode_t)0xd;
     case '"':
       return (unicode_t)0x22;
     case '\'':
       return (unicode_t)0x27;
     case '\\':
       return (unicode_t)0x5c;

     case '0': case '1': case '2': case '3':
     case '4': case '5': case '6': case '7':
       {
 	/* According to the grammar, `\477' has a well-defined
 	   meaning -- it is `\47' followed by `7'.  So a first digit
 	   above 3 limits the escape to two digits.  */
 	int limit = (c > '3') ? 2 : 3;
 	int ndigits = 0;

 	char_lit = 0;
 	while (ndigits < limit && RANGE (c, '0', '7'))
 	  {
 	    char_lit = (char_lit << 3) | (c - '0');
 	    ndigits++;
 	    c = java_get_unicode ();
 	  }

 	/* The loop always reads one character too many.  */
 	java_unget_unicode ();

 	return char_lit;
       }

     default:
       java_lex_error ("Invalid character in escape sequence", 0);
       return JAVA_CHAR_ERROR;
     }
 }

 /* Isolate the code which may raise an arithmetic exception in its
    own function.  java_lex runs it through do_float_handler.  */

 #ifndef JC1_LITE
 /* Arguments of java_perform_atof, packed in a structure because the
    function receives a single PTR.  */
 struct jpa_args
 {
   YYSTYPE *java_lval;		/* Where the resulting node is stored.  */
   char *literal_token;		/* Text of the literal, NUL terminated.  */
   int fflag;			/* Nonzero for float, zero for double.  */
   int number_beginning;		/* Line position where the literal starts.  */
 };

 /* Test whether the real value X is zero.  */
 #ifdef REAL_ARITHMETIC
 #define IS_ZERO(X) (ereal_cmp (X, dconst0) == 0)
 #else
 #define IS_ZERO(X) ((X) == 0)
 #endif

 static void java_perform_atof	PARAMS ((PTR));

 /* Convert the floating point literal described by AV, which points to
    a struct jpa_args, and store the resulting node through its
    java_lval member.  Infinite and NaN results are reported as range
    errors and replaced by zero; a zero result obtained from a literal
    with a nonzero digit in its mantissa is reported as an underflow.  */
 static void
 java_perform_atof (av)
      PTR av;
 {
   struct jpa_args *a = (struct jpa_args *)av;
   /* NOTE(review): the locals java_lval and number_beginning are not
      referenced by name below except for the position reset; presumably
      JAVA_FLOAT_RANGE_ERROR and SET_LVAL_NODE_TYPE expand to uses of
      them -- confirm before renaming.  */
   YYSTYPE *java_lval = a->java_lval;
   int number_beginning = a->number_beginning;
   REAL_VALUE_TYPE value;
   tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);

   SET_REAL_VALUE_ATOF (value,
 		       REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));

   if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
     {
       JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
       value = DCONST0;
     }
   else if (IS_ZERO (value))
     {
       /* We check to see if the value is really 0 or if we've found an
 	 underflow.  We do this in the most primitive imaginable way.  */
       int really_zero = 1;
       char *p = a->literal_token;
       if (*p == '-')
 	++p;
       /* Scan the mantissa only: anything besides `0' and `.' before
 	 the exponent means the literal was not written as zero.  */
       while (*p && *p != 'e' && *p != 'E')
 	{
 	  if (*p != '0' && *p != '.')
 	    {
 	      really_zero = 0;
 	      break;
 	    }
 	  ++p;
 	}
       if (! really_zero)
 	{
 	  /* Temporarily move the read position back to the start of
 	     the literal while the error is being reported.  */
 	  int i = ctxp->c_line->current;
 	  ctxp->c_line->current = number_beginning;
 	  java_lex_error ("Floating point literal underflow", 0);
 	  ctxp->c_line->current = i;
 	}
     }

   SET_LVAL_NODE_TYPE (build_real (type, value), type);
 }
 #endif

 static int yylex		PARAMS ((YYSTYPE *));

 static int
 #ifdef JC1_LITE
 yylex (java_lval)
 #else
 java_lex (java_lval)
 #endif
      YYSTYPE *java_lval;
 {
   int c;
   unicode_t first_unicode;
   int ascii_index, all_ascii;
   char *string;

   /* Translation of the Unicode escape in the raw stream of Unicode
      characters. Takes care of line terminator.  */
  step1:
   /* Skip white spaces: SP, TAB and FF or ULT */
   for (c = java_get_unicode ();
        c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
     if (c == '\n')
       {
 	ctxp->elc.line = ctxp->c_line->lineno;
 	ctxp->elc.col  = ctxp->c_line->char_col-2;
       }

   ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);

   if (c == 0x1a)		/* CTRL-Z */
     {
       if ((c = java_get_unicode ()) == UEOF)
 	return 0;		/* Ok here */
       else
 	java_unget_unicode ();	/* Caught later, at the end of the function */
     }
   /* Handle EOF here */
   if (c == UEOF)	/* Should probably do something here... */
     return 0;

   /* Take care of eventual comments.  */
   if (c == '/')
     {
       switch (c = java_get_unicode ())
 	{
 	case '/':
 	  for (;;)
 	    {
 	      c = java_get_unicode ();
 	      if (c == UEOF)
 		{
 		  /* It is ok to end a `//' comment with EOF, unless
 		     we're being pedantic.  */
 		  if (pedantic)
 		    java_lex_error ("Comment not terminated at end of input",
 				    0);
 		  return 0;
 		}
 	      if (c == '\n')	/* ULT */
 		goto step1;
 	    }
 	  break;

 	case '*':
 	  if ((c = java_get_unicode ()) == '*')
 	    {
 	      if ((c = java_get_unicode ()) == '/')
 		goto step1;	/* Empy documentation comment  */
 	      else if (java_parse_doc_section (c))
 		goto step1;
 	    }

 	  java_parse_end_comment ((c = java_get_unicode ()));
 	  goto step1;
 	  break;
 	default:
 	  java_unget_unicode ();
 	  c = '/';
 	  break;
 	}
     }

   ctxp->elc.line = ctxp->c_line->lineno;
   ctxp->elc.prev_col = ctxp->elc.col;
   ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
   if (ctxp->elc.col < 0)
     abort ();

   /* Numeric literals */
   if (JAVA_ASCII_DIGIT (c) || (c == '.'))
     {
       /* This section of code is borrowed from gcc/c-lex.c  */
 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
       int parts[TOTAL_PARTS];
       HOST_WIDE_INT high, low;
       /* End borrowed section  */
       char literal_token [256];
       int  literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
       int  found_hex_digits = 0;
       int  i;
 #ifndef JC1_LITE
       int  number_beginning = ctxp->c_line->current;
       tree value;
 #endif

       /* We might have a . separator instead of a FP like .[0-9]* */
       if (c == '.')
 	{
 	  unicode_t peep = java_sneak_unicode ();

 	  if (!JAVA_ASCII_DIGIT (peep))
 	    {
 	      JAVA_LEX_SEP('.');
 	      BUILD_OPERATOR (DOT_TK);
 	    }
 	}

       for (i = 0; i < TOTAL_PARTS; i++)
 	parts [i] = 0;

       if (c == '0')
 	{
 	  c = java_get_unicode ();
 	  if (c == 'x' || c == 'X')
 	    {
 	      radix = 16;
 	      c = java_get_unicode ();
 	    }
 	  else if (JAVA_ASCII_DIGIT (c))
 	    radix = 8;
 	  else if (c == '.')
 	    {
 	      /* Push the '.' back and prepare for a FP parsing... */
 	      java_unget_unicode ();
 	      c = '0';
 	    }
 	  else
 	    {
 	      /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
 	      JAVA_LEX_LIT ("0", 10);
               switch (c)
 		{
 		case 'L': case 'l':
 		  SET_LVAL_NODE (long_zero_node);
 		  return (INT_LIT_TK);
 		case 'f': case 'F':
 		  SET_LVAL_NODE (float_zero_node);
 		  return (FP_LIT_TK);
 		case 'd': case 'D':
 		  SET_LVAL_NODE (double_zero_node);
 		  return (FP_LIT_TK);
 		default:
 		  java_unget_unicode ();
 		  SET_LVAL_NODE (integer_zero_node);
 		  return (INT_LIT_TK);
 		}
 	    }
 	}
       /* Parse the first part of the literal, until we find something
 	 which is not a number.  */
       while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
 	     (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
 	     (radix == 8  && JAVA_ASCII_OCTDIGIT (c)))
 	{
 	  /* We store in a string (in case it turns out to be a FP) and in
 	     PARTS if we have to process a integer literal.  */
 	  int numeric = hex_value (c);
 	  int count;

 	  /* Remember when we find a valid hexadecimal digit */
 	  if (radix == 16)
 	    found_hex_digits = 1;

 	  literal_token [literal_index++] = c;
 	  /* This section of code if borrowed from gcc/c-lex.c  */
 	  for (count = 0; count < TOTAL_PARTS; count++)
 	    {
 	      parts[count] *= radix;
 	      if (count)
 		{
 		  parts[count]   += (parts[count-1] >> HOST_BITS_PER_CHAR);
 		  parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
 		}
 	      else
 		parts[0] += numeric;
 	    }
 	  if (parts [TOTAL_PARTS-1] != 0)
 	    overflow = 1;
 	  /* End borrowed section.  */
 	  c = java_get_unicode ();
 	}

       /* If we have something from the FP char set but not a digit, parse
 	 a FP literal.  */
       if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
 	{
 	  int stage = 0;
 	  int seen_digit = (literal_index ? 1 : 0);
 	  int seen_exponent = 0;
 	  int fflag = 0;	/* 1 for {f,F}, 0 for {d,D}. FP literal are
 				   double unless specified. */

 	  /* It is ok if the radix is 8 because this just means we've
 	     seen a leading `0'.  However, radix==16 is invalid.  */
 	  if (radix == 16)
 	    java_lex_error ("Can't express non-decimal FP literal", 0);
 	  radix = 10;

 	  for (;;)
 	    {
 	      if (c == '.')
 		{
 		  if (stage < 1)
 		    {
 		      stage = 1;
 		      literal_token [literal_index++ ] = c;
 		      c = java_get_unicode ();
 		    }
 		  else
 		    java_lex_error ("Invalid character in FP literal", 0);
 		}

 	      if (c == 'e' || c == 'E')
 		{
 		  if (stage < 2)
 		    {
 		      /* {E,e} must have seen at list a digit */
 		      if (!seen_digit)
 			java_lex_error ("Invalid FP literal", 0);
 		      seen_digit = 0;
 		      seen_exponent = 1;
 		      stage = 2;
 		      literal_token [literal_index++] = c;
 		      c = java_get_unicode ();
 		    }
 		  else
 		    java_lex_error ("Invalid character in FP literal", 0);
 		}
 	      if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
 		{
 		  fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
 		  stage = 4;	/* So we fall through */
 		}

 	      if ((c=='-' || c =='+') && stage == 2)
 		{
 		  stage = 3;
 		  literal_token [literal_index++] = c;
 		  c = java_get_unicode ();
 		}

 	      if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
 		  (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
 		  (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
 		  (stage == 3 && JAVA_ASCII_DIGIT (c)))
 		{
 		  if (JAVA_ASCII_DIGIT (c))
 		    seen_digit = 1;
 		  literal_token [literal_index++ ] = c;
 		  c = java_get_unicode ();
 		}
 	      else
 		{
 #ifndef JC1_LITE
 		  struct jpa_args a;
 #endif
 		  if (stage != 4) /* Don't push back fF/dD */
 		    java_unget_unicode ();

 		  /* An exponent (if any) must have seen a digit.  */
 		  if (seen_exponent && !seen_digit)
 		    java_lex_error ("Invalid FP literal", 0);

 		  literal_token [literal_index] = '\0';
 		  JAVA_LEX_LIT (literal_token, radix);

 #ifndef JC1_LITE
 		  a.literal_token = literal_token;
 		  a.fflag = fflag;
 		  a.java_lval = java_lval;
 		  a.number_beginning = number_beginning;
 		  if (do_float_handler (java_perform_atof, (PTR) &a))
 		    return FP_LIT_TK;

 		  JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
 #else
 		  return FP_LIT_TK;
 #endif
 		}
 	    }
 	} /* JAVA_ASCCI_FPCHAR (c) */

       if (radix == 16 && ! found_hex_digits)
 	java_lex_error
 	  ("0x must be followed by at least one hexadecimal digit", 0);

       /* Here we get back to converting the integral literal.  */
       if (c == 'L' || c == 'l')
 	long_suffix = 1;
       else if (radix == 16 && JAVA_ASCII_LETTER (c))
 	java_lex_error ("Digit out of range in hexadecimal literal", 0);
       else if (radix == 8  && JAVA_ASCII_DIGIT (c))
 	java_lex_error ("Digit out of range in octal literal", 0);
       else if (radix == 16 && !literal_index)
 	java_lex_error ("No digit specified for hexadecimal literal", 0);
       else
 	java_unget_unicode ();

 #ifdef JAVA_LEX_DEBUG
       literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
       JAVA_LEX_LIT (literal_token, radix);
 #endif
       /* This section of code is borrowed from gcc/c-lex.c  */
       if (!overflow)
 	{
 	  bytes = GET_TYPE_PRECISION (long_type_node);
 	  for (i = bytes; i < TOTAL_PARTS; i++)
 	    if (parts [i])
 	      {
 	        overflow = 1;
 		break;
 	      }
 	}
       high = low = 0;
       for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
 	{
 	  high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
 					      / HOST_BITS_PER_CHAR)]
 		   << (i * HOST_BITS_PER_CHAR));
 	  low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
 	}
       /* End borrowed section.  */

       /* Range checking */
       if (long_suffix)
 	{
 	  /* 9223372036854775808L is valid if operand of a '-'. Otherwise
 	     9223372036854775807L is the biggest `long' literal that can be
 	     expressed using a 10 radix. For other radixes, everything that
 	     fits withing 64 bits is OK. */
 	  int hb = (high >> 31);
 	  if (overflow || (hb && low && radix == 10)
 	      || (hb && high & 0x7fffffff && radix == 10))
 	    JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
 	}
       else
 	{
 	  /* 2147483648 is valid if operand of a '-'. Otherwise,
 	     2147483647 is the biggest `int' literal that can be
 	     expressed using a 10 radix. For other radixes, everything
 	     that fits within 32 bits is OK.  As all literals are
 	     signed, we sign extend here. */
 	  int hb = (low >> 31) & 0x1;
 	  if (overflow || high || (hb && low & 0x7fffffff && radix == 10))
 	    JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
 	  high = -hb;
 	}
 #ifndef JC1_LITE
       value = build_int_2 (low, high);
       JAVA_RADIX10_FLAG (value) = radix == 10;
       SET_LVAL_NODE_TYPE (value, long_suffix ? long_type_node : int_type_node);
 #else
       SET_LVAL_NODE_TYPE (build_int_2 (low, high),
 			  long_suffix ? long_type_node : int_type_node);
 #endif
       return INT_LIT_TK;
     }

   /* Character literals */
   if (c == '\'')
     {
       int char_lit;
       if ((c = java_get_unicode ()) == '\\')
 	char_lit = java_parse_escape_sequence ();
       else
 	{
 	  if (c == '\n' || c == '\'')
 	    java_lex_error ("Invalid character literal", 0);
 	  char_lit = c;
 	}

       c = java_get_unicode ();

       if ((c == '\n') || (c == UEOF))
 	java_lex_error ("Character literal not terminated at end of line", 0);
       if (c != '\'')
 	java_lex_error ("Syntax error in character literal", 0);

       if (char_lit == JAVA_CHAR_ERROR)
         char_lit = 0;		/* We silently convert it to zero */

       JAVA_LEX_CHAR_LIT (char_lit);
       SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
       return CHAR_LIT_TK;
     }

   /* String literals */
   if (c == '"')
     {
       int no_error;
       char *string;

       for (no_error = 1, c = java_get_unicode ();
 	   c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
 	{
 	  if (c == '\\')
 	    c = java_parse_escape_sequence ();
 	  if (c == JAVA_CHAR_ERROR)
 	    {
 	      no_error = 0;
 	      c = 0;		/* We silently convert it to zero.  */
 	    }
 	  java_unicode_2_utf8 (c);
 	}
       if (c == '\n' || c == UEOF) /* ULT */
 	{
 	  lineno--;		/* Refer to the line the terminator was seen */
 	  java_lex_error ("String not terminated at end of line", 0);
 	  lineno++;
 	}

       obstack_1grow (&temporary_obstack, '\0');
       string = obstack_finish (&temporary_obstack);
 #ifndef JC1_LITE
       if (!no_error || (c != '"'))
 	java_lval->node = error_mark_node; /* Requires futher testing FIXME */
       else
 	java_lval->node = build_string (strlen (string), string);
 #endif
       obstack_free (&temporary_obstack, string);
       return STRING_LIT_TK;
     }

   /* Separator */
   switch (c)
     {
     case '(':
       JAVA_LEX_SEP (c);
       BUILD_OPERATOR (OP_TK);
     case ')':
       JAVA_LEX_SEP (c);
       return CP_TK;
     case '{':
       JAVA_LEX_SEP (c);
       if (ctxp->ccb_indent == 1)
 	ctxp->first_ccb_indent1 = lineno;
       ctxp->ccb_indent++;
       BUILD_OPERATOR (OCB_TK);
     case '}':
       JAVA_LEX_SEP (c);
       ctxp->ccb_indent--;
       if (ctxp->ccb_indent == 1)
         ctxp->last_ccb_indent1 = lineno;
       BUILD_OPERATOR (CCB_TK);
     case '[':
       JAVA_LEX_SEP (c);
       BUILD_OPERATOR (OSB_TK);
     case ']':
       JAVA_LEX_SEP (c);
       return CSB_TK;
     case ';':
       JAVA_LEX_SEP (c);
       return SC_TK;
     case ',':
       JAVA_LEX_SEP (c);
       return C_TK;
     case '.':
       JAVA_LEX_SEP (c);
       BUILD_OPERATOR (DOT_TK);
       /*      return DOT_TK; */
     }

   /* Operators */
   switch (c)
     {
     case '=':
       if ((c = java_get_unicode ()) == '=')
 	{
 	  BUILD_OPERATOR (EQ_TK);
 	}
       else
 	{
 	  /* Equals is used in two different locations. In the
 	     variable_declarator: rule, it has to be seen as '=' as opposed
 	     to being seen as an ordinary assignment operator in
 	     assignment_operators: rule.  */
 	  java_unget_unicode ();
 	  BUILD_OPERATOR (ASSIGN_TK);
 	}

     case '>':
       switch ((c = java_get_unicode ()))
 	{
 	case '=':
 	  BUILD_OPERATOR (GTE_TK);
 	case '>':
 	  switch ((c = java_get_unicode ()))
 	    {
 	    case '>':
 	      if ((c = java_get_unicode ()) == '=')
 		{
 		  BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
 		}
 	      else
 		{
 		  java_unget_unicode ();
 		  BUILD_OPERATOR (ZRS_TK);
 		}
 	    case '=':
 	      BUILD_OPERATOR2 (SRS_ASSIGN_TK);
 	    default:
 	      java_unget_unicode ();
 	      BUILD_OPERATOR (SRS_TK);
 	    }
 	default:
 	  java_unget_unicode ();
 	  BUILD_OPERATOR (GT_TK);
 	}

     case '<':
       switch ((c = java_get_unicode ()))
 	{
 	case '=':
 	  BUILD_OPERATOR (LTE_TK);
 	case '<':
 	  if ((c = java_get_unicode ()) == '=')
 	    {
 	      BUILD_OPERATOR2 (LS_ASSIGN_TK);
 	    }
 	  else
 	    {
 	      java_unget_unicode ();
 	      BUILD_OPERATOR (LS_TK);
 	    }
 	default:
 	  java_unget_unicode ();
 	  BUILD_OPERATOR (LT_TK);
 	}

     case '&':
       switch ((c = java_get_unicode ()))
 	{
 	case '&':
 	  BUILD_OPERATOR (BOOL_AND_TK);
 	case '=':
 	  BUILD_OPERATOR2 (AND_ASSIGN_TK);
 	default:
 	  java_unget_unicode ();
 	  BUILD_OPERATOR (AND_TK);
 	}

     case '|':
       switch ((c = java_get_unicode ()))
 	{
 	case '|':
 	  BUILD_OPERATOR (BOOL_OR_TK);
 	case '=':
 	  BUILD_OPERATOR2 (OR_ASSIGN_TK);
 	default:
 	  java_unget_unicode ();
 	  BUILD_OPERATOR (OR_TK);
 	}

     case '+':
       switch ((c = java_get_unicode ()))
 	{
 	case '+':
 	  BUILD_OPERATOR (INCR_TK);
 	case '=':
 	  BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
 	default:
 	  java_unget_unicode ();
 	  BUILD_OPERATOR (PLUS_TK);
 	}

     case '-':
       switch ((c = java_get_unicode ()))
 	{
 	case '-':
 	  BUILD_OPERATOR (DECR_TK);
 	case '=':
 	  BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
 	default:
 	  java_unget_unicode ();
 	  BUILD_OPERATOR (MINUS_TK);
 	}

     case '*':
       if ((c = java_get_unicode ()) == '=')
 	{
 	  BUILD_OPERATOR2 (MULT_ASSIGN_TK);
 	}
       else
 	{
 	  java_unget_unicode ();
 	  BUILD_OPERATOR (MULT_TK);
 	}

     case '/':
       if ((c = java_get_unicode ()) == '=')
 	{
 	  BUILD_OPERATOR2 (DIV_ASSIGN_TK);
 	}
       else
 	{
 	  java_unget_unicode ();
 	  BUILD_OPERATOR (DIV_TK);
 	}

     case '^':
       if ((c = java_get_unicode ()) == '=')
 	{
 	  BUILD_OPERATOR2 (XOR_ASSIGN_TK);
 	}
       else
 	{
 	  java_unget_unicode ();
 	  BUILD_OPERATOR (XOR_TK);
 	}

     case '%':
       if ((c = java_get_unicode ()) == '=')
 	{
 	  BUILD_OPERATOR2 (REM_ASSIGN_TK);
 	}
       else
 	{
 	  java_unget_unicode ();
 	  BUILD_OPERATOR (REM_TK);
 	}

     case '!':
       if ((c = java_get_unicode()) == '=')
 	{
 	  BUILD_OPERATOR (NEQ_TK);
 	}
       else
 	{
 	  java_unget_unicode ();
 	  BUILD_OPERATOR (NEG_TK);
 	}

     case '?':
       JAVA_LEX_OP ("?");
       BUILD_OPERATOR (REL_QM_TK);
     case ':':
       JAVA_LEX_OP (":");
       BUILD_OPERATOR (REL_CL_TK);
     case '~':
       BUILD_OPERATOR (NOT_TK);
     }

   /* Keyword, boolean literal or null literal */
   for (first_unicode = c, all_ascii = 1, ascii_index = 0;
        JAVA_PART_CHAR_P (c); c = java_get_unicode ())
     {
       java_unicode_2_utf8 (c);
       if (all_ascii && c >= 128)
         all_ascii = 0;
       ascii_index++;
     }

   obstack_1grow (&temporary_obstack, '\0');
   string = obstack_finish (&temporary_obstack);
   java_unget_unicode ();

   /* If we have something all ascii, we consider a keyword, a boolean
      literal, a null literal or an all ASCII identifier.  Otherwise,
      this is an identifier (possibly not respecting formation rule).  */
   if (all_ascii)
     {
       const struct java_keyword *kw;
       if ((kw=java_keyword (string, ascii_index)))
 	{
 	  JAVA_LEX_KW (string);
 	  switch (kw->token)
 	    {
 	    case PUBLIC_TK:       case PROTECTED_TK: case STATIC_TK:
 	    case ABSTRACT_TK:     case FINAL_TK:     case NATIVE_TK:
 	    case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
 	    case PRIVATE_TK:      case STRICT_TK:
 	      SET_MODIFIER_CTX (kw->token);
 	      return MODIFIER_TK;
 	    case FLOAT_TK:
 	      SET_LVAL_NODE (float_type_node);
 	      return FP_TK;
 	    case DOUBLE_TK:
 	      SET_LVAL_NODE (double_type_node);
 	      return FP_TK;
 	    case BOOLEAN_TK:
 	      SET_LVAL_NODE (boolean_type_node);
 	      return BOOLEAN_TK;
 	    case BYTE_TK:
 	      SET_LVAL_NODE (byte_type_node);
 	      return INTEGRAL_TK;
 	    case SHORT_TK:
 	      SET_LVAL_NODE (short_type_node);
 	      return INTEGRAL_TK;
 	    case INT_TK:
 	      SET_LVAL_NODE (int_type_node);
 	      return INTEGRAL_TK;
 	    case LONG_TK:
 	      SET_LVAL_NODE (long_type_node);
 	      return INTEGRAL_TK;
 	    case CHAR_TK:
 	      SET_LVAL_NODE (char_type_node);
 	      return INTEGRAL_TK;

 	      /* Keyword based literals */
 	    case TRUE_TK:
 	    case FALSE_TK:
 	      SET_LVAL_NODE ((kw->token == TRUE_TK ?
 			      boolean_true_node : boolean_false_node));
 	      return BOOL_LIT_TK;
 	    case NULL_TK:
 	      SET_LVAL_NODE (null_pointer_node);
 	      return NULL_TK;

 	      /* Some keyword we want to retain information on the location
 		 they where found */
 	    case CASE_TK:
 	    case DEFAULT_TK:
 	    case SUPER_TK:
 	    case THIS_TK:
 	    case RETURN_TK:
 	    case BREAK_TK:
 	    case CONTINUE_TK:
 	    case TRY_TK:
 	    case CATCH_TK:
 	    case THROW_TK:
 	    case INSTANCEOF_TK:
 	      BUILD_OPERATOR (kw->token);

 	    default:
 	      return kw->token;
 	    }
 	}
     }

   /* We may have an ID here */
   if (JAVA_START_CHAR_P (first_unicode))
     {
       JAVA_LEX_ID (string);
       java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
       return ID_TK;
     }

   /* Everything else is an invalid character in the input */
   {
     char lex_error_buffer [128];
     sprintf (lex_error_buffer, "Invalid character `%s' in input",
 	     java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
     java_lex_error (lex_error_buffer, 1);
   }
   return 0;
 }

 #ifndef JC1_LITE
 /* Parser hook: decide whether VALUE must be reported as a numeric
    overflow.  Only radix-10 INTEGER_CST nodes are examined; anything
    else is ignored.  The single case handled here is the literal whose
    magnitude is that of the largest negative value of its type, and the
    parser only calls this when that literal is not preceded by `-'.

    For a `long' literal the test looks at the high word of the constant
    (bit 31 set, low 31 bits clear); for any other type it applies the
    same test to the low word.  */
 static void
 error_if_numeric_overflow (value)
      tree value;
 {
   unsigned HOST_WIDE_INT lo, hi;
   const char *message;
   int top_bit_set, lower_bits_clear;

   /* Nothing to check unless this is a decimal integer constant.  */
   if (TREE_CODE (value) != INTEGER_CST || !JAVA_RADIX10_FLAG (value))
     return;

   lo = TREE_INT_CST_LOW (value);
   hi = TREE_INT_CST_HIGH (value);

   if (TREE_TYPE (value) == long_type_node)
     {
       top_bit_set = (hi >> 31);
       lower_bits_clear = !(hi & 0x7fffffff);
       message = "Numeric overflow for `long' literal";
     }
   else
     {
       top_bit_set = (lo >> 31) & 0x1;
       lower_bits_clear = !(lo & 0x7fffffff);
       message = "Numeric overflow for `int' literal";
     }

   if (top_bit_set && lower_bits_clear)
     java_lex_error (message, 0);
 }
 #endif /* JC1_LITE */

 /* Append the UTF-8 encoding of UNICODE, byte by byte, to the object
    currently being grown on temporary_obstack.

    Values accepted by RANGE (unicode, 0x01, 0x7f) are emitted as a
    single byte.  Values accepted by RANGE (unicode, 0x80, 0x7ff), and
    also the value zero, are emitted in the two-byte form (so zero
    becomes 0xc0 0x80 rather than a lone 0x00 byte).  Everything else is
    emitted in the three-byte form.  */
 static void
 java_unicode_2_utf8 (unicode)
     unicode_t unicode;
 {
   unsigned char lead, middle, last;

   /* One byte.  */
   if (RANGE (unicode, 0x01, 0x7f))
     {
       obstack_1grow (&temporary_obstack, (char)unicode);
       return;
     }

   /* Two bytes: 110xxxxx 10xxxxxx.  */
   if (unicode == 0 || RANGE (unicode, 0x80, 0x7ff))
     {
       lead = (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6));
       last = (unsigned char)(0x80 | (unicode & 0x3f));
       obstack_1grow (&temporary_obstack, lead);
       obstack_1grow (&temporary_obstack, last);
       return;
     }

   /* Three bytes (range 0x800-0xffff): 1110xxxx 10xxxxxx 10xxxxxx.  */
   lead = (unsigned char)(0xe0 | ((unicode & 0xf000) >> 12));
   middle = (unsigned char)(0x80 | ((unicode & 0x0fc0) >> 6));
   last = (unsigned char)(0x80 | (unicode & 0x003f));
   obstack_1grow (&temporary_obstack, lead);
   obstack_1grow (&temporary_obstack, middle);
   obstack_1grow (&temporary_obstack, last);
 }

 #ifndef JC1_LITE
 /* Wrap NODE with build_expr_wfl, using the file name in ctxp and the
    line/column currently stored in ctxp->elc, then clear the type of the
    resulting node and return it.  */
 static tree
 build_wfl_node (node)
      tree node;
 {
   tree wfl = build_expr_wfl (node, ctxp->filename,
                              ctxp->elc.line, ctxp->elc.col);

   /* A null type keeps java_complete_lhs from short-circuiting the node
      when it is constant.  */
   TREE_TYPE (wfl) = NULL_TREE;
   return wfl;
 }
 #endif

 static void
 java_lex_error (msg, forward)
      const char *msg ATTRIBUTE_UNUSED;
      int forward ATTRIBUTE_UNUSED;
 {
 #ifndef JC1_LITE
   ctxp->elc.line = ctxp->c_line->lineno;
   ctxp->elc.col = ctxp->c_line->char_col-1+forward;

   /* Might be caught in the middle of some error report */
   ctxp->java_error_flag = 0;
   java_error (NULL);
   java_error (msg);
 #endif
 }

 #ifndef JC1_LITE
 /* Return 1 if C, a character already read from FP, is a line
    terminator ('\n' or '\r'), and 0 otherwise.  For '\r' one more
    character is read from FP: a following '\n' is swallowed as part of
    the terminator, EOF is left alone, and anything else is pushed back
    onto FP.  */
 static int
 java_is_eol (fp, c)
   FILE *fp;
   int c;
 {
   if (c == '\n')
     return 1;

   if (c == '\r')
     {
       int lookahead = getc (fp);

       if (lookahead != EOF && lookahead != '\n')
         ungetc (lookahead, fp);
       return 1;
     }

   return 0;
 }
 #endif

 /* Build and return a diagnostic snippet for position (LINE, COL) of
    FILENAME: the text of line LINE, a newline, COL+3 padding characters
    and a '^' marker.  The first line of the file is line 1 and the first
    column is 1.  Two values of COL are special:

      COL == -1 means, at the CR/LF in LINE;
      COL == -2 means, at the first non space char in LINE (in this mode
                tabs found in the line are reproduced in the padding).

    If the file ends before LINE is reached, a placeholder message is
    used in place of the line text.  The string is grown on
    temporary_obstack and returned through obstack_finish.  A file that
    cannot be opened is reported through fatal_io_error.  When compiled
    with JC1_LITE defined, this simply returns 0.  */
 char *
 java_get_line_col (filename, line, col)
      const char *filename ATTRIBUTE_UNUSED;
      int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
 {
 #ifdef JC1_LITE
   return 0;
 #else
   /* Dumb implementation. Doesn't try to cache or optimize things. */

   FILE *fp;
   int c, ccol, cline = 1;
   int current_line_col = 0;
   int first_non_space = 0;

   if (!(fp = fopen (filename, "r")))
     fatal_io_error ("can't open %s", filename);

   /* Skip everything up to the beginning of line LINE.  */
   while (cline != line)
     {
       c = getc (fp);
       if (c == EOF)
         {
           static const char msg[] = "<<file too short - unexpected EOF>>";
           obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
           goto have_line;
         }
       if (java_is_eol (fp, c))
         cline++;
     }

   /* Gather the chars of the current line in a buffer */
   for (;;)
     {
       c = getc (fp);
       if (c < 0 || java_is_eol (fp, c))
         break;
       /* NOTE(review): zero doubles as "not seen yet", so a non space
          char in column 0 does not latch and a later non space char
          overwrites the value -- confirm whether that is intended.  */
       if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
         first_non_space = current_line_col;
       obstack_1grow (&temporary_obstack, c);
       current_line_col++;
     }
  have_line:

   obstack_1grow (&temporary_obstack, '\n');

   /* From here on, a non-zero first_non_space also means "reproduce the
      tabs of the line in the padding"; that only applies to COL == -2.  */
   if (col == -1)
     {
       col = current_line_col;
       first_non_space = 0;
     }
   else if (col == -2)
     col = first_non_space;
   else
     first_non_space = 0;

   /* Place the '^' at the right position */
   for (ccol = 1; ccol <= col+3; ccol++)
     {
       char pad = ' ';

       if (first_non_space)
         {
           /* Fetch the base again on every iteration: growing the object
              may move it to another chunk, so an address taken before
              the loop could be stale by the time it is read.  */
           const char *base = obstack_base (&temporary_obstack);

           if (base [ccol-1] == '\t')
             pad = '\t';
         }
       obstack_1grow (&temporary_obstack, pad);
     }
   obstack_grow0 (&temporary_obstack, "^", 1);

   fclose (fp);
   return obstack_finish (&temporary_obstack);
 #endif
 }

 #ifndef JC1_LITE
 /* Compare the LENGTH bytes starting at STR with the NUL-terminated
    string NAME.  For each character of NAME, one value is fetched with
    UTF8_GET (presumably decoding one UTF-8 character and advancing STR
    -- confirm against the macro) and compared with it; the first
    mismatch returns the difference of the two.  If all of NAME matched,
    return 0 when STR has reached its end and 1 otherwise.  */
 static int
 utf8_cmp (str, length, name)
      const unsigned char *str;
      int length;
      const char *name;
 {
   const unsigned char *limit = str + length;
   const char *expected;

   for (expected = name; *expected; ++expected)
     {
       int ch = UTF8_GET (str, limit);

       if (ch != *expected)
         return ch - *expected;
     }

   /* NAME is exhausted; leftover input makes STR the greater one.  */
   return str != limit;
 }

 /* A sorted list of all C++ keywords (including spellings such as
    __asm__ and __typeof__).  cxx_keyword_p looks names up in this table
    by bisection, so the entries must stay in ascending order of their
    character values: upper case letters come before `_', which comes
    before lower case letters.  */

 static const char *const cxx_keywords[] =
 {
   "_Complex",
   "__alignof",
   "__alignof__",
   "__asm",
   "__asm__",
   "__attribute",
   "__attribute__",
   "__builtin_va_arg",
   "__complex",
   "__complex__",
   "__const",
   "__const__",
   "__extension__",
   "__imag",
   "__imag__",
   "__inline",
   "__inline__",
   "__label__",
   "__null",
   "__real",
   "__real__",
   "__restrict",
   "__restrict__",
   "__signed",
   "__signed__",
   "__typeof",
   "__typeof__",
   "__volatile",
   "__volatile__",
   "and",
   "and_eq",
   "asm",
   "auto",
   "bitand",
   "bitor",
   "bool",
   "break",
   "case",
   "catch",
   "char",
   "class",
   "compl",
   "const",
   "const_cast",
   "continue",
   "default",
   "delete",
   "do",
   "double",
   "dynamic_cast",
   "else",
   "enum",
   "explicit",
   "export",
   "extern",
   "false",
   "float",
   "for",
   "friend",
   "goto",
   "if",
   "inline",
   "int",
   "long",
   "mutable",
   "namespace",
   "new",
   "not",
   "not_eq",
   "operator",
   "or",
   "or_eq",
   "private",
   "protected",
   "public",
   "register",
   "reinterpret_cast",
   "return",
   "short",
   "signed",
   "sizeof",
   "static",
   "static_cast",
   "struct",
   "switch",
   "template",
   "this",
   "throw",
   "true",
   "try",
   "typedef",
   "typeid",
   "typename",
   "typeof",
   "union",
   "unsigned",
   "using",
   "virtual",
   "void",
   "volatile",
   "wchar_t",
   "while",
   "xor",
   "xor_eq"
 };

 /* Return true if NAME, which is LENGTH bytes long, is a C++ keyword or
    a C++ keyword followed only by `$' characters.  The lookup bisects
    the cxx_keywords table and stops as soon as the probe index no longer
    moves.  */

 int
 cxx_keyword_p (name, length)
      const char *name;
      int length;
 {
   int hi = ARRAY_SIZE (cxx_keywords);
   int lo = 0;
   int prev = -1;
   int probe = (hi + lo) / 2;

   while (probe != prev)
     {
       const char *keyword = cxx_keywords[probe];
       int keyword_len = strlen (keyword);
       int cmp_len = length < keyword_len ? length : keyword_len;
       int order = utf8_cmp (name, cmp_len, keyword);

       if (order == 0)
         {
           /* NAME starts with this keyword.  It is a match if whatever
              is left of NAME consists of `$' only; otherwise NAME sorts
              after the keyword.  */
           int tail = cmp_len;

           while (tail < length && name[tail] == '$')
             ++tail;
           if (tail == length)
             return 1;
           order = 1;
         }

       if (order < 0)
         hi = probe;
       else
         lo = probe;

       prev = probe;
       probe = (hi + lo) / 2;
     }
   return 0;
 }
 #endif /* JC1_LITE */