| ------------------------------------------------------------------------------ |
| -- -- |
| -- GNAT LIBRARY COMPONENTS -- |
| -- -- |
| -- G N A T . S P I T B O L . P A T T E R N S -- |
| -- -- |
| -- B o d y -- |
| -- -- |
| -- Copyright (C) 1998-2013, AdaCore -- |
| -- -- |
| -- GNAT is free software; you can redistribute it and/or modify it under -- |
| -- terms of the GNU General Public License as published by the Free Soft- -- |
| -- ware Foundation; either version 3, or (at your option) any later ver- -- |
| -- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- |
| -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- |
| -- or FITNESS FOR A PARTICULAR PURPOSE. -- |
| -- -- |
| -- As a special exception under Section 7 of GPL version 3, you are granted -- |
| -- additional permissions described in the GCC Runtime Library Exception, -- |
| -- version 3.1, as published by the Free Software Foundation. -- |
| -- -- |
| -- You should have received a copy of the GNU General Public License and -- |
| -- a copy of the GCC Runtime Library Exception along with this program; -- |
| -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- |
| -- <http://www.gnu.org/licenses/>. -- |
| -- -- |
| -- GNAT was originally developed by the GNAT team at New York University. -- |
| -- Extensive contributions were provided by Ada Core Technologies Inc. -- |
| -- -- |
| ------------------------------------------------------------------------------ |
| |
| -- Note: the data structures and general approach used in this implementation |
| -- are derived from the original MINIMAL sources for SPITBOL. The code is not |
| -- a direct translation, but the approach is followed closely. In particular, |
| -- we use the one stack approach developed in the SPITBOL implementation. |
| |
| with Ada.Strings.Unbounded.Aux; use Ada.Strings.Unbounded.Aux; |
| |
| with GNAT.Debug_Utilities; use GNAT.Debug_Utilities; |
| |
| with System; use System; |
| |
| with Ada.Unchecked_Conversion; |
| with Ada.Unchecked_Deallocation; |
| |
| package body GNAT.Spitbol.Patterns is |
| |
| ------------------------ |
| -- Internal Debugging -- |
| ------------------------ |
| |
| Internal_Debug : constant Boolean := False; |
| -- Set this flag to True to activate some built-in debugging traceback |
| -- These are all lines output with PutD and Put_LineD. |
| |
| procedure New_LineD; |
| pragma Inline (New_LineD); |
| -- Output new blank line with New_Line if Internal_Debug is True |
| |
| procedure PutD (Str : String); |
| pragma Inline (PutD); |
| -- Output string with Put if Internal_Debug is True |
| |
| procedure Put_LineD (Str : String); |
| pragma Inline (Put_LineD); |
| -- Output string with Put_Line if Internal_Debug is True |
| |
| ----------------------------- |
| -- Local Type Declarations -- |
| ----------------------------- |
| |
| subtype String_Ptr is Ada.Strings.Unbounded.String_Access; |
| subtype File_Ptr is Ada.Text_IO.File_Access; |
| |
| function To_Address is new Ada.Unchecked_Conversion (PE_Ptr, Address); |
| -- Used only for debugging output purposes |
| |
| subtype AFC is Ada.Finalization.Controlled; |
| |
| N : constant PE_Ptr := null; |
| -- Shorthand used to initialize Copy fields to null |
| |
| type Natural_Ptr is access all Natural; |
| type Pattern_Ptr is access all Pattern; |
| |
| -------------------------------------------------- |
| -- Description of Algorithm and Data Structures -- |
| -------------------------------------------------- |
| |
| -- A pattern structure is represented as a linked graph of nodes |
| -- with the following structure: |
| |
| -- +------------------------------------+ |
| -- I Pcode I |
| -- +------------------------------------+ |
| -- I Index I |
| -- +------------------------------------+ |
| -- I Pthen I |
| -- +------------------------------------+ |
| -- I parameter(s) I |
| -- +------------------------------------+ |
| |
| -- Pcode is a code value indicating the type of the pattern node. This |
| -- code is used both as the discriminant value for the record, and as |
| -- the case index in the main match routine that branches to the proper |
| -- match code for the given element. |
| |
| -- Index is a serial index number. The use of these serial index |
| -- numbers is described in a separate section. |
| |
| -- Pthen is a pointer to the successor node, i.e the node to be matched |
| -- if the attempt to match the node succeeds. If this is the last node |
| -- of the pattern to be matched, then Pthen points to a dummy node |
| -- of kind PC_EOP (end of pattern), which initializes pattern exit. |
| |
| -- The parameter or parameters are present for certain node types, |
| -- and the type varies with the pattern code. |
| |
| type Pattern_Code is ( |
| PC_Arb_Y, |
| PC_Assign, |
| PC_Bal, |
| PC_BreakX_X, |
| PC_Cancel, |
| PC_EOP, |
| PC_Fail, |
| PC_Fence, |
| PC_Fence_X, |
| PC_Fence_Y, |
| PC_R_Enter, |
| PC_R_Remove, |
| PC_R_Restore, |
| PC_Rest, |
| PC_Succeed, |
| PC_Unanchored, |
| |
| PC_Alt, |
| PC_Arb_X, |
| PC_Arbno_S, |
| PC_Arbno_X, |
| |
| PC_Rpat, |
| |
| PC_Pred_Func, |
| |
| PC_Assign_Imm, |
| PC_Assign_OnM, |
| PC_Any_VP, |
| PC_Break_VP, |
| PC_BreakX_VP, |
| PC_NotAny_VP, |
| PC_NSpan_VP, |
| PC_Span_VP, |
| PC_String_VP, |
| |
| PC_Write_Imm, |
| PC_Write_OnM, |
| |
| PC_Null, |
| PC_String, |
| |
| PC_String_2, |
| PC_String_3, |
| PC_String_4, |
| PC_String_5, |
| PC_String_6, |
| |
| PC_Setcur, |
| |
| PC_Any_CH, |
| PC_Break_CH, |
| PC_BreakX_CH, |
| PC_Char, |
| PC_NotAny_CH, |
| PC_NSpan_CH, |
| PC_Span_CH, |
| |
| PC_Any_CS, |
| PC_Break_CS, |
| PC_BreakX_CS, |
| PC_NotAny_CS, |
| PC_NSpan_CS, |
| PC_Span_CS, |
| |
| PC_Arbno_Y, |
| PC_Len_Nat, |
| PC_Pos_Nat, |
| PC_RPos_Nat, |
| PC_RTab_Nat, |
| PC_Tab_Nat, |
| |
| PC_Pos_NF, |
| PC_Len_NF, |
| PC_RPos_NF, |
| PC_RTab_NF, |
| PC_Tab_NF, |
| |
| PC_Pos_NP, |
| PC_Len_NP, |
| PC_RPos_NP, |
| PC_RTab_NP, |
| PC_Tab_NP, |
| |
| PC_Any_VF, |
| PC_Break_VF, |
| PC_BreakX_VF, |
| PC_NotAny_VF, |
| PC_NSpan_VF, |
| PC_Span_VF, |
| PC_String_VF); |
| |
| type IndexT is range 0 .. +(2 **15 - 1); |
| |
| type PE (Pcode : Pattern_Code) is record |
| |
| Index : IndexT; |
| -- Serial index number of pattern element within pattern |
| |
| Pthen : PE_Ptr; |
| -- Successor element, to be matched after this one |
| |
| case Pcode is |
| |
| when PC_Arb_Y | |
| PC_Assign | |
| PC_Bal | |
| PC_BreakX_X | |
| PC_Cancel | |
| PC_EOP | |
| PC_Fail | |
| PC_Fence | |
| PC_Fence_X | |
| PC_Fence_Y | |
| PC_Null | |
| PC_R_Enter | |
| PC_R_Remove | |
| PC_R_Restore | |
| PC_Rest | |
| PC_Succeed | |
| PC_Unanchored => null; |
| |
| when PC_Alt | |
| PC_Arb_X | |
| PC_Arbno_S | |
| PC_Arbno_X => Alt : PE_Ptr; |
| |
| when PC_Rpat => PP : Pattern_Ptr; |
| |
| when PC_Pred_Func => BF : Boolean_Func; |
| |
| when PC_Assign_Imm | |
| PC_Assign_OnM | |
| PC_Any_VP | |
| PC_Break_VP | |
| PC_BreakX_VP | |
| PC_NotAny_VP | |
| PC_NSpan_VP | |
| PC_Span_VP | |
| PC_String_VP => VP : VString_Ptr; |
| |
| when PC_Write_Imm | |
| PC_Write_OnM => FP : File_Ptr; |
| |
| when PC_String => Str : String_Ptr; |
| |
| when PC_String_2 => Str2 : String (1 .. 2); |
| |
| when PC_String_3 => Str3 : String (1 .. 3); |
| |
| when PC_String_4 => Str4 : String (1 .. 4); |
| |
| when PC_String_5 => Str5 : String (1 .. 5); |
| |
| when PC_String_6 => Str6 : String (1 .. 6); |
| |
| when PC_Setcur => Var : Natural_Ptr; |
| |
| when PC_Any_CH | |
| PC_Break_CH | |
| PC_BreakX_CH | |
| PC_Char | |
| PC_NotAny_CH | |
| PC_NSpan_CH | |
| PC_Span_CH => Char : Character; |
| |
| when PC_Any_CS | |
| PC_Break_CS | |
| PC_BreakX_CS | |
| PC_NotAny_CS | |
| PC_NSpan_CS | |
| PC_Span_CS => CS : Character_Set; |
| |
| when PC_Arbno_Y | |
| PC_Len_Nat | |
| PC_Pos_Nat | |
| PC_RPos_Nat | |
| PC_RTab_Nat | |
| PC_Tab_Nat => Nat : Natural; |
| |
| when PC_Pos_NF | |
| PC_Len_NF | |
| PC_RPos_NF | |
| PC_RTab_NF | |
| PC_Tab_NF => NF : Natural_Func; |
| |
| when PC_Pos_NP | |
| PC_Len_NP | |
| PC_RPos_NP | |
| PC_RTab_NP | |
| PC_Tab_NP => NP : Natural_Ptr; |
| |
| when PC_Any_VF | |
| PC_Break_VF | |
| PC_BreakX_VF | |
| PC_NotAny_VF | |
| PC_NSpan_VF | |
| PC_Span_VF | |
| PC_String_VF => VF : VString_Func; |
| |
| end case; |
| end record; |
| |
| subtype PC_Has_Alt is Pattern_Code range PC_Alt .. PC_Arbno_X; |
| -- Range of pattern codes that has an Alt field. This is used in the |
| -- recursive traversals, since these links must be followed. |
| |
| EOP_Element : aliased constant PE := (PC_EOP, 0, N); |
| -- This is the end of pattern element, and is thus the representation of |
| -- a null pattern. It has a zero index element since it is never placed |
| -- inside a pattern. Furthermore it does not need a successor, since it |
| -- marks the end of the pattern, so that no more successors are needed. |
| |
| EOP : constant PE_Ptr := EOP_Element'Unrestricted_Access; |
| -- This is the end of pattern pointer, that is used in the Pthen pointer |
| -- of other nodes to signal end of pattern. |
| |
| -- The following array is used to determine if a pattern used as an |
| -- argument for Arbno is eligible for treatment using the simple Arbno |
| -- structure (i.e. it is a pattern that is guaranteed to match at least |
| -- one character on success, and not to make any entries on the stack. |
| |
| OK_For_Simple_Arbno : constant array (Pattern_Code) of Boolean := |
| (PC_Any_CS | |
| PC_Any_CH | |
| PC_Any_VF | |
| PC_Any_VP | |
| PC_Char | |
| PC_Len_Nat | |
| PC_NotAny_CS | |
| PC_NotAny_CH | |
| PC_NotAny_VF | |
| PC_NotAny_VP | |
| PC_Span_CS | |
| PC_Span_CH | |
| PC_Span_VF | |
| PC_Span_VP | |
| PC_String | |
| PC_String_2 | |
| PC_String_3 | |
| PC_String_4 | |
| PC_String_5 | |
| PC_String_6 => True, |
| others => False); |
| |
| ------------------------------- |
| -- The Pattern History Stack -- |
| ------------------------------- |
| |
| -- The pattern history stack is used for controlling backtracking when |
| -- a match fails. The idea is to stack entries that give a cursor value |
| -- to be restored, and a node to be reestablished as the current node to |
| -- attempt an appropriate rematch operation. The processing for a pattern |
| -- element that has rematch alternatives pushes an appropriate entry or |
| -- entry on to the stack, and the proceeds. If a match fails at any point, |
| -- the top element of the stack is popped off, resetting the cursor and |
| -- the match continues by accessing the node stored with this entry. |
| |
| type Stack_Entry is record |
| |
| Cursor : Integer; |
| -- Saved cursor value that is restored when this entry is popped |
| -- from the stack if a match attempt fails. Occasionally, this |
| -- field is used to store a history stack pointer instead of a |
| -- cursor. Such cases are noted in the documentation and the value |
| -- stored is negative since stack pointer values are always negative. |
| |
| Node : PE_Ptr; |
| -- This pattern element reference is reestablished as the current |
| -- Node to be matched (which will attempt an appropriate rematch). |
| |
| end record; |
| |
| subtype Stack_Range is Integer range -Stack_Size .. -1; |
| |
| type Stack_Type is array (Stack_Range) of Stack_Entry; |
| -- The type used for a history stack. The actual instance of the stack |
| -- is declared as a local variable in the Match routine, to properly |
| -- handle recursive calls to Match. All stack pointer values are negative |
| -- to distinguish them from normal cursor values. |
| |
| -- Note: the pattern matching stack is used only to handle backtracking. |
| -- If no backtracking occurs, its entries are never accessed, and never |
| -- popped off, and in particular it is normal for a successful match |
| -- to terminate with entries on the stack that are simply discarded. |
| |
| -- Note: in subsequent diagrams of the stack, we always place element |
| -- zero (the deepest element) at the top of the page, then build the |
| -- stack down on the page with the most recent (top of stack) element |
| -- being the bottom-most entry on the page. |
| |
| -- Stack checking is handled by labeling every pattern with the maximum |
| -- number of stack entries that are required, so a single check at the |
| -- start of matching the pattern suffices. There are two exceptions. |
| |
| -- First, the count does not include entries for recursive pattern |
| -- references. Such recursions must therefore perform a specific |
| -- stack check with respect to the number of stack entries required |
| -- by the recursive pattern that is accessed and the amount of stack |
| -- that remains unused. |
| |
| -- Second, the count includes only one iteration of an Arbno pattern, |
| -- so a specific check must be made on subsequent iterations that there |
| -- is still enough stack space left. The Arbno node has a field that |
| -- records the number of stack entries required by its argument for |
| -- this purpose. |
| |
| --------------------------------------------------- |
| -- Use of Serial Index Field in Pattern Elements -- |
| --------------------------------------------------- |
| |
| -- The serial index numbers for the pattern elements are assigned as |
| -- a pattern is constructed from its constituent elements. Note that there |
| -- is never any sharing of pattern elements between patterns (copies are |
| -- always made), so the serial index numbers are unique to a particular |
| -- pattern as referenced from the P field of a value of type Pattern. |
| |
| -- The index numbers meet three separate invariants, which are used for |
| -- various purposes as described in this section. |
| |
| -- First, the numbers uniquely identify the pattern elements within a |
| -- pattern. If Num is the number of elements in a given pattern, then |
| -- the serial index numbers for the elements of this pattern will range |
| -- from 1 .. Num, so that each element has a separate value. |
| |
| -- The purpose of this assignment is to provide a convenient auxiliary |
| -- data structure mechanism during operations which must traverse a |
| -- pattern (e.g. copy and finalization processing). Once constructed |
| -- patterns are strictly read only. This is necessary to allow sharing |
| -- of patterns between tasks. This means that we cannot go marking the |
| -- pattern (e.g. with a visited bit). Instead we construct a separate |
| -- vector that contains the necessary information indexed by the Index |
| -- values in the pattern elements. For this purpose the only requirement |
| -- is that they be uniquely assigned. |
| |
| -- Second, the pattern element referenced directly, i.e. the leading |
| -- pattern element, is always the maximum numbered element and therefore |
| -- indicates the total number of elements in the pattern. More precisely, |
| -- the element referenced by the P field of a pattern value, or the |
| -- element returned by any of the internal pattern construction routines |
| -- in the body (that return a value of type PE_Ptr) always is this |
| -- maximum element, |
| |
| -- The purpose of this requirement is to allow an immediate determination |
| -- of the number of pattern elements within a pattern. This is used to |
| -- properly size the vectors used to contain auxiliary information for |
| -- traversal as described above. |
| |
| -- Third, as compound pattern structures are constructed, the way in which |
| -- constituent parts of the pattern are constructed is stylized. This is |
| -- an automatic consequence of the way that these compound structures |
| -- are constructed, and basically what we are doing is simply documenting |
| -- and specifying the natural result of the pattern construction. The |
| -- section describing compound pattern structures gives details of the |
| -- numbering of each compound pattern structure. |
| |
| -- The purpose of specifying the stylized numbering structures for the |
| -- compound patterns is to help simplify the processing in the Image |
| -- function, since it eases the task of retrieving the original recursive |
| -- structure of the pattern from the flat graph structure of elements. |
| -- This use in the Image function is the only point at which the code |
| -- makes use of the stylized structures. |
| |
| type Ref_Array is array (IndexT range <>) of PE_Ptr; |
| -- This type is used to build an array whose N'th entry references the |
| -- element in a pattern whose Index value is N. See Build_Ref_Array. |
| |
| procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array); |
| -- Given a pattern element which is the leading element of a pattern |
| -- structure, and a Ref_Array with bounds 1 .. E.Index, fills in the |
| -- Ref_Array so that its N'th entry references the element of the |
| -- referenced pattern whose Index value is N. |
| |
| ------------------------------- |
| -- Recursive Pattern Matches -- |
| ------------------------------- |
| |
| -- The pattern primitive (+P) where P is a Pattern_Ptr or Pattern_Func |
| -- causes a recursive pattern match. This cannot be handled by an actual |
| -- recursive call to the outer level Match routine, since this would not |
| -- allow for possible backtracking into the region matched by the inner |
| -- pattern. Indeed this is the classical clash between recursion and |
| -- backtracking, and a simple recursive stack structure does not suffice. |
| |
| -- This section describes how this recursion and the possible associated |
| -- backtracking is handled. We still use a single stack, but we establish |
| -- the concept of nested regions on this stack, each of which has a stack |
| -- base value pointing to the deepest stack entry of the region. The base |
| -- value for the outer level is zero. |
| |
| -- When a recursive match is established, two special stack entries are |
| -- made. The first entry is used to save the original node that starts |
| -- the recursive match. This is saved so that the successor field of |
| -- this node is accessible at the end of the match, but it is never |
| -- popped and executed. |
| |
| -- The second entry corresponds to a standard new region action. A |
| -- PC_R_Remove node is stacked, whose cursor field is used to store |
| -- the outer stack base, and the stack base is reset to point to |
| -- this PC_R_Remove node. Then the recursive pattern is matched and |
| -- it can make history stack entries in the normal matter, so now |
| -- the stack looks like: |
| |
| -- (stack entries made by outer level) |
| |
| -- (Special entry, node is (+P) successor |
| -- cursor entry is not used) |
| |
| -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack base |
| -- saved base value for the enclosing region) |
| |
| -- (stack entries made by inner level) |
| |
| -- If a subsequent failure occurs and pops the PC_R_Remove node, it |
| -- removes itself and the special entry immediately underneath it, |
| -- restores the stack base value for the enclosing region, and then |
| -- again signals failure to look for alternatives that were stacked |
| -- before the recursion was initiated. |
| |
| -- Now we need to consider what happens if the inner pattern succeeds, as |
| -- signalled by accessing the special PC_EOP pattern primitive. First we |
| -- recognize the nested case by looking at the Base value. If this Base |
| -- value is Stack'First, then the entire match has succeeded, but if the |
| -- base value is greater than Stack'First, then we have successfully |
| -- matched an inner pattern, and processing continues at the outer level. |
| |
| -- There are two cases. The simple case is when the inner pattern has made |
| -- no stack entries, as recognized by the fact that the current stack |
| -- pointer is equal to the current base value. In this case it is fine to |
| -- remove all trace of the recursion by restoring the outer base value and |
| -- using the special entry to find the appropriate successor node. |
| |
| -- The more complex case arises when the inner match does make stack |
| -- entries. In this case, the PC_EOP processing stacks a special entry |
| -- whose cursor value saves the saved inner base value (the one that |
| -- references the corresponding PC_R_Remove value), and whose node |
| -- pointer references a PC_R_Restore node, so the stack looks like: |
| |
| -- (stack entries made by outer level) |
| |
| -- (Special entry, node is (+P) successor, |
| -- cursor entry is not used) |
| |
| -- (PC_R_Remove entry, "cursor" value is (negative) |
| -- saved base value for the enclosing region) |
| |
| -- (stack entries made by inner level) |
| |
| -- (PC_Region_Replace entry, "cursor" value is (negative) |
| -- stack pointer value referencing the PC_R_Remove entry). |
| |
| -- If the entire match succeeds, then these stack entries are, as usual, |
| -- ignored and abandoned. If on the other hand a subsequent failure |
| -- causes the PC_Region_Replace entry to be popped, it restores the |
| -- inner base value from its saved "cursor" value and then fails again. |
| -- Note that it is OK that the cursor is temporarily clobbered by this |
| -- pop, since the second failure will reestablish a proper cursor value. |
| |
| --------------------------------- |
| -- Compound Pattern Structures -- |
| --------------------------------- |
| |
| -- This section discusses the compound structures used to represent |
| -- constructed patterns. It shows the graph structures of pattern |
| -- elements that are constructed, and in the case of patterns that |
| -- provide backtracking possibilities, describes how the history |
| -- stack is used to control the backtracking. Finally, it notes the |
| -- way in which the Index numbers are assigned to the structure. |
| |
| -- In all diagrams, solid lines (built with minus signs or vertical |
| -- bars, represent successor pointers (Pthen fields) with > or V used |
| -- to indicate the direction of the pointer. The initial node of the |
| -- structure is in the upper left of the diagram. A dotted line is an |
| -- alternative pointer from the element above it to the element below |
| -- it. See individual sections for details on how alternatives are used. |
| |
| ------------------- |
| -- Concatenation -- |
| ------------------- |
| |
| -- In the pattern structures listed in this section, a line that looks |
| -- like ----> with nothing to the right indicates an end of pattern |
| -- (EOP) pointer that represents the end of the match. |
| |
| -- When a pattern concatenation (L & R) occurs, the resulting structure |
| -- is obtained by finding all such EOP pointers in L, and replacing |
| -- them to point to R. This is the most important flattening that |
| -- occurs in constructing a pattern, and it means that the pattern |
| -- matching circuitry does not have to keep track of the structure |
| -- of a pattern with respect to concatenation, since the appropriate |
| -- successor is always at hand. |
| |
| -- Concatenation itself generates no additional possibilities for |
| -- backtracking, but the constituent patterns of the concatenated |
| -- structure will make stack entries as usual. The maximum amount |
| -- of stack required by the structure is thus simply the sum of the |
| -- maximums required by L and R. |
| |
| -- The index numbering of a concatenation structure works by leaving |
| -- the numbering of the right hand pattern, R, unchanged and adjusting |
| -- the numbers in the left hand pattern, L up by the count of elements |
| -- in R. This ensures that the maximum numbered element is the leading |
| -- element as required (given that it was the leading element in L). |
| |
| ----------------- |
| -- Alternation -- |
| ----------------- |
| |
| -- A pattern (L or R) constructs the structure: |
| |
| -- +---+ +---+ |
| -- | A |---->| L |----> |
| -- +---+ +---+ |
| -- . |
| -- . |
| -- +---+ |
| -- | R |----> |
| -- +---+ |
| |
| -- The A element here is a PC_Alt node, and the dotted line represents |
| -- the contents of the Alt field. When the PC_Alt element is matched, |
| -- it stacks a pointer to the leading element of R on the history stack |
| -- so that on subsequent failure, a match of R is attempted. |
| |
| -- The A node is the highest numbered element in the pattern. The |
| -- original index numbers of R are unchanged, but the index numbers |
| -- of the L pattern are adjusted up by the count of elements in R. |
| |
| -- Note that the difference between the index of the L leading element |
| -- the index of the R leading element (after building the alt structure) |
| -- indicates the number of nodes in L, and this is true even after the |
| -- structure is incorporated into some larger structure. For example, |
| -- if the A node has index 16, and L has index 15 and R has index |
| -- 5, then we know that L has 10 (15-5) elements in it. |
| |
| -- Suppose that we now concatenate this structure to another pattern |
| -- with 9 elements in it. We will now have the A node with an index |
| -- of 25, L with an index of 24 and R with an index of 14. We still |
| -- know that L has 10 (24-14) elements in it, numbered 15-24, and |
| -- consequently the successor of the alternation structure has an |
| -- index with a value less than 15. This is used in Image to figure |
| -- out the original recursive structure of a pattern. |
| |
| -- To clarify the interaction of the alternation and concatenation |
| -- structures, here is a more complex example of the structure built |
| -- for the pattern: |
| |
| -- (V or W or X) (Y or Z) |
| |
| -- where A,B,C,D,E are all single element patterns: |
| |
| -- +---+ +---+ +---+ +---+ |
| -- I A I---->I V I---+-->I A I---->I Y I----> |
| -- +---+ +---+ I +---+ +---+ |
| -- . I . |
| -- . I . |
| -- +---+ +---+ I +---+ |
| -- I A I---->I W I-->I I Z I----> |
| -- +---+ +---+ I +---+ |
| -- . I |
| -- . I |
| -- +---+ I |
| -- I X I------------>+ |
| -- +---+ |
| |
| -- The numbering of the nodes would be as follows: |
| |
| -- +---+ +---+ +---+ +---+ |
| -- I 8 I---->I 7 I---+-->I 3 I---->I 2 I----> |
| -- +---+ +---+ I +---+ +---+ |
| -- . I . |
| -- . I . |
| -- +---+ +---+ I +---+ |
| -- I 6 I---->I 5 I-->I I 1 I----> |
| -- +---+ +---+ I +---+ |
| -- . I |
| -- . I |
| -- +---+ I |
| -- I 4 I------------>+ |
| -- +---+ |
| |
| -- Note: The above structure actually corresponds to |
| |
| -- (A or (B or C)) (D or E) |
| |
| -- rather than |
| |
| -- ((A or B) or C) (D or E) |
| |
| -- which is the more natural interpretation, but in fact alternation |
| -- is associative, and the construction of an alternative changes the |
| -- left grouped pattern to the right grouped pattern in any case, so |
| -- that the Image function produces a more natural looking output. |
| |
| --------- |
| -- Arb -- |
| --------- |
| |
| -- An Arb pattern builds the structure |
| |
| -- +---+ |
| -- | X |----> |
| -- +---+ |
| -- . |
| -- . |
| -- +---+ |
| -- | Y |----> |
| -- +---+ |
| |
| -- The X node is a PC_Arb_X node, which matches null, and stacks a |
| -- pointer to Y node, which is the PC_Arb_Y node that matches one |
| -- extra character and restacks itself. |
| |
| -- The PC_Arb_X node is numbered 2, and the PC_Arb_Y node is 1 |
| |
| ------------------------- |
| -- Arbno (simple case) -- |
| ------------------------- |
| |
| -- The simple form of Arbno can be used where the pattern always |
| -- matches at least one character if it succeeds, and it is known |
| -- not to make any history stack entries. In this case, Arbno (P) |
| -- can construct the following structure: |
| |
| -- +-------------+ |
| -- | ^ |
| -- V | |
| -- +---+ | |
| -- | S |----> | |
| -- +---+ | |
| -- . | |
| -- . | |
| -- +---+ | |
| -- | P |---------->+ |
| -- +---+ |
| |
| -- The S (PC_Arbno_S) node matches null stacking a pointer to the |
| -- pattern P. If a subsequent failure causes P to be matched and |
| -- this match succeeds, then node A gets restacked to try another |
| -- instance if needed by a subsequent failure. |
| |
| -- The node numbering of the constituent pattern P is not affected. |
| -- The S node has a node number of P.Index + 1. |
| |
| -------------------------- |
| -- Arbno (complex case) -- |
| -------------------------- |
| |
| -- A call to Arbno (P), where P can match null (or at least is not |
| -- known to require a non-null string) and/or P requires pattern stack |
| -- entries, constructs the following structure: |
| |
| -- +--------------------------+ |
| -- | ^ |
| -- V | |
| -- +---+ | |
| -- | X |----> | |
| -- +---+ | |
| -- . | |
| -- . | |
| -- +---+ +---+ +---+ | |
| -- | E |---->| P |---->| Y |--->+ |
| -- +---+ +---+ +---+ |
| |
| -- The node X (PC_Arbno_X) matches null, stacking a pointer to the |
| -- E-P-X structure used to match one Arbno instance. |
| |
| -- Here E is the PC_R_Enter node which matches null and creates two |
| -- stack entries. The first is a special entry whose node field is |
| -- not used at all, and whose cursor field has the initial cursor. |
| |
| -- The second entry corresponds to a standard new region action. A |
| -- PC_R_Remove node is stacked, whose cursor field is used to store |
| -- the outer stack base, and the stack base is reset to point to |
| -- this PC_R_Remove node. Then the pattern P is matched, and it can |
| -- make history stack entries in the normal manner, so now the stack |
| -- looks like: |
| |
| -- (stack entries made before assign pattern) |
| |
| -- (Special entry, node field not used, |
| -- used only to save initial cursor) |
| |
| -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base |
| -- saved base value for the enclosing region) |
| |
| -- (stack entries made by matching P) |
| |
| -- If the match of P fails, then the PC_R_Remove entry is popped and |
| -- it removes both itself and the special entry underneath it, |
| -- restores the outer stack base, and signals failure. |
| |
| -- If the match of P succeeds, then node Y, the PC_Arbno_Y node, pops |
| -- the inner region. There are two possibilities. If matching P left |
| -- no stack entries, then all traces of the inner region can be removed. |
| -- If there are stack entries, then we push an PC_Region_Replace stack |
| -- entry whose "cursor" value is the inner stack base value, and then |
| -- restore the outer stack base value, so the stack looks like: |
| |
| -- (stack entries made before assign pattern) |
| |
| -- (Special entry, node field not used, |
| -- used only to save initial cursor) |
| |
| -- (PC_R_Remove entry, "cursor" value is (negative) |
| -- saved base value for the enclosing region) |
| |
| -- (stack entries made by matching P) |
| |
| -- (PC_Region_Replace entry, "cursor" value is (negative) |
| -- stack pointer value referencing the PC_R_Remove entry). |
| |
| -- Now that we have matched another instance of the Arbno pattern, |
| -- we need to move to the successor. There are two cases. If the |
| -- Arbno pattern matched null, then there is no point in seeking |
| -- alternatives, since we would just match a whole bunch of nulls. |
| -- In this case we look through the alternative node, and move |
| -- directly to its successor (i.e. the successor of the Arbno |
| -- pattern). If on the other hand a non-null string was matched, |
| -- we simply follow the successor to the alternative node, which |
| -- sets up for another possible match of the Arbno pattern. |
| |
| -- As noted in the section on stack checking, the stack count (and |
| -- hence the stack check) for a pattern includes only one iteration |
| -- of the Arbno pattern. To make sure that multiple iterations do not |
| -- overflow the stack, the Arbno node saves the stack count required |
| -- by a single iteration, and the Concat function increments this to |
| -- include stack entries required by any successor. The PC_Arbno_Y |
| -- node uses this count to ensure that sufficient stack remains |
| -- before proceeding after matching each new instance. |
| |
| -- The node numbering of the constituent pattern P is not affected. |
| -- Where N is the number of nodes in P, the Y node is numbered N + 1, |
| -- the E node is N + 2, and the X node is N + 3. |
| |
| ---------------------- |
| -- Assign Immediate -- |
| ---------------------- |
| |
| -- Immediate assignment (P * V) constructs the following structure |
| |
| -- +---+ +---+ +---+ |
| -- | E |---->| P |---->| A |----> |
| -- +---+ +---+ +---+ |
| |
| -- Here E is the PC_R_Enter node which matches null and creates two |
| -- stack entries. The first is a special entry whose node field is |
| -- not used at all, and whose cursor field has the initial cursor. |
| |
| -- The second entry corresponds to a standard new region action. A |
| -- PC_R_Remove node is stacked, whose cursor field is used to store |
| -- the outer stack base, and the stack base is reset to point to |
| -- this PC_R_Remove node. Then the pattern P is matched, and it can |
| -- make history stack entries in the normal manner, so now the stack |
| -- looks like: |
| |
| -- (stack entries made before assign pattern) |
| |
| -- (Special entry, node field not used, |
| -- used only to save initial cursor) |
| |
| -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base |
| -- saved base value for the enclosing region) |
| |
| -- (stack entries made by matching P) |
| |
| -- If the match of P fails, then the PC_R_Remove entry is popped |
| -- and it removes both itself and the special entry underneath it, |
| -- restores the outer stack base, and signals failure. |
| |
| -- If the match of P succeeds, then node A, which is the actual |
| -- PC_Assign_Imm node, executes the assignment (using the stack |
| -- base to locate the entry with the saved starting cursor value), |
| -- and the pops the inner region. There are two possibilities, if |
| -- matching P left no stack entries, then all traces of the inner |
| -- region can be removed. If there are stack entries, then we push |
| -- an PC_Region_Replace stack entry whose "cursor" value is the |
| -- inner stack base value, and then restore the outer stack base |
| -- value, so the stack looks like: |
| |
| -- (stack entries made before assign pattern) |
| |
| -- (Special entry, node field not used, |
| -- used only to save initial cursor) |
| |
| -- (PC_R_Remove entry, "cursor" value is (negative) |
| -- saved base value for the enclosing region) |
| |
| -- (stack entries made by matching P) |
| |
| -- (PC_Region_Replace entry, "cursor" value is the (negative) |
| -- stack pointer value referencing the PC_R_Remove entry). |
| |
| -- If a subsequent failure occurs, the PC_Region_Replace node restores |
| -- the inner stack base value and signals failure to explore rematches |
| -- of the pattern P. |
| |
| -- The node numbering of the constituent pattern P is not affected. |
| -- Where N is the number of nodes in P, the A node is numbered N + 1, |
| -- and the E node is N + 2. |
| |
| --------------------- |
| -- Assign On Match -- |
| --------------------- |
| |
| -- The assign on match (**) pattern is quite similar to the assign |
| -- immediate pattern, except that the actual assignment has to be |
| -- delayed. The following structure is constructed: |
| |
| -- +---+ +---+ +---+ |
| -- | E |---->| P |---->| A |----> |
| -- +---+ +---+ +---+ |
| |
| -- The operation of this pattern is identical to that described above |
| -- for deferred assignment, up to the point where P has been matched. |
| |
| -- The A node, which is the PC_Assign_OnM node first pushes a |
| -- PC_Assign node onto the history stack. This node saves the ending |
| -- cursor and acts as a flag for the final assignment, as further |
| -- described below. |
| |
| -- It then stores a pointer to itself in the special entry node field. |
| -- This was otherwise unused, and is now used to retrieve the address |
| -- of the variable to be assigned at the end of the pattern. |
| |
| -- After that the inner region is terminated in the usual manner, |
| -- by stacking a PC_R_Restore entry as described for the assign |
| -- immediate case. Note that the optimization of completely |
| -- removing the inner region does not happen in this case, since |
| -- we have at least one stack entry (the PC_Assign one we just made). |
| -- The stack now looks like: |
| |
| -- (stack entries made before assign pattern) |
| |
| -- (Special entry, node points to copy of |
| -- the PC_Assign_OnM node, and the |
| -- cursor field saves the initial cursor). |
| |
| -- (PC_R_Remove entry, "cursor" value is (negative) |
| -- saved base value for the enclosing region) |
| |
| -- (stack entries made by matching P) |
| |
| -- (PC_Assign entry, saves final cursor) |
| |
| -- (PC_Region_Replace entry, "cursor" value is (negative) |
| -- stack pointer value referencing the PC_R_Remove entry). |
| |
| -- If a subsequent failure causes the PC_Assign node to execute it |
| -- simply removes itself and propagates the failure. |
| |
| -- If the match succeeds, then the history stack is scanned for |
| -- PC_Assign nodes, and the assignments are executed (examination |
| -- of the above diagram will show that all the necessary data is |
| -- at hand for the assignment). |
| |
| -- To optimize the common case where no assign-on-match operations |
| -- are present, a global flag Assign_OnM is maintained which is |
| -- initialize to False, and gets set True as part of the execution |
| -- of the PC_Assign_OnM node. The scan of the history stack for |
| -- PC_Assign entries is done only if this flag is set. |
| |
| -- The node numbering of the constituent pattern P is not affected. |
| -- Where N is the number of nodes in P, the A node is numbered N + 1, |
| -- and the E node is N + 2. |
| |
| --------- |
| -- Bal -- |
| --------- |
| |
| -- Bal builds a single node: |
| |
| -- +---+ |
| -- | B |----> |
| -- +---+ |
| |
| -- The node B is the PC_Bal node which matches a parentheses balanced |
| -- string, starting at the current cursor position. It then updates |
| -- the cursor past this matched string, and stacks a pointer to itself |
| -- with this updated cursor value on the history stack, to extend the |
| -- matched string on a subsequent failure. |
| |
| -- Since this is a single node it is numbered 1 (the reason we include |
| -- it in the compound patterns section is that it backtracks). |
| |
| ------------ |
| -- BreakX -- |
| ------------ |
| |
| -- BreakX builds the structure |
| |
| -- +---+ +---+ |
| -- | B |---->| A |----> |
| -- +---+ +---+ |
| -- ^ . |
| -- | . |
| -- | +---+ |
| -- +<------| X | |
| -- +---+ |
| |
| -- Here the B node is the BreakX_xx node that performs a normal Break |
| -- function. The A node is an alternative (PC_Alt) node that matches |
| -- null, but stacks a pointer to node X (the PC_BreakX_X node) which |
| -- extends the match one character (to eat up the previously detected |
| -- break character), and then rematches the break. |
| |
| -- The B node is numbered 3, the alternative node is 1, and the X |
| -- node is 2. |
| |
| ----------- |
| -- Fence -- |
| ----------- |
| |
| -- Fence builds a single node: |
| |
| -- +---+ |
| -- | F |----> |
| -- +---+ |
| |
| -- The element F, PC_Fence, matches null, and stacks a pointer to a |
| -- PC_Cancel element which will abort the match on a subsequent failure. |
| |
| -- Since this is a single element it is numbered 1 (the reason we |
| -- include it in the compound patterns section is that it backtracks). |
| |
| -------------------- |
| -- Fence Function -- |
| -------------------- |
| |
| -- A call to the Fence function builds the structure: |
| |
| -- +---+ +---+ +---+ |
| -- | E |---->| P |---->| X |----> |
| -- +---+ +---+ +---+ |
| |
| -- Here E is the PC_R_Enter node which matches null and creates two |
| -- stack entries. The first is a special entry which is not used at |
| -- all in the fence case (it is present merely for uniformity with |
| -- other cases of region enter operations). |
| |
| -- The second entry corresponds to a standard new region action. A |
| -- PC_R_Remove node is stacked, whose cursor field is used to store |
| -- the outer stack base, and the stack base is reset to point to |
| -- this PC_R_Remove node. Then the pattern P is matched, and it can |
| -- make history stack entries in the normal manner, so now the stack |
| -- looks like: |
| |
| -- (stack entries made before fence pattern) |
| |
| -- (Special entry, not used at all) |
| |
| -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base |
| -- saved base value for the enclosing region) |
| |
| -- (stack entries made by matching P) |
| |
| -- If the match of P fails, then the PC_R_Remove entry is popped |
| -- and it removes both itself and the special entry underneath it, |
| -- restores the outer stack base, and signals failure. |
| |
| -- If the match of P succeeds, then node X, the PC_Fence_X node, gets |
| -- control. One might be tempted to think that at this point, the |
| -- history stack entries made by matching P can just be removed since |
| -- they certainly are not going to be used for rematching (that is |
| -- whole point of Fence after all). However, this is wrong, because |
| -- it would result in the loss of possible assign-on-match entries |
| -- for deferred pattern assignments. |
| |
| -- Instead what we do is to make a special entry whose node references |
| -- PC_Fence_Y, and whose cursor saves the inner stack base value, i.e. |
| -- the pointer to the PC_R_Remove entry. Then the outer stack base |
| -- pointer is restored, so the stack looks like: |
| |
| -- (stack entries made before assign pattern) |
| |
| -- (Special entry, not used at all) |
| |
| -- (PC_R_Remove entry, "cursor" value is (negative) |
| -- saved base value for the enclosing region) |
| |
| -- (stack entries made by matching P) |
| |
| -- (PC_Fence_Y entry, "cursor" value is (negative) stack |
| -- pointer value referencing the PC_R_Remove entry). |
| |
| -- If a subsequent failure occurs, then the PC_Fence_Y entry removes |
| -- the entire inner region, including all entries made by matching P, |
| -- and alternatives prior to the Fence pattern are sought. |
| |
| -- The node numbering of the constituent pattern P is not affected. |
| -- Where N is the number of nodes in P, the X node is numbered N + 1, |
| -- and the E node is N + 2. |
| |
| ------------- |
| -- Succeed -- |
| ------------- |
| |
| -- Succeed builds a single node: |
| |
| -- +---+ |
| -- | S |----> |
| -- +---+ |
| |
| -- The node S is the PC_Succeed node which matches null, and stacks |
| -- a pointer to itself on the history stack, so that a subsequent |
| -- failure repeats the same match. |
| |
| -- Since this is a single node it is numbered 1 (the reason we include |
| -- it in the compound patterns section is that it backtracks). |
| |
| --------------------- |
| -- Write Immediate -- |
| --------------------- |
| |
| -- The structure built for a write immediate operation (P * F, where |
| -- F is a file access value) is: |
| |
| -- +---+ +---+ +---+ |
| -- | E |---->| P |---->| W |----> |
| -- +---+ +---+ +---+ |
| |
| -- Here E is the PC_R_Enter node and W is the PC_Write_Imm node. The |
| -- handling is identical to that described above for Assign Immediate, |
| -- except that at the point where a successful match occurs, the matched |
| -- substring is written to the referenced file. |
| |
| -- The node numbering of the constituent pattern P is not affected. |
| -- Where N is the number of nodes in P, the W node is numbered N + 1, |
| -- and the E node is N + 2. |
| |
| -------------------- |
| -- Write On Match -- |
| -------------------- |
| |
| -- The structure built for a write on match operation (P ** F, where |
| -- F is a file access value) is: |
| |
| -- +---+ +---+ +---+ |
| -- | E |---->| P |---->| W |----> |
| -- +---+ +---+ +---+ |
| |
| -- Here E is the PC_R_Enter node and W is the PC_Write_OnM node. The |
| -- handling is identical to that described above for Assign On Match, |
| -- except that at the point where a successful match has completed, |
| -- the matched substring is written to the referenced file. |
| |
| -- The node numbering of the constituent pattern P is not affected. |
| -- Where N is the number of nodes in P, the W node is numbered N + 1, |
| -- and the E node is N + 2. |
| ----------------------- |
| -- Constant Patterns -- |
| ----------------------- |
| |
| -- The following pattern elements are referenced only from the pattern |
| -- history stack. In each case the processing for the pattern element |
| -- results in pattern match abort, or further failure, so there is no |
| -- need for a successor and no need for a node number |
| |
| CP_Assign : aliased PE := (PC_Assign, 0, N); |
| CP_Cancel : aliased PE := (PC_Cancel, 0, N); |
| CP_Fence_Y : aliased PE := (PC_Fence_Y, 0, N); |
| CP_R_Remove : aliased PE := (PC_R_Remove, 0, N); |
| CP_R_Restore : aliased PE := (PC_R_Restore, 0, N); |
| |
| ----------------------- |
| -- Local Subprograms -- |
| ----------------------- |
| |
| function Alternate (L, R : PE_Ptr) return PE_Ptr; |
| function "or" (L, R : PE_Ptr) return PE_Ptr renames Alternate; |
| -- Build pattern structure corresponding to the alternation of L, R. |
| -- (i.e. try to match L, and if that fails, try to match R). |
| |
| function Arbno_Simple (P : PE_Ptr) return PE_Ptr; |
| -- Build simple Arbno pattern, P is a pattern that is guaranteed to |
| -- match at least one character if it succeeds and to require no |
| -- stack entries under all circumstances. The result returned is |
| -- a simple Arbno structure as previously described. |
| |
| function Bracket (E, P, A : PE_Ptr) return PE_Ptr; |
| -- Given two single node pattern elements E and A, and a (possible |
| -- complex) pattern P, construct the concatenation E-->P-->A and |
| -- return a pointer to E. The concatenation does not affect the |
| -- node numbering in P. A has a number one higher than the maximum |
| -- number in P, and E has a number two higher than the maximum |
| -- number in P (see for example the Assign_Immediate structure to |
| -- understand a typical use of this function). |
| |
| function BreakX_Make (B : PE_Ptr) return Pattern; |
| -- Given a pattern element for a Break pattern, returns the |
| -- corresponding BreakX compound pattern structure. |
| |
| function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr; |
| -- Creates a pattern element that represents a concatenation of the |
| -- two given pattern elements (i.e. the pattern L followed by R). |
| -- The result returned is always the same as L, but the pattern |
| -- referenced by L is modified to have R as a successor. This |
| -- procedure does not copy L or R, so if a copy is required, it |
| -- is the responsibility of the caller. The Incr parameter is an |
| -- amount to be added to the Nat field of any P_Arbno_Y node that is |
| -- in the left operand, it represents the additional stack space |
| -- required by the right operand. |
| |
| function C_To_PE (C : PChar) return PE_Ptr; |
| -- Given a character, constructs a pattern element that matches |
| -- the single character. |
| |
| function Copy (P : PE_Ptr) return PE_Ptr; |
| -- Creates a copy of the pattern element referenced by the given |
| -- pattern element reference. This is a deep copy, which means that |
| -- it follows the Next and Alt pointers. |
| |
| function Image (P : PE_Ptr) return String; |
| -- Returns the image of the address of the referenced pattern element. |
| -- This is equivalent to Image (To_Address (P)); |
| |
| function Is_In (C : Character; Str : String) return Boolean; |
| pragma Inline (Is_In); |
| -- Determines if the character C is in string Str |
| |
| procedure Logic_Error; |
| -- Called to raise Program_Error with an appropriate message if an |
| -- internal logic error is detected. |
| |
| function Str_BF (A : Boolean_Func) return String; |
| function Str_FP (A : File_Ptr) return String; |
| function Str_NF (A : Natural_Func) return String; |
| function Str_NP (A : Natural_Ptr) return String; |
| function Str_PP (A : Pattern_Ptr) return String; |
| function Str_VF (A : VString_Func) return String; |
| function Str_VP (A : VString_Ptr) return String; |
| -- These are debugging routines, which return a representation of the |
| -- given access value (they are called only by Image and Dump) |
| |
| procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr); |
| -- Adjusts all EOP pointers in Pat to point to Succ. No other changes |
| -- are made. In particular, Succ is unchanged, and no index numbers |
| -- are modified. Note that Pat may not be equal to EOP on entry. |
| |
| function S_To_PE (Str : PString) return PE_Ptr; |
| -- Given a string, constructs a pattern element that matches the string |
| |
| procedure Uninitialized_Pattern; |
| pragma No_Return (Uninitialized_Pattern); |
| -- Called to raise Program_Error with an appropriate error message if |
| -- an uninitialized pattern is used in any pattern construction or |
| -- pattern matching operation. |
| |
| procedure XMatch |
| (Subject : String; |
| Pat_P : PE_Ptr; |
| Pat_S : Natural; |
| Start : out Natural; |
| Stop : out Natural); |
| -- This is the common pattern match routine. It is passed a string and |
| -- a pattern, and it indicates success or failure, and on success the |
| -- section of the string matched. It does not perform any assignments |
| -- to the subject string, so pattern replacement is for the caller. |
| -- |
| -- Subject The subject string. The lower bound is always one. In the |
| -- Match procedures, it is fine to use strings whose lower bound |
| -- is not one, but we perform a one time conversion before the |
| -- call to XMatch, so that XMatch does not have to be bothered |
| -- with strange lower bounds. |
| -- |
| -- Pat_P Points to initial pattern element of pattern to be matched |
| -- |
| -- Pat_S Maximum required stack entries for pattern to be matched |
| -- |
| -- Start If match is successful, starting index of matched section. |
| -- This value is always non-zero. A value of zero is used to |
| -- indicate a failed match. |
| -- |
| -- Stop If match is successful, ending index of matched section. |
| -- This can be zero if we match the null string at the start, |
| -- in which case Start is set to zero, and Stop to one. If the |
| -- Match fails, then the contents of Stop is undefined. |
| |
| procedure XMatchD |
| (Subject : String; |
| Pat_P : PE_Ptr; |
| Pat_S : Natural; |
| Start : out Natural; |
| Stop : out Natural); |
| -- Identical in all respects to XMatch, except that trace information is |
| -- output on Standard_Output during execution of the match. This is the |
| -- version that is called if the original Match call has Debug => True. |
| |
| --------- |
| -- "&" -- |
| --------- |
| |
| function "&" (L : PString; R : Pattern) return Pattern is |
| begin |
| return (AFC with R.Stk, Concat (S_To_PE (L), Copy (R.P), R.Stk)); |
| end "&"; |
| |
| function "&" (L : Pattern; R : PString) return Pattern is |
| begin |
| return (AFC with L.Stk, Concat (Copy (L.P), S_To_PE (R), 0)); |
| end "&"; |
| |
| function "&" (L : PChar; R : Pattern) return Pattern is |
| begin |
| return (AFC with R.Stk, Concat (C_To_PE (L), Copy (R.P), R.Stk)); |
| end "&"; |
| |
| function "&" (L : Pattern; R : PChar) return Pattern is |
| begin |
| return (AFC with L.Stk, Concat (Copy (L.P), C_To_PE (R), 0)); |
| end "&"; |
| |
| function "&" (L : Pattern; R : Pattern) return Pattern is |
| begin |
| return (AFC with L.Stk + R.Stk, Concat (Copy (L.P), Copy (R.P), R.Stk)); |
| end "&"; |
| |
| --------- |
| -- "*" -- |
| --------- |
| |
| -- Assign immediate |
| |
| -- +---+ +---+ +---+ |
| -- | E |---->| P |---->| A |----> |
| -- +---+ +---+ +---+ |
| |
| -- The node numbering of the constituent pattern P is not affected. |
| -- Where N is the number of nodes in P, the A node is numbered N + 1, |
| -- and the E node is N + 2. |
| |
| function "*" (P : Pattern; Var : VString_Var) return Pattern is |
| Pat : constant PE_Ptr := Copy (P.P); |
| E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); |
| A : constant PE_Ptr := |
| new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access); |
| begin |
| return (AFC with P.Stk + 3, Bracket (E, Pat, A)); |
| end "*"; |
| |
| function "*" (P : PString; Var : VString_Var) return Pattern is |
| Pat : constant PE_Ptr := S_To_PE (P); |
| E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); |
| A : constant PE_Ptr := |
| new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access); |
| begin |
| return (AFC with 3, Bracket (E, Pat, A)); |
| end "*"; |
| |
| function "*" (P : PChar; Var : VString_Var) return Pattern is |
| Pat : constant PE_Ptr := C_To_PE (P); |
| E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); |
| A : constant PE_Ptr := |
| new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access); |
| begin |
| return (AFC with 3, Bracket (E, Pat, A)); |
| end "*"; |
| |
| -- Write immediate |
| |
| -- +---+ +---+ +---+ |
| -- | E |---->| P |---->| W |----> |
| -- +---+ +---+ +---+ |
| |
| -- The node numbering of the constituent pattern P is not affected. |
| -- Where N is the number of nodes in P, the W node is numbered N + 1, |
| -- and the E node is N + 2. |
| |
| function "*" (P : Pattern; Fil : File_Access) return Pattern is |
| Pat : constant PE_Ptr := Copy (P.P); |
| E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); |
| W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil); |
| begin |
| return (AFC with 3, Bracket (E, Pat, W)); |
| end "*"; |
| |
| function "*" (P : PString; Fil : File_Access) return Pattern is |
| Pat : constant PE_Ptr := S_To_PE (P); |
| E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); |
| W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil); |
| begin |
| return (AFC with 3, Bracket (E, Pat, W)); |
| end "*"; |
| |
| function "*" (P : PChar; Fil : File_Access) return Pattern is |
| Pat : constant PE_Ptr := C_To_PE (P); |
| E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); |
| W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil); |
| begin |
| return (AFC with 3, Bracket (E, Pat, W)); |
| end "*"; |
| |
| ---------- |
| -- "**" -- |
| ---------- |
| |
| -- Assign on match |
| |
| -- +---+ +---+ +---+ |
| -- | E |---->| P |---->| A |----> |
| -- +---+ +---+ +---+ |
| |
| -- The node numbering of the constituent pattern P is not affected. |
| -- Where N is the number of nodes in P, the A node is numbered N + 1, |
| -- and the E node is N + 2. |
| |
| function "**" (P : Pattern; Var : VString_Var) return Pattern is |
| Pat : constant PE_Ptr := Copy (P.P); |
| E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); |
| A : constant PE_Ptr := |
| new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access); |
| begin |
| return (AFC with P.Stk + 3, Bracket (E, Pat, A)); |
| end "**"; |
| |
| function "**" (P : PString; Var : VString_Var) return Pattern is |
| Pat : constant PE_Ptr := S_To_PE (P); |
| E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); |
| A : constant PE_Ptr := |
| new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access); |
| begin |
| return (AFC with 3, Bracket (E, Pat, A)); |
| end "**"; |
| |
| function "**" (P : PChar; Var : VString_Var) return Pattern is |
| Pat : constant PE_Ptr := C_To_PE (P); |
| E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); |
| A : constant PE_Ptr := |
| new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access); |
| begin |
| return (AFC with 3, Bracket (E, Pat, A)); |
| end "**"; |
| |
| -- Write on match |
| |
| -- +---+ +---+ +---+ |
| -- | E |---->| P |---->| W |----> |
| -- +---+ +---+ +---+ |
| |
| -- The node numbering of the constituent pattern P is not affected. |
| -- Where N is the number of nodes in P, the W node is numbered N + 1, |
| -- and the E node is N + 2. |
| |
| function "**" (P : Pattern; Fil : File_Access) return Pattern is |
| Pat : constant PE_Ptr := Copy (P.P); |
| E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); |
| W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil); |
| begin |
| return (AFC with P.Stk + 3, Bracket (E, Pat, W)); |
| end "**"; |
| |
| function "**" (P : PString; Fil : File_Access) return Pattern is |
| Pat : constant PE_Ptr := S_To_PE (P); |
| E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); |
| W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil); |
| begin |
| return (AFC with 3, Bracket (E, Pat, W)); |
| end "**"; |
| |
| function "**" (P : PChar; Fil : File_Access) return Pattern is |
| Pat : constant PE_Ptr := C_To_PE (P); |
| E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); |
| W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil); |
| begin |
| return (AFC with 3, Bracket (E, Pat, W)); |
| end "**"; |
| |
| --------- |
| -- "+" -- |
| --------- |
| |
| function "+" (Str : VString_Var) return Pattern is |
| begin |
| return |
| (AFC with 0, |
| new PE'(PC_String_VP, 1, EOP, Str'Unrestricted_Access)); |
| end "+"; |
| |
| function "+" (Str : VString_Func) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_String_VF, 1, EOP, Str)); |
| end "+"; |
| |
| function "+" (P : Pattern_Var) return Pattern is |
| begin |
| return |
| (AFC with 3, |
| new PE'(PC_Rpat, 1, EOP, P'Unrestricted_Access)); |
| end "+"; |
| |
| function "+" (P : Boolean_Func) return Pattern is |
| begin |
| return (AFC with 3, new PE'(PC_Pred_Func, 1, EOP, P)); |
| end "+"; |
| |
| ---------- |
| -- "or" -- |
| ---------- |
| |
| function "or" (L : PString; R : Pattern) return Pattern is |
| begin |
| return (AFC with R.Stk + 1, S_To_PE (L) or Copy (R.P)); |
| end "or"; |
| |
| function "or" (L : Pattern; R : PString) return Pattern is |
| begin |
| return (AFC with L.Stk + 1, Copy (L.P) or S_To_PE (R)); |
| end "or"; |
| |
| function "or" (L : PString; R : PString) return Pattern is |
| begin |
| return (AFC with 1, S_To_PE (L) or S_To_PE (R)); |
| end "or"; |
| |
| function "or" (L : Pattern; R : Pattern) return Pattern is |
| begin |
| return (AFC with |
| Natural'Max (L.Stk, R.Stk) + 1, Copy (L.P) or Copy (R.P)); |
| end "or"; |
| |
| function "or" (L : PChar; R : Pattern) return Pattern is |
| begin |
| return (AFC with 1, C_To_PE (L) or Copy (R.P)); |
| end "or"; |
| |
| function "or" (L : Pattern; R : PChar) return Pattern is |
| begin |
| return (AFC with 1, Copy (L.P) or C_To_PE (R)); |
| end "or"; |
| |
| function "or" (L : PChar; R : PChar) return Pattern is |
| begin |
| return (AFC with 1, C_To_PE (L) or C_To_PE (R)); |
| end "or"; |
| |
| function "or" (L : PString; R : PChar) return Pattern is |
| begin |
| return (AFC with 1, S_To_PE (L) or C_To_PE (R)); |
| end "or"; |
| |
| function "or" (L : PChar; R : PString) return Pattern is |
| begin |
| return (AFC with 1, C_To_PE (L) or S_To_PE (R)); |
| end "or"; |
| |
| ------------ |
| -- Adjust -- |
| ------------ |
| |
| -- No two patterns share the same pattern elements, so the adjust |
| -- procedure for a Pattern assignment must do a deep copy of the |
| -- pattern element structure. |
| |
| procedure Adjust (Object : in out Pattern) is |
| begin |
| Object.P := Copy (Object.P); |
| end Adjust; |
| |
| --------------- |
| -- Alternate -- |
| --------------- |
| |
| function Alternate (L, R : PE_Ptr) return PE_Ptr is |
| begin |
| -- If the left pattern is null, then we just add the alternation |
| -- node with an index one greater than the right hand pattern. |
| |
| if L = EOP then |
| return new PE'(PC_Alt, R.Index + 1, EOP, R); |
| |
| -- If the left pattern is non-null, then build a reference vector |
| -- for its elements, and adjust their index values to accommodate |
| -- the right hand elements. Then add the alternation node. |
| |
| else |
| declare |
| Refs : Ref_Array (1 .. L.Index); |
| |
| begin |
| Build_Ref_Array (L, Refs); |
| |
| for J in Refs'Range loop |
| Refs (J).Index := Refs (J).Index + R.Index; |
| end loop; |
| end; |
| |
| return new PE'(PC_Alt, L.Index + 1, L, R); |
| end if; |
| end Alternate; |
| |
| --------- |
| -- Any -- |
| --------- |
| |
| function Any (Str : String) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, To_Set (Str))); |
| end Any; |
| |
| function Any (Str : VString) return Pattern is |
| begin |
| return Any (S (Str)); |
| end Any; |
| |
| function Any (Str : Character) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Any_CH, 1, EOP, Str)); |
| end Any; |
| |
| function Any (Str : Character_Set) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, Str)); |
| end Any; |
| |
| function Any (Str : not null access VString) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Any_VP, 1, EOP, VString_Ptr (Str))); |
| end Any; |
| |
| function Any (Str : VString_Func) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Any_VF, 1, EOP, Str)); |
| end Any; |
| |
| --------- |
| -- Arb -- |
| --------- |
| |
| -- +---+ |
| -- | X |----> |
| -- +---+ |
| -- . |
| -- . |
| -- +---+ |
| -- | Y |----> |
| -- +---+ |
| |
| -- The PC_Arb_X element is numbered 2, and the PC_Arb_Y element is 1 |
| |
| function Arb return Pattern is |
| Y : constant PE_Ptr := new PE'(PC_Arb_Y, 1, EOP); |
| X : constant PE_Ptr := new PE'(PC_Arb_X, 2, EOP, Y); |
| begin |
| return (AFC with 1, X); |
| end Arb; |
| |
| ----------- |
| -- Arbno -- |
| ----------- |
| |
| function Arbno (P : PString) return Pattern is |
| begin |
| if P'Length = 0 then |
| return (AFC with 0, EOP); |
| else |
| return (AFC with 0, Arbno_Simple (S_To_PE (P))); |
| end if; |
| end Arbno; |
| |
| function Arbno (P : PChar) return Pattern is |
| begin |
| return (AFC with 0, Arbno_Simple (C_To_PE (P))); |
| end Arbno; |
| |
| function Arbno (P : Pattern) return Pattern is |
| Pat : constant PE_Ptr := Copy (P.P); |
| |
| begin |
| if P.Stk = 0 |
| and then OK_For_Simple_Arbno (Pat.Pcode) |
| then |
| return (AFC with 0, Arbno_Simple (Pat)); |
| end if; |
| |
| -- This is the complex case, either the pattern makes stack entries |
| -- or it is possible for the pattern to match the null string (more |
| -- accurately, we don't know that this is not the case). |
| |
| -- +--------------------------+ |
| -- | ^ |
| -- V | |
| -- +---+ | |
| -- | X |----> | |
| -- +---+ | |
| -- . | |
| -- . | |
| -- +---+ +---+ +---+ | |
| -- | E |---->| P |---->| Y |--->+ |
| -- +---+ +---+ +---+ |
| |
| -- The node numbering of the constituent pattern P is not affected. |
| -- Where N is the number of nodes in P, the Y node is numbered N + 1, |
| -- the E node is N + 2, and the X node is N + 3. |
| |
| declare |
| E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); |
| X : constant PE_Ptr := new PE'(PC_Arbno_X, 0, EOP, E); |
| Y : constant PE_Ptr := new PE'(PC_Arbno_Y, 0, X, P.Stk + 3); |
| EPY : constant PE_Ptr := Bracket (E, Pat, Y); |
| begin |
| X.Alt := EPY; |
| X.Index := EPY.Index + 1; |
| return (AFC with P.Stk + 3, X); |
| end; |
| end Arbno; |
| |
| ------------------ |
| -- Arbno_Simple -- |
| ------------------ |
| |
| -- +-------------+ |
| -- | ^ |
| -- V | |
| -- +---+ | |
| -- | S |----> | |
| -- +---+ | |
| -- . | |
| -- . | |
| -- +---+ | |
| -- | P |---------->+ |
| -- +---+ |
| |
| -- The node numbering of the constituent pattern P is not affected. |
| -- The S node has a node number of P.Index + 1. |
| |
| -- Note that we know that P cannot be EOP, because a null pattern |
| -- does not meet the requirements for simple Arbno. |
| |
| function Arbno_Simple (P : PE_Ptr) return PE_Ptr is |
| S : constant PE_Ptr := new PE'(PC_Arbno_S, P.Index + 1, EOP, P); |
| begin |
| Set_Successor (P, S); |
| return S; |
| end Arbno_Simple; |
| |
| --------- |
| -- Bal -- |
| --------- |
| |
| function Bal return Pattern is |
| begin |
| return (AFC with 1, new PE'(PC_Bal, 1, EOP)); |
| end Bal; |
| |
| ------------- |
| -- Bracket -- |
| ------------- |
| |
| function Bracket (E, P, A : PE_Ptr) return PE_Ptr is |
| begin |
| if P = EOP then |
| E.Pthen := A; |
| E.Index := 2; |
| A.Index := 1; |
| |
| else |
| E.Pthen := P; |
| Set_Successor (P, A); |
| E.Index := P.Index + 2; |
| A.Index := P.Index + 1; |
| end if; |
| |
| return E; |
| end Bracket; |
| |
| ----------- |
| -- Break -- |
| ----------- |
| |
| function Break (Str : String) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, To_Set (Str))); |
| end Break; |
| |
| function Break (Str : VString) return Pattern is |
| begin |
| return Break (S (Str)); |
| end Break; |
| |
| function Break (Str : Character) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Break_CH, 1, EOP, Str)); |
| end Break; |
| |
| function Break (Str : Character_Set) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, Str)); |
| end Break; |
| |
| function Break (Str : not null access VString) return Pattern is |
| begin |
| return (AFC with 0, |
| new PE'(PC_Break_VP, 1, EOP, Str.all'Unchecked_Access)); |
| end Break; |
| |
| function Break (Str : VString_Func) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Break_VF, 1, EOP, Str)); |
| end Break; |
| |
| ------------ |
| -- BreakX -- |
| ------------ |
| |
| function BreakX (Str : String) return Pattern is |
| begin |
| return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, To_Set (Str))); |
| end BreakX; |
| |
| function BreakX (Str : VString) return Pattern is |
| begin |
| return BreakX (S (Str)); |
| end BreakX; |
| |
| function BreakX (Str : Character) return Pattern is |
| begin |
| return BreakX_Make (new PE'(PC_BreakX_CH, 3, N, Str)); |
| end BreakX; |
| |
| function BreakX (Str : Character_Set) return Pattern is |
| begin |
| return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, Str)); |
| end BreakX; |
| |
| function BreakX (Str : not null access VString) return Pattern is |
| begin |
| return BreakX_Make (new PE'(PC_BreakX_VP, 3, N, VString_Ptr (Str))); |
| end BreakX; |
| |
| function BreakX (Str : VString_Func) return Pattern is |
| begin |
| return BreakX_Make (new PE'(PC_BreakX_VF, 3, N, Str)); |
| end BreakX; |
| |
| ----------------- |
| -- BreakX_Make -- |
| ----------------- |
| |
| -- +---+ +---+ |
| -- | B |---->| A |----> |
| -- +---+ +---+ |
| -- ^ . |
| -- | . |
| -- | +---+ |
| -- +<------| X | |
| -- +---+ |
| |
| -- The B node is numbered 3, the alternative node is 1, and the X |
| -- node is 2. |
| |
| function BreakX_Make (B : PE_Ptr) return Pattern is |
| X : constant PE_Ptr := new PE'(PC_BreakX_X, 2, B); |
| A : constant PE_Ptr := new PE'(PC_Alt, 1, EOP, X); |
| begin |
| B.Pthen := A; |
| return (AFC with 2, B); |
| end BreakX_Make; |
| |
| --------------------- |
| -- Build_Ref_Array -- |
| --------------------- |
| |
| procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array) is |
| |
| procedure Record_PE (E : PE_Ptr); |
| -- Record given pattern element if not already recorded in RA, |
| -- and also record any referenced pattern elements recursively. |
| |
| --------------- |
| -- Record_PE -- |
| --------------- |
| |
| procedure Record_PE (E : PE_Ptr) is |
| begin |
| PutD (" Record_PE called with PE_Ptr = " & Image (E)); |
| |
| if E = EOP or else RA (E.Index) /= null then |
| Put_LineD (", nothing to do"); |
| return; |
| |
| else |
| Put_LineD (", recording" & IndexT'Image (E.Index)); |
| RA (E.Index) := E; |
| Record_PE (E.Pthen); |
| |
| if E.Pcode in PC_Has_Alt then |
| Record_PE (E.Alt); |
| end if; |
| end if; |
| end Record_PE; |
| |
| -- Start of processing for Build_Ref_Array |
| |
| begin |
| New_LineD; |
| Put_LineD ("Entering Build_Ref_Array"); |
| Record_PE (E); |
| New_LineD; |
| end Build_Ref_Array; |
| |
| ------------- |
| -- C_To_PE -- |
| ------------- |
| |
| function C_To_PE (C : PChar) return PE_Ptr is |
| begin |
| return new PE'(PC_Char, 1, EOP, C); |
| end C_To_PE; |
| |
| ------------ |
| -- Cancel -- |
| ------------ |
| |
| function Cancel return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Cancel, 1, EOP)); |
| end Cancel; |
| |
| ------------ |
| -- Concat -- |
| ------------ |
| |
| -- Concat needs to traverse the left operand performing the following |
| -- set of fixups: |
| |
| -- a) Any successor pointers (Pthen fields) that are set to EOP are |
| -- reset to point to the second operand. |
| |
| -- b) Any PC_Arbno_Y node has its stack count field incremented |
| -- by the parameter Incr provided for this purpose. |
| |
| -- d) Num fields of all pattern elements in the left operand are |
| -- adjusted to include the elements of the right operand. |
| |
| -- Note: we do not use Set_Successor in the processing for Concat, since |
| -- there is no point in doing two traversals, we may as well do everything |
| -- at the same time. |
| |
| function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr is |
| begin |
| if L = EOP then |
| return R; |
| |
| elsif R = EOP then |
| return L; |
| |
| else |
| declare |
| Refs : Ref_Array (1 .. L.Index); |
| -- We build a reference array for L whose N'th element points to |
| -- the pattern element of L whose original Index value is N. |
| |
| P : PE_Ptr; |
| |
| begin |
| Build_Ref_Array (L, Refs); |
| |
| for J in Refs'Range loop |
| P := Refs (J); |
| |
| P.Index := P.Index + R.Index; |
| |
| if P.Pcode = PC_Arbno_Y then |
| P.Nat := P.Nat + Incr; |
| end if; |
| |
| if P.Pthen = EOP then |
| P.Pthen := R; |
| end if; |
| |
| if P.Pcode in PC_Has_Alt and then P.Alt = EOP then |
| P.Alt := R; |
| end if; |
| end loop; |
| end; |
| |
| return L; |
| end if; |
| end Concat; |
| |
| ---------- |
| -- Copy -- |
| ---------- |
| |
| function Copy (P : PE_Ptr) return PE_Ptr is |
| begin |
| if P = null then |
| Uninitialized_Pattern; |
| |
| else |
| declare |
| Refs : Ref_Array (1 .. P.Index); |
| -- References to elements in P, indexed by Index field |
| |
| Copy : Ref_Array (1 .. P.Index); |
| -- Holds copies of elements of P, indexed by Index field |
| |
| E : PE_Ptr; |
| |
| begin |
| Build_Ref_Array (P, Refs); |
| |
| -- Now copy all nodes |
| |
| for J in Refs'Range loop |
| Copy (J) := new PE'(Refs (J).all); |
| end loop; |
| |
| -- Adjust all internal references |
| |
| for J in Copy'Range loop |
| E := Copy (J); |
| |
| -- Adjust successor pointer to point to copy |
| |
| if E.Pthen /= EOP then |
| E.Pthen := Copy (E.Pthen.Index); |
| end if; |
| |
| -- Adjust Alt pointer if there is one to point to copy |
| |
| if E.Pcode in PC_Has_Alt and then E.Alt /= EOP then |
| E.Alt := Copy (E.Alt.Index); |
| end if; |
| |
| -- Copy referenced string |
| |
| if E.Pcode = PC_String then |
| E.Str := new String'(E.Str.all); |
| end if; |
| end loop; |
| |
| return Copy (P.Index); |
| end; |
| end if; |
| end Copy; |
| |
| ---------- |
| -- Dump -- |
| ---------- |
| |
| procedure Dump (P : Pattern) is |
| |
| subtype Count is Ada.Text_IO.Count; |
| Scol : Count; |
| -- Used to keep track of column in dump output |
| |
| Refs : Ref_Array (1 .. P.P.Index); |
| -- We build a reference array whose N'th element points to the |
| -- pattern element whose Index value is N. |
| |
| Cols : Natural := 2; |
| -- Number of columns used for pattern numbers, minimum is 2 |
| |
| E : PE_Ptr; |
| |
| procedure Write_Node_Id (E : PE_Ptr); |
| -- Writes out a string identifying the given pattern element |
| |
| ------------------- |
| -- Write_Node_Id -- |
| ------------------- |
| |
| procedure Write_Node_Id (E : PE_Ptr) is |
| begin |
| if E = EOP then |
| Put ("EOP"); |
| |
| for J in 4 .. Cols loop |
| Put (' '); |
| end loop; |
| |
| else |
| declare |
| Str : String (1 .. Cols); |
| N : Natural := Natural (E.Index); |
| |
| begin |
| Put ("#"); |
| |
| for J in reverse Str'Range loop |
| Str (J) := Character'Val (48 + N mod 10); |
| N := N / 10; |
| end loop; |
| |
| Put (Str); |
| end; |
| end if; |
| end Write_Node_Id; |
| |
| -- Start of processing for Dump |
| |
| begin |
| New_Line; |
| Put ("Pattern Dump Output (pattern at " & |
| Image (P'Address) & |
| ", S = " & Natural'Image (P.Stk) & ')'); |
| |
| Scol := Col; |
| New_Line; |
| |
| while Col < Scol loop |
| Put ('-'); |
| end loop; |
| |
| New_Line; |
| |
| -- If uninitialized pattern, dump line and we are done |
| |
| if P.P = null then |
| Put_Line ("Uninitialized pattern value"); |
| return; |
| end if; |
| |
| -- If null pattern, just dump it and we are all done |
| |
| if P.P = EOP then |
| Put_Line ("EOP (null pattern)"); |
| return; |
| end if; |
| |
| Build_Ref_Array (P.P, Refs); |
| |
| -- Set number of columns required for node numbers |
| |
| while 10 ** Cols - 1 < Integer (P.P.Index) loop |
| Cols := Cols + 1; |
| end loop; |
| |
| -- Now dump the nodes in reverse sequence. We output them in reverse |
| -- sequence since this corresponds to the natural order used to |
| -- construct the patterns. |
| |
| for J in reverse Refs'Range loop |
| E := Refs (J); |
| Write_Node_Id (E); |
| Set_Col (Count (Cols) + 4); |
| Put (Image (E)); |
| Put (" "); |
| Put (Pattern_Code'Image (E.Pcode)); |
| Put (" "); |
| Set_Col (21 + Count (Cols) + Address_Image_Length); |
| Write_Node_Id (E.Pthen); |
| Set_Col (24 + 2 * Count (Cols) + Address_Image_Length); |
| |
| case E.Pcode is |
| |
| when PC_Alt | |
| PC_Arb_X | |
| PC_Arbno_S | |
| PC_Arbno_X => |
| Write_Node_Id (E.Alt); |
| |
| when PC_Rpat => |
| Put (Str_PP (E.PP)); |
| |
| when PC_Pred_Func => |
| Put (Str_BF (E.BF)); |
| |
| when PC_Assign_Imm | |
| PC_Assign_OnM | |
| PC_Any_VP | |
| PC_Break_VP | |
| PC_BreakX_VP | |
| PC_NotAny_VP | |
| PC_NSpan_VP | |
| PC_Span_VP | |
| PC_String_VP => |
| Put (Str_VP (E.VP)); |
| |
| when PC_Write_Imm | |
| PC_Write_OnM => |
| Put (Str_FP (E.FP)); |
| |
| when PC_String => |
| Put (Image (E.Str.all)); |
| |
| when PC_String_2 => |
| Put (Image (E.Str2)); |
| |
| when PC_String_3 => |
| Put (Image (E.Str3)); |
| |
| when PC_String_4 => |
| Put (Image (E.Str4)); |
| |
| when PC_String_5 => |
| Put (Image (E.Str5)); |
| |
| when PC_String_6 => |
| Put (Image (E.Str6)); |
| |
| when PC_Setcur => |
| Put (Str_NP (E.Var)); |
| |
| when PC_Any_CH | |
| PC_Break_CH | |
| PC_BreakX_CH | |
| PC_Char | |
| PC_NotAny_CH | |
| PC_NSpan_CH | |
| PC_Span_CH => |
| Put (''' & E.Char & '''); |
| |
| when PC_Any_CS | |
| PC_Break_CS | |
| PC_BreakX_CS | |
| PC_NotAny_CS | |
| PC_NSpan_CS | |
| PC_Span_CS => |
| Put ('"' & To_Sequence (E.CS) & '"'); |
| |
| when PC_Arbno_Y | |
| PC_Len_Nat | |
| PC_Pos_Nat | |
| PC_RPos_Nat | |
| PC_RTab_Nat | |
| PC_Tab_Nat => |
| Put (S (E.Nat)); |
| |
| when PC_Pos_NF | |
| PC_Len_NF | |
| PC_RPos_NF | |
| PC_RTab_NF | |
| PC_Tab_NF => |
| Put (Str_NF (E.NF)); |
| |
| when PC_Pos_NP | |
| PC_Len_NP | |
| PC_RPos_NP | |
| PC_RTab_NP | |
| PC_Tab_NP => |
| Put (Str_NP (E.NP)); |
| |
| when PC_Any_VF | |
| PC_Break_VF | |
| PC_BreakX_VF | |
| PC_NotAny_VF | |
| PC_NSpan_VF | |
| PC_Span_VF | |
| PC_String_VF => |
| Put (Str_VF (E.VF)); |
| |
| when others => null; |
| |
| end case; |
| |
| New_Line; |
| end loop; |
| |
| New_Line; |
| end Dump; |
| |
| ---------- |
| -- Fail -- |
| ---------- |
| |
| function Fail return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Fail, 1, EOP)); |
| end Fail; |
| |
| ----------- |
| -- Fence -- |
| ----------- |
| |
| -- Simple case |
| |
| function Fence return Pattern is |
| begin |
| return (AFC with 1, new PE'(PC_Fence, 1, EOP)); |
| end Fence; |
| |
| -- Function case |
| |
| -- +---+ +---+ +---+ |
| -- | E |---->| P |---->| X |----> |
| -- +---+ +---+ +---+ |
| |
| -- The node numbering of the constituent pattern P is not affected. |
| -- Where N is the number of nodes in P, the X node is numbered N + 1, |
| -- and the E node is N + 2. |
| |
| function Fence (P : Pattern) return Pattern is |
| Pat : constant PE_Ptr := Copy (P.P); |
| E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); |
| X : constant PE_Ptr := new PE'(PC_Fence_X, 0, EOP); |
| begin |
| return (AFC with P.Stk + 1, Bracket (E, Pat, X)); |
| end Fence; |
| |
| -------------- |
| -- Finalize -- |
| -------------- |
| |
| procedure Finalize (Object : in out Pattern) is |
| |
| procedure Free is new Ada.Unchecked_Deallocation (PE, PE_Ptr); |
| procedure Free is new Ada.Unchecked_Deallocation (String, String_Ptr); |
| |
| begin |
| -- Nothing to do if already freed |
| |
| if Object.P = null then |
| return; |
| |
| -- Otherwise we must free all elements |
| |
| else |
| declare |
| Refs : Ref_Array (1 .. Object.P.Index); |
| -- References to elements in pattern to be finalized |
| |
| begin |
| Build_Ref_Array (Object.P, Refs); |
| |
| for J in Refs'Range loop |
| if Refs (J).Pcode = PC_String then |
| Free (Refs (J).Str); |
| end if; |
| |
| Free (Refs (J)); |
| end loop; |
| |
| Object.P := null; |
| end; |
| end if; |
| end Finalize; |
| |
| ----------- |
| -- Image -- |
| ----------- |
| |
| function Image (P : PE_Ptr) return String is |
| begin |
| return Image (To_Address (P)); |
| end Image; |
| |
| function Image (P : Pattern) return String is |
| begin |
| return S (Image (P)); |
| end Image; |
| |
| function Image (P : Pattern) return VString is |
| |
| Kill_Ampersand : Boolean := False; |
| -- Set True to delete next & to be output to Result |
| |
| Result : VString := Nul; |
| -- The result is accumulated here, using Append |
| |
| Refs : Ref_Array (1 .. P.P.Index); |
| -- We build a reference array whose N'th element points to the |
| -- pattern element whose Index value is N. |
| |
| procedure Delete_Ampersand; |
| -- Deletes the ampersand at the end of Result |
| |
| procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean); |
| -- E refers to a pattern structure whose successor is given by Succ. |
| -- This procedure appends to Result a representation of this pattern. |
| -- The Paren parameter indicates whether parentheses are required if |
| -- the output is more than one element. |
| |
| procedure Image_One (E : in out PE_Ptr); |
| -- E refers to a pattern structure. This procedure appends to Result |
| -- a representation of the single simple or compound pattern structure |
| -- at the start of E and updates E to point to its successor. |
| |
| ---------------------- |
| -- Delete_Ampersand -- |
| ---------------------- |
| |
| procedure Delete_Ampersand is |
| L : constant Natural := Length (Result); |
| begin |
| if L > 2 then |
| Delete (Result, L - 1, L); |
| end if; |
| end Delete_Ampersand; |
| |
| --------------- |
| -- Image_One -- |
| --------------- |
| |
| procedure Image_One (E : in out PE_Ptr) is |
| |
| ER : PE_Ptr := E.Pthen; |
| -- Successor set as result in E unless reset |
| |
| begin |
| case E.Pcode is |
| |
| when PC_Cancel => |
| Append (Result, "Cancel"); |
| |
| when PC_Alt => Alt : declare |
| |
| Elmts_In_L : constant IndexT := E.Pthen.Index - E.Alt.Index; |
| -- Number of elements in left pattern of alternation |
| |
| Lowest_In_L : constant IndexT := E.Index - Elmts_In_L; |
| -- Number of lowest index in elements of left pattern |
| |
| E1 : PE_Ptr; |
| |
| begin |
| -- The successor of the alternation node must have a lower |
| -- index than any node that is in the left pattern or a |
| -- higher index than the alternation node itself. |
| |
| while ER /= EOP |
| and then ER.Index >= Lowest_In_L |
| and then ER.Index < E.Index |
| loop |
| ER := ER.Pthen; |
| end loop; |
| |
| Append (Result, '('); |
| |
| E1 := E; |
| loop |
| Image_Seq (E1.Pthen, ER, False); |
| Append (Result, " or "); |
| E1 := E1.Alt; |
| exit when E1.Pcode /= PC_Alt; |
| end loop; |
| |
| Image_Seq (E1, ER, False); |
| Append (Result, ')'); |
| end Alt; |
| |
| when PC_Any_CS => |
| Append (Result, "Any (" & Image (To_Sequence (E.CS)) & ')'); |
| |
| when PC_Any_VF => |
| Append (Result, "Any (" & Str_VF (E.VF) & ')'); |
| |
| when PC_Any_VP => |
| Append (Result, "Any (" & Str_VP (E.VP) & ')'); |
| |
| when PC_Arb_X => |
| Append (Result, "Arb"); |
| |
| when PC_Arbno_S => |
| Append (Result, "Arbno ("); |
| Image_Seq (E.Alt, E, False); |
| Append (Result, ')'); |
| |
| when PC_Arbno_X => |
| Append (Result, "Arbno ("); |
| Image_Seq (E.Alt.Pthen, Refs (E.Index - 2), False); |
| Append (Result, ')'); |
| |
| when PC_Assign_Imm => |
| Delete_Ampersand; |
| Append (Result, "* " & Str_VP (Refs (E.Index).VP)); |
| |
| when PC_Assign_OnM => |
| Delete_Ampersand; |
| Append (Result, "** " & Str_VP (Refs (E.Index).VP)); |
| |
| when PC_Any_CH => |
| Append (Result, "Any ('" & E.Char & "')"); |
| |
| when PC_Bal => |
| Append (Result, "Bal"); |
| |
| when PC_Break_CH => |
| Append (Result, "Break ('" & E.Char & "')"); |
| |
| when PC_Break_CS => |
| Append (Result, "Break (" & Image (To_Sequence (E.CS)) & ')'); |
| |
| when PC_Break_VF => |
| Append (Result, "Break (" & Str_VF (E.VF) & ')'); |
| |
| when PC_Break_VP => |
| Append (Result, "Break (" & Str_VP (E.VP) & ')'); |
| |
| when PC_BreakX_CH => |
| Append (Result, "BreakX ('" & E.Char & "')"); |
| ER := ER.Pthen; |
| |
| when PC_BreakX_CS => |
| Append (Result, "BreakX (" & Image (To_Sequence (E.CS)) & ')'); |
| ER := ER.Pthen; |
| |
| when PC_BreakX_VF => |
| Append (Result, "BreakX (" & Str_VF (E.VF) & ')'); |
| ER := ER.Pthen; |
| |
| when PC_BreakX_VP => |
| Append (Result, "BreakX (" & Str_VP (E.VP) & ')'); |
| ER := ER.Pthen; |
| |
| when PC_Char => |
| Append (Result, ''' & E.Char & '''); |
| |
| when PC_Fail => |
| Append (Result, "Fail"); |
| |
| when PC_Fence => |
| Append (Result, "Fence"); |
| |
| when PC_Fence_X => |
| Append (Result, "Fence ("); |
| Image_Seq (E.Pthen, Refs (E.Index - 1), False); |
| Append (Result, ")"); |
| ER := Refs (E.Index - 1).Pthen; |
| |
| when PC_Len_Nat => |
| Append (Result, "Len (" & E.Nat & ')'); |
| |
| when PC_Len_NF => |
| Append (Result, "Len (" & Str_NF (E.NF) & ')'); |
| |
| when PC_Len_NP => |
| Append (Result, "Len (" & Str_NP (E.NP) & ')'); |
| |
| when PC_NotAny_CH => |
| Append (Result, "NotAny ('" & E.Char & "')"); |
| |
| when PC_NotAny_CS => |
| Append (Result, "NotAny (" & Image (To_Sequence (E.CS)) & ')'); |
| |
| when PC_NotAny_VF => |
| Append (Result, "NotAny (" & Str_VF (E.VF) & ')'); |
| |
| when PC_NotAny_VP => |
| Append (Result, "NotAny (" & Str_VP (E.VP) & ')'); |
| |
| when PC_NSpan_CH => |
| Append (Result, "NSpan ('" & E.Char & "')"); |
| |
| when PC_NSpan_CS => |
| Append (Result, "NSpan (" & Image (To_Sequence (E.CS)) & ')'); |
| |
| when PC_NSpan_VF => |
| Append (Result, "NSpan (" & Str_VF (E.VF) & ')'); |
| |
| when PC_NSpan_VP => |
| Append (Result, "NSpan (" & Str_VP (E.VP) & ')'); |
| |
| when PC_Null => |
| Append (Result, """"""); |
| |
| when PC_Pos_Nat => |
| Append (Result, "Pos (" & E.Nat & ')'); |
| |
| when PC_Pos_NF => |
| Append (Result, "Pos (" & Str_NF (E.NF) & ')'); |
| |
| when PC_Pos_NP => |
| Append (Result, "Pos (" & Str_NP (E.NP) & ')'); |
| |
| when PC_R_Enter => |
| Kill_Ampersand := True; |
| |
| when PC_Rest => |
| Append (Result, "Rest"); |
| |
| when PC_Rpat => |
| Append (Result, "(+ " & Str_PP (E.PP) & ')'); |
| |
| when PC_Pred_Func => |
| Append (Result, "(+ " & Str_BF (E.BF) & ')'); |
| |
| when PC_RPos_Nat => |
| Append (Result, "RPos (" & E.Nat & ')'); |
| |
| when PC_RPos_NF => |
| Append (Result, "RPos (" & Str_NF (E.NF) & ')'); |
| |
| when PC_RPos_NP => |
| Append (Result, "RPos (" & Str_NP (E.NP) & ')'); |
| |
| when PC_RTab_Nat => |
| Append (Result, "RTab (" & E.Nat & ')'); |
| |
| when PC_RTab_NF => |
| Append (Result, "RTab (" & Str_NF (E.NF) & ')'); |
| |
| when PC_RTab_NP => |
| Append (Result, "RTab (" & Str_NP (E.NP) & ')'); |
| |
| when PC_Setcur => |
| Append (Result, "Setcur (" & Str_NP (E.Var) & ')'); |
| |
| when PC_Span_CH => |
| Append (Result, "Span ('" & E.Char & "')"); |
| |
| when PC_Span_CS => |
| Append (Result, "Span (" & Image (To_Sequence (E.CS)) & ')'); |
| |
| when PC_Span_VF => |
| Append (Result, "Span (" & Str_VF (E.VF) & ')'); |
| |
| when PC_Span_VP => |
| Append (Result, "Span (" & Str_VP (E.VP) & ')'); |
| |
| when PC_String => |
| Append (Result, Image (E.Str.all)); |
| |
| when PC_String_2 => |
| Append (Result, Image (E.Str2)); |
| |
| when PC_String_3 => |
| Append (Result, Image (E.Str3)); |
| |
| when PC_String_4 => |
| Append (Result, Image (E.Str4)); |
| |
| when PC_String_5 => |
| Append (Result, Image (E.Str5)); |
| |
| when PC_String_6 => |
| Append (Result, Image (E.Str6)); |
| |
| when PC_String_VF => |
| Append (Result, "(+" & Str_VF (E.VF) & ')'); |
| |
| when PC_String_VP => |
| Append (Result, "(+" & Str_VP (E.VP) & ')'); |
| |
| when PC_Succeed => |
| Append (Result, "Succeed"); |
| |
| when PC_Tab_Nat => |
| Append (Result, "Tab (" & E.Nat & ')'); |
| |
| when PC_Tab_NF => |
| Append (Result, "Tab (" & Str_NF (E.NF) & ')'); |
| |
| when PC_Tab_NP => |
| Append (Result, "Tab (" & Str_NP (E.NP) & ')'); |
| |
| when PC_Write_Imm => |
| Append (Result, '('); |
| Image_Seq (E, Refs (E.Index - 1), True); |
| Append (Result, " * " & Str_FP (Refs (E.Index - 1).FP)); |
| ER := Refs (E.Index - 1).Pthen; |
| |
| when PC_Write_OnM => |
| Append (Result, '('); |
| Image_Seq (E.Pthen, Refs (E.Index - 1), True); |
| Append (Result, " ** " & Str_FP (Refs (E.Index - 1).FP)); |
| ER := Refs (E.Index - 1).Pthen; |
| |
| -- Other pattern codes should not appear as leading elements |
| |
| when PC_Arb_Y | |
| PC_Arbno_Y | |
| PC_Assign | |
| PC_BreakX_X | |
| PC_EOP | |
| PC_Fence_Y | |
| PC_R_Remove | |
| PC_R_Restore | |
| PC_Unanchored => |
| Append (Result, "???"); |
| |
| end case; |
| |
| E := ER; |
| end Image_One; |
| |
| --------------- |
| -- Image_Seq -- |
| --------------- |
| |
| procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean) is |
| Indx : constant Natural := Length (Result); |
| E1 : PE_Ptr := E; |
| Mult : Boolean := False; |
| |
| begin |
| -- The image of EOP is "" (the null string) |
| |
| if E = EOP then |
| Append (Result, """"""); |
| |
| -- Else generate appropriate concatenation sequence |
| |
| else |
| loop |
| Image_One (E1); |
| exit when E1 = Succ; |
| exit when E1 = EOP; |
| Mult := True; |
| |
| if Kill_Ampersand then |
| Kill_Ampersand := False; |
| else |
| Append (Result, " & "); |
| end if; |
| end loop; |
| end if; |
| |
| if Mult and Paren then |
| Insert (Result, Indx + 1, "("); |
| Append (Result, ")"); |
| end if; |
| end Image_Seq; |
| |
| -- Start of processing for Image |
| |
| begin |
| Build_Ref_Array (P.P, Refs); |
| Image_Seq (P.P, EOP, False); |
| return Result; |
| end Image; |
| |
| ----------- |
| -- Is_In -- |
| ----------- |
| |
| function Is_In (C : Character; Str : String) return Boolean is |
| begin |
| for J in Str'Range loop |
| if Str (J) = C then |
| return True; |
| end if; |
| end loop; |
| |
| return False; |
| end Is_In; |
| |
| --------- |
| -- Len -- |
| --------- |
| |
| function Len (Count : Natural) return Pattern is |
| begin |
| -- Note, the following is not just an optimization, it is needed |
| -- to ensure that Arbno (Len (0)) does not generate an infinite |
| -- matching loop (since PC_Len_Nat is OK_For_Simple_Arbno). |
| |
| if Count = 0 then |
| return (AFC with 0, new PE'(PC_Null, 1, EOP)); |
| |
| else |
| return (AFC with 0, new PE'(PC_Len_Nat, 1, EOP, Count)); |
| end if; |
| end Len; |
| |
| function Len (Count : Natural_Func) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Len_NF, 1, EOP, Count)); |
| end Len; |
| |
| function Len (Count : not null access Natural) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Len_NP, 1, EOP, Natural_Ptr (Count))); |
| end Len; |
| |
| ----------------- |
| -- Logic_Error -- |
| ----------------- |
| |
| procedure Logic_Error is |
| begin |
| raise Program_Error with |
| "Internal logic error in GNAT.Spitbol.Patterns"; |
| end Logic_Error; |
| |
| ----------- |
| -- Match -- |
| ----------- |
| |
| function Match |
| (Subject : VString; |
| Pat : Pattern) return Boolean |
| is |
| S : Big_String_Access; |
| L : Natural; |
| Start : Natural; |
| Stop : Natural; |
| pragma Unreferenced (Stop); |
| |
| begin |
| Get_String (Subject, S, L); |
| |
| if Debug_Mode then |
| XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); |
| else |
| XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); |
| end if; |
| |
| return Start /= 0; |
| end Match; |
| |
| function Match |
| (Subject : String; |
| Pat : Pattern) return Boolean |
| is |
| Start, Stop : Natural; |
| pragma Unreferenced (Stop); |
| |
| subtype String1 is String (1 .. Subject'Length); |
| |
| begin |
| if Debug_Mode then |
| XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop); |
| else |
| XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop); |
| end if; |
| |
| return Start /= 0; |
| end Match; |
| |
| function Match |
| (Subject : VString_Var; |
| Pat : Pattern; |
| Replace : VString) return Boolean |
| is |
| Start : Natural; |
| Stop : Natural; |
| S : Big_String_Access; |
| L : Natural; |
| |
| begin |
| Get_String (Subject, S, L); |
| |
| if Debug_Mode then |
| XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); |
| else |
| XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); |
| end if; |
| |
| if Start = 0 then |
| return False; |
| else |
| Get_String (Replace, S, L); |
| Replace_Slice |
| (Subject'Unrestricted_Access.all, Start, Stop, S (1 .. L)); |
| return True; |
| end if; |
| end Match; |
| |
| function Match |
| (Subject : VString_Var; |
| Pat : Pattern; |
| Replace : String) return Boolean |
| is |
| Start : Natural; |
| Stop : Natural; |
| S : Big_String_Access; |
| L : Natural; |
| |
| begin |
| Get_String (Subject, S, L); |
| |
| if Debug_Mode then |
| XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); |
| else |
| XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); |
| end if; |
| |
| if Start = 0 then |
| return False; |
| else |
| Replace_Slice |
| (Subject'Unrestricted_Access.all, Start, Stop, Replace); |
| return True; |
| end if; |
| end Match; |
| |
| procedure Match |
| (Subject : VString; |
| Pat : Pattern) |
| is |
| S : Big_String_Access; |
| L : Natural; |
| |
| Start : Natural; |
| Stop : Natural; |
| pragma Unreferenced (Start, Stop); |
| |
| begin |
| Get_String (Subject, S, L); |
| |
| if Debug_Mode then |
| XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); |
| else |
| XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); |
| end if; |
| end Match; |
| |
| procedure Match |
| (Subject : String; |
| Pat : Pattern) |
| is |
| Start, Stop : Natural; |
| pragma Unreferenced (Start, Stop); |
| |
| subtype String1 is String (1 .. Subject'Length); |
| |
| begin |
| if Debug_Mode then |
| XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop); |
| else |
| XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop); |
| end if; |
| end Match; |
| |
| procedure Match |
| (Subject : in out VString; |
| Pat : Pattern; |
| Replace : VString) |
| is |
| Start : Natural; |
| Stop : Natural; |
| S : Big_String_Access; |
| L : Natural; |
| |
| begin |
| Get_String (Subject, S, L); |
| |
| if Debug_Mode then |
| XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); |
| else |
| XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); |
| end if; |
| |
| if Start /= 0 then |
| Get_String (Replace, S, L); |
| Replace_Slice (Subject, Start, Stop, S (1 .. L)); |
| end if; |
| end Match; |
| |
| procedure Match |
| (Subject : in out VString; |
| Pat : Pattern; |
| Replace : String) |
| is |
| Start : Natural; |
| Stop : Natural; |
| S : Big_String_Access; |
| L : Natural; |
| |
| begin |
| Get_String (Subject, S, L); |
| |
| if Debug_Mode then |
| XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); |
| else |
| XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); |
| end if; |
| |
| if Start /= 0 then |
| Replace_Slice (Subject, Start, Stop, Replace); |
| end if; |
| end Match; |
| |
| function Match |
| (Subject : VString; |
| Pat : PString) return Boolean |
| is |
| Pat_Len : constant Natural := Pat'Length; |
| S : Big_String_Access; |
| L : Natural; |
| |
| begin |
| Get_String (Subject, S, L); |
| |
| if Anchored_Mode then |
| if Pat_Len > L then |
| return False; |
| else |
| return Pat = S (1 .. Pat_Len); |
| end if; |
| |
| else |
| for J in 1 .. L - Pat_Len + 1 loop |
| if Pat = S (J .. J + (Pat_Len - 1)) then |
| return True; |
| end if; |
| end loop; |
| |
| return False; |
| end if; |
| end Match; |
| |
| function Match |
| (Subject : String; |
| Pat : PString) return Boolean |
| is |
| Pat_Len : constant Natural := Pat'Length; |
| Sub_Len : constant Natural := Subject'Length; |
| SFirst : constant Natural := Subject'First; |
| |
| begin |
| if Anchored_Mode then |
| if Pat_Len > Sub_Len then |
| return False; |
| else |
| return Pat = Subject (SFirst .. SFirst + Pat_Len - 1); |
| end if; |
| |
| else |
| for J in SFirst .. SFirst + Sub_Len - Pat_Len loop |
| if Pat = Subject (J .. J + (Pat_Len - 1)) then |
| return True; |
| end if; |
| end loop; |
| |
| return False; |
| end if; |
| end Match; |
| |
| function Match |
| (Subject : VString_Var; |
| Pat : PString; |
| Replace : VString) return Boolean |
| is |
| Start : Natural; |
| Stop : Natural; |
| S : Big_String_Access; |
| L : Natural; |
| |
| begin |
| Get_String (Subject, S, L); |
| |
| if Debug_Mode then |
| XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); |
| else |
| XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); |
| end if; |
| |
| if Start = 0 then |
| return False; |
| else |
| Get_String (Replace, S, L); |
| Replace_Slice |
| (Subject'Unrestricted_Access.all, Start, Stop, S (1 .. L)); |
| return True; |
| end if; |
| end Match; |
| |
| function Match |
| (Subject : VString_Var; |
| Pat : PString; |
| Replace : String) return Boolean |
| is |
| Start : Natural; |
| Stop : Natural; |
| S : Big_String_Access; |
| L : Natural; |
| |
| begin |
| Get_String (Subject, S, L); |
| |
| if Debug_Mode then |
| XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); |
| else |
| XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); |
| end if; |
| |
| if Start = 0 then |
| return False; |
| else |
| Replace_Slice |
| (Subject'Unrestricted_Access.all, Start, Stop, Replace); |
| return True; |
| end if; |
| end Match; |
| |
| procedure Match |
| (Subject : VString; |
| Pat : PString) |
| is |
| S : Big_String_Access; |
| L : Natural; |
| |
| Start : Natural; |
| Stop : Natural; |
| pragma Unreferenced (Start, Stop); |
| |
| begin |
| Get_String (Subject, S, L); |
| |
| if Debug_Mode then |
| XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); |
| else |
| XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); |
| end if; |
| end Match; |
| |
| procedure Match |
| (Subject : String; |
| Pat : PString) |
| is |
| Start, Stop : Natural; |
| pragma Unreferenced (Start, Stop); |
| |
| subtype String1 is String (1 .. Subject'Length); |
| |
| begin |
| if Debug_Mode then |
| XMatchD (String1 (Subject), S_To_PE (Pat), 0, Start, Stop); |
| else |
| XMatch (String1 (Subject), S_To_PE (Pat), 0, Start, Stop); |
| end if; |
| end Match; |
| |
| procedure Match |
| (Subject : in out VString; |
| Pat : PString; |
| Replace : VString) |
| is |
| Start : Natural; |
| Stop : Natural; |
| S : Big_String_Access; |
| L : Natural; |
| |
| begin |
| Get_String (Subject, S, L); |
| |
| if Debug_Mode then |
| XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); |
| else |
| XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); |
| end if; |
| |
| if Start /= 0 then |
| Get_String (Replace, S, L); |
| Replace_Slice (Subject, Start, Stop, S (1 .. L)); |
| end if; |
| end Match; |
| |
| procedure Match |
| (Subject : in out VString; |
| Pat : PString; |
| Replace : String) |
| is |
| Start : Natural; |
| Stop : Natural; |
| S : Big_String_Access; |
| L : Natural; |
| |
| begin |
| Get_String (Subject, S, L); |
| |
| if Debug_Mode then |
| XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); |
| else |
| XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); |
| end if; |
| |
| if Start /= 0 then |
| Replace_Slice (Subject, Start, Stop, Replace); |
| end if; |
| end Match; |
| |
| function Match |
| (Subject : VString_Var; |
| Pat : Pattern; |
| Result : Match_Result_Var) return Boolean |
| is |
| Start : Natural; |
| Stop : Natural; |
| S : Big_String_Access; |
| L : Natural; |
| |
| begin |
| Get_String (Subject, S, L); |
| |
| if Debug_Mode then |
| XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); |
| else |
| XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); |
| end if; |
| |
| if Start = 0 then |
| Result'Unrestricted_Access.all.Var := null; |
| return False; |
| |
| else |
| Result'Unrestricted_Access.all.Var := Subject'Unrestricted_Access; |
| Result'Unrestricted_Access.all.Start := Start; |
| Result'Unrestricted_Access.all.Stop := Stop; |
| return True; |
| end if; |
| end Match; |
| |
| procedure Match |
| (Subject : in out VString; |
| Pat : Pattern; |
| Result : out Match_Result) |
| is |
| Start : Natural; |
| Stop : Natural; |
| S : Big_String_Access; |
| L : Natural; |
| |
| begin |
| Get_String (Subject, S, L); |
| |
| if Debug_Mode then |
| XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); |
| else |
| XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); |
| end if; |
| |
| if Start = 0 then |
| Result.Var := null; |
| else |
| Result.Var := Subject'Unrestricted_Access; |
| Result.Start := Start; |
| Result.Stop := Stop; |
| end if; |
| end Match; |
| |
| --------------- |
| -- New_LineD -- |
| --------------- |
| |
| procedure New_LineD is |
| begin |
| if Internal_Debug then |
| New_Line; |
| end if; |
| end New_LineD; |
| |
| ------------ |
| -- NotAny -- |
| ------------ |
| |
| function NotAny (Str : String) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, To_Set (Str))); |
| end NotAny; |
| |
| function NotAny (Str : VString) return Pattern is |
| begin |
| return NotAny (S (Str)); |
| end NotAny; |
| |
| function NotAny (Str : Character) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_NotAny_CH, 1, EOP, Str)); |
| end NotAny; |
| |
| function NotAny (Str : Character_Set) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, Str)); |
| end NotAny; |
| |
| function NotAny (Str : not null access VString) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_NotAny_VP, 1, EOP, VString_Ptr (Str))); |
| end NotAny; |
| |
| function NotAny (Str : VString_Func) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_NotAny_VF, 1, EOP, Str)); |
| end NotAny; |
| |
| ----------- |
| -- NSpan -- |
| ----------- |
| |
| function NSpan (Str : String) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, To_Set (Str))); |
| end NSpan; |
| |
| function NSpan (Str : VString) return Pattern is |
| begin |
| return NSpan (S (Str)); |
| end NSpan; |
| |
| function NSpan (Str : Character) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_NSpan_CH, 1, EOP, Str)); |
| end NSpan; |
| |
| function NSpan (Str : Character_Set) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, Str)); |
| end NSpan; |
| |
| function NSpan (Str : not null access VString) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_NSpan_VP, 1, EOP, VString_Ptr (Str))); |
| end NSpan; |
| |
| function NSpan (Str : VString_Func) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_NSpan_VF, 1, EOP, Str)); |
| end NSpan; |
| |
| --------- |
| -- Pos -- |
| --------- |
| |
| function Pos (Count : Natural) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Pos_Nat, 1, EOP, Count)); |
| end Pos; |
| |
| function Pos (Count : Natural_Func) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Pos_NF, 1, EOP, Count)); |
| end Pos; |
| |
| function Pos (Count : not null access Natural) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Pos_NP, 1, EOP, Natural_Ptr (Count))); |
| end Pos; |
| |
| ---------- |
| -- PutD -- |
| ---------- |
| |
| procedure PutD (Str : String) is |
| begin |
| if Internal_Debug then |
| Put (Str); |
| end if; |
| end PutD; |
| |
| --------------- |
| -- Put_LineD -- |
| --------------- |
| |
| procedure Put_LineD (Str : String) is |
| begin |
| if Internal_Debug then |
| Put_Line (Str); |
| end if; |
| end Put_LineD; |
| |
| ------------- |
| -- Replace -- |
| ------------- |
| |
| procedure Replace |
| (Result : in out Match_Result; |
| Replace : VString) |
| is |
| S : Big_String_Access; |
| L : Natural; |
| |
| begin |
| Get_String (Replace, S, L); |
| |
| if Result.Var /= null then |
| Replace_Slice (Result.Var.all, Result.Start, Result.Stop, S (1 .. L)); |
| Result.Var := null; |
| end if; |
| end Replace; |
| |
| ---------- |
| -- Rest -- |
| ---------- |
| |
| function Rest return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Rest, 1, EOP)); |
| end Rest; |
| |
| ---------- |
| -- Rpos -- |
| ---------- |
| |
| function Rpos (Count : Natural) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_RPos_Nat, 1, EOP, Count)); |
| end Rpos; |
| |
| function Rpos (Count : Natural_Func) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_RPos_NF, 1, EOP, Count)); |
| end Rpos; |
| |
| function Rpos (Count : not null access Natural) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_RPos_NP, 1, EOP, Natural_Ptr (Count))); |
| end Rpos; |
| |
| ---------- |
| -- Rtab -- |
| ---------- |
| |
| function Rtab (Count : Natural) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_RTab_Nat, 1, EOP, Count)); |
| end Rtab; |
| |
| function Rtab (Count : Natural_Func) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_RTab_NF, 1, EOP, Count)); |
| end Rtab; |
| |
| function Rtab (Count : not null access Natural) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_RTab_NP, 1, EOP, Natural_Ptr (Count))); |
| end Rtab; |
| |
| ------------- |
| -- S_To_PE -- |
| ------------- |
| |
| function S_To_PE (Str : PString) return PE_Ptr is |
| Len : constant Natural := Str'Length; |
| |
| begin |
| case Len is |
| when 0 => |
| return new PE'(PC_Null, 1, EOP); |
| |
| when 1 => |
| return new PE'(PC_Char, 1, EOP, Str (Str'First)); |
| |
| when 2 => |
| return new PE'(PC_String_2, 1, EOP, Str); |
| |
| when 3 => |
| return new PE'(PC_String_3, 1, EOP, Str); |
| |
| when 4 => |
| return new PE'(PC_String_4, 1, EOP, Str); |
| |
| when 5 => |
| return new PE'(PC_String_5, 1, EOP, Str); |
| |
| when 6 => |
| return new PE'(PC_String_6, 1, EOP, Str); |
| |
| when others => |
| return new PE'(PC_String, 1, EOP, new String'(Str)); |
| |
| end case; |
| end S_To_PE; |
| |
| ------------------- |
| -- Set_Successor -- |
| ------------------- |
| |
| -- Note: this procedure is not used by the normal concatenation circuit, |
| -- since other fixups are required on the left operand in this case, and |
| -- they might as well be done all together. |
| |
| procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr) is |
| begin |
| if Pat = null then |
| Uninitialized_Pattern; |
| |
| elsif Pat = EOP then |
| Logic_Error; |
| |
| else |
| declare |
| Refs : Ref_Array (1 .. Pat.Index); |
| -- We build a reference array for L whose N'th element points to |
| -- the pattern element of L whose original Index value is N. |
| |
| P : PE_Ptr; |
| |
| begin |
| Build_Ref_Array (Pat, Refs); |
| |
| for J in Refs'Range loop |
| P := Refs (J); |
| |
| if P.Pthen = EOP then |
| P.Pthen := Succ; |
| end if; |
| |
| if P.Pcode in PC_Has_Alt and then P.Alt = EOP then |
| P.Alt := Succ; |
| end if; |
| end loop; |
| end; |
| end if; |
| end Set_Successor; |
| |
| ------------ |
| -- Setcur -- |
| ------------ |
| |
| function Setcur (Var : not null access Natural) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Setcur, 1, EOP, Natural_Ptr (Var))); |
| end Setcur; |
| |
| ---------- |
| -- Span -- |
| ---------- |
| |
| function Span (Str : String) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, To_Set (Str))); |
| end Span; |
| |
| function Span (Str : VString) return Pattern is |
| begin |
| return Span (S (Str)); |
| end Span; |
| |
| function Span (Str : Character) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Span_CH, 1, EOP, Str)); |
| end Span; |
| |
| function Span (Str : Character_Set) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, Str)); |
| end Span; |
| |
| function Span (Str : not null access VString) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Span_VP, 1, EOP, VString_Ptr (Str))); |
| end Span; |
| |
| function Span (Str : VString_Func) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Span_VF, 1, EOP, Str)); |
| end Span; |
| |
| ------------ |
| -- Str_BF -- |
| ------------ |
| |
| function Str_BF (A : Boolean_Func) return String is |
| function To_A is new Ada.Unchecked_Conversion (Boolean_Func, Address); |
| begin |
| return "BF(" & Image (To_A (A)) & ')'; |
| end Str_BF; |
| |
| ------------ |
| -- Str_FP -- |
| ------------ |
| |
| function Str_FP (A : File_Ptr) return String is |
| begin |
| return "FP(" & Image (A.all'Address) & ')'; |
| end Str_FP; |
| |
| ------------ |
| -- Str_NF -- |
| ------------ |
| |
| function Str_NF (A : Natural_Func) return String is |
| function To_A is new Ada.Unchecked_Conversion (Natural_Func, Address); |
| begin |
| return "NF(" & Image (To_A (A)) & ')'; |
| end Str_NF; |
| |
| ------------ |
| -- Str_NP -- |
| ------------ |
| |
| function Str_NP (A : Natural_Ptr) return String is |
| begin |
| return "NP(" & Image (A.all'Address) & ')'; |
| end Str_NP; |
| |
| ------------ |
| -- Str_PP -- |
| ------------ |
| |
| function Str_PP (A : Pattern_Ptr) return String is |
| begin |
| return "PP(" & Image (A.all'Address) & ')'; |
| end Str_PP; |
| |
| ------------ |
| -- Str_VF -- |
| ------------ |
| |
| function Str_VF (A : VString_Func) return String is |
| function To_A is new Ada.Unchecked_Conversion (VString_Func, Address); |
| begin |
| return "VF(" & Image (To_A (A)) & ')'; |
| end Str_VF; |
| |
| ------------ |
| -- Str_VP -- |
| ------------ |
| |
| function Str_VP (A : VString_Ptr) return String is |
| begin |
| return "VP(" & Image (A.all'Address) & ')'; |
| end Str_VP; |
| |
| ------------- |
| -- Succeed -- |
| ------------- |
| |
| function Succeed return Pattern is |
| begin |
| return (AFC with 1, new PE'(PC_Succeed, 1, EOP)); |
| end Succeed; |
| |
| --------- |
| -- Tab -- |
| --------- |
| |
| function Tab (Count : Natural) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Tab_Nat, 1, EOP, Count)); |
| end Tab; |
| |
| function Tab (Count : Natural_Func) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Tab_NF, 1, EOP, Count)); |
| end Tab; |
| |
| function Tab (Count : not null access Natural) return Pattern is |
| begin |
| return (AFC with 0, new PE'(PC_Tab_NP, 1, EOP, Natural_Ptr (Count))); |
| end Tab; |
| |
| --------------------------- |
| -- Uninitialized_Pattern -- |
| --------------------------- |
| |
| procedure Uninitialized_Pattern is |
| begin |
| raise Program_Error with |
| "uninitialized value of type GNAT.Spitbol.Patterns.Pattern"; |
| end Uninitialized_Pattern; |
| |
| ------------ |
| -- XMatch -- |
| ------------ |
| |
| procedure XMatch |
| (Subject : String; |
| Pat_P : PE_Ptr; |
| Pat_S : Natural; |
| Start : out Natural; |
| Stop : out Natural) |
| is |
| Node : PE_Ptr; |
| -- Pointer to current pattern node. Initialized from Pat_P, and then |
| -- updated as the match proceeds through its constituent elements. |
| |
| Length : constant Natural := Subject'Length; |
| -- Length of string (= Subject'Last, since Subject'First is always 1) |
| |
| Cursor : Integer := 0; |
| -- If the value is non-negative, then this value is the index showing |
| -- the current position of the match in the subject string. The next |
| -- character to be matched is at Subject (Cursor + 1). Note that since |
| -- our view of the subject string in XMatch always has a lower bound |
| -- of one, regardless of original bounds, that this definition exactly |
| -- corresponds to the cursor value as referenced by functions like Pos. |
| -- |
| -- If the value is negative, then this is a saved stack pointer, |
| -- typically a base pointer of an inner or outer region. Cursor |
| -- temporarily holds such a value when it is popped from the stack |
| -- by Fail. In all cases, Cursor is reset to a proper non-negative |
| -- cursor value before the match proceeds (e.g. by propagating the |
| -- failure and popping a "real" cursor value from the stack. |
| |
| PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P); |
| -- Dummy pattern element used in the unanchored case |
| |
| Stack : Stack_Type; |
| -- The pattern matching failure stack for this call to Match |
| |
| Stack_Ptr : Stack_Range; |
| -- Current stack pointer. This points to the top element of the stack |
| -- that is currently in use. At the outer level this is the special |
| -- entry placed on the stack according to the anchor mode. |
| |
| Stack_Init : constant Stack_Range := Stack'First + 1; |
| -- This is the initial value of the Stack_Ptr and Stack_Base. The |
| -- initial (Stack'First) element of the stack is not used so that |
| -- when we pop the last element off, Stack_Ptr is still in range. |
| |
| Stack_Base : Stack_Range; |
| -- This value is the stack base value, i.e. the stack pointer for the |
| -- first history stack entry in the current stack region. See separate |
| -- section on handling of recursive pattern matches. |
| |
| Assign_OnM : Boolean := False; |
| -- Set True if assign-on-match or write-on-match operations may be |
| -- present in the history stack, which must then be scanned on a |
| -- successful match. |
| |
| procedure Pop_Region; |
| pragma Inline (Pop_Region); |
| -- Used at the end of processing of an inner region. If the inner |
| -- region left no stack entries, then all trace of it is removed. |
| -- Otherwise a PC_Restore_Region entry is pushed to ensure proper |
| -- handling of alternatives in the inner region. |
| |
| procedure Push (Node : PE_Ptr); |
| pragma Inline (Push); |
| -- Make entry in pattern matching stack with current cursor value |
| |
| procedure Push_Region; |
| pragma Inline (Push_Region); |
| -- This procedure makes a new region on the history stack. The |
| -- caller first establishes the special entry on the stack, but |
| -- does not push the stack pointer. Then this call stacks a |
| -- PC_Remove_Region node, on top of this entry, using the cursor |
| -- field of the PC_Remove_Region entry to save the outer level |
| -- stack base value, and resets the stack base to point to this |
| -- PC_Remove_Region node. |
| |
| ---------------- |
| -- Pop_Region -- |
| ---------------- |
| |
| procedure Pop_Region is |
| begin |
| -- If nothing was pushed in the inner region, we can just get |
| -- rid of it entirely, leaving no traces that it was ever there |
| |
| if Stack_Ptr = Stack_Base then |
| Stack_Ptr := Stack_Base - 2; |
| Stack_Base := Stack (Stack_Ptr + 2).Cursor; |
| |
| -- If stuff was pushed in the inner region, then we have to |
| -- push a PC_R_Restore node so that we properly handle possible |
| -- rematches within the region. |
| |
| else |
| Stack_Ptr := Stack_Ptr + 1; |
| Stack (Stack_Ptr).Cursor := Stack_Base; |
| Stack (Stack_Ptr).Node := CP_R_Restore'Access; |
| Stack_Base := Stack (Stack_Base).Cursor; |
| end if; |
| end Pop_Region; |
| |
| ---------- |
| -- Push -- |
| ---------- |
| |
| procedure Push (Node : PE_Ptr) is |
| begin |
| Stack_Ptr := Stack_Ptr + 1; |
| Stack (Stack_Ptr).Cursor := Cursor; |
| Stack (Stack_Ptr).Node := Node; |
| end Push; |
| |
| ----------------- |
| -- Push_Region -- |
| ----------------- |
| |
| procedure Push_Region is |
| begin |
| Stack_Ptr := Stack_Ptr + 2; |
| Stack (Stack_Ptr).Cursor := Stack_Base; |
| Stack (Stack_Ptr).Node := CP_R_Remove'Access; |
| Stack_Base := Stack_Ptr; |
| end Push_Region; |
| |
| -- Start of processing for XMatch |
| |
| begin |
| if Pat_P = null then |
| Uninitialized_Pattern; |
| end if; |
| |
| -- Check we have enough stack for this pattern. This check deals with |
| -- every possibility except a match of a recursive pattern, where we |
| -- make a check at each recursion level. |
| |
| if Pat_S >= Stack_Size - 1 then |
| raise Pattern_Stack_Overflow; |
| end if; |
| |
| -- In anchored mode, the bottom entry on the stack is an abort entry |
| |
| if Anchored_Mode then |
| Stack (Stack_Init).Node := CP_Cancel'Access; |
| Stack (Stack_Init).Cursor := 0; |
| |
| -- In unanchored more, the bottom entry on the stack references |
| -- the special pattern element PE_Unanchored, whose Pthen field |
| -- points to the initial pattern element. The cursor value in this |
| -- entry is the number of anchor moves so far. |
| |
| else |
| Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access; |
| Stack (Stack_Init).Cursor := 0; |
| end if; |
| |
| Stack_Ptr := Stack_Init; |
| Stack_Base := Stack_Ptr; |
| Cursor := 0; |
| Node := Pat_P; |
| goto Match; |
| |
| ----------------------------------------- |
| -- Main Pattern Matching State Control -- |
| ----------------------------------------- |
| |
| -- This is a state machine which uses gotos to change state. The |
| -- initial state is Match, to initiate the matching of the first |
| -- element, so the goto Match above starts the match. In the |
| -- following descriptions, we indicate the global values that |
| -- are relevant for the state transition. |
| |
| -- Come here if entire match fails |
| |
| <<Match_Fail>> |
| Start := 0; |
| Stop := 0; |
| return; |
| |
| -- Come here if entire match succeeds |
| |
| -- Cursor current position in subject string |
| |
| <<Match_Succeed>> |
| Start := Stack (Stack_Init).Cursor + 1; |
| Stop := Cursor; |
| |
| -- Scan history stack for deferred assignments or writes |
| |
| if Assign_OnM then |
| for S in Stack_Init .. Stack_Ptr loop |
| if Stack (S).Node = CP_Assign'Access then |
| declare |
| Inner_Base : constant Stack_Range := |
| Stack (S + 1).Cursor; |
| Special_Entry : constant Stack_Range := |
| Inner_Base - 1; |
| Node_OnM : constant PE_Ptr := |
| Stack (Special_Entry).Node; |
| Start : constant Natural := |
| Stack (Special_Entry).Cursor + 1; |
| Stop : constant Natural := Stack (S).Cursor; |
| |
| begin |
| if Node_OnM.Pcode = PC_Assign_OnM then |
| Set_Unbounded_String |
| (Node_OnM.VP.all, Subject (Start .. Stop)); |
| |
| elsif Node_OnM.Pcode = PC_Write_OnM then |
| Put_Line (Node_OnM.FP.all, Subject (Start .. Stop)); |
| |
| else |
| Logic_Error; |
| end if; |
| end; |
| end if; |
| end loop; |
| end if; |
| |
| return; |
| |
| -- Come here if attempt to match current element fails |
| |
| -- Stack_Base current stack base |
| -- Stack_Ptr current stack pointer |
| |
| <<Fail>> |
| Cursor := Stack (Stack_Ptr).Cursor; |
| Node := Stack (Stack_Ptr).Node; |
| Stack_Ptr := Stack_Ptr - 1; |
| goto Match; |
| |
| -- Come here if attempt to match current element succeeds |
| |
| -- Cursor current position in subject string |
| -- Node pointer to node successfully matched |
| -- Stack_Base current stack base |
| -- Stack_Ptr current stack pointer |
| |
| <<Succeed>> |
| Node := Node.Pthen; |
| |
| -- Come here to match the next pattern element |
| |
| -- Cursor current position in subject string |
| -- Node pointer to node to be matched |
| -- Stack_Base current stack base |
| -- Stack_Ptr current stack pointer |
| |
| <<Match>> |
| |
| -------------------------------------------------- |
| -- Main Pattern Match Element Matching Routines -- |
| -------------------------------------------------- |
| |
| -- Here is the case statement that processes the current node. The |
| -- processing for each element does one of five things: |
| |
| -- goto Succeed to move to the successor |
| -- goto Match_Succeed if the entire match succeeds |
| -- goto Match_Fail if the entire match fails |
| -- goto Fail to signal failure of current match |
| |
| -- Processing is NOT allowed to fall through |
| |
| case Node.Pcode is |
| |
| -- Cancel |
| |
| when PC_Cancel => |
| goto Match_Fail; |
| |
| -- Alternation |
| |
| when PC_Alt => |
| Push (Node.Alt); |
| Node := Node.Pthen; |
| goto Match; |
| |
| -- Any (one character case) |
| |
| when PC_Any_CH => |
| if Cursor < Length |
| and then Subject (Cursor + 1) = Node.Char |
| then |
| Cursor := Cursor + 1; |
| goto Succeed; |
| else |
| goto Fail; |
| end if; |
| |
| -- Any (character set case) |
| |
| when PC_Any_CS => |
| if Cursor < Length |
| and then Is_In (Subject (Cursor + 1), Node.CS) |
| then |
| Cursor := Cursor + 1; |
| goto Succeed; |
| else |
| goto Fail; |
| end if; |
| |
| -- Any (string function case) |
| |
| when PC_Any_VF => declare |
| U : constant VString := Node.VF.all; |
| S : Big_String_Access; |
| L : Natural; |
| |
| begin |
| Get_String (U, S, L); |
| |
| if Cursor < Length |
| and then Is_In (Subject (Cursor + 1), S (1 .. L)) |
| then |
| Cursor := Cursor + 1; |
| goto Succeed; |
| else |
| goto Fail; |
| end if; |
| end; |
| |
| -- Any (string pointer case) |
| |
| when PC_Any_VP => declare |
| U : constant VString := Node.VP.all; |
| S : Big_String_Access; |
| L : Natural; |
| |
| begin |
| Get_String (U, S, L); |
| |
| if Cursor < Length |
| and then Is_In (Subject (Cursor + 1), S (1 .. L)) |
| then |
| Cursor := Cursor + 1; |
| goto Succeed; |
| else |
| goto Fail; |
| end if; |
| end; |
| |
| -- Arb (initial match) |
| |
| when PC_Arb_X => |
| Push (Node.Alt); |
| Node := Node.Pthen; |
| goto Match; |
| |
| -- Arb (extension) |
| |
| when PC_Arb_Y => |
| if Cursor < Length then |
| Cursor := Cursor + 1; |
| Push (Node); |
| goto Succeed; |
| else |
| goto Fail; |
| end if; |
| |
| -- Arbno_S (simple Arbno initialize). This is the node that |
| -- initiates the match of a simple Arbno structure. |
| |
| when PC_Arbno_S => |
| Push (Node.Alt); |
| Node := Node.Pthen; |
| goto Match; |
| |
| -- Arbno_X (Arbno initialize). This is the node that initiates |
| -- the match of a complex Arbno structure. |
| |
| when PC_Arbno_X => |
| Push (Node.Alt); |
| Node := Node.Pthen; |
| goto Match; |
| |
| -- Arbno_Y (Arbno rematch). This is the node that is executed |
| -- following successful matching of one instance of a complex |
| -- Arbno pattern. |
| |
| when PC_Arbno_Y => declare |
| Null_Match : constant Boolean := |
| Cursor = Stack (Stack_Base - 1).Cursor; |
| |
| begin |
| Pop_Region; |
| |
| -- If arbno extension matched null, then immediately fail |
| |
| if Null_Match then |
| goto Fail; |
| end if; |
| |
| -- Here we must do a stack check to make sure enough stack |
| -- is left. This check will happen once for each instance of |
| -- the Arbno pattern that is matched. The Nat field of a |
| -- PC_Arbno pattern contains the maximum stack entries needed |
| -- for the Arbno with one instance and the successor pattern |
| |
| if Stack_Ptr + Node.Nat >= Stack'Last then |
| raise Pattern_Stack_Overflow; |
| end if; |
| |
| goto Succeed; |
| end; |
| |
| -- Assign. If this node is executed, it means the assign-on-match |
| -- or write-on-match operation will not happen after all, so we |
| -- is propagate the failure, removing the PC_Assign node. |
| |
| when PC_Assign => |
| goto Fail; |
| |
| -- Assign immediate. This node performs the actual assignment |
| |
| when PC_Assign_Imm => |
| Set_Unbounded_String |
| (Node.VP.all, |
| Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor)); |
| Pop_Region; |
| goto Succeed; |
| |
| -- Assign on match. This node sets up for the eventual assignment |
| |
| when PC_Assign_OnM => |
| Stack (Stack_Base - 1).Node := Node; |
| Push (CP_Assign'Access); |
| Pop_Region; |
| Assign_OnM := True; |
| goto Succeed; |
| |
| -- Bal |
| |
| when PC_Bal => |
| if Cursor >= Length or else Subject (Cursor + 1) = ')' then |
| goto Fail; |
| |
| elsif Subject (Cursor + 1) = '(' then |
| declare |
| Paren_Count : Natural := 1; |
| |
| begin |
| loop |
| Cursor := Cursor + 1; |
| |
| if Cursor >= Length then |
| goto Fail; |
| |
| elsif Subject (Cursor + 1) = '(' then |
| Paren_Count := Paren_Count + 1; |
| |
| elsif Subject (Cursor + 1) = ')' then |
| Paren_Count := Paren_Count - 1; |
| exit when Paren_Count = 0; |
| end if; |
| end loop; |
| end; |
| end if; |
| |
| Cursor := Cursor + 1; |
| Push (Node); |
| goto Succeed; |
| |
| -- Break (one character case) |
| |
| when PC_Break_CH => |
| while Cursor < Length loop |
| if Subject (Cursor + 1) = Node.Char then |
| goto Succeed; |
| else |
| Cursor := Cursor + 1; |
| end if; |
| end loop; |
| |
| goto Fail; |
| |
| -- Break (character set case) |
| |
| when PC_Break_CS => |
| while Cursor < Length loop |
| if Is_In (Subject (Cursor + 1), Node.CS) then |
| goto Succeed; |
| else |
| Cursor := Cursor + 1; |
| end if; |
| end loop; |
| |
| goto Fail; |
| |
| -- Break (string function case) |
| |
| when PC_Break_VF => declare |
| U : constant VString := Node.VF.all; |
| S : Big_String_Access; |
| L : Natural; |
| |
| begin |
| Get_String (U, S, L); |
| |
| while Cursor < Length loop |
| if Is_In (Subject (Cursor + 1), S (1 .. L)) then |
| goto Succeed; |
| else |
| Cursor := Cursor + 1; |
| end if; |
| end loop; |
| |
| goto Fail; |
| end; |
| |
| -- Break (string pointer case) |
| |
| when PC_Break_VP => declare |
| U : constant VString := Node.VP.all; |
| S : Big_String_Access; |
| L : Natural; |
| |
| begin |
| Get_String (U, S, L); |
| |
| while Cursor < Length loop |
| if Is_In (Subject (Cursor + 1), S (1 .. L)) then |
| goto Succeed; |
| else |
| Cursor := Cursor + 1; |
| end if; |
| end loop; |
| |
| goto Fail; |
| end; |
| |
| -- BreakX (one character case) |
| |
| when PC_BreakX_CH => |
| while Cursor < Length loop |
| if Subject (Cursor + 1) = Node.Char then |
| goto Succeed; |
| else |
| Cursor := Cursor + 1; |
| end if; |
| end loop; |
| |
| goto Fail; |
| |
| -- BreakX (character set case) |
| |
| when PC_BreakX_CS => |
| while Cursor < Length loop |
| if Is_In (Subject (Cursor + 1), Node.CS) then |
| goto Succeed; |
| else |
| Cursor := Cursor + 1; |
| end if; |
| end loop; |
| |
| goto Fail; |
| |
| -- BreakX (string function case) |
| |
| when PC_BreakX_VF => declare |
| U : constant VString := Node.VF.all; |
| S : Big_String_Access; |
| L : Natural; |
| |
| begin |
| Get_String (U, S, L); |
| |
| while Cursor < Length loop |
| if Is_In (Subject (Cursor + 1), S (1 .. L)) then |
| goto Succeed; |
| else |
| Cursor := Cursor + 1; |
| end if; |
| end loop; |
| |
| goto Fail; |
| end; |
| |
| -- BreakX (string pointer case) |
| |
| when PC_BreakX_VP => declare |
| U : constant VString := Node.VP.all; |
| S : Big_String_Access; |
| L : Natural; |
| |
| begin |
| Get_String (U, S, L); |
| |
| while Cursor < Length loop |
| if Is_In (Subject (Cursor + 1), S (1 .. L)) then |
| goto Succeed; |
| else |
| Cursor := Cursor + 1; |
| end if; |
| end loop; |
| |
| goto Fail; |
| end; |
| |
| -- BreakX_X (BreakX extension). See section on "Compound Pattern |
| -- Structures". This node is the alternative that is stacked to |
| -- skip past the break character and extend the break. |
| |
| when PC_BreakX_X => |
| Cursor := Cursor + 1; |
| goto Succeed; |
| |
| -- Character (one character string) |
| |
| when PC_Char => |
| if Cursor < Length |
| and then Subject (Cursor + 1) = Node.Char |
| then |
| Cursor := Cursor + 1; |
| goto Succeed; |
| else |
| goto Fail; |
| end if; |
| |
| -- End of Pattern |
| |
| when PC_EOP => |
| if Stack_Base = Stack_Init then |
| goto Match_Succeed; |
| |
| -- End of recursive inner match. See separate section on |
| -- handing of recursive pattern matches for details. |
| |
| else |
| Node := Stack (Stack_Base - 1).Node; |
| Pop_Region; |
| goto Match; |
| end if; |
| |
| -- Fail |
| |
| when PC_Fail => |
| goto Fail; |
| |
| -- Fence (built in pattern) |
| |
| when PC_Fence => |
| Push (CP_Cancel'Access); |
| goto Succeed; |
| |
| -- Fence function node X. This is the node that gets control |
| -- after a successful match of the fenced pattern. |
| |
| when PC_Fence_X => |
| Stack_Ptr := Stack_Ptr + 1; |
| Stack (Stack_Ptr).Cursor := Stack_Base; |
| Stack (Stack_Ptr).Node := CP_Fence_Y'Access; |
| Stack_Base := Stack (Stack_Base).Cursor; |
| goto Succeed; |
| |
| -- Fence function node Y. This is the node that gets control on |
| -- a failure that occurs after the fenced pattern has matched. |
| |
| -- Note: the Cursor at this stage is actually the inner stack |
| -- base value. We don't reset this, but we do use it to strip |
| -- off all the entries made by the fenced pattern. |
| |
| when PC_Fence_Y => |
| Stack_Ptr := Cursor - 2; |
| goto Fail; |
| |
| -- Len (integer case) |
| |
| when PC_Len_Nat => |
| if Cursor + Node.Nat > Length then |
| goto Fail; |
| else |
| Cursor := Cursor + Node.Nat; |
| goto Succeed; |
| end if; |
| |
| -- Len (Integer function case) |
| |
| when PC_Len_NF => declare |
| N : constant Natural := Node.NF.all; |
| begin |
| if Cursor + N > Length then |
| goto Fail; |
| else |
| Cursor := Cursor + N; |
| goto Succeed; |
| end if; |
| end; |
| |
| -- Len (integer pointer case) |
| |
| when PC_Len_NP => |
| if Cursor + Node.NP.all > Length then |
| goto Fail; |
| else |
| Cursor := Cursor + Node.NP.all; |
| goto Succeed; |
| end if; |
| |
| -- NotAny (one character case) |
| |
| when PC_NotAny_CH => |
| if Cursor < Length |
| and then Subject (Cursor + 1) /= Node.Char |
| then |
| Cursor := Cursor + 1; |
| goto Succeed; |
| else |
| goto Fail; |
| end if; |
| |
| -- NotAny (character set case) |
| |
| when PC_NotAny_CS => |
| if Cursor < Length |
| and then not Is_In (Subject (Cursor + 1), Node.CS) |
|