blob: e333e4b13454ab8257ec2251ce004dcb01c50422 [file]
/*
* Copyright (c) 2021-2026 Symas Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of the Symas Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <set>
#include <stack>
#include <string>
#include <unordered_map>
#include <vector>
#include <cwctype>
#include <dirent.h>
#include <dlfcn.h>
#include <err.h>
#include <fcntl.h>
#include <fenv.h>
#include <math.h> // required for fpclassify(3), not in cmath
#include <setjmp.h>
#include <signal.h>
#include <syslog.h>
#include <unistd.h>
#include <stdarg.h>
#if __has_include(<errno.h>)
# include <errno.h> // for program_invocation_short_name
#endif
#include <langinfo.h>
#include "config.h"
#include "libgcobol-fp.h"
#include "ec.h"
#include "common-defs.h"
#include "io.h"
#include "gcobolio.h"
#include "libgcobol.h"
#include "gfileio.h"
#include "charmaps.h"
#include "valconv.h"
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <execinfo.h>
#include "exceptl.h"
#include "stringbin.h"
// NOTE(review): presumably the "no digits to the right of the decimal point"
// value for rdigits arguments; it is not referenced in this part of the file.
#define NO_RDIGITS (0)
// Read-only iterator over a vector of cbl_char_t characters.
typedef std::vector<cbl_char_t>::const_iterator char_it_c ;
// Read-write iterator over a vector of cbl_char_t characters.
typedef std::vector<cbl_char_t>::iterator char_it ;
/*  Find the leftmost occurrence of the bytes [piece, piece_end) inside the
    bytes [whole, whole_end).

    Both *_end pointers are one past the last byte of their range.  Returns a
    pointer to the first byte of the occurrence, or NULL when there is none.
    A zero-length piece is a caller error and is reported via __gg__abort().  */
static const char *
funky_find( const char *piece,
            const char *piece_end,
            const char *whole,
            const char *whole_end )
  {
  const size_t length_of_piece = piece_end - piece;
  if( length_of_piece == 0 )
    {
    __gg__abort("funky_find() length_of_piece shouldn't be zero");
    }

  // When the piece is longer than the whole there is nothing to search.
  // Checking this first keeps the pointer arithmetic below inside the
  // [whole, whole_end] range.
  if( whole_end < whole
      || static_cast<size_t>(whole_end - whole) < length_of_piece )
    {
    return NULL;
    }

  // The last position at which the piece still fits:
  const char *last_start = whole_end - length_of_piece;
  for( const char *here = whole; here <= last_start; here += 1 )
    {
    if( memcmp(piece, here, length_of_piece) == 0 )
      {
      return here;
      }
    }
  return NULL;
  }
/*  Find the leftmost occurrence of the characters [needle, needle_end) inside
    the characters [haystack, haystack_end).

    Both *_end iterators are one past the last character of their range.
    Returns an iterator to the first character of the occurrence, or notfound
    when there is none.  A zero-length needle is a caller error and is
    reported via __gg__abort().  */
static char_it_c
funky_find_wide(  char_it_c needle,
                  char_it_c needle_end,   // Actually end+1
                  char_it_c haystack,
                  char_it_c haystack_end, // Actually end+1
                  char_it_c notfound)
  {
  const size_t length_of_piece = needle_end - needle;
  if( length_of_piece == 0 )
    {
    __gg__abort("funky_find_wide() length_of_piece shouldn't be zero");
    }

  // A needle longer than the haystack cannot be found.  Checking this first
  // means no iterator is ever moved outside [haystack, haystack_end].
  if( haystack_end < haystack
      || static_cast<size_t>(haystack_end - haystack) < length_of_piece )
    {
    return notfound;
    }

  // The last position at which the needle still fits:
  const char_it_c last_start = haystack_end - length_of_piece;
  for( char_it_c here = haystack; here <= last_start; ++here )
    {
    // Compare the needle to the characters starting at this location
    if( std::equal(needle, needle_end, here) )
      {
      // They are the same; return where needle was found
      return here;
      }
    // Not found; move to the next location in the haystack
    }
  return notfound;
  }
/*  Find the rightmost occurrence of the bytes [piece, piece_end) inside the
    bytes [whole, whole_end).

    Both *_end pointers are one past the last byte of their range.  Returns a
    pointer to the first byte of the occurrence, or NULL when there is none.
    A zero-length piece is a caller error and is reported via __gg__abort().  */
static const char *
funky_find_backward(const char *piece,
                    const char *piece_end,
                    const char *whole,
                    const char *whole_end )
  {
  const size_t length_of_piece = piece_end - piece;
  if( length_of_piece == 0 )
    {
    __gg__abort("funky_find_backward() length_of_piece shouldn't be zero");
    }

  // When the piece is longer than the whole there is nothing to search.
  // Checking this first keeps the pointer arithmetic below inside the
  // [whole, whole_end] range.
  if( whole_end < whole
      || static_cast<size_t>(whole_end - whole) < length_of_piece )
    {
    return NULL;
    }

  // Start at the rightmost position where the piece fits and walk left.
  const char *candidate = whole_end - length_of_piece;
  for(;;)
    {
    if( memcmp(piece, candidate, length_of_piece) == 0 )
      {
      return candidate;
      }
    if( candidate == whole )
      {
      // That was the leftmost position; stop without stepping before whole.
      break;
      }
    candidate -= 1;
    }
  return NULL;
  }
/*  Find the rightmost occurrence of the characters [needle, needle_end)
    inside the characters [haystack, haystack_end).

    Both *_end iterators are one past the last character of their range.
    Returns an iterator to the first character of the occurrence, or notfound
    when there is none.  A zero-length needle is a caller error and is
    reported via __gg__abort().  */
static char_it_c
funky_find_wide_backward( char_it_c needle,
                          char_it_c needle_end,   // Actually end+1
                          char_it_c haystack,
                          char_it_c haystack_end, // Actually end+1
                          char_it_c notfound)
  {
  const size_t length_of_piece = needle_end - needle;
  if( length_of_piece == 0 )
    {
    __gg__abort("funky_find_wide_backward() length_of_piece shouldn't be zero");
    }

  // A needle longer than the haystack cannot be found.  Checking this first
  // means no iterator is ever moved outside [haystack, haystack_end].
  if( haystack_end < haystack
      || static_cast<size_t>(haystack_end - haystack) < length_of_piece )
    {
    return notfound;
    }

  // Start at the rightmost position where the needle fits and walk left.
  char_it_c candidate = haystack_end - length_of_piece;
  for(;;)
    {
    if( std::equal(needle, needle_end, candidate) )
      {
      // They are the same; return where needle was found
      return candidate;
      }
    if( candidate == haystack )
      {
      // That was the leftmost position; stop without stepping before it.
      break;
      }
    // Not found; move to the next location in the haystack
    --candidate;
    }
  return notfound;
  }
// An INSPECT operand in the form produced by normalize_id().
typedef struct normalized_operand
{
// Byte-oriented copy of the characters of the operand. When the field is
// NumericDisplay a separate +/- sign character is left out, and any sign
// embedded in a digit is cleared.
std::string the_characters;
// In order for INSPECT to handle things like UTF-8, which often has
// multi-byte codepoints, and UTF-16, which sometimes has multi-pair
// codepoints, normalize_id() also converts the operand to
// DEFAULT_32_ENCODING and stores one element per character here. The
// matching loops work on this vector.
std::vector<cbl_char_t>the_vectorxxxx;
// offset and length are maintained in characters, not bytes
size_t offset; // Usually zero. Set to one for a leading separate sign.
size_t length; // Usually the same as the original, but one less than the
// original when the sign is a separate character.
} normalized_operand;
// One search term of an INSPECT statement, together with the portion of
// identifier-1 (after BEFORE/AFTER processing) in which it may match.
typedef struct comparand
{
size_t id_2_index; // Which id_2_results entry a match is counted in
cbl_inspect_bound_t operation; // CHARACTERS / ALL / LEADING / TRAILING / FIRST code
normalized_operand identifier_3; // The thing to be found
normalized_operand identifier_5; // The replacement, for FORMAT 2
// The byte-pointer pair and the iterator pair below describe the same
// qualified range; the pointers refer to the_characters and the iterators
// to the_vectorxxxx of the normalized identifier-1.
const char *alpha; // The start location within normalized_id_1
const char *omega; // The end+1 location within normalized_id_1
char_it_c alpha_it; // The start location within normalized_id_1
char_it_c omega_it; // The end+1 location within normalized_id_1
size_t leading_count; // Number of LEADING matches seen so far
bool leading; // True while a LEADING match is still possible
bool first; // NOTE(review): not used in the visible code; presumably for FIRST in FORMAT 2
} comparand;
// Variant of comparand that keeps identifier-3 as a std::string and the
// qualified range as indexes rather than pointers/iterators.
// NOTE(review): "sbc" presumably means single-byte-coded (ASCII/EBCDIC), to
// go with normalize_id_sbc(); its users are outside this part of the file.
typedef struct comparand_sbc
{
size_t id_2_index; // Which tally result a match is counted in
cbl_inspect_bound_t operation; // The bound code of the operation
std::string identifier_3; // The thing to be found
// (An identifier_5 replacement member for FORMAT 2 is disabled here.)
size_t alpha; // The start location within normalized_id_1
size_t omega; // The end+1 location within normalized_id_1
size_t leading_count; // NOTE(review): presumably the LEADING match count, as in comparand
bool leading; // NOTE(review): presumably "LEADING still possible", as in comparand
bool first; // NOTE(review): presumably for FIRST; not used in the visible code
} comparand_sbc;
// One identifier-2 (tally counter) of an INSPECT FORMAT 1 statement and the
// number of matches accumulated for it during the scan.
typedef struct id_2_result
{
cblc_field_t *id2; // The field that receives the tally
size_t id2_o; // Offset of the referenced data within id2
size_t id2_s; // Size of the referenced data within id2
size_t result; // Matches counted so far; added to id2's value at the end
} id_2_result;
/*  Build the normalized form of an INSPECT operand.

    field/field_o/field_s describe the operand's data (field may be NULL, in
    which case an empty operand with offset and length of zero is returned).
    encoding selects the charmap used for sign handling and for the stride
    (bytes per character).

    Two representations are produced:

      the_characters   a byte-for-byte copy of the operand
      the_vectorxxxx   the operand converted from field->encoding to
                       DEFAULT_32_ENCODING, one element per character

    For a NumericDisplay field a separate sign character is excluded and an
    embedded sign is cleared from the digits.  For a figurative constant both
    representations are filled with the constant's character.

    On return offset and length are expressed in characters.  */
static normalized_operand
normalize_id( const cblc_field_t *field,
              size_t              field_o,
              size_t              field_s,
              cbl_encoding_t      encoding )
  {
  normalized_operand retval;
  retval.offset = 0;
  retval.length = 0;

  if( !field )
    {
    // There is no field, so leave the_characters and the vector empty.
    return retval;
    }

  charmap_t *charmap = __gg__get_charmap(encoding);
  const unsigned char *data = field->data + field_o;
  const cbl_figconst_t figconst
                          = (cbl_figconst_t)(field->attr & FIGCONST_MASK);
  const bool numeric_display = field->type == FldNumericDisplay;
  const bool separate_sign   = numeric_display && (field->attr & separate_e);
  const bool leading_sign    = field->attr & leading_e;

  //
  // Part one: the old-style byte-based copy into the_characters
  //
  retval.length = field_s;
  if( numeric_display )
    {
    if( separate_sign )
      {
      // Because the sign is a separate plus or minus, the length
      // gets reduced by one:
      retval.length = field_s ? field_s - 1 : 0;
      if( leading_sign )
        {
        // Because the sign character is LEADING, we increase the
        // offset by one
        retval.offset = 1;
        }
      }
    // Copy 'length' digits starting at 'offset'.  (Bounding the index by
    // length alone would drop the final digit when offset is one.)
    for( size_t i=0; i<retval.length; i+=1 )
      {
      // Because we are dealing with a NumericDisplay that might have
      // the minus bit turned on, we mask it off as we copy the input
      // characters over to retval:
      retval.the_characters
                  += charmap->set_digit_negative(data[retval.offset + i], false);
      }
    }
  else if( figconst == normal_value_e )
    {
    for( size_t i=0; i<retval.length; i+=1 )
      {
      retval.the_characters += data[i];
      }
    }
  else
    {
    // A figurative constant: fill with its character.
    char ch = charmap->figconst_character(figconst);
    retval.the_characters.append(retval.length, ch);
    }

  //
  // Part two: the converted copy into the_vectorxxxx
  //
  const cbl_encoding_t source_encoding = field->encoding;
  const charmap_t *charmap_source = __gg__get_charmap(source_encoding);
  const int stride = charmap->stride();

  if( figconst != normal_value_e )
    {
    // We need to fill the field with a figurative constant.  The offset and
    // length established in part one are used as they stand.
    // NOTE(review): the_characters was already filled in part one and is
    // appended to again here, as it always has been; confirm whether any
    // consumer relies on that before removing it.
    charmap_t *charmap32 = __gg__get_charmap(DEFAULT_32_ENCODING);
    char ch = charmap32->figconst_character(figconst);
    for( size_t i=retval.offset; i<retval.length; i+=1 )
      {
      retval.the_characters += ch;
      retval.the_vectorxxxx.push_back(ch);
      }
    return retval;
    }

  // From here on, offset and length are counted in characters of 'stride'
  // bytes each.
  retval.offset = 0;
  retval.length = field_s / stride;
  if( separate_sign )
    {
    // Because the sign is a separate plus or minus, the length in
    // characters gets reduced by one:
    if( retval.length )
      {
      retval.length -= 1;
      }
    if( leading_sign )
      {
      // Because the sign character is LEADING, we increase the
      // offset by one
      retval.offset = 1;
      }
    }

  // We are ready to convert from the input to UTF32.  The size handed back
  // by the converter is stepped through width_of_utf32 at a time.
  size_t converted_size;
  const char *converted = __gg__iconverter(source_encoding,
                                           DEFAULT_32_ENCODING,
                                           data+retval.offset * stride,
                                           retval.length * stride,
                                           &converted_size);

  const bool ebcdic_digits = numeric_display
                             && charmap_source->is_like_ebcdic();

  // We are ready to copy the characters over:
  for( size_t i=0; i<converted_size; i+=width_of_utf32 )
    {
    cbl_char_t ch = charmap->getch(converted, i);
    if( ebcdic_digits )
      {
      // In EBCDIC, a flagged negative digit 0xF0 through 0xF9 becomes
      // 0xD0 through 0xD9.  Those represent the characters "}JKLMNOPQR",
      // which, now that we are in UTF32 space, don't have the right bit
      // pattern to be fixed with set_digit_negative().  So we map them
      // back to digits here: 0x7D '}' becomes 0x30 '0', and 0x4A 'J'
      // through 0x52 'R' become 0x31 '1' through 0x39 '9'.  Every other
      // value, the ordinary digits included, passes through as its low
      // eight bits.
      const cbl_char_t low = ch & 0xFF;
      if( low == 0x7D )
        {
        ch = 0x30;
        }
      else if( low >= 0x4A && low <= 0x52 )
        {
        ch = 0x31 + (low - 0x4A);
        }
      else
        {
        ch = low;
        }
      }
    else if( numeric_display )
      {
      // Mask off a possible minus bit in the digit.
      ch = charmap->set_digit_negative(ch, false);
      }
    retval.the_vectorxxxx.push_back(ch);
    }
  return retval;
  }
/*  Build the normalized byte string of an INSPECT operand whose field is
    known to be single-byte (ASCII or EBCDIC).

    Returns an empty string when field is NULL or field_s is zero.  For a
    NumericDisplay field a separate sign character is excluded, and for a
    signable field the sign embedded in the first (LEADING) or last digit is
    cleared.  For a figurative constant the result is field_s copies of the
    constant's character in the given encoding.  */
static std::string
normalize_id_sbc( const cblc_field_t *field,
                  size_t              field_o,
                  size_t              field_s,
                  cbl_encoding_t      encoding )
  {
  // We know that the field is ASCII or EBCDIC
  std::string retval;
  if( !field || !field_s )
    {
    // There is nothing to normalize, so leave retval empty
    return retval;
    }

  charmap_t *charmap = __gg__get_charmap(encoding);
  const unsigned char *data = field->data + field_o;
  cbl_figconst_t figconst
                  = (cbl_figconst_t)(field->attr & FIGCONST_MASK);

  if( field->type != FldNumericDisplay )
    {
    // We aren't dealing with numeric-display, so
    if( figconst == normal_value_e )
      {
      retval.assign(reinterpret_cast<const char *>(data), field_s);
      }
    else
      {
      // This field is flagged as figconst
      char ch = charmap->figconst_character(figconst);
      retval.assign(field_s, ch);
      }
    return retval;
    }

  // The value is NumericDisplay.
  if( field->attr & separate_e )
    {
    // Because the sign is a separate plus or minus, the length
    // gets reduced by one:
    field_s -= 1;
    if( field->attr & leading_e )
      {
      // Because the sign character is LEADING, we skip over it
      data += 1;
      }
    }

  // At this point, the bytes start at data, and there are field_s of them.
  retval.assign(reinterpret_cast<const char *>(data), field_s);

  // field_s can be zero here when the field held nothing but a separate
  // sign; in that case there is no digit to clean up.
  if( (field->attr & signable_e) && field_s )
    {
    // The sign might be embedded in the first byte (LEADING) or in the last
    // byte; get rid of it in that same position of the result.
    const size_t sign_at = (field->attr & leading_e) ? 0 : field_s - 1;
    retval[sign_at] = charmap->set_digit_negative(data[sign_at], false);
    }
  return retval;
  }
/*  Refill id_target with copies of its own first character so that it has
    the same length as id_source.  Both the byte string and the wide vector
    are rebuilt, and id_target.length is set to id_source.length.  This is
    used when a figurative-constant operand has to be stretched or shrunk to
    the size of the operand it is paired with.  */
static void
match_lengths(      normalized_operand &id_target,
              const normalized_operand &id_source)
  {
  const size_t wanted_length = id_source.length;

  // The byte-oriented representation:
  const char narrow_fill = id_target.the_characters[0];
  id_target.the_characters.assign(wanted_length, narrow_fill);

  // The wide representation:
  const cbl_char_t wide_fill = id_target.the_vectorxxxx[0];
  id_target.the_vectorxxxx.assign(wanted_length, wide_fill);

  id_target.length = wanted_length;
  }
static void
the_alpha_and_omega(const normalized_operand &id_before,
const normalized_operand &id_after,
const char * &alpha,
const char * &omega,
char_it_c &alpha_it,
char_it_c &omega_it,
char_it_c notfound)
{
/* The 2023 ISO description of the AFTER and BEFORE phrases of the INSPECT
statement is, in a word, garbled.
IBM's COBOL for Linux 1.2 is a little better, but still a bit confusing
because the description for AFTER neglects to specifically state that
the scan starts one character to the right of the *first* occurrence of
the AFTER value.
Micro Focus 9.2.5 has the advantage of being ungarbled, succinct, and
unambiguous.
The BEFORE phrase modifies the character position to use as the rightmost
position in source for the corresponding comparison operation. Comparisons
in source occur only to the left of the first occurrence of delimiter. If
delimiter is not present in source, then the comparison proceeds as if
there were no BEFORE phrase.
The AFTER phrase modifies the character position to use as the leftmost
position in source for the corresponding comparison operation. Comparisons
in source occur only to the right of the first occurrence of delimiter.
This character position is the one immediately to the right of the
rightmost character of the delimiter found. If delimiter is not found in
source, the INSPECT statement has no effect (no tallying or replacement
occurs).
"xyzxyzAFTERxyzxyzxyzxyzBEFORExyzxyzAFTERxyzxyz"
^ ^
| |
| |-- omega
----------------alpha
*/
if( id_before.length )
{
// This is the BEFORE delimiter. We look for the first occurrence of that
// delimiter starting at the left of id_1
const char *start = id_before.the_characters.c_str();
const char *end = start + id_before.length;
const char *found = funky_find(start, end, alpha, omega);
if( found )
{
// We found id_before within alpha/omega, so reduce omega
// to the found location.
omega = found;
// If not found, we just leave omega alone.
}
char_it_c omega_found = funky_find_wide(id_before.the_vectorxxxx.begin(),
id_before.the_vectorxxxx.end(),
alpha_it,
omega_it,
notfound );
if( omega_found != notfound )
{
// We found id_before within alpha/omega, so reduce omega
// to the found location.
omega_it = omega_found;
}
}
if( id_after.length )
{
// This is the AFTER delimiter. We look for the first occurrence of that
// delimiter in id_1
const char *start = id_after.the_characters.c_str();
const char *end = start + id_after.length;
const char *found = funky_find(start, end, alpha, omega);
if( found )
{
// We found id_after in the alpha/omega segment. We update alpha
// be the character after the id_after substring.
alpha = found + (end-start);
}
else
{
// We didn't find the id_after string, so we set the alpha to be
// omega. That means that no tally or replace operation will take
// because no characters will qualify.
alpha = omega;
}
char_it_c omega_found = funky_find_wide(id_after.the_vectorxxxx.begin(),
id_after.the_vectorxxxx.end(),
alpha_it,
omega_it,
notfound );
if( omega_found != notfound)
{
// We found id_after in the alpha/omega segment. We update alpha
// be the character after the id_after substring.
alpha_it = omega_found + (end-start);
}
else
{
// We didn't find the id_after string, so we set the alpha to be
// omega. That means that no tally or replace operation will take
// because no characters will qualify.
alpha_it = omega_it;
}
}
}
/*  Compute, as indexes into haystack, the range [alpha, omega) that a
    comparand may match in, according to the BEFORE and AFTER phrases of a
    forward INSPECT.  An empty id_before or id_after means the phrase is
    absent.

    The rules, following the Micro Focus wording (the 2023 ISO text is
    garbled and the IBM text leaves out that AFTER means the *first*
    occurrence):

      BEFORE  omega is the index of the first occurrence of id_before.  When
              the phrase is absent or the delimiter is not found, omega is
              the length of the haystack.

      AFTER   alpha is the index just past the first occurrence of id_after
              that lies entirely to the left of omega.  When there is no such
              occurrence alpha is the length of the haystack, so nothing can
              match.  When the phrase is absent alpha is zero.  */
static void
the_alpha_and_omega_sbc(const std::string &id_before,
                        const std::string &id_after,
                        const std::string &haystack,
                        size_t &alpha,
                        size_t &omega)
  {
  const size_t whole_length = haystack.length();

  // Establish the right edge from the BEFORE delimiter.
  size_t right_edge = whole_length;
  if( !id_before.empty() )
    {
    const size_t before_at = haystack.find(id_before);
    if( before_at != std::string::npos )
      {
      right_edge = before_at;
      }
    }

  // Establish the left edge from the AFTER delimiter.
  size_t left_edge = 0;
  if( !id_after.empty() )
    {
    // The first occurrence anywhere in the haystack is also the first
    // occurrence to the left of the right edge, provided it ends there or
    // earlier; if it doesn't, no later occurrence can either.
    const size_t after_at = haystack.find(id_after);
    const bool fits = after_at != std::string::npos
                      && after_at + id_after.length() <= right_edge;
    left_edge = fits ? after_at + id_after.length() : whole_length;
    }

  omega = right_edge;
  alpha = left_edge;
  }
static void
the_alpha_and_omega_backward( const normalized_operand &id_before,
const normalized_operand &id_after,
const char * &alpha,
const char * &omega,
char_it_c &alpha_it,
char_it_c &omega_it,
char_it_c notfound)
{
/* Like the_alpha_and_omega(), but for handling BACKWARD.
"xyzxyzBEFORExyzxyzAFTERxyzxyzxyzxyzBEFORExyzxyzAFTERxyzxyz"
^ ^
| |
| -- omega
|--------alpha
*/
const char *id_1 = alpha;
const char *id_1_end = omega;
if( id_before.length )
{
// This is the BEFORE delimiter. We look for the first occurrence of it
// from the right end of id_1
const char *start = id_before.the_characters.c_str();
const char *end = start + id_before.length;
const char *found = funky_find_backward(start, end, id_1, id_1_end);
if( found )
{
// We found id_before within id_1, so change alpha to the character just
// to the right of BEFORE. Otherwise, we will leave alpha alone, so that
// it stays at the beginning of id_1. That's because if you can't find
// id_before, it's as if there were no BEFORE phrase.
alpha = found + id_before.length;
}
char_it_c omega_found = funky_find_wide_backward(id_before.the_vectorxxxx.begin(),
id_before.the_vectorxxxx.end(),
alpha_it,
omega_it,
notfound );
if( omega_found != notfound )
{
// We found id_before within id_1, so change alpha to the character just
// to the right of BEFORE. Otherwise, we will leave alpha alone, so that
// it stays at the beginning of id_1
alpha_it = omega_found + id_before.length;
}
}
if( id_after.length )
{
// This is the AFTER delimiter. We look for the first occurrence in id_1
const char *start = id_after.the_characters.c_str();
const char *end = start + id_after.length;
const char *found = funky_find_backward(start, end, alpha, omega);
if( found )
{
// We found id_after in id_1. We update omega to be
// at that location.
omega = found;
}
else
{
// If the AFTER isn't found, we need to adjust things so that nothing
// happens.
omega = alpha;
}
char_it_c omega_found = funky_find_wide_backward(id_after.the_vectorxxxx.begin(),
id_after.the_vectorxxxx.end(),
alpha_it,
omega_it,
notfound );
if( omega_found != notfound)
{
// We found id_after in id_1. We update omega to be
// at that location.
omega_it = omega_found;
}
else
{
// If the AFTER isn't found, we need to adjust things so that nothing
// happens.
omega_it = alpha_it;
}
}
}
static
void
inspect_backward_format_1(const size_t integers[],
const cblc_referlet_t *params)
{
size_t int_index = 0;
size_t cblc_index = 0;
// Reference the language specification for the meanings of identifier_X
// Pick up the number of identifier_2 loops in this INSPECT statement
size_t n_identifier_2 = integers[int_index++];
std::vector<id_2_result> id_2_results(n_identifier_2);
// Pick up identifier_1, which is the string being inspected
const cblc_field_t *id1 = params[cblc_index].field ;
size_t id1_o = params[cblc_index].offset;
size_t id1_s = params[cblc_index].size ;
cblc_index += 1;
// normalize it, according to the language specification.
normalized_operand normalized_id_1 = normalize_id(id1, id1_o, id1_s, id1->encoding);
std::vector<comparand> comparands;
for(size_t i=0; i<n_identifier_2; i++)
{
// For each identifier_2, we pick up its value:
id_2_results[i].id2 = params[cblc_index].field ;
id_2_results[i].id2_o = params[cblc_index].offset;
id_2_results[i].id2_s = params[cblc_index].size ;
cblc_index += 1;
id_2_results[i].result = 0;
// For each identifier 2, there is a count of operations:
size_t nbounds = integers[int_index++];
for(size_t j=0; j<nbounds; j++ )
{
// each operation has a bound code:
cbl_inspect_bound_t operation
= (cbl_inspect_bound_t)integers[int_index++];
switch( operation )
{
case bound_characters_e:
{
// We are counting characters. There is no identifier-3,
// but we we hard-code the length to one to represent a
// single character.
comparand next_comparand = {};
next_comparand.id_2_index = i;
next_comparand.operation = operation;
next_comparand.identifier_3.length = 1;
const cblc_field_t *id4_before = params[cblc_index].field ;
size_t id4_before_o = params[cblc_index].offset;
size_t id4_before_s = params[cblc_index].size ;
cblc_index += 1;
const cblc_field_t *id4_after = params[cblc_index].field ;
size_t id4_after_o = params[cblc_index].offset;
size_t id4_after_s = params[cblc_index].size ;
cblc_index += 1;
normalized_operand normalized_id_4_before
= normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
normalized_operand normalized_id_4_after
= normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
next_comparand.alpha
= normalized_id_1.the_characters.c_str();
next_comparand.omega
= next_comparand.alpha + normalized_id_1.length;
next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
the_alpha_and_omega_backward(normalized_id_4_before,
normalized_id_4_after,
next_comparand.alpha,
next_comparand.omega,
next_comparand.alpha_it,
next_comparand.omega_it,
normalized_id_1.the_vectorxxxx.end());
comparands.push_back(next_comparand);
break;
}
default:
{
// We have some number of identifer-3 values,
// each with possible PHRASE1 modifiers.
size_t pair_count = integers[int_index++];
// We need to build up pair_count comparand structures:
for(size_t k=0; k<pair_count; k++)
{
comparand next_comparand = {};
next_comparand.id_2_index = i;
next_comparand.operation = operation;
const cblc_field_t *id3 = params[cblc_index].field ;
size_t id3_o = params[cblc_index].offset;
size_t id3_s = params[cblc_index].size ;
cblc_index += 1;
const cblc_field_t *id4_before = params[cblc_index].field ;
size_t id4_before_o = params[cblc_index].offset;
size_t id4_before_s = params[cblc_index].size ;
cblc_index += 1;
const cblc_field_t *id4_after = params[cblc_index].field ;
size_t id4_after_o = params[cblc_index].offset;
size_t id4_after_s = params[cblc_index].size ;
cblc_index += 1;
next_comparand.identifier_3
= normalize_id(id3, id3_o, id3_s, id1->encoding);
next_comparand.alpha
= normalized_id_1.the_characters.c_str();
next_comparand.omega
= next_comparand.alpha + normalized_id_1.length;
normalized_operand normalized_id_4_before
= normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
normalized_operand normalized_id_4_after
= normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
the_alpha_and_omega_backward(normalized_id_4_before,
normalized_id_4_after,
next_comparand.alpha,
next_comparand.omega,
next_comparand.alpha_it,
next_comparand.omega_it,
normalized_id_1.the_vectorxxxx.end());
next_comparand.leading = true;
next_comparand.leading_count = 0;
comparands.push_back(next_comparand);
}
}
}
}
}
// We are now ready to walk through identifier-1, character by
// character, checking each of the comparands for a match:
// We are now set up to accomplish the data flow described
// in the language specification. We loop through the
// the character positions in normalized_id_1:
char_it_c leftmost = normalized_id_1.the_vectorxxxx.begin();
char_it_c rightmost = leftmost + normalized_id_1.length;
char_it_c the_end_of_the_world = rightmost;
while( leftmost < rightmost )
{
size_t rightmost_delta = 0;
rightmost -= 1;
// We look at the rightmost position. If that position is within the
// alpha-to-omega qualified range, we check all possible matches:
for(size_t k=0; k<comparands.size(); k++)
{
if( rightmost < comparands[k].alpha_it )
{
// This can't be a match, because rightmost is
// to the left of the comparand's alpha.
continue;
}
if( rightmost + comparands[k].identifier_3.length >
comparands[k].omega_it )
{
// This can't be a match, because the rightmost
// character of the comparand falls to the right
// of the comparand's omega
continue;
}
if( rightmost + comparands[k].identifier_3.length >
the_end_of_the_world )
{
// This can't be a match, because the rightmost character of the
// comparand falls past the new edge of id_1 established by a prior
// match.
continue;
}
// A match is theoretically possible, because all
// the characters of the comparand fall between
// alpha and omega:
bool possible_match = true;
if( comparands[k].operation != bound_characters_e )
{
for(size_t m=0; m<comparands[k].identifier_3.length; m++)
{
if( comparands[k].identifier_3.the_vectorxxxx[m] != rightmost[m] )
{
possible_match = false;
break;
}
}
}
if( possible_match )
{
// The characters of the comparand match the
// characters at rightmost.
bool match = false;
switch( comparands[k].operation )
{
case bound_first_e:
// This can't happen in a FORMAT_1
warnx("The compiler goofed: "
"INSPECT FORMAT 1 "
"shouldn't have "
"bound_first_e");
abort();
break;
case bound_characters_e:
match = 1;
break;
case bound_all_e:
{
// We have a match.
match = true;
break;
}
case bound_leading_e:
{
// We have a match at rightmost. But we need to figure out if this
// particular match is valid for LEADING.
if( comparands[k].leading )
{
if( rightmost + comparands[k].identifier_3.length
== comparands[k].omega_it)
{
// This means that the match here is just the latest of a
// string of LEADING matches that started at .omega
comparands[k].leading_count += 1;
match = true;
comparands[k].omega_it -= comparands[k].identifier_3.length;
the_end_of_the_world = rightmost;
rightmost_delta = comparands[k].identifier_3.length-1;
}
}
break;
}
case bound_trailing_e:
{
// We have a match at rightmost.
//
// We want to know if this is a trailing match. For that to be,
// all of the possible matches from here leftward to the alpha have
// to be true as well:
if( (rightmost - comparands[k].alpha_it )
% comparands[k].identifier_3.length == 0 )
{
// The remaining number of characters is correct for a match.
// Keep checking.
// Assume a match until we learn otherwise:
match = true;
char_it_c local_left = rightmost;
local_left -= comparands[k].identifier_3.length;
while( local_left >= comparands[k].alpha_it )
{
for(size_t m=0; m<comparands[k].identifier_3.length; m++)
{
if( comparands[k].identifier_3.the_vectorxxxx[m]
!= local_left[m] )
{
// We have a mismatched character, so no trailing match is
// possible
match = false;
break;
}
}
local_left -= comparands[k].identifier_3.length;
}
}
break;
}
}
if( match )
{
// We have a match at rightmost:
// Bump the result counter
id_2_results[comparands[k].id_2_index].result += 1;
// We have a match here at rightmost, so we need to set the end of
// the world here
the_end_of_the_world = rightmost;
// Adjust rightmost by the additional characters in a BACKWARD
// LEADING search:
rightmost -= rightmost_delta;
break;
}
}
else
{
// We are within alpha/omega, but there was no
// match, which permanently disqualifies the
// possibility of LEADING
comparands[k].leading = false;
}
}
}
// Add our results to the identifier_2 values:
for(size_t i = 0; i<id_2_results.size(); i++)
{
int rdigits;
__int128 id_2_value
= __gg__binary_value_from_qualified_field(&rdigits,
id_2_results[i].id2,
id_2_results[i].id2_o,
id_2_results[i].id2_s);
while(rdigits--)
{
id_2_value /= 10.0;
}
// Accumulate what we've found into it
id_2_value += id_2_results[i].result;
// And put it back:
__gg__int128_to_qualified_field(id_2_results[i].id2,
id_2_results[i].id2_o,
id_2_results[i].id2_s,
id_2_value,
0,
truncation_e,
NULL);
}
}
/*  Run-time support for INSPECT ... TALLYING (Format 1).

    When 'backward' is nonzero the work is handed off to
    inspect_backward_format_1(); otherwise identifier-1 is scanned from left
    to right.

    'integers' is a flat stream that is consumed in order:
        the number of identifier-2 tally counters;
        for each counter, the number of operations; and
        for each operation, its cbl_inspect_bound_t code followed -- for
        everything except CHARACTERS -- by the number of identifier-3
        comparands.

    'params' is the parallel flat stream of field/offset/size referlets:
        identifier-1;
        for each counter, identifier-2; and
        for each comparand, identifier-3 (absent for CHARACTERS) followed by
        the identifier-4 BEFORE and identifier-4 AFTER operands.

    Each tally is added to the value already held by its identifier-2.  */
extern "C"
void
__gg__inspect_format_1( int backward,
                        size_t integers[],
                        const cblc_referlet_t *params)
{
  if( backward )
  {
    return inspect_backward_format_1(integers, params);
  }

  size_t int_index  = 0;
  size_t cblc_index = 0;

  // Reference the language specification for the meanings of identifier_X

  // Pick up the number of identifier_2 loops in this INSPECT statement
  size_t n_identifier_2 = integers[int_index++];
  std::vector<id_2_result> id_2_results(n_identifier_2);

  // Pick up identifier_1, which is the string being inspected
  const cblc_field_t *id1 = params[cblc_index].field ;
  size_t id1_o            = params[cblc_index].offset;
  size_t id1_s            = params[cblc_index].size ;
  cblc_index += 1;

  // normalize it, according to the language specification.
  normalized_operand normalized_id_1
                            = normalize_id(id1, id1_o, id1_s, id1->encoding);

  std::vector<comparand> comparands;

  for(size_t i=0; i<n_identifier_2; i++)
  {
    // For each identifier_2, we pick up its value:
    id_2_results[i].id2   = params[cblc_index].field ;
    id_2_results[i].id2_o = params[cblc_index].offset;
    id_2_results[i].id2_s = params[cblc_index].size ;
    cblc_index += 1;
    id_2_results[i].result = 0;

    // For each identifier 2, there is a count of operations:
    size_t nbounds = integers[int_index++];
    for(size_t j=0; j<nbounds; j++ )
    {
      // each operation has a bound code:
      cbl_inspect_bound_t operation
                              = (cbl_inspect_bound_t)integers[int_index++];
      switch( operation )
      {
        case bound_characters_e:
        {
          // We are counting characters.  There is no identifier-3,
          // but we hard-code the length to one to represent a
          // single character.
          comparand next_comparand = {};
          next_comparand.id_2_index = i;
          next_comparand.operation = operation;
          next_comparand.identifier_3.length = 1;

          const cblc_field_t *id4_before = params[cblc_index].field ;
          size_t id4_before_o            = params[cblc_index].offset;
          size_t id4_before_s            = params[cblc_index].size ;
          cblc_index += 1;

          const cblc_field_t *id4_after  = params[cblc_index].field ;
          size_t id4_after_o             = params[cblc_index].offset;
          size_t id4_after_s             = params[cblc_index].size ;
          cblc_index += 1;

          normalized_operand normalized_id_4_before
            = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
          normalized_operand normalized_id_4_after
            = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);

          // Start with the whole of identifier-1 as the eligible range, and
          // let the BEFORE/AFTER operands narrow it.
          next_comparand.alpha
            = normalized_id_1.the_characters.c_str();
          next_comparand.omega
            = next_comparand.alpha + normalized_id_1.length;
          next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
          next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();

          the_alpha_and_omega(normalized_id_4_before,
                              normalized_id_4_after,
                              next_comparand.alpha,
                              next_comparand.omega,
                              next_comparand.alpha_it,
                              next_comparand.omega_it,
                              normalized_id_1.the_vectorxxxx.end());
          comparands.push_back(next_comparand);
          break;
        }

        default:
        {
          // We have some number of identifier-3 values,
          // each with possible PHRASE1 modifiers.
          size_t pair_count = integers[int_index++];

          // We need to build up pair_count comparand structures:
          for(size_t k=0; k<pair_count; k++)
          {
            comparand next_comparand = {};
            next_comparand.id_2_index = i;
            next_comparand.operation = operation;

            const cblc_field_t *id3 = params[cblc_index].field ;
            size_t id3_o            = params[cblc_index].offset;
            size_t id3_s            = params[cblc_index].size ;
            cblc_index += 1;

            const cblc_field_t *id4_before = params[cblc_index].field ;
            size_t id4_before_o            = params[cblc_index].offset;
            size_t id4_before_s            = params[cblc_index].size ;
            cblc_index += 1;

            const cblc_field_t *id4_after  = params[cblc_index].field ;
            size_t id4_after_o             = params[cblc_index].offset;
            size_t id4_after_s             = params[cblc_index].size ;
            cblc_index += 1;

            next_comparand.identifier_3
              = normalize_id(id3,
                             id3_o,
                             id3_s,
                             id1->encoding);

            next_comparand.alpha
              = normalized_id_1.the_characters.c_str();
            next_comparand.omega
              = next_comparand.alpha + normalized_id_1.length;
            next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
            next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();

            normalized_operand normalized_id_4_before
              = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
            normalized_operand normalized_id_4_after
              = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);

            the_alpha_and_omega(normalized_id_4_before,
                                normalized_id_4_after,
                                next_comparand.alpha,
                                next_comparand.omega,
                                next_comparand.alpha_it,
                                next_comparand.omega_it,
                                normalized_id_1.the_vectorxxxx.end());

            next_comparand.leading = true;
            next_comparand.leading_count = 0;
            comparands.push_back(next_comparand);
          }
        }
      }
    }
  }

  // We are now set up to accomplish the data flow described in the
  // language specification.  We walk through identifier-1, character by
  // character, checking each of the comparands for a match:
  char_it_c leftmost  = normalized_id_1.the_vectorxxxx.begin();
  char_it_c rightmost = leftmost + normalized_id_1.length;

  while( leftmost < rightmost )
  {
    // For each leftmost position, we check each of the
    // pairs:
    for(size_t k=0; k<comparands.size(); k++)
    {
      if( leftmost < comparands[k].alpha_it )
      {
        // This can't be a match, because leftmost is
        // to the left of the comparand's alpha.
        continue;
      }
      if( leftmost + comparands[k].identifier_3.length > comparands[k].omega_it )
      {
        // This can't be a match, because the rightmost
        // character of the comparand falls to the right
        // of the comparand's omega
        continue;
      }

      // A match is theoretically possible, because all
      // the characters of the comparand fall between
      // alpha and omega:
      bool possible_match = true;

      // CHARACTERS has no identifier-3 text to compare against; every
      // in-range position qualifies.
      if( comparands[k].operation != bound_characters_e )
      {
        for(size_t m=0; m<comparands[k].identifier_3.length; m++)
        {
          if( comparands[k].identifier_3.the_vectorxxxx[m] != leftmost[m] )
          {
            possible_match = false;
            break;
          }
        }
      }

      if( possible_match )
      {
        // The characters of the comparand match the
        // characters at leftmost.
        bool match = false;
        switch( comparands[k].operation )
        {
          case bound_first_e:
            // This can't happen in a FORMAT_1
            warnx("The compiler goofed: "
                  "INSPECT FORMAT 1 "
                  "shouldn't have "
                  "bound_first_e");
            abort();
            break;

          case bound_characters_e:
            match = true;
            break;

          case bound_all_e:
          {
            // We have a match.
            match = true;
            break;
          }

          case bound_leading_e:
          {
            // We have a match at leftmost.  But we need to figure out if
            // this particular match is valid for LEADING.
            //
            // This position is LEADING if:
            //  1) .leading is still true
            //  2) (leftmost - alpha) / length_of_comparand == leading_count
            //
            // That is, every comparand-sized slot between alpha and here
            // has already been counted as a LEADING match.
            if( comparands[k].leading )
            {
              // So far, so good.
              size_t count = ((leftmost - comparands[k].alpha_it))
                             / comparands[k].identifier_3.length;
              if( count == comparands[k].leading_count )
              {
                // This means that the match here is just the latest of a
                // string of LEADING matches that started at .alpha
                comparands[k].leading_count += 1;
                match = true;
              }
            }
            break;
          }

          case bound_trailing_e:
          {
            // We have a match at leftmost.
            //
            // We want to know if this is a trailing match.  For that to be,
            // all of the possible matches from here to the omega have to be
            // true as well:
            if( (comparands[k].omega_it-leftmost)
                % comparands[k].identifier_3.length == 0 )
            {
              // The remaining number of characters is correct for a match.
              // Keep checking.

              // Assume a match until we learn otherwise:
              match = true;
              char_it_c local_left = leftmost;
              local_left += comparands[k].identifier_3.length;
              while( match && local_left < comparands[k].omega_it )
              {
                for(size_t m=0; m<comparands[k].identifier_3.length; m++)
                {
                  if( comparands[k].identifier_3.the_vectorxxxx[m]
                      != local_left[m] )
                  {
                    // We have a mismatched character, so no trailing match
                    // is possible
                    match = false;
                    break;
                  }
                }
                local_left += comparands[k].identifier_3.length;
              }
            }
            break;
          }
        }

        if( match )
        {
          // We have a match at leftmost:

          // Bump the result counter
          id_2_results[comparands[k].id_2_index].result += 1;

          // Adjust the leftmost pointer to point to
          // the rightmost character of the matched
          // string, keeping in mind that it will be
          // bumped again after we break out of the
          // k<pair_count loop:
          leftmost += comparands[k].identifier_3.length - 1;
          break;
        }
      }
      else
      {
        // We are within alpha/omega, but there was no
        // match, which permanently disqualifies the
        // possibility of LEADING
        comparands[k].leading = false;
      }
    }
    leftmost += 1;
  }

  // Add our results to the identifier_2 values:
  for(size_t i = 0; i<id_2_results.size(); i++)
  {
    int rdigits;
    __int128 id_2_value
      = __gg__binary_value_from_qualified_field(&rdigits,
                                                id_2_results[i].id2,
                                                id_2_results[i].id2_o,
                                                id_2_results[i].id2_s);
    // Drop the digits to the right of the decimal point.  This must be an
    // integer division: dividing by 10.0 would route the 128-bit value
    // through a double and lose precision for values above 2**53.
    while(rdigits--)
    {
      id_2_value /= 10;
    }

    // Accumulate what we've found into it
    id_2_value += id_2_results[i].result;

    // And put it back:
    __gg__int128_to_qualified_field(id_2_results[i].id2,
                                    id_2_results[i].id2_o,
                                    id_2_results[i].id2_s,
                                    id_2_value,
                                    0,
                                    truncation_e,
                                    NULL);
  }
}
static
void
inspect_backward_format_2(const size_t integers[],
const cblc_referlet_t *params)
{
size_t int_index = 0;
size_t cblc_index = 0;
// Reference the language specification for the meanings of identifier_X
// Pick up identifier_1, which is the string being inspected
cblc_field_t *id1 = params[cblc_index].field ;
size_t id1_o = params[cblc_index].offset;
size_t id1_s = params[cblc_index].size ;
cblc_index += 1;
// normalize it, according to the language specification.
normalized_operand normalized_id_1
= normalize_id(id1, id1_o, id1_s, id1->encoding);
std::vector<comparand> comparands;
// Pick up the count of operations:
size_t nbounds = integers[int_index++];
for(size_t j=0; j<nbounds; j++ )
{
// each operation has a bound code:
cbl_inspect_bound_t operation = (cbl_inspect_bound_t)integers[int_index++];
switch( operation )
{
case bound_characters_e:
{
comparand next_comparand = {};
next_comparand.operation = operation;
const cblc_field_t *id5 = params[cblc_index].field ;
size_t id5_o = params[cblc_index].offset;
size_t id5_s = params[cblc_index].size ;
cblc_index += 1;
const cblc_field_t *id4_before = params[cblc_index].field ;
size_t id4_before_o = params[cblc_index].offset;
size_t id4_before_s = params[cblc_index].size ;
cblc_index += 1;
const cblc_field_t *id4_after = params[cblc_index].field ;
size_t id4_after_o = params[cblc_index].offset;
size_t id4_after_s = params[cblc_index].size ;
cblc_index += 1;
next_comparand.identifier_5
= normalize_id(id5, id5_o, id5_s, id1->encoding);
normalized_operand normalized_id_4_before
= normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
normalized_operand normalized_id_4_after
= normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
// Because this is a CHARACTER operation, the lengths of
// identifier-3 and identifier-5 should be one. Let's avoid the
// chaos that will otherwise ensue should the lengths *not* be
// one.
next_comparand.identifier_3.length = 1;
next_comparand.identifier_5.length = 1;
next_comparand.alpha = normalized_id_1.the_characters.c_str();
next_comparand.omega
= next_comparand.alpha + normalized_id_1.length;
next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
the_alpha_and_omega_backward(normalized_id_4_before,
normalized_id_4_after,
next_comparand.alpha,
next_comparand.omega,
next_comparand.alpha_it,
next_comparand.omega_it,
normalized_id_1.the_vectorxxxx.end());
comparands.push_back(next_comparand);
break;
}
default:
{
// We have some number of identifer-3/identifier-5 pairs,
// each with possible PHRASE1 modifiers.
size_t pair_count = integers[int_index++];
for(size_t k=0; k<pair_count; k++)
{
comparand next_comparand = {};
next_comparand.operation = operation;
const cblc_field_t *id3 = params[cblc_index].field ;
size_t id3_o = params[cblc_index].offset;
size_t id3_s = params[cblc_index].size ;
cblc_index += 1;
const cblc_field_t *id5 = params[cblc_index].field ;
size_t id5_o = params[cblc_index].offset;
size_t id5_s = params[cblc_index].size ;
cblc_index += 1;
const cblc_field_t *id4_before = params[cblc_index].field ;
size_t id4_before_o = params[cblc_index].offset;
size_t id4_before_s = params[cblc_index].size ;
cblc_index += 1;
const cblc_field_t *id4_after = params[cblc_index].field ;
size_t id4_after_o = params[cblc_index].offset;
size_t id4_after_s = params[cblc_index].size ;
cblc_index += 1;
next_comparand.identifier_3 = normalize_id(id3, id3_o, id3_s, id1->encoding);
next_comparand.identifier_5 = normalize_id(id5, id5_o, id5_s, id1->encoding);
// Identifiers 3 and 5 have to be the same length. But
// but either, or both, can be figurative constants. If
// they are figurative constants, they start off with a
// length of one. We will expand figurative constants to
// match the length of the other one:
if( id3->attr & FIGCONST_MASK )
{
match_lengths( next_comparand.identifier_3,
next_comparand.identifier_5);
}
else if( id5->attr & FIGCONST_MASK )
{
match_lengths( next_comparand.identifier_5,
next_comparand.identifier_3);
}
next_comparand.alpha
= normalized_id_1.the_characters.c_str();
next_comparand.omega
= next_comparand.alpha + normalized_id_1.length;
normalized_operand normalized_id_4_before
= normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
normalized_operand normalized_id_4_after
= normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
the_alpha_and_omega_backward(normalized_id_4_before,
normalized_id_4_after,
next_comparand.alpha,
next_comparand.omega,
next_comparand.alpha_it,
next_comparand.omega_it,
normalized_id_1.the_vectorxxxx.end());
next_comparand.leading = true;
next_comparand.leading_count = 0;
next_comparand.first = true;
comparands.push_back(next_comparand);
}
}
}
}
// We can now look through normalized_id_1 and replace characters:
char_it_c leftmost = normalized_id_1.the_vectorxxxx.begin();
char_it_c rightmost = leftmost + normalized_id_1.length;
char_it_c the_end_of_the_world = rightmost;
while( leftmost < rightmost )
{
size_t rightmost_delta = 0;
rightmost -= 1;
// We look at the rightmost position. If that position is within the
// alpha-to-omega qualified range, we check all possible matches:
for(size_t k=0; k<comparands.size(); k++)
{
if( rightmost < comparands[k].alpha_it )
{
// This can't be a match, because rightmost is
// to the left of the comparand's alpha.
continue;
}
if( rightmost + comparands[k].identifier_3.length > comparands[k].omega_it )
{
// This can't be a match, because the rightmost
// character of the comparand falls to the right
// of the comparand's omega
continue;
}
if( rightmost + comparands[k].identifier_3.length > the_end_of_the_world )
{
// This can't be a match, because the rightmost character of the
// comparand falls past the new edge of id_1 established by a prior
// match.
continue;
}
// A match is theoretically possible, because all
// the characters of the comparand fall between
// alpha and omega:
bool possible_match = true;
if( comparands[k].operation != bound_characters_e )
{
for(size_t m=0; m<comparands[k].identifier_3.length; m++)
{
if( comparands[k].identifier_3.the_vectorxxxx[m] != rightmost[m] )
{
possible_match = false;
break;
}
}
}
if( possible_match )
{
// The characters of the comparand match the
// characters at rightmost.
bool match = false;
switch( comparands[k].operation )
{
case bound_first_e:
// This can't happen in a FORMAT_2
warnx("The compiler goofed: "
"INSPECT FORMAT 2 "
"shouldn't have "
"bound_first_e");
abort();
break;
case bound_characters_e:
match = 1;
break;
case bound_all_e:
{
// We have a match.
match = true;
break;
}
case bound_leading_e:
{
// We have a match at rightmost. But we need to figure out if this
// particular match is valid for LEADING.
if( comparands[k].leading )
{
if( rightmost
+ comparands[k].identifier_3.length * (comparands[k].leading_count +1)
== comparands[k].omega_it)
{
// This means that the match here is just the latest of a
// string of LEADING matches that started at .omega
comparands[k].leading_count += 1;
match = true;
rightmost_delta = comparands[k].identifier_3.length-1;
}
}
break;
}
case bound_trailing_e:
{
// We have a match at rightmost.
//
// We want to know if this is a trailing match. For that to be,
// all of the possible matches from here leftward to the alpha have
// to be true as well:
if( (rightmost - comparands[k].alpha_it )
% comparands[k].identifier_3.length == 0 )
{
// The remaining number of characters is correct for a match.
// Keep checking.
// Assume a match until we learn otherwise:
match = true;
char_it_c local_left = rightmost;
local_left -= comparands[k].identifier_3.length;
while( local_left >= comparands[k].alpha_it )
{
for(size_t m=0; m<comparands[k].identifier_3.length; m++)
{
if( comparands[k].identifier_3.the_vectorxxxx[m]
!= local_left[m] )
{
// We have a mismatched character, so no trailing match is
// possible
match = false;
break;
}
}
local_left -= comparands[k].identifier_3.length;
}
}
break;
}
}
if( match )
{
// We have a match at rightmost. We need to
// to replace the characters in normalized_id_1
// with the characters from normalized_id_5
//fprintf(stderr, "Rule: %ld %p %s\n", k+1, rightmost, rightmost);
size_t index = rightmost - normalized_id_1.the_vectorxxxx.begin();
for( size_t l = 0;
l < comparands[k].identifier_5.length;
l++ )
{
cbl_char_t ch = comparands[k].identifier_5.
the_vectorxxxx[l];
normalized_id_1.the_vectorxxxx[index++] = ch;
}
the_end_of_the_world = rightmost;
rightmost -= rightmost_delta;
break;
}
}
else
{
comparands[k].leading = false;
}
}
}
// Here is where we take the characters from normalized_id_1 and put them
// back into identifier_1.
charmap_t *charmap = __gg__get_charmap(id1->encoding);
// Wastefully prefill id_1 with spaces in case the processing resulted in a
// string shorter than the original. (There is always the possiblity that
// a UTF-8 or UTF-16 codeset pair got replaced with a single character.) Do
// this before calling __gg__converter, because both mapped_character and
// __gg__iconverter use the same static buffer.
unsigned char *id1_data = id1->data + id1_o;
charmap->memset(id1_data, charmap->mapped_character(ascii_space), id1_s);
// We've been working in UTF32; we convert back to the original id1 encoding.
size_t bytes_converted;
const char *converted = __gg__iconverter( DEFAULT_32_ENCODING,
id1->encoding,
normalized_id_1.the_vectorxxxx.data(),
normalized_id_1.length*width_of_utf32,
&bytes_converted) ;
// And move those characters into place in id_1:
memcpy(id1_data,
converted,
std::min(bytes_converted, id1_s));
return;
}
/*  Run-time support for INSPECT ... REPLACING (Format 2).

    When 'backward' is nonzero the work is handed off to
    inspect_backward_format_2(); otherwise identifier-1 is scanned from left
    to right.  At each position the comparands are tried in order; the first
    one that matches has its identifier-5 characters written over the
    matched span, and the scan resumes just past that span.

    'integers' holds the number of operations, then for each operation its
    cbl_inspect_bound_t code followed -- except for CHARACTERS -- by the
    number of identifier-3/identifier-5 pairs.

    'params' holds identifier-1 followed, for each comparand, by
    identifier-3 (absent for CHARACTERS), identifier-5, identifier-4 BEFORE
    and identifier-4 AFTER.

    The edited text is converted back to identifier-1's encoding and stored
    into identifier-1 in place.  */
extern "C"
void
__gg__inspect_format_2( int backward,
                        size_t integers[],
                        const cblc_referlet_t *params)
{
  if( backward )
  {
    return inspect_backward_format_2(integers, params);
  }

  size_t int_index  = 0;
  size_t cblc_index = 0;

  // Reference the language specification for the meanings of identifier_X

  // id1 is the string being inspected
  cblc_field_t *id1 = params[cblc_index].field;
  size_t id1_o      = params[cblc_index].offset;
  size_t id1_s      = params[cblc_index].size;
  cblc_index += 1;

  // normalize it, according to the language specification.
  normalized_operand normalized_id_1
                            = normalize_id(id1, id1_o, id1_s, id1->encoding);

  std::vector<comparand> comparands;

  // Pick up the count of operations:
  size_t nbounds = integers[int_index++];
  for(size_t j=0; j<nbounds; j++ )
  {
    // each operation has a bound code:
    cbl_inspect_bound_t operation
      = (cbl_inspect_bound_t)integers[int_index++];
    switch( operation )
    {
      case bound_characters_e:
      {
        comparand next_comparand = {} ;
        next_comparand.operation = operation;

        const cblc_field_t *id5 = params[cblc_index].field;
        size_t id5_o            = params[cblc_index].offset;
        size_t id5_s            = params[cblc_index].size;
        cblc_index += 1;

        const cblc_field_t *id4_before = params[cblc_index].field;
        size_t id4_before_o            = params[cblc_index].offset;
        size_t id4_before_s            = params[cblc_index].size;
        cblc_index += 1;

        const cblc_field_t *id4_after  = params[cblc_index].field;
        size_t id4_after_o             = params[cblc_index].offset;
        size_t id4_after_s             = params[cblc_index].size;
        cblc_index += 1;

        next_comparand.identifier_5
          = normalize_id(id5, id5_o, id5_s, id1->encoding);
        normalized_operand normalized_id_4_before
          = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
        normalized_operand normalized_id_4_after
          = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);

        // Because this is a CHARACTER operation, the lengths of
        // identifier-3 and identifier-5 should be one.  Let's avoid the
        // chaos that will otherwise ensue should the lengths *not* be
        // one.
        next_comparand.identifier_3.length = 1;
        next_comparand.identifier_5.length = 1;

        // Start with the whole of identifier-1 as the eligible range, and
        // let the BEFORE/AFTER operands narrow it.
        next_comparand.alpha = normalized_id_1.the_characters.c_str();
        next_comparand.omega
          = next_comparand.alpha + normalized_id_1.length;
        next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
        next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();

        the_alpha_and_omega(normalized_id_4_before,
                            normalized_id_4_after,
                            next_comparand.alpha,
                            next_comparand.omega,
                            next_comparand.alpha_it,
                            next_comparand.omega_it,
                            normalized_id_1.the_vectorxxxx.end());
        comparands.push_back(next_comparand);
        break;
      }

      default:
      {
        // We have some number of identifier-3/identifier-5 pairs,
        // each with possible PHRASE1 modifiers.
        size_t pair_count = integers[int_index++];
        for(size_t k=0; k<pair_count; k++)
        {
          comparand next_comparand = {};
          next_comparand.operation = operation;

          const cblc_field_t *id3 = params[cblc_index].field;
          size_t id3_o            = params[cblc_index].offset;
          size_t id3_s            = params[cblc_index].size;
          cblc_index += 1;

          const cblc_field_t *id5 = params[cblc_index].field;
          size_t id5_o            = params[cblc_index].offset;
          size_t id5_s            = params[cblc_index].size;
          cblc_index += 1;

          const cblc_field_t *id4_before = params[cblc_index].field;
          size_t id4_before_o            = params[cblc_index].offset;
          size_t id4_before_s            = params[cblc_index].size;
          cblc_index += 1;

          const cblc_field_t *id4_after  = params[cblc_index].field;
          size_t id4_after_o             = params[cblc_index].offset;
          size_t id4_after_s             = params[cblc_index].size;
          cblc_index += 1;

          next_comparand.identifier_3 = normalize_id(id3,
                                                     id3_o,
                                                     id3_s,
                                                     id1->encoding);
          next_comparand.identifier_5 = normalize_id(id5,
                                                     id5_o,
                                                     id5_s,
                                                     id1->encoding);

          // Identifiers 3 and 5 have to be the same length.  But
          // either, or both, can be figurative constants.  If
          // they are figurative constants, they start off with a
          // length of one.  We will expand figurative constants to
          // match the length of the other one:
          if( id3->attr & FIGCONST_MASK )
          {
            match_lengths( next_comparand.identifier_3,
                           next_comparand.identifier_5);
          }
          else if( id5->attr & FIGCONST_MASK )
          {
            match_lengths( next_comparand.identifier_5,
                           next_comparand.identifier_3);
          }

          next_comparand.alpha
            = normalized_id_1.the_characters.c_str();
          next_comparand.omega
            = next_comparand.alpha + normalized_id_1.length;

          normalized_operand normalized_id_4_before
            = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
          normalized_operand normalized_id_4_after
            = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);

          next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
          next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();

          the_alpha_and_omega(normalized_id_4_before,
                              normalized_id_4_after,
                              next_comparand.alpha,
                              next_comparand.omega,
                              next_comparand.alpha_it,
                              next_comparand.omega_it,
                              normalized_id_1.the_vectorxxxx.end());

          next_comparand.leading = true;
          next_comparand.leading_count = 0;
          next_comparand.first = true;
          comparands.push_back(next_comparand);
        }
      }
    }
  }

  // We are now set up to accomplish the data flow described
  // in the language specification.  We loop through the
  // character positions in normalized_id_1:
  char_it_c leftmost  = normalized_id_1.the_vectorxxxx.begin();
  char_it_c rightmost = leftmost + normalized_id_1.length;

  while( leftmost < rightmost )
  {
    // For each leftmost position, we check each of the
    // comparands
    for(size_t k=0; k<comparands.size(); k++)
    {
      if( leftmost < comparands[k].alpha_it )
      {
        // This can't be a match, because leftmost is
        // to the left of the comparand's alpha.
        continue;
      }
      if( leftmost + comparands[k].identifier_3.length
          > comparands[k].omega_it )
      {
        // This can't be a match, because the rightmost
        // character of the comparand falls to the right
        // of the comparand's omega
        continue;
      }

      // A match is theoretically possible, because all
      // the characters of the comparand fall between
      // alpha and omega:
      bool possible_match = true;

      // CHARACTERS has no identifier-3 text to compare against; every
      // in-range position qualifies.
      if( comparands[k].operation != bound_characters_e)
      {
        for(size_t m=0; m<comparands[k].identifier_3.length; m++)
        {
          if( comparands[k].identifier_3.the_vectorxxxx[m]
              != leftmost[m] )
          {
            possible_match = false;
            break;
          }
        }
      }

      if( possible_match )
      {
        // The characters of the comparand match the
        // characters at leftmost.  See if further processing is
        // indicated:
        bool match = false;
        switch( comparands[k].operation )
        {
          case bound_characters_e:
            match = true;
            break;

          case bound_first_e:
            // Only the first textual match of a FIRST comparand counts.
            if( comparands[k].first )
            {
              match = true;
              comparands[k].first = false;
            }
            break;

          case bound_all_e:
          {
            // We have a match.
            match = true;
            break;
          }

          case bound_leading_e:
          {
            // We have a match at leftmost.  But we need to figure out if
            // this particular match is valid for LEADING.
            //
            // This position is LEADING if:
            //  1) .leading is still true
            //  2) (leftmost - alpha) / length_of_comparand == leading_count
            //
            // That is, every comparand-sized slot between alpha and here
            // has already been counted as a LEADING match.
            if( comparands[k].leading )
            {
              // So far, so good.
              size_t count = (leftmost - comparands[k].alpha_it)
                             / comparands[k].identifier_3.length;
              if( count == comparands[k].leading_count )
              {
                // This means that the match here is just the latest of a
                // string of LEADING matches that started at .alpha
                comparands[k].leading_count += 1;
                match = true;
              }
            }
            break;
          }

          case bound_trailing_e:
          {
            // We have a match at leftmost.
            //
            // We want to know if this is a trailing match.  For that to be,
            // all of the possible matches from here to the omega have to be
            // true as well:
            if( (comparands[k].omega_it-leftmost)
                % comparands[k].identifier_3.length == 0 )
            {
              // The remaining number of characters is correct for a match.
              // Keep checking.

              // Assume a match until we learn otherwise:
              match = true;
              char_it_c local_left = leftmost;
              local_left += comparands[k].identifier_3.length;

              // Stop at the first mismatch: nothing further to the right
              // can turn 'match' back on.
              while( match && local_left < comparands[k].omega_it )
              {
                for(size_t m=0; m<comparands[k].identifier_3.length; m++)
                {
                  if( comparands[k].identifier_3.the_vectorxxxx[m]
                      != local_left[m] )
                  {
                    // We have a mismatched character, so no trailing match
                    // is possible
                    match = false;
                    break;
                  }
                }
                local_left += comparands[k].identifier_3.length;
              }
            }
            break;
          }
        }

        if( match )
        {
          // We have a match at leftmost.  We need to
          // replace the characters in normalized_id_1
          // with the characters from normalized_id_5
          size_t index = leftmost
                         - normalized_id_1.the_vectorxxxx.begin();
          for( size_t l = 0;
               l < comparands[k].identifier_5.length;
               l++ )
          {
            // Carry the replacement as a full-width character.  Going
            // through a plain 'char' would truncate it to a single byte.
            cbl_char_t ch = comparands[k].identifier_5.
                            the_vectorxxxx[l];
            normalized_id_1.the_vectorxxxx[index++] = ch;
          }

          // Adjust the leftmost pointer to point to
          // the rightmost character of the matched
          // string, keeping in mind that it will be
          // bumped again after we break out of the
          // k<pair_count loop:
          leftmost += comparands[k].identifier_3.length - 1;
          break;
        }
      }
      else
      {
        // We are within alpha/omega, but there was no match, which
        // permanently disqualifies the possibility of LEADING
        comparands[k].leading = false;
      }
    }
    leftmost += 1;
  }

  // Here is where we take the characters from normalized_id_1 and put them
  // back into identifier_1.

  charmap_t *charmap = __gg__get_charmap(id1->encoding);

  // Wastefully prefill id_1 with spaces in case the processing resulted in a
  // string shorter than the original.  (There is always the possibility that
  // a UTF-8 or UTF-16 codeset pair got replaced with a single character.)  Do
  // this before calling __gg__iconverter, because both mapped_character and
  // __gg__iconverter use the same static buffer.
  unsigned char *id1_data = id1->data + id1_o;
  charmap->memset(id1_data, charmap->mapped_character(ascii_space), id1_s);

  // We've been working in UTF32; we convert back to the original id1 encoding.
  size_t bytes_converted;
  const char *converted = __gg__iconverter( DEFAULT_32_ENCODING,
                                            id1->encoding,
                                            normalized_id_1.the_vectorxxxx.data(),
                                            normalized_id_1.length*width_of_utf32,
                                            &bytes_converted) ;

  // And move those characters into place in id_1, never writing more than
  // the field can hold:
  memcpy(id1_data,
         converted,
         std::min(bytes_converted, id1_s));
  return;
}
static std::u32string
normalize_for_inspect_format_4(const cblc_field_t *var,
size_t var_offset,
size_t var_size,
cbl_encoding_t source_encoding)
{
std::u32string retval;
if(var)
{
const charmap_t *charmap_var = __gg__get_charmap(source_encoding);
charmap_t *charmap32 = __gg__get_charmap(DEFAULT_32_ENCODING);
cbl_figconst_t figconst =
static_cast<cbl_figconst_t>(var->attr & FIGCONST_MASK);
// We have a corner case to deal with:
if( strcmp(var->name, "NULLS") == 0 )
{
figconst = null_value_e;
}
if( figconst )
{
// Build up an var_size array of figconst characters
cbl_char_t figchar = '\0';
switch( figconst )
{
case low_value_e :
figchar = charmap32->low_value_character();
break;
case zero_value_e :
figchar = charmap32->mapped_character(ascii_0);
break;
case space_value_e :
figchar = charmap32->mapped_character(ascii_space);
break;
case quote_value_e :
figchar = charmap32->quote_character();
break;
case high_value_e :
{
if( __gg__high_value_character == DEFAULT_HIGH_VALUE_8 )
{
// See the comments where these constants are defined.
if(charmap_var->stride() == 1)
{
if(charmap_var->is_like_ebcdic())
{
// This maps back to 0xFF in CP1140
figchar = EBCDIC_HIGH_VALUE_32;
}
else
{
// This maps back to 0xFF in CP1252
figchar = ASCII_HIGH_VALUE_32;
}
}
else if(charmap_var->stride() == 2)
{
figchar = UTF16_HIGH_VALUE_32;
}
else
{
figchar = UTF32_HIGH_VALUE_32;
}
}
else
{
figchar = charmap32->mapped_character(__gg__high_value_character);
}
break;
}
case null_value_e:
break;
default:
figchar = '\0';
abort();
break;
}
retval.push_back(figchar);
}
else
{
// It's not a figurative constant, so convert var to UTF32.
size_t converted_bytes;
const char *converted = __gg__iconverter(
var->encoding,
DEFAULT_32_ENCODING,
var->data + var_offset,
var_size,
&converted_bytes);
void *duped = __gg__memdup(converted, converted_bytes);
for(size_t i=0; i<converted_bytes; i+=width_of_utf32)
{
cbl_char_t ch = charmap32->getch(duped, i);
retval.push_back(ch);
}
free(duped);
}
}
return retval;
}
extern "C"
void
__gg__inspect_format_4( int backward,
cblc_field_t *input, // identifier-1
size_t input_offset,
size_t input_size,
const cblc_field_t *original, // id-6 / literal-4
size_t original_offset,
size_t original_size,
const cblc_field_t *replacement, // id-7 / literal-5
size_t replacement_offset,
size_t replacement_size,
const cblc_field_t *after, // id-4 / literal-2
size_t after_offset,
size_t after_size,
const cblc_field_t *before, // id-4 / literal-2
size_t before_offset,
size_t before_size
)
{
// We need to cope with multiple encodings; the ISO specification says only
// that identifier-1 and -3 through -n are display or national.
// We will leave the input encoded as whatever it is, and we will convert the
// others to match.
// We also need to cope with anything except identifier-1 being a figurative
// constant.
cbl_figconst_t figconst_original =
static_cast<cbl_figconst_t>(original->attr & FIGCONST_MASK);
cbl_figconst_t figconst_replacement =
static_cast<cbl_figconst_t>(replacement->attr & FIGCONST_MASK);
int figswitch = (figconst_original ? 2 : 0) + (figconst_replacement ? 1 : 0);
switch( figswitch )
{
case 0:
// Neither are figconst; we leave the sizes alone
break;
case 1:
// Only replacement is figconst, so we make its size -1
// This will cause CONVERTING "ABC" TO ZERO to be the same as
// CONVERTING "ABC" TO "000"
replacement_size = (size_t)(-1LL);
break;
case 2:
// Only original is figconst. Set the size to one. (This is necessary
// because the size of NULL is eight, since NULL does double-duty as both
// a character (this is a MicroFocus specification) and a pointer.
original_size = 1;
break;
case 3:
// Both are figconst
replacement_size = original_size = 1;
break;
}
// Because before and after can be figurative constant NULL, we have to make
// sure that in such cases the size is 1:
if(before && before_size && before->attr & FIGCONST_MASK)
{
before_size = 1;
}
if(after && after_size && after->attr & FIGCONST_MASK)
{
after_size = 1;
}
bool all = (replacement_size == (size_t)(-1LL));
if( all )
{
// A replacement_size of -1 means that the statement is something like
// INSPECT XYZ CONVERTING "abcxyz" to ALL "?" That means replacement is
// a single character. We need to convert it to the target encoding.
const charmap_t * charmap = __gg__get_charmap(input->encoding);
replacement_size = charmap->stride();
}
std::u32string str_input = normalize_for_inspect_format_4(input , input_offset , input_size , input->encoding);
std::u32string str_original = normalize_for_inspect_format_4(original , original_offset , original_size , input->encoding);
std::u32string str_replacement = normalize_for_inspect_format_4(replacement, replacement_offset, replacement_size, input->encoding);
std::u32string str_after = normalize_for_inspect_format_4(after , after_offset , after_size , input->encoding);
std::u32string str_before = normalize_for_inspect_format_4(before , before_offset , before_size , input->encoding);
if( all )
{
// We now expand the single-character replacement to be the same length as
// original.
cbl_char_t ch = str_replacement[0];
str_replacement.clear();
for(size_t i=0; i<str_original.size(); i++)
{
str_replacement.push_back(ch);
}
}
// Use a map to make this O(N), rather than an O(N-squared),
// computational complexity
std::unordered_map<cbl_char_t, cbl_char_t>map;
typedef std::unordered_map<cbl_char_t, cbl_char_t>::const_iterator map_it_t ;
// The rule is, if the same character appears more than once in the
// original (which is identifier-6), then the first occurrence of the
// matching character in replacement is used. So, we create the map
// backwards. The one closest to zero will win.
for(size_t i=str_original.size()-1; i<str_original.size(); i--)
{
map[str_original[i]] = str_replacement[i];
}
size_t leftmost_i; // Leftmost index to replace at.
size_t rightmost_i; // Rightmost+1 index to replace at.
if( !backward )
{
// This is a forward conversion. We look for the first instance
// of str_after from the left. And then we look for the first instance
// of str_before after that. When there is no str_before, we move the
// rightmost limit to the end of str_input, as if there were no BEFORE
// phrase:
if( str_after.empty() )
{
// There is no AFTER phrase, so we start from the left.
leftmost_i = 0;
}
else
{
size_t nfound = str_input.find(str_after);
if( nfound != std::u32string::npos )
{
// Move the left limit to one character past the found element
leftmost_i = nfound + str_after.size();
}
else
{
// We didn't find the after phrase, so we move the left limit to the
// end of input, which means nothing will be replaced
leftmost_i = str_input.size();
}
}
// At this point, leftmost_i has been set to something. Look for the
// BEFORE phrase somewhere to the right of it:
if( str_before.empty() )
{
// There is no BEFORE phrase, so set rightmost to the end of the input
rightmost_i = str_input.size();
}
else
{
// Look for BEFORE to the right of leftmost_i:
size_t nfound = str_input.find(str_before, leftmost_i);
if( nfound != std::u32string::npos )
{
// We found the BEFORE phrase.
rightmost_i = nfound;
}
else
{
// We didn't find the BEFORE phrase; ISO says to treat this situation
// as if there were no BEFORE phrase
rightmost_i = str_input.size();
}
}
}
else
{
// We are doing a BACKWARD conversion. So, we look for the AFTER phrase
// and use that to establish the rightmost limit. And we look for the
// BEFORE to the left of AFTER phrase and use that to establish the
// leftmost limit
if( str_after.empty() )
{
// There is no AFTER phrase, so we set the rightmost limit to the end
// of the input:
rightmost_i = str_input.size();
}
else
{
// Start from the right and look for AFTER
size_t nfound = str_input.rfind(str_after, str_input.size());
if( nfound != std::u32string::npos )
{
// We found str_after, so its location becomes rightmost
rightmost_i = nfound;
}
else
{
// We didn't find str_after, so we move rightmost all the way to the
// left, so that nothing will ever be found.
rightmost_i = 0;
}
}
// rightmost_i has been established, so now look for BEFORE to the left
// of it
if( str_before.empty() )
{
// There is no str_before, so the left limit is all the way to the left
leftmost_i = 0;
}
else
{
size_t nfound = str_input.rfind(str_before, rightmost_i);
if( nfound != std::u32string::npos )
{
// We found BEFORE, so we put the left limit just to the right of
// where we found it:
leftmost_i = nfound + str_before.size();
}
else
{
// Not finding the BEFORE phrase is the same as the BEFORE phrase
// not having been specified:
leftmost_i = 0;
}
}
}
// leftmost_i and rightmost_i have been established. Do the conversion of
// characters inside those limits:
for(size_t i=leftmost_i; i<rightmost_i; i++)
{
cbl_char_t ch = str_input[i];
map_it_t cvt = map.find(ch);
if( cvt != map.end() )
{
str_input[i] = cvt->second;
}
}
// We now take the converted str_input, and put it back into id_1:
size_t bytes_converted;
const char *converted = __gg__iconverter(DEFAULT_32_ENCODING,
input->encoding,
str_input.data(),
str_input.size()*width_of_utf32,
&bytes_converted) ;
// And move those characters into place in input:
memcpy(input->data + input_offset,
converted,
std::min(bytes_converted, input_size));
}
extern "C"
void
__gg__inspect_format_1_sbc( int backward,
size_t integers[],
const cblc_referlet_t *params)
{
// When this routine is called, we know we are working in a single-byte-coded
// codeset like ASCII or EBCDIC.
if( backward )
{
return inspect_backward_format_1(integers, params);
}
size_t int_index = 0;
size_t cblc_index = 0;
// Reference the language specification for the meanings of identifier_X
// Pick up the number of identifier_2 loops in this INSPECT statement
size_t n_identifier_2 = integers[int_index++];
std::vector<id_2_result> id_2_results(n_identifier_2);
// Pick up identifier_1, which is the string being inspected
const cblc_field_t *id1 = params[cblc_index].field ;
size_t id1_o = params[cblc_index].offset;
size_t id1_s = params[cblc_index].size ;
cblc_index += 1;
// normalize it, according to the language specification.
std::string normalized_id_1
= normalize_id_sbc(id1, id1_o, id1_s, id1->encoding);
std::vector<comparand_sbc> comparands;
for(size_t i=0; i<n_identifier_2; i++)
{
// For each identifier_2, we pick up its value:
id_2_results[i].id2 = params[cblc_index].field ;
id_2_results[i].id2_o = params[cblc_index].offset;
id_2_results[i].id2_s = params[cblc_index].size ;
cblc_index += 1;
id_2_results[i].result = 0;
// For each identifier 2, there is a count of operations:
size_t nbounds = integers[int_index++];
for(size_t j=0; j<nbounds; j++ )
{
// each operation has a bound code:
cbl_inspect_bound_t operation
= (cbl_inspect_bound_t)integers[int_index++];
switch( operation )
{
case bound_characters_e:
{
// We are counting characters. There is no identifier-3,
// but we we hard-code it to " " to set the length to 1.
comparand_sbc next_comparand = {};
next_comparand.id_2_index = i;
next_comparand.operation = operation;
next_comparand.identifier_3 = " ";
const cblc_field_t *id4_before = params[cblc_index].field ;
size_t id4_before_o = params[cblc_index].offset;
size_t id4_before_s = params[cblc_index].size ;
cblc_index += 1;
const cblc_field_t *id4_after = params[cblc_index].field ;
size_t id4_after_o = params[cblc_index].offset;
size_t id4_after_s = params[cblc_index].size ;
cblc_index += 1;
std::string normalized_id_4_before
= normalize_id_sbc( id4_before,
id4_before_o,
id4_before_s,
id1->encoding);
std::string normalized_id_4_after
= normalize_id_sbc( id4_after,
id4_after_o,
id4_after_s,
id1->encoding);
the_alpha_and_omega_sbc(normalized_id_4_before,
normalized_id_4_after,
normalized_id_1,
next_comparand.alpha,
next_comparand.omega);
comparands.push_back(next_comparand);
break;
}
default:
{
// We have some number of identifer-3 values,
// each with possible PHRASE1 modifiers.
size_t pair_count = integers[int_index++];
// We need to build up pair_count comparand structures:
for(size_t k=0; k<pair_count; k++)
{
comparand_sbc next_comparand = {};
next_comparand.id_2_index = i;
next_comparand.operation = operation;
const cblc_field_t *id3 = params[cblc_index].field ;
size_t id3_o = params[cblc_index].offset;
size_t id3_s = params[cblc_index].size ;
cblc_index += 1;
const cblc_field_t *id4_before = params[cblc_index].field ;
size_t id4_before_o = params[cblc_index].offset;
size_t id4_before_s = params[cblc_index].size ;
cblc_index += 1;
const cblc_field_t *id4_after = params[cblc_index].field ;
size_t id4_after_o = params[cblc_index].offset;
size_t id4_after_s = params[cblc_index].size ;
cblc_index += 1;
next_comparand.identifier_3 = normalize_id_sbc(id3,
id3_o,
id3_s,
id1->encoding);
std::string normalized_id_4_before
= normalize_id_sbc( id4_before,
id4_before_o,
id4_before_s,
id1->encoding);
std::string normalized_id_4_after
= normalize_id_sbc( id4_after,
id4_after_o,
id4_after_s,
id1->encoding);
the_alpha_and_omega_sbc(normalized_id_4_before,
normalized_id_4_after,
normalized_id_1,
next_comparand.alpha,
next_comparand.omega);
next_comparand.leading = true;
next_comparand.leading_count = 0;
comparands.push_back(next_comparand);
}
}
}
}
}
// We are now ready to walk through identifier-1, character by
// character, checking each of the comparands for a match:
// We are now set up to accomplish the data flow described
// in the language specification. We loop through the
// the character positions in normalized_id_1:
size_t leftmost = 0;
size_t rightmost = leftmost + normalized_id_1.length();
while( leftmost < rightmost )
{
// For each leftmost position, we check each of the
// pairs:
for(size_t k=0; k<comparands.size(); k++)
{
if( leftmost < comparands[k].alpha )
{
// This can't be a match, because leftmost is
// to the left of the comparand's alpha.
continue;
}
if( leftmost + comparands[k].identifier_3.length() > comparands[k].omega)
{
// This can't be a match, because the rightmost
// character of the comparand falls to the right
// of the comparand's omega
continue;
}
// A match is theoretically possible, because all
// the characters of the comparand fall between
// alpha and omega:
bool possible_match = true;
if( comparands[k].operation != bound_characters_e )
{
for(size_t m=0; m<comparands[k].identifier_3.length(); m++)
{
if( comparands[k].identifier_3[m] != normalized_id_1[leftmost+m] )
{
possible_match = false;
break;
}
}
}
if( possible_match )
{
// The characters of the comparand match the
// characters at leftmost.
bool match = false;
switch( comparands[k].operation )
{
case bound_first_e:
// This can't happen in a FORMAT_1
warnx("The compiler goofed: "
"INSPECT FORMAT 1 "
"shouldn't have "
"bound_first_e");
abort();
break;
case bound_characters_e:
match = true;
break;
case bound_all_e:
{
// We have a match.
match = true;
break;
}
case bound_leading_e:
{
// We have a match at leftmost. But we need to figure out if this
// particular match is valid for LEADING.
// Hang onto your hat. This is delightfully clever.
//
// This position is LEADING if:
// 1) .leading is still true
// 2) leftmost / (length_of_comparand ) = current_count
//
// I get chills every time I look at that.
if( comparands[k].leading )
{
// So far, so good.
size_t count = ((leftmost - comparands[k].alpha))
/ comparands[k].identifier_3.length();
if( count == comparands[k].leading_count )
{
// This means that the match here is just the latest of a
// string of LEADING matches that started at .alpha
comparands[k].leading_count += 1;
match = true;
}
}
break;
}
case bound_trailing_e:
{
// We have a match at leftmost.
//
// We want to know if this is a trailing match. For that to be,
// all of the possible matches from here to the omega have to be
// true as well:
if( (comparands[k].omega-leftmost)
% comparands[k].identifier_3.length() == 0 )
{
// The remaining number of characters is correct for a match.
// Keep checking.
// Assume a match until we learn otherwise:
match = true;
size_t local_left = leftmost;
local_left += comparands[k].identifier_3.length();
while( match && local_left < comparands[k].omega )
{
for(size_t m=0; m<comparands[k].identifier_3.length(); m++)
{
if( comparands[k].identifier_3[m]
!= normalized_id_1[local_left+m] )
{
// We have a mismatched character, so no trailing match is
// possible
match = false;
break;
}
}
local_left += comparands[k].identifier_3.length();
}
}
break;
}
}
if( match )
{
// We have a match at leftmost:
// Bump the result counter
id_2_results[comparands[k].id_2_index].result += 1;
// Adjust the leftmost pointer to point to
// the rightmost character of the matched
// string, keeping in mind that it will be
// bumped again after we break out of the
// k<pair_count loop:
leftmost += comparands[k].identifier_3.length() - 1;
break;
}
}
else
{
// We are within alpha/omega, but there was no
// match, which permanently disqualifies the
// possibility of LEADING
comparands[k].leading = false;
}
}
leftmost += 1;
}
// Add our results to the identifier_2 values:
for(size_t i = 0; i<id_2_results.size(); i++)
{
int rdigits;
__int128 id_2_value
= __gg__binary_value_from_qualified_field(&rdigits,
id_2_results[i].id2,
id_2_results[i].id2_o,
id_2_results[i].id2_s);
while(rdigits--)
{
id_2_value /= 10.0;
}
// Accumulate what we've found into it
id_2_value += id_2_results[i].result;
// And put it back:
__gg__int128_to_qualified_field(id_2_results[i].id2,
id_2_results[i].id2_o,
id_2_results[i].id2_s,
id_2_value,
0,
truncation_e,
NULL);
}
}