blob: ba4ef0adb8e60606e91bde877576347939606498 [file] [log] [blame]
/*
* Copyright (c) 2021-2025 Symas Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of the Symas Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <sys/mman.h>
#ifndef _LEXIO_H_
#define _LEXIO_H_
#define SPACE ' '
bool lexer_echo();
bool is_reference_format();
static inline bool isquote( char ch ) {
return ch == '\'' || ch == '"';
}
static inline void
erase_source( char *src, char *esrc ) {
std::replace_if(src, esrc,
[](char ch) { return ch != '\n'; },
SPACE );
}
/*
* Column number as in Cobol, with 1 at the start of the line.
* 0: free-format, but comment lines may start with '*'.
* N: columns less than N treated as space.
* '/' or '*' in N starts a comment
* 'D' starts a debug line
* '-' is a line-continuation indicator
* Others ignored.
* Right margin is enforced if it is greater than left margin.
*/
struct bytespan_t {
char *data, *eodata;
bytespan_t() : data( nullptr), eodata(nullptr) {}
bytespan_t( char *data, char *eodata )
: data(data), eodata(eodata)
{
if( eodata < data ) {
this->eodata = data + strlen(data);
}
assert( this->data <= this->eodata );
}
size_t size() const { return eodata - data; }
bool in_string( ) const {
char open = '\0';
for( char *q = data; (q = std::find_if(q, eodata, isquote)) != eodata; q++) {
if( !open ) {
open = *q; // first quote opens
continue;
}
if( open == *q && q + 1 < eodata && q[0] == q[1] ) { // doubled
q++;
continue;
}
if( open == *q ) open = '\0'; // closing quote must match
}
return isquote(open);
}
char * append( const char *input, const char *eoinput );
bytespan_t&
update( char *line, char *eoline, size_t right_margin ) {
*this = bytespan_t(line, eoline);
if( right_margin && data + right_margin < eodata ) {
erase_source(data + right_margin, eodata);
eodata = data + right_margin;
}
eodata = std::find(data, eodata, '\n');
return *this;
}
};
// YYLTYPE supplied by cbldiag.h. Borrowed from parse.h as generated by Bison.
struct filespan_t : public bytespan_t {
char *cur, *eol, *quote;
private:
size_t iline, icol;
size_t line_quote72;
static char empty_file[8];
public:
filespan_t()
: cur(data), eol(data), quote(NULL), iline(0), icol(0), line_quote72(0)
{}
filespan_t(void *p, size_t len)
: bytespan_t( static_cast<char*>(p), static_cast<char*>(p) + len )
, cur(data), eol(data), quote(NULL), iline(0), icol(0), line_quote72(0)
{}
size_t lineno() const { return iline; }
size_t colno() const { return icol; }
void lineno_reset() { iline = 0; }
size_t colno( size_t icol ) { return this->icol = icol; }
bool nada() const { return data == empty_file; }
void use_nada() {
assert(!data);
cur = eol = data = empty_file;
eol = eodata = empty_file + sizeof(empty_file) - 1;
}
const char *ccur() const { return cur; }
/*
* "If an alphanumeric or national literal that is to be continued on
* the next line has as its last character a quotation mark in
* column 72, the continuation line must start with two consecutive
* quotation marks."
*/
bool was_quote72() const { return iline == line_quote72 + 1; }
size_t next_line() {
// Before advancing, mark the current line as ending in a quote, if true.
if( is_reference_format() && 72 <= line_length() ) {
if( isquote(cur[71]) ) { line_quote72 = iline; }
}
cur = eol;
assert(data <= cur && cur <= eodata);
if( cur == eodata ) return 0;
eol = std::find(cur, eodata, '\n');
if( eol < eodata ) {
++eol;
++iline;
icol = 0;
}
return eol - cur;
}
size_t line_length() const { return eol - cur; }
static size_t tab_check( const char *src, const char *esrc );
bool is_blank_line() const {
auto p = std::find_if( cur, eol, []( char ch ) { return !fisspace(ch); } );
return p == eol;
}
YYLTYPE as_location() const {
YYLTYPE loc;
loc.first_line = loc.last_line = 1 + iline;
loc.first_column = loc.last_column = 1 + icol;
return loc;
}
};
#if USE_STD_REGEX
# include <regex>
#else
# include "dts.h"
using dts::csub_match;
using dts::cmatch;
using dts::regex;
using dts::regex_search;
#endif
struct span_t {
protected:
void verify() const {
if( !p ) {
dbgmsg("span_t::span_t: p is NULL");
} else if( ! (p <= pend) ) {
dbgmsg("span_t::span_t: p %p > pend %p", p, pend);
}
assert(p && p <= pend);
}
span_t& trim() {
while( p < pend && isblank(p[0]) ) p++;
while( p < pend - 1 && isblank(pend[-1]) ) pend--;
return *this;
}
public:
const char *p, *pend;
span_t() : p(NULL), pend(NULL) {}
span_t( size_t len, const char *data ) : p(data), pend(data + len) {
verify();
}
span_t( const char *data, const char *eodata ) : p(data), pend(eodata) {
verify();
}
// cppcheck-suppress operatorEqRetRefThis
span_t& operator=( const csub_match& cm ) {
p = cm.first;
pend = cm.second;
return p && pend ? trim() : *this;
}
int size() const { return pend - p; }
size_t nlines() const { return p && pend? std::count(p, pend, '\n') : 0; }
span_t dup() const {
auto output = new char[size() + 1];
auto eout = std::copy(p, pend, output);
*eout = '\0';
return span_t(output, eout);
}
const char * has_nul() const {
auto p_l = std::find(this->p, pend, '\0');
return p_l != pend? p_l : NULL;
}
bool at_eol() const {
return p < pend && '\n' == pend[-1];
}
const char * optional_eol() const {
return at_eol() ? "" : "\n";
}
};
struct replace_t {
struct span_t before, after;
replace_t() : before(span_t()), after(span_t()) {}
replace_t( span_t before, span_t after )
: before(before), after(after)
{}
replace_t& reset() {
before = after = span_t();
return *this;
}
};
#include <cstdio>
#include <list>
class cdftext {
static filespan_t free_form_reference_format( int fd );
static void process_file( filespan_t, int output, bool second_pass = false );
static filespan_t map_file( int fd );
static void echo_input( int input, const char filename[] );
static int open_input( const char filename[] );
static int open_output();
static std::list<span_t> segment_line( filespan_t& mfile );
public:
static FILE * lex_open( const char filename[] );
};
std::list<replace_t> free_form_reference_format( filespan_t mfile );
#endif