| /* |
| * Copyright (c) 2021-2025 Symas Corporation |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: |
| * |
| * * Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * * Redistributions in binary form must reproduce the above |
| * copyright notice, this list of conditions and the following disclaimer |
| * in the documentation and/or other materials provided with the |
| * distribution. |
| * * Neither the name of the Symas Corporation nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include <fcntl.h> |
| #include <unistd.h> |
| |
| #include <cctype> |
| #include <cerrno> |
| #include <cmath> |
| #include <cfenv> |
| #include <cstdio> |
| #include <cstdlib> |
| #include <cstring> |
| #include <ctime> |
| |
| #include <algorithm> |
| #include <vector> |
| |
| #include <libxml/SAX2.h> |
| #include <libxml/parser.h> |
| |
| #include <syslog.h> |
| |
| #include "config.h" |
| #include "libgcobol-fp.h" |
| #include "ec.h" |
| #include "common-defs.h" |
| #include "io.h" |
| #include "gcobolio.h" |
| #include "libgcobol.h" |
| |
/* Number of elements in the array X.  X must be a real array, not a pointer:
 * applied to a pointer, the division silently yields a meaningless value.
 * The argument is parenthesized so that expression arguments index correctly.
 */
#define COUNT_OF(X) (sizeof(X) / sizeof((X)[0]))
| |
/*
 * Debug trace helper.  Does nothing unless the XMLPARSE environment variable
 * is set; otherwise writes "func:line" to stderr followed by optional data:
 *
 *   len ==  0   no data, only a marker line
 *   len == -1   data is a NUL-terminated string
 *   otherwise   at most len bytes of data are printed
 *
 * A null data pointer is printed as "(null)" rather than being handed to
 * printf's %s conversion, which is undefined behavior.  Handlers in this
 * file forward libxml2 arguments (namespace prefix, public ID, ...) that are
 * not checked for null before being traced.
 */
void sayso( const char func[], int line,
            int len = 0 , const unsigned char data[] = nullptr ) {
  if( ! getenv("XMLPARSE") ) return;

  if( len == 0 ) {
    fprintf(stderr, "%s:%d Kilroy was here\n", func, line);
    return;
  }

  if( data == nullptr ) {
    fprintf(stderr, "%s:%d: '(null)'\n", func, line);
    return;
  }

  if( len == -1 ) {
    fprintf(stderr, "%s:%d: '%s'\n", func, line, data);
  } else {
    fprintf(stderr, "%s:%d: '%.*s'\n", func, line, len, data);
  }
}
// Trace the current function and line, with no data.
#define SAYSO() sayso(__func__, __LINE__)
// Trace a NUL-terminated string S.
#define SAYSO_DATAZ(S) sayso(__func__, __LINE__, -1, S)
// Trace N bytes starting at S.
#define SAYSO_DATA(N, S) sayso(__func__, __LINE__, N, S)

// Names a handler's context parameter "ctx" and marks it as unused.
#define CTX ctx __attribute__ ((unused))
| |
| struct xml_ec_value_t { |
| int ibm_code; |
| const char msg[80]; |
| } xml_ec_values[] = { |
| // Table 73. XML PARSE exceptions that allow continuation |
| { 1, "invalid character between elements" }, |
| { 2, "invalid start before element content" }, |
| { 3, "duplicate attribute" }, |
| { 4, "markup character '<' in an attribute value" }, |
| { 5, "start/end tag mismatch" }, |
| { 6, "invalid character in element" }, |
| { 7, "invalid start in element content. " }, |
| { 8, "CDATA closing character sequence ']]>' not opened" }, |
| { 10, "comment the character sequence '--' without '>'" }, |
| { 11, "invalid character in a processing instruction" }, |
| { 12, "XML declaration was not start of document" }, |
| { 13, "invalid digit in a hexadecimal character reference" }, |
| { 14, "invalid digit in a decimal character reference" }, |
| { 15, "encoding declaration value name must start with [a-zA-Z] character" }, |
| { 16, "character reference did not refer to a legal XML character" }, |
| { 17, "invalid character in an entity reference name" }, |
| { 70, "EBCDIC document, supported EBCDIC page, unsupported declaration" }, |
| { 71, "EBCDIC document, unsupported EBCDIC page " }, |
| { 72, "EBCDIC document, unsupported EBCDIC page, unsupported declaration" }, |
| { 73, "EBCDIC document, unsupported EBCDIC page and declaration " }, |
| { 80, "ASCII document, supported ASCII page, unsupported declaration" }, |
| { 81, "ASCII document, unsupported ASCII page " }, |
| { 82, "ASCII document, unsupported ASCII page, unsupported declaration" }, |
| { 83, "ASCII document, unsupported ASCII page and declaration " }, |
| { 84, "ASCII document, invalid UTF-8, external UTF-8, no declaration. " }, |
| { 85, "ASCII document, invalid UTF-8, external UTF-8, invalid declaration" }, |
| { 86, "ASCII document, invalid UTF-8, external ASCII" }, |
| { 87, "ASCII document, invalid UTF-8, external and document UTF-8" }, |
| { 88, "ASCII document, invalid UTF-8, unsupported ASCII/UTF-8, UTF-8 declaration" }, |
| { 89, "ASCII document, invalid UTF-8, external UTF-8, ASCII declaration" }, |
| { 92, "alphanumeric document expected, document is UTF-16. " }, |
| |
| // XML PARSE exceptions that allow continuation (continued) |
| //// 100,001 - 165,535 EBCDIC document encoding does not match code page |
| //// 200,001 - 265,535 ASCII document encoding does not match code page |
| |
| // XML PARSE exceptions that do not allow continuation |
| { 100, "end of document before start of XML declaration" }, |
| { 101, "end of document before end of XML declaration" }, |
| { 102, "end of document before root element" }, |
| { 103, "end of document before version information in XML declaration" }, |
| { 104, "end of document before version information value in XML declaration" }, |
| { 106, "end of document before encoding declaration value in XML declaration" }, |
| { 108, "end of document before standalone declaration value in XML declaration" }, |
| { 109, "end of document before attribute name" }, |
| { 110, "end of document before attribute value" }, |
| { 111, "end of document before character/entity reference in attribute value" }, |
| { 112, "end of document before empty element tag" }, |
| { 113, "end of document before root element name" }, |
| { 114, "end of document before element name" }, |
| { 115, "end of document before character data in element content" }, |
| { 116, "end of document before processing instruction in element content" }, |
| { 117, "end of document before comment or CDATA section in element content" }, |
| { 118, "end of document before comment in element content" }, |
| { 119, "end of document before CDATA section in element content" }, |
| { 120, "end of document before character/entity reference in element content" }, |
| { 121, "end of document before after close of root element" }, |
| { 122, "possible invalid start of a document type" }, |
| { 123, "duplicate document type" }, |
| { 124, "root element name must start with [A-Za-z_:]" }, |
| { 125, "first attribute name must start with [A-Za-z_:]" }, |
| { 126, "invalid character in or after element name" }, |
| { 127, "attribute name not followed by '=' " }, |
| { 128, "invalid attribute value delimiter" }, |
| { 130, "attribute name must start with [A-Za-z_:]" }, |
| { 131, "invalid character in or after attribute name" }, |
| { 132, "empty element tag not terminated with '/>'" }, |
| { 133, "element end tag name name must start with [A-Za-z_:]" }, |
| { 134, "element end tag not terminated with '>'" }, |
| { 135, "element name must start with [A-Za-z_:]" }, |
| { 136, "invalid start of comment/CDATA in element" }, |
| { 137, "invalid start of comment" }, |
| { 138, "processing instruction target name must start with [A-Za-z_:]" }, |
| { 139, "invalid character in/afterprocessing instruction target name" }, |
| { 140, "processing instruction not terminated with '?>'" }, |
| { 141, "invalid character following '&' in a character/entity reference" }, |
| { 142, "missing version information in XML declaration" }, |
| { 143, "missing '=' after 'version' in XML declaration " }, |
| { 144, "missing XML version declaration " }, |
| { 145, "invalid character in XML version information" }, |
| { 146, "invalid character following XML version information value " }, |
| { 147, "invalid attribute in XML declaration" }, |
| { 148, "missing '=' after 'encoding' in XML declaration" }, |
| { 149, "missing XML encoding declaration value" }, |
| { 150, "invalid XML encoding declaration value" }, |
| { 151, "invalid character afer XML declaration" }, |
| { 152, "invalid attribute XML declaration" }, |
| { 153, "missing '=' after standalone XML declaration" }, |
| { 154, "missing standalone XML declaration value" }, |
| { 155, "standalone declaration must be 'yes' or 'no'" }, |
| { 156, "invalid standalone XML declaration value" }, |
| { 157, "invalid character following XML standalone declaration value" }, |
| { 158, "unterminated XML declaration " }, |
| { 159, "start of document type declaration after end of root element" }, |
| { 160, "start of element after end of root element" }, |
| { 161, "invalid UTF-8 byte sequence" }, |
| { 162, "UTF-8 character that has a Unicode code point above x'FFFF'" }, |
| { 315, "UTF-16 document little-endian unsupported" }, |
| { 316, "UCS4 document unsupported" }, |
| { 317, "unrecognized document encoding" }, |
| { 318, "UTF-8 document unsupported " }, |
| { 320, "mismatched national document data item to document encoding EBCDIC" }, |
| { 321, "mismatched national document data item to document encoding ASCII" }, |
| { 322, "mismatched native alphanumeric document data item to document encoding EBCDIC" }, |
| { 323, "mismatched host alphanumeric document data item to document encoding ASCII" }, |
| { 324, "mismatched national document data item to document encoding UTF-8" }, |
| { 325, "mismatched host alphanumeric document datat to document encoding UTF-8" }, |
| { 500, "internal error" }, |
| }, *eoxml_ec_values = xml_ec_values + COUNT_OF(xml_ec_values); |
| |
| static const xml_ec_value_t * |
| xml_ec_value_of( int ibm_code ) { |
| if( 100000 < ibm_code && ibm_code < 200000 ) { |
| static xml_ec_value_t not_ebcdic{ 0, "EBCDIC document encoding " |
| "does not match code page" }; |
| not_ebcdic.ibm_code = ibm_code; |
| return ¬_ebcdic; |
| } |
| if( 200000 < ibm_code && ibm_code < 300000 ) { |
| static xml_ec_value_t not_ascii{ 0, "ASCII document encoding " |
| "does not match code page" }; |
| not_ascii.ibm_code = ibm_code; |
| return ¬_ascii; |
| } |
| auto p = std::find_if( xml_ec_values, eoxml_ec_values, |
| [ibm_code]( const auto& value ) { |
| return ibm_code == value.ibm_code; |
| } ); |
| return p < eoxml_ec_values ? &*p : nullptr; |
| } |
| |
| const char * |
| xml_ec_value_str( int ibm_code ) { |
| auto p = xml_ec_value_of(ibm_code); |
| return p? p->msg : nullptr; |
| } |
| |
| #if NEEDED |
/*
 * Classify an XML-CODE value: false for codes below 100 and for codes above
 * 100000, true otherwise.  Presumably "true" means the exception does not
 * allow continuation, matching the grouping comments in the message table;
 * confirm before use.  Codes from 1000 through 100000 are not expected and
 * trip the assertion.
 */
static bool
xml_fatal( int ibm_code ) {
  const bool outside_fatal_range = ibm_code < 100 || ibm_code > 100000;
  if( outside_fatal_range ) {
    return false;
  }
  assert(ibm_code < 1000);
  return true;
}
| #endif |
| |
| static callback_t *cobol_callback; |
| |
| /* |
| * Internal handler functions |
| */ |
| /////////////// |
| /* |
| |
| ATTRIBUTE-CHARACTER The single character that corresponds with the predefined entity reference in the attribute value |
| ATTRIBUTE-CHARACTERS The value within quotation marks or apostrophes. This can be a substring of the attribute value if the value includes an entity reference. |
| ATTRIBUTE-NAME The attribute name; the string to the left of the equal sign |
| ATTRIBUTE-NATIONAL-CHARACTER Regardless of the type of the XML document specified by identifier-1 in the XML PARSE statement, XML-TEXT is empty with length zero and XML-NTEXT contains the single national character that corresponds with the numeric character reference. |
| |
| CONTENT-CHARACTER The single character that corresponds with the predefined entity reference in the element content |
| |
CONTENT-NATIONAL-CHARACTER Regardless of the type of the XML document specified by identifier-1 in the XML PARSE statement, XML-TEXT is empty with length zero and XML-NTEXT contains the single national character that corresponds with the numeric character reference.
| DOCUMENT-TYPE-DECLARATION The entire document type declaration, including the opening and closing character sequences "<!DOCTYPE" and ">" |
| ENCODING-DECLARATION The value, between quotes or apostrophes, of the encoding declaration in the XML declaration |
| END-OF-CDATA-SECTION The string "]]>" |
| END-OF-DOCUMENT Empty with length zero |
| |
EXCEPTION The part of the document that was successfully scanned, up to and including the point at which the exception was detected. Special register XML-CODE contains the unique error code that identifies the exception.
| |
| PROCESSING-INSTRUCTION-TARGET The processing instruction target name, which occurs immediately after the processing instruction opening sequence, "<?" |
| STANDALONE-DECLARATION The value, between quotation marks or apostrophes ("yes" or "no"), of the stand-alone declaration in the XML declaration |
| START-OF-CDATA-SECTION The string "<![CDATA[" |
| START-OF-DOCUMENT The entire document |
| |
| UNKNOWN-REFERENCE-IN-CONTENT The entity reference name, not including the "&" and ";" delimiters |
| UNKNOWN-REFERENCE-IN-ATTRIBUTE The entity reference name, not including the "&" and ";" delimiters |
| VERSION-INFORMATION The value, between quotation marks or apostrophes, of the version information in the XML declaration |
| |
| */ |
| /////////////// |
| |
| extern cblc_field_t __ggsr__xml_event; |
| extern cblc_field_t __ggsr__xml_code; |
| extern cblc_field_t __ggsr__xml_text; |
| extern cblc_field_t __ggsr__xml_ntext; |
| |
| static void |
| xml_event( const char event_name[], size_t len, char text[] ) { |
| assert(strlen(event_name) < __ggsr__xml_event.allocated); |
| |
| auto pend = __ggsr__xml_event.data + __ggsr__xml_event.allocated; |
| auto p = std::copy( event_name, event_name + strlen(event_name), |
| PTRCAST(char, __ggsr__xml_event.data) ); |
| std::fill(PTRCAST(unsigned char, p), pend, 0x20); |
| |
| __ggsr__xml_text.data = reinterpret_cast<unsigned char*>(text); |
| __ggsr__xml_text.capacity = __ggsr__xml_text.allocated = len; |
| __ggsr__xml_code.data = 0; |
| cobol_callback(); |
| } |
| |
| static inline void |
| xml_event( const char event_name[], char text[] ) { |
| xml_event(event_name, strlen(text), text); |
| } |
| |
| static inline void |
| xml_event( const char event_name[], size_t len, const xmlChar * value ) { |
| char *text = reinterpret_cast<char*>(const_cast<xmlChar*>(value)); |
| xml_event(event_name, len, text); |
| } |
| |
| static inline void |
| xml_event( const char event_name[], const xmlChar * value ) { |
| char *text = reinterpret_cast<char*>(const_cast<xmlChar*>(value)); |
| xml_event(event_name, strlen(text), text); |
| } |
| |
| /* |
| * Many static handler functions are defined but not used while we learn what |
| * is needed. |
| */ |
| #pragma GCC diagnostic ignored "-Wunused-function" |
| |
| static void attributeDecl(void * CTX, |
| const xmlChar * elem, |
| const xmlChar * fullname, |
| int type __attribute__ ((unused)), |
| int def __attribute__ ((unused)), |
| const xmlChar * defaultValue, |
| xmlEnumerationPtr tree __attribute__ ((unused)) ) |
| { |
| fprintf(stderr, "%s:%d: elem=%s, name=%s, default=%s\n", |
| __func__, __LINE__, elem, fullname, defaultValue); |
| } |
| |
| static void cdataBlock(void * CTX, |
| const xmlChar * data, |
| int len) |
| { |
| SAYSO_DATA(len, data); |
| xml_event("CONTENT-CHARACTERS", len, data); |
| } |
| |
| static void characters(void * CTX, |
| const xmlChar * data, |
| int len) |
| { |
| SAYSO_DATA(len, data); |
| xml_event("CONTENT-CHARACTERS", len, data); |
| } |
| |
| static void comment(void * CTX, const xmlChar * value) { |
| SAYSO_DATAZ(value); |
| xml_event("COMMENT", value); |
| } |
| |
| static void elementDecl(void * CTX, |
| const xmlChar * name, |
| int type __attribute__ ((unused)), |
| xmlElementContentPtr content __attribute__ ((unused)) ) |
| { SAYSO_DATAZ(name); } |
| |
| static void endDocument(void * CTX) |
| { SAYSO(); } |
| |
| static void endElementNs(void * CTX, |
| const xmlChar * localname, |
| const xmlChar * prefix, |
| const xmlChar * URI __attribute__ ((unused)) ) |
| { |
| SAYSO_DATAZ(prefix); |
| SAYSO_DATAZ(localname); |
| xml_event("END-OF-ELEMENT", localname); |
| } |
| |
| static void endElement(void * CTX, |
| const xmlChar * name) |
| { SAYSO_DATAZ(name); } |
| |
| static void entityDecl(void * CTX, |
| const xmlChar * name, |
| int type __attribute__ ((unused)), |
| const xmlChar * publicId __attribute__ ((unused)), |
| const xmlChar * systemId __attribute__ ((unused)), |
| xmlChar * content ) |
| { |
| SAYSO_DATAZ(name); |
| SAYSO_DATAZ(content); |
| } |
| |
// SAX error handler: prints "error: ", the printf-style message formatted
// with its arguments, and a newline, all to stderr.
static void
error(void * CTX, const char * msg, ...)
{
  va_list args;

  va_start (args, msg);
  fputs("error: ", stderr);
  vfprintf(stderr, msg, args);
  fputc('\n', stderr);
  va_end (args);
}
| |
| static void externalSubset(void * CTX, |
| const xmlChar * name, |
| const xmlChar * ExternalID, |
| const xmlChar * SystemID) |
| { |
| SAYSO_DATAZ(name); |
| SAYSO_DATAZ(ExternalID); |
| SAYSO_DATAZ(SystemID); |
| } |
| |
// SAX fatalError handler: prints "fatal: ", the printf-style message
// formatted with its arguments, and a newline, all to stderr.
static void
fatalError(void * CTX, const char * msg, ...)
{
  va_list args;

  va_start (args, msg);
  fputs("fatal: ", stderr);
  vfprintf(stderr, msg, args);
  fputc('\n', stderr);
  va_end (args);
}
| |
#if 0
/*
 * Disabled entity-lookup handlers.  Each traces the requested name and
 * returns a null entity, so that enabling this code does not produce a
 * non-void function that falls off its end.
 * NOTE(review): a null result presumably tells libxml2 the entity is
 * unknown; confirm before enabling.
 */
static xmlEntityPtr getEntity(void * CTX,
                              const xmlChar * name)
{
  SAYSO_DATAZ(name);
  return nullptr;
}

static xmlEntityPtr getParameterEntity(void * CTX,
                                       const xmlChar * name)
{
  SAYSO_DATAZ(name);
  return nullptr;
}
#endif
| |
| static int hasExternalSubset(void * CTX) |
| { SAYSO(); return 0; } |
| |
| static int hasInternalSubset(void * CTX) |
| { SAYSO(); return 0; } |
| |
| static void ignorableWhitespace(void * CTX, |
| const xmlChar * ch, |
| int len) |
| { SAYSO_DATA(len, ch); } |
| |
| static void internalSubset(void * CTX, |
| const xmlChar * name, |
| const xmlChar * ExternalID, |
| const xmlChar * SystemID) |
| { |
| SAYSO_DATAZ(name); |
| SAYSO_DATAZ(ExternalID); |
| SAYSO_DATAZ(SystemID); |
| } |
| |
#if 0
/*
 * Disabled isStandalone handler.  Returns a value so that enabling it does
 * not leave a non-void function without a return.
 * NOTE(review): 0 is assumed to mean "not standalone"; confirm before
 * enabling.
 */
static int isStandalone (void * CTX)
{
  SAYSO();
  return 0;
}
#endif
| |
| static void notationDecl(void * CTX, |
| const xmlChar * name, |
| const xmlChar * publicId, |
| const xmlChar * systemId) |
| { |
| SAYSO_DATAZ(name); |
| SAYSO_DATAZ(publicId); |
| SAYSO_DATAZ(systemId); |
| } |
| |
| static void processingInstruction(void * CTX, |
| const xmlChar * target, |
| const xmlChar * data) |
| { |
| SAYSO_DATAZ(target); |
| xml_event("PROCESSING-INSTRUCTION-TARGET", target); |
| SAYSO_DATAZ(data); |
| xml_event("PROCESSING-INSTRUCTION-DATA", data); |
| } |
| |
| static void reference(void * CTX, |
| const xmlChar * name) |
| { SAYSO_DATAZ(name); } |
| |
#if 0
/*
 * Disabled resolveEntity handler.  Returns a null input so that enabling it
 * does not leave a non-void function without a return.
 * NOTE(review): confirm how libxml2 treats a null result before enabling.
 */
static xmlParserInputPtr resolveEntity( void * CTX,
                                        const xmlChar * publicId,
                                        const xmlChar * systemId)
{
  SAYSO();
  return nullptr;
}
#endif
| |
| static void setDocumentLocator(void * CTX, |
| xmlSAXLocatorPtr loc __attribute__ ((unused)) ) |
| { SAYSO(); } |
| |
| /* |
| * Called after the XML declaration was parsed. |
| * Use xmlCtxtGetVersion(), xmlCtxtGetDeclaredEncoding() and |
| * xmlCtxtGetStandalone() to get data from the XML declaration. |
| */ |
| static void startDocument(void * CTX) |
| |
| { |
| SAYSO(); |
| } |
| |
| static void startElementNs(void * CTX, |
| const xmlChar * localname, |
| const xmlChar * prefix, |
| const xmlChar * URI, |
| int nb_namespaces __attribute__ ((unused)), |
| const xmlChar ** namespaces __attribute__ ((unused)), |
| int nb_attributes __attribute__ ((unused)), |
| int nb_defaulted __attribute__ ((unused)), |
| const xmlChar ** attributes __attribute__ ((unused))) |
| { |
| SAYSO_DATAZ(prefix); |
| SAYSO_DATAZ(URI); |
| SAYSO_DATAZ(localname); |
| xml_event("START-OF-ELEMENT", localname); |
| } |
| |
| static void startElement(void * CTX, |
| const xmlChar * name, |
| const xmlChar ** atts) |
| { |
| SAYSO_DATAZ(name); |
| for( int i=0; atts[i]; i++ ) SAYSO_DATAZ(atts[i]); |
| } |
| |
| static void unparsedEntityDecl(void * CTX, |
| const xmlChar * name, |
| const xmlChar * publicId, |
| const xmlChar * systemId, |
| const xmlChar * notationName) |
| { |
| SAYSO_DATAZ(name); |
| SAYSO_DATAZ(publicId); |
| SAYSO_DATAZ(systemId); |
| SAYSO_DATAZ(notationName); |
| } |
| |
// SAX warning handler: prints "warning: ", the printf-style message
// formatted with its arguments, and a newline, all to stderr.
static void
warning(void * CTX, const char * msg, ... )
{
  va_list args;

  va_start (args, msg);
  fputs("warning: ", stderr);
  vfprintf(stderr, msg, args);
  fputc('\n', stderr);
  va_end (args);
}
| |
| /* |
| * xmlSAXHandler is a structure of function pointers that the SAX parser calls |
| * as it encounters XML elements in the input. Each pointer is a callback |
| * function, locally defined in this file. These we term "handlers". |
| * |
| * Each handler sets the XML registers per IBM, and then calls |
| * cobol_callback(), which is a function pointer supplied by the COBOL program |
| * to be the processing procedure for XML PARSE. |
| * |
| * There is no obvious way to abort parsing at the C level. See: |
| * http://veillard.com/XML/messages/0540.html |
| * |
| * > The simplest to implement this would not be to add a new SAX |
| * > callback but rather modify the xmlParserCtxtPtr passed to the |
| * > callbacks. The best seems to be: |
| * > - set ctxt->instate to XML_PARSER_EOF |
| * > - hack xmlCurrentChar() to return 0 |
| * > if (ctxt->instate == XML_PARSER_EOF) |
| * > Doing both should led to a quick termination of parsing |
| * > (but endElement(s)/endDocument will certainly be called anyway). |
| * |
| * Another hack might be to set the input to all blanks in cobol_callback. |
| */ |
| |
| static xmlSAXHandler handlers; |
| |
| static void |
| initialize_handlers( callback_t *callback ) { |
| handlers = xmlSAXHandler {}; |
| handlers.initialized = XML_SAX2_MAGIC; |
| |
| cobol_callback = callback; |
| |
| #if 0 |
| //// Should typically not be modified |
| handlers.attributeDecl = attributeDecl; |
| handlers.elementDecl = elementDecl; |
| handlers.entityDecl = entityDecl; |
| handlers.externalSubset = externalSubset; |
| handlers.getEntity = getEntity; |
| handlers.getParameterEntity = getParameterEntity; |
| handlers.internalSubset = internalSubset; |
| handlers.notationDecl = notationDecl; |
| handlers.resolveEntity = resolveEntity; |
| handlers.unparsedEntityDecl = unparsedEntityDecl; |
| |
| //// Not supposed to be changed by applications |
| handlers.hasExternalSubset = hasExternalSubset; |
| handlers.hasInternalSubset = hasInternalSubset; |
| handlers.isStandalone = isStandalone; |
| |
| //// SAX 1 only |
| handlers.startElement = startElement; |
| handlers.endElement = endElement; |
| |
| //// Everything is available on the context, so this is useless in our case |
| handlers.setDocumentLocator = setDocumentLocator; |
| #endif |
| |
| handlers.cdataBlock = cdataBlock; |
| handlers.characters = characters; |
| handlers.comment = comment; |
| handlers.endDocument = endDocument; |
| handlers.endElementNs = endElementNs; |
| handlers.ignorableWhitespace = ignorableWhitespace; |
| handlers.processingInstruction = processingInstruction; |
| handlers.reference = reference; |
| handlers.startDocument = startDocument; |
| handlers.startElementNs = startElementNs; |
| handlers.error = error; |
| handlers.fatalError = fatalError; |
| handlers.warning = warning; |
| } |
| |
| static xmlChar * |
| xmlchar_of( const char input[] ) { |
| return const_cast<xmlChar*>( reinterpret_cast<const xmlChar*>(input) ); |
| } |
| |
| static const char * |
| xmlParserErrors_str( xmlParserErrors erc, const char name[] ) { |
| const char *msg = "???"; |
| |
| switch( erc ) { |
| case XML_ERR_OK: |
| msg = "Success"; |
| break; |
| case XML_ERR_INTERNAL_ERROR: |
| msg = "Internal assertion failure"; |
| break; |
| case XML_ERR_NO_MEMORY: |
| msg = "Out of memory"; |
| break; |
| case XML_ERR_UNSUPPORTED_ENCODING: |
| msg = "Unsupported character encoding"; |
| break; |
| |
| #if LIBXML_VERSION >= 21400 |
| case XML_ERR_RESOURCE_LIMIT: |
| msg = "Internal resource limit like maximum amplification factor exceeded"; |
| break; |
| case XML_ERR_ARGUMENT: |
| msg = "Invalid argument"; |
| break; |
| case XML_ERR_SYSTEM: |
| msg = "Unexpected error from the OS or an external library"; |
| break; |
| #endif |
| case XML_IO_ENOENT: |
| msg = "File not found"; |
| break; |
| default: |
| msg = strdup(name); |
| if( ! msg ) msg = "unknown XML error"; |
| break; |
| } |
| return msg; |
| } |
| |
| #define xmlerror_str(E) xmlParserErrors_str( (E), #E ) |
| |
| /* |
| * The global context is NULL if XML PARSE is not in progress. |
| */ |
| static class context_t { |
| const int priority; |
| public: |
| xmlParserCtxt * ctxt; |
| context_t() : priority(LOG_INFO), ctxt(nullptr) { |
| const int option = LOG_PERROR, facility = LOG_USER; |
| #if HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME |
| /* Declared in errno.h, when available. */ |
| static const char * const ident = program_invocation_short_name; |
| #elif defined (HAVE_GETPROGNAME) |
| /* Declared in stdlib.h. */ |
| static const char * const ident = getprogname(); |
| #else |
| /* Avoid a NULL entry. */ |
| static const char * const ident = "unnamed_COBOL_program"; |
| #endif |
| // TODO: Program to set option in library via command-line and/or |
| // environment. |
| // Library listens to program, not to the environment. |
| openlog(ident, option, facility); |
| |
| initialize_handlers(nullptr); |
| } |
| |
| void |
| push( const cblc_field_t *input_field, |
| size_t input_offset, |
| size_t len, bool done ) { |
| if( ! ctxt ) { |
| init(); |
| } |
| assert(cobol_callback); // caller must set |
| |
| if( input_offset < len ) { |
| int size = len - input_offset; |
| const char *chunk = PTRCAST(char, input_field->data + input_offset); |
| int terminate = done? 1 : 0; |
| |
| auto erc = (xmlParserErrors )xmlParseChunk( ctxt, chunk, size, terminate ); |
| if( erc != 0 ) { |
| auto msg = xmlerror_str(erc); |
| syslog(priority, "XML PARSE: XML error: %s", msg); |
| } |
| |
| if( done ) this->done(); |
| |
| } |
| } |
| |
| void done() { |
| if( ctxt ) { |
| xmlFreeParserCtxt( ctxt ); |
| ctxt = nullptr; |
| } |
| } |
| |
| protected: |
| void init() { |
| const char *external_entities = nullptr; |
| void * const user_data = nullptr; |
| |
| ctxt = xmlCreatePushParserCtxt( &handlers, user_data, |
| nullptr, 0, external_entities); |
| } |
| } context; |
| |
| static int |
| xml_push_parse( const cblc_field_t *input_field, |
| size_t input_offset, |
| size_t len, |
| cblc_field_t *encoding __attribute__ ((unused)), |
| cblc_field_t *validating __attribute__ ((unused)), |
| int returns_national __attribute__ ((unused)), |
| void (*callback)(void) ) |
| { |
| ::cobol_callback = callback; |
| |
| context.push( input_field, input_offset, len, false); |
| |
| #if LIBXML_VERSION >= 21400 |
| const xmlChar * version = xmlCtxtGetVersion( context.ctxt ); |
| #else |
| const xmlChar * version = xmlchar_of("requires version 2.14"); |
| #endif |
| assert(version); |
| assert(nullptr == "function not ready and not called"); |
| return 0; |
| } |
| |
| |
| extern "C" // Parser calls via parser_xml_parse_end, probabably. |
| int |
| __gg__xml_parse_done() { |
| context.done(); |
| return 0; |
| } |
| |
| |
| extern "C" |
| int |
| __gg__xml_parse( const cblc_field_t *input_field, |
| size_t input_offset, |
| size_t len, |
| cblc_field_t *encoding __attribute__ ((unused)), |
| cblc_field_t *validating __attribute__ ((unused)), |
| int returns_national __attribute__ ((unused)), |
| void (*callback)(void) ) |
| { |
| initialize_handlers(callback); |
| |
| const char *input = PTRCAST(char, input_field->data + input_offset); |
| |
| int erc = xmlSAXUserParseMemory(&handlers, nullptr, input, len); |
| |
| if( erc ) { |
| const xmlError *msg = xmlCtxtGetLastError(nullptr); |
| fprintf(stderr, "XML PARSE: error: line %d: %s (%d: %d.%d.%d)\n", |
| msg->line, msg->message, erc, msg->domain, msg->level, msg->code); |
| } |
| return erc; |
| } |
| |
| |