* tta/C/convert/get_converter_perl_info.c (copy_sv_options_for_convert_text), tta/C/main/build_perl_info.c (build_convert_text_options), tta/C/main/convert_to_text.c (new_text_options, destroy_text_options) (TEXT_INDICATOR_CONVERTER_OPTIONS, copy_options_for_convert_text), tta/C/main/convert_to_text.h (TEXT_OPTIONS), tta/perl/Texinfo/Convert/Text.pm (@text_indicator_converter_options) (copy_options_for_convert_text): get and set DOC_ENCODING_FOR_INPUT_FILE_NAME, INPUT_FILE_NAME_ENCODING, LOCALE_ENCODING and INCLUDE_DIRECTORIES in text options. * tta/C/main/convert_to_text.c (convert_to_text_internal): use information directly in text_options for the call to expand_verbatiminclude. * tta/perl/Texinfo/Convert/Text.pm (convert, output): set OUTPUT_ENCODING_NAME directly. * tta/perl/Texinfo/Convert/Text.pm (_initialize_text_options_encoding, convert, output): use only one argument. * tta/perl/Texinfo/Convert/Text.pm: comment out get_conf and set_conf now that they are not used anymore.
diff --git a/ChangeLog b/ChangeLog index e8aeb59..2b8d60f 100644 --- a/ChangeLog +++ b/ChangeLog
@@ -1,5 +1,32 @@ 2025-03-12 Patrice Dumas <pertusus@free.fr> + * tta/C/convert/get_converter_perl_info.c + (copy_sv_options_for_convert_text), tta/C/main/build_perl_info.c + (build_convert_text_options), tta/C/main/convert_to_text.c + (new_text_options, destroy_text_options) + (TEXT_INDICATOR_CONVERTER_OPTIONS, copy_options_for_convert_text), + tta/C/main/convert_to_text.h (TEXT_OPTIONS), + tta/perl/Texinfo/Convert/Text.pm (@text_indicator_converter_options) + (copy_options_for_convert_text): get and set + DOC_ENCODING_FOR_INPUT_FILE_NAME, INPUT_FILE_NAME_ENCODING, + LOCALE_ENCODING and INCLUDE_DIRECTORIES in text options. + + * tta/C/main/convert_to_text.c (convert_to_text_internal): use + information directly in text_options for the call to + expand_verbatiminclude. + + * tta/perl/Texinfo/Convert/Text.pm (convert, output): set + OUTPUT_ENCODING_NAME directly. + + * tta/perl/Texinfo/Convert/Text.pm + (_initialize_text_options_encoding, convert, output): use only one + argument. + + * tta/perl/Texinfo/Convert/Text.pm: comment out get_conf and set_conf + now that they are not used anymore. + +2025-03-12 Patrice Dumas <pertusus@free.fr> + Remove OUTPUT_PERL_ENCODING and set_output_perl_encoding * tta/perl/Texinfo/Common.pm (count_bytes): do not set encoding to
diff --git a/tta/C/convert/get_converter_perl_info.c b/tta/C/convert/get_converter_perl_info.c index 47567a6..a312097 100644 --- a/tta/C/convert/get_converter_perl_info.c +++ b/tta/C/convert/get_converter_perl_info.c
@@ -178,6 +178,9 @@ SV **set_case_sv; SV **documentlanguage_sv; SV **DEBUG_sv; + SV **DOC_ENCODING_FOR_INPUT_FILE_NAME_sv; + SV **INPUT_FILE_NAME_ENCODING_sv; + SV **LOCALE_ENCODING_sv; TEXT_OPTIONS *text_options = new_text_options (); dTHX; @@ -229,6 +232,22 @@ if (DEBUG_sv && SvOK (*DEBUG_sv)) text_options->DEBUG = SvIV (*DEBUG_sv); + FETCH(DOC_ENCODING_FOR_INPUT_FILE_NAME) + if (DOC_ENCODING_FOR_INPUT_FILE_NAME_sv + && SvOK (*DOC_ENCODING_FOR_INPUT_FILE_NAME_sv)) + text_options->DOC_ENCODING_FOR_INPUT_FILE_NAME + = SvIV (*DOC_ENCODING_FOR_INPUT_FILE_NAME_sv); + + FETCH(INPUT_FILE_NAME_ENCODING) + if (INPUT_FILE_NAME_ENCODING_sv) + text_options->INPUT_FILE_NAME_ENCODING + = non_perl_strdup (SvPVutf8_nolen (*INPUT_FILE_NAME_ENCODING_sv)); + + FETCH(LOCALE_ENCODING) + if (LOCALE_ENCODING_sv) + text_options->LOCALE_ENCODING + = non_perl_strdup (SvPVutf8_nolen (*LOCALE_ENCODING_sv)); + FETCH(converter) if (converter_sv) {
diff --git a/tta/C/main/build_perl_info.c b/tta/C/main/build_perl_info.c index febac66..b1afd9f 100644 --- a/tta/C/main/build_perl_info.c +++ b/tta/C/main/build_perl_info.c
@@ -2883,6 +2883,12 @@ if (text_options->ASCII_GLYPH) STORE("ASCII_GLYPH", newSViv (1)); + if (text_options->DEBUG) + STORE("DEBUG", newSViv (1)); + + if (text_options->DOC_ENCODING_FOR_INPUT_FILE_NAME) + STORE("DOC_ENCODING_FOR_INPUT_FILE_NAME", newSViv (1)); + if (text_options->NUMBER_SECTIONS) STORE("NUMBER_SECTIONS", newSViv (1)); @@ -2901,6 +2907,18 @@ if (text_options->code_state) STORE("_code_state", newSViv (text_options->code_state)); + if (text_options->documentlanguage) + STORE("documentlanguage", + newSVpv_utf8 (text_options->documentlanguage, 0)); + + if (text_options->INPUT_FILE_NAME_ENCODING) + STORE("INPUT_FILE_NAME_ENCODING", + newSVpv_utf8 (text_options->INPUT_FILE_NAME_ENCODING, 0)); + + if (text_options->LOCALE_ENCODING) + STORE("LOCALE_ENCODING", + newSVpv_utf8 (text_options->LOCALE_ENCODING, 0)); + expanded_formats_hv = build_expanded_formats (text_options->expanded_formats); STORE("expanded_formats", newRV_noinc ((SV *)expanded_formats_hv));
diff --git a/tta/C/main/convert_to_text.c b/tta/C/main/convert_to_text.c index ea03337..ce548ea 100644 --- a/tta/C/main/convert_to_text.c +++ b/tta/C/main/convert_to_text.c
@@ -57,6 +57,7 @@ memset (options, 0, sizeof (TEXT_OPTIONS)); options->expanded_formats = new_expanded_formats (); options->NUMBER_SECTIONS = -1; + options->DOC_ENCODING_FOR_INPUT_FILE_NAME = -1; memset (&options->include_directories, 0, sizeof (STRING_LIST)); return options; } @@ -67,6 +68,8 @@ free (text_options->encoding); free (text_options->expanded_formats); free (text_options->documentlanguage); + free (text_options->LOCALE_ENCODING); + free (text_options->INPUT_FILE_NAME_ENCODING); free_strings_list (&text_options->include_directories); /* if the customization options come from a converter or are another structure options, in practice a document, options should not be @@ -92,8 +95,10 @@ } #define TEXT_INDICATOR_CONVERTER_OPTIONS \ - tico_option_name(NUMBER_SECTIONS) \ tico_option_name(ASCII_GLYPH) \ + tico_option_name(DEBUG) \ + tico_option_name(DOC_ENCODING_FOR_INPUT_FILE_NAME) \ + tico_option_name(NUMBER_SECTIONS) \ tico_option_name(TEST) /* the string and strlist options need to be copied, in case they are @@ -126,8 +131,13 @@ text_options->documentlanguage = strdup (options->documentlanguage.o.string); - if (options->DEBUG.o.integer > 0) - text_options->DEBUG = 1; + if (options->INPUT_FILE_NAME_ENCODING.o.string) + text_options->INPUT_FILE_NAME_ENCODING + = strdup (options->INPUT_FILE_NAME_ENCODING.o.string); + + if (options->LOCALE_ENCODING.o.string) + text_options->LOCALE_ENCODING + = strdup (options->LOCALE_ENCODING.o.string); /* not a copy but a reference to the options */ text_options->other_converter_options = options; @@ -1017,29 +1027,22 @@ text_options->other_converter_options, 0, element); } else { - const char *input_file_name_encoding = 0; - int doc_encoding_for_input_file_name = -1; - const char *locale_encoding = 0; - const STRING_LIST *include_directories = 0; + const char *input_file_name_encoding + = text_options->INPUT_FILE_NAME_ENCODING; + int doc_encoding_for_input_file_name + = text_options->DOC_ENCODING_FOR_INPUT_FILE_NAME; + const 
char *locale_encoding = text_options->LOCALE_ENCODING; + const STRING_LIST *include_directories + = &text_options->include_directories; GLOBAL_INFO *global_information = 0; error_messages = &text_options->error_messages; + if (text_options->document_descriptor) { DOCUMENT *document = retrieve_document (text_options->document_descriptor); if (document) global_information = &document->global_info; } - input_file_name_encoding - = text_options->self_converter_options - ->INPUT_FILE_NAME_ENCODING.o.string; - doc_encoding_for_input_file_name - = text_options->self_converter_options - ->DOC_ENCODING_FOR_INPUT_FILE_NAME.o.integer; - - locale_encoding - = text_options->self_converter_options->LOCALE_ENCODING.o.string; - include_directories - = text_options->self_converter_options->INCLUDE_DIRECTORIES.o.strlist; verbatim_include_verbatim = expand_verbatiminclude (input_file_name_encoding,
diff --git a/tta/C/main/convert_to_text.h b/tta/C/main/convert_to_text.h index b0ce300..de96853 100644 --- a/tta/C/main/convert_to_text.h +++ b/tta/C/main/convert_to_text.h
@@ -20,10 +20,13 @@ int TEST; int NUMBER_SECTIONS; int DEBUG; + int DOC_ENCODING_FOR_INPUT_FILE_NAME; int other_options; EXPANDED_FORMAT *expanded_formats; STRING_LIST include_directories; char *documentlanguage; + char *INPUT_FILE_NAME_ENCODING; + char *LOCALE_ENCODING; CONVERTER *converter; /* other_converter_options is used if set. If not set and in some cases self_converter_options, if set, is used. */
diff --git a/tta/perl/Texinfo/Convert/Text.pm b/tta/perl/Texinfo/Convert/Text.pm index 54018aa..115f273a 100644 --- a/tta/perl/Texinfo/Convert/Text.pm +++ b/tta/perl/Texinfo/Convert/Text.pm
@@ -138,18 +138,18 @@ } my @text_indicator_converter_options - = ('ASCII_GLYPH', 'NUMBER_SECTIONS', 'TEST'); + = ('ASCII_GLYPH', 'DEBUG', 'DOC_ENCODING_FOR_INPUT_FILE_NAME', + 'NUMBER_SECTIONS', 'TEST'); # for this module converter -sub _initialize_text_options_encoding($$) +sub _initialize_text_options_encoding($) { - my $self = shift; my $text_options = shift; - if ($self->{'ENABLE_ENCODING'} - and defined($self->{'OUTPUT_ENCODING_NAME'})) { + if ($text_options->{'ENABLE_ENCODING'} + and defined($text_options->{'OUTPUT_ENCODING_NAME'})) { $text_options->{'enabled_encoding'} - = $self->{'OUTPUT_ENCODING_NAME'}; + = $text_options->{'OUTPUT_ENCODING_NAME'}; } } @@ -199,10 +199,15 @@ $options{'expanded_formats'}->{$expanded_format} = 1; } } - $options{'documentlanguage'} = $converter->get_conf('documentlanguage'); - if ($converter->get_conf('DEBUG')) { - $options{'DEBUG'} = 1; + foreach my $string_option ('documentlanguage', 'INPUT_FILE_NAME_ENCODING', + 'LOCALE_ENCODING') { + $options{$string_option} = $converter->get_conf($string_option); + } + + my $include_directories = $converter->get_conf('INCLUDE_DIRECTORIES'); + if ($include_directories and scalar(@{$include_directories})) { + $options{'INCLUDE_DIRECTORIES'} = [@{$include_directories}]; } $options{'converter'} = $converter; @@ -772,13 +777,13 @@ my $include_directories = $options->{'INCLUDE_DIRECTORIES'}; - my $document = $options->{'document'}; + my $document = $options->{'document'}; - $verbatim_include_verbatim + $verbatim_include_verbatim = Texinfo::Convert::Utils::expand_verbatiminclude($element, $input_file_name_encoding, - $doc_encoding_for_input_file_name, $locale_encoding, - $include_directories, $document, $options); + $doc_encoding_for_input_file_name, $locale_encoding, + $include_directories, $document, $options); } if (defined($verbatim_include_verbatim)) { $result .= _convert($options, $verbatim_include_verbatim); @@ -878,16 +883,24 @@ } elsif (!ref($options)) { confess("convert_to_text options 
not a ref\n"); } - # FIXME remove? Should not be useful at some point - bless $options, "Texinfo::Convert::Text"; - # Interface with XS converter. if ($XS_convert and defined($root->{'tree_document_descriptor'}) and $Texinfo::Convert::ConvertXS::XS_package) { return _convert_tree_with_XS($options, $root); } - return _convert($options, $root); + # needed for converter_document_warn call for verbatiminclude, when + # called without a converter, for t/*.t TESTS. + bless $options, "Texinfo::Convert::Text"; + + my $result = _convert($options, $root); + + # TODO it could be possible here to pass 'error_warning_messages' + # back to $options->{'converter'} instead of dropping them. + # in this case, there also would not be a need for calling + # expand_verbatiminclude on the converter anymore + + return $result; } @@ -940,9 +953,17 @@ my $self = shift; my $document = shift; - Texinfo::Common::set_output_encoding($self, $document); + my $document_info; + if ($document) { + $document_info = $document->global_information(); + + # same as Texinfo::Common::set_output_encoding + $self->{'OUTPUT_ENCODING_NAME'} = $document_info->{'input_encoding_name'} + if ($document_info and $document_info->{'input_encoding_name'}); + } + # Cf comment in output() on using $self for options. - _initialize_text_options_encoding($self, $self); + _initialize_text_options_encoding($self); $self->{'document'} = $document; @@ -970,9 +991,11 @@ my $document_info; if ($document) { $document_info = $document->global_information(); - } - Texinfo::Common::set_output_encoding($self, $document); + # same as Texinfo::Common::set_output_encoding + $self->{'OUTPUT_ENCODING_NAME'} = $document_info->{'input_encoding_name'} + if ($document_info and $document_info->{'input_encoding_name'}); + } # Text options and converter are of different nature. 
# However, since the option keys are very similar between the converter @@ -980,9 +1003,10 @@ # we use the converter object as text options and we call # _initialize_text_options_encoding for the only option that is set up # based on other customization options. - # Also, we need a blessed reference as get_conf can be called on the options, - # using the converter brings that too. - _initialize_text_options_encoding($self, $self); + # Also, we need a blessed reference as converter_line_error + # and other methods can be called on the options, using the converter + # brings that too. + _initialize_text_options_encoding($self); # for expand_verbatiminclude call. $self->{'document'} = $document; @@ -1120,26 +1144,25 @@ { } -# FIXME used where? -sub get_conf($$) -{ - my $self = shift; - my $key = shift; +# Following not used anywhere. Would not be an issue if they were. +#sub get_conf($$) +#{ +# my $self = shift; +# my $key = shift; +# +# return $self->{$key}; +#} - return $self->{$key}; -} - -# used in Texinfo::Common::set_output_encoding -sub set_conf($$$) -{ - my $self = shift; - my $conf = shift; - my $value = shift; - - $self->{$conf} = $value; - - return 1; -} +#sub set_conf($$$) +#{ +# my $self = shift; +# my $conf = shift; +# my $value = shift; +# +# $self->{$conf} = $value; +# +# return 1; +#} # used in Texinfo::Convert::Utils::expand_verbatiminclude sub converter_line_error($$$;$)