blob: b777085d1a4253eaba705fa5c199a849a0837ff2 [file] [log] [blame]
# Copyright 2023-2025 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License,
# or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# This package provides a view of a parsed Texinfo document. The
# instantiated objects are also used to carry information for the
# XS modules, mainly to be able to find the document information
# in memory.
# Also note that the initialization of the C library is done by
# this module XS code when XS extensions are used.
#
# ALTIMP perl/XSTexinfo/parser_document/DocumentXS.xs
# ALTIMP C/main/document.c
package Texinfo::Document;
use strict;
use warnings;
# To check if there is no erroneous autovivification
#no autovivification qw(fetch delete exists store strict);
use Carp qw(cluck confess);
eval { require Devel::Cycle; Devel::Cycle->import(); };
use Texinfo::DocumentXS;
use Texinfo::XSLoader;
use Texinfo::TreeElement;
use Texinfo::Common;
use Texinfo::Report;
use Texinfo::Indices;
use Texinfo::ManipulateTree;
our $VERSION = '7.2dev';
# There is a full coverage by the C implementation.
# Relevant XS interfaces are all implemented.
# See comments before methods definitions for an explanation of why some
# methods have no XS override.
# Result of Texinfo::XSLoader::XS_parser_enabled().  The import method
# only installs the overrides listed in %XS_overrides when it is true.
my $XS_parser = Texinfo::XSLoader::XS_parser_enabled();

# Table of overrides: each key is the fully qualified name of a function
# of this package, the associated value is the fully qualified name of
# the Texinfo::DocumentXS function passed with it to
# Texinfo::XSLoader::override.
our %XS_overrides = (
  "Texinfo::Document::_XS_destroy_document"
    => "Texinfo::DocumentXS::destroy_document",
  "Texinfo::Document::set_document_global_info"
    => "Texinfo::DocumentXS::set_document_global_info",
  "Texinfo::Document::errors"
    => "Texinfo::DocumentXS::document_errors",
  "Texinfo::Document::parser_errors"
    => "Texinfo::DocumentXS::document_parser_errors",
  "Texinfo::Document::build_tree"
    => "Texinfo::DocumentXS::build_tree",
  "Texinfo::Document::tree"
    => "Texinfo::DocumentXS::document_tree",
  "Texinfo::Document::register_document_options"
    => "Texinfo::DocumentXS::register_document_options",
  "Texinfo::Document::get_conf"
    => "Texinfo::DocumentXS::document_get_conf",
  "Texinfo::Document::global_information"
    => "Texinfo::DocumentXS::document_global_information",
  "Texinfo::Document::indices_information"
    => "Texinfo::DocumentXS::document_indices_information",
  "Texinfo::Document::global_commands_information"
    => "Texinfo::DocumentXS::document_global_commands_information",
  "Texinfo::Document::labels_information"
    => "Texinfo::DocumentXS::document_labels_information",
  "Texinfo::Document::labels_list"
    => "Texinfo::DocumentXS::document_labels_list",
  "Texinfo::Document::nodes_list"
    => "Texinfo::DocumentXS::document_nodes_list",
  "Texinfo::Document::sections_list"
    => "Texinfo::DocumentXS::document_sections_list",
  "Texinfo::Document::sectioning_root"
    => "Texinfo::DocumentXS::document_sectioning_root",
  "Texinfo::Document::headings_list"
    => "Texinfo::DocumentXS::document_headings_list",
  "Texinfo::Document::floats_information"
    => "Texinfo::DocumentXS::document_floats_information",
  "Texinfo::Document::internal_references_information"
    => "Texinfo::DocumentXS::document_internal_references_information",
  "Texinfo::Document::setup_indices_sort_strings"
    => "Texinfo::DocumentXS::setup_indices_sort_strings",
  "Texinfo::Document::indices_sort_strings"
    => "Texinfo::DocumentXS::indices_sort_strings",
  "Texinfo::Document::print_document_indices_information"
    => "Texinfo::DocumentXS::print_document_indices_information",
  "Texinfo::Document::print_document_indices_sort_strings"
    => "Texinfo::DocumentXS::print_document_indices_sort_strings",
);
# Result of Texinfo::XSLoader::XS_structuring_enabled().  The import
# method only installs the overrides listed in %XS_structure_overrides
# when it is true.
my $XS_structuring = Texinfo::XSLoader::XS_structuring_enabled();

# Same layout as %XS_overrides: name of the function of this package as
# key, name of the replacement Texinfo::DocumentXS function as value.
our %XS_structure_overrides = (
  'Texinfo::Document::print_document_listoffloats',
    'Texinfo::DocumentXS::print_document_listoffloats',
  # needed with the Reader/TreeElement interfaces only
  'Texinfo::Document::register_document_relations_lists_elements',
    'Texinfo::DocumentXS::register_document_relations_lists_elements',
);

# Set to 1 by the import method once the overrides have been processed,
# such that they are processed only once.
our $module_loaded = 0;
# Called when the module is imported.  The first call sets up the XS
# overrides of the enabled XS components, every call then continues
# with Exporter import.
sub import {
  if (!$module_loaded) {
    # Collect the override tables of the enabled XS components only.
    my @active_override_tables;
    push @active_override_tables, \%XS_overrides if ($XS_parser);
    push @active_override_tables, \%XS_structure_overrides
      if ($XS_structuring);

    foreach my $override_table (@active_override_tables) {
      foreach my $perl_sub_name (keys(%$override_table)) {
        Texinfo::XSLoader::override ($perl_sub_name,
                                     $override_table->{$perl_sub_name});
      }
    }
    $module_loaded = 1;
  }
  # The usual import method.  @_ was not modified, so it is called
  # with the arguments import was called with.
  goto &Exporter::import;
}
# Create a new document object holding empty containers for the
# information gathered on a document.
# $INDICES_INFORMATION is stored as is in the 'indices' key.
# Returns the new document, a hash reference blessed in this package.
# No XS override, only called from Texinfo::ParserNonXS.
sub new_document($) {
  my $indices_information = shift;

  my $document = {
    'indices' => $indices_information,
    'listoffloats_list' => {},
    'internal_references' => [],
    'global_info' => {'input_encoding_name' => 'utf-8',
                      #'included_files' => [],
                     },
    'commands_info' => {},
    'identifiers_target' => {},
    'labels_list' => [],
    'nodes_list' => [],
    'sections_list' => [],
    'headings_list' => [],
    # error messages for parsing
    'parser_error_messages' => [],
    # error messages for the document for structuring, not for parsing
    'error_messages' => [],
  };

  # Explicit two-argument bless.  The class is the same as the class
  # used by the one-argument form, the package this function is
  # compiled in.
  bless $document, __PACKAGE__;

  return $document;
}
# Associate the $TREE to the $DOCUMENT, in the 'tree' key.
sub register_tree($$)
{
  my ($document, $tree) = @_;

  $document->{'tree'} = $tree;
}
# Set the $KEY entry of the $DOCUMENT 'global_info' hash to $VALUE.
sub set_document_global_info($$$)
{
  my ($document, $key, $value) = @_;

  $document->{'global_info'}->{$key} = $value;
}
# Return the tree registered in the document.  The optional second
# argument is accepted but not used by this Perl implementation.
sub tree($;$)
{
  my ($self) = @_;

  return $self->{'tree'};
}
# Return the indices information, the value of the 'indices' key.
sub indices_information($)
{
  my ($self) = @_;

  return $self->{'indices'};
}
# Return the floats information, the value of the 'listoffloats_list' key.
sub floats_information($)
{
  my ($self) = @_;

  return $self->{'listoffloats_list'};
}
# Return the internal references, the value of the 'internal_references'
# key.
sub internal_references_information($)
{
  my ($self) = @_;

  return $self->{'internal_references'};
}
# Return the global commands information, the value of the
# 'commands_info' key.
sub global_commands_information($)
{
  my ($self) = @_;

  return $self->{'commands_info'};
}
# Return the global information, the value of the 'global_info' key.
sub global_information($)
{
  my ($self) = @_;

  return $self->{'global_info'};
}
# Return the labels information, the value of the 'identifiers_target'
# key.
sub labels_information($)
{
  my ($self) = @_;

  return $self->{'identifiers_target'};
}
# Return the value of the 'labels_list' key.
sub labels_list($)
{
  my ($self) = @_;

  return $self->{'labels_list'};
}
# Return the value of the 'nodes_list' key.
sub nodes_list($)
{
  my ($self) = @_;

  return $self->{'nodes_list'};
}
# Return the value of the 'sections_list' key.
sub sections_list($)
{
  my ($self) = @_;

  return $self->{'sections_list'};
}
# Return the value of the 'sectioning_root' key.
sub sectioning_root($)
{
  my ($self) = @_;

  return $self->{'sectioning_root'};
}
# Return the value of the 'headings_list' key.
sub headings_list($)
{
  my ($self) = @_;

  return $self->{'headings_list'};
}
# Register $OPTIONS in the document 'options' key, where get_conf looks
# values up.
# Useful for options used in structuring/tree transformations.
sub register_document_options($$)
{
  my ($self, $options) = @_;

  $self->{'options'} = $options;
}
# Return the value of $VAR in the options registered in the document,
# or undef if there are no registered options.
sub get_conf($$)
{
  my ($self, $var) = @_;

  # There may be no options if a tree/document is manipulated without
  # having any configuration set.  This is or was the case for pod2texi.
  # This is allowed.
  return undef if (!$self->{'options'});

  return $self->{'options'}->{$var};
}
# Remove, in a section relations hash, the relations to other relations
# in order to remove cycles.
sub _remove_section_relations_relations($) {
  my ($section_relation) = @_;

  delete @{$section_relation}{'associated_anchor_command', 'associated_node',
                              'part_following_node',
                              'part_associated_section',
                              'section_children'};

  # next/prev cycles
  delete $section_relation->{'section_directions'}->{'next'}
    if (exists($section_relation->{'section_directions'}));

  if (exists($section_relation->{'toplevel_directions'})) {
    # next/prev cycles, but also next/up and prev/up as the first
    # chapter level relation is next for top and also up
    delete @{$section_relation->{'toplevel_directions'}}{'next', 'prev'};
  }
}
# Remove the 'element' key of a section relations hash.
sub _remove_section_relations_references($) {
  my ($section_relation) = @_;

  my @reference_keys = ('element');
  delete @{$section_relation}{@reference_keys};
  return;
}
# Remove, in a node relations hash, the 'element',
# 'associated_title_command', 'node_description',
# 'node_long_description', 'menus' and 'node_directions' keys.
sub _remove_node_relations_references($) {
  my ($node_relation) = @_;

  my @reference_keys = ('element', 'associated_title_command',
                        'node_description', 'node_long_description',
                        'menus', 'node_directions');
  delete @{$node_relation}{@reference_keys};
  return;
}
# Remove the 'element' key of a heading relations hash.
sub _remove_heading_relations_references($) {
  my ($heading_relation) = @_;

  my @reference_keys = ('element');
  delete @{$heading_relation}{@reference_keys};
  return;
}
# Remove cycles and, optionally, references to tree elements in $DOCUMENT.
# Can also be called from XS.
# If $REMOVE_REFERENCES is not set, the objective of removing items is to
# remove cycles such that Perl can reclaim the removed memory. If
# $REMOVE_REFERENCES is set, the references to elements are also removed,
# in particular to be able to check that there is no reference remaining
# other than the reference kept in C code.
sub remove_document_references($;$) {
my ($document, $remove_references) = @_;
my $tree = $document->{'tree'};
# First step, always done: remove the relations between section relations
# and the parents in the tree.
my $sections_list = $document->{'sections_list'};
foreach my $section_relation (@$sections_list) {
_remove_section_relations_relations($section_relation);
}
if (defined($tree)) {
Texinfo::ManipulateTree::tree_remove_parents($tree);
}
# REMARK some tests take a long time, for example
# t/formats_encodings.t at_commands_in_refs
#find_cycle($document);
# Second step, only if requested: remove the references to elements held
# in the relations lists, in the document hash and in the index entries.
if ($remove_references) {
foreach my $section_relation (@$sections_list) {
_remove_section_relations_references($section_relation);
}
my $nodes_list = $document->{'nodes_list'};
foreach my $node_relation (@$nodes_list) {
_remove_node_relations_references($node_relation);
}
my $headings_list = $document->{'headings_list'};
foreach my $heading_relation (@$headings_list) {
_remove_heading_relations_references($heading_relation);
}
# Refers to section relations not directly to tree elements
#if (exists($document->{'sectioning_root'})) {
#}
delete $document->{'identifiers_target'};
delete $document->{'labels_list'};
delete $document->{'internal_references'};
delete $document->{'commands_info'};
delete $document->{'listoffloats_list'};
# indices not existing is not possible for a document created in
# Perl code, but seems to be possible when built from XS.
if (exists($document->{'indices'})) {
# the same index_entries are used in sorted_indices_by_*, so
# this also removes the references there.
foreach my $index_name (keys(%{$document->{'indices'}})) {
my $index = $document->{'indices'}->{$index_name};
foreach my $index_entry (@{$index->{'index_entries'}}) {
delete $index_entry->{'entry_element'};
delete $index_entry->{'entry_associated_element'};
}
}
}
# Texinfo tree elements in translation caches are not released, they may
# be shown for debugging here to verify that they do not show up
# somewhere.
# The block is disabled, replace the condition by the commented out
# condition to enable it.
#if (1) {
if (0) {
foreach my $lang (sort(keys(
%{$Texinfo::Translations::translation_cache}))) {
my $lang_cache = $Texinfo::Translations::translation_cache->{$lang};
foreach my $string (sort(keys(%$lang_cache))) {
foreach my $context (sort(keys(%{$lang_cache->{$string}}))) {
my ($translation, $trans_tree)
= @{$lang_cache->{$string}->{$context}};
print STDERR "TRANSL: $string-$context: ";
if (defined($trans_tree)) {
Texinfo::ManipulateTree::_print_tree_elements_ref($trans_tree, 0);
} else {
print STDERR "NOT NEEDED\n";
}
}
}
}
}
if (defined($tree)) {
# The document is only passed as second argument of
# tree_remove_references if the TEST customization variable is
# defined and greater than 1.
my $test_level = $document->get_conf('TEST');
my $check_refcount;
if (defined($test_level) and $test_level > 1) {
$check_refcount = $document;
}
Texinfo::ManipulateTree::tree_remove_references($document->{'tree'},
$check_refcount);
delete $document->{'tree'};
}
}
}
# Does nothing in Perl.  Listed in %XS_overrides, where it is associated
# with Texinfo::DocumentXS::destroy_document.  Called by destroy_document
# with the same arguments.
sub _XS_destroy_document($;$) {
my ($document, $remove_references) = @_;
}
# Call remove_document_references, then _XS_destroy_document, both with
# $DOCUMENT and the optional $REMOVE_REFERENCES as arguments.
sub destroy_document($;$) {
  my $document = shift;
  my $remove_references = shift;

  remove_document_references($document, $remove_references);

  _XS_destroy_document($document, $remove_references);
}
# This method does nothing in Perl.
# The XS override registers a reference to the C element in Perl
# nodes, sectioning and heading commands. Only needed if the
# TreeElement/Reader interfaces are used, which is not the case for
# converters used in texi2any.
sub register_document_relations_lists_elements($)
{
my $document = shift;
}
# Return the $TREE argument unchanged, the optional second argument is
# not used.  This method does nothing in Perl, but the XS override
# rebuilds the Perl tree based on XS data.
sub build_tree($;$)
{
  my ($tree, $no_store) = @_;

  return $tree;
}
# Add the result of Texinfo::Report::line_warn called with $TEXT,
# $ERROR_LOCATION_INFO, $CONTINUATION and the DEBUG customization variable
# value to the document 'error_messages' list.
# $CONTINUATION is optional, it is set to 0 if not defined.
sub document_line_warn($$$;$)
{
  my ($document, $text, $error_location_info, $continuation) = @_;

  unless (defined($continuation)) {
    $continuation = 0;
  }

  my $document_errors = $document->{'error_messages'};
  my $debug_level = $document->get_conf('DEBUG');

  push @$document_errors,
       Texinfo::Report::line_warn($text, $error_location_info,
                                  $continuation, $debug_level);
}
# Add the result of Texinfo::Report::line_error called with $TEXT,
# $ERROR_LOCATION_INFO, $CONTINUATION and the DEBUG customization variable
# value to the document 'error_messages' list.
# $CONTINUATION is optional, it is set to 0 if not defined.
sub document_line_error($$$;$)
{
  my ($document, $text, $error_location_info, $continuation) = @_;

  unless (defined($continuation)) {
    $continuation = 0;
  }

  my $document_errors = $document->{'error_messages'};
  my $debug_level = $document->get_conf('DEBUG');

  push @$document_errors,
       Texinfo::Report::line_error($text, $error_location_info,
                                   $continuation, $debug_level);
}
# Return a new array reference holding the parser error messages of the
# document, and replace the document 'parser_error_messages' list by a
# new empty list.
sub parser_errors($)
{
  my ($document) = @_;

  my @pending_errors = @{$document->{'parser_error_messages'}};
  $document->{'parser_error_messages'} = [];

  return \@pending_errors;
}
# Remove all the messages from the document 'error_messages' list and
# return them in a new array reference.
# The XS override passes C error messages to the document
# error_messages and removes error messages in C.
sub errors($)
{
  my ($document) = @_;

  # splice without offset empties the list in place
  my @removed_errors = splice(@{$document->{'error_messages'}});

  return \@removed_errors;
}
# Return the merged indices.  If there are indices information and no
# merged indices set yet, Texinfo::Indices::merge_indices is called and
# the result is stored in the 'merged_indices' key first.
# No XS override.
# This method is already called by other methods, in particular
# sorted_indices_by_* when the indexes are sorted.  When the indexes
# are merged but not sorted, it is sensible to call this function
# directly.  Also called directly in tests.
# XS override is not needed, if the converters calling this function
# are implemented in C, even partly, they should call the C counterpart
# rather than go through an XS interface.
sub merged_indices($)
{
  my ($self) = @_;

  if ($self->{'indices'} and !$self->{'merged_indices'}) {
    $self->{'merged_indices'}
      = Texinfo::Indices::merge_indices($self->{'indices'});
  }

  return $self->{'merged_indices'};
}
# Calls Texinfo::Indices::setup_index_entries_sort_strings and caches the
# result in the document 'index_entries_sort_strings' key.  Nothing is
# done if there is already a cached value.
# In general, it is not needed to call that function directly,
# as it is called by Texinfo::Indices::sort_indices_by_*.  It could
# be called in advance if errors need to be collected early.
sub setup_indices_sort_strings($$)
{
  my ($document, $converter) = @_;

  if (!$document->{'index_entries_sort_strings'}) {
    $document->{'index_entries_sort_strings'}
      = Texinfo::Indices::setup_index_entries_sort_strings(
                          $document, $converter,
                          $document->merged_indices(),
                          $document->indices_information(), 0);
  }
}
# index_entries_sort_strings accessor, calling setup_indices_sort_strings
# first.  A different function from setup_indices_sort_strings such that
# there is no need to build C data to Perl when calling
# setup_indices_sort_strings, to make it possible to delay building Perl
# data for indices_sort_strings function call in XS.
sub indices_sort_strings($$)
{
  my ($document, $converter) = @_;

  setup_indices_sort_strings($document, $converter);

  return $document->{'index_entries_sort_strings'};
}
# Calls Texinfo::Indices::sort_indices_by_letter and caches the result
# in the document 'sorted_indices_by_letter' hash, with a key determined
# from $USE_UNICODE_COLLATION and $LOCALE_LANG.
# No XS override, as there is no reason to call this function directly
# outside of tests, Texinfo::Convert::Converter
# get_converter_indices_sorted_by_letter should be called directly.
# In general a CONVERTER argument is given, but if not the DOCUMENT is
# used instead to register error messages.
sub sorted_indices_by_letter($$$$)
{
  my ($document, $converter, $use_unicode_collation, $locale_lang) = @_;

  # '' if Unicode collation is not used.  Otherwise the locale language
  # if defined, else '-', special name corresponding to Unicode Collation
  # with 'Non-Ignorable' set for variable collation elements.
  my $lang_key = !$use_unicode_collation ? ''
               : !defined($locale_lang)  ? '-'
               :                           $locale_lang;

  $document->{'sorted_indices_by_letter'} ||= {};

  unless ($document->{'sorted_indices_by_letter'}->{$lang_key}) {
    $document->merged_indices();
    $document->{'sorted_indices_by_letter'}->{$lang_key}
      = Texinfo::Indices::sort_indices_by_letter($document, $converter,
                                   $use_unicode_collation, $locale_lang);
  }

  return $document->{'sorted_indices_by_letter'}->{$lang_key};
}
# Calls Texinfo::Indices::sort_indices_by_index and caches the result
# in the document 'sorted_indices_by_index' hash, with a key determined
# from $USE_UNICODE_COLLATION and $LOCALE_LANG.
# No XS override, as there is no reason to call this function directly
# outside of tests, Texinfo::Convert::Converter
# get_converter_indices_sorted_by_index should be called directly.
# In general a CONVERTER argument is given, but if not the DOCUMENT is
# used instead to register error messages.
sub sorted_indices_by_index($$$$)
{
  my ($document, $converter, $use_unicode_collation, $locale_lang) = @_;

  # '' if Unicode collation is not used.  Otherwise the locale language
  # if defined, else '-', special name corresponding to Unicode Collation
  # with 'Non-Ignorable' set for variable collation elements.
  my $lang_key = !$use_unicode_collation ? ''
               : !defined($locale_lang)  ? '-'
               :                           $locale_lang;

  $document->{'sorted_indices_by_index'} ||= {};

  unless ($document->{'sorted_indices_by_index'}->{$lang_key}) {
    $document->merged_indices();
    $document->{'sorted_indices_by_index'}->{$lang_key}
      = Texinfo::Indices::sort_indices_by_index($document, $converter,
                                   $use_unicode_collation, $locale_lang);
  }

  return $document->{'sorted_indices_by_index'}->{$lang_key};
}
# Wrapper on print_listoffloats_types that can be used for XS overriding.
# Returns the result of Texinfo::ManipulateTree::print_listoffloats_types
# called on the document floats information, or undef if the document or
# the floats information is false.
# Used in tests only.
sub print_document_listoffloats($)
{
  my ($document) = @_;

  my $floats;
  $floats = $document->floats_information() if ($document);

  my $float_text;
  if ($floats) {
    $float_text
      = Texinfo::ManipulateTree::print_listoffloats_types($floats);
  }

  return $float_text;
}
# Wrapper on print_indices_information that can be used for XS overriding.
# Returns the result of Texinfo::Indices::print_indices_information
# called on the document indices information, or undef if the document
# or the indices information is false.
# Used in tests only.
sub print_document_indices_information($)
{
  my ($document) = @_;

  my $indices_information;
  $indices_information = $document->indices_information() if ($document);

  my $indices_info_text;
  if ($indices_information) {
    $indices_info_text
      = Texinfo::Indices::print_indices_information($indices_information);
  }

  return $indices_info_text;
}
# Return a string with, for each index with entries, the index name
# followed by the sort strings of the index entries, in the order of the
# sorted index entries.  Returns undef if there are no merged indices.
# For tests, to be used for overriding.
sub print_document_indices_sort_strings($)
{
  my ($document) = @_;

  # read from C data if needed
  $document->indices_information();

  # use merged indices here as there are only indices with
  # entries in that data
  my $merged_index_entries = $document->merged_indices();
  return undef if (!$merged_index_entries);

  my $use_unicode_collation
    = $document->get_conf('USE_UNICODE_COLLATION');

  # The collation language is not needed if USE_UNICODE_COLLATION is
  # defined and false.
  my $locale_lang;
  if (!defined($use_unicode_collation) or $use_unicode_collation) {
    $locale_lang = $document->get_conf('COLLATION_LANGUAGE');
  }

  my $indices_sort_strings = indices_sort_strings($document, undef);
  my $sort_string_of_entry
    = Texinfo::Indices::format_index_entries_sort_strings(
                                                $indices_sort_strings);

  my $sorted_index_entries
    = sorted_indices_by_index($document, undef,
                              $use_unicode_collation, $locale_lang);

  my $result = '';
  foreach my $index_name (sort(keys(%$sorted_index_entries))) {
    # index entries sort strings sorted in the order of the index entries
    my $index_entries = $sorted_index_entries->{$index_name};
    next if (!scalar(@{$index_entries}));

    $result .= "${index_name}:\n";
    foreach my $index_entry (@{$index_entries}) {
      my $sort_string = $sort_string_of_entry->{$index_entry};
      $result .= " ${sort_string}\n";
    }
  }

  return $result;
}
# Register in $ERROR_MESSAGES two error messages for $ELEMENT, whose
# normalized label is already associated to a target: an error at $ELEMENT
# location and a continuation error at the location of the element
# registered in 'identifiers_target' for the same normalized label.
# Nothing is done if $ELEMENT has no normalized label or if $ERROR_MESSAGES
# is not defined.  $DEBUG is passed to Texinfo::Report::line_error.
#
# In general, we avoid passing error messages separate from the object holding
# them. In that case, however, when called from parser, we want
# parser_error_messages error messages to be modified from a document, and not
# the error messages of the document, so we pass the error messages list
# separately.
sub _existing_label_error($$;$$)
{
my $self = shift;
my $element = shift;
my $error_messages = shift;
my $debug = shift;
if ($element->{'extra'}
and defined($element->{'extra'}->{'normalized'})) {
my $normalized = $element->{'extra'}->{'normalized'};
if (defined($error_messages)) {
# the element already registered as target for the normalized label
my $existing_target = $self->{'identifiers_target'}->{$normalized};
my $label_element = Texinfo::Common::get_label_element($element);
# the label is converted back to Texinfo to be shown in the message
push @$error_messages, Texinfo::Report::line_error(
sprintf(__("\@%s `%s' previously defined"),
$element->{'cmdname'},
Texinfo::Convert::Texinfo::convert_to_texinfo(
Texinfo::TreeElement::new({'contents' => $label_element->{'contents'}}))),
$element->{'source_info'}, 0, $debug);
# second message, passed 1 as continuation argument
push @$error_messages, Texinfo::Report::line_error(
sprintf(__("here is the previous definition as \@%s"),
$existing_target->{'cmdname'}),
$existing_target->{'source_info'}, 1, $debug);
}
}
}
# Register $ELEMENT as the target associated to its normalized label in
# 'identifiers_target', and mark it with the 'is_target' extra key.
# Returns 1 if the element was registered, 0 if the element has no
# normalized label or if there is already a target for the label.
sub _add_element_to_identifiers_target($$)
{
  my ($self, $element) = @_;

  return 0
    unless ($element->{'extra'}
            and defined($element->{'extra'}->{'normalized'}));

  my $normalized = $element->{'extra'}->{'normalized'};

  # the first element registered for a label remains the target
  return 0 if (defined($self->{'identifiers_target'}->{$normalized}));

  $self->{'identifiers_target'}->{$normalized} = $element;
  $element->{'extra'}->{'is_target'} = 1;
  return 1;
}
# Sets $self->{'identifiers_target'} based on $self->{'labels_list'}.
# The elements with a normalized label that could not be registered as
# target are reported with _existing_label_error, called with
# $ERROR_MESSAGES and $DEBUG.
# No XS override, only called from Texinfo::ParserNonXS.
# This should be considered an internal function of the parser.
# It is here to reuse code.
sub set_labels_identifiers_target($$;$)
{
  my ($self, $error_messages, $debug) = @_;

  $self->{'identifiers_target'} = {};

  my @duplicate_labels;
  if (defined($self->{'labels_list'})) {
    foreach my $element (@{$self->{'labels_list'}}) {
      next if (_add_element_to_identifiers_target($self, $element));
      push @duplicate_labels, $element
        if ($element->{'extra'}
            and defined($element->{'extra'}->{'normalized'}));
    }
  }

  # Errors are registered in document order since C code uses hashmap
  # for identifiers.  When a sorted list was used for identifiers in C
  # code, identifiers order was used to have the same error messages
  # order, with
  #  sort {$a->{'extra'}->{'normalized'} cmp $b->{'extra'}->{'normalized'}}
  foreach my $element (@duplicate_labels) {
    _existing_label_error($self, $element, $error_messages, $debug);
  }
}
# Try to register $ELEMENT as target for its label, report an error with
# _existing_label_error if it could not be registered, and add the
# element to the document labels list in any case.  Returns the
# _add_element_to_identifiers_target return value.
# Not clear whether this should be considered as a private function
# here solely to reuse code, or if it should be considered to be public.
# There is no XS override but the function modifies Perl data that is also
# in C when XS is used.  Therefore this function should only be called from
# Perl if there is no XS used.
sub register_label_element($$;$$)
{
  my ($self, $element, $error_messages, $debug) = @_;

  my $is_new_target = _add_element_to_identifiers_target($self, $element);
  _existing_label_error($self, $element, $error_messages, $debug)
    unless ($is_new_target);

  # TODO do not push at the end but have the caller give an information
  # on the element it should be after or before in the list?
  push @{$self->{'labels_list'}}, $element;

  return $is_new_target;
}
1;
__END__
=head1 NAME
Texinfo::Document - Texinfo document tree and information
=head1 SYNOPSIS
use Texinfo::Parser;
my $parser = Texinfo::Parser::parser();
my $document = $parser->parse_texi_file("somefile.texi");
my $indices_information = $document->indices_information();
my $float_types_arrays = $document->floats_information();
my $internal_references_array
= $document->internal_references_information();
# $identifier_target is a hash reference on normalized
# node/float/anchor/namedanchor names.
my $identifier_target = $document->labels_information();
# A hash reference, keys are @-command names, value is the
# corresponding @-commands tree element or an
# array reference holding all the corresponding @-commands.
# Also contains dircategory and direntry list.
my $global_commands_information
= $document->global_commands_information();
# a hash reference on document information (encodings,
# input file name, for example).
my $global_information = $document->global_information();
=head1 NOTES
The main purpose of the Texinfo Perl modules is to be used in C<texi2any> to
convert Texinfo to other formats. There is no promise of API stability.
=head1 DESCRIPTION
This module is used to represent parsed Texinfo documents, with the Texinfo
tree and associated information. A document is always obtained from
a Texinfo parser method call as the result of parsing a Texinfo text or file.
For example, the I<$document> obtained in the following example is a
C<Texinfo::Document> object:
my $parser = Texinfo::Parser::parser();
my $document = $parser->parse_texi_file("somefile.texi");
=head1 METHODS
=head2 Getting document information
The main purpose of Texinfo::Document methods is to retrieve information
on a Texinfo document.
The Texinfo tree obtained by parsing a Texinfo document is available through
C<tree>:
=over
=item $tree = tree($document, $handler_only)
X<C<tree>>
The I<$tree> is a hash reference. It is described in
L<Texinfo::Parser/TEXINFO TREE>.
If I<$handler_only> is set and XS extensions are used, the returned
tree holds a reference to the C Texinfo tree data only, but no actual
Perl Texinfo tree. This avoids building the Perl tree if all the
functions called with the tree as argument have XS interfaces and
directly use the C data and do not use the Perl tree.
=back
Some global information is available through C<global_information>:
=over
=item $info = global_information($document)
X<C<global_information>>
The I<$info> returned is a hash reference. The possible keys are
=over
=item included_files
An array of included file paths as they appear in the document. Binary
strings. From both C<@include> and C<@verbatiminclude>.
=item input_encoding_name
C<input_encoding_name> string is the encoding name used for the
Texinfo code.
=item input_file_name
=item input_directory
The name of the main Texinfo input file and the associated directory.
Binary strings. In C<texi2any>, they should come from the command line
(and can be decoded with the encoding in the customization variable
C<COMMAND_LINE_ENCODING>).
=back
If the global information changed, C<global_information> should be called
to update the hash returned by previous calls before accessing the hash again.
=back
Some command lists are available, such that it is possible to go through
the corresponding tree elements without walking the tree. They are
available through C<global_commands_information>:
=over
=item $commands = global_commands_information($document)
X<C<global_commands_information>>
I<$commands> is a hash reference. The keys are @-command names. For
@-commands that may appear multiple times in the Texinfo document, the
associated values are array references containing all the corresponding
tree elements. For @-commands that should appear only once, the associated
value is the tree element.
The following list of commands is also available as a key:
=over
=item dircategory_direntry
An array of successive C<@dircategory> and C<@direntry> as they appear
in the document.
=back
=back
All the @-commands that have an associated label (so can be the
target of cross references) -- C<@node>, C<@anchor>, C<@namedanchor>
and C<@float> with label -- have a normalized name associated, constructed as
described in the I<HTML Xref> node in the Texinfo documentation. Those
normalized labels and the association with @-commands is available through
C<labels_information>:
=over
=item $identifier_target = labels_information($document)
X<C<labels_information>>
I<$identifier_target> is a hash reference whose keys are normalized
labels, and the associated value is the corresponding @-command.
=item $labels_list = labels_list ($document)
X<C<labels_list>>
I<$labels_list> is a list of Texinfo tree command elements that
could be the target of cross references.
=back
Information on C<@float> grouped by type of floats, each type corresponding
to potential C<@listoffloats> is available through C<floats_information>.
=over
=item $float_types = floats_information($document)
X<C<floats_information>>
I<$float_types> is a hash reference whose keys are normalized float
types (the first float argument, or the C<@listoffloats> argument).
The normalization is the same as for the first step of node names
normalization. The value is a list of array references, each with, as
first element, a float tree element appearing in the Texinfo document
and, as second element, the section element the float appeared in.
=back
Internal references, nodes and section information may also be available.
=over
=item $internal_references_array = internal_references_information($document)
X<C<internal_references_information>>
The function returns an array reference of cross-reference commands referring
to the same document with @-commands that refer to node, anchors or floats.
=item $nodes_list = nodes_list($document)
Returns an array reference containing information on each node.
=item $sections_list = sections_list($document)
Returns an array reference containing information on each section.
=item $sectioning_root = sectioning_root($document)
Returns the sectioning root information. It is a hash reference with the
I<sections_list> key, an array reference of the top level sectioning
commands relations.
=back
Information about defined indexes, indexes merging and index entries is
available through C<indices_information>.
=over
=item $indices_information = $document->indices_information()
X<C<indices_information>>
I<$indices_information> is a hash reference. The keys are
=over
=item in_code
1 if the index entries should be formatted as code, 0 in the opposite case.
=item name
The index name.
=item prefix
An array reference of prefix associated to the index.
=item merged_in
In case the index is merged to another index, this key holds the name of
the index the index is merged into. It takes into account indirectly
merged indexes.
=item index_entries
An array reference containing index entry structures for index entries
associated with the index. The index entry could be associated to
@-commands like C<@cindex>, or C<@item> in C<@vtable>, or definition
commands entries like C<@deffn>.
The keys of the index entry structures are
=over
=item index_name
The index name associated to the command. Not modified if the corresponding
index is merged in another index (with C<@synindex>, for example).
=item entry_element
The element in the parsed tree associated with the @-command holding the
index entry.
=item entry_number
The number of the index entry.
=back
=back
The following shows the references corresponding to the default indexes
I<cp> and I<fn>, the I<fn> index having its entries formatted as code and
the indexes corresponding to the following texinfo
@defindex some
@defcodeindex code
$index_names = {'cp' => {'name' => 'cp', 'in_code' => 0, },
'fn' => {'name' => 'fn', 'in_code' => 1, },
'some' => {'in_code' => 0},
'code' => {'in_code' => 1}};
If C<name> is not set, it is set to the index name.
=back
=head2 Merging and sorting indexes
Merged and sorted document indexes are also available. Parsed indexes
are not merged nor sorted, L<Texinfo::Indices> functions are
called to merge or sort the indexes the first time the following
methods are called. The results are afterwards associated to the
document and simply returned.
=over
=item $merged_indices = $document->merged_indices()
X<C<merged_indices>>
Merge indexes if needed and return merged indexes. The I<$merged_indices>
returned is a hash reference whose keys are the index names and values arrays
of index entry structures described in L</index_entries>.
L<< C<Texinfo::Indices::merge_indices>|Texinfo::Indices/$merged_indices = merge_indices($indices_information) >>
is used to merge the indexes.
It is not useful to call this function directly if indexes are sorted, as
it is already called by index sorting functions.
=back
In general, the sorting methods should not be called directly, instead
L<Texinfo::Convert::Converter/Index sorting> Converter methods should be
used, which already call the following functions.
=over
=item $sorted_indices = sorted_indices_by_index($document, $converter, $use_unicode_collation, $locale_lang)
=item $sorted_indices = sorted_indices_by_letter($document, $converter, $use_unicode_collation, $locale_lang)
X<C<sorted_indices_by_index>> X<C<sorted_indices_by_letter>>
C<sorted_indices_by_letter> returns the indices sorted by index and letter,
while C<sorted_indices_by_index> returns the indexes with all entries
of an index together.
By default, indexes are sorted according to the I<Unicode Collation Algorithm>
defined in the L<Unicode Technical Standard
#10|http://www.unicode.org/reports/tr10/>, without language-specific collation
tailoring. If I<$use_unicode_collation> is set to 0, the sorting will not use
the I<Unicode Collation Algorithm> and simply sort according to the codepoints.
If I<$locale_lang> is set, the language is used for linguistic tailoring of the
sorting, if possible.
When sorting by letter, an array reference of letter hash references is
associated with each index name. Each letter hash reference has two
keys, a I<letter> key with the letter, and an I<entries> key with an array
reference of sorted index entries beginning with the letter. The letter
is a character string suitable for sorting letters, but is not necessarily
the best to use for output.
When simply sorting, the array of the sorted index entries is associated
with the index name.
The optional I<$converter> argument is used for error reporting; if it is not
defined, the I<$document> is used.
L<< C<Texinfo::Indices::sort_indices_by_index>|Texinfo::Indices/$index_entries_sorted = sort_indices_by_index($document, $converter, $use_unicode_collation, $locale_lang) >>
and L<< C<Texinfo::Indices::sort_indices_by_letter>|Texinfo::Indices/$index_entries_sorted = sort_indices_by_letter($document, $converter, $use_unicode_collation, $locale_lang) >>
are used to sort the indexes, if needed.
In general, those methods should not be called directly, instead
L<< C<Texinfo::Convert::Converter::get_converter_indices_sorted_by_index>|Texinfo::Convert::Converter/$sorted_indices = $converter->get_converter_indices_sorted_by_index() >>
and L<< C<Texinfo::Convert::Converter::get_converter_indices_sorted_by_letter>|Texinfo::Convert::Converter/$sorted_indices = $converter->get_converter_indices_sorted_by_letter() >>
should be used. The C<Texinfo::Convert::Converter> methods call
C<sorted_indices_by_index> and C<sorted_indices_by_letter>.
=back
=head2 Getting errors and warnings
A document has an associated list of error and warning messages, in which
errors and warnings are registered. To get the errors registered in the
document, the C<errors> method should be called.
=over
=item $error_warnings_list = errors($document)
This function returns the errors and warnings registered since setting
up the I<$document> (or since the previous call to the function). The returned
I<$error_warnings_list> is an array of hash references,
one for each error, warning or error line continuation. The format of
these hash references is described in
L<Texinfo::Report::count_errors|Texinfo::Report/$error_count = count_errors ($error_messages)>.
=back
=head2 Getting customization options values registered in document
By default, customization information is registered in a document object
just after parsing the Texinfo code. Structuring and tree transformation
methods then get customization variables values from the document object
they have in argument. The customization variables set by default may be a
subset selected to be useful for structuring and tree transformation codes.
To retrieve Texinfo customization variables you can call C<get_conf>:
=over
=item $value = $document->get_conf($variable_name)
Returns the value of the Texinfo customization variable I<$variable_name>
(possibly C<undef>), if the variable value was registered in the document,
or C<undef> otherwise.
=back
=head2 Registering information in document
Some information can be registered in the document.
=over
=item register_document_options ($document, $options)
X<C<register_document_options>>
The I<$options> hash reference holds options for the document. These options
should be Texinfo customization options. Usually, the options registered in
the document contain those useful for structuring and tree transformation
taking place between Texinfo code parsing and conversion to output formats.
Indeed, document customization options are mainly accessed by structuring and
tree transformation methods (by calling L<< C<get_conf>|/$value = $document->get_conf($variable_name) >>). The options should in general be registered before
the calls to C<get_conf>.
=item set_document_global_info($document, $key, $value)
X<C<set_document_global_info>>
Add I<$value> as the I<$key> global information of I<$document>. This method
should not be generally useful, as document global information is already
set by the Texinfo parser. The information set should be available through
the next calls to L<global_information|/$info = global_information($document)>.
The method should in general be called before the calls to
C<global_information>.
=back
=head2 Methods for Perl and C code interactions
The parsing of Texinfo code, structuring and transformations of the document
and tree called through Texinfo Perl modules may be done by pure Perl modules
or by Perl XS extensions in native code (written in C).
In general, it makes no difference whether pure Perl or C code is used. When
the document and tree are modified through native code based on C code, the
Perl data are automatically rebuilt when calling the accessors described
previously. In some cases, however, specific functions need to be called to
pass information from C data to Perl or perform actions related to C data.
When the tree is directly accessed in Perl (not through a document)
but is modified by C code, when the Perl tree holds only a reference to
C data, but no Perl data, or when you want to update all the Perl data
before doing changes in Perl only, you can build the Perl data from the C data
with C<build_tree>:
=over
=item $tree = build_tree($tree, $no_store)
X<C<build_tree>>
Return a I<$tree>, built from C data. If there are no Perl extensions in C,
the tree is returned as is. The tree built is based on the Texinfo parsed
document associated to the Texinfo tree I<$tree>.
If the optional I<$no_store> argument is set, remove the C data. It may be
useful if you call functions that modify the Perl tree only, and not the C data,
followed by function calls that output the result and use the C data if present.
Removing the C data makes sure that the output is not based on unmodified C data,
but on the modified Perl data.
=back
Note that the Perl tree associated to a document is rebuilt from C data
when calling C<< $document->tree() >>. Similarly, the tree is rebuilt when
calling other accessors that depend on the document tree. Therefore
C<build_tree> should mainly be called when there is no document associated to a
tree and C<< $document->tree() >> cannot be called to rebuild the tree,
or with the I<$no_store> argument, if you want to remove the link with C data.
Some methods allow releasing the memory held by C data associated
to a Texinfo parsed document:
=over
=item destroy_document($document)
X<C<destroy_document>>
Release the C data corresponding to I<$document>.
=back
=head1 SEE ALSO
L<Texinfo::Parser>. L<Texinfo::Structuring>.
=head1 AUTHOR
Patrice Dumas, E<lt>bug-texinfo@gnu.orgE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright 2010- Free Software Foundation, Inc. See the source file for
all copyright years.
This library is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at
your option) any later version.
=cut