tta/perl/Texinfo/Document.pm - texinfo - Git at Google

 # Copyright 2023-2025 Free Software Foundation, Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License,
 # or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.

 # This package provides a view of a parsed Texinfo document.  The
 # instantiated objects are also used to carry information for the
 # XS modules, mainly to be able to find the document information
 # in memory.
 # Also note that the initialization of the C library is done by
 # this module XS code when XS extensions are used.
 #
 # ALTIMP perl/XSTexinfo/parser_document/DocumentXS.xs
 # ALTIMP C/main/document.c

 package Texinfo::Document;

 use strict;
 use warnings;

 # To check if there is no erroneous autovivification
 #no autovivification qw(fetch delete exists store strict);

 use Carp qw(cluck confess);

 eval { require Devel::Cycle; Devel::Cycle->import(); };

 use Texinfo::DocumentXS;

 use Texinfo::XSLoader;

 use Texinfo::TreeElement;

 use Texinfo::Common;

 use Texinfo::Report;
 use Texinfo::Indices;
 use Texinfo::ManipulateTree;

 our $VERSION = '7.2dev';

 # There is a full coverage by the C implementation.
 # Relevant XS interfaces are all implemented.
 # See comments before methods definitions for an explanation of why some
 # methods have no XS override.

 # Result of Texinfo::XSLoader::XS_parser_enabled(); import() only
 # installs the %XS_overrides entries when it is true.
 my $XS_parser = Texinfo::XSLoader::XS_parser_enabled();

 # Perl sub name => name of the XS sub replacing it.
 # Every pair is written as "key" => "value", with the fat comma as the
 # only separator between a key and its value.
 our %XS_overrides = (
   "Texinfo::Document::_XS_destroy_document"
     => "Texinfo::DocumentXS::destroy_document",
   "Texinfo::Document::set_document_global_info"
     => "Texinfo::DocumentXS::set_document_global_info",
   "Texinfo::Document::errors"
     => "Texinfo::DocumentXS::document_errors",
   "Texinfo::Document::parser_errors"
     => "Texinfo::DocumentXS::document_parser_errors",
   "Texinfo::Document::build_tree"
     => "Texinfo::DocumentXS::build_tree",
   "Texinfo::Document::tree"
     => "Texinfo::DocumentXS::document_tree",
   "Texinfo::Document::register_document_options"
     => "Texinfo::DocumentXS::register_document_options",
   "Texinfo::Document::get_conf"
     => "Texinfo::DocumentXS::document_get_conf",
   "Texinfo::Document::global_information"
     => "Texinfo::DocumentXS::document_global_information",
   "Texinfo::Document::indices_information"
     => "Texinfo::DocumentXS::document_indices_information",
   "Texinfo::Document::global_commands_information"
     => "Texinfo::DocumentXS::document_global_commands_information",
   "Texinfo::Document::labels_information"
     => "Texinfo::DocumentXS::document_labels_information",
   "Texinfo::Document::labels_list"
     => "Texinfo::DocumentXS::document_labels_list",
   "Texinfo::Document::nodes_list"
     => "Texinfo::DocumentXS::document_nodes_list",
   "Texinfo::Document::sections_list"
     => "Texinfo::DocumentXS::document_sections_list",
   "Texinfo::Document::sectioning_root"
     => "Texinfo::DocumentXS::document_sectioning_root",
   "Texinfo::Document::headings_list"
     => "Texinfo::DocumentXS::document_headings_list",
   "Texinfo::Document::floats_information"
     => "Texinfo::DocumentXS::document_floats_information",
   "Texinfo::Document::internal_references_information"
     => "Texinfo::DocumentXS::document_internal_references_information",

   "Texinfo::Document::setup_indices_sort_strings"
     => "Texinfo::DocumentXS::setup_indices_sort_strings",
   "Texinfo::Document::indices_sort_strings"
     => "Texinfo::DocumentXS::indices_sort_strings",

   "Texinfo::Document::print_document_indices_information"
     => "Texinfo::DocumentXS::print_document_indices_information",
   "Texinfo::Document::print_document_indices_sort_strings"
     => "Texinfo::DocumentXS::print_document_indices_sort_strings",
 );

 # Result of Texinfo::XSLoader::XS_structuring_enabled(); import() only
 # installs the %XS_structure_overrides entries when it is true.
 my $XS_structuring = Texinfo::XSLoader::XS_structuring_enabled();

 # Perl sub name => name of the XS sub replacing it, one entry at a time.
 our %XS_structure_overrides = ();

 $XS_structure_overrides{"Texinfo::Document::print_document_listoffloats"}
   = "Texinfo::DocumentXS::print_document_listoffloats";

 # needed with the Reader/TreeElement interfaces only
 $XS_structure_overrides{
     "Texinfo::Document::register_document_relations_lists_elements"}
   = "Texinfo::DocumentXS::register_document_relations_lists_elements";


 # Set to 1 once the overrides have been installed, such that they are
 # installed on the first import() call only.
 our $module_loaded = 0;

 # Install the enabled XS overrides the first time the module is
 # imported, then hand over to Exporter::import with @_ left untouched.
 sub import {
   unless ($module_loaded) {
     # parser overrides first, structuring overrides next
     my @override_tables;
     push @override_tables, \%XS_overrides if ($XS_parser);
     push @override_tables, \%XS_structure_overrides if ($XS_structuring);

     foreach my $table (@override_tables) {
       foreach my $perl_sub (keys(%$table)) {
         Texinfo::XSLoader::override ($perl_sub, $table->{$perl_sub});
       }
     }
     $module_loaded = 1;
   }
   # The usual import method
   goto &Exporter::import;
 }

 # Create a document object holding $INDICES_INFORMATION and otherwise
 # empty containers.
 # No XS override, only called from Texinfo::ParserNonXS.
 sub new_document($) {
   my ($indices_information) = @_;

   my %document = (
     'indices' => $indices_information,
     'listoffloats_list' => {},
     'internal_references' => [],
     'global_info' => {
       'input_encoding_name' => 'utf-8',
       #'included_files' => [],
     },
     'commands_info' => {},
     'identifiers_target' => {},
     'labels_list' => [],
     'nodes_list' => [],
     'sections_list' => [],
     'headings_list' => [],
     # error messages for parsing
     'parser_error_messages' => [],
     # error messages for the document for structuring, not for parsing
     'error_messages' => [],
   );

   # blessed in the current package
   return bless \%document;
 }

 # Associate $TREE with $DOCUMENT, under the 'tree' key.
 sub register_tree($$)
 {
   my ($document, $tree) = @_;

   $document->{'tree'} = $tree;
 }

 # Set the $KEY entry of the document global information hash to $VALUE.
 sub set_document_global_info($$$)
 {
   my ($document, $key, $value) = @_;

   $document->{'global_info'}->{$key} = $value;
 }

 # Return the tree registered in the document.  The optional second
 # argument is accepted but not used by this Perl implementation.
 sub tree($;$)
 {
   my ($document) = @_;

   return $document->{'tree'};
 }

 # Accessor for the indices information, stored under the 'indices' key.
 sub indices_information($)
 {
   my ($document) = @_;

   return $document->{'indices'};
 }

 # Accessor for the floats information, stored under the
 # 'listoffloats_list' key.
 sub floats_information($)
 {
   my ($document) = @_;

   return $document->{'listoffloats_list'};
 }

 # Accessor for the value stored under the 'internal_references' key.
 sub internal_references_information($)
 {
   my ($document) = @_;

   return $document->{'internal_references'};
 }

 # Accessor for the value stored under the 'commands_info' key.
 sub global_commands_information($)
 {
   my ($document) = @_;

   return $document->{'commands_info'};
 }

 # Accessor for the value stored under the 'global_info' key.
 sub global_information($)
 {
   my ($document) = @_;

   return $document->{'global_info'};
 }

 # Accessor for the value stored under the 'identifiers_target' key.
 sub labels_information($)
 {
   my ($document) = @_;

   return $document->{'identifiers_target'};
 }

 # Accessor for the value stored under the 'labels_list' key.
 sub labels_list($)
 {
   my ($document) = @_;

   return $document->{'labels_list'};
 }

 # Accessor for the value stored under the 'nodes_list' key.
 sub nodes_list($)
 {
   my ($document) = @_;

   return $document->{'nodes_list'};
 }

 # Accessor for the value stored under the 'sections_list' key.
 sub sections_list($)
 {
   my ($document) = @_;

   return $document->{'sections_list'};
 }

 # Accessor for the value stored under the 'sectioning_root' key.
 sub sectioning_root($)
 {
   my ($document) = @_;

   return $document->{'sectioning_root'};
 }

 # Accessor for the value stored under the 'headings_list' key.
 sub headings_list($)
 {
   my ($document) = @_;

   return $document->{'headings_list'};
 }

 # Store $OPTIONS in the document, where get_conf looks values up.
 # Useful for options used in structuring/tree transformations.
 sub register_document_options($$)
 {
   my ($self, $options) = @_;

   $self->{'options'} = $options;
 }

 # Return the value of the option $VAR registered in the document, or
 # undef if no options were registered.
 sub get_conf($$)
 {
   my ($self, $var) = @_;

   my $options = $self->{'options'};
   return $options->{$var} if ($options);

   # This may happen if a tree/document is manipulated without having
   # any configuration set.  This is or was the case for pod2texi.
   # This is allowed.
   # An explicit undef is returned, also in list context.
   return undef;
 }


 # remove cycles
 # Deletes the keys listed below from $SECTION_RELATION, and the 'next'
 # (and, for toplevel directions, 'prev') directions when the directions
 # hashes exist.
 sub _remove_section_relations_relations($) {
   my ($section_relation) = @_;

   my @relation_keys = ('associated_anchor_command', 'associated_node',
                        'part_following_node', 'part_associated_section',
                        'section_children');
   delete $section_relation->{$_} foreach (@relation_keys);

   if (exists($section_relation->{'section_directions'})) {
     # next/prev cycles
     delete $section_relation->{'section_directions'}->{'next'};
   }
   if (exists($section_relation->{'toplevel_directions'})) {
     # next/prev cycles, but also next/up and prev/up as the first
     # chapter level relation is next for top and also up
     foreach my $direction ('next', 'prev') {
       delete $section_relation->{'toplevel_directions'}->{$direction};
     }
   }
 }

 # Delete the 'element' key of $SECTION_RELATION.
 sub _remove_section_relations_references($) {
   my ($section_relation) = @_;

   delete $section_relation->{'element'};
 }

 # Delete from $NODE_RELATION the keys listed below.
 sub _remove_node_relations_references($) {
   my ($node_relation) = @_;

   my @reference_keys = ('element', 'associated_title_command',
                         'node_description', 'node_long_description',
                         'menus', 'node_directions');
   delete $node_relation->{$_} foreach (@reference_keys);
 }

 # Delete the 'element' key of $HEADING_RELATION.
 sub _remove_heading_relations_references($) {
   my ($heading_relation) = @_;

   delete $heading_relation->{'element'};
 }

 # can also be called from XS
 # Remove links held in $DOCUMENT data.
 # If $REMOVE_REFERENCES is not set, the objective of removing items is to
 # remove cycles such that Perl can reclaim the removed memory.  If
 # $REMOVE_REFERENCES is set, the references to elements are removed, in
 # particular to be able to check that there is no reference remaining
 # other than the reference kept in C code.
 sub remove_document_references($;$) {
   my ($document, $remove_references) = @_;

   my $tree = $document->{'tree'};

   # always done: remove the relations between section relations
   my $sections_list = $document->{'sections_list'};
   foreach my $section_relation (@$sections_list) {
     _remove_section_relations_relations($section_relation);
   }

   # always done if there is a tree
   if (defined($tree)) {
     Texinfo::ManipulateTree::tree_remove_parents($tree);
   }

   # REMARK some tests take a long time, for example
   # t/formats_encodings.t at_commands_in_refs
   #find_cycle($document);

   if ($remove_references) {
     # remove the references to elements held by the relations lists
     foreach my $section_relation (@$sections_list) {
       _remove_section_relations_references($section_relation);
     }
     my $nodes_list = $document->{'nodes_list'};
     foreach my $node_relation (@$nodes_list) {
       _remove_node_relations_references($node_relation);
     }
     my $headings_list = $document->{'headings_list'};
     foreach my $heading_relation (@$headings_list) {
       _remove_heading_relations_references($heading_relation);
     }
     # Refers to section relations not directly to tree elements
     #if (exists($document->{'sectioning_root'})) {
     #}

     # these document keys are removed altogether
     delete $document->{'identifiers_target'};
     delete $document->{'labels_list'};
     delete $document->{'internal_references'};
     delete $document->{'commands_info'};
     delete $document->{'listoffloats_list'};

     # indices not existing is not possible for a document created in
     # Perl code, but seems to be possible when built from XS.
     if (exists($document->{'indices'})) {
       # the same index_entries are used in sorted_indices_by_*, so
       # this also removes the references there.
       foreach my $index_name (keys(%{$document->{'indices'}})) {
         my $index = $document->{'indices'}->{$index_name};
         foreach my $index_entry (@{$index->{'index_entries'}}) {
           delete $index_entry->{'entry_element'};
           delete $index_entry->{'entry_associated_element'};
         }
       }
     }

     # Texinfo tree elements in translation caches are not released, they may
     # be shown for debugging here to verify that they do not show up
     # somewhere.  The block is disabled, switch the condition to enable it.
     #if (1) {
     if (0) {
       foreach my $lang (sort(keys(
                          %{$Texinfo::Translations::translation_cache}))) {
         my $lang_cache = $Texinfo::Translations::translation_cache->{$lang};
         foreach my $string (sort(keys(%$lang_cache))) {
           foreach my $context (sort(keys(%{$lang_cache->{$string}}))) {
             my ($translation, $trans_tree)
               = @{$lang_cache->{$string}->{$context}};
             print STDERR "TRANSL: $string-$context: ";
             if (defined($trans_tree)) {
               Texinfo::ManipulateTree::_print_tree_elements_ref($trans_tree, 0);
             } else {
               print STDERR "NOT NEEDED\n";
             }
           }
         }
       }
     }

     if (defined($tree)) {
       # the document is passed as second argument only if TEST is set
       # and greater than 1
       my $test_level = $document->get_conf('TEST');
       my $check_refcount;
       if (defined($test_level) and $test_level > 1) {
         $check_refcount = $document;
       }
       Texinfo::ManipulateTree::tree_remove_references($document->{'tree'},
                                                       $check_refcount);
       # last, the document does not refer to the tree anymore
       delete $document->{'tree'};
     }
   }
 }

 # The Perl implementation only unpacks its arguments and does nothing
 # else.  %XS_overrides maps this sub to
 # Texinfo::DocumentXS::destroy_document.
 sub _XS_destroy_document($;$) {
   my ($document, $remove_references) = @_;
 }

 # Remove the document references in Perl with
 # remove_document_references, then call _XS_destroy_document, both
 # with the same $DOCUMENT and $REMOVE_REFERENCES arguments.
 sub destroy_document($;$) {
   my $document = shift;
   my $remove_references = shift;

   remove_document_references($document, $remove_references);
   _XS_destroy_document($document, $remove_references);
 }

 # This method does nothing in Perl besides reading its argument.
 # The XS override registers a reference to the C element in Perl
 # nodes, sectioning and heading commands.  Only needed if the
 # TreeElement/Reader interfaces are used, which is not the case for
 # converters used in texi2any.
 sub register_document_relations_lists_elements($)
 {
   my $document = shift;
 }

 # The Perl implementation returns $TREE as is, the optional $NO_STORE
 # argument is read but not used.  The XS override rebuilds the Perl
 # tree based on XS data.
 sub build_tree($;$)
 {
   my ($tree, $no_store) = @_;

   return $tree;
 }


 # Add the message returned by Texinfo::Report::line_warn for $TEXT and
 # $ERROR_LOCATION_INFO to the document error messages.  $CONTINUATION is
 # optional, 0 is used if it is not defined.
 sub document_line_warn($$$;$)
 {
   my ($document, $text, $error_location_info, $continuation) = @_;

   if (!defined($continuation)) {
     $continuation = 0;
   }

   my $error_messages = $document->{'error_messages'};
   my $debug = $document->get_conf('DEBUG');

   push @$error_messages,
        Texinfo::Report::line_warn($text, $error_location_info,
                                   $continuation, $debug);
 }

 # Add the message returned by Texinfo::Report::line_error for $TEXT and
 # $ERROR_LOCATION_INFO to the document error messages.  $CONTINUATION is
 # optional, 0 is used if it is not defined.
 sub document_line_error($$$;$)
 {
   my ($document, $text, $error_location_info, $continuation) = @_;

   if (!defined($continuation)) {
     $continuation = 0;
   }

   my $error_messages = $document->{'error_messages'};
   my $debug = $document->get_conf('DEBUG');

   push @$error_messages,
        Texinfo::Report::line_error($text, $error_location_info,
                                    $continuation, $debug);
 }

 # Return an array reference holding a copy of the parser error messages
 # and set the document parser error messages to a new empty array.  The
 # array previously held by the document is not modified.
 sub parser_errors($)
 {
   my ($document) = @_;

   my @pending_messages = @{$document->{'parser_error_messages'}};

   $document->{'parser_error_messages'} = [];

   return \@pending_messages;
 }

 # Return an array reference holding the document error messages, which
 # are removed from the document error messages array, emptied in place.
 # The XS override passes C error messages to the document
 # error_messages and removes error messages in C.
 sub errors($)
 {
   my ($document) = @_;

   my @drained_messages = splice(@{$document->{'error_messages'}});

   return \@drained_messages;
 }


 # Return the merged indices, obtained from
 # Texinfo::Indices::merge_indices the first time and cached in the
 # document.  Nothing is computed if there is no indices information.
 # No XS override.
 # This method is already called by other methods, in particular
 # sorted_indices_by_* when the indexes are sorted.  When the indexes
 # are merged but not sorted, it is sensible to call this function
 # directly.  Also called directly in tests.
 # XS override is not needed, if the converters calling this function
 # are implemented in C, even partly, they should call the C counterpart
 # rather than go through an XS interface.
 sub merged_indices($)
 {
   my ($self) = @_;

   if ($self->{'indices'} and !$self->{'merged_indices'}) {
     $self->{'merged_indices'}
       = Texinfo::Indices::merge_indices($self->{'indices'});
   }
   return $self->{'merged_indices'};
 }

 # Calls Texinfo::Indices::setup_index_entries_sort_strings and caches
 # the result in the document, if not already cached.
 # In general, it is not needed to call that function directly,
 # as it is called by Texinfo::Indices::sort_indices_by_*.  It could
 # be called in advance if errors need to be collected early.
 sub setup_indices_sort_strings($$)
 {
   my ($document, $converter) = @_;

   if (!$document->{'index_entries_sort_strings'}) {
     $document->{'index_entries_sort_strings'}
       = Texinfo::Indices::setup_index_entries_sort_strings($document,
               $converter, $document->merged_indices(),
               $document->indices_information(), 0);
   }
 }

 # index_entries_sort_strings accessor, calling
 # setup_indices_sort_strings first.  A different function from
 # setup_indices_sort_strings such that there is no need to build C data
 # to Perl when calling setup_indices_sort_strings, to make it possible
 # to delay building Perl data for indices_sort_strings function call in XS.
 sub indices_sort_strings($$)
 {
   my ($document, $converter) = @_;

   setup_indices_sort_strings($document, $converter);

   return $document->{'index_entries_sort_strings'};
 }

 # Calls Texinfo::Indices::sort_indices_by_letter and caches the result
 # in the document, with a cache entry per collation setup.
 # No XS override, as there is no reason to call this function directly
 # outside of tests, Texinfo::Convert::Converter
 # get_converter_indices_sorted_by_letter should be called directly.
 # In general a CONVERTER argument is given, but if not the DOCUMENT is
 # used instead to register error messages.
 sub sorted_indices_by_letter($$$$)
 {
   my ($document, $converter, $use_unicode_collation, $locale_lang) = @_;

   # cache key, the language by default
   my $lang_key = $locale_lang;
   if (!$use_unicode_collation) {
     $lang_key = '';
   } elsif (!defined($locale_lang)) {
     # special name corresponding to Unicode Collation with 'Non-Ignorable'
     # set for variable collation elements
     $lang_key = '-';
   }

   $document->{'sorted_indices_by_letter'} ||= {};

   if (!$document->{'sorted_indices_by_letter'}->{$lang_key}) {
     # called to have the merged indices set up before sorting
     $document->merged_indices();
     $document->{'sorted_indices_by_letter'}->{$lang_key}
       = Texinfo::Indices::sort_indices_by_letter($document, $converter,
                                    $use_unicode_collation, $locale_lang);
   }
   return $document->{'sorted_indices_by_letter'}->{$lang_key};
 }

 # Calls Texinfo::Indices::sort_indices_by_index and caches the result
 # in the document, with a cache entry per collation setup.
 # No XS override, as there is no reason to call this function directly
 # outside of tests, Texinfo::Convert::Converter
 # get_converter_indices_sorted_by_index should be called directly.
 # In general a CONVERTER argument is given, but if not the DOCUMENT is
 # used instead to register error messages.
 sub sorted_indices_by_index($$$$)
 {
   my ($document, $converter, $use_unicode_collation, $locale_lang) = @_;

   # cache key, the language by default
   my $lang_key = $locale_lang;
   if (!$use_unicode_collation) {
     $lang_key = '';
   } elsif (!defined($locale_lang)) {
     # special name corresponding to Unicode Collation with 'Non-Ignorable'
     # set for variable collation elements
     $lang_key = '-';
   }

   $document->{'sorted_indices_by_index'} ||= {};

   if (!$document->{'sorted_indices_by_index'}->{$lang_key}) {
     # called to have the merged indices set up before sorting
     $document->merged_indices();
     $document->{'sorted_indices_by_index'}->{$lang_key}
       = Texinfo::Indices::sort_indices_by_index($document, $converter,
                                    $use_unicode_collation, $locale_lang);
   }
   return $document->{'sorted_indices_by_index'}->{$lang_key};
 }


 # Wrapper on print_listoffloats_types that can be used for XS overriding.
 # Returns undef if $DOCUMENT is false or has no floats information.
 # Used in tests only.
 sub print_document_listoffloats($)
 {
   my ($document) = @_;

   return undef if (!$document);

   my $floats = $document->floats_information();
   return undef if (!$floats);

   my $float_text
     = Texinfo::ManipulateTree::print_listoffloats_types($floats);
   return $float_text;
 }

 # Wrapper on print_indices_information that can be used for XS overriding.
 # Returns undef if $DOCUMENT is false or has no indices information.
 # Used in tests only.
 sub print_document_indices_information($)
 {
   my ($document) = @_;

   return undef if (!$document);

   my $indices_information = $document->indices_information();
   return undef if (!$indices_information);

   my $indices_info_text
     = Texinfo::Indices::print_indices_information($indices_information);
   return $indices_info_text;
 }

 # Return a string listing, for each sorted index with entries, the index
 # name followed by the sort strings of the index entries, in the order
 # of the sorted entries.  Returns undef if there are no merged indices.
 # for tests, to be used for overriding
 sub print_document_indices_sort_strings($)
 {
   my ($document) = @_;

   # read from C data if needed
   $document->indices_information();

   # use merged indices here as there are only indices with
   # entries in that data
   my $merged_index_entries = $document->merged_indices();
   if (!$merged_index_entries) {
     return undef;
   }

   my $use_unicode_collation
     = $document->get_conf('USE_UNICODE_COLLATION');
   # the collation language is not needed only if USE_UNICODE_COLLATION
   # is defined and false
   my $locale_lang;
   if (!defined($use_unicode_collation) or $use_unicode_collation) {
     $locale_lang = $document->get_conf('COLLATION_LANGUAGE');
   }

   my $indices_sort_strings = indices_sort_strings($document, undef);
   my $index_entries_sort_strings
     = Texinfo::Indices::format_index_entries_sort_strings(
                                                    $indices_sort_strings);
   my $sorted_index_entries
     = sorted_indices_by_index($document, undef,
                               $use_unicode_collation, $locale_lang);

   my $idx_sort_strings_str = '';
   foreach my $index_name (sort(keys(%$sorted_index_entries))) {
     # index entries sort strings sorted in the order of the index entries
     my $index_entries = $sorted_index_entries->{$index_name};
     next unless (scalar(@{$index_entries}));

     $idx_sort_strings_str .= "${index_name}:\n";
     foreach my $index_entry (@{$index_entries}) {
       my $sort_string = $index_entries_sort_strings->{$index_entry};
       $idx_sort_strings_str .= " ${sort_string}\n";
     }
   }
   return $idx_sort_strings_str;
 }


 # Register in $ERROR_MESSAGES two error messages for $ELEMENT, whose
 # normalized identifier is already associated with another element: one
 # message at the $ELEMENT location and a continuation message at the
 # location of the element already registered.  Nothing is done if
 # $ERROR_MESSAGES is not defined or $ELEMENT has no normalized identifier.
 # In general, we avoid passing error messages separate from the object holding
 # them.  In that case, however, when called from parser, we want
 # parser_error_messages error messages to be modified from a document, and not
 # the error messages of the document, so we pass the error messages list
 # separately.
 sub _existing_label_error($$;$$)
 {
   my ($self, $element, $error_messages, $debug) = @_;

   if ($element->{'extra'}
       and defined($element->{'extra'}->{'normalized'})
       and defined($error_messages)) {
     # the element previously registered with the same identifier
     my $existing_target
       = $self->{'identifiers_target'}
                            ->{$element->{'extra'}->{'normalized'}};
     my $label_element = Texinfo::Common::get_label_element($element);

     push @$error_messages, Texinfo::Report::line_error(
       sprintf(__("\@%s `%s' previously defined"),
               $element->{'cmdname'},
               Texinfo::Convert::Texinfo::convert_to_texinfo(
                 Texinfo::TreeElement::new(
                        {'contents' => $label_element->{'contents'}}))),
       $element->{'source_info'}, 0, $debug);

     push @$error_messages, Texinfo::Report::line_error(
       sprintf(__("here is the previous definition as \@%s"),
               $existing_target->{'cmdname'}),
       $existing_target->{'source_info'}, 1, $debug);
   }
 }

 # Register $ELEMENT as the target of its normalized identifier if the
 # identifier is not already associated with an element.  In that case the
 # 'is_target' extra key is set on $ELEMENT and 1 is returned.  Return 0
 # if $ELEMENT has no normalized identifier or the identifier is taken.
 sub _add_element_to_identifiers_target($$)
 {
   my ($self, $element) = @_;

   my $extra = $element->{'extra'};
   return 0 unless ($extra and defined($extra->{'normalized'}));

   my $normalized = $extra->{'normalized'};
   return 0 if (defined($self->{'identifiers_target'}->{$normalized}));

   $self->{'identifiers_target'}->{$normalized} = $element;
   $extra->{'is_target'} = 1;
   return 1;
 }

 # Sets $self->{'identifiers_target'} based on $self->{'labels_list'}.
 # The elements with an identifier that is already associated with another
 # element are reported with _existing_label_error in $ERROR_MESSAGES.
 # No XS override, only called from Texinfo::ParserNonXS.
 # This should be considered an internal function of the parser.
 # It is here to reuse code.
 sub set_labels_identifiers_target($$;$)
 {
   my ($self, $error_messages, $debug) = @_;

   # elements with a normalized identifier that could not be registered
   my @elements_with_error;

   $self->{'identifiers_target'} = {};
   if (defined($self->{'labels_list'})) {
     foreach my $element (@{$self->{'labels_list'}}) {
       next if (_add_element_to_identifiers_target($self, $element));

       if ($element->{'extra'}
           and defined($element->{'extra'}->{'normalized'})) {
         push @elements_with_error, $element;
       }
     }
   }
   # The errors are reported in document order, since C code uses hashmap
   # for identifiers.  When a sorted list was used for identifiers in C
   # code, identifiers order was used to have the same error messages order:
   #sort {$a->{'extra'}->{'normalized'} cmp $b->{'extra'}->{'normalized'}}
   if (scalar(@elements_with_error) > 0) {
     foreach my $element (@elements_with_error) {
       _existing_label_error($self, $element, $error_messages, $debug);
     }
   }
 }

 # Register $ELEMENT as an identifier target, report an error in
 # $ERROR_MESSAGES if it could not be registered, and add $ELEMENT to the
 # end of the labels list in any case.  Return the registration status.
 # Not clear whether this should be considered as a private function
 # here solely to reuse code, or if it should be considered to be public.
 # There is no XS override but the function modifies Perl data that is also
 # in C when XS is used.  Therefore this function should only be called from
 # Perl if there is no XS used.
 sub register_label_element($$;$$)
 {
   my ($self, $element, $error_messages, $debug) = @_;

   my $retval = _add_element_to_identifiers_target($self, $element);
   _existing_label_error($self, $element, $error_messages, $debug)
     unless ($retval);

   # TODO do not push at the end but have the caller give an information
   # on the element it should be after or before in the list?
   push @{$self->{'labels_list'}}, $element;

   return $retval;
 }

 1;
 __END__
 =head1 NAME

 Texinfo::Document - Texinfo document tree and information

 =head1 SYNOPSIS

   use Texinfo::Parser;

   my $parser = Texinfo::Parser::parser();
   my $document = $parser->parse_texi_file("somefile.texi");

   my $indices_information = $document->indices_information();
   my $float_types_arrays = $document->floats_information();
   my $internal_references_array
     = $document->internal_references_information();

   # $identifier_target is a hash reference keyed on normalized
   # node/float/anchor/namedanchor names.
   my $identifier_target = $document->labels_information();

   # A hash reference, keys are @-command names, value is the
   # corresponding @-commands tree element or an
   # array reference holding all the corresponding @-commands.
   # Also contains dircategory and direntry list.
   my $global_commands_information
                  = $document->global_commands_information();

   # a hash reference on document information (encodings,
   # input file name, for example).
   my $global_information = $document->global_information();

 =head1 NOTES

 The main purpose of the Texinfo Perl modules is to be used in C<texi2any> to
 convert Texinfo to other formats.  There is no promise of API stability.

 =head1 DESCRIPTION

 This module is used to represent parsed Texinfo documents, with the Texinfo
 tree and associated information.  A document is always obtained from
 a Texinfo parser method call as the result of parsing a Texinfo text or file.

 For example, the I<$document> obtained in the following example is a
 C<Texinfo::Document> object:

   my $parser = Texinfo::Parser::parser();
   my $document = $parser->parse_texi_file("somefile.texi");

 =head1 METHODS

 =head2 Getting document information

 The main purpose of Texinfo::Document methods is to retrieve information
 on a Texinfo document.

 The Texinfo tree obtained by parsing a Texinfo document is available through
 C<tree>:

 =over

 =item $tree = tree($document, $handler_only)
 X<C<tree>>

 The I<$tree> is a hash reference.  It is described in
 L<Texinfo::Parser/TEXINFO TREE>.

 If I<$handler_only> is set and XS extensions are used, the returned
 tree holds a reference to the C Texinfo tree data only, but no actual
 Perl Texinfo tree.  This avoids building the Perl tree if all the
 functions called with the tree as argument have XS interfaces and
 directly use the C data and do not use the Perl tree.

 =back

 Some global information is available through C<global_information>:

 =over

 =item $info = global_information($document)
 X<C<global_information>>

 The I<$info> returned is a hash reference.  The possible keys are

 =over

 =item included_files

 An array of included file paths as they appear in the document.  Binary
 strings.  From both C<@include> and C<@verbatiminclude>.

 =item input_encoding_name

 C<input_encoding_name> string is the encoding name used for the
 Texinfo code.

 =item input_file_name

 =item input_directory

 The name of the main Texinfo input file and the associated directory.
 Binary strings.  In C<texi2any>, they should come from the command line
 (and can be decoded with the encoding in the customization variable
 C<COMMAND_LINE_ENCODING>).

 =back

 If the global information changed, C<global_information> should be called
 to update the hash returned by previous calls before accessing the hash again.

 =back

 Some command lists are available, such that it is possible to go through
 the corresponding tree elements without walking the tree.  They are
 available through C<global_commands_information>:

 =over

 =item $commands = global_commands_information($document)
 X<C<global_commands_information>>

 I<$commands> is a hash reference.  The keys are @-command names.  For
 @-commands that may appear multiple times in the Texinfo document, the
 associated values are array references containing all the corresponding
 tree elements.  For @-commands that should appear only once, the associated
 value is the tree element.

 The following list of commands is also available as a key:

 =over

 =item dircategory_direntry

 An array of successive C<@dircategory> and C<@direntry> as they appear
 in the document.

 =back

 =back

 All the @-commands that have an associated label (so can be the
 target of cross references) -- C<@node>, C<@anchor>, C<@namedanchor>
 and C<@float> with label -- have a normalized name associated, constructed as
 described in the I<HTML Xref> node in the Texinfo documentation.  Those
 normalized labels and their association with @-commands are available through
 C<labels_information>:

 =over

 =item $identifier_target = labels_information($document)
 X<C<labels_information>>

 I<$identifier_target> is a hash reference whose keys are normalized
 labels, and the associated value is the corresponding @-command.

 =item $labels_list = labels_list ($document)
 X<C<labels_list>>

 I<$labels_list> is a list of Texinfo tree command elements that
 could be the target of cross references.

 =back

 Information on C<@float> grouped by type of floats, each type corresponding
 to potential C<@listoffloats> is available through C<floats_information>.

 =over

 =item $float_types = floats_information($document)
 X<C<floats_information>>

 I<$float_types> is a hash reference whose keys are normalized float
 types (the first float argument, or the C<@listoffloats> argument).
 The normalization is the same as for the first step of node names
 normalization.  Each value is a list of array references, whose first
 element is a float tree element appearing in the Texinfo document and
 whose second element is the section element the float appeared in.

 =back

 Internal references, nodes and section information may also be available.

 =over

 =item $internal_references_array = internal_references_information($document)
 X<C<internal_references_information>>

 The function returns an array reference of the cross-reference @-commands
 that refer to nodes, anchors or floats of the same document.

 =item $nodes_list = nodes_list($document)

 Returns an array reference containing information on each node.

 =item $sections_list = sections_list($document)

 Returns an array reference containing information on each section.

 =item $sectioning_root = sectioning_root($document)

 Returns the sectioning root information.  It is a hash reference with the
 I<sections_list> key, an array reference of the top level sectioning
 commands relations.

 =back

 Information about defined indexes, indexes merging and index entries is
 available through C<indices_information>.

 =over

 =item $indices_information = $document->indices_information()
 X<C<indices_information>>

 I<$indices_information> is a hash reference whose keys are index names.
 The associated values are hash references with the following keys:

 =over

 =item in_code

 1 if the index entries should be formatted as code, 0 in the opposite case.

 =item name

 The index name.

 =item prefix

 An array reference of the prefixes associated to the index.

 =item merged_in

 In case the index is merged to another index, this key holds the name of
 the index the index is merged into.  It takes into account indirectly
 merged indexes.

 =item index_entries

 An array reference containing index entry structures for index entries
 associated with the index.  The index entry could be associated to
 @-commands like C<@cindex>, or C<@item> in C<@vtable>, or definition
 commands entries like C<@deffn>.

 The keys of the index entry structures are

 =over

 =item index_name

 The index name associated to the command.  Not modified if the corresponding
 index is merged in another index (with C<@synindex>, for example).

 =item entry_element

 The element in the parsed tree associated with the @-command holding the
 index entry.

 =item entry_number

 The number of the index entry.

 =back

 =back

 The following shows the references corresponding to the default indexes
 I<cp> and I<fn>, the I<fn> index having its entries formatted as code, and
 to the indexes defined by the following Texinfo code:

   @defindex some
   @defcodeindex code

   $index_names = {'cp' => {'name' => 'cp', 'in_code' => 0, },
                   'fn' => {'name' => 'fn', 'in_code' => 1, },
                   'some' => {'in_code' => 0},
                   'code' => {'in_code' => 1}};

 If C<name> is not set, it is set to the index name.

 =back

 =head2 Merging and sorting indexes

 Merged and sorted document indexes are also available.  Parsed indexes
 are neither merged nor sorted; L<Texinfo::Indices> functions are
 called to merge or sort the indexes the first time the following
 methods are called.  The results are then associated with the
 document and simply returned by subsequent calls.

 =over

 =item $merged_indices = $document->merged_indices()
 X<C<merged_indices>>

 Merge indexes if needed and return merged indexes.  The I<$merged_indices>
 returned is a hash reference whose keys are the index names and values arrays
 of index entry structures described in L</index_entries>.

 L<< C<Texinfo::Indices::merge_indices>|Texinfo::Indices/$merged_indices = merge_indices($indices_information) >>
 is used to merge the indexes.

 It is not useful to call this function directly if indexes are sorted, as
 it is already called by index sorting functions.

 =back

 In general, the sorting methods should not be called directly, instead
 L<Texinfo::Convert::Converter/Index sorting> Converter methods should be
 used, which already call the following functions.

 =over

 =item $sorted_indices = sorted_indices_by_index($document, $converter, $use_unicode_collation, $locale_lang)

 =item $sorted_indices = sorted_indices_by_letter($document, $converter, $use_unicode_collation, $locale_lang)
 X<C<sorted_indices_by_index>> X<C<sorted_indices_by_letter>>

 C<sorted_indices_by_letter> returns the indices sorted by index and letter,
 while C<sorted_indices_by_index> returns the indexes with all entries
 of an index together.

 By default, indexes are sorted according to the I<Unicode Collation Algorithm>
 defined in the L<Unicode Technical Standard
 #10|http://www.unicode.org/reports/tr10/>, without language-specific collation
 tailoring.  If I<$use_unicode_collation> is set to 0, the sorting will not use
 the I<Unicode Collation Algorithm> and simply sort according to the codepoints.
 If I<$locale_lang> is set, the language is used for linguistic tailoring of the
 sorting, if possible.

 When sorting by letter, an array reference of letter hash references is
 associated with each index name.  Each letter hash reference has two
 keys, a I<letter> key with the letter, and an I<entries> key with an array
 reference of sorted index entries beginning with the letter.  The letter
 is a character string suitable for sorting letters, but is not necessarily
 the best to use for output.

 When simply sorting, the array of the sorted index entries is associated
 with the index name.

 The optional I<$converter> argument is used for error reporting.  If it is
 not defined, the I<$document> is used.

 L<< C<Texinfo::Indices::sort_indices_by_index>|Texinfo::Indices/$index_entries_sorted = sort_indices_by_index($document, $converter, $use_unicode_collation, $locale_lang) >>
 and L<< C<Texinfo::Indices::sort_indices_by_letter>|Texinfo::Indices/$index_entries_sorted = sort_indices_by_letter($document, $converter, $use_unicode_collation, $locale_lang) >>
 are used to sort the indexes, if needed.

 In general, those methods should not be called directly, instead
 L<< C<Texinfo::Convert::Converter::get_converter_indices_sorted_by_index>|Texinfo::Convert::Converter/$sorted_indices = $converter->get_converter_indices_sorted_by_index() >>
 and L<< C<Texinfo::Convert::Converter::get_converter_indices_sorted_by_letter>|Texinfo::Convert::Converter/$sorted_indices = $converter->get_converter_indices_sorted_by_letter() >>
 should be used.  The C<Texinfo::Convert::Converter> methods call
 C<sorted_indices_by_index> and C<sorted_indices_by_letter>.

 =back

 =head2 Getting errors and warnings

 A document has an associated list of error and warning messages, in which
 errors and warnings are registered.  To get the errors registered in the
 document, the C<errors> method should be called.

 =over

 =item $error_warnings_list = errors($document)

 This function returns the errors and warnings registered since setting
 up the I<$document> (or since calling the function).  The returned
 I<$error_warnings_list> is an array of hash references,
 one for each error, warning or error line continuation.  The format of
 these hash references is described in
 L<Texinfo::Report::count_errors|Texinfo::Report/$error_count  = count_errors ($error_messages)>.

 =back

 =head2 Getting customization options values registered in document

 By default, customization information is registered in a document object
 just after parsing the Texinfo code.  Structuring and tree transformation
 methods then get customization variable values from the document object
 passed as argument.  The customization variables set by default may be a
 subset selected to be useful for structuring and tree transformation code.

 To retrieve Texinfo customization variables you can call C<get_conf>:

 =over

 =item $value = $document->get_conf($variable_name)

 Returns the value of the Texinfo customization variable I<$variable_name>
 (possibly C<undef>) if the variable value was registered in the document,
 or C<undef> otherwise.

 =back

 =head2 Registering information in document

 Some information can be registered in the document.

 =over

 =item register_document_options ($document, $options)
 X<C<register_document_options>>

 The I<$options> hash reference holds options for the document.  These options
 should be Texinfo customization options.  Usually, the options registered in
 the document contain those useful for structuring and tree transformation
 taking place between Texinfo code parsing and conversion to output formats.
 Indeed, document customization options are mainly accessed by structuring and
 tree transformation methods (by calling L<< C<get_conf>|/$value = $document->get_conf($variable_name) >>). The options should in general be registered before
 the calls to C<get_conf>.

 =item set_document_global_info($document, $key, $value)
 X<C<set_document_global_info>>

 Register I<$value> as the I<$key> global information of I<$document>.  This
 method should not generally be needed, as document global information is
 already set by the Texinfo parser.  The information set should be available
 through the next calls to L<global_information|/$info = global_information($document)>.
 The method should in general be called before the calls to
 C<global_information>.

 =back

 =head2 Methods for Perl and C code interactions

 The parsing of Texinfo code, structuring and transformations of the document
 and tree called through Texinfo Perl modules may be done by pure Perl modules
 or by Perl XS extensions in native code (written in C).
 In general, it makes no difference whether pure Perl or C code is used.  When
 the document and tree are modified through native code based on C code, the
 Perl data are automatically rebuilt when calling the accessors described
 previously.  In some cases, however, specific functions need to be called to
 pass information from C data to Perl or perform actions related to C data.

 When the tree is directly accessed in Perl (not through a document)
 but is modified by C code, when the Perl tree holds only a reference to
 C data, but no Perl data, or when you want to update all the Perl data
 before doing changes in Perl only, you can build the Perl data from the C data
 with C<build_tree>:

 =over

 =item $tree = build_tree($tree, $no_store)
 X<C<build_tree>>

 Return a I<$tree>, built from C data.  If there are no Perl extensions in C,
 the tree is returned as is.  The tree built is based on the Texinfo parsed
 document associated to the Texinfo tree I<$tree>.

 If the optional I<$no_store> argument is set, remove the C data.  It may be
 useful if you call functions that modify the Perl tree only, and not the C data,
 followed by function calls that output the result and use the C data if present.
 Removing the C data makes sure that the output is not based on unmodified C data,
 but on the modified Perl data.

 =back

 Note that the Perl tree associated to a document is rebuilt from C data
 when calling C<< $document->tree() >>.  Similarly, the tree is rebuilt when
 calling other accessors that depend on the document tree.  Therefore
 C<build_tree> should mainly be called when there is no document associated to a
 tree and C<< $document->tree() >> cannot be called to rebuild the tree,
 or with the I<$no_store> argument, if you want to remove the link with C data.

 Some methods allow releasing the memory held by C data associated
 to a Texinfo parsed document:

 =over

 =item destroy_document($document)
 X<C<destroy_document>>

 Release the C data corresponding to I<$document>.

 =back

 =head1 SEE ALSO

 L<Texinfo::Parser>. L<Texinfo::Structuring>.

 =head1 AUTHOR

 Patrice Dumas, E<lt>bug-texinfo@gnu.orgE<gt>

 =head1 COPYRIGHT AND LICENSE

 Copyright 2010- Free Software Foundation, Inc.  See the source file for
 all copyright years.

 This library is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 3 of the License, or (at
 your option) any later version.

 =cut