| #! /usr/bin/env perl |
| # texi-elements-by-size -- dump list of elements based on words or line counts. |
| # Also serve as an example of using the Texinfo::Parser module, |
| # including the usual per-format options. |
| # |
| # Copyright 2012-2026 Free Software Foundation, Inc. |
| # |
| # This program is free software; you can redistribute it and/or modify |
| # it under the terms of the GNU General Public License as published by |
| # the Free Software Foundation; either version 3 of the License, |
| # or (at your option) any later version. |
| # |
| # This program is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # You should have received a copy of the GNU General Public License |
| # along with this program. If not, see <https://www.gnu.org/licenses/>. |
| # |
| # Original author: Patrice Dumas <pertusus@free.fr> |
| |
| use strict; |
| |
| use warnings; |
| |
| use Encode qw(decode encode); |
| use Config; # to determine the path separator |
| # for fileparse |
| use File::Basename; |
| use File::Spec; |
| use Getopt::Long qw(GetOptions); |
| # for dclone |
| use Storable; |
| |
| Getopt::Long::Configure("gnu_getopt"); |
| |
| BEGIN { |
| my $mydir = $0; |
| $mydir =~ s,/[^/]+$,,; |
| my $t2a_srcdir = `cd $mydir/../tta && pwd`; |
| chomp($t2a_srcdir); |
| if (defined($ENV{'top_builddir'})) { |
| unshift(@INC, join('/', ($ENV{'top_builddir'}, 'tta', 'perl'))); |
| } else { |
| # only works for in-source builds |
| unshift(@INC, join('/', ($t2a_srcdir, 'perl'))); |
| } |
| |
| $ENV{t2a_srcdir} = "$t2a_srcdir"; |
| require Texinfo::ModulePath; |
| Texinfo::ModulePath::init(undef, undef, undef); |
| } |
| |
| use Texinfo::Parser; |
| use Texinfo::Convert::TextContent; |
| |
| my $my_version = "0.1 (TP $Texinfo::Parser::VERSION)"; |
| |
| my ($real_command_name, $command_directory, $command_suffix) |
| = fileparse($0, '.pl'); |
| |
| my $curdir = File::Spec->curdir(); |
| |
| # determine the path separators |
| my $path_separator = $Config{'path_sep'}; |
| $path_separator = ':' if (!defined($path_separator)); |
| my $quoted_path_separator = quotemeta($path_separator); |
| |
| # the encoding used to decode command line arguments, and also for |
| # file names encoding, Perl is expecting sequences of bytes, not unicode |
| # code points. |
| my $locale_encoding; |
| |
| eval 'require I18N::Langinfo'; |
| if (!$@) { |
| $locale_encoding = I18N::Langinfo::langinfo(I18N::Langinfo::CODESET()); |
| $locale_encoding = undef if ($locale_encoding eq ''); |
| } |
| |
| if (!defined($locale_encoding) and $^O eq 'MSWin32') { |
| eval 'require Win32::API'; |
| if (!$@) { |
| Win32::API::More->Import("kernel32", "int GetACP()"); |
| my $CP = GetACP(); |
| if (defined($CP)) { |
| $locale_encoding = 'cp'.$CP; |
| } |
| } |
| } |
| |
| my $force = 0; |
| my $use_sections = 0; |
| my $count_words = 0; |
| my $no_warn = 0; |
| |
| # placeholder for future i18n. |
| sub __($) { |
| return $_[0]; |
| } |
| |
| my $format = 'info'; # make our counts from the Info output |
| # this is the format associated with the output format, which is replaced |
| # when the output format changes. It may also be removed if there is the |
| # corresponding --no-ifformat. |
| #my $default_expanded_format = [ $format ]; |
| |
| # directories specified on the command line. |
| my @include_dirs = (); |
| my @prepend_dirs = (); |
| |
| my $parser_options = { |
| 'EXPANDED_FORMATS' => [ $format ], |
| 'values' => {'txicommandconditionals' => 1}, |
| }; |
| |
| sub _encode_message($) |
| { |
| my $text = shift; |
| my $encoding = $locale_encoding; |
| if (defined($encoding)) { |
| return encode($encoding, $text); |
| } else { |
| return $text; |
| } |
| } |
| |
| sub _decode_input($) |
| { |
| my $text = shift; |
| |
| my $encoding = $locale_encoding; |
| if (defined($encoding)) { |
| return decode($encoding, $text); |
| } else { |
| return $text; |
| } |
| } |
| |
| sub set_expansion($$) { |
| my $region = shift; |
| my $set = shift; |
| $set = 1 if (!defined($set)); |
| if ($set) { |
| push @{$parser_options->{'EXPANDED_FORMATS'}}, $region |
| unless (grep {$_ eq $region} @{$parser_options->{'expanded_formats'}}); |
| } else { |
| @{$parser_options->{'EXPANDED_FORMATS'}} = |
| grep {$_ ne $region} @{$parser_options->{'EXPANDED_FORMATS'}}; |
| } |
| } |
| |
| my $result_options = Getopt::Long::GetOptions ( |
| 'help|h' => sub { print _encode_message(help()); exit 0; }, |
| 'version|V' => sub {print _encode_message( |
| "$real_command_name $my_version\n\n"); |
| print _encode_message(sprintf __( |
| "Copyright (C) %s Free Software Foundation, Inc. |
| License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html> |
| This is free software: you are free to change and redistribute it. |
| There is NO WARRANTY, to the extent permitted by law."), "2025")."\n"; |
| exit 0;}, |
| 'force' => \$force, |
| 'ifhtml!' => sub { set_expansion('html', $_[1]); }, |
| 'ifinfo!' => sub { set_expansion('info', $_[1]); }, |
| 'ifxml!' => sub { set_expansion('xml', $_[1]); }, |
| 'ifdocbook!' => sub { set_expansion('docbook', $_[1]); }, |
| 'iftex!' => sub { set_expansion('tex', $_[1]); }, |
| 'ifplaintext!' => sub { set_expansion('plaintext', $_[1]); }, |
| 'use-sections!' => \$use_sections, |
| 'count-words!' => \$count_words, |
| 'no-warn' => \$no_warn, |
| 'D=s' => sub { |
| my $var = $_[1]; |
| my @field = split (/\s+/, $var, 2); |
| if (@field == 1) { |
| $parser_options->{'values'}->{_decode_input($var)} = 1; |
| } else { |
| $parser_options->{'values'}->{_decode_input($field[0])} |
| = _decode_input($field[1]); |
| } |
| }, |
| 'U=s' => sub { |
| delete $parser_options->{'values'}->{_decode_input($_[1])}; |
| }, |
| |
| 'I=s' => sub { |
| push @include_dirs, split(/$quoted_path_separator/, $_[1]); }, |
| 'P=s' => sub { unshift @prepend_dirs, split(/$quoted_path_separator/, $_[1]); }, |
| 'number-sections!' => sub { set_from_cmdline('NUMBER_SECTIONS', $_[1]); }, |
| ); |
| |
| exit 1 if (!$result_options); |
| |
| my @input_files = @ARGV; |
| # use STDIN if not a tty, like makeinfo does |
| @input_files = ('-') if (!scalar(@input_files) and !-t STDIN); |
| |
| die _encode_message( |
| sprintf(__("%s: missing file argument.")."\n", $real_command_name) |
| .sprintf(__("Try `%s --help' for more information.")."\n", $real_command_name)) |
| unless (scalar(@input_files) >= 1); |
| |
| if (scalar(@input_files) > 1) { |
| my $encoded_files = join('|', @input_files[1..scalar(@input_files) -1]); |
| my $files = _decode_input($encoded_files); |
| warn _encode_message( |
| sprintf(__("%s: superfluous file arguments: %s")."\n", |
| $real_command_name, $files)); |
| } |
| |
| my $input_file_name = shift @input_files; |
| |
| sub help() { |
| my $help = |
| sprintf(__("Usage: %s [OPTION]... TEXINFO-FILE...\n"), $real_command_name) |
| ."\n". |
| __("Write to standard output a list of Texinfo elements (nodes or sections) |
| sorted by the number of lines (or words) they contain, |
| after translation to Info format.\n") |
| ."\n"; |
| |
| $help .= __("General Options: |
| --count-words count words instead of lines. |
| --force keep going even if Texinfo file parsing fails. |
| --help display this help and exit. |
| --no-warn suppress warnings (but not errors). |
| --use-sections use sections as elements instead of nodes. |
| --version display version information and exit.\n") |
| ."\n"; |
| $help .= __("Input file options: |
| -D VAR define the variable VAR, as with \@set. |
| -I DIR append DIR to the \@include search path. |
| -P DIR prepend DIR to the \@include search path. |
| -U VAR undefine the variable VAR, as with \@clear.\n") |
| ."\n"; |
| $help .= __("Conditional processing in input: |
| --ifdocbook process \@ifdocbook and \@docbook. |
| --ifhtml process \@ifhtml and \@html. |
| --ifinfo process \@ifinfo. |
| --ifplaintext process \@ifplaintext. |
| --iftex process \@iftex and \@tex. |
| --ifxml process \@ifxml and \@xml. |
| --no-ifdocbook do not process \@ifdocbook and \@docbook text. |
| --no-ifhtml do not process \@ifhtml and \@html text. |
| --no-ifinfo do not process \@ifinfo text. |
| --no-ifplaintext do not process \@ifplaintext text. |
| --no-iftex do not process \@iftex and \@tex text. |
| --no-ifxml do not process \@ifxml and \@xml text. |
| |
| Also, for the --no-ifFORMAT options, do process \@ifnotFORMAT text.\n"); |
| return $help; |
| |
| } |
| |
| sub _exit($) { |
| my $error_count = shift; |
| exit(1) if ($error_count and !$force); |
| } |
| |
| sub handle_errors(@) { |
| my $errors = shift; |
| my $additional_error_count = shift; |
| my $error_count = shift; |
| |
| $error_count += $additional_error_count if ($additional_error_count); |
| foreach my $error_message (@$errors) { |
| if ($error_message->{'type'} eq 'error' or !$no_warn) { |
| my $s = ''; |
| if (defined($error_message->{'file_name'})) { |
| $s .= $error_message->{'file_name'} . ':'; |
| } |
| if (defined($error_message->{'line_nr'})) { |
| $s .= $error_message->{'line_nr'} . ':'; |
| } |
| $s .= ' ' if ($s ne ''); |
| |
| $s .= _encode_message($error_message->{'error_line'}); |
| warn $s; |
| } |
| } |
| |
| _exit($error_count); |
| return $error_count; |
| } |
| |
| |
| my ($input_filename, $input_directory, $suffix) |
| = fileparse($input_file_name); |
| if (!defined($input_directory) or $input_directory eq '') { |
| $input_directory = $curdir; |
| } |
| |
| my $parser_file_options = Storable::dclone($parser_options); |
| $parser_file_options->{'INCLUDE_DIRECTORIES'} = [@include_dirs]; |
| my @prepended_include_directories = ('.'); |
| push @prepended_include_directories, $input_directory |
| if ($input_directory ne $curdir); |
| unshift @{$parser_file_options->{'INCLUDE_DIRECTORIES'}}, |
| @prepended_include_directories; |
| unshift @{$parser_file_options->{'INCLUDE_DIRECTORIES'}}, @prepend_dirs; |
| |
| my $error_count = 0; |
| my $parser = Texinfo::Parser::parser($parser_file_options); |
| my $document = $parser->parse_texi_file($input_file_name); |
| |
| $error_count |
| = handle_errors($document->parser_errors(), $error_count); |
| |
| my $converter_options = {}; |
| my $converter = Texinfo::Convert::TextContent->converter($converter_options); |
| |
| my ($sorted_name_counts_array, $formatted_result) |
| = $converter->sort_element_counts($document, $use_sections, |
| $count_words); |
| |
| print STDOUT _encode_message($formatted_result); |
| |
| exit(0); |