blob: 6fa9ef52a9cc807ae34689dea2eba3b65c5d8318 [file] [log] [blame]
#! /usr/bin/env perl
# texi-elements-by-size -- dump list of elements based on words or line counts.
# Also serve as an example of using the Texinfo::Parser module,
# including the usual per-format options.
#
# Copyright 2012-2026 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License,
# or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# Original author: Patrice Dumas <pertusus@free.fr>
use strict;
use warnings;
use Encode qw(decode encode);
use Config; # to determine the path separator
# for fileparse
use File::Basename;
use File::Spec;
use Getopt::Long qw(GetOptions);
# for dclone
use Storable;
Getopt::Long::Configure("gnu_getopt");
BEGIN {
my $mydir = $0;
$mydir =~ s,/[^/]+$,,;
my $t2a_srcdir = `cd $mydir/../tta && pwd`;
chomp($t2a_srcdir);
if (defined($ENV{'top_builddir'})) {
unshift(@INC, join('/', ($ENV{'top_builddir'}, 'tta', 'perl')));
} else {
# only works for in-source builds
unshift(@INC, join('/', ($t2a_srcdir, 'perl')));
}
$ENV{t2a_srcdir} = "$t2a_srcdir";
require Texinfo::ModulePath;
Texinfo::ModulePath::init(undef, undef, undef);
}
use Texinfo::Parser;
use Texinfo::Convert::TextContent;
my $my_version = "0.1 (TP $Texinfo::Parser::VERSION)";
my ($real_command_name, $command_directory, $command_suffix)
= fileparse($0, '.pl');
my $curdir = File::Spec->curdir();
# determine the path separators
my $path_separator = $Config{'path_sep'};
$path_separator = ':' if (!defined($path_separator));
my $quoted_path_separator = quotemeta($path_separator);
# the encoding used to decode command line arguments, and also for
# file names encoding, Perl is expecting sequences of bytes, not unicode
# code points.
my $locale_encoding;
eval 'require I18N::Langinfo';
if (!$@) {
$locale_encoding = I18N::Langinfo::langinfo(I18N::Langinfo::CODESET());
$locale_encoding = undef if ($locale_encoding eq '');
}
if (!defined($locale_encoding) and $^O eq 'MSWin32') {
eval 'require Win32::API';
if (!$@) {
Win32::API::More->Import("kernel32", "int GetACP()");
my $CP = GetACP();
if (defined($CP)) {
$locale_encoding = 'cp'.$CP;
}
}
}
my $force = 0;
my $use_sections = 0;
my $count_words = 0;
my $no_warn = 0;
# placeholder for future i18n.
sub __($) {
return $_[0];
}
my $format = 'info'; # make our counts from the Info output
# this is the format associated with the output format, which is replaced
# when the output format changes. It may also be removed if there is the
# corresponding --no-ifformat.
#my $default_expanded_format = [ $format ];
# directories specified on the command line.
my @include_dirs = ();
my @prepend_dirs = ();
my $parser_options = {
'EXPANDED_FORMATS' => [ $format ],
'values' => {'txicommandconditionals' => 1},
};
sub _encode_message($)
{
my $text = shift;
my $encoding = $locale_encoding;
if (defined($encoding)) {
return encode($encoding, $text);
} else {
return $text;
}
}
sub _decode_input($)
{
my $text = shift;
my $encoding = $locale_encoding;
if (defined($encoding)) {
return decode($encoding, $text);
} else {
return $text;
}
}
sub set_expansion($$) {
my $region = shift;
my $set = shift;
$set = 1 if (!defined($set));
if ($set) {
push @{$parser_options->{'EXPANDED_FORMATS'}}, $region
unless (grep {$_ eq $region} @{$parser_options->{'expanded_formats'}});
} else {
@{$parser_options->{'EXPANDED_FORMATS'}} =
grep {$_ ne $region} @{$parser_options->{'EXPANDED_FORMATS'}};
}
}
my $result_options = Getopt::Long::GetOptions (
'help|h' => sub { print _encode_message(help()); exit 0; },
'version|V' => sub {print _encode_message(
"$real_command_name $my_version\n\n");
print _encode_message(sprintf __(
"Copyright (C) %s Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law."), "2025")."\n";
exit 0;},
'force' => \$force,
'ifhtml!' => sub { set_expansion('html', $_[1]); },
'ifinfo!' => sub { set_expansion('info', $_[1]); },
'ifxml!' => sub { set_expansion('xml', $_[1]); },
'ifdocbook!' => sub { set_expansion('docbook', $_[1]); },
'iftex!' => sub { set_expansion('tex', $_[1]); },
'ifplaintext!' => sub { set_expansion('plaintext', $_[1]); },
'use-sections!' => \$use_sections,
'count-words!' => \$count_words,
'no-warn' => \$no_warn,
'D=s' => sub {
my $var = $_[1];
my @field = split (/\s+/, $var, 2);
if (@field == 1) {
$parser_options->{'values'}->{_decode_input($var)} = 1;
} else {
$parser_options->{'values'}->{_decode_input($field[0])}
= _decode_input($field[1]);
}
},
'U=s' => sub {
delete $parser_options->{'values'}->{_decode_input($_[1])};
},
'I=s' => sub {
push @include_dirs, split(/$quoted_path_separator/, $_[1]); },
'P=s' => sub { unshift @prepend_dirs, split(/$quoted_path_separator/, $_[1]); },
'number-sections!' => sub { set_from_cmdline('NUMBER_SECTIONS', $_[1]); },
);
exit 1 if (!$result_options);
my @input_files = @ARGV;
# use STDIN if not a tty, like makeinfo does
@input_files = ('-') if (!scalar(@input_files) and !-t STDIN);
die _encode_message(
sprintf(__("%s: missing file argument.")."\n", $real_command_name)
.sprintf(__("Try `%s --help' for more information.")."\n", $real_command_name))
unless (scalar(@input_files) >= 1);
if (scalar(@input_files) > 1) {
my $encoded_files = join('|', @input_files[1..scalar(@input_files) -1]);
my $files = _decode_input($encoded_files);
warn _encode_message(
sprintf(__("%s: superfluous file arguments: %s")."\n",
$real_command_name, $files));
}
my $input_file_name = shift @input_files;
sub help() {
my $help =
sprintf(__("Usage: %s [OPTION]... TEXINFO-FILE...\n"), $real_command_name)
."\n".
__("Write to standard output a list of Texinfo elements (nodes or sections)
sorted by the number of lines (or words) they contain,
after translation to Info format.\n")
."\n";
$help .= __("General Options:
--count-words count words instead of lines.
--force keep going even if Texinfo file parsing fails.
--help display this help and exit.
--no-warn suppress warnings (but not errors).
--use-sections use sections as elements instead of nodes.
--version display version information and exit.\n")
."\n";
$help .= __("Input file options:
-D VAR define the variable VAR, as with \@set.
-I DIR append DIR to the \@include search path.
-P DIR prepend DIR to the \@include search path.
-U VAR undefine the variable VAR, as with \@clear.\n")
."\n";
$help .= __("Conditional processing in input:
--ifdocbook process \@ifdocbook and \@docbook.
--ifhtml process \@ifhtml and \@html.
--ifinfo process \@ifinfo.
--ifplaintext process \@ifplaintext.
--iftex process \@iftex and \@tex.
--ifxml process \@ifxml and \@xml.
--no-ifdocbook do not process \@ifdocbook and \@docbook text.
--no-ifhtml do not process \@ifhtml and \@html text.
--no-ifinfo do not process \@ifinfo text.
--no-ifplaintext do not process \@ifplaintext text.
--no-iftex do not process \@iftex and \@tex text.
--no-ifxml do not process \@ifxml and \@xml text.
Also, for the --no-ifFORMAT options, do process \@ifnotFORMAT text.\n");
return $help;
}
sub _exit($) {
my $error_count = shift;
exit(1) if ($error_count and !$force);
}
sub handle_errors(@) {
my $errors = shift;
my $additional_error_count = shift;
my $error_count = shift;
$error_count += $additional_error_count if ($additional_error_count);
foreach my $error_message (@$errors) {
if ($error_message->{'type'} eq 'error' or !$no_warn) {
my $s = '';
if (defined($error_message->{'file_name'})) {
$s .= $error_message->{'file_name'} . ':';
}
if (defined($error_message->{'line_nr'})) {
$s .= $error_message->{'line_nr'} . ':';
}
$s .= ' ' if ($s ne '');
$s .= _encode_message($error_message->{'error_line'});
warn $s;
}
}
_exit($error_count);
return $error_count;
}
my ($input_filename, $input_directory, $suffix)
= fileparse($input_file_name);
if (!defined($input_directory) or $input_directory eq '') {
$input_directory = $curdir;
}
my $parser_file_options = Storable::dclone($parser_options);
$parser_file_options->{'INCLUDE_DIRECTORIES'} = [@include_dirs];
my @prepended_include_directories = ('.');
push @prepended_include_directories, $input_directory
if ($input_directory ne $curdir);
unshift @{$parser_file_options->{'INCLUDE_DIRECTORIES'}},
@prepended_include_directories;
unshift @{$parser_file_options->{'INCLUDE_DIRECTORIES'}}, @prepend_dirs;
my $error_count = 0;
my $parser = Texinfo::Parser::parser($parser_file_options);
my $document = $parser->parse_texi_file($input_file_name);
$error_count
= handle_errors($document->parser_errors(), $error_count);
my $converter_options = {};
my $converter = Texinfo::Convert::TextContent->converter($converter_options);
my ($sorted_name_counts_array, $formatted_result)
= $converter->sort_element_counts($document, $use_sections,
$count_words);
print STDOUT _encode_message($formatted_result);
exit(0);