util/texi-elements-by-size - texinfo - Git at Google

 #! /usr/bin/env perl
 # texi-elements-by-size -- dump list of elements based on words or line counts.
 # Also serve as an example of using the Texinfo::Parser module,
 # including the usual per-format options.
 #
 # Copyright 2012, 2013, 2014, 2015, 2016 Free Software Foundation, Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License,
 # or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 # Original author: Patrice Dumas <pertusus@free.fr>

 use strict;

 use Config; # to determine the path separator
 use Getopt::Long qw(GetOptions);
 Getopt::Long::Configure("gnu_getopt");

 BEGIN {
   # The purpose of these includes is to make it possible to run the
   # script from a Texinfo source checkout.  If that's not relevant,
   # probably best to simply assume all the needed packages are in the
   # Perl include path.
   #
   # Directory we are in.  When $0 has no directory part (for instance
   # the script was started as "perl texi-elements-by-size" from its own
   # directory), use '.' rather than mistaking the script name for a
   # directory.
   my $mydir = ($0 =~ m,^(.*)/[^/]*$,s) ? $1 : '.';
   my $txi_libdir = "$mydir/../tp";  # find tp relative to $0
   unshift (@INC, $txi_libdir);
   #
   # Modules bundled with the sources under tp/maintain/lib.
   my @txi_maint_dirs = qw(Text-Unidecode Unicode-EastAsianWidth libintl-perl);
   unshift (@INC, map { "$txi_libdir/maintain/lib/$_/lib" } @txi_maint_dirs );
 }

 use Texinfo::Parser;
 use Texinfo::Structuring;
 use Texinfo::Convert::TextContent;

 # Version string printed by --version; it embeds the version of the
 # Texinfo::Parser module.
 my $my_version = '0.1 (TP ' . $Texinfo::Parser::VERSION . ')';

 # Name the program was started as, without directory part and without
 # a trailing ".pl".
 my $real_command_name = $0;
 $real_command_name =~ s/.*\///;
 $real_command_name =~ s/\.pl$//;

 # Separator used in path-like option values: taken from the Perl
 # configuration, ':' when it is not set.  The quoted form is meant for
 # use inside a regular expression.
 my $path_separator
   = defined($Config{'path_sep'}) ? $Config{'path_sep'} : ':';
 my $quoted_path_separator = quotemeta($path_separator);

 # Command-line flags, all off by default.
 my ($force, $use_sections, $count_words, $no_warn) = (0, 0, 0, 0);

 # Stand-in for a future message translation function: hands back its
 # argument untouched.
 sub __($) {
   my ($msgid) = @_;
   return $msgid;
 }

 # Counts are made from the Info output, so that format is in the list
 # of expanded formats from the start.
 my $format = 'info';
 # this is the format associated with the output format, which is replaced
 # when the output format changes.  It may also be removed if there is the
 # corresponding --no-ifformat.
 #my $default_expanded_format = [ $format ];

 # directories specified on the command line (-I and -P).
 my (@include_dirs, @prepend_dirs);

 # Options handed to the parser: the formats whose conditional blocks
 # are expanded and the variables defined with -D.
 my $parser_default_options = {};
 $parser_default_options->{'values'} = {};
 #$parser_default_options->{'expanded_formats'} = [];
 $parser_default_options->{'expanded_formats'} = [ $format ];
 #$parser_default_options->{'gettext'} = \&__;

 # Turn expansion of the conditional blocks of format REGION on or off.
 # A true or undefined SET adds REGION to the parser 'expanded_formats'
 # list unless it is already there; a false SET removes it from the list.
 sub set_expansion($$) {
   my ($region, $set) = @_;
   my $expanded = $parser_default_options->{'expanded_formats'};

   if (defined($set) and !$set) {
     # --no-ifFORMAT: drop the format.
     @$expanded = grep {$_ ne $region} @$expanded;
   } elsif (!grep {$_ eq $region} @$expanded) {
     # --ifFORMAT: add the format, avoiding duplicates.
     push @$expanded, $region;
   }
 }

 # Parse the command line.  Flags are stored in the variables declared
 # above, --ifFORMAT/--no-ifFORMAT go through set_expansion(), and -D/-U
 # and -I/-P update the parser values and the include directory lists.
 my $result_options = Getopt::Long::GetOptions (
  'help|h' => sub { print help(); exit 0; },
  'version|V' => sub {print "$real_command_name $my_version\n\n";
                      printf __("Copyright (C) %s Free Software Foundation, Inc.
 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
 This is free software: you are free to change and redistribute it.
 There is NO WARRANTY, to the extent permitted by law.\n"), "2016";
       exit 0;},
   'force' => \$force,
   'ifhtml!' => sub { set_expansion('html', $_[1]); },
   'ifinfo!' => sub { set_expansion('info', $_[1]); },
   'ifxml!' => sub { set_expansion('xml', $_[1]); },
   'ifdocbook!' => sub { set_expansion('docbook', $_[1]); },
   'iftex!' => sub { set_expansion('tex', $_[1]); },
   'ifplaintext!' => sub { set_expansion('plaintext', $_[1]); },
   'use-sections!' => \$use_sections,
   'count-words!' => \$count_words,
   'no-warn' => \$no_warn,
   'D=s' => sub {$parser_default_options->{'values'}->{$_[1]} = 1;},
   'U=s' => sub {delete $parser_default_options->{'values'}->{$_[1]};},
   # -I appends and -P prepends; the value may hold several directories
   # joined by the path separator.
   'I=s' => sub {
                 push @include_dirs, split(/$quoted_path_separator/, $_[1]); },
   'P=s' => sub { unshift @prepend_dirs, split(/$quoted_path_separator/, $_[1]); },
   # This script defines no set_from_cmdline(), so calling it here made
   # every use of the option fail.  The option is still accepted, and
   # ignored.
  'number-sections!' => sub { },
 );

 # Invalid command line: GetOptions has already issued the diagnostics.
 exit 1 if (!$result_options);

 # The remaining arguments are the input files; only the first is used.
 my @input_files = @ARGV;
 # use STDIN if not a tty, like makeinfo does
 @input_files = ('-') if (!scalar(@input_files) and !-t STDIN);

 die sprintf(__("%s: missing file argument.\n"), $real_command_name)
    .sprintf(__("Try `%s --help' for more information.\n"), $real_command_name)
      unless (scalar(@input_files) >= 1);


 if (scalar(@input_files) > 1) {
   # The file names are passed as an argument, not interpolated into the
   # format, so that a '%' in a name is printed as is instead of being
   # taken for a conversion.
   warn sprintf(__("%s: superfluous file arguments: %s\n"),
        $real_command_name, "@input_files");
 }

 my $input_file_name = shift @input_files;

 # Return the text printed by --help: the usage line, a description and
 # the three groups of options, separated by empty lines.
 sub help() {
   my $usage_line
     = sprintf(__("Usage: %s [OPTION]... TEXINFO-FILE...\n"), $real_command_name);

   my $description =
     __("Write to standard output a list of Texinfo elements (nodes or sections)
 sorted by the number of lines (or words) they contain,
 after translation to Info format.\n");

   my $general_options = __("General Options:
   --count-words    count words instead of lines.
   --force          keep going even if Texinfo file parsing fails.
   --help           display this help and exit.
   --no-warn        suppress warnings (but not errors).
   --use-sections   use sections as elements instead of nodes.
   --version        display version information and exit.\n");

   my $input_file_options = __("Input file options:
   -D VAR                        define the variable VAR, as with \@set.
   -I DIR                        append DIR to the \@include search path.
   -P DIR                        prepend DIR to the \@include search path.
   -U VAR                        undefine the variable VAR, as with \@clear.\n");

   my $conditional_options = __("Conditional processing in input:
   --ifdocbook       process \@ifdocbook and \@docbook.
   --ifhtml          process \@ifhtml and \@html.
   --ifinfo          process \@ifinfo.
   --ifplaintext     process \@ifplaintext.
   --iftex           process \@iftex and \@tex.
   --ifxml           process \@ifxml and \@xml.
   --no-ifdocbook    do not process \@ifdocbook and \@docbook text.
   --no-ifhtml       do not process \@ifhtml and \@html text.
   --no-ifinfo       do not process \@ifinfo text.
   --no-ifplaintext  do not process \@ifplaintext text.
   --no-iftex        do not process \@iftex and \@tex text.
   --no-ifxml        do not process \@ifxml and \@xml text.

   Also, for the --no-ifFORMAT options, do process \@ifnotFORMAT text.\n");

   # Every part ends with a newline, so joining with "\n" leaves one
   # empty line between consecutive parts.
   return join("\n", $usage_line, $description, $general_options,
                     $input_file_options, $conditional_options);
 }

 # Leave the program with status 1 when ERROR_COUNT is non-zero and
 # --force was not given; otherwise come back to the caller.
 sub _exit($) {
   my ($error_count) = @_;
   unless (!$error_count or $force) {
     exit (1);
   }
 }

 # Print the messages held by SELF and return the updated error count.
 # SELF must have an errors() method returning a reference to a list of
 # messages followed by a count, which is added to ERROR_COUNT.  Messages
 # of type 'error' are always printed on standard error, the other ones
 # only if --no-warn was not given.  _exit() then ends the program if
 # the total is non-zero and --force was not given.
 sub handle_errors($$) {
   my ($self, $error_count) = @_;
   my ($errors, $new_error_count) = $self->errors();
   if ($new_error_count) {
     $error_count += $new_error_count;
   }

   foreach my $message (@$errors) {
     # With --no-warn anything that is not an error is kept quiet.
     next if ($no_warn and $message->{'type'} ne 'error');
     warn $message->{'error_line'};
   }

   _exit($error_count);
   return $error_count;
 }

 # Directory part of the input file name, trailing slash included, or
 # '.' when the name has no directory part.
 my $input_directory = ($input_file_name =~ /(.*\/)/) ? $1 : '.';

 # Directories searched without being asked for: the current directory,
 # then the directory of the input file if it is a different one.
 my @prepended_include_directories = ('.');
 unless ($input_directory eq '.') {
   push @prepended_include_directories, $input_directory;
 }

 # Resulting search order: -P directories, the directories above, then
 # the -I directories.
 my $parser_options = {
   %$parser_default_options,
   'include_directories' => [ @prepend_dirs,
                              @prepended_include_directories,
                              @include_dirs ],
 };

 my $error_count = 0;
 my $parser = Texinfo::Parser::parser($parser_options);
 my $tree = $parser->parse_texi_file($input_file_name);
 my $registrar = $parser->registered_errors();

 # No tree came back: show the registered messages and give up.
 unless (defined($tree)) {
   handle_errors($registrar, $error_count);
   exit (1);
 }

 my $converter_options = { 'parser' => $parser };
 my $converter = Texinfo::Convert::TextContent->converter($converter_options);

 # Only the formatted text is printed; the sorted array is not used.
 my ($sorted_name_counts_array, $formatted_result)
   = $converter->sort_element_counts($tree, $use_sections, $count_words);

 print STDOUT $formatted_result;

 exit (0);
	#! /usr/bin/env perl
	# texi-elements-by-size -- dump list of elements based on words or line counts.
	# Also serve as an example of using the Texinfo::Parser module,
	# including the usual per-format options.
	#
	# Copyright 2012, 2013, 2014, 2015, 2016 Free Software Foundation, Inc.
	#
	# This program is free software; you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation; either version 3 of the License,
	# or (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program. If not, see <http://www.gnu.org/licenses/>.
	#
	# Original author: Patrice Dumas <pertusus@free.fr>

	use strict;

	use Config; # to determine the path separator
	use Getopt::Long qw(GetOptions);
	Getopt::Long::Configure("gnu_getopt");

	BEGIN {
	  # The purpose of these includes is to make it possible to run the
	  # script from a Texinfo source checkout. If that's not relevant,
	  # probably best to simply assume all the needed packages are in the
	  # Perl include path.
	  #
	  # Directory we are in.  When $0 has no directory part (for instance
	  # the script was started as "perl texi-elements-by-size" from its own
	  # directory), use '.' rather than mistaking the script name for a
	  # directory.
	  my $mydir = ($0 =~ m,^(.*)/[^/]*$,s) ? $1 : '.';
	  my $txi_libdir = "$mydir/../tp"; # find tp relative to $0
	  unshift (@INC, $txi_libdir);
	  #
	  # Modules bundled with the sources under tp/maintain/lib.
	  my @txi_maint_dirs = qw(Text-Unidecode Unicode-EastAsianWidth libintl-perl);
	  unshift (@INC, map { "$txi_libdir/maintain/lib/$_/lib" } @txi_maint_dirs );
	}

	use Texinfo::Parser;
	use Texinfo::Structuring;
	use Texinfo::Convert::TextContent;

	# Version string printed by --version; it embeds the version of the
	# Texinfo::Parser module.
	my $my_version = '0.1 (TP ' . $Texinfo::Parser::VERSION . ')';

	# Name the program was started as, without directory part and without
	# a trailing ".pl".
	my $real_command_name = $0;
	$real_command_name =~ s/.*\///;
	$real_command_name =~ s/\.pl$//;

	# Separator used in path-like option values: taken from the Perl
	# configuration, ':' when it is not set.  The quoted form is meant for
	# use inside a regular expression.
	my $path_separator
	  = defined($Config{'path_sep'}) ? $Config{'path_sep'} : ':';
	my $quoted_path_separator = quotemeta($path_separator);

	# Command-line flags, all off by default.
	my ($force, $use_sections, $count_words, $no_warn) = (0, 0, 0, 0);

	# Stand-in for a future message translation function: hands back its
	# argument untouched.
	sub __($) {
	  my ($msgid) = @_;
	  return $msgid;
	}

	# Counts are made from the Info output, so that format is in the list
	# of expanded formats from the start.
	my $format = 'info';
	# this is the format associated with the output format, which is replaced
	# when the output format changes. It may also be removed if there is the
	# corresponding --no-ifformat.
	#my $default_expanded_format = [ $format ];

	# directories specified on the command line (-I and -P).
	my (@include_dirs, @prepend_dirs);

	# Options handed to the parser: the formats whose conditional blocks
	# are expanded and the variables defined with -D.
	my $parser_default_options = {};
	$parser_default_options->{'values'} = {};
	#$parser_default_options->{'expanded_formats'} = [];
	$parser_default_options->{'expanded_formats'} = [ $format ];
	#$parser_default_options->{'gettext'} = \&__;

	# Turn expansion of the conditional blocks of format REGION on or off.
	# A true or undefined SET adds REGION to the parser 'expanded_formats'
	# list unless it is already there; a false SET removes it from the list.
	sub set_expansion($$) {
	  my ($region, $set) = @_;
	  my $expanded = $parser_default_options->{'expanded_formats'};

	  if (defined($set) and !$set) {
	    # --no-ifFORMAT: drop the format.
	    @$expanded = grep {$_ ne $region} @$expanded;
	  } elsif (!grep {$_ eq $region} @$expanded) {
	    # --ifFORMAT: add the format, avoiding duplicates.
	    push @$expanded, $region;
	  }
	}

	# Parse the command line.  Flags are stored in the variables declared
	# above, --ifFORMAT/--no-ifFORMAT go through set_expansion(), and -D/-U
	# and -I/-P update the parser values and the include directory lists.
	my $result_options = Getopt::Long::GetOptions (
	  # The name and its one-letter alias are separated by a bare '|';
	  # a backslash in front of it makes the option specification invalid.
	  'help|h' => sub { print help(); exit 0; },
	  'version|V' => sub {print "$real_command_name $my_version\n\n";
	    printf __("Copyright (C) %s Free Software Foundation, Inc.
	License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
	This is free software: you are free to change and redistribute it.
	There is NO WARRANTY, to the extent permitted by law.\n"), "2016";
	    exit 0;},
	  'force' => \$force,
	  'ifhtml!' => sub { set_expansion('html', $_[1]); },
	  'ifinfo!' => sub { set_expansion('info', $_[1]); },
	  'ifxml!' => sub { set_expansion('xml', $_[1]); },
	  'ifdocbook!' => sub { set_expansion('docbook', $_[1]); },
	  'iftex!' => sub { set_expansion('tex', $_[1]); },
	  'ifplaintext!' => sub { set_expansion('plaintext', $_[1]); },
	  'use-sections!' => \$use_sections,
	  'count-words!' => \$count_words,
	  'no-warn' => \$no_warn,
	  'D=s' => sub {$parser_default_options->{'values'}->{$_[1]} = 1;},
	  'U=s' => sub {delete $parser_default_options->{'values'}->{$_[1]};},
	  # -I appends and -P prepends; the value may hold several directories
	  # joined by the path separator.
	  'I=s' => sub {
	    push @include_dirs, split(/$quoted_path_separator/, $_[1]); },
	  'P=s' => sub { unshift @prepend_dirs, split(/$quoted_path_separator/, $_[1]); },
	  # This script defines no set_from_cmdline(), so calling it here made
	  # every use of the option fail.  The option is still accepted, and
	  # ignored.
	  'number-sections!' => sub { },
	);

	# Invalid command line: GetOptions has already issued the diagnostics.
	exit 1 if (!$result_options);

	# The remaining arguments are the input files; only the first is used.
	my @input_files = @ARGV;
	# use STDIN if not a tty, like makeinfo does
	@input_files = ('-') if (!scalar(@input_files) and !-t STDIN);

	die sprintf(__("%s: missing file argument.\n"), $real_command_name)
	  .sprintf(__("Try `%s --help' for more information.\n"), $real_command_name)
	    unless (scalar(@input_files) >= 1);


	if (scalar(@input_files) > 1) {
	  # The file names are passed as an argument, not interpolated into the
	  # format, so that a '%' in a name is printed as is instead of being
	  # taken for a conversion.
	  warn sprintf(__("%s: superfluous file arguments: %s\n"),
	    $real_command_name, "@input_files");
	}

	my $input_file_name = shift @input_files;

	# Return the text printed by --help: the usage line, a description and
	# the three groups of options, separated by empty lines.
	sub help() {
	  my $usage_line
	    = sprintf(__("Usage: %s [OPTION]... TEXINFO-FILE...\n"), $real_command_name);

	  my $description =
	    __("Write to standard output a list of Texinfo elements (nodes or sections)
	sorted by the number of lines (or words) they contain,
	after translation to Info format.\n");

	  my $general_options = __("General Options:
	--count-words count words instead of lines.
	--force keep going even if Texinfo file parsing fails.
	--help display this help and exit.
	--no-warn suppress warnings (but not errors).
	--use-sections use sections as elements instead of nodes.
	--version display version information and exit.\n");

	  my $input_file_options = __("Input file options:
	-D VAR define the variable VAR, as with \@set.
	-I DIR append DIR to the \@include search path.
	-P DIR prepend DIR to the \@include search path.
	-U VAR undefine the variable VAR, as with \@clear.\n");

	  my $conditional_options = __("Conditional processing in input:
	--ifdocbook process \@ifdocbook and \@docbook.
	--ifhtml process \@ifhtml and \@html.
	--ifinfo process \@ifinfo.
	--ifplaintext process \@ifplaintext.
	--iftex process \@iftex and \@tex.
	--ifxml process \@ifxml and \@xml.
	--no-ifdocbook do not process \@ifdocbook and \@docbook text.
	--no-ifhtml do not process \@ifhtml and \@html text.
	--no-ifinfo do not process \@ifinfo text.
	--no-ifplaintext do not process \@ifplaintext text.
	--no-iftex do not process \@iftex and \@tex text.
	--no-ifxml do not process \@ifxml and \@xml text.

	Also, for the --no-ifFORMAT options, do process \@ifnotFORMAT text.\n");

	  # Every part ends with a newline, so joining with "\n" leaves one
	  # empty line between consecutive parts.
	  return join("\n", $usage_line, $description, $general_options,
	                    $input_file_options, $conditional_options);
	}

	# Leave the program with status 1 when ERROR_COUNT is non-zero and
	# --force was not given; otherwise come back to the caller.
	sub _exit($) {
	  my ($error_count) = @_;
	  unless (!$error_count or $force) {
	    exit (1);
	  }
	}

	# Print the messages held by SELF and return the updated error count.
	# SELF must have an errors() method returning a reference to a list of
	# messages followed by a count, which is added to ERROR_COUNT.  Messages
	# of type 'error' are always printed on standard error, the other ones
	# only if --no-warn was not given.  _exit() then ends the program if
	# the total is non-zero and --force was not given.
	sub handle_errors($$) {
	  my ($self, $error_count) = @_;
	  my ($errors, $new_error_count) = $self->errors();
	  if ($new_error_count) {
	    $error_count += $new_error_count;
	  }

	  foreach my $message (@$errors) {
	    # With --no-warn anything that is not an error is kept quiet.
	    next if ($no_warn and $message->{'type'} ne 'error');
	    warn $message->{'error_line'};
	  }

	  _exit($error_count);
	  return $error_count;
	}

	# Directory part of the input file name, trailing slash included, or
	# '.' when the name has no directory part.
	my $input_directory = ($input_file_name =~ /(.*\/)/) ? $1 : '.';

	# Directories searched without being asked for: the current directory,
	# then the directory of the input file if it is a different one.
	my @prepended_include_directories = ('.');
	unless ($input_directory eq '.') {
	  push @prepended_include_directories, $input_directory;
	}

	# Resulting search order: -P directories, the directories above, then
	# the -I directories.
	my $parser_options = {
	  %$parser_default_options,
	  'include_directories' => [ @prepend_dirs,
	                             @prepended_include_directories,
	                             @include_dirs ],
	};

	my $error_count = 0;
	my $parser = Texinfo::Parser::parser($parser_options);
	my $tree = $parser->parse_texi_file($input_file_name);
	my $registrar = $parser->registered_errors();

	# No tree came back: show the registered messages and give up.
	unless (defined($tree)) {
	  handle_errors($registrar, $error_count);
	  exit (1);
	}

	my $converter_options = { 'parser' => $parser };
	my $converter = Texinfo::Convert::TextContent->converter($converter_options);

	# Only the formatted text is printed; the sorted array is not used.
	my ($sorted_name_counts_array, $formatted_result)
	  = $converter->sort_element_counts($tree, $use_sections, $count_words);

	print STDOUT $formatted_result;

	exit (0);