gdb/contrib/spellcheck.sh - binutils-gdb - Git at Google

 #!/bin/bash

 # Copyright (C) 2024 Free Software Foundation, Inc.
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.

 # Script to auto-correct common spelling mistakes.
 #
 # Example usage:
 # $ ./gdb/contrib/spellcheck.sh gdb*

 # Physical (symlink-free) directory holding this script, and the path of
 # the script inside that directory.
 scriptdir="$(
     cd "$(dirname "$0")" || exit
     pwd -P
 )"
 this_script="${scriptdir}/$(basename "$0")"

 # Web page that carries the machine-readable list of misspellings.
 url="https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines"

 # Cached data is kept in the .git directory two levels above the script.
 cache_dir="${scriptdir}/../../.git"
 cache_file="wikipedia-common-misspellings.txt"
 dictionary="${cache_dir}/${cache_file}"

 # Dictionary shipped next to the script.
 local_dictionary="${scriptdir}/common-misspellings.txt"

 # Base name of the cached grep pattern file.
 cache_file2="spell-check.pat1"

 # Succeed if the running bash is at least version MAJOR.MINOR.
 #
 # Arguments: $1 - required major version
 #            $2 - required minor version
 # Globals:   bash_major and bash_minor are filled in on the first call and
 #            reused by later calls.
 bash_version_at_least ()
 {
     local major
     major="$1"
     local minor
     minor="$2"

     if [ "$bash_major" = "" ]; then
         # BASH_VERSINFO already holds the version split into fields, so
         # there is no need to parse $BASH_VERSION with external tools.
         bash_major=${BASH_VERSINFO[0]}
         bash_minor=${BASH_VERSINFO[1]}
     fi

     if [ "$bash_major" -lt "$major" ]; then
         # Major version less than required, return false.
         return 1
     fi

     if [ "$bash_major" -gt "$major" ]; then
         # Major version more than required, return true.
         return 0
     fi

     # Same major version: the minor version decides.
     [ "$bash_minor" -ge "$minor" ]
 }

 # Separators: space, slash, tab, colon, comma, double quote.
 # grep gets a literal tab character, sed gets the two-character escape \t.
 declare -a grep_separators=(' ' '/' $'\t' ':' ',' '"')
 declare -a sed_separators=(' ' '/' '\t' ':' ',' '"')

 # Pre: start of line, left parenthesis.
 # The parenthesis is escaped for grep -E only; sed takes it as it is.
 declare -a grep_pre=('^' '\(')
 declare -a sed_pre=('^' '(')

 # Post: dot, right parenthesis, end of line.
 declare -a grep_post=('\.' '\)' '$')
 declare -a sed_post=('\.' ')' '$')

 # Print the arguments after the first one as a single line, with the
 # first argument inserted between each pair of them.
 #
 # Arguments: $1 - separator
 #            $2... - items to join
 # Outputs:   the joined string; an empty line when there are no items.
 join ()
 {
     local or
     or="$1"
     shift

     local res
     res=""

     local first
     first=true

     # Keep the loop variable local so that a caller's variable of the same
     # name is not overwritten.
     local item
     for item in "$@"; do
         if $first; then
             first=false
             res="$item"
         else
             res="$res$or$item"
         fi
     done

     # printf, unlike echo, never takes the result for an option.
     printf '%s\n' "$res"
 }

 # Alternation operator as used in the grep -E patterns and in the sed
 # patterns, respectively.
 grep_or='|'
 sed_or='\|'

 # Print the arguments as one parenthesized alternation for grep -E,
 # e.g. "(a|b|c)".
 #
 # Globals: reads grep_or, the separator placed between the alternatives.
 grep_join ()
 {
     local res
     # Quote the separator so it always reaches join as exactly one word.
     res=$(join "$grep_or" "$@")
     echo "($res)"
 }

 # Print the arguments as one grouped alternation for sed,
 # e.g. "\(a\|b\|c\)".
 #
 # Globals: reads sed_or, the separator placed between the alternatives.
 sed_join ()
 {
     local res
     # Quote the separator so it always reaches join as exactly one word.
     res=$(join "$sed_or" "$@")
     echo "\($res\)"
 }

 # Print the two supported invocation forms on stdout.
 usage ()
 {
     local prog
     prog=$(basename "$0")

     echo "usage: $prog [--check] <file|dir>+"
     echo "       $prog --print-dictionary"
 }

 # Print $1 unchanged when it starts with a slash; otherwise print it
 # prefixed with the physical current working directory.
 make_absolute ()
 {
     local path
     path="$1"

     if [[ "$path" != /* ]]; then
         path="$(pwd -P)/$path"
     fi

     echo "$path"
 }

 # Parse the command line.
 #
 # Arguments: either the single option --print-dictionary, or an optional
 #            --check followed by one or more files or directories.
 # Globals:   sets print_dictionary or check to true when the matching
 #            option is given; fills unique_files with the sorted,
 #            de-duplicated, canonical paths of all files given or found
 #            below the given directories, leaving out paths that contain
 #            "ChangeLog".
 # Exits with status 1 when no file or directory is given, or when an
 # argument is neither a file nor a directory.
 parse_args ()
 {
     if [ $# -eq 1 ] && [ "$1" = "--print-dictionary" ]; then
         print_dictionary=true
         return
     fi

     while true; do
         case " $1 " in
             " --check ")
                 check=true
                 shift
                 ;;
             *)
                 break
                 ;;
         esac
     done

     if [ $# -eq 0 ]; then
         usage
         exit 1
     fi

     # Create the scratch file only once it is needed, so that the early
     # return above cannot leave it behind.
     local files
     files=$(mktemp)
     trap 'rm -f "$files"' EXIT

     local arg
     for arg in "$@"; do
         if [ -f "$arg" ]; then
             arg=$(make_absolute "$arg")
             readlink -e "$arg" \
                 >> "$files"
         elif [ -d "$arg" ]; then
             arg=$(make_absolute "$arg")
             # "+" hands many files to one readlink instead of starting
             # one process per file.
             find "$arg" -type f -exec readlink -e {} + \
                 >> "$files"
         else
             echo "Not a file or directory: $arg"
             exit 1
         fi
     done

     mapfile -t unique_files \
         < <(sort -u "$files" \
                 | grep -v ChangeLog)

     rm -f "$files"
     trap "" EXIT
 }

 # Make sure the cached copy of the downloaded dictionary exists.
 #
 # Globals: reads url and dictionary.
 # Does nothing when $dictionary already exists.  Otherwise downloads $url
 # and stores the non-empty lines found between <pre> and </pre> in
 # $dictionary.
 # Returns non-zero, without creating $dictionary, when the download fails.
 get_dictionary ()
 {
     if [ -f "$dictionary" ]; then
         return
     fi

     local webpage
     webpage=$(mktemp)
     trap 'rm -f "$webpage"' EXIT

     # Download web page containing table.  The cache file is written only
     # after a successful download: a file created from a failed download
     # would be taken for a valid dictionary by every later run.
     local status
     status=0
     if wget "$url" -O "$webpage"; then
         # Extract table from web page.
         awk '/<pre>/,/<\/pre>/' "$webpage" \
             | sed 's/<pre>//;s/<\/pre>//' \
             | grep -E -v "^$" \
                    > "$dictionary"
     else
         echo "Failed to download $url" >&2
         status=1
     fi

     rm -f "$webpage"
     trap "" EXIT

     return "$status"
 }

 # Print the local dictionary, leaving out lines that start with "#" and
 # empty lines.
 output_local_dictionary ()
 {
     local skip
     skip="^#|^$"

     grep -E -v "$skip" "$local_dictionary"
 }

 # Print the local dictionary followed by the downloaded one, leaving out
 # every line that contains an upper-case letter.
 output_dictionaries ()
 {
     {
         output_local_dictionary
         cat "$dictionary"
     } | grep -E -v "[A-Z]"
 }

 # Fill the words and replacements arrays from the dictionaries.
 #
 # Every dictionary line is split at "->": the first field goes to words,
 # the second to replacements, at the same index.  Rules that must not be
 # applied are kept as empty strings in both arrays.
 parse_dictionary ()
 {
     # Parse dictionary.
     mapfile -t words \
         < <(output_dictionaries | awk -F '->' '{print $1}')
     mapfile -t replacements \
         < <(output_dictionaries | awk -F '->' '{print $2}')

     local -A seen
     local idx entry drop
     for idx in "${!words[@]}"; do
         entry=${words[idx]}
         drop=false

         if [ "${seen[$entry]}" == 1 ]; then
             # Word already handled by an earlier rule.  The local
             # dictionary comes first, so it overrides the wiki dictionary.
             drop=true
         else
             seen[$entry]=1

             # Identity rules have nothing to replace.
             if [ "$entry" = "${replacements[idx]}" ]; then
                 drop=true
             fi
         fi

         if $drop; then
             words[idx]=""
             replacements[idx]=""
         fi
     done
 }

 # Print every rule with a non-empty word as "word -> replacement", one
 # rule per line.
 #
 # Globals: reads the words and replacements arrays.
 print_dictionary ()
 {
     local idx
     for idx in "${!words[@]}"; do
         # Empty words mark rules that were dropped.
         if [ "${words[idx]}" == "" ]; then
             continue
         fi

         echo "${words[idx]} -> ${replacements[idx]}"
     done
 }

 # Print the names of those files among the arguments that contain at least
 # one dictionary word, as written or with its first letter in upper case,
 # delimited by the configured pre/separator/post patterns.
 #
 # Arguments: the files to search.
 # Globals:   reads words, local_dictionary, dictionary, this_script,
 #            cache_dir, cache_file2 and the grep_* arrays.
 # The combined pattern is cached in $cache_dir under a name ending in the
 # md5sum of both dictionaries and this script.
 # Returns 1 without searching when no files are given.
 find_files_matching_words ()
 {
     local cache_id
     cache_id=$(cat "$local_dictionary" "$dictionary" "$this_script" \
                    | md5sum \
                    | awk '{print $1}')

     local patfile
     patfile="$cache_dir/$cache_file2".$cache_id

     local pat
     if [ -f "$patfile" ]; then
         pat=$(cat "$patfile")
     else
         # Remove patterns cached under any other id.
         rm -f "$cache_dir/$cache_file2".*

         # Turn each word into a regexp that also accepts an upper-case
         # first letter, e.g. "teh" becomes "[Tt]eh".
         local -a re_words
         mapfile -t re_words \
             < <(for f in "${words[@]}"; do
                     if [ "$f" = "" ]; then
                         continue
                     fi
                     echo "$f"
                 done \
                     | sed "s/^\(.\)/[\u\1\1]/")

         pat=$(grep_join "${re_words[@]}")

         local before after
         before=$(grep_join \
                      "${grep_pre[@]}" \
                      "${grep_separators[@]}")
         after=$(grep_join \
                     "${grep_separators[@]}" \
                     "${grep_post[@]}")

         pat="$before$pat$after"

         echo "$pat" \
             > "$patfile"
     fi

     # Without file arguments grep would search standard input instead.
     if [ $# -eq 0 ]; then
         return 1
     fi

     grep -E \
         -l \
         "$pat" \
         "$@"
 }

 # Print the names of those files that contain the given word, as written
 # or with its first letter in upper case, delimited by the configured
 # pre/separator/post patterns.
 #
 # Arguments: $1 - word to look for
 #            $2... - files to search
 # Globals:   reads the grep_pre, grep_separators and grep_post arrays.
 find_files_matching_word ()
 {
     local pat
     pat="$1"
     shift

     local before after
     before=$(grep_join \
                  "${grep_pre[@]}" \
                  "${grep_separators[@]}")
     after=$(grep_join \
                 "${grep_separators[@]}" \
                 "${grep_post[@]}")

     # Variant of the word with an upper-case first letter.  Declared local
     # so that it does not end up in the global scope.
     local patc
     if bash_version_at_least 5 1; then
         patc=${pat@u}
     else
         # shellcheck disable=SC2001
         patc=$(echo "$pat" | sed 's/^\(.\)/\u\1/')
     fi
     pat="($patc|$pat)"

     pat="$before$pat$after"

     grep -E \
         -l \
         "$pat" \
         "$@"
 }

 # Replace, in place, every delimited occurrence of a word in a file.  The
 # variant of the word with an upper-case first letter is replaced by the
 # replacement with an upper-case first letter.
 #
 # Arguments: $1 - word to replace
 #            $2 - replacement
 #            $3 - file to edit
 # Globals:   reads the sed_pre, sed_separators and sed_post arrays.
 replace_word_in_file ()
 {
     local word
     word="$1"

     local replacement
     replacement="$2"

     local file
     file="$3"

     local before after
     before=$(sed_join \
                  "${sed_pre[@]}" \
                  "${sed_separators[@]}")
     after=$(sed_join \
                 "${sed_separators[@]}" \
                 "${sed_post[@]}")

     # Declared local so that they do not end up in the global scope.
     local wordc replacementc
     if bash_version_at_least 5 1; then
         wordc=${word@u}
         replacementc=${replacement@u}
     else
         # shellcheck disable=SC2001
         wordc=$(echo "$word" | sed 's/^\(.\)/\u\1/')
         # shellcheck disable=SC2001
         replacementc=$(echo "$replacement" | sed 's/^\(.\)/\u\1/')
     fi

     # \1 and \2 put back the delimiters matched before and after the word.
     local repl1
     local repl2
     repl1="s%$before$word$after%\1$replacement\2%g"
     repl2="s%$before$wordc$after%\1$replacementc\2%g"

     sed -i \
         "$repl1;$repl2" \
         "$file"
 }

 # Apply one dictionary rule to a set of files and report the result.
 #
 # Arguments: $1 - word to replace
 #            $2 - replacement
 #            $3... - candidate files
 # Outputs:   nothing when no file contains the word;
 #            "TODO: word -> replacement" when the replacement contains a
 #            comma, in which case no file is modified;
 #            "word -> replacement" when at least one file was changed;
 #            one "TODO: ...: replacement failed: FILE" line for each file
 #            that still contains the word afterwards.
 replace_word_in_files ()
 {
     local word
     word="$1"

     local replacement
     replacement="$2"

     shift 2

     local id
     id="$word -> $replacement"

     # Reduce set of files for sed to operate on.
     local -a files_matching_word
     mapfile -t files_matching_word \
         < <(find_files_matching_word "$word" "$@")

     if [ ${#files_matching_word[@]} -eq 0 ]; then
         return
     fi

     # Replacements containing a comma are not applied, only reported.
     if [[ "$replacement" == *,* ]]; then
         echo "TODO: $id"
         return
     fi

     # Compare checksums taken before and after the edit to find out
     # whether sed actually changed a file.
     local changed f before after
     changed=false
     for f in "${files_matching_word[@]}"; do
         before=$(md5sum "$f")

         replace_word_in_file \
             "$word" \
             "$replacement" \
             "$f"

         after=$(md5sum "$f")

         if [ "$after" != "$before" ]; then
             changed=true
         fi
     done

     if $changed; then
         echo "$id"
     fi

     # Report files that still contain the word.  The rule text is passed
     # to printf as an argument, never as part of a program or a format
     # string, so characters such as '"' or '%' in it are harmless.
     while IFS= read -r f; do
         printf 'TODO: %s: replacement failed: %s\n' "$id" "$f"
     done < <(find_files_matching_word "$word" "${files_matching_word[@]}")
 }

 # Entry point.  Parses the arguments, loads the dictionaries, and then
 # either prints the dictionary (exit status 0), or applies every rule to
 # the files that contain at least one dictionary word.  With check set,
 # exits with status 1 instead of modifying files when such files exist.
 main ()
 {
     local -a unique_files
     check=false
     print_dictionary=false
     parse_args "$@"

     get_dictionary

     local -a words replacements
     parse_dictionary

     if $print_dictionary; then
         print_dictionary
         exit 0
     fi

     # Reduce set of files for sed to operate on.
     local -a files_matching_words
     mapfile -t files_matching_words \
         < <(find_files_matching_words "${unique_files[@]}")

     if [ ${#files_matching_words[@]} -eq 0 ]; then
         return
     fi

     if $check; then
         exit 1
     fi

     local idx
     for idx in "${!words[@]}"; do
         # Empty words mark rules that were dropped.
         if [ "${words[idx]}" = "" ]; then
             continue
         fi

         replace_word_in_files \
             "${words[idx]}" \
             "${replacements[idx]}" \
             "${files_matching_words[@]}"
     done
 }

 # Run the script with the command-line arguments.
 main "$@"
	#!/bin/bash

	# Copyright (C) 2024 Free Software Foundation, Inc.
	# This program is free software; you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation; either version 3 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program. If not, see <http://www.gnu.org/licenses/>.

	# Script to auto-correct common spelling mistakes.
	#
	# Example usage:
	# $ ./gdb/contrib/spellcheck.sh gdb*

	# Physical (symlink-free) directory holding this script, and the path of
	# the script inside that directory.  "||" must be the shell operator: an
	# escaped "\|\|" is handed to cd as extra arguments and makes it fail.
	scriptdir="$(
	    cd "$(dirname "$0")" || exit
	    pwd -P
	)"
	this_script="${scriptdir}/$(basename "$0")"

	# Web page that carries the machine-readable list of misspellings.
	url="https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines"

	# Cached data is kept in the .git directory two levels above the script.
	cache_dir="${scriptdir}/../../.git"
	cache_file="wikipedia-common-misspellings.txt"
	dictionary="${cache_dir}/${cache_file}"

	# Dictionary shipped next to the script.
	local_dictionary="${scriptdir}/common-misspellings.txt"

	# Base name of the cached grep pattern file.
	cache_file2="spell-check.pat1"

	# Succeed if the running bash is at least version MAJOR.MINOR.
	#
	# Arguments: $1 - required major version
	#            $2 - required minor version
	# Globals:   bash_major and bash_minor are filled in on the first call and
	#            reused by later calls.
	bash_version_at_least ()
	{
	    local major
	    major="$1"
	    local minor
	    minor="$2"

	    if [ "$bash_major" = "" ]; then
	        # BASH_VERSINFO already holds the version split into fields, so
	        # there is no need to parse $BASH_VERSION with external tools.
	        bash_major=${BASH_VERSINFO[0]}
	        bash_minor=${BASH_VERSINFO[1]}
	    fi

	    if [ "$bash_major" -lt "$major" ]; then
	        # Major version less than required, return false.
	        return 1
	    fi

	    if [ "$bash_major" -gt "$major" ]; then
	        # Major version more than required, return true.
	        return 0
	    fi

	    # Same major version: the minor version decides.
	    [ "$bash_minor" -ge "$minor" ]
	}

	# Separators: space, slash, tab, colon, comma, double quote.
	# grep gets a literal tab character, written as $'\t' so that it cannot
	# be mistaken for a space; sed gets the two-character escape \t.
	declare -a grep_separators=(' ' '/' $'\t' ':' ',' '"')
	declare -a sed_separators=(' ' '/' '\t' ':' ',' '"')

	# Pre: start of line, left parenthesis.
	# The parenthesis is escaped for grep -E only; sed takes it as it is.
	declare -a grep_pre=('^' '\(')
	declare -a sed_pre=('^' '(')

	# Post: dot, right parenthesis, end of line.
	declare -a grep_post=('\.' '\)' '$')
	declare -a sed_post=('\.' ')' '$')

	# Print the arguments after the first one as a single line, with the
	# first argument inserted between each pair of them.
	#
	# Arguments: $1 - separator
	#            $2... - items to join
	# Outputs:   the joined string; an empty line when there are no items.
	join ()
	{
	    local or
	    or="$1"
	    shift

	    local res
	    res=""

	    local first
	    first=true

	    # Keep the loop variable local so that a caller's variable of the same
	    # name is not overwritten.
	    local item
	    for item in "$@"; do
	        if $first; then
	            first=false
	            res="$item"
	        else
	            res="$res$or$item"
	        fi
	    done

	    # printf, unlike echo, never takes the result for an option.
	    printf '%s\n' "$res"
	}

	# Alternation operator as used in the grep -E patterns and in the sed
	# patterns, respectively.  For grep -E it is the bare "|"; "\|" would
	# match a literal pipe character there.
	grep_or='|'
	sed_or='\|'

	# Print the arguments as one parenthesized alternation for grep -E,
	# e.g. "(a|b|c)".
	#
	# Globals: reads grep_or, the separator placed between the alternatives.
	grep_join ()
	{
	    local res
	    # Quote the separator so it always reaches join as exactly one word.
	    res=$(join "$grep_or" "$@")
	    echo "($res)"
	}

	# Print the arguments as one grouped alternation for sed,
	# e.g. "\(a\|b\|c\)".
	#
	# Globals: reads sed_or, the separator placed between the alternatives.
	sed_join ()
	{
	    local res
	    # Quote the separator so it always reaches join as exactly one word.
	    res=$(join "$sed_or" "$@")
	    echo "\($res\)"
	}

	# Print the two supported invocation forms on stdout.  The second line
	# is indented so that the program name lines up with the first line.
	usage ()
	{
	    local prog
	    prog=$(basename "$0")

	    echo "usage: $prog [--check] <file|dir>+"
	    echo "       $prog --print-dictionary"
	}

	# Print $1 unchanged when it starts with a slash; otherwise print it
	# prefixed with the physical current working directory.
	make_absolute ()
	{
	    local path
	    path="$1"

	    if [[ "$path" != /* ]]; then
	        path="$(pwd -P)/$path"
	    fi

	    echo "$path"
	}

	# Parse the command line.
	#
	# Arguments: either the single option --print-dictionary, or an optional
	#            --check followed by one or more files or directories.
	# Globals:   sets print_dictionary or check to true when the matching
	#            option is given; fills unique_files with the sorted,
	#            de-duplicated, canonical paths of all files given or found
	#            below the given directories, leaving out paths that contain
	#            "ChangeLog".
	# Exits with status 1 when no file or directory is given, or when an
	# argument is neither a file nor a directory.
	parse_args ()
	{
	    if [ $# -eq 1 ] && [ "$1" = "--print-dictionary" ]; then
	        print_dictionary=true
	        return
	    fi

	    while true; do
	        case " $1 " in
	            " --check ")
	                check=true
	                shift
	                ;;
	            *)
	                break
	                ;;
	        esac
	    done

	    if [ $# -eq 0 ]; then
	        usage
	        exit 1
	    fi

	    # Create the scratch file only once it is needed, so that the early
	    # return above cannot leave it behind.
	    local files
	    files=$(mktemp)
	    trap 'rm -f "$files"' EXIT

	    local arg
	    for arg in "$@"; do
	        if [ -f "$arg" ]; then
	            arg=$(make_absolute "$arg")
	            readlink -e "$arg" \
	                >> "$files"
	        elif [ -d "$arg" ]; then
	            arg=$(make_absolute "$arg")
	            # "+" hands many files to one readlink instead of starting
	            # one process per file.
	            find "$arg" -type f -exec readlink -e {} + \
	                >> "$files"
	        else
	            echo "Not a file or directory: $arg"
	            exit 1
	        fi
	    done

	    # The sorted list is piped through grep; the "|" must be the shell's
	    # pipe operator, not an argument to sort.
	    mapfile -t unique_files \
	        < <(sort -u "$files" \
	                | grep -v ChangeLog)

	    rm -f "$files"
	    trap "" EXIT
	}

	# Make sure the cached copy of the downloaded dictionary exists.
	#
	# Globals: reads url and dictionary.
	# Does nothing when $dictionary already exists.  Otherwise downloads $url
	# and stores the non-empty lines found between <pre> and </pre> in
	# $dictionary.
	# Returns non-zero, without creating $dictionary, when the download fails.
	get_dictionary ()
	{
	    if [ -f "$dictionary" ]; then
	        return
	    fi

	    local webpage
	    webpage=$(mktemp)
	    trap 'rm -f "$webpage"' EXIT

	    # Download web page containing table.  The cache file is written only
	    # after a successful download: a file created from a failed download
	    # would be taken for a valid dictionary by every later run.
	    local status
	    status=0
	    if wget "$url" -O "$webpage"; then
	        # Extract table from web page.  The stages are connected with the
	        # shell's pipe operator.
	        awk '/<pre>/,/<\/pre>/' "$webpage" \
	            | sed 's/<pre>//;s/<\/pre>//' \
	            | grep -E -v "^$" \
	                   > "$dictionary"
	    else
	        echo "Failed to download $url" >&2
	        status=1
	    fi

	    rm -f "$webpage"
	    trap "" EXIT

	    return "$status"
	}

	# Print the local dictionary, leaving out lines that start with "#" and
	# empty lines.
	output_local_dictionary ()
	{
	    # The "|" is the regexp alternation between the two line patterns;
	    # written as "\|" it would match a literal pipe under grep -E.
	    grep -E -v \
	        "^#|^$" \
	        "$local_dictionary"
	}

	# Print the local dictionary followed by the downloaded one, leaving out
	# every line that contains an upper-case letter.
	output_dictionaries ()
	{
	    # Both outputs go through one grep, connected with the shell's pipe
	    # operator.
	    {
	        output_local_dictionary
	        cat "$dictionary"
	    } | grep -E -v "[A-Z]"
	}

	# Fill the words and replacements arrays from the dictionaries.
	#
	# Every dictionary line is split at "->": the first field goes to words,
	# the second to replacements, at the same index.  Rules that must not be
	# applied are kept as empty strings in both arrays.
	parse_dictionary ()
	{
	    # Parse dictionary.
	    mapfile -t words \
	        < <(output_dictionaries | awk -F '->' '{print $1}')
	    mapfile -t replacements \
	        < <(output_dictionaries | awk -F '->' '{print $2}')

	    local -A seen
	    local idx entry drop
	    for idx in "${!words[@]}"; do
	        entry=${words[idx]}
	        drop=false

	        if [ "${seen[$entry]}" == 1 ]; then
	            # Word already handled by an earlier rule.  The local
	            # dictionary comes first, so it overrides the wiki dictionary.
	            drop=true
	        else
	            seen[$entry]=1

	            # Identity rules have nothing to replace.
	            if [ "$entry" = "${replacements[idx]}" ]; then
	                drop=true
	            fi
	        fi

	        if $drop; then
	            words[idx]=""
	            replacements[idx]=""
	        fi
	    done
	}

	# Print every rule with a non-empty word as "word -> replacement", one
	# rule per line.
	#
	# Globals: reads the words and replacements arrays.
	print_dictionary ()
	{
	    local idx
	    for idx in "${!words[@]}"; do
	        # Empty words mark rules that were dropped.
	        if [ "${words[idx]}" == "" ]; then
	            continue
	        fi

	        echo "${words[idx]} -> ${replacements[idx]}"
	    done
	}

	# Print the names of those files among the arguments that contain at least
	# one dictionary word, as written or with its first letter in upper case,
	# delimited by the configured pre/separator/post patterns.
	#
	# Arguments: the files to search.
	# Globals:   reads words, local_dictionary, dictionary, this_script,
	#            cache_dir, cache_file2 and the grep_* arrays.
	# The combined pattern is cached in $cache_dir under a name ending in the
	# md5sum of both dictionaries and this script.
	# Returns 1 without searching when no files are given.
	find_files_matching_words ()
	{
	    local cache_id
	    cache_id=$(cat "$local_dictionary" "$dictionary" "$this_script" \
	                   | md5sum \
	                   | awk '{print $1}')

	    local patfile
	    patfile="$cache_dir/$cache_file2".$cache_id

	    local pat
	    if [ -f "$patfile" ]; then
	        pat=$(cat "$patfile")
	    else
	        # Remove patterns cached under any other id.
	        rm -f "$cache_dir/$cache_file2".*

	        # Turn each word into a regexp that also accepts an upper-case
	        # first letter, e.g. "teh" becomes "[Tt]eh".
	        local -a re_words
	        mapfile -t re_words \
	            < <(for f in "${words[@]}"; do
	                    if [ "$f" = "" ]; then
	                        continue
	                    fi
	                    echo "$f"
	                done \
	                    | sed "s/^\(.\)/[\u\1\1]/")

	        pat=$(grep_join "${re_words[@]}")

	        local before after
	        before=$(grep_join \
	                     "${grep_pre[@]}" \
	                     "${grep_separators[@]}")
	        after=$(grep_join \
	                    "${grep_separators[@]}" \
	                    "${grep_post[@]}")

	        pat="$before$pat$after"

	        echo "$pat" \
	            > "$patfile"
	    fi

	    # Without file arguments grep would search standard input instead.
	    if [ $# -eq 0 ]; then
	        return 1
	    fi

	    grep -E \
	        -l \
	        "$pat" \
	        "$@"
	}

	# Print the names of those files that contain the given word, as written
	# or with its first letter in upper case, delimited by the configured
	# pre/separator/post patterns.
	#
	# Arguments: $1 - word to look for
	#            $2... - files to search
	# Globals:   reads the grep_pre, grep_separators and grep_post arrays.
	find_files_matching_word ()
	{
	    local pat
	    pat="$1"
	    shift

	    local before after
	    before=$(grep_join \
	                 "${grep_pre[@]}" \
	                 "${grep_separators[@]}")
	    after=$(grep_join \
	                "${grep_separators[@]}" \
	                "${grep_post[@]}")

	    # Variant of the word with an upper-case first letter.  Declared local
	    # so that it does not end up in the global scope.
	    local patc
	    if bash_version_at_least 5 1; then
	        patc=${pat@u}
	    else
	        # shellcheck disable=SC2001
	        patc=$(echo "$pat" | sed 's/^\(.\)/\u\1/')
	    fi
	    # Bare "|" is the alternation operator of grep -E.
	    pat="($patc|$pat)"

	    pat="$before$pat$after"

	    grep -E \
	        -l \
	        "$pat" \
	        "$@"
	}

	# Replace, in place, every delimited occurrence of a word in a file.  The
	# variant of the word with an upper-case first letter is replaced by the
	# replacement with an upper-case first letter.
	#
	# Arguments: $1 - word to replace
	#            $2 - replacement
	#            $3 - file to edit
	# Globals:   reads the sed_pre, sed_separators and sed_post arrays.
	replace_word_in_file ()
	{
	    local word
	    word="$1"

	    local replacement
	    replacement="$2"

	    local file
	    file="$3"

	    local before after
	    before=$(sed_join \
	                 "${sed_pre[@]}" \
	                 "${sed_separators[@]}")
	    after=$(sed_join \
	                "${sed_separators[@]}" \
	                "${sed_post[@]}")

	    # Declared local so that they do not end up in the global scope.
	    local wordc replacementc
	    if bash_version_at_least 5 1; then
	        wordc=${word@u}
	        replacementc=${replacement@u}
	    else
	        # shellcheck disable=SC2001
	        wordc=$(echo "$word" | sed 's/^\(.\)/\u\1/')
	        # shellcheck disable=SC2001
	        replacementc=$(echo "$replacement" | sed 's/^\(.\)/\u\1/')
	    fi

	    # \1 and \2 put back the delimiters matched before and after the word.
	    local repl1
	    local repl2
	    repl1="s%$before$word$after%\1$replacement\2%g"
	    repl2="s%$before$wordc$after%\1$replacementc\2%g"

	    sed -i \
	        "$repl1;$repl2" \
	        "$file"
	}

	# Apply one dictionary rule to a set of files and report the result.
	#
	# Arguments: $1 - word to replace
	#            $2 - replacement
	#            $3... - candidate files
	# Outputs:   nothing when no file contains the word;
	#            "TODO: word -> replacement" when the replacement contains a
	#            comma, in which case no file is modified;
	#            "word -> replacement" when at least one file was changed;
	#            one "TODO: ...: replacement failed: FILE" line for each file
	#            that still contains the word afterwards.
	replace_word_in_files ()
	{
	    local word
	    word="$1"

	    local replacement
	    replacement="$2"

	    shift 2

	    local id
	    id="$word -> $replacement"

	    # Reduce set of files for sed to operate on.
	    local -a files_matching_word
	    mapfile -t files_matching_word \
	        < <(find_files_matching_word "$word" "$@")

	    if [ ${#files_matching_word[@]} -eq 0 ]; then
	        return
	    fi

	    # Replacements containing a comma are not applied, only reported.
	    if [[ "$replacement" == *,* ]]; then
	        echo "TODO: $id"
	        return
	    fi

	    # Compare checksums taken before and after the edit to find out
	    # whether sed actually changed a file.
	    local changed f before after
	    changed=false
	    for f in "${files_matching_word[@]}"; do
	        before=$(md5sum "$f")

	        replace_word_in_file \
	            "$word" \
	            "$replacement" \
	            "$f"

	        after=$(md5sum "$f")

	        if [ "$after" != "$before" ]; then
	            changed=true
	        fi
	    done

	    if $changed; then
	        echo "$id"
	    fi

	    # Report files that still contain the word.  The rule text is passed
	    # to printf as an argument, never as part of a program or a format
	    # string, so characters such as '"' or '%' in it are harmless.
	    while IFS= read -r f; do
	        printf 'TODO: %s: replacement failed: %s\n' "$id" "$f"
	    done < <(find_files_matching_word "$word" "${files_matching_word[@]}")
	}

	# Entry point.  Parses the arguments, loads the dictionaries, and then
	# either prints the dictionary (exit status 0), or applies every rule to
	# the files that contain at least one dictionary word.  With check set,
	# exits with status 1 instead of modifying files when such files exist.
	main ()
	{
	    local -a unique_files
	    check=false
	    print_dictionary=false
	    parse_args "$@"

	    get_dictionary

	    local -a words replacements
	    parse_dictionary

	    if $print_dictionary; then
	        print_dictionary
	        exit 0
	    fi

	    # Reduce set of files for sed to operate on.
	    local -a files_matching_words
	    mapfile -t files_matching_words \
	        < <(find_files_matching_words "${unique_files[@]}")

	    if [ ${#files_matching_words[@]} -eq 0 ]; then
	        return
	    fi

	    if $check; then
	        exit 1
	    fi

	    local idx
	    for idx in "${!words[@]}"; do
	        # Empty words mark rules that were dropped.
	        if [ "${words[idx]}" = "" ]; then
	            continue
	        fi

	        replace_word_in_files \
	            "${words[idx]}" \
	            "${replacements[idx]}" \
	            "${files_matching_words[@]}"
	    done
	}

	# Run the script with the command-line arguments.
	main "$@"