blob: 7752d75522669039bec156168937d9ce487bad1d [file] [log] [blame]
# Copyright 2025 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Friendlier regexp facility.
# Map from quotemeta names to regular expressions. Internal to this
# module.
array set _quotemeta {}
# This is a friendlier regexp facility. It takes an input "quotemeta"
# string and returns a regular expression. The intent is to use
# quotemeta expressions wherever a complicated regexp might otherwise
# be needed.
#
# Any ordinary text in a quotemeta expression simply matches itself.
# Such text is properly regexp-quoted in the return value. So, the
# quotemeta expression "xyz." matches exactly the text "xyz." -- the
# "." here is just a "." and not a special regexp character, and the
# resulting regular expression will reflect this.
#
# Special matches are introduced by the "@" character. There are
# various forms of this:
#
# 1. "@NAME", where NAME was previously registered by
# define_quotemeta. This simply expands to whatever regexp was
# provided at registration time. For example, the predefined @HEX
# matches a hex constant starting with 0x.
#
# 2. "@...". This expands to ".*" and is a just a handy catch-all.
#
# 3. "@@". This matches a single "@".
#
# 4. "@/xyz/". This matches the regular expression "xyz". Note that
# in this case, "xyz" may not contain a "/".
#
# 5. "@{...}", "@{{...}}", etc. This is "raw string" style, where any
# number of leading braces can be used, and then the substitution
# must end with the corresponding number of closing braces. The
# contents are then recognized as in cases 1..4. For example,
# @{NAME} and @NAME are the same. The {} notation is handy when
# you want to match a "/" in a regexp, and {{..}} is handy if you
# want to match a "}" in a regexp. @{...} is also useful when you
# want to make the boundary between a match and non-whitespace text
# more clear, like "@HEXname" versus "@{HEX}name".
#
# Finally, a space in a quotemeta string always matches any amount of
# whitespace (at least one). So, "@HEX hi" will match "0x23 hi" but
# also "0x23 hi". This is handy when dissecting gdb table output.
proc quotemeta {str} {
set result ""
# Perform the quotemeta substitutions. This is done in a somewhat
# odd way so that "@" can be supported inside a regular
# expression. I.e., "@/...@.../" is valid. This lets users avoid
# excess quoting.
while true {
regexp -- "^(\[^@\]*)(@(.*))?$" $str ignore prefix full_at at_text
append result [string_to_regexp $prefix]
if {$full_at == ""} {
# No more substitutions, we're done.
break
}
if {[regexp -- "^\{+" $at_text braces]} {
set closer [string repeat "\}" [string length $braces]]
# We want a non-greedy match here in case the input looks
# like @{X}...stuff...@{Y}.
if {![regexp -- "^${braces}(.*?)${closer}(.*)$" $at_text ignore \
at_text remainder]} {
error "invalid quotemeta with @$braces..."
}
} else {
# Match the usual forms.
if {![regexp -- "^(\\.\\.\\.|\[A-Z\]\[A-Z_\]*|@|/\[^/\]*/)(.*)$" \
$at_text ignore at_text remainder]} {
error "invalid quotemeta expression"
}
}
# Now AT_TEXT is the quotemeta substitution.
if {$at_text == "..."} {
append result ".*"
} elseif {$at_text == "@"} {
append result "@"
} elseif {[regexp -- "^/(.*)/$" $at_text ignore rx]} {
# Note that here we don't use [^/] in the regexp, because
# we want to support @{/blah/blah/}, where the regexp has
# an embedded "/".
append result $rx
} else {
append result $::_quotemeta($at_text)
}
set str $remainder
}
# Now replace any whitespace with a regular expression that will
# match any amount of whitespace. This is done after constructing
# the regular expression so we don't regexp-quote the \s+.
return [regsub -all -- "\\s+" $result "\\\\s+"]
}
# Define a new quotemeta substitution.
#
# NAME is the name that is used in the quotemeta string. While the
# implementation doesn't really care, for now we force this to be an
# upper-case identifier, that is, starts with a capital letter and
# only contains capital letters and underscores.
#
# RX is the regular expression that is used to implement the matching
# for this name.
proc define_quotemeta {name rx} {
if {![regexp -- "^\[A-Z\]\[A-Z_\]*$" $name]} {
error "bad quotemeta name"
}
set ::_quotemeta($name) $rx
}
# Some pre-defined quotemeta operators, available everywhere.
# Match a hex number.
define_quotemeta HEX $hex
# Match a decimal number.
define_quotemeta DECIMAL $decimal