blob: 4f45bc263410071078909925129418d407509753 [file]
#!/usr/bin/env python3
# Tool for canonical RISC-V architecture string.
# Copyright (C) 2011-2026 Free Software Foundation, Inc.
# Contributed by Andrew Waterman (andrew@sifive.com).
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GCC is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
from __future__ import print_function
import sys
import argparse
import collections
import itertools
import re
import os
from functools import reduce
# ISA specification versions accepted by the -misa-spec option.
SUPPORTED_ISA_SPEC = ["2.2", "20190608", "20191213"]
# Canonical ordering of the single-letter standard extensions.
CANONICAL_ORDER = "imafdqlcbkjtpvnh"
# Prefix letters that introduce multi-letter extensions (z*, s*, h*, x*).
LONG_EXT_PREFIXES = ['z', 's', 'h', 'x']
def parse_define_riscv_ext(content):
    """Parse DEFINE_RISCV_EXT macro invocations out of *content*.

    Uses position-based scanning with parenthesis balancing (rather than a
    single regex) so that nested parentheses inside macro arguments are
    handled correctly.

    Returns a list of dicts as produced by parse_macro_arguments, one per
    complete DEFINE_RISCV_EXT(...) block found.
    """
    extensions = []
    pattern = r'DEFINE_RISCV_EXT\s*\('
    pos = 0
    while True:
        match = re.search(pattern, content[pos:])
        if not match:
            break
        # Index of the opening parenthesis of this macro invocation.
        current_pos = pos + match.end() - 1
        paren_count = 0
        # Scan forward to the matching closing parenthesis.
        while current_pos < len(content):
            if content[current_pos] == '(':
                paren_count += 1
            elif content[current_pos] == ')':
                paren_count -= 1
                if paren_count == 0:
                    break
            current_pos += 1
        if paren_count == 0:
            # Extract and parse the argument list between the parentheses.
            macro_content = content[pos + match.end():current_pos]
            ext_data = parse_macro_arguments(macro_content)
            if ext_data:
                extensions.append(ext_data)
        # Unbalanced macros (truncated file) are skipped; resume scanning
        # after the last character examined.
        pos = current_pos + 1
    return extensions
def parse_macro_arguments(macro_content):
    """Parse the arguments of a DEFINE_RISCV_EXT macro.

    Splits *macro_content* (the text between the macro's outer parentheses)
    on top-level commas, respecting nested parentheses, braces, string
    literals and backslash escapes.

    Returns {'name': ..., 'dep_exts': [...]} or None when fewer than six
    arguments are present (DEP_EXTS is the sixth argument).
    """
    # Remove comments /* ... */.  Non-greedy with DOTALL so comments that
    # contain '*' (e.g. "/** doc */") or span several lines are stripped too;
    # the previous pattern /\*[^*]*\*/ left such comments in place.
    cleaned_content = re.sub(r'/\*.*?\*/', '', macro_content, flags=re.DOTALL)
    # Split arguments by comma, but respect nested structures.
    args = []
    current_arg = ""
    paren_count = 0
    brace_count = 0
    in_string = False
    escape_next = False
    for char in cleaned_content:
        if escape_next:
            # Previous char was a backslash: take this one literally.
            current_arg += char
            escape_next = False
            continue
        if char == '\\':
            escape_next = True
            current_arg += char
            continue
        if char == '"':
            # escape_next is always False here (handled above), so every
            # unescaped quote simply toggles string state.
            in_string = not in_string
            current_arg += char
            continue
        if in_string:
            current_arg += char
            continue
        if char == '(':
            paren_count += 1
        elif char == ')':
            paren_count -= 1
        elif char == '{':
            brace_count += 1
        elif char == '}':
            brace_count -= 1
        elif char == ',' and paren_count == 0 and brace_count == 0:
            # Top-level comma: argument boundary.
            args.append(current_arg.strip())
            current_arg = ""
            continue
        current_arg += char
    # Add the last argument.
    if current_arg.strip():
        args.append(current_arg.strip())
    # We need at least 6 arguments to reach DEP_EXTS (position 5).
    if len(args) < 6:
        return None
    ext_name = args[0].strip()
    dep_exts_arg = args[5].strip()  # DEP_EXTS is at position 5.
    # Parse dependency extensions from the DEP_EXTS argument.
    deps = parse_dep_exts(dep_exts_arg)
    return {
        'name': ext_name,
        'dep_exts': deps
    }
def parse_dep_exts(dep_exts_str):
    """Extract the dependency list (with conditions) from a DEP_EXTS argument."""
    text = dep_exts_str.strip()
    # Peel an outer (...) wrapper, then an outer {...} wrapper, if present.
    if text.startswith('(') and text.endswith(')'):
        text = text[1:-1].strip()
    if text.startswith('{') and text.endswith('}'):
        text = text[1:-1].strip()
    if not text:
        return []
    deps = []
    spans_to_drop = []
    # Pass 1: conditional dependencies of the form
    #   {"name", [...] (...) -> bool { ... }}
    conditional_pattern = r'\{\s*"([^"]+)"\s*,\s*(\[.*?\]\s*\([^)]*\)\s*->\s*bool.*?)\}'
    for match in re.finditer(conditional_pattern, text, re.DOTALL):
        name = match.group(1)
        condition_code = match.group(2)
        deps.append({'ext': name, 'type': 'conditional', 'condition': condition_code})
        # The RE above stops at the first '}' inside the condition body.
        # Walk forward, balancing opening against closing braces, until the
        # whole condition block is covered -- crude, but it avoids writing a
        # real C++ lambda parser here.
        unbalanced = condition_code.count('{') - condition_code.count('}')
        end = match.end()
        while unbalanced > 0:
            end = text.find('}', end)
            unbalanced -= 1
        spans_to_drop.append((match.start(), end))
    # Cut the conditional blocks out, back to front so indices stay valid.
    for start, end in reversed(spans_to_drop):
        text = text[:start] + text[end:]
    # Pass 2: plain quoted names in whatever text remains.
    for match in re.finditer(r'"([^"]+)"', text):
        deps.append({'ext': match.group(1), 'type': 'simple'})
    # De-duplicate while keeping first-seen order.
    seen = set()
    result = []
    for dep in deps:
        key = (dep['ext'], dep['type'])
        if key not in seen:
            seen.add(key)
            result.append(dep)
    return result
def evaluate_conditional_dependency(ext, dep, xlen, current_exts):
    """Decide whether conditional dependency *dep* of *ext* applies.

    *xlen* is 32 or 64 and *current_exts* is the set of extensions enabled
    so far.  The C++ condition bodies from the .def files are translated
    here by hand, keyed on the (dependency, dependent) pair.
    """
    dep_name = dep['ext']
    condition = dep['condition']
    if dep_name == 'zcf' and ext in ('zca', 'c', 'zce'):
        # zcf requires RV32 plus the F extension.
        return xlen == 32 and 'f' in current_exts
    if dep_name == 'zcd' and ext in ('zca', 'c'):
        # zcd requires the D extension.
        return 'd' in current_exts
    if dep_name == 'c' and ext == 'zca':
        # zca implies c only when the matching compressed FP subsets are
        # also present (or no FP extension is enabled at all).
        if xlen == 32:
            if 'd' in current_exts:
                return 'zcf' in current_exts and 'zcd' in current_exts
            if 'f' in current_exts:
                return 'zcf' in current_exts
            return True
        if xlen == 64:
            if 'd' in current_exts:
                return 'zcd' in current_exts
            return True
        return False
    # Unknown condition: report it and conservatively exclude the dependency.
    print(f"ERROR: Unhandled conditional dependency: '{dep_name}' with condition:", file=sys.stderr)
    print(f" Condition code: {condition[:100]}...", file=sys.stderr)
    print(f" Current context: xlen={xlen}, exts={sorted(current_exts)}", file=sys.stderr)
    return False
def resolve_dependencies(arch_parts, xlen):
    """Transitively resolve implied extensions, including conditional ones.

    Returns the set of extensions implied by *arch_parts* that were not
    already listed in it.  Uses the module-level IMPLIED_EXT table.
    """
    explicit = set(arch_parts)
    implied = set()
    # Fixed-point iteration: keep adding dependencies until a full pass
    # discovers nothing new.
    while True:
        added = set()
        for ext in explicit | implied:
            for dep in IMPLIED_EXT.get(ext, ()):
                name = dep['ext']
                if dep['type'] == 'simple':
                    if name not in explicit and name not in implied:
                        added.add(name)
                elif dep['type'] == 'conditional':
                    # Conditions see the extensions known before this pass.
                    if evaluate_conditional_dependency(ext, dep, xlen, explicit | implied):
                        if name not in explicit and name not in implied:
                            added.add(name)
        if not added:
            break
        implied |= added
    return implied
def parse_def_file(file_path, script_dir, processed_files=None, collect_all=False):
    """Parse one .def file, following #include directives recursively.

    Returns a dict mapping extension name -> dependency list; when
    *collect_all* is true, returns (that dict, set of every extension name
    seen) instead.  Missing files yield empty results.
    """
    if processed_files is None:
        processed_files = set()
    # Guard against #include cycles.
    if file_path in processed_files:
        return ({}, set()) if collect_all else {}
    processed_files.add(file_path)
    implied_ext = {}
    all_extensions = set() if collect_all else None
    if not os.path.exists(file_path):
        return (implied_ext, all_extensions) if collect_all else implied_ext
    with open(file_path, 'r') as f:
        content = f.read()
    # Recurse into #include "..." files first, merging their results.
    for include_file in re.findall(r'#include\s+"([^"]+)"', content):
        include_path = os.path.join(script_dir, include_file)
        if collect_all:
            sub_implied, sub_all = parse_def_file(include_path, script_dir,
                                                  processed_files, collect_all)
            implied_ext.update(sub_implied)
            all_extensions.update(sub_all)
        else:
            implied_ext.update(parse_def_file(include_path, script_dir,
                                              processed_files, collect_all))
    # Then parse this file's own DEFINE_RISCV_EXT blocks.
    for ext_data in parse_define_riscv_ext(content):
        name = ext_data['name']
        if collect_all:
            all_extensions.add(name)
        if ext_data['dep_exts']:
            implied_ext[name] = ext_data['dep_exts']
    return (implied_ext, all_extensions) if collect_all else implied_ext
def parse_def_files():
    """Load the implied-extension table from riscv-ext.def next to this script."""
    try:
        script_dir = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        # __file__ is undefined in interactive sessions; fall back to cwd.
        script_dir = os.getcwd()
    return parse_def_file(os.path.join(script_dir, 'riscv-ext.def'), script_dir)
def get_all_extensions():
    """Return (implied-extension map, set of all known extension names)."""
    try:
        script_dir = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        # __file__ is undefined in interactive sessions; fall back to cwd.
        script_dir = os.getcwd()
    return parse_def_file(os.path.join(script_dir, 'riscv-ext.def'), script_dir,
                          collect_all=True)
#
# IMPLIED_EXT: extension name -> list of dependency dicts
# ({'ext': ..., 'type': 'simple'|'conditional', ...}).
# This is loaded dynamically from the riscv-ext.def files at import time.
#
IMPLIED_EXT = parse_def_files()
def load_profiles():
    """Return the set of profile names listed in riscv-profiles.def.

    Resolves the .def file relative to this script, with the same
    interactive-mode fallback as the other .def loaders.
    """
    try:
        # Consistent with parse_def_files/get_all_extensions: abspath plus
        # a NameError guard so interactive use does not crash.
        script_dir = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        # __file__ is undefined in interactive sessions; fall back to cwd.
        script_dir = os.getcwd()
    profiles = set()
    def_path = os.path.join(script_dir, "riscv-profiles.def")
    with open(def_path) as f:
        for line in f:
            line = line.strip()
            if line.startswith("RISCV_PROFILE"):
                # Format: RISCV_PROFILE("rva20u64", "rv64imafd...")
                parts = line.split('"')
                if len(parts) >= 2:
                    profiles.add(parts[1])  # PROFILE_NAME is the first quoted field.
    return profiles
SUPPORTED_PROFILES = load_profiles()
def is_profile_arch(arch):
    """Return True when *arch* names a supported RISC-V profile (e.g. rva20u64)."""
    return arch in SUPPORTED_PROFILES
def arch_canonicalize(arch: str, isa_spec: str) -> str:
    """Return the canonical form of architecture string *arch*.

    Expands 'g' into its constituent extensions, resolves implied
    extensions for the given ISA spec version, and emits the extensions in
    canonical order.  Raises Exception on a malformed or unsupported arch
    string.
    """
    # TODO: Support extension version.
    is_isa_spec_2p2 = isa_spec == '2.2'
    new_arch = ""
    extra_long_ext = []
    std_exts = []
    if arch[:5] in ['rv32e', 'rv32i', 'rv32g', 'rv64e', 'rv64i', 'rv64g']:
        # 'g' abbreviates imafd (plus zicsr/zifencei on modern ISA specs).
        new_arch = arch[:5].replace("g", "i")
        if arch[:5] in ['rv32g', 'rv64g']:
            std_exts = ['m', 'a', 'f', 'd']
            if not is_isa_spec_2p2:
                # ISA spec 2.2 folded CSR access and fence.i into the base;
                # newer specs split them out as zicsr/zifencei.
                extra_long_ext = ['zicsr', 'zifencei']
    else:
        raise Exception("Unexpected arch: `%s`" % arch[:5])
    # Find any Z, S, H or X
    long_ext_prefixes_idx = map(lambda x: arch.find(x), LONG_EXT_PREFIXES)
    # Filter out any non-existent index.
    long_ext_prefixes_idx = list(filter(lambda x: x != -1, long_ext_prefixes_idx))
    if long_ext_prefixes_idx:
        # Everything from the first multi-letter prefix onward is a
        # '_'-separated list of multi-letter extensions; the letters between
        # the base and that point are single-letter standard extensions.
        # NOTE(review): arch.find searches the whole string, so a bare 'h'
        # standard extension would be mistaken for a prefix here — presumed
        # acceptable upstream; confirm before changing.
        first_long_ext_idx = min(long_ext_prefixes_idx)
        long_exts = arch[first_long_ext_idx:].split("_")
        std_exts += list(arch[5:first_long_ext_idx])
    else:
        long_exts = []
        std_exts += list(arch[5:])
    long_exts += extra_long_ext
    #
    # Handle implied extensions using new conditional logic.
    #
    # Extract xlen from architecture string
    # TODO: We should support profile here.
    if arch.startswith('rv32'):
        xlen = 32
    elif arch.startswith('rv64'):
        xlen = 64
    else:
        raise Exception("Unsupported prefix `%s`" % arch)
    # Get all current extensions
    current_exts = std_exts + long_exts
    # Resolve dependencies
    implied_deps = resolve_dependencies(current_exts, xlen)
    # Filter out zicsr for ISA spec 2.2
    if is_isa_spec_2p2:
        implied_deps.discard('zicsr')
    # Add implied dependencies to long_exts
    for dep in implied_deps:
        if dep not in current_exts:
            long_exts.append(dep)
    # Single letter extension might appear in the long_exts list,
    # because we just append extensions list to the arch string.
    std_exts += list(filter(lambda x:len(x) == 1, long_exts))
    def longext_sort (exts):
        # Sort key: x* last, zxm* before them, then by prefix class
        # (z/s/h/x), then z* by the canonical order of their second letter,
        # then lexicographically.
        if not exts.startswith("zxm") and exts.startswith("z"):
            # If "Z" extensions are named, they should be ordered first by CANONICAL.
            if exts[1] not in CANONICAL_ORDER:
                raise Exception("Unsupported extension `%s`" % exts)
            canonical_sort = CANONICAL_ORDER.index(exts[1])
        else:
            canonical_sort = -1
        return (exts.startswith("x"), exts.startswith("zxm"),
                LONG_EXT_PREFIXES.index(exts[0]), canonical_sort, exts[1:])
    # Removing duplicates.
    long_exts = list(set(long_exts))
    # Multi-letter extension must be in lexicographic order.
    long_exts = list(sorted(filter(lambda x:len(x) != 1, long_exts),
                            key=longext_sort))
    # Put extensions in canonical order.
    for ext in CANONICAL_ORDER:
        if ext in std_exts:
            new_arch += ext
    # Check every extension is processed.
    for ext in std_exts:
        if ext == '_':
            continue
        if ext not in CANONICAL_ORDER:
            raise Exception("Unsupported extension `%s`" % ext)
    # Concat rest of the multi-char extensions.
    if long_exts:
        new_arch += "_" + "_".join(long_exts)
    return new_arch
def dump_all_extensions():
    """Print every known extension together with its dependency list."""
    implied_ext, all_extensions = get_all_extensions()
    print("All supported RISC-V extensions:")
    print("=" * 60)
    if not all_extensions:
        print("No extensions found.")
        return
    # Stable, sorted listing; conditional dependencies get a trailing '*'.
    for ext_name in sorted(all_extensions):
        if ext_name in implied_ext:
            rendered = [d['ext'] if d['type'] == 'simple' else f"{d['ext']}*"
                        for d in implied_ext[ext_name]]
            print(f"{ext_name:15} -> {', '.join(rendered)}")
        else:
            print(f"{ext_name:15} -> (no dependencies)")
    print(f"\nTotal extensions: {len(all_extensions)}")
    print(f"Extensions with dependencies: {len(implied_ext)}")
    print(f"Extensions without dependencies: {len(all_extensions) - len(implied_ext)}")
def run_unit_tests():
    """Run the built-in self tests.

    pytest must be importable (kept as a hard requirement so the selftest
    environment matches CI), but the tests themselves are executed directly
    by this function.  Returns 0 when every test passes, 1 otherwise.
    """
    try:
        import pytest
    except ImportError:
        print("Error: pytest is required for running unit tests.")
        print("Please install pytest: pip install pytest")
        return 1
    # Define test functions
    def test_basic_arch_parsing():
        """Test basic architecture string parsing."""
        result = arch_canonicalize("rv64i", "20191213")
        assert result == "rv64i"
    def test_simple_extensions():
        """Test simple extension handling."""
        result = arch_canonicalize("rv64im", "20191213")
        assert "zmmul" in result
    def test_implied_extensions():
        """Test implied extension resolution."""
        result = arch_canonicalize("rv64imaf", "20191213")
        assert "zicsr" in result
    def test_conditional_dependencies():
        """Test conditional dependency evaluation."""
        # RV32 with F: c should pull in zca, and f+zca should pull in zcf.
        result = arch_canonicalize("rv32ifc", "20191213")
        parts = result.split("_")
        if "c" in parts:
            assert "zca" in parts
        if "f" in parts:
            assert "zcf" in parts
    def test_parse_dep_exts():
        """Test dependency parsing function."""
        # Test simple dependency
        deps = parse_dep_exts('{"ext1", "ext2"}')
        assert len(deps) == 2
        assert deps[0]['ext'] == 'ext1'
        assert deps[0]['type'] == 'simple'
    def test_evaluate_conditional_dependency():
        """Test conditional dependency evaluation."""
        # Test zcf condition for RV32 with F
        dep = {'ext': 'zcf', 'type': 'conditional', 'condition': 'test'}
        result = evaluate_conditional_dependency('zce', dep, 32, {'f'})
        assert result == True
        # Test zcf condition for RV64 with F (should be False)
        result = evaluate_conditional_dependency('zce', dep, 64, {'f'})
        assert result == False
    def test_parse_define_riscv_ext():
        """Test DEFINE_RISCV_EXT parsing."""
        content = '''
        DEFINE_RISCV_EXT(
          /* NAME */ test,
          /* UPPERCASE_NAME */ TEST,
          /* FULL_NAME */ "Test extension",
          /* DESC */ "",
          /* URL */ ,
          /* DEP_EXTS */ ({"dep1", "dep2"}),
          /* SUPPORTED_VERSIONS */ ({{1, 0}}),
          /* FLAG_GROUP */ test,
          /* BITMASK_GROUP_ID */ 0,
          /* BITMASK_BIT_POSITION*/ 0,
          /* EXTRA_EXTENSION_FLAGS */ 0)
        '''
        extensions = parse_define_riscv_ext(content)
        assert len(extensions) == 1
        assert extensions[0]['name'] == 'test'
        assert len(extensions[0]['dep_exts']) == 2
    def test_parse_long_condition_block():
        """Test condition block containing several code blocks."""
        result = arch_canonicalize("rv32ec", "20191213")
        assert "rv32ec_zca" in result
    # Collect test functions
    test_functions = [
        test_basic_arch_parsing,
        test_simple_extensions,
        test_implied_extensions,
        test_conditional_dependencies,
        test_parse_dep_exts,
        test_evaluate_conditional_dependency,
        test_parse_define_riscv_ext,
        test_parse_long_condition_block
    ]
    # Execute each test directly, reporting pass/fail as we go.
    print("Running unit tests...")
    passed = 0
    failed = 0
    for test_func in test_functions:
        try:
            print(f" Running {test_func.__name__}...", end=" ")
            test_func()
            print("PASSED")
            passed += 1
        except Exception as e:
            print(f"FAILED: {e}")
            failed += 1
    print(f"\nTest Summary: {passed} passed, {failed} failed")
    if failed == 0:
        print("\nAll tests passed!")
        return 0
    else:
        print(f"\n{failed} test(s) failed!")
        return 1
if __name__ == "__main__":
    # Command-line driver: canonicalize arch strings, dump the extension
    # table, or run the built-in self tests.
    parser = argparse.ArgumentParser()
    # Single-dash long option kept for GCC build-system compatibility;
    # argparse exposes it as args.misa_spec.
    parser.add_argument('-misa-spec', type=str,
                        default='20191213',
                        choices=SUPPORTED_ISA_SPEC)
    parser.add_argument('--dump-all', action='store_true',
                        help='Dump all extensions and their implied extensions')
    parser.add_argument('--selftest', action='store_true',
                        help='Run unit tests using pytest')
    parser.add_argument('arch_strs', nargs='*',
                        help='Architecture strings to canonicalize')
    args = parser.parse_args()
    if args.dump_all:
        dump_all_extensions()
    elif args.selftest:
        sys.exit(run_unit_tests())
    elif args.arch_strs:
        for arch in args.arch_strs:
            # Profile names pass through untouched; everything else is
            # canonicalized against the selected ISA spec version.
            if is_profile_arch(arch):
                print(arch)
            else:
                print(arch_canonicalize(arch, args.misa_spec))
    else:
        # No work requested: show usage and signal failure.
        parser.print_help()
        sys.exit(1)