| #!/usr/bin/env python3 |
| |
| # Copyright (C) 2023-2025 Free Software Foundation, Inc. |
| # |
| # Script to regenerate attr-urls.def from generated HTML. |
| # |
| # This file is part of GCC. |
| # |
| # GCC is free software; you can redistribute it and/or modify it under |
| # the terms of the GNU General Public License as published by the Free |
| # Software Foundation; either version 3, or (at your option) any later |
| # version. |
| # |
| # GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| # WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| # for more details. |
| # |
| # You should have received a copy of the GNU General Public License |
| # along with GCC; see the file COPYING3. If not see |
| # <http://www.gnu.org/licenses/>. |
| |
# Free-form help text; handed to argparse as the program description, so it
# is what "--help" prints ahead of the argument list.
DESCRIPTION = """
Parses the generated HTML (from "make html") to locate anchors
for attributes, and generates a gcc/attr-urls.def file in the source tree,
giving URLs for each attribute, where it can.

Usage (from build/gcc subdirectory):
../../src/gcc/regenerate-attr-urls.py HTML/gcc-15.0.0/ ../../src

To run unit tests:
../../src/gcc/regenerate-attr-urls.py HTML/gcc-15.0.0/ ../../src --unit-test
"""
| |
| import argparse |
| import json |
| import os |
| from pathlib import Path |
| from pprint import pprint |
| import sys |
| import re |
| import unittest |
| |
class Index:
    """Attribute anchors collected from the generated HTML index page.

    Anchors are fed in one HTML line at a time; each accepted anchor is
    recorded both in encounter order (``entries``) and grouped by the kind
    of attribute it documents (``entries_by_kind``).  The grouped form is
    what gets written out as C tables by ``write_file``.
    """

    # Matches a single index anchor and captures, in order:
    #   the href, the attribute name, the attribute kind, any trailing text.
    # e.g. <a href="Common-Function-Attributes.html#index-access-function-attribute"><code>access</code> function attribute</a>
    # e.g. <a href="MIPS-Function-Attributes.html#index-nocompression-function-attribute_002c-MIPS"><code class="code">nocompression</code> function attribute, MIPS</a>
    # Compiled once here instead of being looked up again for every line.
    _ANCHOR_RE = re.compile(
        r'<a href="([\S]+)"><code[^>]*>([\S]+)</code> (\S+) attribute([^<]*)</a>')

    def __init__(self):
        # Every accepted anchor, in the order it was added, as
        # (url_suffix, name, kind, extra_text).
        self.entries = []
        # Maps kind -> list of (name, url_suffix, extra_text).
        self.entries_by_kind = {}

    def add_entry(self, url_suffix, name, kind, extra_text, verbose=False):
        """Record one attribute anchor.

        url_suffix: the anchor's href, as found in the index page.
        name: the attribute name.
        kind: the word preceding "attribute" in the anchor text
              (e.g. 'function', 'type').
        extra_text: text following "attribute" in the anchor, or ''.
        verbose: accepted for interface compatibility; currently unused.
        """
        self.entries.append((url_suffix, name, kind, extra_text))
        self.entries_by_kind.setdefault(kind, []).append(
            (name, url_suffix, extra_text))

    def parse_attribute_index(self, input_filename, verbose=False):
        """Parse every line of the HTML file INPUT_FILENAME for anchors."""
        # Decode explicitly as UTF-8 rather than depending on whatever the
        # locale's default encoding happens to be.
        with open(input_filename, encoding='utf-8') as f:
            for line in f:
                self.parse_html_line_attribute_index(line, verbose)

    def parse_html_line_attribute_index(self, line, verbose=False):
        """Look for an attribute anchor in LINE and record it if found.

        Lines without a matching anchor are ignored, as are anchors whose
        attribute name contains a parenthesis.  If VERBOSE, the raw line
        and the regex groups are printed for debugging.
        """
        if verbose:
            print(repr(line))

        # Update for this in the GCC website's bin/preprocess process_html_file:
        #   | sed -e 's/_002d/-/g' -e 's/_002a/*/g' \
        line = line.replace('_002d', '-')
        line = line.replace('_002a', '*')

        m = self._ANCHOR_RE.search(line)
        if not m:
            return
        if verbose:
            print(m.groups())

        url_suffix, name, kind, extra_text = m.groups()

        # The trailing text looks like ', MIPS'; keep only 'MIPS'.
        if extra_text.startswith(', '):
            extra_text = extra_text[2:]

        # Reject anchors where the name contains a paren
        # e.g. 'target("3dnowa")':
        if '(' in name:
            return

        self.add_entry(url_suffix, name, kind, extra_text)

    def generate_file(self, dstpath):
        """Write the generated tables to the file DSTPATH, replacing it."""
        # Same explicit encoding as used when reading the HTML.
        with open(dstpath, 'w', encoding='utf-8') as outf:
            self.write_file(outf)

    def write_file(self, outf):
        """Write the generated C tables to the text stream OUTF.

        One ``<kind>_attrs`` array is emitted per kind (kinds and the
        entries within each kind are both sorted), followed by an
        ``attr_url_tables`` array that references every per-kind array.
        """
        outf.write("/* Autogenerated by regenerate-attr-urls.py. */\n\n")

        kinds = sorted(self.entries_by_kind)

        for kind in kinds:
            outf.write("const attr_url_entry %s_attrs[] = {\n" % kind)
            for name, url_suffix, extra_text in sorted(self.entries_by_kind[kind]):
                outf.write(' { "%s", "gcc/%s", "%s", %i},\n'
                           % (name, url_suffix, extra_text, len(name)))
            outf.write("};\n\n")

        outf.write('static const struct attr_url_table {\n')
        outf.write(' const attr_url_entry *m_table;\n')
        outf.write(' const size_t m_table_sz;\n')
        outf.write('} attr_url_tables[] = {\n')
        for kind in kinds:
            outf.write(" { %s_attrs, ARRAY_SIZE (%s_attrs) },\n" % (kind, kind))
        outf.write("};\n")
| |
# Location of the index page that gets scanned for attribute anchors,
# relative to the base HTML directory given on the command line.
INDEX_REL_PATH = 'gcc/Concept-and-Symbol-Index.html'
| |
class TestParsingIndex(unittest.TestCase):
    """Unit tests for Index's parsing of HTML index anchors."""

    def _entries_for_line(self, html_line):
        """Feed HTML_LINE to a fresh Index and return the resulting entries."""
        fresh_index = Index()
        fresh_index.parse_html_line_attribute_index(html_line)
        return fresh_index.entries

    def test_function_attribute(self):
        parsed = self._entries_for_line('<a href="Common-Function-Attributes.html#index-access-function-attribute"><code>access</code> function attribute</a>')
        expected = ('Common-Function-Attributes.html#index-access-function-attribute',
                    'access',
                    'function',
                    '')
        self.assertEqual(parsed, [expected])

    def test_function_attribute_with_target(self):
        parsed = self._entries_for_line('<a href="MIPS-Function-Attributes.html#index-nocompression-function-attribute_002c-MIPS"><code class="code">nocompression</code> function attribute, MIPS</a>')
        expected = ('MIPS-Function-Attributes.html#index-nocompression-function-attribute_002c-MIPS',
                    'nocompression',
                    'function',
                    'MIPS')
        self.assertEqual(parsed, [expected])

    def test_reject_parens(self):
        # Names such as target("3dnow") must not produce an entry.
        parsed = self._entries_for_line('<a href="x86-Function-Attributes.html#index-target_0028_00223dnow_0022_0029-function-attribute_002c-x86"><code>target("3dnow")</code> function attribute, x86</a>')
        self.assertEqual(len(parsed), 0)

    def test_type_attribute(self):
        parsed = self._entries_for_line('<a href="Common-Type-Attributes.html#index-aligned-type-attribute"><code>aligned</code> type attribute</a>')
        expected = ('Common-Type-Attributes.html#index-aligned-type-attribute',
                    'aligned',
                    'type',
                    '')
        self.assertEqual(parsed, [expected])

    def test_enumerator_attribute(self):
        parsed = self._entries_for_line('<a href="Enumerator-Attributes.html#index-deprecated-enumerator-attribute"><code>deprecated</code> enumerator attribute</a>')
        expected = ('Enumerator-Attributes.html#index-deprecated-enumerator-attribute',
                    'deprecated',
                    'enumerator',
                    '')
        self.assertEqual(parsed, [expected])

    def test_label_attribute(self):
        parsed = self._entries_for_line('<a href="Label-Attributes.html#index-cold-label-attribute"><code>cold</code> label attribute</a>')
        expected = ('Label-Attributes.html#index-cold-label-attribute',
                    'cold',
                    'label',
                    '')
        self.assertEqual(parsed, [expected])

    def test_statement_attribute(self):
        parsed = self._entries_for_line('<a href="Statement-Attributes.html#index-assume-statement-attribute"><code>assume</code> statement attribute</a>')
        expected = ('Statement-Attributes.html#index-assume-statement-attribute',
                    'assume',
                    'statement',
                    '')
        self.assertEqual(parsed, [expected])

    def test_variable_attribute(self):
        parsed = self._entries_for_line('<a href="AVR-Variable-Attributes.html#index-absdata-variable-attribute_002c-AVR"><code>absdata</code> variable attribute, AVR</a>')
        expected = ('AVR-Variable-Attributes.html#index-absdata-variable-attribute_002c-AVR',
                    'absdata',
                    'variable',
                    'AVR')
        self.assertEqual(parsed, [expected])

    def test_parse_attribute_index(self):
        # Runs against the real generated index page; INPUT_HTML_PATH is
        # set by the script's entry point when --unit-test is given.
        full_index = Index()
        full_index.parse_attribute_index(INPUT_HTML_PATH / INDEX_REL_PATH)

        first_enumerator = full_index.entries_by_kind['enumerator'][0]
        self.assertEqual(first_enumerator,
                         ('deprecated',
                          'Enumerator-Attributes.html#index-deprecated-enumerator-attribute',
                          ''))

        first_label = full_index.entries_by_kind['label'][0]
        self.assertEqual(first_label,
                         ('cold', 'Label-Attributes.html#index-cold-label-attribute', ''))
| |
def main(args):
    """Regenerate gcc/attr-urls.def from the generated HTML index.

    ARGS supplies base_html_dir (where the index page is read from) and
    src_gcc_dir (the tree in which gcc/attr-urls.def is written).
    """
    attr_index = Index()
    html_index_path = args.base_html_dir / INDEX_REL_PATH
    attr_index.parse_attribute_index(html_index_path)

    output_path = args.src_gcc_dir / 'gcc' / 'attr-urls.def'
    attr_index.generate_file(output_path)
| |
if __name__ == '__main__':
    # Command line: <base_html_dir> <src_gcc_dir> [--unit-test]
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=DESCRIPTION)
    parser.add_argument('base_html_dir', type=Path)
    parser.add_argument('src_gcc_dir', type=Path)
    parser.add_argument('--unit-test', action='store_true')
    args = parser.parse_args()

    if not args.unit_test:
        main(args)
    else:
        # The test that reads the real index page finds the HTML tree
        # through this module-level name.
        INPUT_HTML_PATH = args.base_html_dir
        # Hand unittest a trimmed argv so it does not try to interpret
        # this script's own arguments.
        unittest.main(argv=[sys.argv[0], '-v'])