|  | #!/usr/bin/env python3 | 
|  | # | 
|  | # Script to generate tables for libstdc++ std::text_encoding. | 
|  | # | 
|  | # This file is part of GCC. | 
|  | # | 
|  | # GCC is free software; you can redistribute it and/or modify it under | 
|  | # the terms of the GNU General Public License as published by the Free | 
|  | # Software Foundation; either version 3, or (at your option) any later | 
|  | # version. | 
|  | # | 
|  | # GCC is distributed in the hope that it will be useful, but WITHOUT ANY | 
|  | # WARRANTY; without even the implied warranty of MERCHANTABILITY or | 
|  | # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License | 
|  | # for more details. | 
|  | # | 
|  | # You should have received a copy of the GNU General Public License | 
|  | # along with GCC; see the file COPYING3.  If not see | 
|  | # <http://www.gnu.org/licenses/>. | 
|  |  | 
# To update the libstdc++ static data in <bits/text_encoding-data.h> download
|  | # the latest: | 
|  | # https://www.iana.org/assignments/character-sets/character-sets-1.csv | 
|  | # Then run this script and save the output to | 
|  | # include/bits/text_encoding-data.h | 
|  |  | 
|  | import sys | 
|  | import csv | 
|  | import os | 
|  |  | 
# Exactly one command-line argument is required: the character sets CSV file.
# Anything else gets a usage message on stderr and a failing exit status.
if len(sys.argv) != 2:
    usage = "Usage: %s <character sets csv>" % sys.argv[0]
    print(usage, file=sys.stderr)
    raise SystemExit(1)
|  |  | 
# File name of this script, recorded in the first line of the output.
self = os.path.split(__file__)[1]
generated_by = "// Generated by scripts/{}, do not edit.".format(self)
print(generated_by)

# Licence notice and Doxygen file comment, written to the output verbatim.
license_and_file_comment = """

// Copyright The GNU Toolchain Authors.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file bits/text_encoding-data.h
*  This is an internal header file, included by other library headers.
*  Do not attempt to use it directly. @headername{text_encoding}
*/
"""
print(license_and_file_comment)

# The generated file produces a compile error unless
# _GLIBCXX_GET_ENCODING_DATA is defined at the point where it is included.
# The last literal carries its own "\n" so a blank line follows the #endif.
for guard_line in (
    "#ifndef _GLIBCXX_GET_ENCODING_DATA",
    '# error "This is not a public header, do not include it directly"',
    "#endif\n",
):
    print(guard_line)
|  |  | 
|  | # We need to generate a list of initializers of the form { mib, alias }, e.g., | 
|  | # { 3, "US-ASCII" }, | 
|  | # { 3, "ISO646-US" }, | 
|  | # { 3, "csASCII" }, | 
|  | # { 4, "ISO_8859-1:1987" }, | 
|  | # { 4, "latin1" }, | 
|  | # The initializers must be sorted by the mib value. The first entry for | 
|  | # a given mib must be the primary name for the encoding. Any aliases for | 
|  | # the encoding come after the primary name. | 
|  | # We also define a macro _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET which is the | 
|  | # offset into the list of the mib=106, alias="UTF-8" entry. This is used | 
|  | # to optimize the common case, so we don't need to search for "UTF-8". | 
|  |  | 
# Maps each MIBenum value to its list of names: the primary name first,
# followed by the aliases in the order they appear in the CSV.
charsets = {}
# Decode as UTF-8 explicitly instead of using the locale's default encoding,
# so that parsing does not depend on the environment the script runs in
# (the registry CSV is expected to be ASCII/UTF-8 -- confirm if a download
# ever fails to decode).  newline='' leaves line-ending handling to the csv
# module, which is needed for quoted fields that contain newlines.
with open(sys.argv[1], newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    next(reader) # skip header row
    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line; there is
            # no record to read, so skip it rather than fail on row[2].
            continue
        mib = int(row[2])
        if mib in charsets:
            raise ValueError("Multiple rows for mibEnum={}".format(mib))
        name = row[1]
        # The alias field is whitespace-separated.  Drop every occurrence of
        # the primary name from it so that the name appears exactly once,
        # at the front of the list.
        aliases = [alias for alias in row[5].split() if alias != name]
        charsets[mib] = [name] + aliases
|  |  | 
# The C++ standard specifies that "NATS-DANO" and "NATS-DANO-ADD" are
# removed.  Drop their entries; a missing entry is not an error.
for excluded_mib in (33, 34):
    charsets.pop(excluded_mib, None)
|  |  | 
# Additional aliases, keyed by mib, that are emitted after the names read
# from the CSV.  "ASCII" is not an official IANA alias, but it is part of
# the implementation-defined superset of aliases for US-ASCII.
# See also LWG 4043.
extra_aliases = {
    3: ["ASCII"],
}
|  |  | 
# Emit one "{ mib, name }," initializer per name, in ascending mib order.
# count is the number of initializers printed so far, i.e. the index the
# next initializer will have in the generated list.
count = 0
for mib, listed_names in sorted(charsets.items()):
    if listed_names[0] == "UTF-8":
        # The primary "UTF-8" entry is about to be printed at index count.
        print("#define _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET {}".format(count))
    for listed in listed_names:
        print('  {{ {:4}, "{}" }},'.format(mib, listed))
    count += len(listed_names)
    # Extension aliases follow the names from the CSV for the same mib and
    # are marked as such in the output.
    for extension in extra_aliases.get(mib, ()):
        print('  {{ {:4}, "{}" }}, // libstdc++ extension'.format(mib, extension))
        count += 1
|  |  | 
# <text_encoding> gives an error if this macro is left defined.
# This is printed last on purpose: output from a run that stopped early
# lacks the #undef and is therefore not usable.
undef_directive = "\n#undef _GLIBCXX_GET_ENCODING_DATA"
print(undef_directive)