| /* DWARF indexer |
| |
| Copyright (C) 2022-2025 Free Software Foundation, Inc. |
| |
| This file is part of GDB. |
| |
| This program is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3 of the License, or |
| (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
| |
| #include "dwarf2/cooked-indexer.h" |
| #include "dwarf2/cooked-index-storage.h" |
| #include "dwarf2/error.h" |
| |
| /* See cooked-indexer.h. */ |
| |
| cooked_indexer::cooked_indexer (cooked_index_storage *storage, |
| dwarf2_per_cu *per_cu, enum language language) |
| : m_index_storage (storage), |
| m_per_cu (per_cu), |
| m_language (language), |
| m_die_range_map (storage->get_parent_map ()) |
| { |
| } |
| |
| /* See cooked-indexer.h. */ |
| |
| void |
| cooked_indexer::check_bounds (cutu_reader *reader) |
| { |
| dwarf2_cu *cu = reader->cu (); |
| |
| if (cu->per_cu->addresses_seen) |
| return; |
| |
| unrelocated_addr best_lowpc = {}, best_highpc = {}; |
| /* Possibly set the default values of LOWPC and HIGHPC from |
| `DW_AT_ranges'. */ |
| dwarf2_find_base_address (reader->top_level_die (), cu); |
| enum pc_bounds_kind cu_bounds_kind |
| = dwarf2_get_pc_bounds (reader->top_level_die (), &best_lowpc, &best_highpc, |
| cu, m_index_storage->get_addrmap (), cu->per_cu); |
| if (cu_bounds_kind == PC_BOUNDS_HIGH_LOW && best_lowpc < best_highpc) |
| { |
| /* Store the contiguous range if it is not empty; it can be |
| empty for CUs with no code. addrmap requires CORE_ADDR, so |
| we cast here. */ |
| m_index_storage->get_addrmap ()->set_empty ((CORE_ADDR) best_lowpc, |
| (CORE_ADDR) best_highpc - 1, |
| cu->per_cu); |
| |
| cu->per_cu->addresses_seen = true; |
| } |
| } |
| |
| /* Helper function that returns true if TAG can have a linkage |
| name. */ |
| |
| static bool |
| tag_can_have_linkage_name (enum dwarf_tag tag) |
| { |
| switch (tag) |
| { |
| case DW_TAG_variable: |
| case DW_TAG_subprogram: |
| return true; |
| |
| default: |
| return false; |
| } |
| } |
| |
| /* See cooked-indexer.h. */ |
| |
| cutu_reader * |
| cooked_indexer::ensure_cu_exists (cutu_reader *reader, |
| sect_offset sect_off, bool is_dwz, |
| bool for_scanning) |
| { |
| /* Lookups for type unit references are always in the CU, and |
| cross-CU references will crash. */ |
| if (reader->cu ()->per_cu->is_dwz == is_dwz |
| && reader->cu ()->header.offset_in_cu_p (sect_off)) |
| return reader; |
| |
| dwarf2_per_objfile *per_objfile = reader->cu ()->per_objfile; |
| dwarf2_per_cu *per_cu |
| = dwarf2_find_containing_comp_unit (sect_off, is_dwz, |
| per_objfile->per_bfd); |
| |
| /* When scanning, we only want to visit a given CU a single time. |
| Doing this check here avoids self-imports as well. */ |
| if (for_scanning) |
| { |
| bool nope = false; |
| if (!per_cu->scanned.compare_exchange_strong (nope, true)) |
| return nullptr; |
| } |
| |
| cutu_reader *result = m_index_storage->get_reader (per_cu); |
| if (result == nullptr) |
| { |
| cutu_reader new_reader (per_cu, per_objfile, nullptr, nullptr, false, |
| language_minimal, |
| &m_index_storage->get_abbrev_table_cache ()); |
| |
| if (new_reader.is_dummy () || new_reader.top_level_die () == nullptr |
| || !new_reader.top_level_die ()->has_children) |
| return nullptr; |
| |
| auto copy = std::make_unique<cutu_reader> (std::move (new_reader)); |
| result = m_index_storage->preserve (std::move (copy)); |
| } |
| |
| if (result->is_dummy () || result->top_level_die () == nullptr |
| || !result->top_level_die ()->has_children) |
| return nullptr; |
| |
| if (for_scanning) |
| check_bounds (result); |
| |
| return result; |
| } |
| |
| /* See cooked-indexer.h. */ |
| |
| const gdb_byte * |
| cooked_indexer::scan_attributes (dwarf2_per_cu *scanning_per_cu, |
| cutu_reader *reader, |
| const gdb_byte *watermark_ptr, |
| const gdb_byte *info_ptr, |
| const abbrev_info *abbrev, |
| const char **name, |
| const char **linkage_name, |
| cooked_index_flag *flags, |
| sect_offset *sibling_offset, |
| const cooked_index_entry **parent_entry, |
| parent_map::addr_type *maybe_defer, |
| bool *is_enum_class, |
| bool for_specification) |
| { |
| bool origin_is_dwz = false; |
| bool is_declaration = false; |
| sect_offset origin_offset {}; |
| |
| std::optional<unrelocated_addr> low_pc; |
| std::optional<unrelocated_addr> high_pc; |
| bool high_pc_relative = false; |
| |
| for (int i = 0; i < abbrev->num_attrs; ++i) |
| { |
| attribute attr; |
| info_ptr = reader->read_attribute (&attr, &abbrev->attrs[i], info_ptr); |
| |
| /* Store the data if it is of an attribute we want to keep in a |
| partial symbol table. */ |
| switch (attr.name) |
| { |
| case DW_AT_name: |
| switch (abbrev->tag) |
| { |
| case DW_TAG_compile_unit: |
| case DW_TAG_partial_unit: |
| case DW_TAG_type_unit: |
| /* Compilation units have a DW_AT_name that is a filename, not |
| a source language identifier. */ |
| break; |
| |
| default: |
| if (*name == nullptr) |
| *name = attr.as_string (); |
| break; |
| } |
| break; |
| |
| case DW_AT_linkage_name: |
| case DW_AT_MIPS_linkage_name: |
| /* Note that both forms of linkage name might appear. We |
| assume they will be the same, and we only store the last |
| one we see. */ |
| if (*linkage_name == nullptr) |
| *linkage_name = attr.as_string (); |
| break; |
| |
| /* DWARF 4 has defined a dedicated DW_AT_main_subprogram |
| attribute to indicate the starting function of the program... */ |
| case DW_AT_main_subprogram: |
| if (attr.as_boolean ()) |
| *flags |= IS_MAIN; |
| break; |
| |
| /* ... however with older versions the DW_CC_program value of |
| the DW_AT_calling_convention attribute was used instead as |
| the only means available. We handle both variants then. */ |
| case DW_AT_calling_convention: |
| { |
| std::optional<ULONGEST> value = attr.unsigned_constant (); |
| if (value.has_value () && *value == DW_CC_program) |
| *flags |= IS_MAIN; |
| } |
| break; |
| |
| case DW_AT_declaration: |
| is_declaration = attr.as_boolean (); |
| break; |
| |
| case DW_AT_sibling: |
| if (sibling_offset != nullptr) |
| *sibling_offset = attr.get_ref_die_offset (); |
| break; |
| |
| case DW_AT_specification: |
| case DW_AT_abstract_origin: |
| case DW_AT_extension: |
| origin_offset = attr.get_ref_die_offset (); |
| origin_is_dwz = attr.form == DW_FORM_GNU_ref_alt; |
| break; |
| |
| case DW_AT_external: |
| if (attr.as_boolean ()) |
| *flags &= ~IS_STATIC; |
| break; |
| |
| case DW_AT_enum_class: |
| if (attr.as_boolean ()) |
| *is_enum_class = true; |
| break; |
| |
| case DW_AT_low_pc: |
| low_pc = attr.as_address (); |
| break; |
| |
| case DW_AT_high_pc: |
| high_pc = attr.as_address (); |
| if (reader->cu ()->header.version >= 4 && attr.form_is_constant ()) |
| high_pc_relative = true; |
| break; |
| |
| case DW_AT_location: |
| if (!scanning_per_cu->addresses_seen && attr.form_is_block ()) |
| { |
| struct dwarf_block *locdesc = attr.as_block (); |
| CORE_ADDR addr; |
| dwarf2_cu *cu = reader->cu (); |
| |
| if (decode_locdesc (locdesc, cu, &addr) |
| && (addr != 0 |
| || cu->per_objfile->per_bfd->has_section_at_zero)) |
| { |
| low_pc = (unrelocated_addr) addr; |
| /* For variables, we don't want to try decoding the |
| type just to find the size -- for gdb's purposes |
| we only need the address of a variable. */ |
| high_pc = (unrelocated_addr) (addr + 1); |
| high_pc_relative = false; |
| } |
| } |
| break; |
| |
| case DW_AT_ranges: |
| if (!scanning_per_cu->addresses_seen) |
| { |
| /* Offset in the .debug_ranges or .debug_rnglist section |
| (depending on DWARF version). */ |
| ULONGEST ranges_offset = attr.as_unsigned (); |
| |
| /* See dwarf2_cu::gnu_ranges_base's doc for why we might |
| want to add this value. */ |
| ranges_offset += reader->cu ()->gnu_ranges_base; |
| |
| unrelocated_addr lowpc, highpc; |
| dwarf2_ranges_read (ranges_offset, &lowpc, &highpc, reader->cu (), |
| m_index_storage->get_addrmap (), |
| scanning_per_cu, abbrev->tag); |
| } |
| break; |
| } |
| } |
| |
| /* We don't want to examine declarations, but if we found a |
| declaration when handling DW_AT_specification or the like, then |
| that is ok. Similarly, we allow an external variable without a |
| location; those are resolved via minimal symbols. */ |
| if (is_declaration && !for_specification |
| && !(abbrev->tag == DW_TAG_variable && (*flags & IS_STATIC) == 0)) |
| { |
| /* We always want to recurse into some types, but we may not |
| want to treat them as definitions. */ |
| if ((abbrev->tag == DW_TAG_class_type |
| || abbrev->tag == DW_TAG_structure_type |
| || abbrev->tag == DW_TAG_union_type |
| || abbrev->tag == DW_TAG_namespace) |
| && abbrev->has_children) |
| *flags |= IS_TYPE_DECLARATION; |
| else |
| { |
| *linkage_name = nullptr; |
| *name = nullptr; |
| } |
| } |
| else if ((*name == nullptr |
| || (*linkage_name == nullptr |
| && tag_can_have_linkage_name (abbrev->tag)) |
| || (*parent_entry == nullptr && m_language != language_c)) |
| && origin_offset != sect_offset (0)) |
| { |
| cutu_reader *new_reader |
| = ensure_cu_exists (reader, origin_offset, origin_is_dwz, false); |
| if (new_reader == nullptr) |
| error (_(DWARF_ERROR_PREFIX |
| "cannot follow reference to DIE at %s" |
| " [in module %s]"), |
| sect_offset_str (origin_offset), |
| bfd_get_filename (reader->abfd ())); |
| |
| const gdb_byte *new_info_ptr |
| = (new_reader->buffer () + to_underlying (origin_offset)); |
| |
| if (*parent_entry == nullptr) |
| { |
| /* We only perform immediate lookups of parents for DIEs |
| from earlier in this CU. This avoids any problem |
| with a NULL result when when we see a reference to a |
| DIE in another CU that we may or may not have |
| imported locally. */ |
| parent_map::addr_type addr = parent_map::form_addr (new_info_ptr); |
| if (new_reader->cu () != reader->cu () |
| || new_info_ptr > watermark_ptr) |
| *maybe_defer = addr; |
| else |
| *parent_entry = m_die_range_map->find (addr); |
| } |
| |
| unsigned int bytes_read; |
| const abbrev_info *new_abbrev |
| = new_reader->peek_die_abbrev (new_info_ptr, &bytes_read); |
| |
| if (new_abbrev == nullptr) |
| error (_(DWARF_ERROR_PREFIX |
| "Unexpected null DIE at offset %s [in module %s]"), |
| sect_offset_str (origin_offset), |
| bfd_get_filename (new_reader->abfd ())); |
| |
| new_info_ptr += bytes_read; |
| |
| if (new_reader->cu () == reader->cu () && new_info_ptr == watermark_ptr) |
| { |
| /* Self-reference, we're done. */ |
| } |
| else |
| scan_attributes (scanning_per_cu, new_reader, new_info_ptr, |
| new_info_ptr, new_abbrev, name, linkage_name, |
| flags, nullptr, parent_entry, maybe_defer, |
| is_enum_class, true); |
| } |
| |
| if (!for_specification) |
| { |
| /* Older versions of GNAT emit full-qualified encoded names. In |
| this case, also use this name as the linkage name. */ |
| if (m_language == language_ada |
| && *linkage_name == nullptr |
| && *name != nullptr |
| && strstr (*name, "__") != nullptr) |
| *linkage_name = *name; |
| |
| if (!scanning_per_cu->addresses_seen && low_pc.has_value () |
| && (reader->cu ()->per_objfile->per_bfd->has_section_at_zero |
| || *low_pc != (unrelocated_addr) 0) |
| && high_pc.has_value ()) |
| { |
| if (high_pc_relative) |
| high_pc = (unrelocated_addr) ((ULONGEST) *high_pc |
| + (ULONGEST) *low_pc); |
| |
| if (*high_pc > *low_pc) |
| { |
| /* Need CORE_ADDR casts for addrmap. */ |
| m_index_storage->get_addrmap ()->set_empty |
| ((CORE_ADDR) *low_pc, (CORE_ADDR) *high_pc - 1, |
| scanning_per_cu); |
| } |
| } |
| |
| if (abbrev->tag == DW_TAG_namespace && *name == nullptr) |
| *name = "(anonymous namespace)"; |
| |
| /* Keep in sync with new_symbol. */ |
| if (abbrev->tag == DW_TAG_subprogram |
| && (m_language == language_ada |
| || m_language == language_fortran)) |
| *flags &= ~IS_STATIC; |
| } |
| |
| return info_ptr; |
| } |
| |
| /* See cooked-indexer.h. */ |
| |
| const gdb_byte * |
| cooked_indexer::index_imported_unit (cutu_reader *reader, |
| const gdb_byte *info_ptr, |
| const abbrev_info *abbrev) |
| { |
| sect_offset sect_off {}; |
| bool is_dwz = false; |
| |
| for (int i = 0; i < abbrev->num_attrs; ++i) |
| { |
| /* Note that we never need to reprocess attributes here. */ |
| attribute attr; |
| info_ptr = reader->read_attribute (&attr, &abbrev->attrs[i], info_ptr); |
| |
| if (attr.name == DW_AT_import) |
| { |
| sect_off = attr.get_ref_die_offset (); |
| is_dwz = (attr.form == DW_FORM_GNU_ref_alt |
| || reader->cu ()->per_cu->is_dwz); |
| } |
| } |
| |
| /* Did not find DW_AT_import. */ |
| if (sect_off == sect_offset (0)) |
| return info_ptr; |
| |
| cutu_reader *new_reader |
| = ensure_cu_exists (reader, sect_off, is_dwz, true); |
| if (new_reader != nullptr) |
| { |
| index_dies (new_reader, new_reader->info_ptr (), nullptr, false); |
| |
| reader->cu ()->add_dependence (new_reader->cu ()->per_cu); |
| } |
| |
| return info_ptr; |
| } |
| |
| /* See cooked-indexer.h. */ |
| |
| const gdb_byte * |
| cooked_indexer::recurse (cutu_reader *reader, |
| const gdb_byte *info_ptr, |
| std::variant<const cooked_index_entry *, |
| parent_map::addr_type> parent, |
| bool fully) |
| { |
| info_ptr = index_dies (reader, info_ptr, parent, fully); |
| |
| if (!std::holds_alternative<const cooked_index_entry *> (parent)) |
| return info_ptr; |
| const cooked_index_entry *parent_entry |
| = std::get<const cooked_index_entry *> (parent); |
| |
| if (parent_entry != nullptr) |
| { |
| /* Both start and end are inclusive, so use both "+ 1" and "- 1" to |
| limit the range to the children of parent_entry. */ |
| parent_map::addr_type start |
| = parent_map::form_addr (reader->buffer () |
| + to_underlying (parent_entry->die_offset) |
| + 1); |
| parent_map::addr_type end = parent_map::form_addr (info_ptr - 1); |
| m_die_range_map->add_entry (start, end, parent_entry); |
| } |
| |
| return info_ptr; |
| } |
| |
| /* See cooked-indexer.h. */ |
| |
| const gdb_byte * |
| cooked_indexer::index_dies (cutu_reader *reader, |
| const gdb_byte *info_ptr, |
| std::variant<const cooked_index_entry *, |
| parent_map::addr_type> parent, |
| bool fully) |
| { |
| const gdb_byte *end_ptr |
| = (reader->buffer () + to_underlying (reader->cu ()->header.sect_off) |
| + reader->cu ()->header.get_length_with_initial ()); |
| |
| while (info_ptr < end_ptr) |
| { |
| sect_offset this_die = (sect_offset) (info_ptr - reader->buffer ()); |
| unsigned int bytes_read; |
| const abbrev_info *abbrev |
| = reader->peek_die_abbrev (info_ptr, &bytes_read); |
| info_ptr += bytes_read; |
| if (abbrev == nullptr) |
| break; |
| |
| if (abbrev->tag == DW_TAG_imported_unit) |
| { |
| info_ptr = index_imported_unit (reader, info_ptr, abbrev); |
| continue; |
| } |
| |
| parent_map::addr_type defer {}; |
| if (std::holds_alternative<parent_map::addr_type> (parent)) |
| defer = std::get<parent_map::addr_type> (parent); |
| const cooked_index_entry *parent_entry = nullptr; |
| if (std::holds_alternative<const cooked_index_entry *> (parent)) |
| parent_entry = std::get<const cooked_index_entry *> (parent); |
| |
| /* If a DIE parent is a DW_TAG_subprogram, then the DIE is only |
| interesting if it's a DW_TAG_subprogram or a DW_TAG_entry_point. */ |
| bool die_interesting |
| = (abbrev->interesting |
| && (parent_entry == nullptr |
| || parent_entry->tag != DW_TAG_subprogram |
| || abbrev->tag == DW_TAG_subprogram |
| || abbrev->tag == DW_TAG_entry_point)); |
| |
| if (!die_interesting) |
| { |
| info_ptr = reader->skip_one_die (info_ptr, abbrev, !fully); |
| if (fully && abbrev->has_children) |
| info_ptr = index_dies (reader, info_ptr, parent, fully); |
| continue; |
| } |
| |
| const char *name = nullptr; |
| const char *linkage_name = nullptr; |
| cooked_index_flag flags = IS_STATIC; |
| sect_offset sibling {}; |
| const cooked_index_entry *this_parent_entry = parent_entry; |
| bool is_enum_class = false; |
| |
| /* The scope of a DW_TAG_entry_point cooked_index_entry is the one of |
| its surrounding subroutine. */ |
| if (abbrev->tag == DW_TAG_entry_point) |
| this_parent_entry = parent_entry->get_parent (); |
| info_ptr |
| = scan_attributes (reader->cu ()->per_cu, reader, info_ptr, info_ptr, |
| abbrev, &name, &linkage_name, &flags, &sibling, |
| &this_parent_entry, &defer, &is_enum_class, false); |
| /* A DW_TAG_entry_point inherits its static/extern property from |
| the enclosing subroutine. */ |
| if (abbrev->tag == DW_TAG_entry_point) |
| { |
| flags &= ~IS_STATIC; |
| flags |= parent_entry->flags & IS_STATIC; |
| } |
| |
| if (abbrev->tag == DW_TAG_namespace |
| && m_language == language_cplus |
| && strcmp (name, "::") == 0) |
| { |
| /* GCC 4.0 and 4.1 had a bug (PR c++/28460) where they |
| generated bogus DW_TAG_namespace DIEs with a name of "::" |
| for the global namespace. Work around this problem |
| here. */ |
| name = nullptr; |
| } |
| |
| cooked_index_entry *this_entry = nullptr; |
| if (name != nullptr) |
| { |
| if (defer != 0) |
| this_entry |
| = m_index_storage->add (this_die, abbrev->tag, |
| flags | IS_PARENT_DEFERRED, name, |
| defer, m_per_cu); |
| else |
| this_entry |
| = m_index_storage->add (this_die, abbrev->tag, flags, name, |
| this_parent_entry, m_per_cu); |
| } |
| |
| if (linkage_name != nullptr) |
| { |
| /* We only want this to be "main" if it has a linkage name |
| but not an ordinary name. */ |
| if (name != nullptr) |
| flags = flags & ~IS_MAIN; |
| /* Set the IS_LINKAGE on for everything except when functions |
| have linkage name present but name is absent. */ |
| if (name != nullptr |
| || (abbrev->tag != DW_TAG_subprogram |
| && abbrev->tag != DW_TAG_inlined_subroutine |
| && abbrev->tag != DW_TAG_entry_point)) |
| flags = flags | IS_LINKAGE; |
| m_index_storage->add (this_die, abbrev->tag, flags, |
| linkage_name, nullptr, m_per_cu); |
| } |
| |
| if (abbrev->has_children) |
| { |
| switch (abbrev->tag) |
| { |
| case DW_TAG_class_type: |
| case DW_TAG_interface_type: |
| case DW_TAG_structure_type: |
| case DW_TAG_union_type: |
| if (m_language != language_c && this_entry != nullptr) |
| { |
| info_ptr = recurse (reader, info_ptr, this_entry, fully); |
| continue; |
| } |
| break; |
| |
| case DW_TAG_enumeration_type: |
| /* Some versions of gdc could emit an "enum class" |
| without a name, which is nonsensical. These are |
| skipped. */ |
| if (is_enum_class && this_entry == nullptr) |
| continue; |
| |
| /* We need to recurse even for an anonymous enumeration. |
| Which scope we record as the parent scope depends on |
| whether we're reading an "enum class". If so, we use |
| the enum itself as the parent, yielding names like |
| "enum_class::enumerator"; otherwise we inject the |
| names into our own parent scope. */ |
| { |
| std::variant<const cooked_index_entry *, |
| parent_map::addr_type> recurse_parent; |
| if (is_enum_class) |
| { |
| gdb_assert (this_entry != nullptr); |
| recurse_parent = this_entry; |
| } |
| else if (defer != 0) |
| recurse_parent = defer; |
| else |
| recurse_parent = this_parent_entry; |
| |
| info_ptr = recurse (reader, info_ptr, recurse_parent, fully); |
| } |
| continue; |
| |
| case DW_TAG_module: |
| if (this_entry == nullptr) |
| break; |
| [[fallthrough]]; |
| case DW_TAG_namespace: |
| /* We don't check THIS_ENTRY for a namespace, to handle |
| the ancient G++ workaround pointed out above. */ |
| info_ptr = recurse (reader, info_ptr, this_entry, fully); |
| continue; |
| |
| case DW_TAG_subprogram: |
| if ((m_language == language_fortran |
| || m_language == language_ada) |
| && this_entry != nullptr) |
| { |
| info_ptr = recurse (reader, info_ptr, this_entry, true); |
| continue; |
| } |
| break; |
| } |
| |
| if (sibling != sect_offset (0)) |
| { |
| const gdb_byte *sibling_ptr |
| = reader->buffer () + to_underlying (sibling); |
| |
| if (sibling_ptr < info_ptr) |
| complaint (_("DW_AT_sibling points backwards")); |
| else if (sibling_ptr > reader->buffer_end ()) |
| reader->section ()->overflow_complaint (); |
| else |
| info_ptr = sibling_ptr; |
| } |
| else |
| info_ptr = reader->skip_children (info_ptr); |
| } |
| } |
| |
| return info_ptr; |
| } |
| |
| /* See cooked-indexer.h. */ |
| |
| void |
| cooked_indexer::make_index (cutu_reader *reader) |
| { |
| check_bounds (reader); |
| find_file_and_directory (reader->top_level_die (), reader->cu ()); |
| |
| if (!reader->top_level_die ()->has_children) |
| return; |
| |
| index_dies (reader, reader->info_ptr (), nullptr, false); |
| } |