xref: /aosp_15_r20/external/abseil-cpp/absl/debugging/internal/elf_mem_image.cc (revision 9356374a3709195abf420251b3e825997ff56c0f)
1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Allow dynamic symbol lookup in an in-memory Elf image.
16 //
17 
18 #include "absl/debugging/internal/elf_mem_image.h"
19 
20 #ifdef ABSL_HAVE_ELF_MEM_IMAGE  // defined in elf_mem_image.h
21 
22 #include <string.h>
23 
24 #include <cassert>
25 #include <cstddef>
26 #include <cstdint>
27 
28 #include "absl/base/config.h"
29 #include "absl/base/internal/raw_logging.h"
30 
31 // From binutils/include/elf/common.h (this doesn't appear to be documented
32 // anywhere else).
33 //
34 //   /* This flag appears in a Versym structure.  It means that the symbol
35 //      is hidden, and is only visible with an explicit version number.
36 //      This is a GNU extension.  */
37 //   #define VERSYM_HIDDEN           0x8000
38 //
39 //   /* This is the mask for the rest of the Versym information.  */
40 //   #define VERSYM_VERSION          0x7fff
41 
// Mask that strips the flag bits from a Versym entry, leaving only the
// version index (used when decoding entries of the DT_VERSYM table below).
#define VERSYM_VERSION 0x7fff
43 
44 namespace absl {
45 ABSL_NAMESPACE_BEGIN
46 namespace debugging_internal {
47 
48 namespace {
49 
// Pick the ELF class constant and the st_info decoding macros that match the
// pointer width this translation unit is being compiled for.
#if __SIZEOF_POINTER__ == 4
const int kElfClass = ELFCLASS32;

// Returns the binding (e.g. STB_GLOBAL) packed into the symbol's st_info.
int ElfBind(const ElfW(Sym) *symbol) {
  const auto packed_info = symbol->st_info;
  return ELF32_ST_BIND(packed_info);
}

// Returns the symbol type packed into the symbol's st_info.
int ElfType(const ElfW(Sym) *symbol) {
  const auto packed_info = symbol->st_info;
  return ELF32_ST_TYPE(packed_info);
}
#elif __SIZEOF_POINTER__ == 8
const int kElfClass = ELFCLASS64;

// Returns the binding (e.g. STB_GLOBAL) packed into the symbol's st_info.
int ElfBind(const ElfW(Sym) *symbol) {
  const auto packed_info = symbol->st_info;
  return ELF64_ST_BIND(packed_info);
}

// Returns the symbol type packed into the symbol's st_info.
int ElfType(const ElfW(Sym) *symbol) {
  const auto packed_info = symbol->st_info;
  return ELF64_ST_TYPE(packed_info);
}
#else
// Neither a 32-bit nor a 64-bit target: no ELF class can match, and any
// attempt to decode a symbol is reported as a fatal error.
const int kElfClass = -1;

int ElfBind(const ElfW(Sym) *) {
  ABSL_RAW_LOG(FATAL, "Unexpected word size");
  return 0;
}

int ElfType(const ElfW(Sym) *) {
  ABSL_RAW_LOG(FATAL, "Unexpected word size");
  return 0;
}
#endif
69 
// Computes the address of entry `index` in an ELF table and returns it typed
// as `const T*`.
//
// The table starts `table_offset` bytes past the ELF header `ehdr`, and each
// entry occupies `element_size` bytes. This is pure pointer arithmetic plus a
// cast: nothing is dereferenced and nothing is validated, so callers must do
// their own bounds checking.
template <typename T>
const T *GetTableElement(const ElfW(Ehdr) * ehdr, ElfW(Off) table_offset,
                         ElfW(Word) element_size, size_t index) {
  const char *const image_start = reinterpret_cast<const char *>(ehdr);
  const char *const element =
      image_start + table_offset + index * element_size;
  return reinterpret_cast<const T *>(element);
}
80 
81 }  // namespace
82 
// Out-of-line definition of the static member declared in the header.
// The value of this variable doesn't matter; it's used only for its
// unique address.
// NOTE(review): presumably kInvalidBase (rejected by the constructor) is
// derived from this address -- confirm against elf_mem_image.h.
const int ElfMemImage::kInvalidBaseSentinel = 0;
86 
ElfMemImage(const void * base)87 ElfMemImage::ElfMemImage(const void *base) {
88   ABSL_RAW_CHECK(base != kInvalidBase, "bad pointer");
89   Init(base);
90 }
91 
GetNumSymbols() const92 uint32_t ElfMemImage::GetNumSymbols() const { return num_syms_; }
93 
ElfW(Sym)94 const ElfW(Sym) * ElfMemImage::GetDynsym(uint32_t index) const {
95   ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");
96   return dynsym_ + index;
97 }
98 
ElfW(Versym)99 const ElfW(Versym) *ElfMemImage::GetVersym(uint32_t index) const {
100   ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");
101   return versym_ + index;
102 }
103 
ElfW(Phdr)104 const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const {
105   ABSL_RAW_CHECK(index >= 0 && index < ehdr_->e_phnum, "index out of range");
106   return GetTableElement<ElfW(Phdr)>(ehdr_, ehdr_->e_phoff, ehdr_->e_phentsize,
107                                      static_cast<size_t>(index));
108 }
109 
GetDynstr(ElfW (Word)offset) const110 const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const {
111   ABSL_RAW_CHECK(offset < strsize_, "offset out of range");
112   return dynstr_ + offset;
113 }
114 
GetSymAddr(const ElfW (Sym)* sym) const115 const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const {
116   if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) {
117     // Symbol corresponds to "special" (e.g. SHN_ABS) section.
118     return reinterpret_cast<const void *>(sym->st_value);
119   }
120   ABSL_RAW_CHECK(link_base_ < sym->st_value, "symbol out of range");
121   return GetTableElement<char>(ehdr_, 0, 1, sym->st_value - link_base_);
122 }
123 
ElfW(Verdef)124 const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const {
125   ABSL_RAW_CHECK(0 <= index && static_cast<size_t>(index) <= verdefnum_,
126                  "index out of range");
127   const ElfW(Verdef) *version_definition = verdef_;
128   while (version_definition->vd_ndx < index && version_definition->vd_next) {
129     const char *const version_definition_as_char =
130         reinterpret_cast<const char *>(version_definition);
131     version_definition =
132         reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char +
133                                                version_definition->vd_next);
134   }
135   return version_definition->vd_ndx == index ? version_definition : nullptr;
136 }
137 
ElfW(Verdaux)138 const ElfW(Verdaux) *ElfMemImage::GetVerdefAux(
139     const ElfW(Verdef) *verdef) const {
140   return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1);
141 }
142 
GetVerstr(ElfW (Word)offset) const143 const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const {
144   ABSL_RAW_CHECK(offset < strsize_, "offset out of range");
145   return dynstr_ + offset;
146 }
147 
// (Re)initializes this object from the ELF image mapped at `base`.
//
// All members are first reset to their "no image" values. If `base` is null
// the function stops there. Otherwise it:
//   1. validates the ELF magic, class and byte order in the identification
//      bytes (asserting and returning with the reset state on mismatch);
//   2. scans the program headers for the first PT_LOAD (whose p_vaddr becomes
//      link_base_) and for PT_DYNAMIC;
//   3. walks the dynamic section to locate the symbol, string, version and
//      hash tables;
//   4. derives the number of dynamic symbols from DT_HASH if present,
//      otherwise from DT_GNU_HASH.
// If step 2 or 3 does not find everything it needs, the function asserts and
// calls Init(nullptr) so the object ends up in the reset state again.
void ElfMemImage::Init(const void *base) {
  ehdr_      = nullptr;
  dynsym_    = nullptr;
  dynstr_    = nullptr;
  versym_    = nullptr;
  verdef_    = nullptr;
  num_syms_ = 0;
  strsize_   = 0;
  verdefnum_ = 0;
  // Sentinel: PT_LOAD .p_vaddr can't possibly be this.
  link_base_ = ~ElfW(Addr){0};  // NOLINT(readability/braces)
  if (!base) {
    return;
  }
  // Check the four ELF magic bytes at the start of the image.
  const char *const base_as_char = reinterpret_cast<const char *>(base);
  if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 ||
      base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) {
    assert(false);
    return;
  }
  // The image's ELF class must match the pointer width of this build.
  int elf_class = base_as_char[EI_CLASS];
  if (elf_class != kElfClass) {
    assert(false);
    return;
  }
  // The image's byte order must match the byte order of this build; each case
  // bails out when the corresponding ABSL_IS_*_ENDIAN macro is not defined.
  switch (base_as_char[EI_DATA]) {
    case ELFDATA2LSB: {
#ifndef ABSL_IS_LITTLE_ENDIAN
      assert(false);
      return;
#endif
      break;
    }
    case ELFDATA2MSB: {
#ifndef ABSL_IS_BIG_ENDIAN
      assert(false);
      return;
#endif
      break;
    }
    default: {
      assert(false);
      return;
    }
  }

  // Identification bytes look sane: from here on the image is treated as a
  // full ELF header followed by its program header table.
  ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base);
  const ElfW(Phdr) *dynamic_program_header = nullptr;
  for (int i = 0; i < ehdr_->e_phnum; ++i) {
    const ElfW(Phdr) *const program_header = GetPhdr(i);
    switch (program_header->p_type) {
      case PT_LOAD:
        // !~link_base_ is true only while link_base_ still holds the all-ones
        // sentinel, so only the first PT_LOAD segment sets it.
        if (!~link_base_) {
          link_base_ = program_header->p_vaddr;
        }
        break;
      case PT_DYNAMIC:
        dynamic_program_header = program_header;
        break;
    }
  }
  if (!~link_base_ || !dynamic_program_header) {
    assert(false);
    // Mark this image as not present. Can not recur infinitely.
    Init(nullptr);
    return;
  }
  // Difference between where the image is actually mapped and the address it
  // was linked at; added to link-time addresses to get in-memory pointers.
  ptrdiff_t relocation =
      base_as_char - reinterpret_cast<const char *>(link_base_);
  ElfW(Dyn)* dynamic_entry = reinterpret_cast<ElfW(Dyn)*>(
      static_cast<intptr_t>(dynamic_program_header->p_vaddr) + relocation);
  uint32_t *sysv_hash = nullptr;
  uint32_t *gnu_hash = nullptr;
  // Walk the dynamic section until the DT_NULL terminator.
  for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) {
    // Relocated form of the entry's value; only meaningful for the tags below
    // that carry an address (the size/count tags use d_val directly).
    const auto value =
        static_cast<intptr_t>(dynamic_entry->d_un.d_val) + relocation;
    switch (dynamic_entry->d_tag) {
      case DT_HASH:
        sysv_hash = reinterpret_cast<uint32_t *>(value);
        break;
      case DT_GNU_HASH:
        gnu_hash = reinterpret_cast<uint32_t *>(value);
        break;
      case DT_SYMTAB:
        dynsym_ = reinterpret_cast<ElfW(Sym) *>(value);
        break;
      case DT_STRTAB:
        dynstr_ = reinterpret_cast<const char *>(value);
        break;
      case DT_VERSYM:
        versym_ = reinterpret_cast<ElfW(Versym) *>(value);
        break;
      case DT_VERDEF:
        verdef_ = reinterpret_cast<ElfW(Verdef) *>(value);
        break;
      case DT_VERDEFNUM:
        verdefnum_ = static_cast<size_t>(dynamic_entry->d_un.d_val);
        break;
      case DT_STRSZ:
        strsize_ = static_cast<size_t>(dynamic_entry->d_un.d_val);
        break;
      default:
        // Unrecognized entries explicitly ignored.
        break;
    }
  }
  // Every table is required, plus at least one of the two hash tables.
  if ((!sysv_hash && !gnu_hash) || !dynsym_ || !dynstr_ || !versym_ ||
      !verdef_ || !verdefnum_ || !strsize_) {
    assert(false);  // invalid VDSO
    // Mark this image as not present. Can not recur infinitely.
    Init(nullptr);
    return;
  }
  if (sysv_hash) {
    // Second word of the DT_HASH table (nchain in the SysV hash layout) is
    // taken as the symbol count.
    num_syms_ = sysv_hash[1];
  } else {
    assert(gnu_hash);
    // Compute the number of symbols for DT_GNU_HASH, which is specified by
    // https://sourceware.org/gnu-gabi/program-loading-and-dynamic-linking.txt
    uint32_t nbuckets = gnu_hash[0];
    // The buckets array is located after the header (4 uint32) and the bloom
    // filter (size_t array of gnu_hash[2] elements).
    uint32_t *buckets = gnu_hash + 4 + sizeof(size_t) / 4 * gnu_hash[2];
    // Find the chain of the last non-empty bucket.
    uint32_t idx = 0;
    for (uint32_t i = nbuckets; i > 0;) {
      idx = buckets[--i];
      if (idx != 0) break;
    }
    // idx stays 0 when every bucket is empty, giving a symbol count of 0.
    if (idx != 0) {
      // Find the last element of the chain, which has an odd value.
      // Add one to get the number of symbols.
      // The chain array is indexed by symbol index minus gnu_hash[1], hence
      // the bias applied to the base pointer here.
      uint32_t *chain = buckets + nbuckets - gnu_hash[1];
      while (chain[idx++] % 2 == 0) {
      }
    }
    num_syms_ = idx;
  }
}
287 
LookupSymbol(const char * name,const char * version,int type,SymbolInfo * info_out) const288 bool ElfMemImage::LookupSymbol(const char *name,
289                                const char *version,
290                                int type,
291                                SymbolInfo *info_out) const {
292   for (const SymbolInfo& info : *this) {
293     if (strcmp(info.name, name) == 0 && strcmp(info.version, version) == 0 &&
294         ElfType(info.symbol) == type) {
295       if (info_out) {
296         *info_out = info;
297       }
298       return true;
299     }
300   }
301   return false;
302 }
303 
LookupSymbolByAddress(const void * address,SymbolInfo * info_out) const304 bool ElfMemImage::LookupSymbolByAddress(const void *address,
305                                         SymbolInfo *info_out) const {
306   for (const SymbolInfo& info : *this) {
307     const char *const symbol_start =
308         reinterpret_cast<const char *>(info.address);
309     const char *const symbol_end = symbol_start + info.symbol->st_size;
310     if (symbol_start <= address && address < symbol_end) {
311       if (info_out) {
312         // Client wants to know details for that symbol (the usual case).
313         if (ElfBind(info.symbol) == STB_GLOBAL) {
314           // Strong symbol; just return it.
315           *info_out = info;
316           return true;
317         } else {
318           // Weak or local. Record it, but keep looking for a strong one.
319           *info_out = info;
320         }
321       } else {
322         // Client only cares if there is an overlapping symbol.
323         return true;
324       }
325     }
326   }
327   return false;
328 }
329 
SymbolIterator(const void * const image,uint32_t index)330 ElfMemImage::SymbolIterator::SymbolIterator(const void *const image,
331                                             uint32_t index)
332     : index_(index), image_(image) {}
333 
operator ->() const334 const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const {
335   return &info_;
336 }
337 
operator *() const338 const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const {
339   return info_;
340 }
341 
operator ==(const SymbolIterator & rhs) const342 bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const {
343   return this->image_ == rhs.image_ && this->index_ == rhs.index_;
344 }
345 
operator !=(const SymbolIterator & rhs) const346 bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const {
347   return !(*this == rhs);
348 }
349 
operator ++()350 ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() {
351   this->Update(1);
352   return *this;
353 }
354 
begin() const355 ElfMemImage::SymbolIterator ElfMemImage::begin() const {
356   SymbolIterator it(this, 0);
357   it.Update(0);
358   return it;
359 }
360 
end() const361 ElfMemImage::SymbolIterator ElfMemImage::end() const {
362   return SymbolIterator(this, GetNumSymbols());
363 }
364 
Update(uint32_t increment)365 void ElfMemImage::SymbolIterator::Update(uint32_t increment) {
366   const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_);
367   ABSL_RAW_CHECK(image->IsPresent() || increment == 0, "");
368   if (!image->IsPresent()) {
369     return;
370   }
371   index_ += increment;
372   if (index_ >= image->GetNumSymbols()) {
373     index_ = image->GetNumSymbols();
374     return;
375   }
376   const ElfW(Sym)    *symbol = image->GetDynsym(index_);
377   const ElfW(Versym) *version_symbol = image->GetVersym(index_);
378   ABSL_RAW_CHECK(symbol && version_symbol, "");
379   const char *const symbol_name = image->GetDynstr(symbol->st_name);
380 #if defined(__NetBSD__)
381   const int version_index = version_symbol->vs_vers & VERSYM_VERSION;
382 #else
383   const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION;
384 #endif
385   const ElfW(Verdef) *version_definition = nullptr;
386   const char *version_name = "";
387   if (symbol->st_shndx == SHN_UNDEF) {
388     // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and
389     // version_index could well be greater than verdefnum_, so calling
390     // GetVerdef(version_index) may trigger assertion.
391   } else {
392     version_definition = image->GetVerdef(version_index);
393   }
394   if (version_definition) {
395     // I am expecting 1 or 2 auxiliary entries: 1 for the version itself,
396     // optional 2nd if the version has a parent.
397     ABSL_RAW_CHECK(
398         version_definition->vd_cnt == 1 || version_definition->vd_cnt == 2,
399         "wrong number of entries");
400     const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition);
401     version_name = image->GetVerstr(version_aux->vda_name);
402   }
403   info_.name    = symbol_name;
404   info_.version = version_name;
405   info_.address = image->GetSymAddr(symbol);
406   info_.symbol  = symbol;
407 }
408 
409 }  // namespace debugging_internal
410 ABSL_NAMESPACE_END
411 }  // namespace absl
412 
413 #endif  // ABSL_HAVE_ELF_MEM_IMAGE
414