1 //===- ModuleFile.h - Module file description -------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the Module class, which describes a module that has 10 // been loaded from an AST file. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_SERIALIZATION_MODULEFILE_H 15 #define LLVM_CLANG_SERIALIZATION_MODULEFILE_H 16 17 #include "clang/Basic/FileManager.h" 18 #include "clang/Basic/Module.h" 19 #include "clang/Basic/SourceLocation.h" 20 #include "clang/Serialization/ASTBitCodes.h" 21 #include "clang/Serialization/ContinuousRangeMap.h" 22 #include "clang/Serialization/ModuleFileExtension.h" 23 #include "llvm/ADT/BitVector.h" 24 #include "llvm/ADT/DenseMap.h" 25 #include "llvm/ADT/PointerIntPair.h" 26 #include "llvm/ADT/SetVector.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/StringRef.h" 29 #include "llvm/Bitstream/BitstreamReader.h" 30 #include "llvm/Support/Endian.h" 31 #include <cassert> 32 #include <cstdint> 33 #include <memory> 34 #include <string> 35 #include <vector> 36 37 namespace clang { 38 39 namespace serialization { 40 41 /// Specifies the kind of module that has been loaded. 42 enum ModuleKind { 43 /// File is an implicitly-loaded module. 44 MK_ImplicitModule, 45 46 /// File is an explicitly-loaded module. 47 MK_ExplicitModule, 48 49 /// File is a PCH file treated as such. 50 MK_PCH, 51 52 /// File is a PCH file treated as the preamble. 53 MK_Preamble, 54 55 /// File is a PCH file treated as the actual main file. 56 MK_MainFile, 57 58 /// File is from a prebuilt module path. 59 MK_PrebuiltModule 60 }; 61 62 /// The input file info that has been loaded from an AST file. 63 struct InputFileInfo { 64 std::string FilenameAsRequested; 65 std::string Filename; 66 uint64_t ContentHash; 67 off_t StoredSize; 68 time_t StoredTime; 69 bool Overridden; 70 bool Transient; 71 bool TopLevel; 72 bool ModuleMap; 73 }; 74 75 /// The input file that has been loaded from this AST file, along with 76 /// bools indicating whether this was an overridden buffer or if it was 77 /// out-of-date or not-found. 78 class InputFile { 79 enum { 80 Overridden = 1, 81 OutOfDate = 2, 82 NotFound = 3 83 }; 84 llvm::PointerIntPair<const FileEntryRef::MapEntry *, 2, unsigned> Val; 85 86 public: 87 InputFile() = default; 88 89 InputFile(FileEntryRef File, bool isOverridden = false, 90 bool isOutOfDate = false) { 91 assert(!(isOverridden && isOutOfDate) && 92 "an overridden cannot be out-of-date"); 93 unsigned intVal = 0; 94 if (isOverridden) 95 intVal = Overridden; 96 else if (isOutOfDate) 97 intVal = OutOfDate; 98 Val.setPointerAndInt(&File.getMapEntry(), intVal); 99 } 100 getNotFound()101 static InputFile getNotFound() { 102 InputFile File; 103 File.Val.setInt(NotFound); 104 return File; 105 } 106 getFile()107 OptionalFileEntryRef getFile() const { 108 if (auto *P = Val.getPointer()) 109 return FileEntryRef(*P); 110 return std::nullopt; 111 } isOverridden()112 bool isOverridden() const { return Val.getInt() == Overridden; } isOutOfDate()113 bool isOutOfDate() const { return Val.getInt() == OutOfDate; } isNotFound()114 bool isNotFound() const { return Val.getInt() == NotFound; } 115 }; 116 117 /// Information about a module that has been loaded by the ASTReader. 118 /// 119 /// Each instance of the Module class corresponds to a single AST file, which 120 /// may be a precompiled header, precompiled preamble, a module, or an AST file 121 /// of some sort loaded as the main file, all of which are specific formulations 122 /// of the general notion of a "module". A module may depend on any number of 123 /// other modules. 124 class ModuleFile { 125 public: ModuleFile(ModuleKind Kind,FileEntryRef File,unsigned Generation)126 ModuleFile(ModuleKind Kind, FileEntryRef File, unsigned Generation) 127 : Kind(Kind), File(File), Generation(Generation) {} 128 ~ModuleFile(); 129 130 // === General information === 131 132 /// The index of this module in the list of modules. 133 unsigned Index = 0; 134 135 /// The type of this module. 136 ModuleKind Kind; 137 138 /// The file name of the module file. 139 std::string FileName; 140 141 /// The name of the module. 142 std::string ModuleName; 143 144 /// The base directory of the module. 145 std::string BaseDirectory; 146 getTimestampFilename()147 std::string getTimestampFilename() const { 148 return FileName + ".timestamp"; 149 } 150 151 /// The original source file name that was used to build the 152 /// primary AST file, which may have been modified for 153 /// relocatable-pch support. 154 std::string OriginalSourceFileName; 155 156 /// The actual original source file name that was used to 157 /// build this AST file. 158 std::string ActualOriginalSourceFileName; 159 160 /// The file ID for the original source file that was used to 161 /// build this AST file. 162 FileID OriginalSourceFileID; 163 164 std::string ModuleMapPath; 165 166 /// Whether this precompiled header is a relocatable PCH file. 167 bool RelocatablePCH = false; 168 169 /// Whether this module file is a standard C++ module. 170 bool StandardCXXModule = false; 171 172 /// Whether timestamps are included in this module file. 173 bool HasTimestamps = false; 174 175 /// Whether the top-level module has been read from the AST file. 176 bool DidReadTopLevelSubmodule = false; 177 178 /// The file entry for the module file. 179 FileEntryRef File; 180 181 /// The signature of the module file, which may be used instead of the size 182 /// and modification time to identify this particular file. 183 ASTFileSignature Signature; 184 185 /// The signature of the AST block of the module file, this can be used to 186 /// unique module files based on AST contents. 187 ASTFileSignature ASTBlockHash; 188 189 /// The bit vector denoting usage of each header search entry (true = used). 190 llvm::BitVector SearchPathUsage; 191 192 /// The bit vector denoting usage of each VFS entry (true = used). 193 llvm::BitVector VFSUsage; 194 195 /// Whether this module has been directly imported by the 196 /// user. 197 bool DirectlyImported = false; 198 199 /// The generation of which this module file is a part. 200 unsigned Generation; 201 202 /// The memory buffer that stores the data associated with 203 /// this AST file, owned by the InMemoryModuleCache. 204 llvm::MemoryBuffer *Buffer = nullptr; 205 206 /// The size of this file, in bits. 207 uint64_t SizeInBits = 0; 208 209 /// The global bit offset (or base) of this module 210 uint64_t GlobalBitOffset = 0; 211 212 /// The bit offset of the AST block of this module. 213 uint64_t ASTBlockStartOffset = 0; 214 215 /// The serialized bitstream data for this file. 216 StringRef Data; 217 218 /// The main bitstream cursor for the main block. 219 llvm::BitstreamCursor Stream; 220 221 /// The source location where the module was explicitly or implicitly 222 /// imported in the local translation unit. 223 /// 224 /// If module A depends on and imports module B, both modules will have the 225 /// same DirectImportLoc, but different ImportLoc (B's ImportLoc will be a 226 /// source location inside module A). 227 /// 228 /// WARNING: This is largely useless. It doesn't tell you when a module was 229 /// made visible, just when the first submodule of that module was imported. 230 SourceLocation DirectImportLoc; 231 232 /// The source location where this module was first imported. 233 SourceLocation ImportLoc; 234 235 /// The first source location in this module. 236 SourceLocation FirstLoc; 237 238 /// The list of extension readers that are attached to this module 239 /// file. 240 std::vector<std::unique_ptr<ModuleFileExtensionReader>> ExtensionReaders; 241 242 /// The module offset map data for this file. If non-empty, the various 243 /// ContinuousRangeMaps described below have not yet been populated. 244 StringRef ModuleOffsetMap; 245 246 // === Input Files === 247 248 /// The cursor to the start of the input-files block. 249 llvm::BitstreamCursor InputFilesCursor; 250 251 /// Absolute offset of the start of the input-files block. 252 uint64_t InputFilesOffsetBase = 0; 253 254 /// Relative offsets for all of the input file entries in the AST file. 255 const llvm::support::unaligned_uint64_t *InputFileOffsets = nullptr; 256 257 /// The input files that have been loaded from this AST file. 258 std::vector<InputFile> InputFilesLoaded; 259 260 /// The input file infos that have been loaded from this AST file. 261 std::vector<InputFileInfo> InputFileInfosLoaded; 262 263 // All user input files reside at the index range [0, NumUserInputFiles), and 264 // system input files reside at [NumUserInputFiles, InputFilesLoaded.size()). 265 unsigned NumUserInputFiles = 0; 266 267 /// If non-zero, specifies the time when we last validated input 268 /// files. Zero means we never validated them. 269 /// 270 /// The time is specified in seconds since the start of the Epoch. 271 uint64_t InputFilesValidationTimestamp = 0; 272 273 // === Source Locations === 274 275 /// Cursor used to read source location entries. 276 llvm::BitstreamCursor SLocEntryCursor; 277 278 /// The bit offset to the start of the SOURCE_MANAGER_BLOCK. 279 uint64_t SourceManagerBlockStartOffset = 0; 280 281 /// The number of source location entries in this AST file. 282 unsigned LocalNumSLocEntries = 0; 283 284 /// The base ID in the source manager's view of this module. 285 int SLocEntryBaseID = 0; 286 287 /// The base offset in the source manager's view of this module. 288 SourceLocation::UIntTy SLocEntryBaseOffset = 0; 289 290 /// Base file offset for the offsets in SLocEntryOffsets. Real file offset 291 /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i]. 292 uint64_t SLocEntryOffsetsBase = 0; 293 294 /// Offsets for all of the source location entries in the 295 /// AST file. 296 const uint32_t *SLocEntryOffsets = nullptr; 297 298 /// Remapping table for source locations in this module. 299 ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy, 2> 300 SLocRemap; 301 302 // === Identifiers === 303 304 /// The number of identifiers in this AST file. 305 unsigned LocalNumIdentifiers = 0; 306 307 /// Offsets into the identifier table data. 308 /// 309 /// This array is indexed by the identifier ID (-1), and provides 310 /// the offset into IdentifierTableData where the string data is 311 /// stored. 312 const uint32_t *IdentifierOffsets = nullptr; 313 314 /// Base identifier ID for identifiers local to this module. 315 serialization::IdentID BaseIdentifierID = 0; 316 317 /// Remapping table for identifier IDs in this module. 318 ContinuousRangeMap<uint32_t, int, 2> IdentifierRemap; 319 320 /// Actual data for the on-disk hash table of identifiers. 321 /// 322 /// This pointer points into a memory buffer, where the on-disk hash 323 /// table for identifiers actually lives. 324 const unsigned char *IdentifierTableData = nullptr; 325 326 /// A pointer to an on-disk hash table of opaque type 327 /// IdentifierHashTable. 328 void *IdentifierLookupTable = nullptr; 329 330 /// Offsets of identifiers that we're going to preload within 331 /// IdentifierTableData. 332 std::vector<unsigned> PreloadIdentifierOffsets; 333 334 // === Macros === 335 336 /// The cursor to the start of the preprocessor block, which stores 337 /// all of the macro definitions. 338 llvm::BitstreamCursor MacroCursor; 339 340 /// The number of macros in this AST file. 341 unsigned LocalNumMacros = 0; 342 343 /// Base file offset for the offsets in MacroOffsets. Real file offset for 344 /// the entry is MacroOffsetsBase + MacroOffsets[i]. 345 uint64_t MacroOffsetsBase = 0; 346 347 /// Offsets of macros in the preprocessor block. 348 /// 349 /// This array is indexed by the macro ID (-1), and provides 350 /// the offset into the preprocessor block where macro definitions are 351 /// stored. 352 const uint32_t *MacroOffsets = nullptr; 353 354 /// Base macro ID for macros local to this module. 355 serialization::MacroID BaseMacroID = 0; 356 357 /// Remapping table for macro IDs in this module. 358 ContinuousRangeMap<uint32_t, int, 2> MacroRemap; 359 360 /// The offset of the start of the set of defined macros. 361 uint64_t MacroStartOffset = 0; 362 363 // === Detailed PreprocessingRecord === 364 365 /// The cursor to the start of the (optional) detailed preprocessing 366 /// record block. 367 llvm::BitstreamCursor PreprocessorDetailCursor; 368 369 /// The offset of the start of the preprocessor detail cursor. 370 uint64_t PreprocessorDetailStartOffset = 0; 371 372 /// Base preprocessed entity ID for preprocessed entities local to 373 /// this module. 374 serialization::PreprocessedEntityID BasePreprocessedEntityID = 0; 375 376 /// Remapping table for preprocessed entity IDs in this module. 377 ContinuousRangeMap<uint32_t, int, 2> PreprocessedEntityRemap; 378 379 const PPEntityOffset *PreprocessedEntityOffsets = nullptr; 380 unsigned NumPreprocessedEntities = 0; 381 382 /// Base ID for preprocessed skipped ranges local to this module. 383 unsigned BasePreprocessedSkippedRangeID = 0; 384 385 const PPSkippedRange *PreprocessedSkippedRangeOffsets = nullptr; 386 unsigned NumPreprocessedSkippedRanges = 0; 387 388 // === Header search information === 389 390 /// The number of local HeaderFileInfo structures. 391 unsigned LocalNumHeaderFileInfos = 0; 392 393 /// Actual data for the on-disk hash table of header file 394 /// information. 395 /// 396 /// This pointer points into a memory buffer, where the on-disk hash 397 /// table for header file information actually lives. 398 const char *HeaderFileInfoTableData = nullptr; 399 400 /// The on-disk hash table that contains information about each of 401 /// the header files. 402 void *HeaderFileInfoTable = nullptr; 403 404 // === Submodule information === 405 406 /// The number of submodules in this module. 407 unsigned LocalNumSubmodules = 0; 408 409 /// Base submodule ID for submodules local to this module. 410 serialization::SubmoduleID BaseSubmoduleID = 0; 411 412 /// Remapping table for submodule IDs in this module. 413 ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap; 414 415 // === Selectors === 416 417 /// The number of selectors new to this file. 418 /// 419 /// This is the number of entries in SelectorOffsets. 420 unsigned LocalNumSelectors = 0; 421 422 /// Offsets into the selector lookup table's data array 423 /// where each selector resides. 424 const uint32_t *SelectorOffsets = nullptr; 425 426 /// Base selector ID for selectors local to this module. 427 serialization::SelectorID BaseSelectorID = 0; 428 429 /// Remapping table for selector IDs in this module. 430 ContinuousRangeMap<uint32_t, int, 2> SelectorRemap; 431 432 /// A pointer to the character data that comprises the selector table 433 /// 434 /// The SelectorOffsets table refers into this memory. 435 const unsigned char *SelectorLookupTableData = nullptr; 436 437 /// A pointer to an on-disk hash table of opaque type 438 /// ASTSelectorLookupTable. 439 /// 440 /// This hash table provides the IDs of all selectors, and the associated 441 /// instance and factory methods. 442 void *SelectorLookupTable = nullptr; 443 444 // === Declarations === 445 446 /// DeclsCursor - This is a cursor to the start of the DECLTYPES_BLOCK block. 447 /// It has read all the abbreviations at the start of the block and is ready 448 /// to jump around with these in context. 449 llvm::BitstreamCursor DeclsCursor; 450 451 /// The offset to the start of the DECLTYPES_BLOCK block. 452 uint64_t DeclsBlockStartOffset = 0; 453 454 /// The number of declarations in this AST file. 455 unsigned LocalNumDecls = 0; 456 457 /// Offset of each declaration within the bitstream, indexed 458 /// by the declaration ID (-1). 459 const DeclOffset *DeclOffsets = nullptr; 460 461 /// Base declaration ID for declarations local to this module. 462 serialization::DeclID BaseDeclID = 0; 463 464 /// Remapping table for declaration IDs in this module. 465 ContinuousRangeMap<uint32_t, int, 2> DeclRemap; 466 467 /// Mapping from the module files that this module file depends on 468 /// to the base declaration ID for that module as it is understood within this 469 /// module. 470 /// 471 /// This is effectively a reverse global-to-local mapping for declaration 472 /// IDs, so that we can interpret a true global ID (for this translation unit) 473 /// as a local ID (for this module file). 474 llvm::DenseMap<ModuleFile *, serialization::DeclID> GlobalToLocalDeclIDs; 475 476 /// Array of file-level DeclIDs sorted by file. 477 const serialization::DeclID *FileSortedDecls = nullptr; 478 unsigned NumFileSortedDecls = 0; 479 480 /// Array of category list location information within this 481 /// module file, sorted by the definition ID. 482 const serialization::ObjCCategoriesInfo *ObjCCategoriesMap = nullptr; 483 484 /// The number of redeclaration info entries in ObjCCategoriesMap. 485 unsigned LocalNumObjCCategoriesInMap = 0; 486 487 /// The Objective-C category lists for categories known to this 488 /// module. 489 SmallVector<uint64_t, 1> ObjCCategories; 490 491 // === Types === 492 493 /// The number of types in this AST file. 494 unsigned LocalNumTypes = 0; 495 496 /// Offset of each type within the bitstream, indexed by the 497 /// type ID, or the representation of a Type*. 498 const UnderalignedInt64 *TypeOffsets = nullptr; 499 500 /// Base type ID for types local to this module as represented in 501 /// the global type ID space. 502 serialization::TypeID BaseTypeIndex = 0; 503 504 /// Remapping table for type IDs in this module. 505 ContinuousRangeMap<uint32_t, int, 2> TypeRemap; 506 507 // === Miscellaneous === 508 509 /// Diagnostic IDs and their mappings that the user changed. 510 SmallVector<uint64_t, 8> PragmaDiagMappings; 511 512 /// List of modules which depend on this module 513 llvm::SetVector<ModuleFile *> ImportedBy; 514 515 /// List of modules which this module depends on 516 llvm::SetVector<ModuleFile *> Imports; 517 518 /// Determine whether this module was directly imported at 519 /// any point during translation. isDirectlyImported()520 bool isDirectlyImported() const { return DirectlyImported; } 521 522 /// Is this a module file for a module (rather than a PCH or similar). isModule()523 bool isModule() const { 524 return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule || 525 Kind == MK_PrebuiltModule; 526 } 527 528 /// Dump debugging output for this module. 529 void dump(); 530 }; 531 532 } // namespace serialization 533 534 } // namespace clang 535 536 #endif // LLVM_CLANG_SERIALIZATION_MODULEFILE_H 537