1 //===- ModuleFile.h - Module file description -------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
//  This file defines the ModuleFile class, which describes a module that has
//  been loaded from an AST file.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_SERIALIZATION_MODULEFILE_H
15 #define LLVM_CLANG_SERIALIZATION_MODULEFILE_H
16 
17 #include "clang/Basic/FileManager.h"
18 #include "clang/Basic/Module.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Serialization/ASTBitCodes.h"
21 #include "clang/Serialization/ContinuousRangeMap.h"
22 #include "clang/Serialization/ModuleFileExtension.h"
23 #include "llvm/ADT/BitVector.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/PointerIntPair.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/StringRef.h"
29 #include "llvm/Bitstream/BitstreamReader.h"
30 #include "llvm/Support/Endian.h"
31 #include <cassert>
32 #include <cstdint>
33 #include <memory>
34 #include <string>
35 #include <vector>
36 
37 namespace clang {
38 
39 namespace serialization {
40 
/// Identifies how a loaded AST file is being used.
///
/// The enumerator values are written out explicitly; they are the same values
/// the enumerators receive when left implicit.
enum ModuleKind {
  /// A module that was loaded implicitly.
  MK_ImplicitModule = 0,

  /// A module that was loaded explicitly.
  MK_ExplicitModule = 1,

  /// A PCH file that is treated as a PCH file.
  MK_PCH = 2,

  /// A PCH file that is treated as the preamble.
  MK_Preamble = 3,

  /// A PCH file that is treated as the actual main file.
  MK_MainFile = 4,

  /// A module found via a prebuilt module path.
  MK_PrebuiltModule = 5
};
61 
/// The input file info that has been loaded from an AST file.
///
/// Every scalar member has a default member initializer, so a
/// default-initialized object (e.g. `InputFileInfo Info;`) starts out with
/// zero/false values instead of indeterminate ones. The struct is still an
/// aggregate, so brace initialization keeps working.
///
/// NOTE(review): the meaning of the individual fields is not spelled out in
/// this header; the per-field notes below are inferred from the names and
/// should be confirmed against the code that fills them in.
struct InputFileInfo {
  /// Presumably the file name in the form it was originally requested.
  std::string FilenameAsRequested;

  /// Presumably the file name after any adjustment of the requested name.
  std::string Filename;

  /// Presumably a hash of the file's contents; defaults to 0.
  uint64_t ContentHash = 0;

  /// Presumably the file size recorded in the AST file; defaults to 0.
  off_t StoredSize = 0;

  /// Presumably the modification time recorded in the AST file; defaults to 0.
  time_t StoredTime = 0;

  // Boolean properties of the input file; all default to false.
  bool Overridden = false;
  bool Transient = false;
  bool TopLevel = false;
  bool ModuleMap = false;
};
74 
75 /// The input file that has been loaded from this AST file, along with
76 /// bools indicating whether this was an overridden buffer or if it was
77 /// out-of-date or not-found.
78 class InputFile {
79   enum {
80     Overridden = 1,
81     OutOfDate = 2,
82     NotFound = 3
83   };
84   llvm::PointerIntPair<const FileEntryRef::MapEntry *, 2, unsigned> Val;
85 
86 public:
87   InputFile() = default;
88 
89   InputFile(FileEntryRef File, bool isOverridden = false,
90             bool isOutOfDate = false) {
91     assert(!(isOverridden && isOutOfDate) &&
92            "an overridden cannot be out-of-date");
93     unsigned intVal = 0;
94     if (isOverridden)
95       intVal = Overridden;
96     else if (isOutOfDate)
97       intVal = OutOfDate;
98     Val.setPointerAndInt(&File.getMapEntry(), intVal);
99   }
100 
getNotFound()101   static InputFile getNotFound() {
102     InputFile File;
103     File.Val.setInt(NotFound);
104     return File;
105   }
106 
getFile()107   OptionalFileEntryRef getFile() const {
108     if (auto *P = Val.getPointer())
109       return FileEntryRef(*P);
110     return std::nullopt;
111   }
isOverridden()112   bool isOverridden() const { return Val.getInt() == Overridden; }
isOutOfDate()113   bool isOutOfDate() const { return Val.getInt() == OutOfDate; }
isNotFound()114   bool isNotFound() const { return Val.getInt() == NotFound; }
115 };
116 
117 /// Information about a module that has been loaded by the ASTReader.
118 ///
119 /// Each instance of the Module class corresponds to a single AST file, which
120 /// may be a precompiled header, precompiled preamble, a module, or an AST file
121 /// of some sort loaded as the main file, all of which are specific formulations
122 /// of the general notion of a "module". A module may depend on any number of
123 /// other modules.
124 class ModuleFile {
125 public:
ModuleFile(ModuleKind Kind,FileEntryRef File,unsigned Generation)126   ModuleFile(ModuleKind Kind, FileEntryRef File, unsigned Generation)
127       : Kind(Kind), File(File), Generation(Generation) {}
128   ~ModuleFile();
129 
130   // === General information ===
131 
132   /// The index of this module in the list of modules.
133   unsigned Index = 0;
134 
135   /// The type of this module.
136   ModuleKind Kind;
137 
138   /// The file name of the module file.
139   std::string FileName;
140 
141   /// The name of the module.
142   std::string ModuleName;
143 
144   /// The base directory of the module.
145   std::string BaseDirectory;
146 
getTimestampFilename()147   std::string getTimestampFilename() const {
148     return FileName + ".timestamp";
149   }
150 
151   /// The original source file name that was used to build the
152   /// primary AST file, which may have been modified for
153   /// relocatable-pch support.
154   std::string OriginalSourceFileName;
155 
156   /// The actual original source file name that was used to
157   /// build this AST file.
158   std::string ActualOriginalSourceFileName;
159 
160   /// The file ID for the original source file that was used to
161   /// build this AST file.
162   FileID OriginalSourceFileID;
163 
164   std::string ModuleMapPath;
165 
166   /// Whether this precompiled header is a relocatable PCH file.
167   bool RelocatablePCH = false;
168 
169   /// Whether this module file is a standard C++ module.
170   bool StandardCXXModule = false;
171 
172   /// Whether timestamps are included in this module file.
173   bool HasTimestamps = false;
174 
175   /// Whether the top-level module has been read from the AST file.
176   bool DidReadTopLevelSubmodule = false;
177 
178   /// The file entry for the module file.
179   FileEntryRef File;
180 
181   /// The signature of the module file, which may be used instead of the size
182   /// and modification time to identify this particular file.
183   ASTFileSignature Signature;
184 
185   /// The signature of the AST block of the module file, this can be used to
186   /// unique module files based on AST contents.
187   ASTFileSignature ASTBlockHash;
188 
189   /// The bit vector denoting usage of each header search entry (true = used).
190   llvm::BitVector SearchPathUsage;
191 
192   /// The bit vector denoting usage of each VFS entry (true = used).
193   llvm::BitVector VFSUsage;
194 
195   /// Whether this module has been directly imported by the
196   /// user.
197   bool DirectlyImported = false;
198 
199   /// The generation of which this module file is a part.
200   unsigned Generation;
201 
202   /// The memory buffer that stores the data associated with
203   /// this AST file, owned by the InMemoryModuleCache.
204   llvm::MemoryBuffer *Buffer = nullptr;
205 
206   /// The size of this file, in bits.
207   uint64_t SizeInBits = 0;
208 
209   /// The global bit offset (or base) of this module
210   uint64_t GlobalBitOffset = 0;
211 
212   /// The bit offset of the AST block of this module.
213   uint64_t ASTBlockStartOffset = 0;
214 
215   /// The serialized bitstream data for this file.
216   StringRef Data;
217 
218   /// The main bitstream cursor for the main block.
219   llvm::BitstreamCursor Stream;
220 
221   /// The source location where the module was explicitly or implicitly
222   /// imported in the local translation unit.
223   ///
224   /// If module A depends on and imports module B, both modules will have the
225   /// same DirectImportLoc, but different ImportLoc (B's ImportLoc will be a
226   /// source location inside module A).
227   ///
228   /// WARNING: This is largely useless. It doesn't tell you when a module was
229   /// made visible, just when the first submodule of that module was imported.
230   SourceLocation DirectImportLoc;
231 
232   /// The source location where this module was first imported.
233   SourceLocation ImportLoc;
234 
235   /// The first source location in this module.
236   SourceLocation FirstLoc;
237 
238   /// The list of extension readers that are attached to this module
239   /// file.
240   std::vector<std::unique_ptr<ModuleFileExtensionReader>> ExtensionReaders;
241 
242   /// The module offset map data for this file. If non-empty, the various
243   /// ContinuousRangeMaps described below have not yet been populated.
244   StringRef ModuleOffsetMap;
245 
246   // === Input Files ===
247 
248   /// The cursor to the start of the input-files block.
249   llvm::BitstreamCursor InputFilesCursor;
250 
251   /// Absolute offset of the start of the input-files block.
252   uint64_t InputFilesOffsetBase = 0;
253 
254   /// Relative offsets for all of the input file entries in the AST file.
255   const llvm::support::unaligned_uint64_t *InputFileOffsets = nullptr;
256 
257   /// The input files that have been loaded from this AST file.
258   std::vector<InputFile> InputFilesLoaded;
259 
260   /// The input file infos that have been loaded from this AST file.
261   std::vector<InputFileInfo> InputFileInfosLoaded;
262 
263   // All user input files reside at the index range [0, NumUserInputFiles), and
264   // system input files reside at [NumUserInputFiles, InputFilesLoaded.size()).
265   unsigned NumUserInputFiles = 0;
266 
267   /// If non-zero, specifies the time when we last validated input
268   /// files.  Zero means we never validated them.
269   ///
270   /// The time is specified in seconds since the start of the Epoch.
271   uint64_t InputFilesValidationTimestamp = 0;
272 
273   // === Source Locations ===
274 
275   /// Cursor used to read source location entries.
276   llvm::BitstreamCursor SLocEntryCursor;
277 
278   /// The bit offset to the start of the SOURCE_MANAGER_BLOCK.
279   uint64_t SourceManagerBlockStartOffset = 0;
280 
281   /// The number of source location entries in this AST file.
282   unsigned LocalNumSLocEntries = 0;
283 
284   /// The base ID in the source manager's view of this module.
285   int SLocEntryBaseID = 0;
286 
287   /// The base offset in the source manager's view of this module.
288   SourceLocation::UIntTy SLocEntryBaseOffset = 0;
289 
290   /// Base file offset for the offsets in SLocEntryOffsets. Real file offset
291   /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i].
292   uint64_t SLocEntryOffsetsBase = 0;
293 
294   /// Offsets for all of the source location entries in the
295   /// AST file.
296   const uint32_t *SLocEntryOffsets = nullptr;
297 
298   /// Remapping table for source locations in this module.
299   ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy, 2>
300       SLocRemap;
301 
302   // === Identifiers ===
303 
304   /// The number of identifiers in this AST file.
305   unsigned LocalNumIdentifiers = 0;
306 
307   /// Offsets into the identifier table data.
308   ///
309   /// This array is indexed by the identifier ID (-1), and provides
310   /// the offset into IdentifierTableData where the string data is
311   /// stored.
312   const uint32_t *IdentifierOffsets = nullptr;
313 
314   /// Base identifier ID for identifiers local to this module.
315   serialization::IdentID BaseIdentifierID = 0;
316 
317   /// Remapping table for identifier IDs in this module.
318   ContinuousRangeMap<uint32_t, int, 2> IdentifierRemap;
319 
320   /// Actual data for the on-disk hash table of identifiers.
321   ///
322   /// This pointer points into a memory buffer, where the on-disk hash
323   /// table for identifiers actually lives.
324   const unsigned char *IdentifierTableData = nullptr;
325 
326   /// A pointer to an on-disk hash table of opaque type
327   /// IdentifierHashTable.
328   void *IdentifierLookupTable = nullptr;
329 
330   /// Offsets of identifiers that we're going to preload within
331   /// IdentifierTableData.
332   std::vector<unsigned> PreloadIdentifierOffsets;
333 
334   // === Macros ===
335 
336   /// The cursor to the start of the preprocessor block, which stores
337   /// all of the macro definitions.
338   llvm::BitstreamCursor MacroCursor;
339 
340   /// The number of macros in this AST file.
341   unsigned LocalNumMacros = 0;
342 
343   /// Base file offset for the offsets in MacroOffsets. Real file offset for
344   /// the entry is MacroOffsetsBase + MacroOffsets[i].
345   uint64_t MacroOffsetsBase = 0;
346 
347   /// Offsets of macros in the preprocessor block.
348   ///
349   /// This array is indexed by the macro ID (-1), and provides
350   /// the offset into the preprocessor block where macro definitions are
351   /// stored.
352   const uint32_t *MacroOffsets = nullptr;
353 
354   /// Base macro ID for macros local to this module.
355   serialization::MacroID BaseMacroID = 0;
356 
357   /// Remapping table for macro IDs in this module.
358   ContinuousRangeMap<uint32_t, int, 2> MacroRemap;
359 
360   /// The offset of the start of the set of defined macros.
361   uint64_t MacroStartOffset = 0;
362 
363   // === Detailed PreprocessingRecord ===
364 
365   /// The cursor to the start of the (optional) detailed preprocessing
366   /// record block.
367   llvm::BitstreamCursor PreprocessorDetailCursor;
368 
369   /// The offset of the start of the preprocessor detail cursor.
370   uint64_t PreprocessorDetailStartOffset = 0;
371 
372   /// Base preprocessed entity ID for preprocessed entities local to
373   /// this module.
374   serialization::PreprocessedEntityID BasePreprocessedEntityID = 0;
375 
376   /// Remapping table for preprocessed entity IDs in this module.
377   ContinuousRangeMap<uint32_t, int, 2> PreprocessedEntityRemap;
378 
379   const PPEntityOffset *PreprocessedEntityOffsets = nullptr;
380   unsigned NumPreprocessedEntities = 0;
381 
382   /// Base ID for preprocessed skipped ranges local to this module.
383   unsigned BasePreprocessedSkippedRangeID = 0;
384 
385   const PPSkippedRange *PreprocessedSkippedRangeOffsets = nullptr;
386   unsigned NumPreprocessedSkippedRanges = 0;
387 
388   // === Header search information ===
389 
390   /// The number of local HeaderFileInfo structures.
391   unsigned LocalNumHeaderFileInfos = 0;
392 
393   /// Actual data for the on-disk hash table of header file
394   /// information.
395   ///
396   /// This pointer points into a memory buffer, where the on-disk hash
397   /// table for header file information actually lives.
398   const char *HeaderFileInfoTableData = nullptr;
399 
400   /// The on-disk hash table that contains information about each of
401   /// the header files.
402   void *HeaderFileInfoTable = nullptr;
403 
404   // === Submodule information ===
405 
406   /// The number of submodules in this module.
407   unsigned LocalNumSubmodules = 0;
408 
409   /// Base submodule ID for submodules local to this module.
410   serialization::SubmoduleID BaseSubmoduleID = 0;
411 
412   /// Remapping table for submodule IDs in this module.
413   ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap;
414 
415   // === Selectors ===
416 
417   /// The number of selectors new to this file.
418   ///
419   /// This is the number of entries in SelectorOffsets.
420   unsigned LocalNumSelectors = 0;
421 
422   /// Offsets into the selector lookup table's data array
423   /// where each selector resides.
424   const uint32_t *SelectorOffsets = nullptr;
425 
426   /// Base selector ID for selectors local to this module.
427   serialization::SelectorID BaseSelectorID = 0;
428 
429   /// Remapping table for selector IDs in this module.
430   ContinuousRangeMap<uint32_t, int, 2> SelectorRemap;
431 
432   /// A pointer to the character data that comprises the selector table
433   ///
434   /// The SelectorOffsets table refers into this memory.
435   const unsigned char *SelectorLookupTableData = nullptr;
436 
437   /// A pointer to an on-disk hash table of opaque type
438   /// ASTSelectorLookupTable.
439   ///
440   /// This hash table provides the IDs of all selectors, and the associated
441   /// instance and factory methods.
442   void *SelectorLookupTable = nullptr;
443 
444   // === Declarations ===
445 
446   /// DeclsCursor - This is a cursor to the start of the DECLTYPES_BLOCK block.
447   /// It has read all the abbreviations at the start of the block and is ready
448   /// to jump around with these in context.
449   llvm::BitstreamCursor DeclsCursor;
450 
451   /// The offset to the start of the DECLTYPES_BLOCK block.
452   uint64_t DeclsBlockStartOffset = 0;
453 
454   /// The number of declarations in this AST file.
455   unsigned LocalNumDecls = 0;
456 
457   /// Offset of each declaration within the bitstream, indexed
458   /// by the declaration ID (-1).
459   const DeclOffset *DeclOffsets = nullptr;
460 
461   /// Base declaration ID for declarations local to this module.
462   serialization::DeclID BaseDeclID = 0;
463 
464   /// Remapping table for declaration IDs in this module.
465   ContinuousRangeMap<uint32_t, int, 2> DeclRemap;
466 
467   /// Mapping from the module files that this module file depends on
468   /// to the base declaration ID for that module as it is understood within this
469   /// module.
470   ///
471   /// This is effectively a reverse global-to-local mapping for declaration
472   /// IDs, so that we can interpret a true global ID (for this translation unit)
473   /// as a local ID (for this module file).
474   llvm::DenseMap<ModuleFile *, serialization::DeclID> GlobalToLocalDeclIDs;
475 
476   /// Array of file-level DeclIDs sorted by file.
477   const serialization::DeclID *FileSortedDecls = nullptr;
478   unsigned NumFileSortedDecls = 0;
479 
480   /// Array of category list location information within this
481   /// module file, sorted by the definition ID.
482   const serialization::ObjCCategoriesInfo *ObjCCategoriesMap = nullptr;
483 
484   /// The number of redeclaration info entries in ObjCCategoriesMap.
485   unsigned LocalNumObjCCategoriesInMap = 0;
486 
487   /// The Objective-C category lists for categories known to this
488   /// module.
489   SmallVector<uint64_t, 1> ObjCCategories;
490 
491   // === Types ===
492 
493   /// The number of types in this AST file.
494   unsigned LocalNumTypes = 0;
495 
496   /// Offset of each type within the bitstream, indexed by the
497   /// type ID, or the representation of a Type*.
498   const UnderalignedInt64 *TypeOffsets = nullptr;
499 
500   /// Base type ID for types local to this module as represented in
501   /// the global type ID space.
502   serialization::TypeID BaseTypeIndex = 0;
503 
504   /// Remapping table for type IDs in this module.
505   ContinuousRangeMap<uint32_t, int, 2> TypeRemap;
506 
507   // === Miscellaneous ===
508 
509   /// Diagnostic IDs and their mappings that the user changed.
510   SmallVector<uint64_t, 8> PragmaDiagMappings;
511 
512   /// List of modules which depend on this module
513   llvm::SetVector<ModuleFile *> ImportedBy;
514 
515   /// List of modules which this module depends on
516   llvm::SetVector<ModuleFile *> Imports;
517 
518   /// Determine whether this module was directly imported at
519   /// any point during translation.
isDirectlyImported()520   bool isDirectlyImported() const { return DirectlyImported; }
521 
522   /// Is this a module file for a module (rather than a PCH or similar).
isModule()523   bool isModule() const {
524     return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule ||
525            Kind == MK_PrebuiltModule;
526   }
527 
528   /// Dump debugging output for this module.
529   void dump();
530 };
531 
532 } // namespace serialization
533 
534 } // namespace clang
535 
536 #endif // LLVM_CLANG_SERIALIZATION_MODULEFILE_H
537