xref: /aosp_15_r20/external/clang/lib/Format/FormatToken.h (revision 67e74705e28f6214e480b399dd47ea732279e315)
//===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file contains the declaration of the FormatToken, a wrapper
/// around Token with additional information related to formatting.
///
//===----------------------------------------------------------------------===//
15*67e74705SXin Li 
16*67e74705SXin Li #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
17*67e74705SXin Li #define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
18*67e74705SXin Li 
19*67e74705SXin Li #include "clang/Basic/IdentifierTable.h"
20*67e74705SXin Li #include "clang/Basic/OperatorPrecedence.h"
21*67e74705SXin Li #include "clang/Format/Format.h"
22*67e74705SXin Li #include "clang/Lex/Lexer.h"
23*67e74705SXin Li #include <memory>
24*67e74705SXin Li 
25*67e74705SXin Li namespace clang {
26*67e74705SXin Li namespace format {
27*67e74705SXin Li 
// X-macro listing every token type name exactly once. A user defines
// TYPE(X) before expanding LIST_TOKEN_TYPES and undefines it afterwards; each
// entry then expands to TYPE(<Name>). Keep the list free of comments: every
// line but the last must end in a line continuation.
#define LIST_TOKEN_TYPES \
  TYPE(ArrayInitializerLSquare) \
  TYPE(ArraySubscriptLSquare) \
  TYPE(AttributeParen) \
  TYPE(BinaryOperator) \
  TYPE(BitFieldColon) \
  TYPE(BlockComment) \
  TYPE(CastRParen) \
  TYPE(ConditionalExpr) \
  TYPE(ConflictAlternative) \
  TYPE(ConflictEnd) \
  TYPE(ConflictStart) \
  TYPE(CtorInitializerColon) \
  TYPE(CtorInitializerComma) \
  TYPE(DesignatedInitializerPeriod) \
  TYPE(DictLiteral) \
  TYPE(ForEachMacro) \
  TYPE(FunctionAnnotationRParen) \
  TYPE(FunctionDeclarationName) \
  TYPE(FunctionLBrace) \
  TYPE(FunctionTypeLParen) \
  TYPE(ImplicitStringLiteral) \
  TYPE(InheritanceColon) \
  TYPE(InlineASMBrace) \
  TYPE(InlineASMColon) \
  TYPE(JavaAnnotation) \
  TYPE(JsComputedPropertyName) \
  TYPE(JsFatArrow) \
  TYPE(JsTypeColon) \
  TYPE(JsTypeOperator) \
  TYPE(JsTypeOptionalQuestion) \
  TYPE(LambdaArrow) \
  TYPE(LambdaLSquare) \
  TYPE(LeadingJavaAnnotation) \
  TYPE(LineComment) \
  TYPE(MacroBlockBegin) \
  TYPE(MacroBlockEnd) \
  TYPE(ObjCBlockLBrace) \
  TYPE(ObjCBlockLParen) \
  TYPE(ObjCDecl) \
  TYPE(ObjCForIn) \
  TYPE(ObjCMethodExpr) \
  TYPE(ObjCMethodSpecifier) \
  TYPE(ObjCProperty) \
  TYPE(ObjCStringLiteral) \
  TYPE(OverloadedOperator) \
  TYPE(OverloadedOperatorLParen) \
  TYPE(PointerOrReference) \
  TYPE(PureVirtualSpecifier) \
  TYPE(RangeBasedForLoopColon) \
  TYPE(RegexLiteral) \
  TYPE(SelectorName) \
  TYPE(StartOfName) \
  TYPE(TemplateCloser) \
  TYPE(TemplateOpener) \
  TYPE(TemplateString) \
  TYPE(TrailingAnnotation) \
  TYPE(TrailingReturnArrow) \
  TYPE(TrailingUnaryOperator) \
  TYPE(UnaryOperator) \
  TYPE(Unknown)
89*67e74705SXin Li 
90*67e74705SXin Li enum TokenType {
91*67e74705SXin Li #define TYPE(X) TT_##X,
92*67e74705SXin Li LIST_TOKEN_TYPES
93*67e74705SXin Li #undef TYPE
94*67e74705SXin Li   NUM_TOKEN_TYPES
95*67e74705SXin Li };
96*67e74705SXin Li 
97*67e74705SXin Li /// \brief Determines the name of a token type.
98*67e74705SXin Li const char *getTokenTypeName(TokenType Type);
99*67e74705SXin Li 
/// \brief Represents what type of block a set of braces open.
enum BraceBlockKind { BK_Unknown, BK_Block, BK_BracedInit };

/// \brief The packing kind of a function's parameters.
enum ParameterPackingKind { PPK_BinPacked, PPK_OnePerLine, PPK_Inconclusive };

/// \brief The formatting decision recorded for a token; \c FD_Unformatted is
/// the value used before any decision has been stored.
enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break };
107*67e74705SXin Li 
// Forward declarations: FormatToken only stores pointers to these types.
class TokenRole;
class AnnotatedLine;
110*67e74705SXin Li 
111*67e74705SXin Li /// \brief A wrapper around a \c Token storing information about the
112*67e74705SXin Li /// whitespace characters preceding it.
113*67e74705SXin Li struct FormatToken {
FormatTokenFormatToken114*67e74705SXin Li   FormatToken() {}
115*67e74705SXin Li 
116*67e74705SXin Li   /// \brief The \c Token.
117*67e74705SXin Li   Token Tok;
118*67e74705SXin Li 
119*67e74705SXin Li   /// \brief The number of newlines immediately before the \c Token.
120*67e74705SXin Li   ///
121*67e74705SXin Li   /// This can be used to determine what the user wrote in the original code
122*67e74705SXin Li   /// and thereby e.g. leave an empty line between two function definitions.
123*67e74705SXin Li   unsigned NewlinesBefore = 0;
124*67e74705SXin Li 
125*67e74705SXin Li   /// \brief Whether there is at least one unescaped newline before the \c
126*67e74705SXin Li   /// Token.
127*67e74705SXin Li   bool HasUnescapedNewline = false;
128*67e74705SXin Li 
129*67e74705SXin Li   /// \brief The range of the whitespace immediately preceding the \c Token.
130*67e74705SXin Li   SourceRange WhitespaceRange;
131*67e74705SXin Li 
132*67e74705SXin Li   /// \brief The offset just past the last '\n' in this token's leading
133*67e74705SXin Li   /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
134*67e74705SXin Li   unsigned LastNewlineOffset = 0;
135*67e74705SXin Li 
136*67e74705SXin Li   /// \brief The width of the non-whitespace parts of the token (or its first
137*67e74705SXin Li   /// line for multi-line tokens) in columns.
138*67e74705SXin Li   /// We need this to correctly measure number of columns a token spans.
139*67e74705SXin Li   unsigned ColumnWidth = 0;
140*67e74705SXin Li 
141*67e74705SXin Li   /// \brief Contains the width in columns of the last line of a multi-line
142*67e74705SXin Li   /// token.
143*67e74705SXin Li   unsigned LastLineColumnWidth = 0;
144*67e74705SXin Li 
145*67e74705SXin Li   /// \brief Whether the token text contains newlines (escaped or not).
146*67e74705SXin Li   bool IsMultiline = false;
147*67e74705SXin Li 
148*67e74705SXin Li   /// \brief Indicates that this is the first token of the file.
149*67e74705SXin Li   bool IsFirst = false;
150*67e74705SXin Li 
151*67e74705SXin Li   /// \brief Whether there must be a line break before this token.
152*67e74705SXin Li   ///
153*67e74705SXin Li   /// This happens for example when a preprocessor directive ended directly
154*67e74705SXin Li   /// before the token.
155*67e74705SXin Li   bool MustBreakBefore = false;
156*67e74705SXin Li 
157*67e74705SXin Li   /// \brief The raw text of the token.
158*67e74705SXin Li   ///
159*67e74705SXin Li   /// Contains the raw token text without leading whitespace and without leading
160*67e74705SXin Li   /// escaped newlines.
161*67e74705SXin Li   StringRef TokenText;
162*67e74705SXin Li 
163*67e74705SXin Li   /// \brief Set to \c true if this token is an unterminated literal.
164*67e74705SXin Li   bool IsUnterminatedLiteral = 0;
165*67e74705SXin Li 
166*67e74705SXin Li   /// \brief Contains the kind of block if this token is a brace.
167*67e74705SXin Li   BraceBlockKind BlockKind = BK_Unknown;
168*67e74705SXin Li 
169*67e74705SXin Li   TokenType Type = TT_Unknown;
170*67e74705SXin Li 
171*67e74705SXin Li   /// \brief The number of spaces that should be inserted before this token.
172*67e74705SXin Li   unsigned SpacesRequiredBefore = 0;
173*67e74705SXin Li 
174*67e74705SXin Li   /// \brief \c true if it is allowed to break before this token.
175*67e74705SXin Li   bool CanBreakBefore = false;
176*67e74705SXin Li 
177*67e74705SXin Li   /// \brief \c true if this is the ">" of "template<..>".
178*67e74705SXin Li   bool ClosesTemplateDeclaration = false;
179*67e74705SXin Li 
180*67e74705SXin Li   /// \brief Number of parameters, if this is "(", "[" or "<".
181*67e74705SXin Li   ///
182*67e74705SXin Li   /// This is initialized to 1 as we don't need to distinguish functions with
183*67e74705SXin Li   /// 0 parameters from functions with 1 parameter. Thus, we can simply count
184*67e74705SXin Li   /// the number of commas.
185*67e74705SXin Li   unsigned ParameterCount = 0;
186*67e74705SXin Li 
187*67e74705SXin Li   /// \brief Number of parameters that are nested blocks,
188*67e74705SXin Li   /// if this is "(", "[" or "<".
189*67e74705SXin Li   unsigned BlockParameterCount = 0;
190*67e74705SXin Li 
191*67e74705SXin Li   /// \brief If this is a bracket ("<", "(", "[" or "{"), contains the kind of
192*67e74705SXin Li   /// the surrounding bracket.
193*67e74705SXin Li   tok::TokenKind ParentBracket = tok::unknown;
194*67e74705SXin Li 
195*67e74705SXin Li   /// \brief A token can have a special role that can carry extra information
196*67e74705SXin Li   /// about the token's formatting.
197*67e74705SXin Li   std::unique_ptr<TokenRole> Role;
198*67e74705SXin Li 
199*67e74705SXin Li   /// \brief If this is an opening parenthesis, how are the parameters packed?
200*67e74705SXin Li   ParameterPackingKind PackingKind = PPK_Inconclusive;
201*67e74705SXin Li 
202*67e74705SXin Li   /// \brief The total length of the unwrapped line up to and including this
203*67e74705SXin Li   /// token.
204*67e74705SXin Li   unsigned TotalLength = 0;
205*67e74705SXin Li 
206*67e74705SXin Li   /// \brief The original 0-based column of this token, including expanded tabs.
207*67e74705SXin Li   /// The configured TabWidth is used as tab width.
208*67e74705SXin Li   unsigned OriginalColumn = 0;
209*67e74705SXin Li 
210*67e74705SXin Li   /// \brief The length of following tokens until the next natural split point,
211*67e74705SXin Li   /// or the next token that can be broken.
212*67e74705SXin Li   unsigned UnbreakableTailLength = 0;
213*67e74705SXin Li 
214*67e74705SXin Li   // FIXME: Come up with a 'cleaner' concept.
215*67e74705SXin Li   /// \brief The binding strength of a token. This is a combined value of
216*67e74705SXin Li   /// operator precedence, parenthesis nesting, etc.
217*67e74705SXin Li   unsigned BindingStrength = 0;
218*67e74705SXin Li 
219*67e74705SXin Li   /// \brief The nesting level of this token, i.e. the number of surrounding (),
220*67e74705SXin Li   /// [], {} or <>.
221*67e74705SXin Li   unsigned NestingLevel = 0;
222*67e74705SXin Li 
223*67e74705SXin Li   /// \brief Penalty for inserting a line break before this token.
224*67e74705SXin Li   unsigned SplitPenalty = 0;
225*67e74705SXin Li 
226*67e74705SXin Li   /// \brief If this is the first ObjC selector name in an ObjC method
227*67e74705SXin Li   /// definition or call, this contains the length of the longest name.
228*67e74705SXin Li   ///
229*67e74705SXin Li   /// This being set to 0 means that the selectors should not be colon-aligned,
230*67e74705SXin Li   /// e.g. because several of them are block-type.
231*67e74705SXin Li   unsigned LongestObjCSelectorName = 0;
232*67e74705SXin Li 
233*67e74705SXin Li   /// \brief Stores the number of required fake parentheses and the
234*67e74705SXin Li   /// corresponding operator precedence.
235*67e74705SXin Li   ///
236*67e74705SXin Li   /// If multiple fake parentheses start at a token, this vector stores them in
237*67e74705SXin Li   /// reverse order, i.e. inner fake parenthesis first.
238*67e74705SXin Li   SmallVector<prec::Level, 4> FakeLParens;
239*67e74705SXin Li   /// \brief Insert this many fake ) after this token for correct indentation.
240*67e74705SXin Li   unsigned FakeRParens = 0;
241*67e74705SXin Li 
242*67e74705SXin Li   /// \brief \c true if this token starts a binary expression, i.e. has at least
243*67e74705SXin Li   /// one fake l_paren with a precedence greater than prec::Unknown.
244*67e74705SXin Li   bool StartsBinaryExpression = false;
245*67e74705SXin Li   /// \brief \c true if this token ends a binary expression.
246*67e74705SXin Li   bool EndsBinaryExpression = false;
247*67e74705SXin Li 
248*67e74705SXin Li   /// \brief Is this is an operator (or "."/"->") in a sequence of operators
249*67e74705SXin Li   /// with the same precedence, contains the 0-based operator index.
250*67e74705SXin Li   unsigned OperatorIndex = 0;
251*67e74705SXin Li 
252*67e74705SXin Li   /// \brief If this is an operator (or "."/"->") in a sequence of operators
253*67e74705SXin Li   /// with the same precedence, points to the next operator.
254*67e74705SXin Li   FormatToken *NextOperator = nullptr;
255*67e74705SXin Li 
256*67e74705SXin Li   /// \brief Is this token part of a \c DeclStmt defining multiple variables?
257*67e74705SXin Li   ///
258*67e74705SXin Li   /// Only set if \c Type == \c TT_StartOfName.
259*67e74705SXin Li   bool PartOfMultiVariableDeclStmt = false;
260*67e74705SXin Li 
261*67e74705SXin Li   /// \brief If this is a bracket, this points to the matching one.
262*67e74705SXin Li   FormatToken *MatchingParen = nullptr;
263*67e74705SXin Li 
264*67e74705SXin Li   /// \brief The previous token in the unwrapped line.
265*67e74705SXin Li   FormatToken *Previous = nullptr;
266*67e74705SXin Li 
267*67e74705SXin Li   /// \brief The next token in the unwrapped line.
268*67e74705SXin Li   FormatToken *Next = nullptr;
269*67e74705SXin Li 
270*67e74705SXin Li   /// \brief If this token starts a block, this contains all the unwrapped lines
271*67e74705SXin Li   /// in it.
272*67e74705SXin Li   SmallVector<AnnotatedLine *, 1> Children;
273*67e74705SXin Li 
274*67e74705SXin Li   /// \brief Stores the formatting decision for the token once it was made.
275*67e74705SXin Li   FormatDecision Decision = FD_Unformatted;
276*67e74705SXin Li 
277*67e74705SXin Li   /// \brief If \c true, this token has been fully formatted (indented and
278*67e74705SXin Li   /// potentially re-formatted inside), and we do not allow further formatting
279*67e74705SXin Li   /// changes.
280*67e74705SXin Li   bool Finalized = false;
281*67e74705SXin Li 
isFormatToken282*67e74705SXin Li   bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
isFormatToken283*67e74705SXin Li   bool is(TokenType TT) const { return Type == TT; }
isFormatToken284*67e74705SXin Li   bool is(const IdentifierInfo *II) const {
285*67e74705SXin Li     return II && II == Tok.getIdentifierInfo();
286*67e74705SXin Li   }
isFormatToken287*67e74705SXin Li   bool is(tok::PPKeywordKind Kind) const {
288*67e74705SXin Li     return Tok.getIdentifierInfo() &&
289*67e74705SXin Li            Tok.getIdentifierInfo()->getPPKeywordID() == Kind;
290*67e74705SXin Li   }
isOneOfFormatToken291*67e74705SXin Li   template <typename A, typename B> bool isOneOf(A K1, B K2) const {
292*67e74705SXin Li     return is(K1) || is(K2);
293*67e74705SXin Li   }
294*67e74705SXin Li   template <typename A, typename B, typename... Ts>
isOneOfFormatToken295*67e74705SXin Li   bool isOneOf(A K1, B K2, Ts... Ks) const {
296*67e74705SXin Li     return is(K1) || isOneOf(K2, Ks...);
297*67e74705SXin Li   }
isNotFormatToken298*67e74705SXin Li   template <typename T> bool isNot(T Kind) const { return !is(Kind); }
299*67e74705SXin Li 
300*67e74705SXin Li   /// \c true if this token starts a sequence with the given tokens in order,
301*67e74705SXin Li   /// following the ``Next`` pointers, ignoring comments.
302*67e74705SXin Li   template <typename A, typename... Ts>
startsSequenceFormatToken303*67e74705SXin Li   bool startsSequence(A K1, Ts... Tokens) const {
304*67e74705SXin Li     return startsSequenceInternal(K1, Tokens...);
305*67e74705SXin Li   }
306*67e74705SXin Li 
307*67e74705SXin Li   /// \c true if this token ends a sequence with the given tokens in order,
308*67e74705SXin Li   /// following the ``Previous`` pointers, ignoring comments.
309*67e74705SXin Li   template <typename A, typename... Ts>
endsSequenceFormatToken310*67e74705SXin Li   bool endsSequence(A K1, Ts... Tokens) const {
311*67e74705SXin Li     return endsSequenceInternal(K1, Tokens...);
312*67e74705SXin Li   }
313*67e74705SXin Li 
isStringLiteralFormatToken314*67e74705SXin Li   bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); }
315*67e74705SXin Li 
isObjCAtKeywordFormatToken316*67e74705SXin Li   bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
317*67e74705SXin Li     return Tok.isObjCAtKeyword(Kind);
318*67e74705SXin Li   }
319*67e74705SXin Li 
320*67e74705SXin Li   bool isAccessSpecifier(bool ColonRequired = true) const {
321*67e74705SXin Li     return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
322*67e74705SXin Li            (!ColonRequired || (Next && Next->is(tok::colon)));
323*67e74705SXin Li   }
324*67e74705SXin Li 
325*67e74705SXin Li   /// \brief Determine whether the token is a simple-type-specifier.
326*67e74705SXin Li   bool isSimpleTypeSpecifier() const;
327*67e74705SXin Li 
isObjCAccessSpecifierFormatToken328*67e74705SXin Li   bool isObjCAccessSpecifier() const {
329*67e74705SXin Li     return is(tok::at) && Next && (Next->isObjCAtKeyword(tok::objc_public) ||
330*67e74705SXin Li                                    Next->isObjCAtKeyword(tok::objc_protected) ||
331*67e74705SXin Li                                    Next->isObjCAtKeyword(tok::objc_package) ||
332*67e74705SXin Li                                    Next->isObjCAtKeyword(tok::objc_private));
333*67e74705SXin Li   }
334*67e74705SXin Li 
335*67e74705SXin Li   /// \brief Returns whether \p Tok is ([{ or a template opening <.
opensScopeFormatToken336*67e74705SXin Li   bool opensScope() const {
337*67e74705SXin Li     return isOneOf(tok::l_paren, tok::l_brace, tok::l_square,
338*67e74705SXin Li                    TT_TemplateOpener);
339*67e74705SXin Li   }
340*67e74705SXin Li   /// \brief Returns whether \p Tok is )]} or a template closing >.
closesScopeFormatToken341*67e74705SXin Li   bool closesScope() const {
342*67e74705SXin Li     return isOneOf(tok::r_paren, tok::r_brace, tok::r_square,
343*67e74705SXin Li                    TT_TemplateCloser);
344*67e74705SXin Li   }
345*67e74705SXin Li 
346*67e74705SXin Li   /// \brief Returns \c true if this is a "." or "->" accessing a member.
isMemberAccessFormatToken347*67e74705SXin Li   bool isMemberAccess() const {
348*67e74705SXin Li     return isOneOf(tok::arrow, tok::period, tok::arrowstar) &&
349*67e74705SXin Li            !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow,
350*67e74705SXin Li                     TT_LambdaArrow);
351*67e74705SXin Li   }
352*67e74705SXin Li 
isUnaryOperatorFormatToken353*67e74705SXin Li   bool isUnaryOperator() const {
354*67e74705SXin Li     switch (Tok.getKind()) {
355*67e74705SXin Li     case tok::plus:
356*67e74705SXin Li     case tok::plusplus:
357*67e74705SXin Li     case tok::minus:
358*67e74705SXin Li     case tok::minusminus:
359*67e74705SXin Li     case tok::exclaim:
360*67e74705SXin Li     case tok::tilde:
361*67e74705SXin Li     case tok::kw_sizeof:
362*67e74705SXin Li     case tok::kw_alignof:
363*67e74705SXin Li       return true;
364*67e74705SXin Li     default:
365*67e74705SXin Li       return false;
366*67e74705SXin Li     }
367*67e74705SXin Li   }
368*67e74705SXin Li 
isBinaryOperatorFormatToken369*67e74705SXin Li   bool isBinaryOperator() const {
370*67e74705SXin Li     // Comma is a binary operator, but does not behave as such wrt. formatting.
371*67e74705SXin Li     return getPrecedence() > prec::Comma;
372*67e74705SXin Li   }
373*67e74705SXin Li 
isTrailingCommentFormatToken374*67e74705SXin Li   bool isTrailingComment() const {
375*67e74705SXin Li     return is(tok::comment) &&
376*67e74705SXin Li            (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0);
377*67e74705SXin Li   }
378*67e74705SXin Li 
379*67e74705SXin Li   /// \brief Returns \c true if this is a keyword that can be used
380*67e74705SXin Li   /// like a function call (e.g. sizeof, typeid, ...).
isFunctionLikeKeywordFormatToken381*67e74705SXin Li   bool isFunctionLikeKeyword() const {
382*67e74705SXin Li     switch (Tok.getKind()) {
383*67e74705SXin Li     case tok::kw_throw:
384*67e74705SXin Li     case tok::kw_typeid:
385*67e74705SXin Li     case tok::kw_return:
386*67e74705SXin Li     case tok::kw_sizeof:
387*67e74705SXin Li     case tok::kw_alignof:
388*67e74705SXin Li     case tok::kw_alignas:
389*67e74705SXin Li     case tok::kw_decltype:
390*67e74705SXin Li     case tok::kw_noexcept:
391*67e74705SXin Li     case tok::kw_static_assert:
392*67e74705SXin Li     case tok::kw___attribute:
393*67e74705SXin Li       return true;
394*67e74705SXin Li     default:
395*67e74705SXin Li       return false;
396*67e74705SXin Li     }
397*67e74705SXin Li   }
398*67e74705SXin Li 
399*67e74705SXin Li   /// \brief Returns actual token start location without leading escaped
400*67e74705SXin Li   /// newlines and whitespace.
401*67e74705SXin Li   ///
402*67e74705SXin Li   /// This can be different to Tok.getLocation(), which includes leading escaped
403*67e74705SXin Li   /// newlines.
getStartOfNonWhitespaceFormatToken404*67e74705SXin Li   SourceLocation getStartOfNonWhitespace() const {
405*67e74705SXin Li     return WhitespaceRange.getEnd();
406*67e74705SXin Li   }
407*67e74705SXin Li 
getPrecedenceFormatToken408*67e74705SXin Li   prec::Level getPrecedence() const {
409*67e74705SXin Li     return getBinOpPrecedence(Tok.getKind(), true, true);
410*67e74705SXin Li   }
411*67e74705SXin Li 
412*67e74705SXin Li   /// \brief Returns the previous token ignoring comments.
getPreviousNonCommentFormatToken413*67e74705SXin Li   FormatToken *getPreviousNonComment() const {
414*67e74705SXin Li     FormatToken *Tok = Previous;
415*67e74705SXin Li     while (Tok && Tok->is(tok::comment))
416*67e74705SXin Li       Tok = Tok->Previous;
417*67e74705SXin Li     return Tok;
418*67e74705SXin Li   }
419*67e74705SXin Li 
420*67e74705SXin Li   /// \brief Returns the next token ignoring comments.
getNextNonCommentFormatToken421*67e74705SXin Li   const FormatToken *getNextNonComment() const {
422*67e74705SXin Li     const FormatToken *Tok = Next;
423*67e74705SXin Li     while (Tok && Tok->is(tok::comment))
424*67e74705SXin Li       Tok = Tok->Next;
425*67e74705SXin Li     return Tok;
426*67e74705SXin Li   }
427*67e74705SXin Li 
428*67e74705SXin Li   /// \brief Returns \c true if this tokens starts a block-type list, i.e. a
429*67e74705SXin Li   /// list that should be indented with a block indent.
opensBlockOrBlockTypeListFormatToken430*67e74705SXin Li   bool opensBlockOrBlockTypeList(const FormatStyle &Style) const {
431*67e74705SXin Li     return is(TT_ArrayInitializerLSquare) ||
432*67e74705SXin Li            (is(tok::l_brace) &&
433*67e74705SXin Li             (BlockKind == BK_Block || is(TT_DictLiteral) ||
434*67e74705SXin Li              (!Style.Cpp11BracedListStyle && NestingLevel == 0)));
435*67e74705SXin Li   }
436*67e74705SXin Li 
437*67e74705SXin Li   /// \brief Same as opensBlockOrBlockTypeList, but for the closing token.
closesBlockOrBlockTypeListFormatToken438*67e74705SXin Li   bool closesBlockOrBlockTypeList(const FormatStyle &Style) const {
439*67e74705SXin Li     return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style);
440*67e74705SXin Li   }
441*67e74705SXin Li 
442*67e74705SXin Li private:
443*67e74705SXin Li   // Disallow copying.
444*67e74705SXin Li   FormatToken(const FormatToken &) = delete;
445*67e74705SXin Li   void operator=(const FormatToken &) = delete;
446*67e74705SXin Li 
447*67e74705SXin Li   template <typename A, typename... Ts>
startsSequenceInternalFormatToken448*67e74705SXin Li   bool startsSequenceInternal(A K1, Ts... Tokens) const {
449*67e74705SXin Li     if (is(tok::comment) && Next)
450*67e74705SXin Li       return Next->startsSequenceInternal(K1, Tokens...);
451*67e74705SXin Li     return is(K1) && Next && Next->startsSequenceInternal(Tokens...);
452*67e74705SXin Li   }
453*67e74705SXin Li 
454*67e74705SXin Li   template <typename A>
startsSequenceInternalFormatToken455*67e74705SXin Li   bool startsSequenceInternal(A K1) const {
456*67e74705SXin Li     if (is(tok::comment) && Next)
457*67e74705SXin Li       return Next->startsSequenceInternal(K1);
458*67e74705SXin Li     return is(K1);
459*67e74705SXin Li   }
460*67e74705SXin Li 
461*67e74705SXin Li   template <typename A, typename... Ts>
endsSequenceInternalFormatToken462*67e74705SXin Li   bool endsSequenceInternal(A K1) const {
463*67e74705SXin Li     if (is(tok::comment) && Previous)
464*67e74705SXin Li       return Previous->endsSequenceInternal(K1);
465*67e74705SXin Li     return is(K1);
466*67e74705SXin Li   }
467*67e74705SXin Li 
468*67e74705SXin Li   template <typename A, typename... Ts>
endsSequenceInternalFormatToken469*67e74705SXin Li   bool endsSequenceInternal(A K1, Ts... Tokens) const {
470*67e74705SXin Li     if (is(tok::comment) && Previous)
471*67e74705SXin Li       return Previous->endsSequenceInternal(K1, Tokens...);
472*67e74705SXin Li     return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...);
473*67e74705SXin Li   }
474*67e74705SXin Li };
475*67e74705SXin Li 
// Forward declarations: the role interfaces below only take these by
// pointer or reference.
class ContinuationIndenter;
struct LineState;
478*67e74705SXin Li 
479*67e74705SXin Li class TokenRole {
480*67e74705SXin Li public:
TokenRole(const FormatStyle & Style)481*67e74705SXin Li   TokenRole(const FormatStyle &Style) : Style(Style) {}
482*67e74705SXin Li   virtual ~TokenRole();
483*67e74705SXin Li 
484*67e74705SXin Li   /// \brief After the \c TokenAnnotator has finished annotating all the tokens,
485*67e74705SXin Li   /// this function precomputes required information for formatting.
486*67e74705SXin Li   virtual void precomputeFormattingInfos(const FormatToken *Token);
487*67e74705SXin Li 
488*67e74705SXin Li   /// \brief Apply the special formatting that the given role demands.
489*67e74705SXin Li   ///
490*67e74705SXin Li   /// Assumes that the token having this role is already formatted.
491*67e74705SXin Li   ///
492*67e74705SXin Li   /// Continues formatting from \p State leaving indentation to \p Indenter and
493*67e74705SXin Li   /// returns the total penalty that this formatting incurs.
formatFromToken(LineState & State,ContinuationIndenter * Indenter,bool DryRun)494*67e74705SXin Li   virtual unsigned formatFromToken(LineState &State,
495*67e74705SXin Li                                    ContinuationIndenter *Indenter,
496*67e74705SXin Li                                    bool DryRun) {
497*67e74705SXin Li     return 0;
498*67e74705SXin Li   }
499*67e74705SXin Li 
500*67e74705SXin Li   /// \brief Same as \c formatFromToken, but assumes that the first token has
501*67e74705SXin Li   /// already been set thereby deciding on the first line break.
formatAfterToken(LineState & State,ContinuationIndenter * Indenter,bool DryRun)502*67e74705SXin Li   virtual unsigned formatAfterToken(LineState &State,
503*67e74705SXin Li                                     ContinuationIndenter *Indenter,
504*67e74705SXin Li                                     bool DryRun) {
505*67e74705SXin Li     return 0;
506*67e74705SXin Li   }
507*67e74705SXin Li 
508*67e74705SXin Li   /// \brief Notifies the \c Role that a comma was found.
CommaFound(const FormatToken * Token)509*67e74705SXin Li   virtual void CommaFound(const FormatToken *Token) {}
510*67e74705SXin Li 
511*67e74705SXin Li protected:
512*67e74705SXin Li   const FormatStyle &Style;
513*67e74705SXin Li };
514*67e74705SXin Li 
515*67e74705SXin Li class CommaSeparatedList : public TokenRole {
516*67e74705SXin Li public:
CommaSeparatedList(const FormatStyle & Style)517*67e74705SXin Li   CommaSeparatedList(const FormatStyle &Style)
518*67e74705SXin Li       : TokenRole(Style), HasNestedBracedList(false) {}
519*67e74705SXin Li 
520*67e74705SXin Li   void precomputeFormattingInfos(const FormatToken *Token) override;
521*67e74705SXin Li 
522*67e74705SXin Li   unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter,
523*67e74705SXin Li                             bool DryRun) override;
524*67e74705SXin Li 
525*67e74705SXin Li   unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter,
526*67e74705SXin Li                            bool DryRun) override;
527*67e74705SXin Li 
528*67e74705SXin Li   /// \brief Adds \p Token as the next comma to the \c CommaSeparated list.
CommaFound(const FormatToken * Token)529*67e74705SXin Li   void CommaFound(const FormatToken *Token) override {
530*67e74705SXin Li     Commas.push_back(Token);
531*67e74705SXin Li   }
532*67e74705SXin Li 
533*67e74705SXin Li private:
534*67e74705SXin Li   /// \brief A struct that holds information on how to format a given list with
535*67e74705SXin Li   /// a specific number of columns.
536*67e74705SXin Li   struct ColumnFormat {
537*67e74705SXin Li     /// \brief The number of columns to use.
538*67e74705SXin Li     unsigned Columns;
539*67e74705SXin Li 
540*67e74705SXin Li     /// \brief The total width in characters.
541*67e74705SXin Li     unsigned TotalWidth;
542*67e74705SXin Li 
543*67e74705SXin Li     /// \brief The number of lines required for this format.
544*67e74705SXin Li     unsigned LineCount;
545*67e74705SXin Li 
546*67e74705SXin Li     /// \brief The size of each column in characters.
547*67e74705SXin Li     SmallVector<unsigned, 8> ColumnSizes;
548*67e74705SXin Li   };
549*67e74705SXin Li 
550*67e74705SXin Li   /// \brief Calculate which \c ColumnFormat fits best into
551*67e74705SXin Li   /// \p RemainingCharacters.
552*67e74705SXin Li   const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const;
553*67e74705SXin Li 
554*67e74705SXin Li   /// \brief The ordered \c FormatTokens making up the commas of this list.
555*67e74705SXin Li   SmallVector<const FormatToken *, 8> Commas;
556*67e74705SXin Li 
557*67e74705SXin Li   /// \brief The length of each of the list's items in characters including the
558*67e74705SXin Li   /// trailing comma.
559*67e74705SXin Li   SmallVector<unsigned, 8> ItemLengths;
560*67e74705SXin Li 
561*67e74705SXin Li   /// \brief Precomputed formats that can be used for this list.
562*67e74705SXin Li   SmallVector<ColumnFormat, 4> Formats;
563*67e74705SXin Li 
564*67e74705SXin Li   bool HasNestedBracedList;
565*67e74705SXin Li };
566*67e74705SXin Li 
567*67e74705SXin Li /// \brief Encapsulates keywords that are context sensitive or for languages not
568*67e74705SXin Li /// properly supported by Clang's lexer.
569*67e74705SXin Li struct AdditionalKeywords {
AdditionalKeywordsAdditionalKeywords570*67e74705SXin Li   AdditionalKeywords(IdentifierTable &IdentTable) {
571*67e74705SXin Li     kw_final = &IdentTable.get("final");
572*67e74705SXin Li     kw_override = &IdentTable.get("override");
573*67e74705SXin Li     kw_in = &IdentTable.get("in");
574*67e74705SXin Li     kw_of = &IdentTable.get("of");
575*67e74705SXin Li     kw_CF_ENUM = &IdentTable.get("CF_ENUM");
576*67e74705SXin Li     kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS");
577*67e74705SXin Li     kw_NS_ENUM = &IdentTable.get("NS_ENUM");
578*67e74705SXin Li     kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS");
579*67e74705SXin Li 
580*67e74705SXin Li     kw_as = &IdentTable.get("as");
581*67e74705SXin Li     kw_async = &IdentTable.get("async");
582*67e74705SXin Li     kw_await = &IdentTable.get("await");
583*67e74705SXin Li     kw_finally = &IdentTable.get("finally");
584*67e74705SXin Li     kw_from = &IdentTable.get("from");
585*67e74705SXin Li     kw_function = &IdentTable.get("function");
586*67e74705SXin Li     kw_import = &IdentTable.get("import");
587*67e74705SXin Li     kw_is = &IdentTable.get("is");
588*67e74705SXin Li     kw_let = &IdentTable.get("let");
589*67e74705SXin Li     kw_type = &IdentTable.get("type");
590*67e74705SXin Li     kw_var = &IdentTable.get("var");
591*67e74705SXin Li     kw_yield = &IdentTable.get("yield");
592*67e74705SXin Li 
593*67e74705SXin Li     kw_abstract = &IdentTable.get("abstract");
594*67e74705SXin Li     kw_assert = &IdentTable.get("assert");
595*67e74705SXin Li     kw_extends = &IdentTable.get("extends");
596*67e74705SXin Li     kw_implements = &IdentTable.get("implements");
597*67e74705SXin Li     kw_instanceof = &IdentTable.get("instanceof");
598*67e74705SXin Li     kw_interface = &IdentTable.get("interface");
599*67e74705SXin Li     kw_native = &IdentTable.get("native");
600*67e74705SXin Li     kw_package = &IdentTable.get("package");
601*67e74705SXin Li     kw_synchronized = &IdentTable.get("synchronized");
602*67e74705SXin Li     kw_throws = &IdentTable.get("throws");
603*67e74705SXin Li     kw___except = &IdentTable.get("__except");
604*67e74705SXin Li 
605*67e74705SXin Li     kw_mark = &IdentTable.get("mark");
606*67e74705SXin Li 
607*67e74705SXin Li     kw_extend = &IdentTable.get("extend");
608*67e74705SXin Li     kw_option = &IdentTable.get("option");
609*67e74705SXin Li     kw_optional = &IdentTable.get("optional");
610*67e74705SXin Li     kw_repeated = &IdentTable.get("repeated");
611*67e74705SXin Li     kw_required = &IdentTable.get("required");
612*67e74705SXin Li     kw_returns = &IdentTable.get("returns");
613*67e74705SXin Li 
614*67e74705SXin Li     kw_signals = &IdentTable.get("signals");
615*67e74705SXin Li     kw_qsignals = &IdentTable.get("Q_SIGNALS");
616*67e74705SXin Li     kw_slots = &IdentTable.get("slots");
617*67e74705SXin Li     kw_qslots = &IdentTable.get("Q_SLOTS");
618*67e74705SXin Li   }
619*67e74705SXin Li 
620*67e74705SXin Li   // Context sensitive keywords.
621*67e74705SXin Li   IdentifierInfo *kw_final;
622*67e74705SXin Li   IdentifierInfo *kw_override;
623*67e74705SXin Li   IdentifierInfo *kw_in;
624*67e74705SXin Li   IdentifierInfo *kw_of;
625*67e74705SXin Li   IdentifierInfo *kw_CF_ENUM;
626*67e74705SXin Li   IdentifierInfo *kw_CF_OPTIONS;
627*67e74705SXin Li   IdentifierInfo *kw_NS_ENUM;
628*67e74705SXin Li   IdentifierInfo *kw_NS_OPTIONS;
629*67e74705SXin Li   IdentifierInfo *kw___except;
630*67e74705SXin Li 
631*67e74705SXin Li   // JavaScript keywords.
632*67e74705SXin Li   IdentifierInfo *kw_as;
633*67e74705SXin Li   IdentifierInfo *kw_async;
634*67e74705SXin Li   IdentifierInfo *kw_await;
635*67e74705SXin Li   IdentifierInfo *kw_finally;
636*67e74705SXin Li   IdentifierInfo *kw_from;
637*67e74705SXin Li   IdentifierInfo *kw_function;
638*67e74705SXin Li   IdentifierInfo *kw_import;
639*67e74705SXin Li   IdentifierInfo *kw_is;
640*67e74705SXin Li   IdentifierInfo *kw_let;
641*67e74705SXin Li   IdentifierInfo *kw_type;
642*67e74705SXin Li   IdentifierInfo *kw_var;
643*67e74705SXin Li   IdentifierInfo *kw_yield;
644*67e74705SXin Li 
645*67e74705SXin Li   // Java keywords.
646*67e74705SXin Li   IdentifierInfo *kw_abstract;
647*67e74705SXin Li   IdentifierInfo *kw_assert;
648*67e74705SXin Li   IdentifierInfo *kw_extends;
649*67e74705SXin Li   IdentifierInfo *kw_implements;
650*67e74705SXin Li   IdentifierInfo *kw_instanceof;
651*67e74705SXin Li   IdentifierInfo *kw_interface;
652*67e74705SXin Li   IdentifierInfo *kw_native;
653*67e74705SXin Li   IdentifierInfo *kw_package;
654*67e74705SXin Li   IdentifierInfo *kw_synchronized;
655*67e74705SXin Li   IdentifierInfo *kw_throws;
656*67e74705SXin Li 
657*67e74705SXin Li   // Pragma keywords.
658*67e74705SXin Li   IdentifierInfo *kw_mark;
659*67e74705SXin Li 
660*67e74705SXin Li   // Proto keywords.
661*67e74705SXin Li   IdentifierInfo *kw_extend;
662*67e74705SXin Li   IdentifierInfo *kw_option;
663*67e74705SXin Li   IdentifierInfo *kw_optional;
664*67e74705SXin Li   IdentifierInfo *kw_repeated;
665*67e74705SXin Li   IdentifierInfo *kw_required;
666*67e74705SXin Li   IdentifierInfo *kw_returns;
667*67e74705SXin Li 
668*67e74705SXin Li   // QT keywords.
669*67e74705SXin Li   IdentifierInfo *kw_signals;
670*67e74705SXin Li   IdentifierInfo *kw_qsignals;
671*67e74705SXin Li   IdentifierInfo *kw_slots;
672*67e74705SXin Li   IdentifierInfo *kw_qslots;
673*67e74705SXin Li };
674*67e74705SXin Li 
675*67e74705SXin Li } // namespace format
676*67e74705SXin Li } // namespace clang
677*67e74705SXin Li 
678*67e74705SXin Li #endif
679