1*67e74705SXin Li //===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===// 2*67e74705SXin Li // 3*67e74705SXin Li // The LLVM Compiler Infrastructure 4*67e74705SXin Li // 5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source 6*67e74705SXin Li // License. See LICENSE.TXT for details. 7*67e74705SXin Li // 8*67e74705SXin Li //===----------------------------------------------------------------------===// 9*67e74705SXin Li /// 10*67e74705SXin Li /// \file 11*67e74705SXin Li /// \brief This file contains the declaration of the FormatToken, a wrapper 12*67e74705SXin Li /// around Token with additional information related to formatting. 13*67e74705SXin Li /// 14*67e74705SXin Li //===----------------------------------------------------------------------===// 15*67e74705SXin Li 16*67e74705SXin Li #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H 17*67e74705SXin Li #define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H 18*67e74705SXin Li 19*67e74705SXin Li #include "clang/Basic/IdentifierTable.h" 20*67e74705SXin Li #include "clang/Basic/OperatorPrecedence.h" 21*67e74705SXin Li #include "clang/Format/Format.h" 22*67e74705SXin Li #include "clang/Lex/Lexer.h" 23*67e74705SXin Li #include <memory> 24*67e74705SXin Li 25*67e74705SXin Li namespace clang { 26*67e74705SXin Li namespace format { 27*67e74705SXin Li 28*67e74705SXin Li #define LIST_TOKEN_TYPES \ 29*67e74705SXin Li TYPE(ArrayInitializerLSquare) \ 30*67e74705SXin Li TYPE(ArraySubscriptLSquare) \ 31*67e74705SXin Li TYPE(AttributeParen) \ 32*67e74705SXin Li TYPE(BinaryOperator) \ 33*67e74705SXin Li TYPE(BitFieldColon) \ 34*67e74705SXin Li TYPE(BlockComment) \ 35*67e74705SXin Li TYPE(CastRParen) \ 36*67e74705SXin Li TYPE(ConditionalExpr) \ 37*67e74705SXin Li TYPE(ConflictAlternative) \ 38*67e74705SXin Li TYPE(ConflictEnd) \ 39*67e74705SXin Li TYPE(ConflictStart) \ 40*67e74705SXin Li TYPE(CtorInitializerColon) \ 41*67e74705SXin Li TYPE(CtorInitializerComma) \ 42*67e74705SXin Li TYPE(DesignatedInitializerPeriod) \ 43*67e74705SXin Li TYPE(DictLiteral) \ 44*67e74705SXin Li TYPE(ForEachMacro) \ 45*67e74705SXin Li TYPE(FunctionAnnotationRParen) \ 46*67e74705SXin Li TYPE(FunctionDeclarationName) \ 47*67e74705SXin Li TYPE(FunctionLBrace) \ 48*67e74705SXin Li TYPE(FunctionTypeLParen) \ 49*67e74705SXin Li TYPE(ImplicitStringLiteral) \ 50*67e74705SXin Li TYPE(InheritanceColon) \ 51*67e74705SXin Li TYPE(InlineASMBrace) \ 52*67e74705SXin Li TYPE(InlineASMColon) \ 53*67e74705SXin Li TYPE(JavaAnnotation) \ 54*67e74705SXin Li TYPE(JsComputedPropertyName) \ 55*67e74705SXin Li TYPE(JsFatArrow) \ 56*67e74705SXin Li TYPE(JsTypeColon) \ 57*67e74705SXin Li TYPE(JsTypeOperator) \ 58*67e74705SXin Li TYPE(JsTypeOptionalQuestion) \ 59*67e74705SXin Li TYPE(LambdaArrow) \ 60*67e74705SXin Li TYPE(LambdaLSquare) \ 61*67e74705SXin Li TYPE(LeadingJavaAnnotation) \ 62*67e74705SXin Li TYPE(LineComment) \ 63*67e74705SXin Li TYPE(MacroBlockBegin) \ 64*67e74705SXin Li TYPE(MacroBlockEnd) \ 65*67e74705SXin Li TYPE(ObjCBlockLBrace) \ 66*67e74705SXin Li TYPE(ObjCBlockLParen) \ 67*67e74705SXin Li TYPE(ObjCDecl) \ 68*67e74705SXin Li TYPE(ObjCForIn) \ 69*67e74705SXin Li TYPE(ObjCMethodExpr) \ 70*67e74705SXin Li TYPE(ObjCMethodSpecifier) \ 71*67e74705SXin Li TYPE(ObjCProperty) \ 72*67e74705SXin Li TYPE(ObjCStringLiteral) \ 73*67e74705SXin Li TYPE(OverloadedOperator) \ 74*67e74705SXin Li TYPE(OverloadedOperatorLParen) \ 75*67e74705SXin Li TYPE(PointerOrReference) \ 76*67e74705SXin Li TYPE(PureVirtualSpecifier) \ 77*67e74705SXin Li TYPE(RangeBasedForLoopColon) \ 78*67e74705SXin Li TYPE(RegexLiteral) \ 79*67e74705SXin Li TYPE(SelectorName) \ 80*67e74705SXin Li TYPE(StartOfName) \ 81*67e74705SXin Li TYPE(TemplateCloser) \ 82*67e74705SXin Li TYPE(TemplateOpener) \ 83*67e74705SXin Li TYPE(TemplateString) \ 84*67e74705SXin Li TYPE(TrailingAnnotation) \ 85*67e74705SXin Li TYPE(TrailingReturnArrow) \ 86*67e74705SXin Li TYPE(TrailingUnaryOperator) \ 87*67e74705SXin Li TYPE(UnaryOperator) \ 88*67e74705SXin Li TYPE(Unknown) 89*67e74705SXin Li 90*67e74705SXin Li enum TokenType { 91*67e74705SXin Li #define TYPE(X) TT_##X, 92*67e74705SXin Li LIST_TOKEN_TYPES 93*67e74705SXin Li #undef TYPE 94*67e74705SXin Li NUM_TOKEN_TYPES 95*67e74705SXin Li }; 96*67e74705SXin Li 97*67e74705SXin Li /// \brief Determines the name of a token type. 98*67e74705SXin Li const char *getTokenTypeName(TokenType Type); 99*67e74705SXin Li 100*67e74705SXin Li // Represents what type of block a set of braces open. 101*67e74705SXin Li enum BraceBlockKind { BK_Unknown, BK_Block, BK_BracedInit }; 102*67e74705SXin Li 103*67e74705SXin Li // The packing kind of a function's parameters. 104*67e74705SXin Li enum ParameterPackingKind { PPK_BinPacked, PPK_OnePerLine, PPK_Inconclusive }; 105*67e74705SXin Li 106*67e74705SXin Li enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break }; 107*67e74705SXin Li 108*67e74705SXin Li class TokenRole; 109*67e74705SXin Li class AnnotatedLine; 110*67e74705SXin Li 111*67e74705SXin Li /// \brief A wrapper around a \c Token storing information about the 112*67e74705SXin Li /// whitespace characters preceding it. 113*67e74705SXin Li struct FormatToken { FormatTokenFormatToken114*67e74705SXin Li FormatToken() {} 115*67e74705SXin Li 116*67e74705SXin Li /// \brief The \c Token. 117*67e74705SXin Li Token Tok; 118*67e74705SXin Li 119*67e74705SXin Li /// \brief The number of newlines immediately before the \c Token. 120*67e74705SXin Li /// 121*67e74705SXin Li /// This can be used to determine what the user wrote in the original code 122*67e74705SXin Li /// and thereby e.g. leave an empty line between two function definitions. 123*67e74705SXin Li unsigned NewlinesBefore = 0; 124*67e74705SXin Li 125*67e74705SXin Li /// \brief Whether there is at least one unescaped newline before the \c 126*67e74705SXin Li /// Token. 127*67e74705SXin Li bool HasUnescapedNewline = false; 128*67e74705SXin Li 129*67e74705SXin Li /// \brief The range of the whitespace immediately preceding the \c Token. 130*67e74705SXin Li SourceRange WhitespaceRange; 131*67e74705SXin Li 132*67e74705SXin Li /// \brief The offset just past the last '\n' in this token's leading 133*67e74705SXin Li /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. 134*67e74705SXin Li unsigned LastNewlineOffset = 0; 135*67e74705SXin Li 136*67e74705SXin Li /// \brief The width of the non-whitespace parts of the token (or its first 137*67e74705SXin Li /// line for multi-line tokens) in columns. 138*67e74705SXin Li /// We need this to correctly measure number of columns a token spans. 139*67e74705SXin Li unsigned ColumnWidth = 0; 140*67e74705SXin Li 141*67e74705SXin Li /// \brief Contains the width in columns of the last line of a multi-line 142*67e74705SXin Li /// token. 143*67e74705SXin Li unsigned LastLineColumnWidth = 0; 144*67e74705SXin Li 145*67e74705SXin Li /// \brief Whether the token text contains newlines (escaped or not). 146*67e74705SXin Li bool IsMultiline = false; 147*67e74705SXin Li 148*67e74705SXin Li /// \brief Indicates that this is the first token of the file. 149*67e74705SXin Li bool IsFirst = false; 150*67e74705SXin Li 151*67e74705SXin Li /// \brief Whether there must be a line break before this token. 152*67e74705SXin Li /// 153*67e74705SXin Li /// This happens for example when a preprocessor directive ended directly 154*67e74705SXin Li /// before the token. 155*67e74705SXin Li bool MustBreakBefore = false; 156*67e74705SXin Li 157*67e74705SXin Li /// \brief The raw text of the token. 158*67e74705SXin Li /// 159*67e74705SXin Li /// Contains the raw token text without leading whitespace and without leading 160*67e74705SXin Li /// escaped newlines. 161*67e74705SXin Li StringRef TokenText; 162*67e74705SXin Li 163*67e74705SXin Li /// \brief Set to \c true if this token is an unterminated literal. 164*67e74705SXin Li bool IsUnterminatedLiteral = 0; 165*67e74705SXin Li 166*67e74705SXin Li /// \brief Contains the kind of block if this token is a brace. 167*67e74705SXin Li BraceBlockKind BlockKind = BK_Unknown; 168*67e74705SXin Li 169*67e74705SXin Li TokenType Type = TT_Unknown; 170*67e74705SXin Li 171*67e74705SXin Li /// \brief The number of spaces that should be inserted before this token. 172*67e74705SXin Li unsigned SpacesRequiredBefore = 0; 173*67e74705SXin Li 174*67e74705SXin Li /// \brief \c true if it is allowed to break before this token. 175*67e74705SXin Li bool CanBreakBefore = false; 176*67e74705SXin Li 177*67e74705SXin Li /// \brief \c true if this is the ">" of "template<..>". 178*67e74705SXin Li bool ClosesTemplateDeclaration = false; 179*67e74705SXin Li 180*67e74705SXin Li /// \brief Number of parameters, if this is "(", "[" or "<". 181*67e74705SXin Li /// 182*67e74705SXin Li /// This is initialized to 1 as we don't need to distinguish functions with 183*67e74705SXin Li /// 0 parameters from functions with 1 parameter. Thus, we can simply count 184*67e74705SXin Li /// the number of commas. 185*67e74705SXin Li unsigned ParameterCount = 0; 186*67e74705SXin Li 187*67e74705SXin Li /// \brief Number of parameters that are nested blocks, 188*67e74705SXin Li /// if this is "(", "[" or "<". 189*67e74705SXin Li unsigned BlockParameterCount = 0; 190*67e74705SXin Li 191*67e74705SXin Li /// \brief If this is a bracket ("<", "(", "[" or "{"), contains the kind of 192*67e74705SXin Li /// the surrounding bracket. 193*67e74705SXin Li tok::TokenKind ParentBracket = tok::unknown; 194*67e74705SXin Li 195*67e74705SXin Li /// \brief A token can have a special role that can carry extra information 196*67e74705SXin Li /// about the token's formatting. 197*67e74705SXin Li std::unique_ptr<TokenRole> Role; 198*67e74705SXin Li 199*67e74705SXin Li /// \brief If this is an opening parenthesis, how are the parameters packed? 200*67e74705SXin Li ParameterPackingKind PackingKind = PPK_Inconclusive; 201*67e74705SXin Li 202*67e74705SXin Li /// \brief The total length of the unwrapped line up to and including this 203*67e74705SXin Li /// token. 204*67e74705SXin Li unsigned TotalLength = 0; 205*67e74705SXin Li 206*67e74705SXin Li /// \brief The original 0-based column of this token, including expanded tabs. 207*67e74705SXin Li /// The configured TabWidth is used as tab width. 208*67e74705SXin Li unsigned OriginalColumn = 0; 209*67e74705SXin Li 210*67e74705SXin Li /// \brief The length of following tokens until the next natural split point, 211*67e74705SXin Li /// or the next token that can be broken. 212*67e74705SXin Li unsigned UnbreakableTailLength = 0; 213*67e74705SXin Li 214*67e74705SXin Li // FIXME: Come up with a 'cleaner' concept. 215*67e74705SXin Li /// \brief The binding strength of a token. This is a combined value of 216*67e74705SXin Li /// operator precedence, parenthesis nesting, etc. 217*67e74705SXin Li unsigned BindingStrength = 0; 218*67e74705SXin Li 219*67e74705SXin Li /// \brief The nesting level of this token, i.e. the number of surrounding (), 220*67e74705SXin Li /// [], {} or <>. 221*67e74705SXin Li unsigned NestingLevel = 0; 222*67e74705SXin Li 223*67e74705SXin Li /// \brief Penalty for inserting a line break before this token. 224*67e74705SXin Li unsigned SplitPenalty = 0; 225*67e74705SXin Li 226*67e74705SXin Li /// \brief If this is the first ObjC selector name in an ObjC method 227*67e74705SXin Li /// definition or call, this contains the length of the longest name. 228*67e74705SXin Li /// 229*67e74705SXin Li /// This being set to 0 means that the selectors should not be colon-aligned, 230*67e74705SXin Li /// e.g. because several of them are block-type. 231*67e74705SXin Li unsigned LongestObjCSelectorName = 0; 232*67e74705SXin Li 233*67e74705SXin Li /// \brief Stores the number of required fake parentheses and the 234*67e74705SXin Li /// corresponding operator precedence. 235*67e74705SXin Li /// 236*67e74705SXin Li /// If multiple fake parentheses start at a token, this vector stores them in 237*67e74705SXin Li /// reverse order, i.e. inner fake parenthesis first. 238*67e74705SXin Li SmallVector<prec::Level, 4> FakeLParens; 239*67e74705SXin Li /// \brief Insert this many fake ) after this token for correct indentation. 240*67e74705SXin Li unsigned FakeRParens = 0; 241*67e74705SXin Li 242*67e74705SXin Li /// \brief \c true if this token starts a binary expression, i.e. has at least 243*67e74705SXin Li /// one fake l_paren with a precedence greater than prec::Unknown. 244*67e74705SXin Li bool StartsBinaryExpression = false; 245*67e74705SXin Li /// \brief \c true if this token ends a binary expression. 246*67e74705SXin Li bool EndsBinaryExpression = false; 247*67e74705SXin Li 248*67e74705SXin Li /// \brief Is this is an operator (or "."/"->") in a sequence of operators 249*67e74705SXin Li /// with the same precedence, contains the 0-based operator index. 250*67e74705SXin Li unsigned OperatorIndex = 0; 251*67e74705SXin Li 252*67e74705SXin Li /// \brief If this is an operator (or "."/"->") in a sequence of operators 253*67e74705SXin Li /// with the same precedence, points to the next operator. 254*67e74705SXin Li FormatToken *NextOperator = nullptr; 255*67e74705SXin Li 256*67e74705SXin Li /// \brief Is this token part of a \c DeclStmt defining multiple variables? 257*67e74705SXin Li /// 258*67e74705SXin Li /// Only set if \c Type == \c TT_StartOfName. 259*67e74705SXin Li bool PartOfMultiVariableDeclStmt = false; 260*67e74705SXin Li 261*67e74705SXin Li /// \brief If this is a bracket, this points to the matching one. 262*67e74705SXin Li FormatToken *MatchingParen = nullptr; 263*67e74705SXin Li 264*67e74705SXin Li /// \brief The previous token in the unwrapped line. 265*67e74705SXin Li FormatToken *Previous = nullptr; 266*67e74705SXin Li 267*67e74705SXin Li /// \brief The next token in the unwrapped line. 268*67e74705SXin Li FormatToken *Next = nullptr; 269*67e74705SXin Li 270*67e74705SXin Li /// \brief If this token starts a block, this contains all the unwrapped lines 271*67e74705SXin Li /// in it. 272*67e74705SXin Li SmallVector<AnnotatedLine *, 1> Children; 273*67e74705SXin Li 274*67e74705SXin Li /// \brief Stores the formatting decision for the token once it was made. 275*67e74705SXin Li FormatDecision Decision = FD_Unformatted; 276*67e74705SXin Li 277*67e74705SXin Li /// \brief If \c true, this token has been fully formatted (indented and 278*67e74705SXin Li /// potentially re-formatted inside), and we do not allow further formatting 279*67e74705SXin Li /// changes. 280*67e74705SXin Li bool Finalized = false; 281*67e74705SXin Li isFormatToken282*67e74705SXin Li bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } isFormatToken283*67e74705SXin Li bool is(TokenType TT) const { return Type == TT; } isFormatToken284*67e74705SXin Li bool is(const IdentifierInfo *II) const { 285*67e74705SXin Li return II && II == Tok.getIdentifierInfo(); 286*67e74705SXin Li } isFormatToken287*67e74705SXin Li bool is(tok::PPKeywordKind Kind) const { 288*67e74705SXin Li return Tok.getIdentifierInfo() && 289*67e74705SXin Li Tok.getIdentifierInfo()->getPPKeywordID() == Kind; 290*67e74705SXin Li } isOneOfFormatToken291*67e74705SXin Li template <typename A, typename B> bool isOneOf(A K1, B K2) const { 292*67e74705SXin Li return is(K1) || is(K2); 293*67e74705SXin Li } 294*67e74705SXin Li template <typename A, typename B, typename... Ts> isOneOfFormatToken295*67e74705SXin Li bool isOneOf(A K1, B K2, Ts... Ks) const { 296*67e74705SXin Li return is(K1) || isOneOf(K2, Ks...); 297*67e74705SXin Li } isNotFormatToken298*67e74705SXin Li template <typename T> bool isNot(T Kind) const { return !is(Kind); } 299*67e74705SXin Li 300*67e74705SXin Li /// \c true if this token starts a sequence with the given tokens in order, 301*67e74705SXin Li /// following the ``Next`` pointers, ignoring comments. 302*67e74705SXin Li template <typename A, typename... Ts> startsSequenceFormatToken303*67e74705SXin Li bool startsSequence(A K1, Ts... Tokens) const { 304*67e74705SXin Li return startsSequenceInternal(K1, Tokens...); 305*67e74705SXin Li } 306*67e74705SXin Li 307*67e74705SXin Li /// \c true if this token ends a sequence with the given tokens in order, 308*67e74705SXin Li /// following the ``Previous`` pointers, ignoring comments. 309*67e74705SXin Li template <typename A, typename... Ts> endsSequenceFormatToken310*67e74705SXin Li bool endsSequence(A K1, Ts... Tokens) const { 311*67e74705SXin Li return endsSequenceInternal(K1, Tokens...); 312*67e74705SXin Li } 313*67e74705SXin Li isStringLiteralFormatToken314*67e74705SXin Li bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); } 315*67e74705SXin Li isObjCAtKeywordFormatToken316*67e74705SXin Li bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 317*67e74705SXin Li return Tok.isObjCAtKeyword(Kind); 318*67e74705SXin Li } 319*67e74705SXin Li 320*67e74705SXin Li bool isAccessSpecifier(bool ColonRequired = true) const { 321*67e74705SXin Li return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) && 322*67e74705SXin Li (!ColonRequired || (Next && Next->is(tok::colon))); 323*67e74705SXin Li } 324*67e74705SXin Li 325*67e74705SXin Li /// \brief Determine whether the token is a simple-type-specifier. 326*67e74705SXin Li bool isSimpleTypeSpecifier() const; 327*67e74705SXin Li isObjCAccessSpecifierFormatToken328*67e74705SXin Li bool isObjCAccessSpecifier() const { 329*67e74705SXin Li return is(tok::at) && Next && (Next->isObjCAtKeyword(tok::objc_public) || 330*67e74705SXin Li Next->isObjCAtKeyword(tok::objc_protected) || 331*67e74705SXin Li Next->isObjCAtKeyword(tok::objc_package) || 332*67e74705SXin Li Next->isObjCAtKeyword(tok::objc_private)); 333*67e74705SXin Li } 334*67e74705SXin Li 335*67e74705SXin Li /// \brief Returns whether \p Tok is ([{ or a template opening <. opensScopeFormatToken336*67e74705SXin Li bool opensScope() const { 337*67e74705SXin Li return isOneOf(tok::l_paren, tok::l_brace, tok::l_square, 338*67e74705SXin Li TT_TemplateOpener); 339*67e74705SXin Li } 340*67e74705SXin Li /// \brief Returns whether \p Tok is )]} or a template closing >. closesScopeFormatToken341*67e74705SXin Li bool closesScope() const { 342*67e74705SXin Li return isOneOf(tok::r_paren, tok::r_brace, tok::r_square, 343*67e74705SXin Li TT_TemplateCloser); 344*67e74705SXin Li } 345*67e74705SXin Li 346*67e74705SXin Li /// \brief Returns \c true if this is a "." or "->" accessing a member. isMemberAccessFormatToken347*67e74705SXin Li bool isMemberAccess() const { 348*67e74705SXin Li return isOneOf(tok::arrow, tok::period, tok::arrowstar) && 349*67e74705SXin Li !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow, 350*67e74705SXin Li TT_LambdaArrow); 351*67e74705SXin Li } 352*67e74705SXin Li isUnaryOperatorFormatToken353*67e74705SXin Li bool isUnaryOperator() const { 354*67e74705SXin Li switch (Tok.getKind()) { 355*67e74705SXin Li case tok::plus: 356*67e74705SXin Li case tok::plusplus: 357*67e74705SXin Li case tok::minus: 358*67e74705SXin Li case tok::minusminus: 359*67e74705SXin Li case tok::exclaim: 360*67e74705SXin Li case tok::tilde: 361*67e74705SXin Li case tok::kw_sizeof: 362*67e74705SXin Li case tok::kw_alignof: 363*67e74705SXin Li return true; 364*67e74705SXin Li default: 365*67e74705SXin Li return false; 366*67e74705SXin Li } 367*67e74705SXin Li } 368*67e74705SXin Li isBinaryOperatorFormatToken369*67e74705SXin Li bool isBinaryOperator() const { 370*67e74705SXin Li // Comma is a binary operator, but does not behave as such wrt. formatting. 371*67e74705SXin Li return getPrecedence() > prec::Comma; 372*67e74705SXin Li } 373*67e74705SXin Li isTrailingCommentFormatToken374*67e74705SXin Li bool isTrailingComment() const { 375*67e74705SXin Li return is(tok::comment) && 376*67e74705SXin Li (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0); 377*67e74705SXin Li } 378*67e74705SXin Li 379*67e74705SXin Li /// \brief Returns \c true if this is a keyword that can be used 380*67e74705SXin Li /// like a function call (e.g. sizeof, typeid, ...). isFunctionLikeKeywordFormatToken381*67e74705SXin Li bool isFunctionLikeKeyword() const { 382*67e74705SXin Li switch (Tok.getKind()) { 383*67e74705SXin Li case tok::kw_throw: 384*67e74705SXin Li case tok::kw_typeid: 385*67e74705SXin Li case tok::kw_return: 386*67e74705SXin Li case tok::kw_sizeof: 387*67e74705SXin Li case tok::kw_alignof: 388*67e74705SXin Li case tok::kw_alignas: 389*67e74705SXin Li case tok::kw_decltype: 390*67e74705SXin Li case tok::kw_noexcept: 391*67e74705SXin Li case tok::kw_static_assert: 392*67e74705SXin Li case tok::kw___attribute: 393*67e74705SXin Li return true; 394*67e74705SXin Li default: 395*67e74705SXin Li return false; 396*67e74705SXin Li } 397*67e74705SXin Li } 398*67e74705SXin Li 399*67e74705SXin Li /// \brief Returns actual token start location without leading escaped 400*67e74705SXin Li /// newlines and whitespace. 401*67e74705SXin Li /// 402*67e74705SXin Li /// This can be different to Tok.getLocation(), which includes leading escaped 403*67e74705SXin Li /// newlines. getStartOfNonWhitespaceFormatToken404*67e74705SXin Li SourceLocation getStartOfNonWhitespace() const { 405*67e74705SXin Li return WhitespaceRange.getEnd(); 406*67e74705SXin Li } 407*67e74705SXin Li getPrecedenceFormatToken408*67e74705SXin Li prec::Level getPrecedence() const { 409*67e74705SXin Li return getBinOpPrecedence(Tok.getKind(), true, true); 410*67e74705SXin Li } 411*67e74705SXin Li 412*67e74705SXin Li /// \brief Returns the previous token ignoring comments. getPreviousNonCommentFormatToken413*67e74705SXin Li FormatToken *getPreviousNonComment() const { 414*67e74705SXin Li FormatToken *Tok = Previous; 415*67e74705SXin Li while (Tok && Tok->is(tok::comment)) 416*67e74705SXin Li Tok = Tok->Previous; 417*67e74705SXin Li return Tok; 418*67e74705SXin Li } 419*67e74705SXin Li 420*67e74705SXin Li /// \brief Returns the next token ignoring comments. getNextNonCommentFormatToken421*67e74705SXin Li const FormatToken *getNextNonComment() const { 422*67e74705SXin Li const FormatToken *Tok = Next; 423*67e74705SXin Li while (Tok && Tok->is(tok::comment)) 424*67e74705SXin Li Tok = Tok->Next; 425*67e74705SXin Li return Tok; 426*67e74705SXin Li } 427*67e74705SXin Li 428*67e74705SXin Li /// \brief Returns \c true if this tokens starts a block-type list, i.e. a 429*67e74705SXin Li /// list that should be indented with a block indent. opensBlockOrBlockTypeListFormatToken430*67e74705SXin Li bool opensBlockOrBlockTypeList(const FormatStyle &Style) const { 431*67e74705SXin Li return is(TT_ArrayInitializerLSquare) || 432*67e74705SXin Li (is(tok::l_brace) && 433*67e74705SXin Li (BlockKind == BK_Block || is(TT_DictLiteral) || 434*67e74705SXin Li (!Style.Cpp11BracedListStyle && NestingLevel == 0))); 435*67e74705SXin Li } 436*67e74705SXin Li 437*67e74705SXin Li /// \brief Same as opensBlockOrBlockTypeList, but for the closing token. closesBlockOrBlockTypeListFormatToken438*67e74705SXin Li bool closesBlockOrBlockTypeList(const FormatStyle &Style) const { 439*67e74705SXin Li return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style); 440*67e74705SXin Li } 441*67e74705SXin Li 442*67e74705SXin Li private: 443*67e74705SXin Li // Disallow copying. 444*67e74705SXin Li FormatToken(const FormatToken &) = delete; 445*67e74705SXin Li void operator=(const FormatToken &) = delete; 446*67e74705SXin Li 447*67e74705SXin Li template <typename A, typename... Ts> startsSequenceInternalFormatToken448*67e74705SXin Li bool startsSequenceInternal(A K1, Ts... Tokens) const { 449*67e74705SXin Li if (is(tok::comment) && Next) 450*67e74705SXin Li return Next->startsSequenceInternal(K1, Tokens...); 451*67e74705SXin Li return is(K1) && Next && Next->startsSequenceInternal(Tokens...); 452*67e74705SXin Li } 453*67e74705SXin Li 454*67e74705SXin Li template <typename A> startsSequenceInternalFormatToken455*67e74705SXin Li bool startsSequenceInternal(A K1) const { 456*67e74705SXin Li if (is(tok::comment) && Next) 457*67e74705SXin Li return Next->startsSequenceInternal(K1); 458*67e74705SXin Li return is(K1); 459*67e74705SXin Li } 460*67e74705SXin Li 461*67e74705SXin Li template <typename A, typename... Ts> endsSequenceInternalFormatToken462*67e74705SXin Li bool endsSequenceInternal(A K1) const { 463*67e74705SXin Li if (is(tok::comment) && Previous) 464*67e74705SXin Li return Previous->endsSequenceInternal(K1); 465*67e74705SXin Li return is(K1); 466*67e74705SXin Li } 467*67e74705SXin Li 468*67e74705SXin Li template <typename A, typename... Ts> endsSequenceInternalFormatToken469*67e74705SXin Li bool endsSequenceInternal(A K1, Ts... Tokens) const { 470*67e74705SXin Li if (is(tok::comment) && Previous) 471*67e74705SXin Li return Previous->endsSequenceInternal(K1, Tokens...); 472*67e74705SXin Li return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...); 473*67e74705SXin Li } 474*67e74705SXin Li }; 475*67e74705SXin Li 476*67e74705SXin Li class ContinuationIndenter; 477*67e74705SXin Li struct LineState; 478*67e74705SXin Li 479*67e74705SXin Li class TokenRole { 480*67e74705SXin Li public: TokenRole(const FormatStyle & Style)481*67e74705SXin Li TokenRole(const FormatStyle &Style) : Style(Style) {} 482*67e74705SXin Li virtual ~TokenRole(); 483*67e74705SXin Li 484*67e74705SXin Li /// \brief After the \c TokenAnnotator has finished annotating all the tokens, 485*67e74705SXin Li /// this function precomputes required information for formatting. 486*67e74705SXin Li virtual void precomputeFormattingInfos(const FormatToken *Token); 487*67e74705SXin Li 488*67e74705SXin Li /// \brief Apply the special formatting that the given role demands. 489*67e74705SXin Li /// 490*67e74705SXin Li /// Assumes that the token having this role is already formatted. 491*67e74705SXin Li /// 492*67e74705SXin Li /// Continues formatting from \p State leaving indentation to \p Indenter and 493*67e74705SXin Li /// returns the total penalty that this formatting incurs. formatFromToken(LineState & State,ContinuationIndenter * Indenter,bool DryRun)494*67e74705SXin Li virtual unsigned formatFromToken(LineState &State, 495*67e74705SXin Li ContinuationIndenter *Indenter, 496*67e74705SXin Li bool DryRun) { 497*67e74705SXin Li return 0; 498*67e74705SXin Li } 499*67e74705SXin Li 500*67e74705SXin Li /// \brief Same as \c formatFromToken, but assumes that the first token has 501*67e74705SXin Li /// already been set thereby deciding on the first line break. formatAfterToken(LineState & State,ContinuationIndenter * Indenter,bool DryRun)502*67e74705SXin Li virtual unsigned formatAfterToken(LineState &State, 503*67e74705SXin Li ContinuationIndenter *Indenter, 504*67e74705SXin Li bool DryRun) { 505*67e74705SXin Li return 0; 506*67e74705SXin Li } 507*67e74705SXin Li 508*67e74705SXin Li /// \brief Notifies the \c Role that a comma was found. CommaFound(const FormatToken * Token)509*67e74705SXin Li virtual void CommaFound(const FormatToken *Token) {} 510*67e74705SXin Li 511*67e74705SXin Li protected: 512*67e74705SXin Li const FormatStyle &Style; 513*67e74705SXin Li }; 514*67e74705SXin Li 515*67e74705SXin Li class CommaSeparatedList : public TokenRole { 516*67e74705SXin Li public: CommaSeparatedList(const FormatStyle & Style)517*67e74705SXin Li CommaSeparatedList(const FormatStyle &Style) 518*67e74705SXin Li : TokenRole(Style), HasNestedBracedList(false) {} 519*67e74705SXin Li 520*67e74705SXin Li void precomputeFormattingInfos(const FormatToken *Token) override; 521*67e74705SXin Li 522*67e74705SXin Li unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, 523*67e74705SXin Li bool DryRun) override; 524*67e74705SXin Li 525*67e74705SXin Li unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, 526*67e74705SXin Li bool DryRun) override; 527*67e74705SXin Li 528*67e74705SXin Li /// \brief Adds \p Token as the next comma to the \c CommaSeparated list. CommaFound(const FormatToken * Token)529*67e74705SXin Li void CommaFound(const FormatToken *Token) override { 530*67e74705SXin Li Commas.push_back(Token); 531*67e74705SXin Li } 532*67e74705SXin Li 533*67e74705SXin Li private: 534*67e74705SXin Li /// \brief A struct that holds information on how to format a given list with 535*67e74705SXin Li /// a specific number of columns. 536*67e74705SXin Li struct ColumnFormat { 537*67e74705SXin Li /// \brief The number of columns to use. 538*67e74705SXin Li unsigned Columns; 539*67e74705SXin Li 540*67e74705SXin Li /// \brief The total width in characters. 541*67e74705SXin Li unsigned TotalWidth; 542*67e74705SXin Li 543*67e74705SXin Li /// \brief The number of lines required for this format. 544*67e74705SXin Li unsigned LineCount; 545*67e74705SXin Li 546*67e74705SXin Li /// \brief The size of each column in characters. 547*67e74705SXin Li SmallVector<unsigned, 8> ColumnSizes; 548*67e74705SXin Li }; 549*67e74705SXin Li 550*67e74705SXin Li /// \brief Calculate which \c ColumnFormat fits best into 551*67e74705SXin Li /// \p RemainingCharacters. 552*67e74705SXin Li const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const; 553*67e74705SXin Li 554*67e74705SXin Li /// \brief The ordered \c FormatTokens making up the commas of this list. 555*67e74705SXin Li SmallVector<const FormatToken *, 8> Commas; 556*67e74705SXin Li 557*67e74705SXin Li /// \brief The length of each of the list's items in characters including the 558*67e74705SXin Li /// trailing comma. 559*67e74705SXin Li SmallVector<unsigned, 8> ItemLengths; 560*67e74705SXin Li 561*67e74705SXin Li /// \brief Precomputed formats that can be used for this list. 562*67e74705SXin Li SmallVector<ColumnFormat, 4> Formats; 563*67e74705SXin Li 564*67e74705SXin Li bool HasNestedBracedList; 565*67e74705SXin Li }; 566*67e74705SXin Li 567*67e74705SXin Li /// \brief Encapsulates keywords that are context sensitive or for languages not 568*67e74705SXin Li /// properly supported by Clang's lexer. 569*67e74705SXin Li struct AdditionalKeywords { AdditionalKeywordsAdditionalKeywords570*67e74705SXin Li AdditionalKeywords(IdentifierTable &IdentTable) { 571*67e74705SXin Li kw_final = &IdentTable.get("final"); 572*67e74705SXin Li kw_override = &IdentTable.get("override"); 573*67e74705SXin Li kw_in = &IdentTable.get("in"); 574*67e74705SXin Li kw_of = &IdentTable.get("of"); 575*67e74705SXin Li kw_CF_ENUM = &IdentTable.get("CF_ENUM"); 576*67e74705SXin Li kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS"); 577*67e74705SXin Li kw_NS_ENUM = &IdentTable.get("NS_ENUM"); 578*67e74705SXin Li kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS"); 579*67e74705SXin Li 580*67e74705SXin Li kw_as = &IdentTable.get("as"); 581*67e74705SXin Li kw_async = &IdentTable.get("async"); 582*67e74705SXin Li kw_await = &IdentTable.get("await"); 583*67e74705SXin Li kw_finally = &IdentTable.get("finally"); 584*67e74705SXin Li kw_from = &IdentTable.get("from"); 585*67e74705SXin Li kw_function = &IdentTable.get("function"); 586*67e74705SXin Li kw_import = &IdentTable.get("import"); 587*67e74705SXin Li kw_is = &IdentTable.get("is"); 588*67e74705SXin Li kw_let = &IdentTable.get("let"); 589*67e74705SXin Li kw_type = &IdentTable.get("type"); 590*67e74705SXin Li kw_var = &IdentTable.get("var"); 591*67e74705SXin Li kw_yield = &IdentTable.get("yield"); 592*67e74705SXin Li 593*67e74705SXin Li kw_abstract = &IdentTable.get("abstract"); 594*67e74705SXin Li kw_assert = &IdentTable.get("assert"); 595*67e74705SXin Li kw_extends = &IdentTable.get("extends"); 596*67e74705SXin Li kw_implements = &IdentTable.get("implements"); 597*67e74705SXin Li kw_instanceof = &IdentTable.get("instanceof"); 598*67e74705SXin Li kw_interface = &IdentTable.get("interface"); 599*67e74705SXin Li kw_native = &IdentTable.get("native"); 600*67e74705SXin Li kw_package = &IdentTable.get("package"); 601*67e74705SXin Li kw_synchronized = &IdentTable.get("synchronized"); 602*67e74705SXin Li kw_throws = &IdentTable.get("throws"); 603*67e74705SXin Li kw___except = &IdentTable.get("__except"); 604*67e74705SXin Li 605*67e74705SXin Li kw_mark = &IdentTable.get("mark"); 606*67e74705SXin Li 607*67e74705SXin Li kw_extend = &IdentTable.get("extend"); 608*67e74705SXin Li kw_option = &IdentTable.get("option"); 609*67e74705SXin Li kw_optional = &IdentTable.get("optional"); 610*67e74705SXin Li kw_repeated = &IdentTable.get("repeated"); 611*67e74705SXin Li kw_required = &IdentTable.get("required"); 612*67e74705SXin Li kw_returns = &IdentTable.get("returns"); 613*67e74705SXin Li 614*67e74705SXin Li kw_signals = &IdentTable.get("signals"); 615*67e74705SXin Li kw_qsignals = &IdentTable.get("Q_SIGNALS"); 616*67e74705SXin Li kw_slots = &IdentTable.get("slots"); 617*67e74705SXin Li kw_qslots = &IdentTable.get("Q_SLOTS"); 618*67e74705SXin Li } 619*67e74705SXin Li 620*67e74705SXin Li // Context sensitive keywords. 621*67e74705SXin Li IdentifierInfo *kw_final; 622*67e74705SXin Li IdentifierInfo *kw_override; 623*67e74705SXin Li IdentifierInfo *kw_in; 624*67e74705SXin Li IdentifierInfo *kw_of; 625*67e74705SXin Li IdentifierInfo *kw_CF_ENUM; 626*67e74705SXin Li IdentifierInfo *kw_CF_OPTIONS; 627*67e74705SXin Li IdentifierInfo *kw_NS_ENUM; 628*67e74705SXin Li IdentifierInfo *kw_NS_OPTIONS; 629*67e74705SXin Li IdentifierInfo *kw___except; 630*67e74705SXin Li 631*67e74705SXin Li // JavaScript keywords. 632*67e74705SXin Li IdentifierInfo *kw_as; 633*67e74705SXin Li IdentifierInfo *kw_async; 634*67e74705SXin Li IdentifierInfo *kw_await; 635*67e74705SXin Li IdentifierInfo *kw_finally; 636*67e74705SXin Li IdentifierInfo *kw_from; 637*67e74705SXin Li IdentifierInfo *kw_function; 638*67e74705SXin Li IdentifierInfo *kw_import; 639*67e74705SXin Li IdentifierInfo *kw_is; 640*67e74705SXin Li IdentifierInfo *kw_let; 641*67e74705SXin Li IdentifierInfo *kw_type; 642*67e74705SXin Li IdentifierInfo *kw_var; 643*67e74705SXin Li IdentifierInfo *kw_yield; 644*67e74705SXin Li 645*67e74705SXin Li // Java keywords. 646*67e74705SXin Li IdentifierInfo *kw_abstract; 647*67e74705SXin Li IdentifierInfo *kw_assert; 648*67e74705SXin Li IdentifierInfo *kw_extends; 649*67e74705SXin Li IdentifierInfo *kw_implements; 650*67e74705SXin Li IdentifierInfo *kw_instanceof; 651*67e74705SXin Li IdentifierInfo *kw_interface; 652*67e74705SXin Li IdentifierInfo *kw_native; 653*67e74705SXin Li IdentifierInfo *kw_package; 654*67e74705SXin Li IdentifierInfo *kw_synchronized; 655*67e74705SXin Li IdentifierInfo *kw_throws; 656*67e74705SXin Li 657*67e74705SXin Li // Pragma keywords. 658*67e74705SXin Li IdentifierInfo *kw_mark; 659*67e74705SXin Li 660*67e74705SXin Li // Proto keywords. 661*67e74705SXin Li IdentifierInfo *kw_extend; 662*67e74705SXin Li IdentifierInfo *kw_option; 663*67e74705SXin Li IdentifierInfo *kw_optional; 664*67e74705SXin Li IdentifierInfo *kw_repeated; 665*67e74705SXin Li IdentifierInfo *kw_required; 666*67e74705SXin Li IdentifierInfo *kw_returns; 667*67e74705SXin Li 668*67e74705SXin Li // QT keywords. 669*67e74705SXin Li IdentifierInfo *kw_signals; 670*67e74705SXin Li IdentifierInfo *kw_qsignals; 671*67e74705SXin Li IdentifierInfo *kw_slots; 672*67e74705SXin Li IdentifierInfo *kw_qslots; 673*67e74705SXin Li }; 674*67e74705SXin Li 675*67e74705SXin Li } // namespace format 676*67e74705SXin Li } // namespace clang 677*67e74705SXin Li 678*67e74705SXin Li #endif 679