xref: /aosp_15_r20/external/clang/lib/Format/FormatToken.cpp (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li //===--- FormatToken.cpp - Format C++ code --------------------------------===//
2*67e74705SXin Li //
3*67e74705SXin Li //                     The LLVM Compiler Infrastructure
4*67e74705SXin Li //
5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source
6*67e74705SXin Li // License. See LICENSE.TXT for details.
7*67e74705SXin Li //
8*67e74705SXin Li //===----------------------------------------------------------------------===//
9*67e74705SXin Li ///
10*67e74705SXin Li /// \file
11*67e74705SXin Li /// \brief This file implements specific functions of \c FormatTokens and their
12*67e74705SXin Li /// roles.
13*67e74705SXin Li ///
14*67e74705SXin Li //===----------------------------------------------------------------------===//
15*67e74705SXin Li 
16*67e74705SXin Li #include "ContinuationIndenter.h"
17*67e74705SXin Li #include "FormatToken.h"
18*67e74705SXin Li #include "clang/Format/Format.h"
19*67e74705SXin Li #include "llvm/ADT/SmallVector.h"
20*67e74705SXin Li #include "llvm/Support/Debug.h"
21*67e74705SXin Li #include <climits>
22*67e74705SXin Li 
23*67e74705SXin Li namespace clang {
24*67e74705SXin Li namespace format {
25*67e74705SXin Li 
getTokenTypeName(TokenType Type)26*67e74705SXin Li const char *getTokenTypeName(TokenType Type) {
27*67e74705SXin Li   static const char *const TokNames[] = {
28*67e74705SXin Li #define TYPE(X) #X,
29*67e74705SXin Li LIST_TOKEN_TYPES
30*67e74705SXin Li #undef TYPE
31*67e74705SXin Li     nullptr
32*67e74705SXin Li   };
33*67e74705SXin Li 
34*67e74705SXin Li   if (Type < NUM_TOKEN_TYPES)
35*67e74705SXin Li     return TokNames[Type];
36*67e74705SXin Li   llvm_unreachable("unknown TokenType");
37*67e74705SXin Li   return nullptr;
38*67e74705SXin Li }
39*67e74705SXin Li 
40*67e74705SXin Li // FIXME: This is copy&pasted from Sema. Put it in a common place and remove
41*67e74705SXin Li // duplication.
isSimpleTypeSpecifier() const42*67e74705SXin Li bool FormatToken::isSimpleTypeSpecifier() const {
43*67e74705SXin Li   switch (Tok.getKind()) {
44*67e74705SXin Li   case tok::kw_short:
45*67e74705SXin Li   case tok::kw_long:
46*67e74705SXin Li   case tok::kw___int64:
47*67e74705SXin Li   case tok::kw___int128:
48*67e74705SXin Li   case tok::kw_signed:
49*67e74705SXin Li   case tok::kw_unsigned:
50*67e74705SXin Li   case tok::kw_void:
51*67e74705SXin Li   case tok::kw_char:
52*67e74705SXin Li   case tok::kw_int:
53*67e74705SXin Li   case tok::kw_half:
54*67e74705SXin Li   case tok::kw_float:
55*67e74705SXin Li   case tok::kw_double:
56*67e74705SXin Li   case tok::kw___float128:
57*67e74705SXin Li   case tok::kw_wchar_t:
58*67e74705SXin Li   case tok::kw_bool:
59*67e74705SXin Li   case tok::kw___underlying_type:
60*67e74705SXin Li   case tok::annot_typename:
61*67e74705SXin Li   case tok::kw_char16_t:
62*67e74705SXin Li   case tok::kw_char32_t:
63*67e74705SXin Li   case tok::kw_typeof:
64*67e74705SXin Li   case tok::kw_decltype:
65*67e74705SXin Li     return true;
66*67e74705SXin Li   default:
67*67e74705SXin Li     return false;
68*67e74705SXin Li   }
69*67e74705SXin Li }
70*67e74705SXin Li 
~TokenRole()71*67e74705SXin Li TokenRole::~TokenRole() {}
72*67e74705SXin Li 
precomputeFormattingInfos(const FormatToken * Token)73*67e74705SXin Li void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
74*67e74705SXin Li 
formatAfterToken(LineState & State,ContinuationIndenter * Indenter,bool DryRun)75*67e74705SXin Li unsigned CommaSeparatedList::formatAfterToken(LineState &State,
76*67e74705SXin Li                                               ContinuationIndenter *Indenter,
77*67e74705SXin Li                                               bool DryRun) {
78*67e74705SXin Li   if (State.NextToken == nullptr || !State.NextToken->Previous)
79*67e74705SXin Li     return 0;
80*67e74705SXin Li 
81*67e74705SXin Li   // Ensure that we start on the opening brace.
82*67e74705SXin Li   const FormatToken *LBrace =
83*67e74705SXin Li       State.NextToken->Previous->getPreviousNonComment();
84*67e74705SXin Li   if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
85*67e74705SXin Li       LBrace->BlockKind == BK_Block || LBrace->Type == TT_DictLiteral ||
86*67e74705SXin Li       LBrace->Next->Type == TT_DesignatedInitializerPeriod)
87*67e74705SXin Li     return 0;
88*67e74705SXin Li 
89*67e74705SXin Li   // Calculate the number of code points we have to format this list. As the
90*67e74705SXin Li   // first token is already placed, we have to subtract it.
91*67e74705SXin Li   unsigned RemainingCodePoints =
92*67e74705SXin Li       Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth;
93*67e74705SXin Li 
94*67e74705SXin Li   // Find the best ColumnFormat, i.e. the best number of columns to use.
95*67e74705SXin Li   const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
96*67e74705SXin Li   // If no ColumnFormat can be used, the braced list would generally be
97*67e74705SXin Li   // bin-packed. Add a severe penalty to this so that column layouts are
98*67e74705SXin Li   // preferred if possible.
99*67e74705SXin Li   if (!Format)
100*67e74705SXin Li     return 10000;
101*67e74705SXin Li 
102*67e74705SXin Li   // Format the entire list.
103*67e74705SXin Li   unsigned Penalty = 0;
104*67e74705SXin Li   unsigned Column = 0;
105*67e74705SXin Li   unsigned Item = 0;
106*67e74705SXin Li   while (State.NextToken != LBrace->MatchingParen) {
107*67e74705SXin Li     bool NewLine = false;
108*67e74705SXin Li     unsigned ExtraSpaces = 0;
109*67e74705SXin Li 
110*67e74705SXin Li     // If the previous token was one of our commas, we are now on the next item.
111*67e74705SXin Li     if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
112*67e74705SXin Li       if (!State.NextToken->isTrailingComment()) {
113*67e74705SXin Li         ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
114*67e74705SXin Li         ++Column;
115*67e74705SXin Li       }
116*67e74705SXin Li       ++Item;
117*67e74705SXin Li     }
118*67e74705SXin Li 
119*67e74705SXin Li     if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
120*67e74705SXin Li       Column = 0;
121*67e74705SXin Li       NewLine = true;
122*67e74705SXin Li     }
123*67e74705SXin Li 
124*67e74705SXin Li     // Place token using the continuation indenter and store the penalty.
125*67e74705SXin Li     Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
126*67e74705SXin Li   }
127*67e74705SXin Li   return Penalty;
128*67e74705SXin Li }
129*67e74705SXin Li 
formatFromToken(LineState & State,ContinuationIndenter * Indenter,bool DryRun)130*67e74705SXin Li unsigned CommaSeparatedList::formatFromToken(LineState &State,
131*67e74705SXin Li                                              ContinuationIndenter *Indenter,
132*67e74705SXin Li                                              bool DryRun) {
133*67e74705SXin Li   if (HasNestedBracedList)
134*67e74705SXin Li     State.Stack.back().AvoidBinPacking = true;
135*67e74705SXin Li   return 0;
136*67e74705SXin Li }
137*67e74705SXin Li 
138*67e74705SXin Li // Returns the lengths in code points between Begin and End (both included),
139*67e74705SXin Li // assuming that the entire sequence is put on a single line.
CodePointsBetween(const FormatToken * Begin,const FormatToken * End)140*67e74705SXin Li static unsigned CodePointsBetween(const FormatToken *Begin,
141*67e74705SXin Li                                   const FormatToken *End) {
142*67e74705SXin Li   assert(End->TotalLength >= Begin->TotalLength);
143*67e74705SXin Li   return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
144*67e74705SXin Li }
145*67e74705SXin Li 
precomputeFormattingInfos(const FormatToken * Token)146*67e74705SXin Li void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
147*67e74705SXin Li   // FIXME: At some point we might want to do this for other lists, too.
148*67e74705SXin Li   if (!Token->MatchingParen ||
149*67e74705SXin Li       !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare))
150*67e74705SXin Li     return;
151*67e74705SXin Li 
152*67e74705SXin Li   // In C++11 braced list style, we should not format in columns unless they
153*67e74705SXin Li   // have many items (20 or more) or we allow bin-packing of function call
154*67e74705SXin Li   // arguments.
155*67e74705SXin Li   if (Style.Cpp11BracedListStyle && !Style.BinPackArguments &&
156*67e74705SXin Li       Commas.size() < 19)
157*67e74705SXin Li     return;
158*67e74705SXin Li 
159*67e74705SXin Li   // Limit column layout for JavaScript array initializers to 20 or more items
160*67e74705SXin Li   // for now to introduce it carefully. We can become more aggressive if this
161*67e74705SXin Li   // necessary.
162*67e74705SXin Li   if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19)
163*67e74705SXin Li     return;
164*67e74705SXin Li 
165*67e74705SXin Li   // Column format doesn't really make sense if we don't align after brackets.
166*67e74705SXin Li   if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
167*67e74705SXin Li     return;
168*67e74705SXin Li 
169*67e74705SXin Li   FormatToken *ItemBegin = Token->Next;
170*67e74705SXin Li   while (ItemBegin->isTrailingComment())
171*67e74705SXin Li     ItemBegin = ItemBegin->Next;
172*67e74705SXin Li   SmallVector<bool, 8> MustBreakBeforeItem;
173*67e74705SXin Li 
174*67e74705SXin Li   // The lengths of an item if it is put at the end of the line. This includes
175*67e74705SXin Li   // trailing comments which are otherwise ignored for column alignment.
176*67e74705SXin Li   SmallVector<unsigned, 8> EndOfLineItemLength;
177*67e74705SXin Li 
178*67e74705SXin Li   bool HasSeparatingComment = false;
179*67e74705SXin Li   for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
180*67e74705SXin Li     // Skip comments on their own line.
181*67e74705SXin Li     while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
182*67e74705SXin Li       ItemBegin = ItemBegin->Next;
183*67e74705SXin Li       HasSeparatingComment = i > 0;
184*67e74705SXin Li     }
185*67e74705SXin Li 
186*67e74705SXin Li     MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
187*67e74705SXin Li     if (ItemBegin->is(tok::l_brace))
188*67e74705SXin Li       HasNestedBracedList = true;
189*67e74705SXin Li     const FormatToken *ItemEnd = nullptr;
190*67e74705SXin Li     if (i == Commas.size()) {
191*67e74705SXin Li       ItemEnd = Token->MatchingParen;
192*67e74705SXin Li       const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
193*67e74705SXin Li       ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
194*67e74705SXin Li       if (Style.Cpp11BracedListStyle &&
195*67e74705SXin Li           !ItemEnd->Previous->isTrailingComment()) {
196*67e74705SXin Li         // In Cpp11 braced list style, the } and possibly other subsequent
197*67e74705SXin Li         // tokens will need to stay on a line with the last element.
198*67e74705SXin Li         while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
199*67e74705SXin Li           ItemEnd = ItemEnd->Next;
200*67e74705SXin Li       } else {
201*67e74705SXin Li         // In other braced lists styles, the "}" can be wrapped to the new line.
202*67e74705SXin Li         ItemEnd = Token->MatchingParen->Previous;
203*67e74705SXin Li       }
204*67e74705SXin Li     } else {
205*67e74705SXin Li       ItemEnd = Commas[i];
206*67e74705SXin Li       // The comma is counted as part of the item when calculating the length.
207*67e74705SXin Li       ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
208*67e74705SXin Li 
209*67e74705SXin Li       // Consume trailing comments so the are included in EndOfLineItemLength.
210*67e74705SXin Li       if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
211*67e74705SXin Li           ItemEnd->Next->isTrailingComment())
212*67e74705SXin Li         ItemEnd = ItemEnd->Next;
213*67e74705SXin Li     }
214*67e74705SXin Li     EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
215*67e74705SXin Li     // If there is a trailing comma in the list, the next item will start at the
216*67e74705SXin Li     // closing brace. Don't create an extra item for this.
217*67e74705SXin Li     if (ItemEnd->getNextNonComment() == Token->MatchingParen)
218*67e74705SXin Li       break;
219*67e74705SXin Li     ItemBegin = ItemEnd->Next;
220*67e74705SXin Li   }
221*67e74705SXin Li 
222*67e74705SXin Li   // Don't use column layout for lists with few elements and in presence of
223*67e74705SXin Li   // separating comments.
224*67e74705SXin Li   if (Commas.size() < 5 || HasSeparatingComment)
225*67e74705SXin Li     return;
226*67e74705SXin Li 
227*67e74705SXin Li   if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19)
228*67e74705SXin Li     return;
229*67e74705SXin Li 
230*67e74705SXin Li   // We can never place more than ColumnLimit / 3 items in a row (because of the
231*67e74705SXin Li   // spaces and the comma).
232*67e74705SXin Li   unsigned MaxItems = Style.ColumnLimit / 3;
233*67e74705SXin Li   std::vector<unsigned> MinSizeInColumn;
234*67e74705SXin Li   MinSizeInColumn.reserve(MaxItems);
235*67e74705SXin Li   for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
236*67e74705SXin Li     ColumnFormat Format;
237*67e74705SXin Li     Format.Columns = Columns;
238*67e74705SXin Li     Format.ColumnSizes.resize(Columns);
239*67e74705SXin Li     MinSizeInColumn.assign(Columns, UINT_MAX);
240*67e74705SXin Li     Format.LineCount = 1;
241*67e74705SXin Li     bool HasRowWithSufficientColumns = false;
242*67e74705SXin Li     unsigned Column = 0;
243*67e74705SXin Li     for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
244*67e74705SXin Li       assert(i < MustBreakBeforeItem.size());
245*67e74705SXin Li       if (MustBreakBeforeItem[i] || Column == Columns) {
246*67e74705SXin Li         ++Format.LineCount;
247*67e74705SXin Li         Column = 0;
248*67e74705SXin Li       }
249*67e74705SXin Li       if (Column == Columns - 1)
250*67e74705SXin Li         HasRowWithSufficientColumns = true;
251*67e74705SXin Li       unsigned Length =
252*67e74705SXin Li           (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
253*67e74705SXin Li       Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
254*67e74705SXin Li       MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
255*67e74705SXin Li       ++Column;
256*67e74705SXin Li     }
257*67e74705SXin Li     // If all rows are terminated early (e.g. by trailing comments), we don't
258*67e74705SXin Li     // need to look further.
259*67e74705SXin Li     if (!HasRowWithSufficientColumns)
260*67e74705SXin Li       break;
261*67e74705SXin Li     Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
262*67e74705SXin Li 
263*67e74705SXin Li     for (unsigned i = 0; i < Columns; ++i)
264*67e74705SXin Li       Format.TotalWidth += Format.ColumnSizes[i];
265*67e74705SXin Li 
266*67e74705SXin Li     // Don't use this Format, if the difference between the longest and shortest
267*67e74705SXin Li     // element in a column exceeds a threshold to avoid excessive spaces.
268*67e74705SXin Li     if ([&] {
269*67e74705SXin Li           for (unsigned i = 0; i < Columns - 1; ++i)
270*67e74705SXin Li             if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
271*67e74705SXin Li               return true;
272*67e74705SXin Li           return false;
273*67e74705SXin Li         }())
274*67e74705SXin Li       continue;
275*67e74705SXin Li 
276*67e74705SXin Li     // Ignore layouts that are bound to violate the column limit.
277*67e74705SXin Li     if (Format.TotalWidth > Style.ColumnLimit)
278*67e74705SXin Li       continue;
279*67e74705SXin Li 
280*67e74705SXin Li     Formats.push_back(Format);
281*67e74705SXin Li   }
282*67e74705SXin Li }
283*67e74705SXin Li 
284*67e74705SXin Li const CommaSeparatedList::ColumnFormat *
getColumnFormat(unsigned RemainingCharacters) const285*67e74705SXin Li CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
286*67e74705SXin Li   const ColumnFormat *BestFormat = nullptr;
287*67e74705SXin Li   for (SmallVector<ColumnFormat, 4>::const_reverse_iterator
288*67e74705SXin Li            I = Formats.rbegin(),
289*67e74705SXin Li            E = Formats.rend();
290*67e74705SXin Li        I != E; ++I) {
291*67e74705SXin Li     if (I->TotalWidth <= RemainingCharacters) {
292*67e74705SXin Li       if (BestFormat && I->LineCount > BestFormat->LineCount)
293*67e74705SXin Li         break;
294*67e74705SXin Li       BestFormat = &*I;
295*67e74705SXin Li     }
296*67e74705SXin Li   }
297*67e74705SXin Li   return BestFormat;
298*67e74705SXin Li }
299*67e74705SXin Li 
300*67e74705SXin Li } // namespace format
301*67e74705SXin Li } // namespace clang
302