1*67e74705SXin Li //===--- FormatToken.cpp - Format C++ code --------------------------------===//
2*67e74705SXin Li //
3*67e74705SXin Li // The LLVM Compiler Infrastructure
4*67e74705SXin Li //
5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source
6*67e74705SXin Li // License. See LICENSE.TXT for details.
7*67e74705SXin Li //
8*67e74705SXin Li //===----------------------------------------------------------------------===//
9*67e74705SXin Li ///
10*67e74705SXin Li /// \file
11*67e74705SXin Li /// \brief This file implements specific functions of \c FormatTokens and their
12*67e74705SXin Li /// roles.
13*67e74705SXin Li ///
14*67e74705SXin Li //===----------------------------------------------------------------------===//
15*67e74705SXin Li
16*67e74705SXin Li #include "ContinuationIndenter.h"
17*67e74705SXin Li #include "FormatToken.h"
18*67e74705SXin Li #include "clang/Format/Format.h"
19*67e74705SXin Li #include "llvm/ADT/SmallVector.h"
20*67e74705SXin Li #include "llvm/Support/Debug.h"
21*67e74705SXin Li #include <climits>
22*67e74705SXin Li
23*67e74705SXin Li namespace clang {
24*67e74705SXin Li namespace format {
25*67e74705SXin Li
getTokenTypeName(TokenType Type)26*67e74705SXin Li const char *getTokenTypeName(TokenType Type) {
27*67e74705SXin Li static const char *const TokNames[] = {
28*67e74705SXin Li #define TYPE(X) #X,
29*67e74705SXin Li LIST_TOKEN_TYPES
30*67e74705SXin Li #undef TYPE
31*67e74705SXin Li nullptr
32*67e74705SXin Li };
33*67e74705SXin Li
34*67e74705SXin Li if (Type < NUM_TOKEN_TYPES)
35*67e74705SXin Li return TokNames[Type];
36*67e74705SXin Li llvm_unreachable("unknown TokenType");
37*67e74705SXin Li return nullptr;
38*67e74705SXin Li }
39*67e74705SXin Li
40*67e74705SXin Li // FIXME: This is copy&pasted from Sema. Put it in a common place and remove
41*67e74705SXin Li // duplication.
isSimpleTypeSpecifier() const42*67e74705SXin Li bool FormatToken::isSimpleTypeSpecifier() const {
43*67e74705SXin Li switch (Tok.getKind()) {
44*67e74705SXin Li case tok::kw_short:
45*67e74705SXin Li case tok::kw_long:
46*67e74705SXin Li case tok::kw___int64:
47*67e74705SXin Li case tok::kw___int128:
48*67e74705SXin Li case tok::kw_signed:
49*67e74705SXin Li case tok::kw_unsigned:
50*67e74705SXin Li case tok::kw_void:
51*67e74705SXin Li case tok::kw_char:
52*67e74705SXin Li case tok::kw_int:
53*67e74705SXin Li case tok::kw_half:
54*67e74705SXin Li case tok::kw_float:
55*67e74705SXin Li case tok::kw_double:
56*67e74705SXin Li case tok::kw___float128:
57*67e74705SXin Li case tok::kw_wchar_t:
58*67e74705SXin Li case tok::kw_bool:
59*67e74705SXin Li case tok::kw___underlying_type:
60*67e74705SXin Li case tok::annot_typename:
61*67e74705SXin Li case tok::kw_char16_t:
62*67e74705SXin Li case tok::kw_char32_t:
63*67e74705SXin Li case tok::kw_typeof:
64*67e74705SXin Li case tok::kw_decltype:
65*67e74705SXin Li return true;
66*67e74705SXin Li default:
67*67e74705SXin Li return false;
68*67e74705SXin Li }
69*67e74705SXin Li }
70*67e74705SXin Li
~TokenRole()71*67e74705SXin Li TokenRole::~TokenRole() {}
72*67e74705SXin Li
precomputeFormattingInfos(const FormatToken * Token)73*67e74705SXin Li void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
74*67e74705SXin Li
formatAfterToken(LineState & State,ContinuationIndenter * Indenter,bool DryRun)75*67e74705SXin Li unsigned CommaSeparatedList::formatAfterToken(LineState &State,
76*67e74705SXin Li ContinuationIndenter *Indenter,
77*67e74705SXin Li bool DryRun) {
78*67e74705SXin Li if (State.NextToken == nullptr || !State.NextToken->Previous)
79*67e74705SXin Li return 0;
80*67e74705SXin Li
81*67e74705SXin Li // Ensure that we start on the opening brace.
82*67e74705SXin Li const FormatToken *LBrace =
83*67e74705SXin Li State.NextToken->Previous->getPreviousNonComment();
84*67e74705SXin Li if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
85*67e74705SXin Li LBrace->BlockKind == BK_Block || LBrace->Type == TT_DictLiteral ||
86*67e74705SXin Li LBrace->Next->Type == TT_DesignatedInitializerPeriod)
87*67e74705SXin Li return 0;
88*67e74705SXin Li
89*67e74705SXin Li // Calculate the number of code points we have to format this list. As the
90*67e74705SXin Li // first token is already placed, we have to subtract it.
91*67e74705SXin Li unsigned RemainingCodePoints =
92*67e74705SXin Li Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth;
93*67e74705SXin Li
94*67e74705SXin Li // Find the best ColumnFormat, i.e. the best number of columns to use.
95*67e74705SXin Li const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
96*67e74705SXin Li // If no ColumnFormat can be used, the braced list would generally be
97*67e74705SXin Li // bin-packed. Add a severe penalty to this so that column layouts are
98*67e74705SXin Li // preferred if possible.
99*67e74705SXin Li if (!Format)
100*67e74705SXin Li return 10000;
101*67e74705SXin Li
102*67e74705SXin Li // Format the entire list.
103*67e74705SXin Li unsigned Penalty = 0;
104*67e74705SXin Li unsigned Column = 0;
105*67e74705SXin Li unsigned Item = 0;
106*67e74705SXin Li while (State.NextToken != LBrace->MatchingParen) {
107*67e74705SXin Li bool NewLine = false;
108*67e74705SXin Li unsigned ExtraSpaces = 0;
109*67e74705SXin Li
110*67e74705SXin Li // If the previous token was one of our commas, we are now on the next item.
111*67e74705SXin Li if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
112*67e74705SXin Li if (!State.NextToken->isTrailingComment()) {
113*67e74705SXin Li ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
114*67e74705SXin Li ++Column;
115*67e74705SXin Li }
116*67e74705SXin Li ++Item;
117*67e74705SXin Li }
118*67e74705SXin Li
119*67e74705SXin Li if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
120*67e74705SXin Li Column = 0;
121*67e74705SXin Li NewLine = true;
122*67e74705SXin Li }
123*67e74705SXin Li
124*67e74705SXin Li // Place token using the continuation indenter and store the penalty.
125*67e74705SXin Li Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
126*67e74705SXin Li }
127*67e74705SXin Li return Penalty;
128*67e74705SXin Li }
129*67e74705SXin Li
formatFromToken(LineState & State,ContinuationIndenter * Indenter,bool DryRun)130*67e74705SXin Li unsigned CommaSeparatedList::formatFromToken(LineState &State,
131*67e74705SXin Li ContinuationIndenter *Indenter,
132*67e74705SXin Li bool DryRun) {
133*67e74705SXin Li if (HasNestedBracedList)
134*67e74705SXin Li State.Stack.back().AvoidBinPacking = true;
135*67e74705SXin Li return 0;
136*67e74705SXin Li }
137*67e74705SXin Li
138*67e74705SXin Li // Returns the lengths in code points between Begin and End (both included),
139*67e74705SXin Li // assuming that the entire sequence is put on a single line.
CodePointsBetween(const FormatToken * Begin,const FormatToken * End)140*67e74705SXin Li static unsigned CodePointsBetween(const FormatToken *Begin,
141*67e74705SXin Li const FormatToken *End) {
142*67e74705SXin Li assert(End->TotalLength >= Begin->TotalLength);
143*67e74705SXin Li return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
144*67e74705SXin Li }
145*67e74705SXin Li
precomputeFormattingInfos(const FormatToken * Token)146*67e74705SXin Li void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
147*67e74705SXin Li // FIXME: At some point we might want to do this for other lists, too.
148*67e74705SXin Li if (!Token->MatchingParen ||
149*67e74705SXin Li !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare))
150*67e74705SXin Li return;
151*67e74705SXin Li
152*67e74705SXin Li // In C++11 braced list style, we should not format in columns unless they
153*67e74705SXin Li // have many items (20 or more) or we allow bin-packing of function call
154*67e74705SXin Li // arguments.
155*67e74705SXin Li if (Style.Cpp11BracedListStyle && !Style.BinPackArguments &&
156*67e74705SXin Li Commas.size() < 19)
157*67e74705SXin Li return;
158*67e74705SXin Li
159*67e74705SXin Li // Limit column layout for JavaScript array initializers to 20 or more items
160*67e74705SXin Li // for now to introduce it carefully. We can become more aggressive if this
161*67e74705SXin Li // necessary.
162*67e74705SXin Li if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19)
163*67e74705SXin Li return;
164*67e74705SXin Li
165*67e74705SXin Li // Column format doesn't really make sense if we don't align after brackets.
166*67e74705SXin Li if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
167*67e74705SXin Li return;
168*67e74705SXin Li
169*67e74705SXin Li FormatToken *ItemBegin = Token->Next;
170*67e74705SXin Li while (ItemBegin->isTrailingComment())
171*67e74705SXin Li ItemBegin = ItemBegin->Next;
172*67e74705SXin Li SmallVector<bool, 8> MustBreakBeforeItem;
173*67e74705SXin Li
174*67e74705SXin Li // The lengths of an item if it is put at the end of the line. This includes
175*67e74705SXin Li // trailing comments which are otherwise ignored for column alignment.
176*67e74705SXin Li SmallVector<unsigned, 8> EndOfLineItemLength;
177*67e74705SXin Li
178*67e74705SXin Li bool HasSeparatingComment = false;
179*67e74705SXin Li for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
180*67e74705SXin Li // Skip comments on their own line.
181*67e74705SXin Li while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
182*67e74705SXin Li ItemBegin = ItemBegin->Next;
183*67e74705SXin Li HasSeparatingComment = i > 0;
184*67e74705SXin Li }
185*67e74705SXin Li
186*67e74705SXin Li MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
187*67e74705SXin Li if (ItemBegin->is(tok::l_brace))
188*67e74705SXin Li HasNestedBracedList = true;
189*67e74705SXin Li const FormatToken *ItemEnd = nullptr;
190*67e74705SXin Li if (i == Commas.size()) {
191*67e74705SXin Li ItemEnd = Token->MatchingParen;
192*67e74705SXin Li const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
193*67e74705SXin Li ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
194*67e74705SXin Li if (Style.Cpp11BracedListStyle &&
195*67e74705SXin Li !ItemEnd->Previous->isTrailingComment()) {
196*67e74705SXin Li // In Cpp11 braced list style, the } and possibly other subsequent
197*67e74705SXin Li // tokens will need to stay on a line with the last element.
198*67e74705SXin Li while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
199*67e74705SXin Li ItemEnd = ItemEnd->Next;
200*67e74705SXin Li } else {
201*67e74705SXin Li // In other braced lists styles, the "}" can be wrapped to the new line.
202*67e74705SXin Li ItemEnd = Token->MatchingParen->Previous;
203*67e74705SXin Li }
204*67e74705SXin Li } else {
205*67e74705SXin Li ItemEnd = Commas[i];
206*67e74705SXin Li // The comma is counted as part of the item when calculating the length.
207*67e74705SXin Li ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
208*67e74705SXin Li
209*67e74705SXin Li // Consume trailing comments so the are included in EndOfLineItemLength.
210*67e74705SXin Li if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
211*67e74705SXin Li ItemEnd->Next->isTrailingComment())
212*67e74705SXin Li ItemEnd = ItemEnd->Next;
213*67e74705SXin Li }
214*67e74705SXin Li EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
215*67e74705SXin Li // If there is a trailing comma in the list, the next item will start at the
216*67e74705SXin Li // closing brace. Don't create an extra item for this.
217*67e74705SXin Li if (ItemEnd->getNextNonComment() == Token->MatchingParen)
218*67e74705SXin Li break;
219*67e74705SXin Li ItemBegin = ItemEnd->Next;
220*67e74705SXin Li }
221*67e74705SXin Li
222*67e74705SXin Li // Don't use column layout for lists with few elements and in presence of
223*67e74705SXin Li // separating comments.
224*67e74705SXin Li if (Commas.size() < 5 || HasSeparatingComment)
225*67e74705SXin Li return;
226*67e74705SXin Li
227*67e74705SXin Li if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19)
228*67e74705SXin Li return;
229*67e74705SXin Li
230*67e74705SXin Li // We can never place more than ColumnLimit / 3 items in a row (because of the
231*67e74705SXin Li // spaces and the comma).
232*67e74705SXin Li unsigned MaxItems = Style.ColumnLimit / 3;
233*67e74705SXin Li std::vector<unsigned> MinSizeInColumn;
234*67e74705SXin Li MinSizeInColumn.reserve(MaxItems);
235*67e74705SXin Li for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
236*67e74705SXin Li ColumnFormat Format;
237*67e74705SXin Li Format.Columns = Columns;
238*67e74705SXin Li Format.ColumnSizes.resize(Columns);
239*67e74705SXin Li MinSizeInColumn.assign(Columns, UINT_MAX);
240*67e74705SXin Li Format.LineCount = 1;
241*67e74705SXin Li bool HasRowWithSufficientColumns = false;
242*67e74705SXin Li unsigned Column = 0;
243*67e74705SXin Li for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
244*67e74705SXin Li assert(i < MustBreakBeforeItem.size());
245*67e74705SXin Li if (MustBreakBeforeItem[i] || Column == Columns) {
246*67e74705SXin Li ++Format.LineCount;
247*67e74705SXin Li Column = 0;
248*67e74705SXin Li }
249*67e74705SXin Li if (Column == Columns - 1)
250*67e74705SXin Li HasRowWithSufficientColumns = true;
251*67e74705SXin Li unsigned Length =
252*67e74705SXin Li (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
253*67e74705SXin Li Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
254*67e74705SXin Li MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
255*67e74705SXin Li ++Column;
256*67e74705SXin Li }
257*67e74705SXin Li // If all rows are terminated early (e.g. by trailing comments), we don't
258*67e74705SXin Li // need to look further.
259*67e74705SXin Li if (!HasRowWithSufficientColumns)
260*67e74705SXin Li break;
261*67e74705SXin Li Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
262*67e74705SXin Li
263*67e74705SXin Li for (unsigned i = 0; i < Columns; ++i)
264*67e74705SXin Li Format.TotalWidth += Format.ColumnSizes[i];
265*67e74705SXin Li
266*67e74705SXin Li // Don't use this Format, if the difference between the longest and shortest
267*67e74705SXin Li // element in a column exceeds a threshold to avoid excessive spaces.
268*67e74705SXin Li if ([&] {
269*67e74705SXin Li for (unsigned i = 0; i < Columns - 1; ++i)
270*67e74705SXin Li if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
271*67e74705SXin Li return true;
272*67e74705SXin Li return false;
273*67e74705SXin Li }())
274*67e74705SXin Li continue;
275*67e74705SXin Li
276*67e74705SXin Li // Ignore layouts that are bound to violate the column limit.
277*67e74705SXin Li if (Format.TotalWidth > Style.ColumnLimit)
278*67e74705SXin Li continue;
279*67e74705SXin Li
280*67e74705SXin Li Formats.push_back(Format);
281*67e74705SXin Li }
282*67e74705SXin Li }
283*67e74705SXin Li
284*67e74705SXin Li const CommaSeparatedList::ColumnFormat *
getColumnFormat(unsigned RemainingCharacters) const285*67e74705SXin Li CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
286*67e74705SXin Li const ColumnFormat *BestFormat = nullptr;
287*67e74705SXin Li for (SmallVector<ColumnFormat, 4>::const_reverse_iterator
288*67e74705SXin Li I = Formats.rbegin(),
289*67e74705SXin Li E = Formats.rend();
290*67e74705SXin Li I != E; ++I) {
291*67e74705SXin Li if (I->TotalWidth <= RemainingCharacters) {
292*67e74705SXin Li if (BestFormat && I->LineCount > BestFormat->LineCount)
293*67e74705SXin Li break;
294*67e74705SXin Li BestFormat = &*I;
295*67e74705SXin Li }
296*67e74705SXin Li }
297*67e74705SXin Li return BestFormat;
298*67e74705SXin Li }
299*67e74705SXin Li
300*67e74705SXin Li } // namespace format
301*67e74705SXin Li } // namespace clang
302