//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2*67e74705SXin Li //
3*67e74705SXin Li // The LLVM Compiler Infrastructure
4*67e74705SXin Li //
5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source
6*67e74705SXin Li // License. See LICENSE.TXT for details.
7*67e74705SXin Li //
8*67e74705SXin Li //===----------------------------------------------------------------------===//
9*67e74705SXin Li //
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12*67e74705SXin Li //
13*67e74705SXin Li //===----------------------------------------------------------------------===//
14*67e74705SXin Li
15*67e74705SXin Li #include "clang/Analysis/Analyses/FormatString.h"
16*67e74705SXin Li #include "FormatStringParsing.h"
17*67e74705SXin Li #include "clang/Basic/TargetInfo.h"
18*67e74705SXin Li
19*67e74705SXin Li using clang::analyze_format_string::ArgType;
20*67e74705SXin Li using clang::analyze_format_string::FormatStringHandler;
21*67e74705SXin Li using clang::analyze_format_string::LengthModifier;
22*67e74705SXin Li using clang::analyze_format_string::OptionalAmount;
23*67e74705SXin Li using clang::analyze_format_string::ConversionSpecifier;
24*67e74705SXin Li using clang::analyze_scanf::ScanfConversionSpecifier;
25*67e74705SXin Li using clang::analyze_scanf::ScanfSpecifier;
26*67e74705SXin Li using clang::UpdateOnReturn;
27*67e74705SXin Li using namespace clang;
28*67e74705SXin Li
29*67e74705SXin Li typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
30*67e74705SXin Li ScanfSpecifierResult;
31*67e74705SXin Li
ParseScanList(FormatStringHandler & H,ScanfConversionSpecifier & CS,const char * & Beg,const char * E)32*67e74705SXin Li static bool ParseScanList(FormatStringHandler &H,
33*67e74705SXin Li ScanfConversionSpecifier &CS,
34*67e74705SXin Li const char *&Beg, const char *E) {
35*67e74705SXin Li const char *I = Beg;
36*67e74705SXin Li const char *start = I - 1;
37*67e74705SXin Li UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38*67e74705SXin Li
39*67e74705SXin Li // No more characters?
40*67e74705SXin Li if (I == E) {
41*67e74705SXin Li H.HandleIncompleteScanList(start, I);
42*67e74705SXin Li return true;
43*67e74705SXin Li }
44*67e74705SXin Li
45*67e74705SXin Li // Special case: ']' is the first character.
46*67e74705SXin Li if (*I == ']') {
47*67e74705SXin Li if (++I == E) {
48*67e74705SXin Li H.HandleIncompleteScanList(start, I - 1);
49*67e74705SXin Li return true;
50*67e74705SXin Li }
51*67e74705SXin Li }
52*67e74705SXin Li
53*67e74705SXin Li // Special case: "^]" are the first characters.
54*67e74705SXin Li if (I + 1 != E && I[0] == '^' && I[1] == ']') {
55*67e74705SXin Li I += 2;
56*67e74705SXin Li if (I == E) {
57*67e74705SXin Li H.HandleIncompleteScanList(start, I - 1);
58*67e74705SXin Li return true;
59*67e74705SXin Li }
60*67e74705SXin Li }
61*67e74705SXin Li
62*67e74705SXin Li // Look for a ']' character which denotes the end of the scan list.
63*67e74705SXin Li while (*I != ']') {
64*67e74705SXin Li if (++I == E) {
65*67e74705SXin Li H.HandleIncompleteScanList(start, I - 1);
66*67e74705SXin Li return true;
67*67e74705SXin Li }
68*67e74705SXin Li }
69*67e74705SXin Li
70*67e74705SXin Li CS.setEndScanList(I);
71*67e74705SXin Li return false;
72*67e74705SXin Li }
73*67e74705SXin Li
74*67e74705SXin Li // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
75*67e74705SXin Li // We can possibly refactor.
ParseScanfSpecifier(FormatStringHandler & H,const char * & Beg,const char * E,unsigned & argIndex,const LangOptions & LO,const TargetInfo & Target)76*67e74705SXin Li static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
77*67e74705SXin Li const char *&Beg,
78*67e74705SXin Li const char *E,
79*67e74705SXin Li unsigned &argIndex,
80*67e74705SXin Li const LangOptions &LO,
81*67e74705SXin Li const TargetInfo &Target) {
82*67e74705SXin Li using namespace clang::analyze_format_string;
83*67e74705SXin Li using namespace clang::analyze_scanf;
84*67e74705SXin Li const char *I = Beg;
85*67e74705SXin Li const char *Start = nullptr;
86*67e74705SXin Li UpdateOnReturn <const char*> UpdateBeg(Beg, I);
87*67e74705SXin Li
88*67e74705SXin Li // Look for a '%' character that indicates the start of a format specifier.
89*67e74705SXin Li for ( ; I != E ; ++I) {
90*67e74705SXin Li char c = *I;
91*67e74705SXin Li if (c == '\0') {
92*67e74705SXin Li // Detect spurious null characters, which are likely errors.
93*67e74705SXin Li H.HandleNullChar(I);
94*67e74705SXin Li return true;
95*67e74705SXin Li }
96*67e74705SXin Li if (c == '%') {
97*67e74705SXin Li Start = I++; // Record the start of the format specifier.
98*67e74705SXin Li break;
99*67e74705SXin Li }
100*67e74705SXin Li }
101*67e74705SXin Li
102*67e74705SXin Li // No format specifier found?
103*67e74705SXin Li if (!Start)
104*67e74705SXin Li return false;
105*67e74705SXin Li
106*67e74705SXin Li if (I == E) {
107*67e74705SXin Li // No more characters left?
108*67e74705SXin Li H.HandleIncompleteSpecifier(Start, E - Start);
109*67e74705SXin Li return true;
110*67e74705SXin Li }
111*67e74705SXin Li
112*67e74705SXin Li ScanfSpecifier FS;
113*67e74705SXin Li if (ParseArgPosition(H, FS, Start, I, E))
114*67e74705SXin Li return true;
115*67e74705SXin Li
116*67e74705SXin Li if (I == E) {
117*67e74705SXin Li // No more characters left?
118*67e74705SXin Li H.HandleIncompleteSpecifier(Start, E - Start);
119*67e74705SXin Li return true;
120*67e74705SXin Li }
121*67e74705SXin Li
122*67e74705SXin Li // Look for '*' flag if it is present.
123*67e74705SXin Li if (*I == '*') {
124*67e74705SXin Li FS.setSuppressAssignment(I);
125*67e74705SXin Li if (++I == E) {
126*67e74705SXin Li H.HandleIncompleteSpecifier(Start, E - Start);
127*67e74705SXin Li return true;
128*67e74705SXin Li }
129*67e74705SXin Li }
130*67e74705SXin Li
131*67e74705SXin Li // Look for the field width (if any). Unlike printf, this is either
132*67e74705SXin Li // a fixed integer or isn't present.
133*67e74705SXin Li const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
134*67e74705SXin Li if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
135*67e74705SXin Li assert(Amt.getHowSpecified() == OptionalAmount::Constant);
136*67e74705SXin Li FS.setFieldWidth(Amt);
137*67e74705SXin Li
138*67e74705SXin Li if (I == E) {
139*67e74705SXin Li // No more characters left?
140*67e74705SXin Li H.HandleIncompleteSpecifier(Start, E - Start);
141*67e74705SXin Li return true;
142*67e74705SXin Li }
143*67e74705SXin Li }
144*67e74705SXin Li
145*67e74705SXin Li // Look for the length modifier.
146*67e74705SXin Li if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
147*67e74705SXin Li // No more characters left?
148*67e74705SXin Li H.HandleIncompleteSpecifier(Start, E - Start);
149*67e74705SXin Li return true;
150*67e74705SXin Li }
151*67e74705SXin Li
152*67e74705SXin Li // Detect spurious null characters, which are likely errors.
153*67e74705SXin Li if (*I == '\0') {
154*67e74705SXin Li H.HandleNullChar(I);
155*67e74705SXin Li return true;
156*67e74705SXin Li }
157*67e74705SXin Li
158*67e74705SXin Li // Finally, look for the conversion specifier.
159*67e74705SXin Li const char *conversionPosition = I++;
160*67e74705SXin Li ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
161*67e74705SXin Li switch (*conversionPosition) {
162*67e74705SXin Li default:
163*67e74705SXin Li break;
164*67e74705SXin Li case '%': k = ConversionSpecifier::PercentArg; break;
165*67e74705SXin Li case 'A': k = ConversionSpecifier::AArg; break;
166*67e74705SXin Li case 'E': k = ConversionSpecifier::EArg; break;
167*67e74705SXin Li case 'F': k = ConversionSpecifier::FArg; break;
168*67e74705SXin Li case 'G': k = ConversionSpecifier::GArg; break;
169*67e74705SXin Li case 'X': k = ConversionSpecifier::XArg; break;
170*67e74705SXin Li case 'a': k = ConversionSpecifier::aArg; break;
171*67e74705SXin Li case 'd': k = ConversionSpecifier::dArg; break;
172*67e74705SXin Li case 'e': k = ConversionSpecifier::eArg; break;
173*67e74705SXin Li case 'f': k = ConversionSpecifier::fArg; break;
174*67e74705SXin Li case 'g': k = ConversionSpecifier::gArg; break;
175*67e74705SXin Li case 'i': k = ConversionSpecifier::iArg; break;
176*67e74705SXin Li case 'n': k = ConversionSpecifier::nArg; break;
177*67e74705SXin Li case 'c': k = ConversionSpecifier::cArg; break;
178*67e74705SXin Li case 'C': k = ConversionSpecifier::CArg; break;
179*67e74705SXin Li case 'S': k = ConversionSpecifier::SArg; break;
180*67e74705SXin Li case '[': k = ConversionSpecifier::ScanListArg; break;
181*67e74705SXin Li case 'u': k = ConversionSpecifier::uArg; break;
182*67e74705SXin Li case 'x': k = ConversionSpecifier::xArg; break;
183*67e74705SXin Li case 'o': k = ConversionSpecifier::oArg; break;
184*67e74705SXin Li case 's': k = ConversionSpecifier::sArg; break;
185*67e74705SXin Li case 'p': k = ConversionSpecifier::pArg; break;
186*67e74705SXin Li // Apple extensions
187*67e74705SXin Li // Apple-specific
188*67e74705SXin Li case 'D':
189*67e74705SXin Li if (Target.getTriple().isOSDarwin())
190*67e74705SXin Li k = ConversionSpecifier::DArg;
191*67e74705SXin Li break;
192*67e74705SXin Li case 'O':
193*67e74705SXin Li if (Target.getTriple().isOSDarwin())
194*67e74705SXin Li k = ConversionSpecifier::OArg;
195*67e74705SXin Li break;
196*67e74705SXin Li case 'U':
197*67e74705SXin Li if (Target.getTriple().isOSDarwin())
198*67e74705SXin Li k = ConversionSpecifier::UArg;
199*67e74705SXin Li break;
200*67e74705SXin Li }
201*67e74705SXin Li ScanfConversionSpecifier CS(conversionPosition, k);
202*67e74705SXin Li if (k == ScanfConversionSpecifier::ScanListArg) {
203*67e74705SXin Li if (ParseScanList(H, CS, I, E))
204*67e74705SXin Li return true;
205*67e74705SXin Li }
206*67e74705SXin Li FS.setConversionSpecifier(CS);
207*67e74705SXin Li if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
208*67e74705SXin Li && !FS.usesPositionalArg())
209*67e74705SXin Li FS.setArgIndex(argIndex++);
210*67e74705SXin Li
211*67e74705SXin Li // FIXME: '%' and '*' doesn't make sense. Issue a warning.
212*67e74705SXin Li // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
213*67e74705SXin Li
214*67e74705SXin Li if (k == ScanfConversionSpecifier::InvalidSpecifier) {
215*67e74705SXin Li unsigned Len = I - Beg;
216*67e74705SXin Li if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
217*67e74705SXin Li CS.setEndScanList(Beg + Len);
218*67e74705SXin Li FS.setConversionSpecifier(CS);
219*67e74705SXin Li }
220*67e74705SXin Li // Assume the conversion takes one argument.
221*67e74705SXin Li return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
222*67e74705SXin Li }
223*67e74705SXin Li return ScanfSpecifierResult(Start, FS);
224*67e74705SXin Li }
225*67e74705SXin Li
getArgType(ASTContext & Ctx) const226*67e74705SXin Li ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
227*67e74705SXin Li const ScanfConversionSpecifier &CS = getConversionSpecifier();
228*67e74705SXin Li
229*67e74705SXin Li if (!CS.consumesDataArgument())
230*67e74705SXin Li return ArgType::Invalid();
231*67e74705SXin Li
232*67e74705SXin Li switch(CS.getKind()) {
233*67e74705SXin Li // Signed int.
234*67e74705SXin Li case ConversionSpecifier::dArg:
235*67e74705SXin Li case ConversionSpecifier::DArg:
236*67e74705SXin Li case ConversionSpecifier::iArg:
237*67e74705SXin Li switch (LM.getKind()) {
238*67e74705SXin Li case LengthModifier::None:
239*67e74705SXin Li return ArgType::PtrTo(Ctx.IntTy);
240*67e74705SXin Li case LengthModifier::AsChar:
241*67e74705SXin Li return ArgType::PtrTo(ArgType::AnyCharTy);
242*67e74705SXin Li case LengthModifier::AsShort:
243*67e74705SXin Li return ArgType::PtrTo(Ctx.ShortTy);
244*67e74705SXin Li case LengthModifier::AsLong:
245*67e74705SXin Li return ArgType::PtrTo(Ctx.LongTy);
246*67e74705SXin Li case LengthModifier::AsLongLong:
247*67e74705SXin Li case LengthModifier::AsQuad:
248*67e74705SXin Li return ArgType::PtrTo(Ctx.LongLongTy);
249*67e74705SXin Li case LengthModifier::AsInt64:
250*67e74705SXin Li return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
251*67e74705SXin Li case LengthModifier::AsIntMax:
252*67e74705SXin Li return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
253*67e74705SXin Li case LengthModifier::AsSizeT:
254*67e74705SXin Li // FIXME: ssize_t.
255*67e74705SXin Li return ArgType();
256*67e74705SXin Li case LengthModifier::AsPtrDiff:
257*67e74705SXin Li return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
258*67e74705SXin Li case LengthModifier::AsLongDouble:
259*67e74705SXin Li // GNU extension.
260*67e74705SXin Li return ArgType::PtrTo(Ctx.LongLongTy);
261*67e74705SXin Li case LengthModifier::AsAllocate:
262*67e74705SXin Li case LengthModifier::AsMAllocate:
263*67e74705SXin Li case LengthModifier::AsInt32:
264*67e74705SXin Li case LengthModifier::AsInt3264:
265*67e74705SXin Li case LengthModifier::AsWide:
266*67e74705SXin Li return ArgType::Invalid();
267*67e74705SXin Li }
268*67e74705SXin Li
269*67e74705SXin Li // Unsigned int.
270*67e74705SXin Li case ConversionSpecifier::oArg:
271*67e74705SXin Li case ConversionSpecifier::OArg:
272*67e74705SXin Li case ConversionSpecifier::uArg:
273*67e74705SXin Li case ConversionSpecifier::UArg:
274*67e74705SXin Li case ConversionSpecifier::xArg:
275*67e74705SXin Li case ConversionSpecifier::XArg:
276*67e74705SXin Li switch (LM.getKind()) {
277*67e74705SXin Li case LengthModifier::None:
278*67e74705SXin Li return ArgType::PtrTo(Ctx.UnsignedIntTy);
279*67e74705SXin Li case LengthModifier::AsChar:
280*67e74705SXin Li return ArgType::PtrTo(Ctx.UnsignedCharTy);
281*67e74705SXin Li case LengthModifier::AsShort:
282*67e74705SXin Li return ArgType::PtrTo(Ctx.UnsignedShortTy);
283*67e74705SXin Li case LengthModifier::AsLong:
284*67e74705SXin Li return ArgType::PtrTo(Ctx.UnsignedLongTy);
285*67e74705SXin Li case LengthModifier::AsLongLong:
286*67e74705SXin Li case LengthModifier::AsQuad:
287*67e74705SXin Li return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
288*67e74705SXin Li case LengthModifier::AsInt64:
289*67e74705SXin Li return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
290*67e74705SXin Li case LengthModifier::AsIntMax:
291*67e74705SXin Li return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
292*67e74705SXin Li case LengthModifier::AsSizeT:
293*67e74705SXin Li return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
294*67e74705SXin Li case LengthModifier::AsPtrDiff:
295*67e74705SXin Li // FIXME: Unsigned version of ptrdiff_t?
296*67e74705SXin Li return ArgType();
297*67e74705SXin Li case LengthModifier::AsLongDouble:
298*67e74705SXin Li // GNU extension.
299*67e74705SXin Li return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
300*67e74705SXin Li case LengthModifier::AsAllocate:
301*67e74705SXin Li case LengthModifier::AsMAllocate:
302*67e74705SXin Li case LengthModifier::AsInt32:
303*67e74705SXin Li case LengthModifier::AsInt3264:
304*67e74705SXin Li case LengthModifier::AsWide:
305*67e74705SXin Li return ArgType::Invalid();
306*67e74705SXin Li }
307*67e74705SXin Li
308*67e74705SXin Li // Float.
309*67e74705SXin Li case ConversionSpecifier::aArg:
310*67e74705SXin Li case ConversionSpecifier::AArg:
311*67e74705SXin Li case ConversionSpecifier::eArg:
312*67e74705SXin Li case ConversionSpecifier::EArg:
313*67e74705SXin Li case ConversionSpecifier::fArg:
314*67e74705SXin Li case ConversionSpecifier::FArg:
315*67e74705SXin Li case ConversionSpecifier::gArg:
316*67e74705SXin Li case ConversionSpecifier::GArg:
317*67e74705SXin Li switch (LM.getKind()) {
318*67e74705SXin Li case LengthModifier::None:
319*67e74705SXin Li return ArgType::PtrTo(Ctx.FloatTy);
320*67e74705SXin Li case LengthModifier::AsLong:
321*67e74705SXin Li return ArgType::PtrTo(Ctx.DoubleTy);
322*67e74705SXin Li case LengthModifier::AsLongDouble:
323*67e74705SXin Li return ArgType::PtrTo(Ctx.LongDoubleTy);
324*67e74705SXin Li default:
325*67e74705SXin Li return ArgType::Invalid();
326*67e74705SXin Li }
327*67e74705SXin Li
328*67e74705SXin Li // Char, string and scanlist.
329*67e74705SXin Li case ConversionSpecifier::cArg:
330*67e74705SXin Li case ConversionSpecifier::sArg:
331*67e74705SXin Li case ConversionSpecifier::ScanListArg:
332*67e74705SXin Li switch (LM.getKind()) {
333*67e74705SXin Li case LengthModifier::None:
334*67e74705SXin Li return ArgType::PtrTo(ArgType::AnyCharTy);
335*67e74705SXin Li case LengthModifier::AsLong:
336*67e74705SXin Li case LengthModifier::AsWide:
337*67e74705SXin Li return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
338*67e74705SXin Li case LengthModifier::AsAllocate:
339*67e74705SXin Li case LengthModifier::AsMAllocate:
340*67e74705SXin Li return ArgType::PtrTo(ArgType::CStrTy);
341*67e74705SXin Li case LengthModifier::AsShort:
342*67e74705SXin Li if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
343*67e74705SXin Li return ArgType::PtrTo(ArgType::AnyCharTy);
344*67e74705SXin Li default:
345*67e74705SXin Li return ArgType::Invalid();
346*67e74705SXin Li }
347*67e74705SXin Li case ConversionSpecifier::CArg:
348*67e74705SXin Li case ConversionSpecifier::SArg:
349*67e74705SXin Li // FIXME: Mac OS X specific?
350*67e74705SXin Li switch (LM.getKind()) {
351*67e74705SXin Li case LengthModifier::None:
352*67e74705SXin Li case LengthModifier::AsWide:
353*67e74705SXin Li return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
354*67e74705SXin Li case LengthModifier::AsAllocate:
355*67e74705SXin Li case LengthModifier::AsMAllocate:
356*67e74705SXin Li return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
357*67e74705SXin Li case LengthModifier::AsShort:
358*67e74705SXin Li if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
359*67e74705SXin Li return ArgType::PtrTo(ArgType::AnyCharTy);
360*67e74705SXin Li default:
361*67e74705SXin Li return ArgType::Invalid();
362*67e74705SXin Li }
363*67e74705SXin Li
364*67e74705SXin Li // Pointer.
365*67e74705SXin Li case ConversionSpecifier::pArg:
366*67e74705SXin Li return ArgType::PtrTo(ArgType::CPointerTy);
367*67e74705SXin Li
368*67e74705SXin Li // Write-back.
369*67e74705SXin Li case ConversionSpecifier::nArg:
370*67e74705SXin Li switch (LM.getKind()) {
371*67e74705SXin Li case LengthModifier::None:
372*67e74705SXin Li return ArgType::PtrTo(Ctx.IntTy);
373*67e74705SXin Li case LengthModifier::AsChar:
374*67e74705SXin Li return ArgType::PtrTo(Ctx.SignedCharTy);
375*67e74705SXin Li case LengthModifier::AsShort:
376*67e74705SXin Li return ArgType::PtrTo(Ctx.ShortTy);
377*67e74705SXin Li case LengthModifier::AsLong:
378*67e74705SXin Li return ArgType::PtrTo(Ctx.LongTy);
379*67e74705SXin Li case LengthModifier::AsLongLong:
380*67e74705SXin Li case LengthModifier::AsQuad:
381*67e74705SXin Li return ArgType::PtrTo(Ctx.LongLongTy);
382*67e74705SXin Li case LengthModifier::AsInt64:
383*67e74705SXin Li return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
384*67e74705SXin Li case LengthModifier::AsIntMax:
385*67e74705SXin Li return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
386*67e74705SXin Li case LengthModifier::AsSizeT:
387*67e74705SXin Li return ArgType(); // FIXME: ssize_t
388*67e74705SXin Li case LengthModifier::AsPtrDiff:
389*67e74705SXin Li return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
390*67e74705SXin Li case LengthModifier::AsLongDouble:
391*67e74705SXin Li return ArgType(); // FIXME: Is this a known extension?
392*67e74705SXin Li case LengthModifier::AsAllocate:
393*67e74705SXin Li case LengthModifier::AsMAllocate:
394*67e74705SXin Li case LengthModifier::AsInt32:
395*67e74705SXin Li case LengthModifier::AsInt3264:
396*67e74705SXin Li case LengthModifier::AsWide:
397*67e74705SXin Li return ArgType::Invalid();
398*67e74705SXin Li }
399*67e74705SXin Li
400*67e74705SXin Li default:
401*67e74705SXin Li break;
402*67e74705SXin Li }
403*67e74705SXin Li
404*67e74705SXin Li return ArgType();
405*67e74705SXin Li }
406*67e74705SXin Li
fixType(QualType QT,QualType RawQT,const LangOptions & LangOpt,ASTContext & Ctx)407*67e74705SXin Li bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
408*67e74705SXin Li const LangOptions &LangOpt,
409*67e74705SXin Li ASTContext &Ctx) {
410*67e74705SXin Li
411*67e74705SXin Li // %n is different from other conversion specifiers; don't try to fix it.
412*67e74705SXin Li if (CS.getKind() == ConversionSpecifier::nArg)
413*67e74705SXin Li return false;
414*67e74705SXin Li
415*67e74705SXin Li if (!QT->isPointerType())
416*67e74705SXin Li return false;
417*67e74705SXin Li
418*67e74705SXin Li QualType PT = QT->getPointeeType();
419*67e74705SXin Li
420*67e74705SXin Li // If it's an enum, get its underlying type.
421*67e74705SXin Li if (const EnumType *ETy = PT->getAs<EnumType>())
422*67e74705SXin Li PT = ETy->getDecl()->getIntegerType();
423*67e74705SXin Li
424*67e74705SXin Li const BuiltinType *BT = PT->getAs<BuiltinType>();
425*67e74705SXin Li if (!BT)
426*67e74705SXin Li return false;
427*67e74705SXin Li
428*67e74705SXin Li // Pointer to a character.
429*67e74705SXin Li if (PT->isAnyCharacterType()) {
430*67e74705SXin Li CS.setKind(ConversionSpecifier::sArg);
431*67e74705SXin Li if (PT->isWideCharType())
432*67e74705SXin Li LM.setKind(LengthModifier::AsWideChar);
433*67e74705SXin Li else
434*67e74705SXin Li LM.setKind(LengthModifier::None);
435*67e74705SXin Li
436*67e74705SXin Li // If we know the target array length, we can use it as a field width.
437*67e74705SXin Li if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
438*67e74705SXin Li if (CAT->getSizeModifier() == ArrayType::Normal)
439*67e74705SXin Li FieldWidth = OptionalAmount(OptionalAmount::Constant,
440*67e74705SXin Li CAT->getSize().getZExtValue() - 1,
441*67e74705SXin Li "", 0, false);
442*67e74705SXin Li
443*67e74705SXin Li }
444*67e74705SXin Li return true;
445*67e74705SXin Li }
446*67e74705SXin Li
447*67e74705SXin Li // Figure out the length modifier.
448*67e74705SXin Li switch (BT->getKind()) {
449*67e74705SXin Li // no modifier
450*67e74705SXin Li case BuiltinType::UInt:
451*67e74705SXin Li case BuiltinType::Int:
452*67e74705SXin Li case BuiltinType::Float:
453*67e74705SXin Li LM.setKind(LengthModifier::None);
454*67e74705SXin Li break;
455*67e74705SXin Li
456*67e74705SXin Li // hh
457*67e74705SXin Li case BuiltinType::Char_U:
458*67e74705SXin Li case BuiltinType::UChar:
459*67e74705SXin Li case BuiltinType::Char_S:
460*67e74705SXin Li case BuiltinType::SChar:
461*67e74705SXin Li LM.setKind(LengthModifier::AsChar);
462*67e74705SXin Li break;
463*67e74705SXin Li
464*67e74705SXin Li // h
465*67e74705SXin Li case BuiltinType::Short:
466*67e74705SXin Li case BuiltinType::UShort:
467*67e74705SXin Li LM.setKind(LengthModifier::AsShort);
468*67e74705SXin Li break;
469*67e74705SXin Li
470*67e74705SXin Li // l
471*67e74705SXin Li case BuiltinType::Long:
472*67e74705SXin Li case BuiltinType::ULong:
473*67e74705SXin Li case BuiltinType::Double:
474*67e74705SXin Li LM.setKind(LengthModifier::AsLong);
475*67e74705SXin Li break;
476*67e74705SXin Li
477*67e74705SXin Li // ll
478*67e74705SXin Li case BuiltinType::LongLong:
479*67e74705SXin Li case BuiltinType::ULongLong:
480*67e74705SXin Li LM.setKind(LengthModifier::AsLongLong);
481*67e74705SXin Li break;
482*67e74705SXin Li
483*67e74705SXin Li // L
484*67e74705SXin Li case BuiltinType::LongDouble:
485*67e74705SXin Li LM.setKind(LengthModifier::AsLongDouble);
486*67e74705SXin Li break;
487*67e74705SXin Li
488*67e74705SXin Li // Don't know.
489*67e74705SXin Li default:
490*67e74705SXin Li return false;
491*67e74705SXin Li }
492*67e74705SXin Li
493*67e74705SXin Li // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
494*67e74705SXin Li if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
495*67e74705SXin Li namedTypeToLengthModifier(PT, LM);
496*67e74705SXin Li
497*67e74705SXin Li // If fixing the length modifier was enough, we are done.
498*67e74705SXin Li if (hasValidLengthModifier(Ctx.getTargetInfo())) {
499*67e74705SXin Li const analyze_scanf::ArgType &AT = getArgType(Ctx);
500*67e74705SXin Li if (AT.isValid() && AT.matchesType(Ctx, QT))
501*67e74705SXin Li return true;
502*67e74705SXin Li }
503*67e74705SXin Li
504*67e74705SXin Li // Figure out the conversion specifier.
505*67e74705SXin Li if (PT->isRealFloatingType())
506*67e74705SXin Li CS.setKind(ConversionSpecifier::fArg);
507*67e74705SXin Li else if (PT->isSignedIntegerType())
508*67e74705SXin Li CS.setKind(ConversionSpecifier::dArg);
509*67e74705SXin Li else if (PT->isUnsignedIntegerType())
510*67e74705SXin Li CS.setKind(ConversionSpecifier::uArg);
511*67e74705SXin Li else
512*67e74705SXin Li llvm_unreachable("Unexpected type");
513*67e74705SXin Li
514*67e74705SXin Li return true;
515*67e74705SXin Li }
516*67e74705SXin Li
toString(raw_ostream & os) const517*67e74705SXin Li void ScanfSpecifier::toString(raw_ostream &os) const {
518*67e74705SXin Li os << "%";
519*67e74705SXin Li
520*67e74705SXin Li if (usesPositionalArg())
521*67e74705SXin Li os << getPositionalArgIndex() << "$";
522*67e74705SXin Li if (SuppressAssignment)
523*67e74705SXin Li os << "*";
524*67e74705SXin Li
525*67e74705SXin Li FieldWidth.toString(os);
526*67e74705SXin Li os << LM.toString();
527*67e74705SXin Li os << CS.toString();
528*67e74705SXin Li }
529*67e74705SXin Li
ParseScanfString(FormatStringHandler & H,const char * I,const char * E,const LangOptions & LO,const TargetInfo & Target)530*67e74705SXin Li bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
531*67e74705SXin Li const char *I,
532*67e74705SXin Li const char *E,
533*67e74705SXin Li const LangOptions &LO,
534*67e74705SXin Li const TargetInfo &Target) {
535*67e74705SXin Li
536*67e74705SXin Li unsigned argIndex = 0;
537*67e74705SXin Li
538*67e74705SXin Li // Keep looking for a format specifier until we have exhausted the string.
539*67e74705SXin Li while (I != E) {
540*67e74705SXin Li const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
541*67e74705SXin Li LO, Target);
542*67e74705SXin Li // Did a fail-stop error of any kind occur when parsing the specifier?
543*67e74705SXin Li // If so, don't do any more processing.
544*67e74705SXin Li if (FSR.shouldStop())
545*67e74705SXin Li return true;
546*67e74705SXin Li // Did we exhaust the string or encounter an error that
547*67e74705SXin Li // we can recover from?
548*67e74705SXin Li if (!FSR.hasValue())
549*67e74705SXin Li continue;
550*67e74705SXin Li // We have a format specifier. Pass it to the callback.
551*67e74705SXin Li if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
552*67e74705SXin Li I - FSR.getStart())) {
553*67e74705SXin Li return true;
554*67e74705SXin Li }
555*67e74705SXin Li }
556*67e74705SXin Li assert(I == E && "Format string not exhausted");
557*67e74705SXin Li return false;
558*67e74705SXin Li }
559