xref: /aosp_15_r20/external/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp (revision 67e74705e28f6214e480b399dd47ea732279e315)
1  //===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
2  //
3  //                     The LLVM Compiler Infrastructure
4  //
5  // This file is distributed under the University of Illinois Open Source
6  // License. See LICENSE.TXT for details.
7  //
8  //===----------------------------------------------------------------------===//
9  //
10  // This tablegen backend emits an efficient function to translate HTML named
11  // character references to UTF-8 sequences.
12  //
13  //===----------------------------------------------------------------------===//
14  
15  #include "llvm/ADT/SmallString.h"
16  #include "llvm/Support/ConvertUTF.h"
17  #include "llvm/TableGen/Error.h"
18  #include "llvm/TableGen/Record.h"
19  #include "llvm/TableGen/StringMatcher.h"
20  #include "llvm/TableGen/TableGenBackend.h"
21  #include <vector>
22  
23  using namespace llvm;
24  
25  /// \brief Convert a code point to the corresponding UTF-8 sequence represented
26  /// as a C string literal.
27  ///
28  /// \returns true on success.
translateCodePointToUTF8(unsigned CodePoint,SmallVectorImpl<char> & CLiteral)29  static bool translateCodePointToUTF8(unsigned CodePoint,
30                                       SmallVectorImpl<char> &CLiteral) {
31    char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
32    char *TranslatedPtr = Translated;
33    if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
34      return false;
35  
36    StringRef UTF8(Translated, TranslatedPtr - Translated);
37  
38    raw_svector_ostream OS(CLiteral);
39    OS << "\"";
40    for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
41      OS << "\\x";
42      OS.write_hex(static_cast<unsigned char>(UTF8[i]));
43    }
44    OS << "\"";
45  
46    return true;
47  }
48  
49  namespace clang {
EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper & Records,raw_ostream & OS)50  void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
51                                                    raw_ostream &OS) {
52    std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
53    std::vector<StringMatcher::StringPair> NameToUTF8;
54    SmallString<32> CLiteral;
55    for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
56         I != E; ++I) {
57      Record &Tag = **I;
58      std::string Spelling = Tag.getValueAsString("Spelling");
59      uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
60      CLiteral.clear();
61      CLiteral.append("return ");
62      if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
63        SrcMgr.PrintMessage(Tag.getLoc().front(),
64                            SourceMgr::DK_Error,
65                            Twine("invalid code point"));
66        continue;
67      }
68      CLiteral.append(";");
69  
70      StringMatcher::StringPair Match(Spelling, CLiteral.str());
71      NameToUTF8.push_back(Match);
72    }
73  
74    emitSourceFileHeader("HTML named character reference to UTF-8 "
75                         "translation", OS);
76  
77    OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
78          "                                             StringRef Name) {\n";
79    StringMatcher("Name", NameToUTF8, OS).Emit();
80    OS << "  return StringRef();\n"
81       << "}\n\n";
82  }
83  
84  } // end namespace clang
85  
86