// xref: /aosp_15_r20/external/libwebm/webm_parser/src/master_parser.h (revision 103e46e4cd4b6efcf6001f23fa8665fb110abf8d)
// Copyright (c) 2016 The WebM project authors. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the LICENSE file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS.  All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
#ifndef SRC_MASTER_PARSER_H_
#define SRC_MASTER_PARSER_H_

#include <cassert>
#include <cstdint>
#include <functional>
#include <memory>
#include <type_traits>
#include <unordered_map>
#include <utility>

#include "src/element_parser.h"
#include "src/id_parser.h"
#include "src/size_parser.h"
#include "src/skip_parser.h"
#include "src/unknown_parser.h"
#include "src/void_parser.h"
#include "webm/callback.h"
#include "webm/element.h"
#include "webm/id.h"
#include "webm/reader.h"
#include "webm/status.h"

31*103e46e4SHarish Mahendrakar namespace webm {
32*103e46e4SHarish Mahendrakar 
33*103e46e4SHarish Mahendrakar // A general purpose parser for EBML master elements.
34*103e46e4SHarish Mahendrakar //
35*103e46e4SHarish Mahendrakar // For example, if a document specification defines a Foo master element that
36*103e46e4SHarish Mahendrakar // has two boolean children (Bar and Baz), then a FooParser capable of parsing
37*103e46e4SHarish Mahendrakar // the Foo master element could be defined as follows:
38*103e46e4SHarish Mahendrakar //
39*103e46e4SHarish Mahendrakar // struct FooParser : public MasterParser {
40*103e46e4SHarish Mahendrakar //   FooParser()
41*103e46e4SHarish Mahendrakar //       : MasterParser(MakeChild<BoolParser>(Id::kBar),
42*103e46e4SHarish Mahendrakar //                      MakeChild<BoolParser>(Id::kBaz)) {}
43*103e46e4SHarish Mahendrakar // };
44*103e46e4SHarish Mahendrakar //
45*103e46e4SHarish Mahendrakar // See the MasterValueParser for an alternative class for parsing master
46*103e46e4SHarish Mahendrakar // elements into a data structure.
47*103e46e4SHarish Mahendrakar class MasterParser : public ElementParser {
48*103e46e4SHarish Mahendrakar  public:
49*103e46e4SHarish Mahendrakar   // Constructs a new MasterParser that uses the given
50*103e46e4SHarish Mahendrakar   // {Id, std::unique_ptr<ElementParser>} pairs to map child IDs to the
51*103e46e4SHarish Mahendrakar   // appropriate parser/handler. Each argument must be of type
52*103e46e4SHarish Mahendrakar   // std::pair<Id, std::unique_ptr<ElementParser>>. If a parser is not
53*103e46e4SHarish Mahendrakar   // explicitly provided for Id::kVoid, a VoidParser will automatically be used
54*103e46e4SHarish Mahendrakar   // for it.
55*103e46e4SHarish Mahendrakar   //
56*103e46e4SHarish Mahendrakar   // Initializer lists don't support move-only types (i.e. std::unique_ptr), so
57*103e46e4SHarish Mahendrakar   // instead a variadic template is used.
58*103e46e4SHarish Mahendrakar   template <typename... T>
MasterParser(T &&...parser_pairs)59*103e46e4SHarish Mahendrakar   explicit MasterParser(T&&... parser_pairs) {
60*103e46e4SHarish Mahendrakar     // Prefer an odd reserve size. This makes libc++ use a prime number for the
61*103e46e4SHarish Mahendrakar     // bucket count. Otherwise, if it happens to be a power of 2, then libc++
62*103e46e4SHarish Mahendrakar     // will use a power-of-2 bucket count (and since Matroska EBML IDs have low
63*103e46e4SHarish Mahendrakar     // entropy in the low bits, there will be a lot of collisions). libstdc++
64*103e46e4SHarish Mahendrakar     // always prefers a prime bucket count. I'm not sure how MSVC or others are
65*103e46e4SHarish Mahendrakar     // implemented, but this shouldn't adversely affect them even if they are
66*103e46e4SHarish Mahendrakar     // implemented differently. Add one to the count because we'll likely need
67*103e46e4SHarish Mahendrakar     // to insert a parser for Id::kVoid.
68*103e46e4SHarish Mahendrakar     parsers_.reserve((sizeof...(T) + 1) | 1);
69*103e46e4SHarish Mahendrakar 
70*103e46e4SHarish Mahendrakar     // This dummy initializer list is just used to force the parameter pack to
71*103e46e4SHarish Mahendrakar     // be expanded, which turns the expression into a for-each "loop" that
72*103e46e4SHarish Mahendrakar     // inserts each argument into the map.
73*103e46e4SHarish Mahendrakar     auto dummy = {0, (InsertParser(std::forward<T>(parser_pairs)), 0)...};
74*103e46e4SHarish Mahendrakar     (void)dummy;  // Silence unused variable warning.
75*103e46e4SHarish Mahendrakar 
76*103e46e4SHarish Mahendrakar     if (parsers_.find(Id::kVoid) == parsers_.end()) {
77*103e46e4SHarish Mahendrakar       InsertParser(MakeChild<VoidParser>(Id::kVoid));
78*103e46e4SHarish Mahendrakar     }
79*103e46e4SHarish Mahendrakar   }
80*103e46e4SHarish Mahendrakar 
81*103e46e4SHarish Mahendrakar   MasterParser(const MasterParser&) = delete;
82*103e46e4SHarish Mahendrakar   MasterParser& operator=(const MasterParser&) = delete;
83*103e46e4SHarish Mahendrakar 
84*103e46e4SHarish Mahendrakar   Status Init(const ElementMetadata& metadata, std::uint64_t max_size) override;
85*103e46e4SHarish Mahendrakar 
86*103e46e4SHarish Mahendrakar   void InitAfterSeek(const Ancestory& child_ancestory,
87*103e46e4SHarish Mahendrakar                      const ElementMetadata& child_metadata) override;
88*103e46e4SHarish Mahendrakar 
89*103e46e4SHarish Mahendrakar   Status Feed(Callback* callback, Reader* reader,
90*103e46e4SHarish Mahendrakar               std::uint64_t* num_bytes_read) override;
91*103e46e4SHarish Mahendrakar 
92*103e46e4SHarish Mahendrakar   bool GetCachedMetadata(ElementMetadata* metadata) override;
93*103e46e4SHarish Mahendrakar 
header_size()94*103e46e4SHarish Mahendrakar   std::uint32_t header_size() const { return header_size_; }
95*103e46e4SHarish Mahendrakar 
96*103e46e4SHarish Mahendrakar   // Gets the size of this element. May be called before the parse is fully
97*103e46e4SHarish Mahendrakar   // complete (but only after Init() has already been called and successfully
98*103e46e4SHarish Mahendrakar   // returned).
size()99*103e46e4SHarish Mahendrakar   std::uint64_t size() const { return my_size_; }
100*103e46e4SHarish Mahendrakar 
101*103e46e4SHarish Mahendrakar   // Gets absolute byte position of the start of the element in the byte stream.
102*103e46e4SHarish Mahendrakar   // May be called before the parse is fully complete (but only after Init() has
103*103e46e4SHarish Mahendrakar   // already been called and successfully returned).
position()104*103e46e4SHarish Mahendrakar   std::uint64_t position() const { return my_position_; }
105*103e46e4SHarish Mahendrakar 
106*103e46e4SHarish Mahendrakar   // Gets the metadata for the child that is currently being parsed. This may
107*103e46e4SHarish Mahendrakar   // only be called while the child's body (not its header information like ID
108*103e46e4SHarish Mahendrakar   // and size) is being parsed.
child_metadata()109*103e46e4SHarish Mahendrakar   const ElementMetadata& child_metadata() const {
110*103e46e4SHarish Mahendrakar     assert(state_ == State::kValidatingChildSize ||
111*103e46e4SHarish Mahendrakar            state_ == State::kGettingAction ||
112*103e46e4SHarish Mahendrakar            state_ == State::kInitializingChildParser ||
113*103e46e4SHarish Mahendrakar            state_ == State::kReadingChildBody);
114*103e46e4SHarish Mahendrakar     return child_metadata_;
115*103e46e4SHarish Mahendrakar   }
116*103e46e4SHarish Mahendrakar 
117*103e46e4SHarish Mahendrakar  protected:
118*103e46e4SHarish Mahendrakar   // Allocates a new parser of type T, forwarding args to the constructor, and
119*103e46e4SHarish Mahendrakar   // creates a std::pair<Id, std::unique_ptr<ElementParser>> using the given id
120*103e46e4SHarish Mahendrakar   // and the allocated parser.
121*103e46e4SHarish Mahendrakar   template <typename T, typename... Args>
MakeChild(Id id,Args &&...args)122*103e46e4SHarish Mahendrakar   static std::pair<Id, std::unique_ptr<ElementParser>> MakeChild(
123*103e46e4SHarish Mahendrakar       Id id, Args&&... args) {
124*103e46e4SHarish Mahendrakar     std::unique_ptr<ElementParser> ptr(new T(std::forward<Args>(args)...));
125*103e46e4SHarish Mahendrakar     return std::pair<Id, std::unique_ptr<ElementParser>>(id, std::move(ptr));
126*103e46e4SHarish Mahendrakar   }
127*103e46e4SHarish Mahendrakar 
128*103e46e4SHarish Mahendrakar  private:
129*103e46e4SHarish Mahendrakar   // Parsing states for the finite-state machine.
130*103e46e4SHarish Mahendrakar   enum class State {
131*103e46e4SHarish Mahendrakar     /* clang-format off */
132*103e46e4SHarish Mahendrakar     // State                      Transitions to state      When
133*103e46e4SHarish Mahendrakar     kFirstReadOfChildId,       // kFinishingReadingChildId  size(id)  > 1
134*103e46e4SHarish Mahendrakar                                // kReadingChildSize         size(id) == 1
135*103e46e4SHarish Mahendrakar                                // kEndReached               EOF
136*103e46e4SHarish Mahendrakar     kFinishingReadingChildId,  // kReadingChildSize         done
137*103e46e4SHarish Mahendrakar     kReadingChildSize,         // kValidatingChildSize      done
138*103e46e4SHarish Mahendrakar     kValidatingChildSize,      // kGettingAction            done
139*103e46e4SHarish Mahendrakar                                // kEndReached               unknown id & unsized
140*103e46e4SHarish Mahendrakar     kGettingAction,            // kInitializingChildParser  done
141*103e46e4SHarish Mahendrakar     kInitializingChildParser,  // kReadingChildBody         done
142*103e46e4SHarish Mahendrakar     kReadingChildBody,         // kChildFullyParsed         child parse done
143*103e46e4SHarish Mahendrakar     kChildFullyParsed,         // kValidatingChildSize      cached metadata
144*103e46e4SHarish Mahendrakar                                // kFirstReadOfChildId       read  < my_size_
145*103e46e4SHarish Mahendrakar                                // kEndReached               read == my_size_
146*103e46e4SHarish Mahendrakar     kEndReached,               // No transitions from here (must call Init)
147*103e46e4SHarish Mahendrakar     /* clang-format on */
148*103e46e4SHarish Mahendrakar   };
149*103e46e4SHarish Mahendrakar 
150*103e46e4SHarish Mahendrakar   using StdHashIdKeyType = std::underlying_type<Id>::type;
151*103e46e4SHarish Mahendrakar   using StdHashId = std::hash<StdHashIdKeyType>;
152*103e46e4SHarish Mahendrakar 
153*103e46e4SHarish Mahendrakar   // Hash functor for hashing Id enums for storage in std::unordered_map.
154*103e46e4SHarish Mahendrakar   struct IdHash : StdHashId {
155*103e46e4SHarish Mahendrakar #if __cpp_lib_is_invocable
156*103e46e4SHarish Mahendrakar     using result_type = std::invoke_result<StdHashId, StdHashIdKeyType>::type;
157*103e46e4SHarish Mahendrakar #else
158*103e46e4SHarish Mahendrakar     using result_type = StdHashId::result_type;
159*103e46e4SHarish Mahendrakar #endif  // __cpp_lib_is_invocable
160*103e46e4SHarish Mahendrakar 
161*103e46e4SHarish Mahendrakar     // Returns the hash of the given id.
operatorIdHash162*103e46e4SHarish Mahendrakar     result_type operator()(Id id) const {
163*103e46e4SHarish Mahendrakar       return StdHashId::operator()(static_cast<StdHashIdKeyType>(id));
164*103e46e4SHarish Mahendrakar     }
165*103e46e4SHarish Mahendrakar   };
166*103e46e4SHarish Mahendrakar 
167*103e46e4SHarish Mahendrakar   // The parser for parsing element Ids.
168*103e46e4SHarish Mahendrakar   IdParser id_parser_;
169*103e46e4SHarish Mahendrakar 
170*103e46e4SHarish Mahendrakar   // The parser for parsing element sizes.
171*103e46e4SHarish Mahendrakar   SizeParser size_parser_;
172*103e46e4SHarish Mahendrakar 
173*103e46e4SHarish Mahendrakar   // Metadata for the child element that is currently being parsed.
174*103e46e4SHarish Mahendrakar   ElementMetadata child_metadata_;
175*103e46e4SHarish Mahendrakar 
176*103e46e4SHarish Mahendrakar   // Maps child IDs to the appropriate parser that can handle that child.
177*103e46e4SHarish Mahendrakar   std::unordered_map<Id, std::unique_ptr<ElementParser>, IdHash> parsers_;
178*103e46e4SHarish Mahendrakar 
179*103e46e4SHarish Mahendrakar   // The parser that is used to parse unknown children.
180*103e46e4SHarish Mahendrakar   UnknownParser unknown_parser_;
181*103e46e4SHarish Mahendrakar 
182*103e46e4SHarish Mahendrakar   // The parser that is used to skip over children.
183*103e46e4SHarish Mahendrakar   SkipParser skip_parser_;
184*103e46e4SHarish Mahendrakar 
185*103e46e4SHarish Mahendrakar   // The parser that is being used to parse the current child. This must be null
186*103e46e4SHarish Mahendrakar   // or a pointer in parsers_.
187*103e46e4SHarish Mahendrakar   ElementParser* child_parser_;
188*103e46e4SHarish Mahendrakar 
189*103e46e4SHarish Mahendrakar   // The current parsing action for the child that is currently being parsed.
190*103e46e4SHarish Mahendrakar   Action action_ = Action::kRead;
191*103e46e4SHarish Mahendrakar 
192*103e46e4SHarish Mahendrakar   // The current state of the parser.
193*103e46e4SHarish Mahendrakar   State state_;
194*103e46e4SHarish Mahendrakar 
195*103e46e4SHarish Mahendrakar   std::uint32_t header_size_;
196*103e46e4SHarish Mahendrakar 
197*103e46e4SHarish Mahendrakar   // The size of this element.
198*103e46e4SHarish Mahendrakar   std::uint64_t my_size_;
199*103e46e4SHarish Mahendrakar 
200*103e46e4SHarish Mahendrakar   std::uint64_t my_position_;
201*103e46e4SHarish Mahendrakar 
202*103e46e4SHarish Mahendrakar   std::uint64_t max_size_;
203*103e46e4SHarish Mahendrakar 
204*103e46e4SHarish Mahendrakar   // The total number of bytes read by this parser.
205*103e46e4SHarish Mahendrakar   std::uint64_t total_bytes_read_;
206*103e46e4SHarish Mahendrakar 
207*103e46e4SHarish Mahendrakar   // Set to true if parsing has completed and this parser consumed an extra
208*103e46e4SHarish Mahendrakar   // element header (ID and size) that wasn't from a child.
209*103e46e4SHarish Mahendrakar   bool has_cached_metadata_ = false;
210*103e46e4SHarish Mahendrakar 
211*103e46e4SHarish Mahendrakar   // Inserts the parser into the parsers_ map and asserts it is the only parser
212*103e46e4SHarish Mahendrakar   // registers to parse the corresponding Id.
213*103e46e4SHarish Mahendrakar   template <typename T>
InsertParser(T && parser)214*103e46e4SHarish Mahendrakar   void InsertParser(T&& parser) {
215*103e46e4SHarish Mahendrakar     bool inserted = parsers_.insert(std::forward<T>(parser)).second;
216*103e46e4SHarish Mahendrakar     (void)inserted;  // Silence unused variable warning.
217*103e46e4SHarish Mahendrakar     assert(inserted);  // Make sure there aren't duplicates.
218*103e46e4SHarish Mahendrakar   }
219*103e46e4SHarish Mahendrakar 
220*103e46e4SHarish Mahendrakar   // Common initialization logic for Init/InitAfterseek.
221*103e46e4SHarish Mahendrakar   void InitSetup(std::uint32_t header_size, std::uint64_t size_in_bytes,
222*103e46e4SHarish Mahendrakar                  std::uint64_t position);
223*103e46e4SHarish Mahendrakar 
224*103e46e4SHarish Mahendrakar   // Resets the internal parsers in preparation for parsing the next child.
225*103e46e4SHarish Mahendrakar   void PrepareForNextChild();
226*103e46e4SHarish Mahendrakar };
227*103e46e4SHarish Mahendrakar 
228*103e46e4SHarish Mahendrakar }  // namespace webm

#endif  // SRC_MASTER_PARSER_H_