1*103e46e4SHarish Mahendrakar // Copyright (c) 2016 The WebM project authors. All Rights Reserved. 2*103e46e4SHarish Mahendrakar // 3*103e46e4SHarish Mahendrakar // Use of this source code is governed by a BSD-style license 4*103e46e4SHarish Mahendrakar // that can be found in the LICENSE file in the root of the source 5*103e46e4SHarish Mahendrakar // tree. An additional intellectual property rights grant can be found 6*103e46e4SHarish Mahendrakar // in the file PATENTS. All contributing project authors may 7*103e46e4SHarish Mahendrakar // be found in the AUTHORS file in the root of the source tree. 8*103e46e4SHarish Mahendrakar #ifndef SRC_MASTER_PARSER_H_ 9*103e46e4SHarish Mahendrakar #define SRC_MASTER_PARSER_H_ 10*103e46e4SHarish Mahendrakar 11*103e46e4SHarish Mahendrakar #include <cassert> 12*103e46e4SHarish Mahendrakar #include <cstdint> 13*103e46e4SHarish Mahendrakar #include <functional> 14*103e46e4SHarish Mahendrakar #include <memory> 15*103e46e4SHarish Mahendrakar #include <type_traits> 16*103e46e4SHarish Mahendrakar #include <unordered_map> 17*103e46e4SHarish Mahendrakar #include <utility> 18*103e46e4SHarish Mahendrakar 19*103e46e4SHarish Mahendrakar #include "src/element_parser.h" 20*103e46e4SHarish Mahendrakar #include "src/id_parser.h" 21*103e46e4SHarish Mahendrakar #include "src/size_parser.h" 22*103e46e4SHarish Mahendrakar #include "src/skip_parser.h" 23*103e46e4SHarish Mahendrakar #include "src/unknown_parser.h" 24*103e46e4SHarish Mahendrakar #include "src/void_parser.h" 25*103e46e4SHarish Mahendrakar #include "webm/callback.h" 26*103e46e4SHarish Mahendrakar #include "webm/element.h" 27*103e46e4SHarish Mahendrakar #include "webm/id.h" 28*103e46e4SHarish Mahendrakar #include "webm/reader.h" 29*103e46e4SHarish Mahendrakar #include "webm/status.h" 30*103e46e4SHarish Mahendrakar 31*103e46e4SHarish Mahendrakar namespace webm { 32*103e46e4SHarish Mahendrakar 33*103e46e4SHarish Mahendrakar // A general purpose parser for EBML master elements. 34*103e46e4SHarish Mahendrakar // 35*103e46e4SHarish Mahendrakar // For example, if a document specification defines a Foo master element that 36*103e46e4SHarish Mahendrakar // has two boolean children (Bar and Baz), then a FooParser capable of parsing 37*103e46e4SHarish Mahendrakar // the Foo master element could be defined as follows: 38*103e46e4SHarish Mahendrakar // 39*103e46e4SHarish Mahendrakar // struct FooParser : public MasterParser { 40*103e46e4SHarish Mahendrakar // FooParser() 41*103e46e4SHarish Mahendrakar // : MasterParser(MakeChild<BoolParser>(Id::kBar), 42*103e46e4SHarish Mahendrakar // MakeChild<BoolParser>(Id::kBaz)) {} 43*103e46e4SHarish Mahendrakar // }; 44*103e46e4SHarish Mahendrakar // 45*103e46e4SHarish Mahendrakar // See the MasterValueParser for an alternative class for parsing master 46*103e46e4SHarish Mahendrakar // elements into a data structure. 47*103e46e4SHarish Mahendrakar class MasterParser : public ElementParser { 48*103e46e4SHarish Mahendrakar public: 49*103e46e4SHarish Mahendrakar // Constructs a new MasterParser that uses the given 50*103e46e4SHarish Mahendrakar // {Id, std::unique_ptr<ElementParser>} pairs to map child IDs to the 51*103e46e4SHarish Mahendrakar // appropriate parser/handler. Each argument must be of type 52*103e46e4SHarish Mahendrakar // std::pair<Id, std::unique_ptr<ElementParser>>. If a parser is not 53*103e46e4SHarish Mahendrakar // explicitly provided for Id::kVoid, a VoidParser will automatically be used 54*103e46e4SHarish Mahendrakar // for it. 55*103e46e4SHarish Mahendrakar // 56*103e46e4SHarish Mahendrakar // Initializer lists don't support move-only types (i.e. std::unique_ptr), so 57*103e46e4SHarish Mahendrakar // instead a variadic template is used. 58*103e46e4SHarish Mahendrakar template <typename... T> MasterParser(T &&...parser_pairs)59*103e46e4SHarish Mahendrakar explicit MasterParser(T&&... parser_pairs) { 60*103e46e4SHarish Mahendrakar // Prefer an odd reserve size. This makes libc++ use a prime number for the 61*103e46e4SHarish Mahendrakar // bucket count. Otherwise, if it happens to be a power of 2, then libc++ 62*103e46e4SHarish Mahendrakar // will use a power-of-2 bucket count (and since Matroska EBML IDs have low 63*103e46e4SHarish Mahendrakar // entropy in the low bits, there will be a lot of collisions). libstdc++ 64*103e46e4SHarish Mahendrakar // always prefers a prime bucket count. I'm not sure how MSVC or others are 65*103e46e4SHarish Mahendrakar // implemented, but this shouldn't adversely affect them even if they are 66*103e46e4SHarish Mahendrakar // implemented differently. Add one to the count because we'll likely need 67*103e46e4SHarish Mahendrakar // to insert a parser for Id::kVoid. 68*103e46e4SHarish Mahendrakar parsers_.reserve((sizeof...(T) + 1) | 1); 69*103e46e4SHarish Mahendrakar 70*103e46e4SHarish Mahendrakar // This dummy initializer list is just used to force the parameter pack to 71*103e46e4SHarish Mahendrakar // be expanded, which turns the expression into a for-each "loop" that 72*103e46e4SHarish Mahendrakar // inserts each argument into the map. 73*103e46e4SHarish Mahendrakar auto dummy = {0, (InsertParser(std::forward<T>(parser_pairs)), 0)...}; 74*103e46e4SHarish Mahendrakar (void)dummy; // Silence unused variable warning. 75*103e46e4SHarish Mahendrakar 76*103e46e4SHarish Mahendrakar if (parsers_.find(Id::kVoid) == parsers_.end()) { 77*103e46e4SHarish Mahendrakar InsertParser(MakeChild<VoidParser>(Id::kVoid)); 78*103e46e4SHarish Mahendrakar } 79*103e46e4SHarish Mahendrakar } 80*103e46e4SHarish Mahendrakar 81*103e46e4SHarish Mahendrakar MasterParser(const MasterParser&) = delete; 82*103e46e4SHarish Mahendrakar MasterParser& operator=(const MasterParser&) = delete; 83*103e46e4SHarish Mahendrakar 84*103e46e4SHarish Mahendrakar Status Init(const ElementMetadata& metadata, std::uint64_t max_size) override; 85*103e46e4SHarish Mahendrakar 86*103e46e4SHarish Mahendrakar void InitAfterSeek(const Ancestory& child_ancestory, 87*103e46e4SHarish Mahendrakar const ElementMetadata& child_metadata) override; 88*103e46e4SHarish Mahendrakar 89*103e46e4SHarish Mahendrakar Status Feed(Callback* callback, Reader* reader, 90*103e46e4SHarish Mahendrakar std::uint64_t* num_bytes_read) override; 91*103e46e4SHarish Mahendrakar 92*103e46e4SHarish Mahendrakar bool GetCachedMetadata(ElementMetadata* metadata) override; 93*103e46e4SHarish Mahendrakar header_size()94*103e46e4SHarish Mahendrakar std::uint32_t header_size() const { return header_size_; } 95*103e46e4SHarish Mahendrakar 96*103e46e4SHarish Mahendrakar // Gets the size of this element. May be called before the parse is fully 97*103e46e4SHarish Mahendrakar // complete (but only after Init() has already been called and successfully 98*103e46e4SHarish Mahendrakar // returned). size()99*103e46e4SHarish Mahendrakar std::uint64_t size() const { return my_size_; } 100*103e46e4SHarish Mahendrakar 101*103e46e4SHarish Mahendrakar // Gets absolute byte position of the start of the element in the byte stream. 102*103e46e4SHarish Mahendrakar // May be called before the parse is fully complete (but only after Init() has 103*103e46e4SHarish Mahendrakar // already been called and successfully returned). position()104*103e46e4SHarish Mahendrakar std::uint64_t position() const { return my_position_; } 105*103e46e4SHarish Mahendrakar 106*103e46e4SHarish Mahendrakar // Gets the metadata for the child that is currently being parsed. This may 107*103e46e4SHarish Mahendrakar // only be called while the child's body (not its header information like ID 108*103e46e4SHarish Mahendrakar // and size) is being parsed. child_metadata()109*103e46e4SHarish Mahendrakar const ElementMetadata& child_metadata() const { 110*103e46e4SHarish Mahendrakar assert(state_ == State::kValidatingChildSize || 111*103e46e4SHarish Mahendrakar state_ == State::kGettingAction || 112*103e46e4SHarish Mahendrakar state_ == State::kInitializingChildParser || 113*103e46e4SHarish Mahendrakar state_ == State::kReadingChildBody); 114*103e46e4SHarish Mahendrakar return child_metadata_; 115*103e46e4SHarish Mahendrakar } 116*103e46e4SHarish Mahendrakar 117*103e46e4SHarish Mahendrakar protected: 118*103e46e4SHarish Mahendrakar // Allocates a new parser of type T, forwarding args to the constructor, and 119*103e46e4SHarish Mahendrakar // creates a std::pair<Id, std::unique_ptr<ElementParser>> using the given id 120*103e46e4SHarish Mahendrakar // and the allocated parser. 121*103e46e4SHarish Mahendrakar template <typename T, typename... Args> MakeChild(Id id,Args &&...args)122*103e46e4SHarish Mahendrakar static std::pair<Id, std::unique_ptr<ElementParser>> MakeChild( 123*103e46e4SHarish Mahendrakar Id id, Args&&... args) { 124*103e46e4SHarish Mahendrakar std::unique_ptr<ElementParser> ptr(new T(std::forward<Args>(args)...)); 125*103e46e4SHarish Mahendrakar return std::pair<Id, std::unique_ptr<ElementParser>>(id, std::move(ptr)); 126*103e46e4SHarish Mahendrakar } 127*103e46e4SHarish Mahendrakar 128*103e46e4SHarish Mahendrakar private: 129*103e46e4SHarish Mahendrakar // Parsing states for the finite-state machine. 130*103e46e4SHarish Mahendrakar enum class State { 131*103e46e4SHarish Mahendrakar /* clang-format off */ 132*103e46e4SHarish Mahendrakar // State Transitions to state When 133*103e46e4SHarish Mahendrakar kFirstReadOfChildId, // kFinishingReadingChildId size(id) > 1 134*103e46e4SHarish Mahendrakar // kReadingChildSize size(id) == 1 135*103e46e4SHarish Mahendrakar // kEndReached EOF 136*103e46e4SHarish Mahendrakar kFinishingReadingChildId, // kReadingChildSize done 137*103e46e4SHarish Mahendrakar kReadingChildSize, // kValidatingChildSize done 138*103e46e4SHarish Mahendrakar kValidatingChildSize, // kGettingAction done 139*103e46e4SHarish Mahendrakar // kEndReached unknown id & unsized 140*103e46e4SHarish Mahendrakar kGettingAction, // kInitializingChildParser done 141*103e46e4SHarish Mahendrakar kInitializingChildParser, // kReadingChildBody done 142*103e46e4SHarish Mahendrakar kReadingChildBody, // kChildFullyParsed child parse done 143*103e46e4SHarish Mahendrakar kChildFullyParsed, // kValidatingChildSize cached metadata 144*103e46e4SHarish Mahendrakar // kFirstReadOfChildId read < my_size_ 145*103e46e4SHarish Mahendrakar // kEndReached read == my_size_ 146*103e46e4SHarish Mahendrakar kEndReached, // No transitions from here (must call Init) 147*103e46e4SHarish Mahendrakar /* clang-format on */ 148*103e46e4SHarish Mahendrakar }; 149*103e46e4SHarish Mahendrakar 150*103e46e4SHarish Mahendrakar using StdHashIdKeyType = std::underlying_type<Id>::type; 151*103e46e4SHarish Mahendrakar using StdHashId = std::hash<StdHashIdKeyType>; 152*103e46e4SHarish Mahendrakar 153*103e46e4SHarish Mahendrakar // Hash functor for hashing Id enums for storage in std::unordered_map. 154*103e46e4SHarish Mahendrakar struct IdHash : StdHashId { 155*103e46e4SHarish Mahendrakar #if __cpp_lib_is_invocable 156*103e46e4SHarish Mahendrakar using result_type = std::invoke_result<StdHashId, StdHashIdKeyType>::type; 157*103e46e4SHarish Mahendrakar #else 158*103e46e4SHarish Mahendrakar using result_type = StdHashId::result_type; 159*103e46e4SHarish Mahendrakar #endif // __cpp_lib_is_invocable 160*103e46e4SHarish Mahendrakar 161*103e46e4SHarish Mahendrakar // Returns the hash of the given id. operatorIdHash162*103e46e4SHarish Mahendrakar result_type operator()(Id id) const { 163*103e46e4SHarish Mahendrakar return StdHashId::operator()(static_cast<StdHashIdKeyType>(id)); 164*103e46e4SHarish Mahendrakar } 165*103e46e4SHarish Mahendrakar }; 166*103e46e4SHarish Mahendrakar 167*103e46e4SHarish Mahendrakar // The parser for parsing element Ids. 168*103e46e4SHarish Mahendrakar IdParser id_parser_; 169*103e46e4SHarish Mahendrakar 170*103e46e4SHarish Mahendrakar // The parser for parsing element sizes. 171*103e46e4SHarish Mahendrakar SizeParser size_parser_; 172*103e46e4SHarish Mahendrakar 173*103e46e4SHarish Mahendrakar // Metadata for the child element that is currently being parsed. 174*103e46e4SHarish Mahendrakar ElementMetadata child_metadata_; 175*103e46e4SHarish Mahendrakar 176*103e46e4SHarish Mahendrakar // Maps child IDs to the appropriate parser that can handle that child. 177*103e46e4SHarish Mahendrakar std::unordered_map<Id, std::unique_ptr<ElementParser>, IdHash> parsers_; 178*103e46e4SHarish Mahendrakar 179*103e46e4SHarish Mahendrakar // The parser that is used to parse unknown children. 180*103e46e4SHarish Mahendrakar UnknownParser unknown_parser_; 181*103e46e4SHarish Mahendrakar 182*103e46e4SHarish Mahendrakar // The parser that is used to skip over children. 183*103e46e4SHarish Mahendrakar SkipParser skip_parser_; 184*103e46e4SHarish Mahendrakar 185*103e46e4SHarish Mahendrakar // The parser that is being used to parse the current child. This must be null 186*103e46e4SHarish Mahendrakar // or a pointer in parsers_. 187*103e46e4SHarish Mahendrakar ElementParser* child_parser_; 188*103e46e4SHarish Mahendrakar 189*103e46e4SHarish Mahendrakar // The current parsing action for the child that is currently being parsed. 190*103e46e4SHarish Mahendrakar Action action_ = Action::kRead; 191*103e46e4SHarish Mahendrakar 192*103e46e4SHarish Mahendrakar // The current state of the parser. 193*103e46e4SHarish Mahendrakar State state_; 194*103e46e4SHarish Mahendrakar 195*103e46e4SHarish Mahendrakar std::uint32_t header_size_; 196*103e46e4SHarish Mahendrakar 197*103e46e4SHarish Mahendrakar // The size of this element. 198*103e46e4SHarish Mahendrakar std::uint64_t my_size_; 199*103e46e4SHarish Mahendrakar 200*103e46e4SHarish Mahendrakar std::uint64_t my_position_; 201*103e46e4SHarish Mahendrakar 202*103e46e4SHarish Mahendrakar std::uint64_t max_size_; 203*103e46e4SHarish Mahendrakar 204*103e46e4SHarish Mahendrakar // The total number of bytes read by this parser. 205*103e46e4SHarish Mahendrakar std::uint64_t total_bytes_read_; 206*103e46e4SHarish Mahendrakar 207*103e46e4SHarish Mahendrakar // Set to true if parsing has completed and this parser consumed an extra 208*103e46e4SHarish Mahendrakar // element header (ID and size) that wasn't from a child. 209*103e46e4SHarish Mahendrakar bool has_cached_metadata_ = false; 210*103e46e4SHarish Mahendrakar 211*103e46e4SHarish Mahendrakar // Inserts the parser into the parsers_ map and asserts it is the only parser 212*103e46e4SHarish Mahendrakar // registers to parse the corresponding Id. 213*103e46e4SHarish Mahendrakar template <typename T> InsertParser(T && parser)214*103e46e4SHarish Mahendrakar void InsertParser(T&& parser) { 215*103e46e4SHarish Mahendrakar bool inserted = parsers_.insert(std::forward<T>(parser)).second; 216*103e46e4SHarish Mahendrakar (void)inserted; // Silence unused variable warning. 217*103e46e4SHarish Mahendrakar assert(inserted); // Make sure there aren't duplicates. 218*103e46e4SHarish Mahendrakar } 219*103e46e4SHarish Mahendrakar 220*103e46e4SHarish Mahendrakar // Common initialization logic for Init/InitAfterseek. 221*103e46e4SHarish Mahendrakar void InitSetup(std::uint32_t header_size, std::uint64_t size_in_bytes, 222*103e46e4SHarish Mahendrakar std::uint64_t position); 223*103e46e4SHarish Mahendrakar 224*103e46e4SHarish Mahendrakar // Resets the internal parsers in preparation for parsing the next child. 225*103e46e4SHarish Mahendrakar void PrepareForNextChild(); 226*103e46e4SHarish Mahendrakar }; 227*103e46e4SHarish Mahendrakar 228*103e46e4SHarish Mahendrakar } // namespace webm 229*103e46e4SHarish Mahendrakar 230*103e46e4SHarish Mahendrakar #endif // SRC_MASTER_PARSER_H_ 231