xref: /aosp_15_r20/external/libwebm/webm_parser/src/webm_parser.cc (revision 103e46e4cd4b6efcf6001f23fa8665fb110abf8d)
// Copyright (c) 2016 The WebM project authors. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the LICENSE file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS.  All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
#include "webm/webm_parser.h"

#include <cassert>
#include <cstdint>
#include <utility>

#include "src/ebml_parser.h"
#include "src/master_parser.h"
#include "src/segment_parser.h"
#include "src/unknown_parser.h"
#include "webm/element.h"

18*103e46e4SHarish Mahendrakar 
19*103e46e4SHarish Mahendrakar namespace webm {
20*103e46e4SHarish Mahendrakar 
21*103e46e4SHarish Mahendrakar // Parses WebM EBML documents (i.e. level-0 WebM elements).
22*103e46e4SHarish Mahendrakar class WebmParser::DocumentParser {
23*103e46e4SHarish Mahendrakar  public:
24*103e46e4SHarish Mahendrakar   // Resets the parser after a seek to a new position in the reader.
DidSeek()25*103e46e4SHarish Mahendrakar   void DidSeek() {
26*103e46e4SHarish Mahendrakar     PrepareForNextChild();
27*103e46e4SHarish Mahendrakar     did_seek_ = true;
28*103e46e4SHarish Mahendrakar     state_ = State::kBegin;
29*103e46e4SHarish Mahendrakar   }
30*103e46e4SHarish Mahendrakar 
31*103e46e4SHarish Mahendrakar   // Feeds the parser; will return Status::kOkCompleted when the reader returns
32*103e46e4SHarish Mahendrakar   // Status::kEndOfFile, but only if the parser has already completed parsing
33*103e46e4SHarish Mahendrakar   // its child elements.
Feed(Callback * callback,Reader * reader)34*103e46e4SHarish Mahendrakar   Status Feed(Callback* callback, Reader* reader) {
35*103e46e4SHarish Mahendrakar     assert(callback != nullptr);
36*103e46e4SHarish Mahendrakar     assert(reader != nullptr);
37*103e46e4SHarish Mahendrakar 
38*103e46e4SHarish Mahendrakar     Callback* const original_callback = callback;
39*103e46e4SHarish Mahendrakar     if (action_ == Action::kSkip) {
40*103e46e4SHarish Mahendrakar       callback = &skip_callback_;
41*103e46e4SHarish Mahendrakar     }
42*103e46e4SHarish Mahendrakar 
43*103e46e4SHarish Mahendrakar     Status status;
44*103e46e4SHarish Mahendrakar     std::uint64_t num_bytes_read;
45*103e46e4SHarish Mahendrakar     while (true) {
46*103e46e4SHarish Mahendrakar       switch (state_) {
47*103e46e4SHarish Mahendrakar         case State::kBegin: {
48*103e46e4SHarish Mahendrakar           child_metadata_.header_size = 0;
49*103e46e4SHarish Mahendrakar           child_metadata_.position = reader->Position();
50*103e46e4SHarish Mahendrakar           state_ = State::kReadingChildId;
51*103e46e4SHarish Mahendrakar           continue;
52*103e46e4SHarish Mahendrakar         }
53*103e46e4SHarish Mahendrakar 
54*103e46e4SHarish Mahendrakar         case State::kReadingChildId: {
55*103e46e4SHarish Mahendrakar           assert(child_parser_ == nullptr);
56*103e46e4SHarish Mahendrakar           status = id_parser_.Feed(callback, reader, &num_bytes_read);
57*103e46e4SHarish Mahendrakar           child_metadata_.header_size += num_bytes_read;
58*103e46e4SHarish Mahendrakar           if (!status.completed_ok()) {
59*103e46e4SHarish Mahendrakar             if (status.code == Status::kEndOfFile &&
60*103e46e4SHarish Mahendrakar                 reader->Position() == child_metadata_.position) {
61*103e46e4SHarish Mahendrakar               state_ = State::kEndReached;
62*103e46e4SHarish Mahendrakar               continue;
63*103e46e4SHarish Mahendrakar             }
64*103e46e4SHarish Mahendrakar             return status;
65*103e46e4SHarish Mahendrakar           }
66*103e46e4SHarish Mahendrakar           state_ = State::kReadingChildSize;
67*103e46e4SHarish Mahendrakar           continue;
68*103e46e4SHarish Mahendrakar         }
69*103e46e4SHarish Mahendrakar 
70*103e46e4SHarish Mahendrakar         case State::kReadingChildSize: {
71*103e46e4SHarish Mahendrakar           assert(child_parser_ == nullptr);
72*103e46e4SHarish Mahendrakar           status = size_parser_.Feed(callback, reader, &num_bytes_read);
73*103e46e4SHarish Mahendrakar           child_metadata_.header_size += num_bytes_read;
74*103e46e4SHarish Mahendrakar           if (!status.completed_ok()) {
75*103e46e4SHarish Mahendrakar             return status;
76*103e46e4SHarish Mahendrakar           }
77*103e46e4SHarish Mahendrakar           child_metadata_.id = id_parser_.id();
78*103e46e4SHarish Mahendrakar           child_metadata_.size = size_parser_.size();
79*103e46e4SHarish Mahendrakar           state_ = State::kValidatingChildSize;
80*103e46e4SHarish Mahendrakar           continue;
81*103e46e4SHarish Mahendrakar         }
82*103e46e4SHarish Mahendrakar 
83*103e46e4SHarish Mahendrakar         case State::kValidatingChildSize: {
84*103e46e4SHarish Mahendrakar           assert(child_parser_ == nullptr);
85*103e46e4SHarish Mahendrakar 
86*103e46e4SHarish Mahendrakar           if (child_metadata_.id == Id::kSegment) {
87*103e46e4SHarish Mahendrakar             child_parser_ = &segment_parser_;
88*103e46e4SHarish Mahendrakar             did_seek_ = false;
89*103e46e4SHarish Mahendrakar             state_ = State::kGettingAction;
90*103e46e4SHarish Mahendrakar             continue;
91*103e46e4SHarish Mahendrakar           } else if (child_metadata_.id == Id::kEbml) {
92*103e46e4SHarish Mahendrakar             child_parser_ = &ebml_parser_;
93*103e46e4SHarish Mahendrakar             did_seek_ = false;
94*103e46e4SHarish Mahendrakar             state_ = State::kGettingAction;
95*103e46e4SHarish Mahendrakar             continue;
96*103e46e4SHarish Mahendrakar           }
97*103e46e4SHarish Mahendrakar 
98*103e46e4SHarish Mahendrakar           Ancestory ancestory;
99*103e46e4SHarish Mahendrakar           if (did_seek_ && Ancestory::ById(child_metadata_.id, &ancestory)) {
100*103e46e4SHarish Mahendrakar             assert(!ancestory.empty());
101*103e46e4SHarish Mahendrakar             assert(ancestory.id() == Id::kSegment ||
102*103e46e4SHarish Mahendrakar                    ancestory.id() == Id::kEbml);
103*103e46e4SHarish Mahendrakar 
104*103e46e4SHarish Mahendrakar             if (ancestory.id() == Id::kSegment) {
105*103e46e4SHarish Mahendrakar               child_parser_ = &segment_parser_;
106*103e46e4SHarish Mahendrakar             } else {
107*103e46e4SHarish Mahendrakar               child_parser_ = &ebml_parser_;
108*103e46e4SHarish Mahendrakar             }
109*103e46e4SHarish Mahendrakar 
110*103e46e4SHarish Mahendrakar             child_parser_->InitAfterSeek(ancestory.next(), child_metadata_);
111*103e46e4SHarish Mahendrakar             child_metadata_.id = ancestory.id();
112*103e46e4SHarish Mahendrakar             child_metadata_.header_size = kUnknownHeaderSize;
113*103e46e4SHarish Mahendrakar             child_metadata_.size = kUnknownElementSize;
114*103e46e4SHarish Mahendrakar             child_metadata_.position = kUnknownElementPosition;
115*103e46e4SHarish Mahendrakar             did_seek_ = false;
116*103e46e4SHarish Mahendrakar             action_ = Action::kRead;
117*103e46e4SHarish Mahendrakar             state_ = State::kReadingChildBody;
118*103e46e4SHarish Mahendrakar             continue;
119*103e46e4SHarish Mahendrakar           }
120*103e46e4SHarish Mahendrakar 
121*103e46e4SHarish Mahendrakar           if (child_metadata_.id == Id::kVoid) {
122*103e46e4SHarish Mahendrakar             child_parser_ = &void_parser_;
123*103e46e4SHarish Mahendrakar           } else {
124*103e46e4SHarish Mahendrakar             if (child_metadata_.size == kUnknownElementSize) {
125*103e46e4SHarish Mahendrakar               return Status(Status::kIndefiniteUnknownElement);
126*103e46e4SHarish Mahendrakar             }
127*103e46e4SHarish Mahendrakar             child_parser_ = &unknown_parser_;
128*103e46e4SHarish Mahendrakar           }
129*103e46e4SHarish Mahendrakar           state_ = State::kGettingAction;
130*103e46e4SHarish Mahendrakar           continue;
131*103e46e4SHarish Mahendrakar         }
132*103e46e4SHarish Mahendrakar 
133*103e46e4SHarish Mahendrakar         case State::kGettingAction: {
134*103e46e4SHarish Mahendrakar           assert(child_parser_ != nullptr);
135*103e46e4SHarish Mahendrakar           status = callback->OnElementBegin(child_metadata_, &action_);
136*103e46e4SHarish Mahendrakar           if (!status.completed_ok()) {
137*103e46e4SHarish Mahendrakar             return status;
138*103e46e4SHarish Mahendrakar           }
139*103e46e4SHarish Mahendrakar 
140*103e46e4SHarish Mahendrakar           if (action_ == Action::kSkip) {
141*103e46e4SHarish Mahendrakar             callback = &skip_callback_;
142*103e46e4SHarish Mahendrakar             if (child_metadata_.size != kUnknownElementSize) {
143*103e46e4SHarish Mahendrakar               child_parser_ = &skip_parser_;
144*103e46e4SHarish Mahendrakar             }
145*103e46e4SHarish Mahendrakar           }
146*103e46e4SHarish Mahendrakar           state_ = State::kInitializingChildParser;
147*103e46e4SHarish Mahendrakar           continue;
148*103e46e4SHarish Mahendrakar         }
149*103e46e4SHarish Mahendrakar 
150*103e46e4SHarish Mahendrakar         case State::kInitializingChildParser: {
151*103e46e4SHarish Mahendrakar           assert(child_parser_ != nullptr);
152*103e46e4SHarish Mahendrakar           status = child_parser_->Init(child_metadata_, child_metadata_.size);
153*103e46e4SHarish Mahendrakar           if (!status.completed_ok()) {
154*103e46e4SHarish Mahendrakar             return status;
155*103e46e4SHarish Mahendrakar           }
156*103e46e4SHarish Mahendrakar           state_ = State::kReadingChildBody;
157*103e46e4SHarish Mahendrakar           continue;
158*103e46e4SHarish Mahendrakar         }
159*103e46e4SHarish Mahendrakar 
160*103e46e4SHarish Mahendrakar         case State::kReadingChildBody: {
161*103e46e4SHarish Mahendrakar           assert(child_parser_ != nullptr);
162*103e46e4SHarish Mahendrakar           status = child_parser_->Feed(callback, reader, &num_bytes_read);
163*103e46e4SHarish Mahendrakar           if (!status.completed_ok()) {
164*103e46e4SHarish Mahendrakar             return status;
165*103e46e4SHarish Mahendrakar           }
166*103e46e4SHarish Mahendrakar           if (child_parser_->GetCachedMetadata(&child_metadata_)) {
167*103e46e4SHarish Mahendrakar             state_ = State::kValidatingChildSize;
168*103e46e4SHarish Mahendrakar           } else {
169*103e46e4SHarish Mahendrakar             child_metadata_.header_size = 0;
170*103e46e4SHarish Mahendrakar             state_ = State::kReadingChildId;
171*103e46e4SHarish Mahendrakar           }
172*103e46e4SHarish Mahendrakar           PrepareForNextChild();
173*103e46e4SHarish Mahendrakar           callback = original_callback;
174*103e46e4SHarish Mahendrakar           child_metadata_.position = reader->Position();
175*103e46e4SHarish Mahendrakar           continue;
176*103e46e4SHarish Mahendrakar         }
177*103e46e4SHarish Mahendrakar 
178*103e46e4SHarish Mahendrakar         case State::kEndReached: {
179*103e46e4SHarish Mahendrakar           return Status(Status::kOkCompleted);
180*103e46e4SHarish Mahendrakar         }
181*103e46e4SHarish Mahendrakar       }
182*103e46e4SHarish Mahendrakar     }
183*103e46e4SHarish Mahendrakar   }
184*103e46e4SHarish Mahendrakar 
185*103e46e4SHarish Mahendrakar  private:
186*103e46e4SHarish Mahendrakar   // Parsing states for the finite-state machine.
187*103e46e4SHarish Mahendrakar   enum class State {
188*103e46e4SHarish Mahendrakar     /* clang-format off */
189*103e46e4SHarish Mahendrakar     // State                      Transitions to state      When
190*103e46e4SHarish Mahendrakar     kBegin,                    // kReadingChildId           done
191*103e46e4SHarish Mahendrakar     kReadingChildId,           // kReadingChildSize         done
192*103e46e4SHarish Mahendrakar                                // kEndReached               EOF
193*103e46e4SHarish Mahendrakar     kReadingChildSize,         // kValidatingChildSize      done
194*103e46e4SHarish Mahendrakar     kValidatingChildSize,      // kGettingAction            done
195*103e46e4SHarish Mahendrakar     kGettingAction,            // kInitializingChildParser  done
196*103e46e4SHarish Mahendrakar     kInitializingChildParser,  // kReadingChildBody         done
197*103e46e4SHarish Mahendrakar     kReadingChildBody,         // kValidatingChildSize      cached metadata
198*103e46e4SHarish Mahendrakar                                // kReadingChildId           otherwise
199*103e46e4SHarish Mahendrakar     kEndReached,               // No transitions from here
200*103e46e4SHarish Mahendrakar     /* clang-format on */
201*103e46e4SHarish Mahendrakar   };
202*103e46e4SHarish Mahendrakar 
203*103e46e4SHarish Mahendrakar   // The parser for parsing child element Ids.
204*103e46e4SHarish Mahendrakar   IdParser id_parser_;
205*103e46e4SHarish Mahendrakar 
206*103e46e4SHarish Mahendrakar   // The parser for parsing child element sizes.
207*103e46e4SHarish Mahendrakar   SizeParser size_parser_;
208*103e46e4SHarish Mahendrakar 
209*103e46e4SHarish Mahendrakar   // The parser for Id::kEbml elements.
210*103e46e4SHarish Mahendrakar   EbmlParser ebml_parser_;
211*103e46e4SHarish Mahendrakar 
212*103e46e4SHarish Mahendrakar   // The parser for Id::kSegment child elements.
213*103e46e4SHarish Mahendrakar   SegmentParser segment_parser_;
214*103e46e4SHarish Mahendrakar 
215*103e46e4SHarish Mahendrakar   // The parser for Id::kVoid child elements.
216*103e46e4SHarish Mahendrakar   VoidParser void_parser_;
217*103e46e4SHarish Mahendrakar 
218*103e46e4SHarish Mahendrakar   // The parser used when skipping elements (if the element's size is known).
219*103e46e4SHarish Mahendrakar   SkipParser skip_parser_;
220*103e46e4SHarish Mahendrakar 
221*103e46e4SHarish Mahendrakar   // The parser used for unknown children.
222*103e46e4SHarish Mahendrakar   UnknownParser unknown_parser_;
223*103e46e4SHarish Mahendrakar 
224*103e46e4SHarish Mahendrakar   // The callback used when skipping elements.
225*103e46e4SHarish Mahendrakar   SkipCallback skip_callback_;
226*103e46e4SHarish Mahendrakar 
227*103e46e4SHarish Mahendrakar   // The parser that is parsing the current child element.
228*103e46e4SHarish Mahendrakar   ElementParser* child_parser_ = nullptr;
229*103e46e4SHarish Mahendrakar 
230*103e46e4SHarish Mahendrakar   // Metadata for the current child being parsed.
231*103e46e4SHarish Mahendrakar   ElementMetadata child_metadata_ = {};
232*103e46e4SHarish Mahendrakar 
233*103e46e4SHarish Mahendrakar   // Action for the current child being parsed.
234*103e46e4SHarish Mahendrakar   Action action_ = Action::kRead;
235*103e46e4SHarish Mahendrakar 
236*103e46e4SHarish Mahendrakar   // True if a seek was performed and the parser needs to handle it.
237*103e46e4SHarish Mahendrakar   bool did_seek_ = false;
238*103e46e4SHarish Mahendrakar 
239*103e46e4SHarish Mahendrakar   // The current state of the finite state machine.
240*103e46e4SHarish Mahendrakar   State state_ = State::kBegin;
241*103e46e4SHarish Mahendrakar 
242*103e46e4SHarish Mahendrakar   // Resets state in preparation for parsing a child element.
PrepareForNextChild()243*103e46e4SHarish Mahendrakar   void PrepareForNextChild() {
244*103e46e4SHarish Mahendrakar     id_parser_ = {};
245*103e46e4SHarish Mahendrakar     size_parser_ = {};
246*103e46e4SHarish Mahendrakar     child_parser_ = nullptr;
247*103e46e4SHarish Mahendrakar     action_ = Action::kRead;
248*103e46e4SHarish Mahendrakar   }
249*103e46e4SHarish Mahendrakar };
250*103e46e4SHarish Mahendrakar 
251*103e46e4SHarish Mahendrakar // We have to explicitly declare a destructor (even if it's just defaulted)
252*103e46e4SHarish Mahendrakar // because using the pimpl idiom with std::unique_ptr requires it. See Herb
253*103e46e4SHarish Mahendrakar // Sutter's GotW #100 for further explanation.
254*103e46e4SHarish Mahendrakar WebmParser::~WebmParser() = default;
255*103e46e4SHarish Mahendrakar 
WebmParser()256*103e46e4SHarish Mahendrakar WebmParser::WebmParser() : parser_(new DocumentParser) {}
257*103e46e4SHarish Mahendrakar 
DidSeek()258*103e46e4SHarish Mahendrakar void WebmParser::DidSeek() {
259*103e46e4SHarish Mahendrakar   parser_->DidSeek();
260*103e46e4SHarish Mahendrakar   parsing_status_ = Status(Status::kOkPartial);
261*103e46e4SHarish Mahendrakar }
262*103e46e4SHarish Mahendrakar 
Feed(Callback * callback,Reader * reader)263*103e46e4SHarish Mahendrakar Status WebmParser::Feed(Callback* callback, Reader* reader) {
264*103e46e4SHarish Mahendrakar   assert(callback != nullptr);
265*103e46e4SHarish Mahendrakar   assert(reader != nullptr);
266*103e46e4SHarish Mahendrakar 
267*103e46e4SHarish Mahendrakar   if (parsing_status_.is_parsing_error()) {
268*103e46e4SHarish Mahendrakar     return parsing_status_;
269*103e46e4SHarish Mahendrakar   }
270*103e46e4SHarish Mahendrakar   parsing_status_ = parser_->Feed(callback, reader);
271*103e46e4SHarish Mahendrakar   return parsing_status_;
272*103e46e4SHarish Mahendrakar }
273*103e46e4SHarish Mahendrakar 
Swap(WebmParser * other)274*103e46e4SHarish Mahendrakar void WebmParser::Swap(WebmParser* other) {
275*103e46e4SHarish Mahendrakar   assert(other != nullptr);
276*103e46e4SHarish Mahendrakar   parser_.swap(other->parser_);
277*103e46e4SHarish Mahendrakar   std::swap(parsing_status_, other->parsing_status_);
278*103e46e4SHarish Mahendrakar }
279*103e46e4SHarish Mahendrakar 
swap(WebmParser & left,WebmParser & right)280*103e46e4SHarish Mahendrakar void swap(WebmParser& left, WebmParser& right) { left.Swap(&right); }
281*103e46e4SHarish Mahendrakar 
282*103e46e4SHarish Mahendrakar }  // namespace webm
283