1*103e46e4SHarish Mahendrakar // Copyright (c) 2016 The WebM project authors. All Rights Reserved.
2*103e46e4SHarish Mahendrakar //
3*103e46e4SHarish Mahendrakar // Use of this source code is governed by a BSD-style license
4*103e46e4SHarish Mahendrakar // that can be found in the LICENSE file in the root of the source
5*103e46e4SHarish Mahendrakar // tree. An additional intellectual property rights grant can be found
6*103e46e4SHarish Mahendrakar // in the file PATENTS. All contributing project authors may
7*103e46e4SHarish Mahendrakar // be found in the AUTHORS file in the root of the source tree.
8*103e46e4SHarish Mahendrakar #include "webm/webm_parser.h"
9*103e46e4SHarish Mahendrakar
10*103e46e4SHarish Mahendrakar #include <cassert>
11*103e46e4SHarish Mahendrakar #include <cstdint>
12*103e46e4SHarish Mahendrakar
13*103e46e4SHarish Mahendrakar #include "src/ebml_parser.h"
14*103e46e4SHarish Mahendrakar #include "src/master_parser.h"
15*103e46e4SHarish Mahendrakar #include "src/segment_parser.h"
16*103e46e4SHarish Mahendrakar #include "src/unknown_parser.h"
17*103e46e4SHarish Mahendrakar #include "webm/element.h"
18*103e46e4SHarish Mahendrakar
19*103e46e4SHarish Mahendrakar namespace webm {
20*103e46e4SHarish Mahendrakar
21*103e46e4SHarish Mahendrakar // Parses WebM EBML documents (i.e. level-0 WebM elements).
22*103e46e4SHarish Mahendrakar class WebmParser::DocumentParser {
23*103e46e4SHarish Mahendrakar public:
24*103e46e4SHarish Mahendrakar // Resets the parser after a seek to a new position in the reader.
DidSeek()25*103e46e4SHarish Mahendrakar void DidSeek() {
26*103e46e4SHarish Mahendrakar PrepareForNextChild();
27*103e46e4SHarish Mahendrakar did_seek_ = true;
28*103e46e4SHarish Mahendrakar state_ = State::kBegin;
29*103e46e4SHarish Mahendrakar }
30*103e46e4SHarish Mahendrakar
31*103e46e4SHarish Mahendrakar // Feeds the parser; will return Status::kOkCompleted when the reader returns
32*103e46e4SHarish Mahendrakar // Status::kEndOfFile, but only if the parser has already completed parsing
33*103e46e4SHarish Mahendrakar // its child elements.
Feed(Callback * callback,Reader * reader)34*103e46e4SHarish Mahendrakar Status Feed(Callback* callback, Reader* reader) {
35*103e46e4SHarish Mahendrakar assert(callback != nullptr);
36*103e46e4SHarish Mahendrakar assert(reader != nullptr);
37*103e46e4SHarish Mahendrakar
38*103e46e4SHarish Mahendrakar Callback* const original_callback = callback;
39*103e46e4SHarish Mahendrakar if (action_ == Action::kSkip) {
40*103e46e4SHarish Mahendrakar callback = &skip_callback_;
41*103e46e4SHarish Mahendrakar }
42*103e46e4SHarish Mahendrakar
43*103e46e4SHarish Mahendrakar Status status;
44*103e46e4SHarish Mahendrakar std::uint64_t num_bytes_read;
45*103e46e4SHarish Mahendrakar while (true) {
46*103e46e4SHarish Mahendrakar switch (state_) {
47*103e46e4SHarish Mahendrakar case State::kBegin: {
48*103e46e4SHarish Mahendrakar child_metadata_.header_size = 0;
49*103e46e4SHarish Mahendrakar child_metadata_.position = reader->Position();
50*103e46e4SHarish Mahendrakar state_ = State::kReadingChildId;
51*103e46e4SHarish Mahendrakar continue;
52*103e46e4SHarish Mahendrakar }
53*103e46e4SHarish Mahendrakar
54*103e46e4SHarish Mahendrakar case State::kReadingChildId: {
55*103e46e4SHarish Mahendrakar assert(child_parser_ == nullptr);
56*103e46e4SHarish Mahendrakar status = id_parser_.Feed(callback, reader, &num_bytes_read);
57*103e46e4SHarish Mahendrakar child_metadata_.header_size += num_bytes_read;
58*103e46e4SHarish Mahendrakar if (!status.completed_ok()) {
59*103e46e4SHarish Mahendrakar if (status.code == Status::kEndOfFile &&
60*103e46e4SHarish Mahendrakar reader->Position() == child_metadata_.position) {
61*103e46e4SHarish Mahendrakar state_ = State::kEndReached;
62*103e46e4SHarish Mahendrakar continue;
63*103e46e4SHarish Mahendrakar }
64*103e46e4SHarish Mahendrakar return status;
65*103e46e4SHarish Mahendrakar }
66*103e46e4SHarish Mahendrakar state_ = State::kReadingChildSize;
67*103e46e4SHarish Mahendrakar continue;
68*103e46e4SHarish Mahendrakar }
69*103e46e4SHarish Mahendrakar
70*103e46e4SHarish Mahendrakar case State::kReadingChildSize: {
71*103e46e4SHarish Mahendrakar assert(child_parser_ == nullptr);
72*103e46e4SHarish Mahendrakar status = size_parser_.Feed(callback, reader, &num_bytes_read);
73*103e46e4SHarish Mahendrakar child_metadata_.header_size += num_bytes_read;
74*103e46e4SHarish Mahendrakar if (!status.completed_ok()) {
75*103e46e4SHarish Mahendrakar return status;
76*103e46e4SHarish Mahendrakar }
77*103e46e4SHarish Mahendrakar child_metadata_.id = id_parser_.id();
78*103e46e4SHarish Mahendrakar child_metadata_.size = size_parser_.size();
79*103e46e4SHarish Mahendrakar state_ = State::kValidatingChildSize;
80*103e46e4SHarish Mahendrakar continue;
81*103e46e4SHarish Mahendrakar }
82*103e46e4SHarish Mahendrakar
83*103e46e4SHarish Mahendrakar case State::kValidatingChildSize: {
84*103e46e4SHarish Mahendrakar assert(child_parser_ == nullptr);
85*103e46e4SHarish Mahendrakar
86*103e46e4SHarish Mahendrakar if (child_metadata_.id == Id::kSegment) {
87*103e46e4SHarish Mahendrakar child_parser_ = &segment_parser_;
88*103e46e4SHarish Mahendrakar did_seek_ = false;
89*103e46e4SHarish Mahendrakar state_ = State::kGettingAction;
90*103e46e4SHarish Mahendrakar continue;
91*103e46e4SHarish Mahendrakar } else if (child_metadata_.id == Id::kEbml) {
92*103e46e4SHarish Mahendrakar child_parser_ = &ebml_parser_;
93*103e46e4SHarish Mahendrakar did_seek_ = false;
94*103e46e4SHarish Mahendrakar state_ = State::kGettingAction;
95*103e46e4SHarish Mahendrakar continue;
96*103e46e4SHarish Mahendrakar }
97*103e46e4SHarish Mahendrakar
98*103e46e4SHarish Mahendrakar Ancestory ancestory;
99*103e46e4SHarish Mahendrakar if (did_seek_ && Ancestory::ById(child_metadata_.id, &ancestory)) {
100*103e46e4SHarish Mahendrakar assert(!ancestory.empty());
101*103e46e4SHarish Mahendrakar assert(ancestory.id() == Id::kSegment ||
102*103e46e4SHarish Mahendrakar ancestory.id() == Id::kEbml);
103*103e46e4SHarish Mahendrakar
104*103e46e4SHarish Mahendrakar if (ancestory.id() == Id::kSegment) {
105*103e46e4SHarish Mahendrakar child_parser_ = &segment_parser_;
106*103e46e4SHarish Mahendrakar } else {
107*103e46e4SHarish Mahendrakar child_parser_ = &ebml_parser_;
108*103e46e4SHarish Mahendrakar }
109*103e46e4SHarish Mahendrakar
110*103e46e4SHarish Mahendrakar child_parser_->InitAfterSeek(ancestory.next(), child_metadata_);
111*103e46e4SHarish Mahendrakar child_metadata_.id = ancestory.id();
112*103e46e4SHarish Mahendrakar child_metadata_.header_size = kUnknownHeaderSize;
113*103e46e4SHarish Mahendrakar child_metadata_.size = kUnknownElementSize;
114*103e46e4SHarish Mahendrakar child_metadata_.position = kUnknownElementPosition;
115*103e46e4SHarish Mahendrakar did_seek_ = false;
116*103e46e4SHarish Mahendrakar action_ = Action::kRead;
117*103e46e4SHarish Mahendrakar state_ = State::kReadingChildBody;
118*103e46e4SHarish Mahendrakar continue;
119*103e46e4SHarish Mahendrakar }
120*103e46e4SHarish Mahendrakar
121*103e46e4SHarish Mahendrakar if (child_metadata_.id == Id::kVoid) {
122*103e46e4SHarish Mahendrakar child_parser_ = &void_parser_;
123*103e46e4SHarish Mahendrakar } else {
124*103e46e4SHarish Mahendrakar if (child_metadata_.size == kUnknownElementSize) {
125*103e46e4SHarish Mahendrakar return Status(Status::kIndefiniteUnknownElement);
126*103e46e4SHarish Mahendrakar }
127*103e46e4SHarish Mahendrakar child_parser_ = &unknown_parser_;
128*103e46e4SHarish Mahendrakar }
129*103e46e4SHarish Mahendrakar state_ = State::kGettingAction;
130*103e46e4SHarish Mahendrakar continue;
131*103e46e4SHarish Mahendrakar }
132*103e46e4SHarish Mahendrakar
133*103e46e4SHarish Mahendrakar case State::kGettingAction: {
134*103e46e4SHarish Mahendrakar assert(child_parser_ != nullptr);
135*103e46e4SHarish Mahendrakar status = callback->OnElementBegin(child_metadata_, &action_);
136*103e46e4SHarish Mahendrakar if (!status.completed_ok()) {
137*103e46e4SHarish Mahendrakar return status;
138*103e46e4SHarish Mahendrakar }
139*103e46e4SHarish Mahendrakar
140*103e46e4SHarish Mahendrakar if (action_ == Action::kSkip) {
141*103e46e4SHarish Mahendrakar callback = &skip_callback_;
142*103e46e4SHarish Mahendrakar if (child_metadata_.size != kUnknownElementSize) {
143*103e46e4SHarish Mahendrakar child_parser_ = &skip_parser_;
144*103e46e4SHarish Mahendrakar }
145*103e46e4SHarish Mahendrakar }
146*103e46e4SHarish Mahendrakar state_ = State::kInitializingChildParser;
147*103e46e4SHarish Mahendrakar continue;
148*103e46e4SHarish Mahendrakar }
149*103e46e4SHarish Mahendrakar
150*103e46e4SHarish Mahendrakar case State::kInitializingChildParser: {
151*103e46e4SHarish Mahendrakar assert(child_parser_ != nullptr);
152*103e46e4SHarish Mahendrakar status = child_parser_->Init(child_metadata_, child_metadata_.size);
153*103e46e4SHarish Mahendrakar if (!status.completed_ok()) {
154*103e46e4SHarish Mahendrakar return status;
155*103e46e4SHarish Mahendrakar }
156*103e46e4SHarish Mahendrakar state_ = State::kReadingChildBody;
157*103e46e4SHarish Mahendrakar continue;
158*103e46e4SHarish Mahendrakar }
159*103e46e4SHarish Mahendrakar
160*103e46e4SHarish Mahendrakar case State::kReadingChildBody: {
161*103e46e4SHarish Mahendrakar assert(child_parser_ != nullptr);
162*103e46e4SHarish Mahendrakar status = child_parser_->Feed(callback, reader, &num_bytes_read);
163*103e46e4SHarish Mahendrakar if (!status.completed_ok()) {
164*103e46e4SHarish Mahendrakar return status;
165*103e46e4SHarish Mahendrakar }
166*103e46e4SHarish Mahendrakar if (child_parser_->GetCachedMetadata(&child_metadata_)) {
167*103e46e4SHarish Mahendrakar state_ = State::kValidatingChildSize;
168*103e46e4SHarish Mahendrakar } else {
169*103e46e4SHarish Mahendrakar child_metadata_.header_size = 0;
170*103e46e4SHarish Mahendrakar state_ = State::kReadingChildId;
171*103e46e4SHarish Mahendrakar }
172*103e46e4SHarish Mahendrakar PrepareForNextChild();
173*103e46e4SHarish Mahendrakar callback = original_callback;
174*103e46e4SHarish Mahendrakar child_metadata_.position = reader->Position();
175*103e46e4SHarish Mahendrakar continue;
176*103e46e4SHarish Mahendrakar }
177*103e46e4SHarish Mahendrakar
178*103e46e4SHarish Mahendrakar case State::kEndReached: {
179*103e46e4SHarish Mahendrakar return Status(Status::kOkCompleted);
180*103e46e4SHarish Mahendrakar }
181*103e46e4SHarish Mahendrakar }
182*103e46e4SHarish Mahendrakar }
183*103e46e4SHarish Mahendrakar }
184*103e46e4SHarish Mahendrakar
185*103e46e4SHarish Mahendrakar private:
186*103e46e4SHarish Mahendrakar // Parsing states for the finite-state machine.
187*103e46e4SHarish Mahendrakar enum class State {
188*103e46e4SHarish Mahendrakar /* clang-format off */
189*103e46e4SHarish Mahendrakar // State Transitions to state When
190*103e46e4SHarish Mahendrakar kBegin, // kReadingChildId done
191*103e46e4SHarish Mahendrakar kReadingChildId, // kReadingChildSize done
192*103e46e4SHarish Mahendrakar // kEndReached EOF
193*103e46e4SHarish Mahendrakar kReadingChildSize, // kValidatingChildSize done
194*103e46e4SHarish Mahendrakar kValidatingChildSize, // kGettingAction done
195*103e46e4SHarish Mahendrakar kGettingAction, // kInitializingChildParser done
196*103e46e4SHarish Mahendrakar kInitializingChildParser, // kReadingChildBody done
197*103e46e4SHarish Mahendrakar kReadingChildBody, // kValidatingChildSize cached metadata
198*103e46e4SHarish Mahendrakar // kReadingChildId otherwise
199*103e46e4SHarish Mahendrakar kEndReached, // No transitions from here
200*103e46e4SHarish Mahendrakar /* clang-format on */
201*103e46e4SHarish Mahendrakar };
202*103e46e4SHarish Mahendrakar
203*103e46e4SHarish Mahendrakar // The parser for parsing child element Ids.
204*103e46e4SHarish Mahendrakar IdParser id_parser_;
205*103e46e4SHarish Mahendrakar
206*103e46e4SHarish Mahendrakar // The parser for parsing child element sizes.
207*103e46e4SHarish Mahendrakar SizeParser size_parser_;
208*103e46e4SHarish Mahendrakar
209*103e46e4SHarish Mahendrakar // The parser for Id::kEbml elements.
210*103e46e4SHarish Mahendrakar EbmlParser ebml_parser_;
211*103e46e4SHarish Mahendrakar
212*103e46e4SHarish Mahendrakar // The parser for Id::kSegment child elements.
213*103e46e4SHarish Mahendrakar SegmentParser segment_parser_;
214*103e46e4SHarish Mahendrakar
215*103e46e4SHarish Mahendrakar // The parser for Id::kVoid child elements.
216*103e46e4SHarish Mahendrakar VoidParser void_parser_;
217*103e46e4SHarish Mahendrakar
218*103e46e4SHarish Mahendrakar // The parser used when skipping elements (if the element's size is known).
219*103e46e4SHarish Mahendrakar SkipParser skip_parser_;
220*103e46e4SHarish Mahendrakar
221*103e46e4SHarish Mahendrakar // The parser used for unknown children.
222*103e46e4SHarish Mahendrakar UnknownParser unknown_parser_;
223*103e46e4SHarish Mahendrakar
224*103e46e4SHarish Mahendrakar // The callback used when skipping elements.
225*103e46e4SHarish Mahendrakar SkipCallback skip_callback_;
226*103e46e4SHarish Mahendrakar
227*103e46e4SHarish Mahendrakar // The parser that is parsing the current child element.
228*103e46e4SHarish Mahendrakar ElementParser* child_parser_ = nullptr;
229*103e46e4SHarish Mahendrakar
230*103e46e4SHarish Mahendrakar // Metadata for the current child being parsed.
231*103e46e4SHarish Mahendrakar ElementMetadata child_metadata_ = {};
232*103e46e4SHarish Mahendrakar
233*103e46e4SHarish Mahendrakar // Action for the current child being parsed.
234*103e46e4SHarish Mahendrakar Action action_ = Action::kRead;
235*103e46e4SHarish Mahendrakar
236*103e46e4SHarish Mahendrakar // True if a seek was performed and the parser needs to handle it.
237*103e46e4SHarish Mahendrakar bool did_seek_ = false;
238*103e46e4SHarish Mahendrakar
239*103e46e4SHarish Mahendrakar // The current state of the finite state machine.
240*103e46e4SHarish Mahendrakar State state_ = State::kBegin;
241*103e46e4SHarish Mahendrakar
242*103e46e4SHarish Mahendrakar // Resets state in preparation for parsing a child element.
PrepareForNextChild()243*103e46e4SHarish Mahendrakar void PrepareForNextChild() {
244*103e46e4SHarish Mahendrakar id_parser_ = {};
245*103e46e4SHarish Mahendrakar size_parser_ = {};
246*103e46e4SHarish Mahendrakar child_parser_ = nullptr;
247*103e46e4SHarish Mahendrakar action_ = Action::kRead;
248*103e46e4SHarish Mahendrakar }
249*103e46e4SHarish Mahendrakar };
250*103e46e4SHarish Mahendrakar
251*103e46e4SHarish Mahendrakar // We have to explicitly declare a destructor (even if it's just defaulted)
252*103e46e4SHarish Mahendrakar // because using the pimpl idiom with std::unique_ptr requires it. See Herb
253*103e46e4SHarish Mahendrakar // Sutter's GotW #100 for further explanation.
254*103e46e4SHarish Mahendrakar WebmParser::~WebmParser() = default;
255*103e46e4SHarish Mahendrakar
WebmParser()256*103e46e4SHarish Mahendrakar WebmParser::WebmParser() : parser_(new DocumentParser) {}
257*103e46e4SHarish Mahendrakar
DidSeek()258*103e46e4SHarish Mahendrakar void WebmParser::DidSeek() {
259*103e46e4SHarish Mahendrakar parser_->DidSeek();
260*103e46e4SHarish Mahendrakar parsing_status_ = Status(Status::kOkPartial);
261*103e46e4SHarish Mahendrakar }
262*103e46e4SHarish Mahendrakar
Feed(Callback * callback,Reader * reader)263*103e46e4SHarish Mahendrakar Status WebmParser::Feed(Callback* callback, Reader* reader) {
264*103e46e4SHarish Mahendrakar assert(callback != nullptr);
265*103e46e4SHarish Mahendrakar assert(reader != nullptr);
266*103e46e4SHarish Mahendrakar
267*103e46e4SHarish Mahendrakar if (parsing_status_.is_parsing_error()) {
268*103e46e4SHarish Mahendrakar return parsing_status_;
269*103e46e4SHarish Mahendrakar }
270*103e46e4SHarish Mahendrakar parsing_status_ = parser_->Feed(callback, reader);
271*103e46e4SHarish Mahendrakar return parsing_status_;
272*103e46e4SHarish Mahendrakar }
273*103e46e4SHarish Mahendrakar
Swap(WebmParser * other)274*103e46e4SHarish Mahendrakar void WebmParser::Swap(WebmParser* other) {
275*103e46e4SHarish Mahendrakar assert(other != nullptr);
276*103e46e4SHarish Mahendrakar parser_.swap(other->parser_);
277*103e46e4SHarish Mahendrakar std::swap(parsing_status_, other->parsing_status_);
278*103e46e4SHarish Mahendrakar }
279*103e46e4SHarish Mahendrakar
swap(WebmParser & left,WebmParser & right)280*103e46e4SHarish Mahendrakar void swap(WebmParser& left, WebmParser& right) { left.Swap(&right); }
281*103e46e4SHarish Mahendrakar
282*103e46e4SHarish Mahendrakar } // namespace webm
283