xref: /aosp_15_r20/external/golang-protobuf/internal/encoding/messageset/messageset.go (revision 1c12ee1efe575feb122dbf939ff15148a3b3e8f2)
1*1c12ee1eSDan Willemsen// Copyright 2019 The Go Authors. All rights reserved.
2*1c12ee1eSDan Willemsen// Use of this source code is governed by a BSD-style
3*1c12ee1eSDan Willemsen// license that can be found in the LICENSE file.
4*1c12ee1eSDan Willemsen
5*1c12ee1eSDan Willemsen// Package messageset encodes and decodes the obsolete MessageSet wire format.
6*1c12ee1eSDan Willemsenpackage messageset
7*1c12ee1eSDan Willemsen
8*1c12ee1eSDan Willemsenimport (
9*1c12ee1eSDan Willemsen	"math"
10*1c12ee1eSDan Willemsen
11*1c12ee1eSDan Willemsen	"google.golang.org/protobuf/encoding/protowire"
12*1c12ee1eSDan Willemsen	"google.golang.org/protobuf/internal/errors"
13*1c12ee1eSDan Willemsen	"google.golang.org/protobuf/reflect/protoreflect"
14*1c12ee1eSDan Willemsen)
15*1c12ee1eSDan Willemsen
16*1c12ee1eSDan Willemsen// The MessageSet wire format is equivalent to a message defined as follows,
17*1c12ee1eSDan Willemsen// where each Item defines an extension field with a field number of 'type_id'
18*1c12ee1eSDan Willemsen// and content of 'message'. MessageSet extensions must be non-repeated message
19*1c12ee1eSDan Willemsen// fields.
20*1c12ee1eSDan Willemsen//
21*1c12ee1eSDan Willemsen//	message MessageSet {
22*1c12ee1eSDan Willemsen//		repeated group Item = 1 {
23*1c12ee1eSDan Willemsen//			required int32 type_id = 2;
24*1c12ee1eSDan Willemsen//			required string message = 3;
25*1c12ee1eSDan Willemsen//		}
26*1c12ee1eSDan Willemsen//	}
27*1c12ee1eSDan Willemsenconst (
28*1c12ee1eSDan Willemsen	FieldItem    = protowire.Number(1)
29*1c12ee1eSDan Willemsen	FieldTypeID  = protowire.Number(2)
30*1c12ee1eSDan Willemsen	FieldMessage = protowire.Number(3)
31*1c12ee1eSDan Willemsen)
32*1c12ee1eSDan Willemsen
// ExtensionName is the name every MessageSet extension field must carry.
//
// A well-formed MessageSet extension is declared inside the message type it
// carries, like so:
//
//	message MyMessage {
//		extend proto2.bridge.MessageSet {
//			optional MyMessage message_set_extension = 1234;
//		}
//		...
//	}
//
// The constant is deliberately left untyped so that it can be compared
// directly against name types derived from string.
const ExtensionName = "message_set_extension"
44*1c12ee1eSDan Willemsen
45*1c12ee1eSDan Willemsen// IsMessageSet returns whether the message uses the MessageSet wire format.
46*1c12ee1eSDan Willemsenfunc IsMessageSet(md protoreflect.MessageDescriptor) bool {
47*1c12ee1eSDan Willemsen	xmd, ok := md.(interface{ IsMessageSet() bool })
48*1c12ee1eSDan Willemsen	return ok && xmd.IsMessageSet()
49*1c12ee1eSDan Willemsen}
50*1c12ee1eSDan Willemsen
51*1c12ee1eSDan Willemsen// IsMessageSetExtension reports this field properly extends a MessageSet.
52*1c12ee1eSDan Willemsenfunc IsMessageSetExtension(fd protoreflect.FieldDescriptor) bool {
53*1c12ee1eSDan Willemsen	switch {
54*1c12ee1eSDan Willemsen	case fd.Name() != ExtensionName:
55*1c12ee1eSDan Willemsen		return false
56*1c12ee1eSDan Willemsen	case !IsMessageSet(fd.ContainingMessage()):
57*1c12ee1eSDan Willemsen		return false
58*1c12ee1eSDan Willemsen	case fd.FullName().Parent() != fd.Message().FullName():
59*1c12ee1eSDan Willemsen		return false
60*1c12ee1eSDan Willemsen	}
61*1c12ee1eSDan Willemsen	return true
62*1c12ee1eSDan Willemsen}
63*1c12ee1eSDan Willemsen
64*1c12ee1eSDan Willemsen// SizeField returns the size of a MessageSet item field containing an extension
65*1c12ee1eSDan Willemsen// with the given field number, not counting the contents of the message subfield.
66*1c12ee1eSDan Willemsenfunc SizeField(num protowire.Number) int {
67*1c12ee1eSDan Willemsen	return 2*protowire.SizeTag(FieldItem) + protowire.SizeTag(FieldTypeID) + protowire.SizeVarint(uint64(num))
68*1c12ee1eSDan Willemsen}
69*1c12ee1eSDan Willemsen
70*1c12ee1eSDan Willemsen// Unmarshal parses a MessageSet.
71*1c12ee1eSDan Willemsen//
72*1c12ee1eSDan Willemsen// It calls fn with the type ID and value of each item in the MessageSet.
73*1c12ee1eSDan Willemsen// Unknown fields are discarded.
74*1c12ee1eSDan Willemsen//
75*1c12ee1eSDan Willemsen// If wantLen is true, the item values include the varint length prefix.
76*1c12ee1eSDan Willemsen// This is ugly, but simplifies the fast-path decoder in internal/impl.
77*1c12ee1eSDan Willemsenfunc Unmarshal(b []byte, wantLen bool, fn func(typeID protowire.Number, value []byte) error) error {
78*1c12ee1eSDan Willemsen	for len(b) > 0 {
79*1c12ee1eSDan Willemsen		num, wtyp, n := protowire.ConsumeTag(b)
80*1c12ee1eSDan Willemsen		if n < 0 {
81*1c12ee1eSDan Willemsen			return protowire.ParseError(n)
82*1c12ee1eSDan Willemsen		}
83*1c12ee1eSDan Willemsen		b = b[n:]
84*1c12ee1eSDan Willemsen		if num != FieldItem || wtyp != protowire.StartGroupType {
85*1c12ee1eSDan Willemsen			n := protowire.ConsumeFieldValue(num, wtyp, b)
86*1c12ee1eSDan Willemsen			if n < 0 {
87*1c12ee1eSDan Willemsen				return protowire.ParseError(n)
88*1c12ee1eSDan Willemsen			}
89*1c12ee1eSDan Willemsen			b = b[n:]
90*1c12ee1eSDan Willemsen			continue
91*1c12ee1eSDan Willemsen		}
92*1c12ee1eSDan Willemsen		typeID, value, n, err := ConsumeFieldValue(b, wantLen)
93*1c12ee1eSDan Willemsen		if err != nil {
94*1c12ee1eSDan Willemsen			return err
95*1c12ee1eSDan Willemsen		}
96*1c12ee1eSDan Willemsen		b = b[n:]
97*1c12ee1eSDan Willemsen		if typeID == 0 {
98*1c12ee1eSDan Willemsen			continue
99*1c12ee1eSDan Willemsen		}
100*1c12ee1eSDan Willemsen		if err := fn(typeID, value); err != nil {
101*1c12ee1eSDan Willemsen			return err
102*1c12ee1eSDan Willemsen		}
103*1c12ee1eSDan Willemsen	}
104*1c12ee1eSDan Willemsen	return nil
105*1c12ee1eSDan Willemsen}
106*1c12ee1eSDan Willemsen
107*1c12ee1eSDan Willemsen// ConsumeFieldValue parses b as a MessageSet item field value until and including
108*1c12ee1eSDan Willemsen// the trailing end group marker. It assumes the start group tag has already been parsed.
109*1c12ee1eSDan Willemsen// It returns the contents of the type_id and message subfields and the total
110*1c12ee1eSDan Willemsen// item length.
111*1c12ee1eSDan Willemsen//
112*1c12ee1eSDan Willemsen// If wantLen is true, the returned message value includes the length prefix.
113*1c12ee1eSDan Willemsenfunc ConsumeFieldValue(b []byte, wantLen bool) (typeid protowire.Number, message []byte, n int, err error) {
114*1c12ee1eSDan Willemsen	ilen := len(b)
115*1c12ee1eSDan Willemsen	for {
116*1c12ee1eSDan Willemsen		num, wtyp, n := protowire.ConsumeTag(b)
117*1c12ee1eSDan Willemsen		if n < 0 {
118*1c12ee1eSDan Willemsen			return 0, nil, 0, protowire.ParseError(n)
119*1c12ee1eSDan Willemsen		}
120*1c12ee1eSDan Willemsen		b = b[n:]
121*1c12ee1eSDan Willemsen		switch {
122*1c12ee1eSDan Willemsen		case num == FieldItem && wtyp == protowire.EndGroupType:
123*1c12ee1eSDan Willemsen			if wantLen && len(message) == 0 {
124*1c12ee1eSDan Willemsen				// The message field was missing, which should never happen.
125*1c12ee1eSDan Willemsen				// Be prepared for this case anyway.
126*1c12ee1eSDan Willemsen				message = protowire.AppendVarint(message, 0)
127*1c12ee1eSDan Willemsen			}
128*1c12ee1eSDan Willemsen			return typeid, message, ilen - len(b), nil
129*1c12ee1eSDan Willemsen		case num == FieldTypeID && wtyp == protowire.VarintType:
130*1c12ee1eSDan Willemsen			v, n := protowire.ConsumeVarint(b)
131*1c12ee1eSDan Willemsen			if n < 0 {
132*1c12ee1eSDan Willemsen				return 0, nil, 0, protowire.ParseError(n)
133*1c12ee1eSDan Willemsen			}
134*1c12ee1eSDan Willemsen			b = b[n:]
135*1c12ee1eSDan Willemsen			if v < 1 || v > math.MaxInt32 {
136*1c12ee1eSDan Willemsen				return 0, nil, 0, errors.New("invalid type_id in message set")
137*1c12ee1eSDan Willemsen			}
138*1c12ee1eSDan Willemsen			typeid = protowire.Number(v)
139*1c12ee1eSDan Willemsen		case num == FieldMessage && wtyp == protowire.BytesType:
140*1c12ee1eSDan Willemsen			m, n := protowire.ConsumeBytes(b)
141*1c12ee1eSDan Willemsen			if n < 0 {
142*1c12ee1eSDan Willemsen				return 0, nil, 0, protowire.ParseError(n)
143*1c12ee1eSDan Willemsen			}
144*1c12ee1eSDan Willemsen			if message == nil {
145*1c12ee1eSDan Willemsen				if wantLen {
146*1c12ee1eSDan Willemsen					message = b[:n:n]
147*1c12ee1eSDan Willemsen				} else {
148*1c12ee1eSDan Willemsen					message = m[:len(m):len(m)]
149*1c12ee1eSDan Willemsen				}
150*1c12ee1eSDan Willemsen			} else {
151*1c12ee1eSDan Willemsen				// This case should never happen in practice, but handle it for
152*1c12ee1eSDan Willemsen				// correctness: The MessageSet item contains multiple message
153*1c12ee1eSDan Willemsen				// fields, which need to be merged.
154*1c12ee1eSDan Willemsen				//
155*1c12ee1eSDan Willemsen				// In the case where we're returning the length, this becomes
156*1c12ee1eSDan Willemsen				// quite inefficient since we need to strip the length off
157*1c12ee1eSDan Willemsen				// the existing data and reconstruct it with the combined length.
158*1c12ee1eSDan Willemsen				if wantLen {
159*1c12ee1eSDan Willemsen					_, nn := protowire.ConsumeVarint(message)
160*1c12ee1eSDan Willemsen					m0 := message[nn:]
161*1c12ee1eSDan Willemsen					message = nil
162*1c12ee1eSDan Willemsen					message = protowire.AppendVarint(message, uint64(len(m0)+len(m)))
163*1c12ee1eSDan Willemsen					message = append(message, m0...)
164*1c12ee1eSDan Willemsen					message = append(message, m...)
165*1c12ee1eSDan Willemsen				} else {
166*1c12ee1eSDan Willemsen					message = append(message, m...)
167*1c12ee1eSDan Willemsen				}
168*1c12ee1eSDan Willemsen			}
169*1c12ee1eSDan Willemsen			b = b[n:]
170*1c12ee1eSDan Willemsen		default:
171*1c12ee1eSDan Willemsen			// We have no place to put it, so we just ignore unknown fields.
172*1c12ee1eSDan Willemsen			n := protowire.ConsumeFieldValue(num, wtyp, b)
173*1c12ee1eSDan Willemsen			if n < 0 {
174*1c12ee1eSDan Willemsen				return 0, nil, 0, protowire.ParseError(n)
175*1c12ee1eSDan Willemsen			}
176*1c12ee1eSDan Willemsen			b = b[n:]
177*1c12ee1eSDan Willemsen		}
178*1c12ee1eSDan Willemsen	}
179*1c12ee1eSDan Willemsen}
180*1c12ee1eSDan Willemsen
181*1c12ee1eSDan Willemsen// AppendFieldStart appends the start of a MessageSet item field containing
182*1c12ee1eSDan Willemsen// an extension with the given number. The caller must add the message
183*1c12ee1eSDan Willemsen// subfield (including the tag).
184*1c12ee1eSDan Willemsenfunc AppendFieldStart(b []byte, num protowire.Number) []byte {
185*1c12ee1eSDan Willemsen	b = protowire.AppendTag(b, FieldItem, protowire.StartGroupType)
186*1c12ee1eSDan Willemsen	b = protowire.AppendTag(b, FieldTypeID, protowire.VarintType)
187*1c12ee1eSDan Willemsen	b = protowire.AppendVarint(b, uint64(num))
188*1c12ee1eSDan Willemsen	return b
189*1c12ee1eSDan Willemsen}
190*1c12ee1eSDan Willemsen
191*1c12ee1eSDan Willemsen// AppendFieldEnd appends the trailing end group marker for a MessageSet item field.
192*1c12ee1eSDan Willemsenfunc AppendFieldEnd(b []byte) []byte {
193*1c12ee1eSDan Willemsen	return protowire.AppendTag(b, FieldItem, protowire.EndGroupType)
194*1c12ee1eSDan Willemsen}
195*1c12ee1eSDan Willemsen
196*1c12ee1eSDan Willemsen// SizeUnknown returns the size of an unknown fields section in MessageSet format.
197*1c12ee1eSDan Willemsen//
198*1c12ee1eSDan Willemsen// See AppendUnknown.
199*1c12ee1eSDan Willemsenfunc SizeUnknown(unknown []byte) (size int) {
200*1c12ee1eSDan Willemsen	for len(unknown) > 0 {
201*1c12ee1eSDan Willemsen		num, typ, n := protowire.ConsumeTag(unknown)
202*1c12ee1eSDan Willemsen		if n < 0 || typ != protowire.BytesType {
203*1c12ee1eSDan Willemsen			return 0
204*1c12ee1eSDan Willemsen		}
205*1c12ee1eSDan Willemsen		unknown = unknown[n:]
206*1c12ee1eSDan Willemsen		_, n = protowire.ConsumeBytes(unknown)
207*1c12ee1eSDan Willemsen		if n < 0 {
208*1c12ee1eSDan Willemsen			return 0
209*1c12ee1eSDan Willemsen		}
210*1c12ee1eSDan Willemsen		unknown = unknown[n:]
211*1c12ee1eSDan Willemsen		size += SizeField(num) + protowire.SizeTag(FieldMessage) + n
212*1c12ee1eSDan Willemsen	}
213*1c12ee1eSDan Willemsen	return size
214*1c12ee1eSDan Willemsen}
215*1c12ee1eSDan Willemsen
216*1c12ee1eSDan Willemsen// AppendUnknown appends unknown fields to b in MessageSet format.
217*1c12ee1eSDan Willemsen//
218*1c12ee1eSDan Willemsen// For historic reasons, unresolved items in a MessageSet are stored in a
219*1c12ee1eSDan Willemsen// message's unknown fields section in non-MessageSet format. That is, an
220*1c12ee1eSDan Willemsen// unknown item with typeID T and value V appears in the unknown fields as
221*1c12ee1eSDan Willemsen// a field with number T and value V.
222*1c12ee1eSDan Willemsen//
223*1c12ee1eSDan Willemsen// This function converts the unknown fields back into MessageSet form.
224*1c12ee1eSDan Willemsenfunc AppendUnknown(b, unknown []byte) ([]byte, error) {
225*1c12ee1eSDan Willemsen	for len(unknown) > 0 {
226*1c12ee1eSDan Willemsen		num, typ, n := protowire.ConsumeTag(unknown)
227*1c12ee1eSDan Willemsen		if n < 0 || typ != protowire.BytesType {
228*1c12ee1eSDan Willemsen			return nil, errors.New("invalid data in message set unknown fields")
229*1c12ee1eSDan Willemsen		}
230*1c12ee1eSDan Willemsen		unknown = unknown[n:]
231*1c12ee1eSDan Willemsen		_, n = protowire.ConsumeBytes(unknown)
232*1c12ee1eSDan Willemsen		if n < 0 {
233*1c12ee1eSDan Willemsen			return nil, errors.New("invalid data in message set unknown fields")
234*1c12ee1eSDan Willemsen		}
235*1c12ee1eSDan Willemsen		b = AppendFieldStart(b, num)
236*1c12ee1eSDan Willemsen		b = protowire.AppendTag(b, FieldMessage, protowire.BytesType)
237*1c12ee1eSDan Willemsen		b = append(b, unknown[:n]...)
238*1c12ee1eSDan Willemsen		b = AppendFieldEnd(b)
239*1c12ee1eSDan Willemsen		unknown = unknown[n:]
240*1c12ee1eSDan Willemsen	}
241*1c12ee1eSDan Willemsen	return b, nil
242*1c12ee1eSDan Willemsen}
243