blob: cc5a54ab84d32d33a22d57d6bc7899b75ae0f816 [file] [log] [blame]
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001/* Amalgamated source file */
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07002#include "upb.h"
Paul Yang9bda1f12018-09-22 18:57:43 -07003
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08004#ifndef UINTPTR_MAX
5#error must include stdint.h first
6#endif
7
Paul Yang9bda1f12018-09-22 18:57:43 -07008#if UINTPTR_MAX == 0xffffffff
9#define UPB_SIZE(size32, size64) size32
10#else
11#define UPB_SIZE(size32, size64) size64
12#endif
13
14#define UPB_FIELD_AT(msg, fieldtype, offset) \
15 *(fieldtype*)((const char*)(msg) + offset)
16
17#define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \
18 UPB_FIELD_AT(msg, int, case_offset) == case_val \
19 ? UPB_FIELD_AT(msg, fieldtype, offset) \
20 : default
21
22#define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \
23 UPB_FIELD_AT(msg, int, case_offset) = case_val; \
24 UPB_FIELD_AT(msg, fieldtype, offset) = value;
Paul Yang6dd563a2018-03-08 17:35:22 -080025/* This file was generated by upbc (the upb compiler) from the input
26 * file:
27 *
28 * google/protobuf/descriptor.proto
29 *
30 * Do not edit -- your changes will be discarded when the file is
31 * regenerated. */
32
33#include <stddef.h>
34
35
Paul Yang9bda1f12018-09-22 18:57:43 -070036static const upb_msglayout *const google_protobuf_FileDescriptorSet_submsgs[1] = {
Paul Yang6dd563a2018-03-08 17:35:22 -080037 &google_protobuf_FileDescriptorProto_msginit,
38};
39
Paul Yang9bda1f12018-09-22 18:57:43 -070040static const upb_msglayout_field google_protobuf_FileDescriptorSet__fields[1] = {
41 {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
Paul Yang6dd563a2018-03-08 17:35:22 -080042};
43
Paul Yang9bda1f12018-09-22 18:57:43 -070044const upb_msglayout google_protobuf_FileDescriptorSet_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -080045 &google_protobuf_FileDescriptorSet_submsgs[0],
46 &google_protobuf_FileDescriptorSet__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -070047 UPB_SIZE(4, 8), 1, false,
Paul Yang6dd563a2018-03-08 17:35:22 -080048};
49
Paul Yang9bda1f12018-09-22 18:57:43 -070050static const upb_msglayout *const google_protobuf_FileDescriptorProto_submsgs[6] = {
Paul Yang6dd563a2018-03-08 17:35:22 -080051 &google_protobuf_DescriptorProto_msginit,
52 &google_protobuf_EnumDescriptorProto_msginit,
53 &google_protobuf_FieldDescriptorProto_msginit,
54 &google_protobuf_FileOptions_msginit,
55 &google_protobuf_ServiceDescriptorProto_msginit,
56 &google_protobuf_SourceCodeInfo_msginit,
57};
58
Paul Yang9bda1f12018-09-22 18:57:43 -070059static const upb_msglayout_field google_protobuf_FileDescriptorProto__fields[12] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080060 {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
61 {2, UPB_SIZE(12, 24), 2, 0, 9, 1},
62 {3, UPB_SIZE(36, 72), 0, 0, 9, 3},
63 {4, UPB_SIZE(40, 80), 0, 0, 11, 3},
64 {5, UPB_SIZE(44, 88), 0, 1, 11, 3},
65 {6, UPB_SIZE(48, 96), 0, 4, 11, 3},
66 {7, UPB_SIZE(52, 104), 0, 2, 11, 3},
67 {8, UPB_SIZE(28, 56), 4, 3, 11, 1},
68 {9, UPB_SIZE(32, 64), 5, 5, 11, 1},
69 {10, UPB_SIZE(56, 112), 0, 0, 5, 3},
70 {11, UPB_SIZE(60, 120), 0, 0, 5, 3},
71 {12, UPB_SIZE(20, 40), 3, 0, 9, 1},
Paul Yang6dd563a2018-03-08 17:35:22 -080072};
73
Paul Yang9bda1f12018-09-22 18:57:43 -070074const upb_msglayout google_protobuf_FileDescriptorProto_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -080075 &google_protobuf_FileDescriptorProto_submsgs[0],
76 &google_protobuf_FileDescriptorProto__fields[0],
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080077 UPB_SIZE(64, 128), 12, false,
Paul Yang6dd563a2018-03-08 17:35:22 -080078};
79
Paul Yang9bda1f12018-09-22 18:57:43 -070080static const upb_msglayout *const google_protobuf_DescriptorProto_submsgs[8] = {
Paul Yang6dd563a2018-03-08 17:35:22 -080081 &google_protobuf_DescriptorProto_msginit,
82 &google_protobuf_DescriptorProto_ExtensionRange_msginit,
83 &google_protobuf_DescriptorProto_ReservedRange_msginit,
84 &google_protobuf_EnumDescriptorProto_msginit,
85 &google_protobuf_FieldDescriptorProto_msginit,
86 &google_protobuf_MessageOptions_msginit,
87 &google_protobuf_OneofDescriptorProto_msginit,
88};
89
Paul Yang9bda1f12018-09-22 18:57:43 -070090static const upb_msglayout_field google_protobuf_DescriptorProto__fields[10] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080091 {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
92 {2, UPB_SIZE(16, 32), 0, 4, 11, 3},
93 {3, UPB_SIZE(20, 40), 0, 0, 11, 3},
94 {4, UPB_SIZE(24, 48), 0, 3, 11, 3},
95 {5, UPB_SIZE(28, 56), 0, 1, 11, 3},
96 {6, UPB_SIZE(32, 64), 0, 4, 11, 3},
97 {7, UPB_SIZE(12, 24), 2, 5, 11, 1},
98 {8, UPB_SIZE(36, 72), 0, 6, 11, 3},
99 {9, UPB_SIZE(40, 80), 0, 2, 11, 3},
100 {10, UPB_SIZE(44, 88), 0, 0, 9, 3},
Paul Yang6dd563a2018-03-08 17:35:22 -0800101};
102
Paul Yang9bda1f12018-09-22 18:57:43 -0700103const upb_msglayout google_protobuf_DescriptorProto_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800104 &google_protobuf_DescriptorProto_submsgs[0],
105 &google_protobuf_DescriptorProto__fields[0],
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800106 UPB_SIZE(48, 96), 10, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800107};
108
Paul Yang9bda1f12018-09-22 18:57:43 -0700109static const upb_msglayout *const google_protobuf_DescriptorProto_ExtensionRange_submsgs[1] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800110 &google_protobuf_ExtensionRangeOptions_msginit,
111};
112
Paul Yang9bda1f12018-09-22 18:57:43 -0700113static const upb_msglayout_field google_protobuf_DescriptorProto_ExtensionRange__fields[3] = {
114 {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
115 {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
116 {3, UPB_SIZE(12, 16), 3, 0, 11, 1},
Paul Yang6dd563a2018-03-08 17:35:22 -0800117};
118
Paul Yang9bda1f12018-09-22 18:57:43 -0700119const upb_msglayout google_protobuf_DescriptorProto_ExtensionRange_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800120 &google_protobuf_DescriptorProto_ExtensionRange_submsgs[0],
121 &google_protobuf_DescriptorProto_ExtensionRange__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -0700122 UPB_SIZE(16, 24), 3, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800123};
124
Paul Yang9bda1f12018-09-22 18:57:43 -0700125static const upb_msglayout_field google_protobuf_DescriptorProto_ReservedRange__fields[2] = {
126 {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
127 {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
Paul Yang6dd563a2018-03-08 17:35:22 -0800128};
129
Paul Yang9bda1f12018-09-22 18:57:43 -0700130const upb_msglayout google_protobuf_DescriptorProto_ReservedRange_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800131 NULL,
132 &google_protobuf_DescriptorProto_ReservedRange__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -0700133 UPB_SIZE(12, 12), 2, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800134};
135
Paul Yang9bda1f12018-09-22 18:57:43 -0700136static const upb_msglayout *const google_protobuf_ExtensionRangeOptions_submsgs[1] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800137 &google_protobuf_UninterpretedOption_msginit,
138};
139
Paul Yang9bda1f12018-09-22 18:57:43 -0700140static const upb_msglayout_field google_protobuf_ExtensionRangeOptions__fields[1] = {
141 {999, UPB_SIZE(0, 0), 0, 0, 11, 3},
Paul Yang6dd563a2018-03-08 17:35:22 -0800142};
143
Paul Yang9bda1f12018-09-22 18:57:43 -0700144const upb_msglayout google_protobuf_ExtensionRangeOptions_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800145 &google_protobuf_ExtensionRangeOptions_submsgs[0],
146 &google_protobuf_ExtensionRangeOptions__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -0700147 UPB_SIZE(4, 8), 1, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800148};
149
Paul Yang9bda1f12018-09-22 18:57:43 -0700150static const upb_msglayout *const google_protobuf_FieldDescriptorProto_submsgs[1] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800151 &google_protobuf_FieldOptions_msginit,
152};
153
Paul Yang9bda1f12018-09-22 18:57:43 -0700154static const upb_msglayout_field google_protobuf_FieldDescriptorProto__fields[10] = {
155 {1, UPB_SIZE(32, 32), 5, 0, 9, 1},
156 {2, UPB_SIZE(40, 48), 6, 0, 9, 1},
157 {3, UPB_SIZE(24, 24), 3, 0, 5, 1},
158 {4, UPB_SIZE(8, 8), 1, 0, 14, 1},
159 {5, UPB_SIZE(16, 16), 2, 0, 14, 1},
160 {6, UPB_SIZE(48, 64), 7, 0, 9, 1},
161 {7, UPB_SIZE(56, 80), 8, 0, 9, 1},
162 {8, UPB_SIZE(72, 112), 10, 0, 11, 1},
163 {9, UPB_SIZE(28, 28), 4, 0, 5, 1},
164 {10, UPB_SIZE(64, 96), 9, 0, 9, 1},
Paul Yang6dd563a2018-03-08 17:35:22 -0800165};
166
Paul Yang9bda1f12018-09-22 18:57:43 -0700167const upb_msglayout google_protobuf_FieldDescriptorProto_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800168 &google_protobuf_FieldDescriptorProto_submsgs[0],
169 &google_protobuf_FieldDescriptorProto__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -0700170 UPB_SIZE(80, 128), 10, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800171};
172
Paul Yang9bda1f12018-09-22 18:57:43 -0700173static const upb_msglayout *const google_protobuf_OneofDescriptorProto_submsgs[1] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800174 &google_protobuf_OneofOptions_msginit,
175};
176
Paul Yang9bda1f12018-09-22 18:57:43 -0700177static const upb_msglayout_field google_protobuf_OneofDescriptorProto__fields[2] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800178 {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
179 {2, UPB_SIZE(12, 24), 2, 0, 11, 1},
Paul Yang6dd563a2018-03-08 17:35:22 -0800180};
181
Paul Yang9bda1f12018-09-22 18:57:43 -0700182const upb_msglayout google_protobuf_OneofDescriptorProto_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800183 &google_protobuf_OneofDescriptorProto_submsgs[0],
184 &google_protobuf_OneofDescriptorProto__fields[0],
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800185 UPB_SIZE(16, 32), 2, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800186};
187
Paul Yang9bda1f12018-09-22 18:57:43 -0700188static const upb_msglayout *const google_protobuf_EnumDescriptorProto_submsgs[3] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800189 &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit,
190 &google_protobuf_EnumOptions_msginit,
191 &google_protobuf_EnumValueDescriptorProto_msginit,
192};
193
Paul Yang9bda1f12018-09-22 18:57:43 -0700194static const upb_msglayout_field google_protobuf_EnumDescriptorProto__fields[5] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800195 {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
196 {2, UPB_SIZE(16, 32), 0, 2, 11, 3},
197 {3, UPB_SIZE(12, 24), 2, 1, 11, 1},
198 {4, UPB_SIZE(20, 40), 0, 0, 11, 3},
199 {5, UPB_SIZE(24, 48), 0, 0, 9, 3},
Paul Yang6dd563a2018-03-08 17:35:22 -0800200};
201
Paul Yang9bda1f12018-09-22 18:57:43 -0700202const upb_msglayout google_protobuf_EnumDescriptorProto_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800203 &google_protobuf_EnumDescriptorProto_submsgs[0],
204 &google_protobuf_EnumDescriptorProto__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -0700205 UPB_SIZE(32, 64), 5, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800206};
207
Paul Yang9bda1f12018-09-22 18:57:43 -0700208static const upb_msglayout_field google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[2] = {
209 {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
210 {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
Paul Yang6dd563a2018-03-08 17:35:22 -0800211};
212
Paul Yang9bda1f12018-09-22 18:57:43 -0700213const upb_msglayout google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800214 NULL,
215 &google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -0700216 UPB_SIZE(12, 12), 2, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800217};
218
Paul Yang9bda1f12018-09-22 18:57:43 -0700219static const upb_msglayout *const google_protobuf_EnumValueDescriptorProto_submsgs[1] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800220 &google_protobuf_EnumValueOptions_msginit,
221};
222
Paul Yang9bda1f12018-09-22 18:57:43 -0700223static const upb_msglayout_field google_protobuf_EnumValueDescriptorProto__fields[3] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800224 {1, UPB_SIZE(8, 8), 2, 0, 9, 1},
Paul Yang9bda1f12018-09-22 18:57:43 -0700225 {2, UPB_SIZE(4, 4), 1, 0, 5, 1},
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800226 {3, UPB_SIZE(16, 24), 3, 0, 11, 1},
Paul Yang6dd563a2018-03-08 17:35:22 -0800227};
228
Paul Yang9bda1f12018-09-22 18:57:43 -0700229const upb_msglayout google_protobuf_EnumValueDescriptorProto_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800230 &google_protobuf_EnumValueDescriptorProto_submsgs[0],
231 &google_protobuf_EnumValueDescriptorProto__fields[0],
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800232 UPB_SIZE(24, 32), 3, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800233};
234
Paul Yang9bda1f12018-09-22 18:57:43 -0700235static const upb_msglayout *const google_protobuf_ServiceDescriptorProto_submsgs[2] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800236 &google_protobuf_MethodDescriptorProto_msginit,
237 &google_protobuf_ServiceOptions_msginit,
238};
239
Paul Yang9bda1f12018-09-22 18:57:43 -0700240static const upb_msglayout_field google_protobuf_ServiceDescriptorProto__fields[3] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800241 {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
242 {2, UPB_SIZE(16, 32), 0, 0, 11, 3},
243 {3, UPB_SIZE(12, 24), 2, 1, 11, 1},
Paul Yang6dd563a2018-03-08 17:35:22 -0800244};
245
Paul Yang9bda1f12018-09-22 18:57:43 -0700246const upb_msglayout google_protobuf_ServiceDescriptorProto_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800247 &google_protobuf_ServiceDescriptorProto_submsgs[0],
248 &google_protobuf_ServiceDescriptorProto__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -0700249 UPB_SIZE(24, 48), 3, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800250};
251
Paul Yang9bda1f12018-09-22 18:57:43 -0700252static const upb_msglayout *const google_protobuf_MethodDescriptorProto_submsgs[1] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800253 &google_protobuf_MethodOptions_msginit,
254};
255
Paul Yang9bda1f12018-09-22 18:57:43 -0700256static const upb_msglayout_field google_protobuf_MethodDescriptorProto__fields[6] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800257 {1, UPB_SIZE(4, 8), 3, 0, 9, 1},
258 {2, UPB_SIZE(12, 24), 4, 0, 9, 1},
259 {3, UPB_SIZE(20, 40), 5, 0, 9, 1},
260 {4, UPB_SIZE(28, 56), 6, 0, 11, 1},
Paul Yang9bda1f12018-09-22 18:57:43 -0700261 {5, UPB_SIZE(1, 1), 1, 0, 8, 1},
262 {6, UPB_SIZE(2, 2), 2, 0, 8, 1},
Paul Yang6dd563a2018-03-08 17:35:22 -0800263};
264
Paul Yang9bda1f12018-09-22 18:57:43 -0700265const upb_msglayout google_protobuf_MethodDescriptorProto_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800266 &google_protobuf_MethodDescriptorProto_submsgs[0],
267 &google_protobuf_MethodDescriptorProto__fields[0],
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800268 UPB_SIZE(32, 64), 6, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800269};
270
Paul Yang9bda1f12018-09-22 18:57:43 -0700271static const upb_msglayout *const google_protobuf_FileOptions_submsgs[1] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800272 &google_protobuf_UninterpretedOption_msginit,
273};
274
Paul Yang9bda1f12018-09-22 18:57:43 -0700275static const upb_msglayout_field google_protobuf_FileOptions__fields[19] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800276 {1, UPB_SIZE(28, 32), 11, 0, 9, 1},
277 {8, UPB_SIZE(36, 48), 12, 0, 9, 1},
Paul Yang9bda1f12018-09-22 18:57:43 -0700278 {9, UPB_SIZE(8, 8), 1, 0, 14, 1},
279 {10, UPB_SIZE(16, 16), 2, 0, 8, 1},
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800280 {11, UPB_SIZE(44, 64), 13, 0, 9, 1},
Paul Yang9bda1f12018-09-22 18:57:43 -0700281 {16, UPB_SIZE(17, 17), 3, 0, 8, 1},
282 {17, UPB_SIZE(18, 18), 4, 0, 8, 1},
283 {18, UPB_SIZE(19, 19), 5, 0, 8, 1},
284 {20, UPB_SIZE(20, 20), 6, 0, 8, 1},
285 {23, UPB_SIZE(21, 21), 7, 0, 8, 1},
286 {27, UPB_SIZE(22, 22), 8, 0, 8, 1},
287 {31, UPB_SIZE(23, 23), 9, 0, 8, 1},
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800288 {36, UPB_SIZE(52, 80), 14, 0, 9, 1},
289 {37, UPB_SIZE(60, 96), 15, 0, 9, 1},
290 {39, UPB_SIZE(68, 112), 16, 0, 9, 1},
291 {40, UPB_SIZE(76, 128), 17, 0, 9, 1},
292 {41, UPB_SIZE(84, 144), 18, 0, 9, 1},
Paul Yang9bda1f12018-09-22 18:57:43 -0700293 {42, UPB_SIZE(24, 24), 10, 0, 8, 1},
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800294 {999, UPB_SIZE(92, 160), 0, 0, 11, 3},
Paul Yang6dd563a2018-03-08 17:35:22 -0800295};
296
Paul Yang9bda1f12018-09-22 18:57:43 -0700297const upb_msglayout google_protobuf_FileOptions_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800298 &google_protobuf_FileOptions_submsgs[0],
299 &google_protobuf_FileOptions__fields[0],
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800300 UPB_SIZE(96, 176), 19, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800301};
302
Paul Yang9bda1f12018-09-22 18:57:43 -0700303static const upb_msglayout *const google_protobuf_MessageOptions_submsgs[1] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800304 &google_protobuf_UninterpretedOption_msginit,
305};
306
Paul Yang9bda1f12018-09-22 18:57:43 -0700307static const upb_msglayout_field google_protobuf_MessageOptions__fields[5] = {
308 {1, UPB_SIZE(1, 1), 1, 0, 8, 1},
309 {2, UPB_SIZE(2, 2), 2, 0, 8, 1},
310 {3, UPB_SIZE(3, 3), 3, 0, 8, 1},
311 {7, UPB_SIZE(4, 4), 4, 0, 8, 1},
312 {999, UPB_SIZE(8, 8), 0, 0, 11, 3},
Paul Yang6dd563a2018-03-08 17:35:22 -0800313};
314
Paul Yang9bda1f12018-09-22 18:57:43 -0700315const upb_msglayout google_protobuf_MessageOptions_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800316 &google_protobuf_MessageOptions_submsgs[0],
317 &google_protobuf_MessageOptions__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -0700318 UPB_SIZE(12, 16), 5, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800319};
320
Paul Yang9bda1f12018-09-22 18:57:43 -0700321static const upb_msglayout *const google_protobuf_FieldOptions_submsgs[1] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800322 &google_protobuf_UninterpretedOption_msginit,
323};
324
Paul Yang9bda1f12018-09-22 18:57:43 -0700325static const upb_msglayout_field google_protobuf_FieldOptions__fields[7] = {
326 {1, UPB_SIZE(8, 8), 1, 0, 14, 1},
327 {2, UPB_SIZE(24, 24), 3, 0, 8, 1},
328 {3, UPB_SIZE(25, 25), 4, 0, 8, 1},
329 {5, UPB_SIZE(26, 26), 5, 0, 8, 1},
330 {6, UPB_SIZE(16, 16), 2, 0, 14, 1},
331 {10, UPB_SIZE(27, 27), 6, 0, 8, 1},
332 {999, UPB_SIZE(28, 32), 0, 0, 11, 3},
Paul Yang6dd563a2018-03-08 17:35:22 -0800333};
334
Paul Yang9bda1f12018-09-22 18:57:43 -0700335const upb_msglayout google_protobuf_FieldOptions_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800336 &google_protobuf_FieldOptions_submsgs[0],
337 &google_protobuf_FieldOptions__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -0700338 UPB_SIZE(32, 40), 7, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800339};
340
Paul Yang9bda1f12018-09-22 18:57:43 -0700341static const upb_msglayout *const google_protobuf_OneofOptions_submsgs[1] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800342 &google_protobuf_UninterpretedOption_msginit,
343};
344
Paul Yang9bda1f12018-09-22 18:57:43 -0700345static const upb_msglayout_field google_protobuf_OneofOptions__fields[1] = {
346 {999, UPB_SIZE(0, 0), 0, 0, 11, 3},
Paul Yang6dd563a2018-03-08 17:35:22 -0800347};
348
Paul Yang9bda1f12018-09-22 18:57:43 -0700349const upb_msglayout google_protobuf_OneofOptions_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800350 &google_protobuf_OneofOptions_submsgs[0],
351 &google_protobuf_OneofOptions__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -0700352 UPB_SIZE(4, 8), 1, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800353};
354
Paul Yang9bda1f12018-09-22 18:57:43 -0700355static const upb_msglayout *const google_protobuf_EnumOptions_submsgs[1] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800356 &google_protobuf_UninterpretedOption_msginit,
357};
358
Paul Yang9bda1f12018-09-22 18:57:43 -0700359static const upb_msglayout_field google_protobuf_EnumOptions__fields[3] = {
360 {2, UPB_SIZE(1, 1), 1, 0, 8, 1},
361 {3, UPB_SIZE(2, 2), 2, 0, 8, 1},
362 {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
Paul Yang6dd563a2018-03-08 17:35:22 -0800363};
364
Paul Yang9bda1f12018-09-22 18:57:43 -0700365const upb_msglayout google_protobuf_EnumOptions_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800366 &google_protobuf_EnumOptions_submsgs[0],
367 &google_protobuf_EnumOptions__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -0700368 UPB_SIZE(8, 16), 3, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800369};
370
Paul Yang9bda1f12018-09-22 18:57:43 -0700371static const upb_msglayout *const google_protobuf_EnumValueOptions_submsgs[1] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800372 &google_protobuf_UninterpretedOption_msginit,
373};
374
Paul Yang9bda1f12018-09-22 18:57:43 -0700375static const upb_msglayout_field google_protobuf_EnumValueOptions__fields[2] = {
376 {1, UPB_SIZE(1, 1), 1, 0, 8, 1},
377 {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
Paul Yang6dd563a2018-03-08 17:35:22 -0800378};
379
Paul Yang9bda1f12018-09-22 18:57:43 -0700380const upb_msglayout google_protobuf_EnumValueOptions_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800381 &google_protobuf_EnumValueOptions_submsgs[0],
382 &google_protobuf_EnumValueOptions__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -0700383 UPB_SIZE(8, 16), 2, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800384};
385
Paul Yang9bda1f12018-09-22 18:57:43 -0700386static const upb_msglayout *const google_protobuf_ServiceOptions_submsgs[1] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800387 &google_protobuf_UninterpretedOption_msginit,
388};
389
Paul Yang9bda1f12018-09-22 18:57:43 -0700390static const upb_msglayout_field google_protobuf_ServiceOptions__fields[2] = {
391 {33, UPB_SIZE(1, 1), 1, 0, 8, 1},
392 {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
Paul Yang6dd563a2018-03-08 17:35:22 -0800393};
394
Paul Yang9bda1f12018-09-22 18:57:43 -0700395const upb_msglayout google_protobuf_ServiceOptions_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800396 &google_protobuf_ServiceOptions_submsgs[0],
397 &google_protobuf_ServiceOptions__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -0700398 UPB_SIZE(8, 16), 2, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800399};
400
Paul Yang9bda1f12018-09-22 18:57:43 -0700401static const upb_msglayout *const google_protobuf_MethodOptions_submsgs[1] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800402 &google_protobuf_UninterpretedOption_msginit,
403};
404
Paul Yang9bda1f12018-09-22 18:57:43 -0700405static const upb_msglayout_field google_protobuf_MethodOptions__fields[3] = {
406 {33, UPB_SIZE(16, 16), 2, 0, 8, 1},
407 {34, UPB_SIZE(8, 8), 1, 0, 14, 1},
408 {999, UPB_SIZE(20, 24), 0, 0, 11, 3},
Paul Yang6dd563a2018-03-08 17:35:22 -0800409};
410
Paul Yang9bda1f12018-09-22 18:57:43 -0700411const upb_msglayout google_protobuf_MethodOptions_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800412 &google_protobuf_MethodOptions_submsgs[0],
413 &google_protobuf_MethodOptions__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -0700414 UPB_SIZE(24, 32), 3, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800415};
416
Paul Yang9bda1f12018-09-22 18:57:43 -0700417static const upb_msglayout *const google_protobuf_UninterpretedOption_submsgs[1] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800418 &google_protobuf_UninterpretedOption_NamePart_msginit,
419};
420
Paul Yang9bda1f12018-09-22 18:57:43 -0700421static const upb_msglayout_field google_protobuf_UninterpretedOption__fields[7] = {
422 {2, UPB_SIZE(56, 80), 0, 0, 11, 3},
423 {3, UPB_SIZE(32, 32), 4, 0, 9, 1},
424 {4, UPB_SIZE(8, 8), 1, 0, 4, 1},
425 {5, UPB_SIZE(16, 16), 2, 0, 3, 1},
426 {6, UPB_SIZE(24, 24), 3, 0, 1, 1},
427 {7, UPB_SIZE(40, 48), 5, 0, 12, 1},
428 {8, UPB_SIZE(48, 64), 6, 0, 9, 1},
Paul Yang6dd563a2018-03-08 17:35:22 -0800429};
430
Paul Yang9bda1f12018-09-22 18:57:43 -0700431const upb_msglayout google_protobuf_UninterpretedOption_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800432 &google_protobuf_UninterpretedOption_submsgs[0],
433 &google_protobuf_UninterpretedOption__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -0700434 UPB_SIZE(64, 96), 7, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800435};
436
Paul Yang9bda1f12018-09-22 18:57:43 -0700437static const upb_msglayout_field google_protobuf_UninterpretedOption_NamePart__fields[2] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800438 {1, UPB_SIZE(4, 8), 2, 0, 9, 2},
Paul Yang9bda1f12018-09-22 18:57:43 -0700439 {2, UPB_SIZE(1, 1), 1, 0, 8, 2},
Paul Yang6dd563a2018-03-08 17:35:22 -0800440};
441
Paul Yang9bda1f12018-09-22 18:57:43 -0700442const upb_msglayout google_protobuf_UninterpretedOption_NamePart_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800443 NULL,
444 &google_protobuf_UninterpretedOption_NamePart__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -0700445 UPB_SIZE(16, 32), 2, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800446};
447
Paul Yang9bda1f12018-09-22 18:57:43 -0700448static const upb_msglayout *const google_protobuf_SourceCodeInfo_submsgs[1] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800449 &google_protobuf_SourceCodeInfo_Location_msginit,
450};
451
Paul Yang9bda1f12018-09-22 18:57:43 -0700452static const upb_msglayout_field google_protobuf_SourceCodeInfo__fields[1] = {
453 {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
Paul Yang6dd563a2018-03-08 17:35:22 -0800454};
455
Paul Yang9bda1f12018-09-22 18:57:43 -0700456const upb_msglayout google_protobuf_SourceCodeInfo_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800457 &google_protobuf_SourceCodeInfo_submsgs[0],
458 &google_protobuf_SourceCodeInfo__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -0700459 UPB_SIZE(4, 8), 1, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800460};
461
Paul Yang9bda1f12018-09-22 18:57:43 -0700462static const upb_msglayout_field google_protobuf_SourceCodeInfo_Location__fields[5] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800463 {1, UPB_SIZE(20, 40), 0, 0, 5, 3},
464 {2, UPB_SIZE(24, 48), 0, 0, 5, 3},
465 {3, UPB_SIZE(4, 8), 1, 0, 9, 1},
466 {4, UPB_SIZE(12, 24), 2, 0, 9, 1},
467 {6, UPB_SIZE(28, 56), 0, 0, 9, 3},
Paul Yang6dd563a2018-03-08 17:35:22 -0800468};
469
Paul Yang9bda1f12018-09-22 18:57:43 -0700470const upb_msglayout google_protobuf_SourceCodeInfo_Location_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800471 NULL,
472 &google_protobuf_SourceCodeInfo_Location__fields[0],
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800473 UPB_SIZE(32, 64), 5, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800474};
475
Paul Yang9bda1f12018-09-22 18:57:43 -0700476static const upb_msglayout *const google_protobuf_GeneratedCodeInfo_submsgs[1] = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800477 &google_protobuf_GeneratedCodeInfo_Annotation_msginit,
478};
479
Paul Yang9bda1f12018-09-22 18:57:43 -0700480static const upb_msglayout_field google_protobuf_GeneratedCodeInfo__fields[1] = {
481 {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
Paul Yang6dd563a2018-03-08 17:35:22 -0800482};
483
Paul Yang9bda1f12018-09-22 18:57:43 -0700484const upb_msglayout google_protobuf_GeneratedCodeInfo_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800485 &google_protobuf_GeneratedCodeInfo_submsgs[0],
486 &google_protobuf_GeneratedCodeInfo__fields[0],
Paul Yang9bda1f12018-09-22 18:57:43 -0700487 UPB_SIZE(4, 8), 1, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800488};
489
Paul Yang9bda1f12018-09-22 18:57:43 -0700490static const upb_msglayout_field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800491 {1, UPB_SIZE(20, 32), 0, 0, 5, 3},
492 {2, UPB_SIZE(12, 16), 3, 0, 9, 1},
Paul Yang9bda1f12018-09-22 18:57:43 -0700493 {3, UPB_SIZE(4, 4), 1, 0, 5, 1},
494 {4, UPB_SIZE(8, 8), 2, 0, 5, 1},
Paul Yang6dd563a2018-03-08 17:35:22 -0800495};
496
Paul Yang9bda1f12018-09-22 18:57:43 -0700497const upb_msglayout google_protobuf_GeneratedCodeInfo_Annotation_msginit = {
Paul Yang6dd563a2018-03-08 17:35:22 -0800498 NULL,
499 &google_protobuf_GeneratedCodeInfo_Annotation__fields[0],
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800500 UPB_SIZE(24, 48), 4, false,
Paul Yang6dd563a2018-03-08 17:35:22 -0800501};
502
Paul Yang9bda1f12018-09-22 18:57:43 -0700503
Jisi Liu3b3c8ab2016-03-30 11:39:59 -0700504
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800505#include <string.h>
Jisi Liu3b3c8ab2016-03-30 11:39:59 -0700506
Paul Yang60327462017-10-09 12:39:13 -0700507/* Maps descriptor type -> upb field type. */
Paul Yang6dd563a2018-03-08 17:35:22 -0800508const uint8_t upb_desctype_to_fieldtype[] = {
Paul Yang60327462017-10-09 12:39:13 -0700509 UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
510 UPB_TYPE_DOUBLE, /* DOUBLE */
511 UPB_TYPE_FLOAT, /* FLOAT */
512 UPB_TYPE_INT64, /* INT64 */
513 UPB_TYPE_UINT64, /* UINT64 */
514 UPB_TYPE_INT32, /* INT32 */
515 UPB_TYPE_UINT64, /* FIXED64 */
516 UPB_TYPE_UINT32, /* FIXED32 */
517 UPB_TYPE_BOOL, /* BOOL */
518 UPB_TYPE_STRING, /* STRING */
519 UPB_TYPE_MESSAGE, /* GROUP */
520 UPB_TYPE_MESSAGE, /* MESSAGE */
521 UPB_TYPE_BYTES, /* BYTES */
522 UPB_TYPE_UINT32, /* UINT32 */
523 UPB_TYPE_ENUM, /* ENUM */
524 UPB_TYPE_INT32, /* SFIXED32 */
525 UPB_TYPE_INT64, /* SFIXED64 */
526 UPB_TYPE_INT32, /* SINT32 */
527 UPB_TYPE_INT64, /* SINT64 */
528};
529
530/* Data pertaining to the parse. */
531typedef struct {
Paul Yang60327462017-10-09 12:39:13 -0700532 /* Current decoding pointer. Points to the beginning of a field until we
533 * have finished decoding the whole field. */
534 const char *ptr;
535} upb_decstate;
536
537/* Data pertaining to a single message frame. */
538typedef struct {
539 const char *limit;
540 int32_t group_number; /* 0 if we are not parsing a group. */
541
542 /* These members are unset for an unknown group frame. */
543 char *msg;
Paul Yang9bda1f12018-09-22 18:57:43 -0700544 const upb_msglayout *m;
Paul Yang60327462017-10-09 12:39:13 -0700545} upb_decframe;
546
547#define CHK(x) if (!(x)) { return false; }
548
549static bool upb_skip_unknowngroup(upb_decstate *d, int field_number,
550 const char *limit);
551static bool upb_decode_message(upb_decstate *d, const char *limit,
552 int group_number, char *msg,
Paul Yang9bda1f12018-09-22 18:57:43 -0700553 const upb_msglayout *l);
Paul Yang60327462017-10-09 12:39:13 -0700554
555static bool upb_decode_varint(const char **ptr, const char *limit,
556 uint64_t *val) {
557 uint8_t byte;
558 int bitpos = 0;
559 const char *p = *ptr;
560 *val = 0;
561
562 do {
563 CHK(bitpos < 70 && p < limit);
564 byte = *p;
565 *val |= (uint64_t)(byte & 0x7F) << bitpos;
566 p++;
567 bitpos += 7;
568 } while (byte & 0x80);
569
570 *ptr = p;
571 return true;
572}
573
574static bool upb_decode_varint32(const char **ptr, const char *limit,
575 uint32_t *val) {
576 uint64_t u64;
577 CHK(upb_decode_varint(ptr, limit, &u64) && u64 <= UINT32_MAX);
578 *val = u64;
579 return true;
580}
581
582static bool upb_decode_64bit(const char **ptr, const char *limit,
583 uint64_t *val) {
584 CHK(limit - *ptr >= 8);
585 memcpy(val, *ptr, 8);
586 *ptr += 8;
587 return true;
588}
589
590static bool upb_decode_32bit(const char **ptr, const char *limit,
591 uint32_t *val) {
592 CHK(limit - *ptr >= 4);
593 memcpy(val, *ptr, 4);
594 *ptr += 4;
595 return true;
596}
597
598static bool upb_decode_tag(const char **ptr, const char *limit,
599 int *field_number, int *wire_type) {
600 uint32_t tag = 0;
601 CHK(upb_decode_varint32(ptr, limit, &tag));
602 *field_number = tag >> 3;
603 *wire_type = tag & 7;
604 return true;
605}
606
607static int32_t upb_zzdecode_32(uint32_t n) {
608 return (n >> 1) ^ -(int32_t)(n & 1);
609}
610
611static int64_t upb_zzdecode_64(uint64_t n) {
612 return (n >> 1) ^ -(int64_t)(n & 1);
613}
614
615static bool upb_decode_string(const char **ptr, const char *limit,
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800616 upb_strview *val) {
Paul Yang60327462017-10-09 12:39:13 -0700617 uint32_t len;
618
619 CHK(upb_decode_varint32(ptr, limit, &len) &&
620 len < INT32_MAX &&
621 limit - *ptr >= (int32_t)len);
622
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800623 *val = upb_strview_make(*ptr, len);
Paul Yang60327462017-10-09 12:39:13 -0700624 *ptr += len;
625 return true;
626}
627
628static void upb_set32(void *msg, size_t ofs, uint32_t val) {
629 memcpy((char*)msg + ofs, &val, sizeof(val));
630}
631
632static bool upb_append_unknown(upb_decstate *d, upb_decframe *frame,
633 const char *start) {
Paul Yang9bda1f12018-09-22 18:57:43 -0700634 upb_msg_addunknown(frame->msg, start, d->ptr - start);
Paul Yang60327462017-10-09 12:39:13 -0700635 return true;
636}
637
638static bool upb_skip_unknownfielddata(upb_decstate *d, upb_decframe *frame,
639 int field_number, int wire_type) {
640 switch (wire_type) {
641 case UPB_WIRE_TYPE_VARINT: {
642 uint64_t val;
643 return upb_decode_varint(&d->ptr, frame->limit, &val);
644 }
645 case UPB_WIRE_TYPE_32BIT: {
646 uint32_t val;
647 return upb_decode_32bit(&d->ptr, frame->limit, &val);
648 }
649 case UPB_WIRE_TYPE_64BIT: {
650 uint64_t val;
651 return upb_decode_64bit(&d->ptr, frame->limit, &val);
652 }
653 case UPB_WIRE_TYPE_DELIMITED: {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800654 upb_strview val;
Paul Yang60327462017-10-09 12:39:13 -0700655 return upb_decode_string(&d->ptr, frame->limit, &val);
656 }
657 case UPB_WIRE_TYPE_START_GROUP:
658 return upb_skip_unknowngroup(d, field_number, frame->limit);
659 case UPB_WIRE_TYPE_END_GROUP:
660 CHK(field_number == frame->group_number);
661 frame->limit = d->ptr;
662 return true;
663 }
664 return false;
665}
666
667static bool upb_array_grow(upb_array *arr, size_t elements) {
668 size_t needed = arr->len + elements;
669 size_t new_size = UPB_MAX(arr->size, 8);
670 size_t new_bytes;
671 size_t old_bytes;
672 void *new_data;
Paul Yang9bda1f12018-09-22 18:57:43 -0700673 upb_alloc *alloc = upb_arena_alloc(arr->arena);
Paul Yang60327462017-10-09 12:39:13 -0700674
675 while (new_size < needed) {
676 new_size *= 2;
677 }
678
679 old_bytes = arr->len * arr->element_size;
680 new_bytes = new_size * arr->element_size;
Paul Yang9bda1f12018-09-22 18:57:43 -0700681 new_data = upb_realloc(alloc, arr->data, old_bytes, new_bytes);
Paul Yang60327462017-10-09 12:39:13 -0700682 CHK(new_data);
683
684 arr->data = new_data;
685 arr->size = new_size;
686 return true;
687}
688
689static void *upb_array_reserve(upb_array *arr, size_t elements) {
690 if (arr->size - arr->len < elements) {
691 CHK(upb_array_grow(arr, elements));
692 }
693 return (char*)arr->data + (arr->len * arr->element_size);
694}
695
696static void *upb_array_add(upb_array *arr, size_t elements) {
697 void *ret = upb_array_reserve(arr, elements);
698 arr->len += elements;
699 return ret;
700}
701
702static upb_array *upb_getarr(upb_decframe *frame,
Paul Yang9bda1f12018-09-22 18:57:43 -0700703 const upb_msglayout_field *field) {
Paul Yang60327462017-10-09 12:39:13 -0700704 UPB_ASSERT(field->label == UPB_LABEL_REPEATED);
705 return *(upb_array**)&frame->msg[field->offset];
706}
707
Paul Yang9bda1f12018-09-22 18:57:43 -0700708static upb_array *upb_getorcreatearr(upb_decframe *frame,
709 const upb_msglayout_field *field) {
Paul Yang60327462017-10-09 12:39:13 -0700710 upb_array *arr = upb_getarr(frame, field);
711
712 if (!arr) {
Paul Yang9bda1f12018-09-22 18:57:43 -0700713 upb_fieldtype_t type = upb_desctype_to_fieldtype[field->descriptortype];
714 arr = upb_array_new(type, upb_msg_arena(frame->msg));
Paul Yang60327462017-10-09 12:39:13 -0700715 if (!arr) {
716 return NULL;
717 }
Paul Yang60327462017-10-09 12:39:13 -0700718 *(upb_array**)&frame->msg[field->offset] = arr;
719 }
720
721 return arr;
722}
723
724static void upb_sethasbit(upb_decframe *frame,
Paul Yang9bda1f12018-09-22 18:57:43 -0700725 const upb_msglayout_field *field) {
726 int32_t hasbit = field->presence;
727 UPB_ASSERT(field->presence > 0);
728 frame->msg[hasbit / 8] |= (1 << (hasbit % 8));
Paul Yang60327462017-10-09 12:39:13 -0700729}
730
731static void upb_setoneofcase(upb_decframe *frame,
Paul Yang9bda1f12018-09-22 18:57:43 -0700732 const upb_msglayout_field *field) {
733 UPB_ASSERT(field->presence < 0);
734 upb_set32(frame->msg, ~field->presence, field->number);
Paul Yang60327462017-10-09 12:39:13 -0700735}
736
Paul Yang9bda1f12018-09-22 18:57:43 -0700737static char *upb_decode_prepareslot(upb_decframe *frame,
738 const upb_msglayout_field *field) {
Paul Yang60327462017-10-09 12:39:13 -0700739 char *field_mem = frame->msg + field->offset;
740 upb_array *arr;
741
742 if (field->label == UPB_LABEL_REPEATED) {
Paul Yang9bda1f12018-09-22 18:57:43 -0700743 arr = upb_getorcreatearr(frame, field);
Paul Yang60327462017-10-09 12:39:13 -0700744 field_mem = upb_array_reserve(arr, 1);
745 }
746
747 return field_mem;
748}
749
750static void upb_decode_setpresent(upb_decframe *frame,
Paul Yang9bda1f12018-09-22 18:57:43 -0700751 const upb_msglayout_field *field) {
Paul Yang60327462017-10-09 12:39:13 -0700752 if (field->label == UPB_LABEL_REPEATED) {
753 upb_array *arr = upb_getarr(frame, field);
754 UPB_ASSERT(arr->len < arr->size);
755 arr->len++;
Paul Yang9bda1f12018-09-22 18:57:43 -0700756 } else if (field->presence < 0) {
Paul Yang60327462017-10-09 12:39:13 -0700757 upb_setoneofcase(frame, field);
Paul Yang9bda1f12018-09-22 18:57:43 -0700758 } else if (field->presence > 0) {
Paul Yang60327462017-10-09 12:39:13 -0700759 upb_sethasbit(frame, field);
760 }
761}
762
Paul Yang9bda1f12018-09-22 18:57:43 -0700763static bool upb_decode_submsg(upb_decstate *d, upb_decframe *frame,
Paul Yang60327462017-10-09 12:39:13 -0700764 const char *limit,
Paul Yang9bda1f12018-09-22 18:57:43 -0700765 const upb_msglayout_field *field,
Paul Yang60327462017-10-09 12:39:13 -0700766 int group_number) {
Paul Yang9bda1f12018-09-22 18:57:43 -0700767 char *submsg_slot = upb_decode_prepareslot(frame, field);
768 char *submsg = *(void **)submsg_slot;
769 const upb_msglayout *subm;
Paul Yang60327462017-10-09 12:39:13 -0700770
Paul Yang60327462017-10-09 12:39:13 -0700771 subm = frame->m->submsgs[field->submsg_index];
772 UPB_ASSERT(subm);
773
774 if (!submsg) {
Paul Yang9bda1f12018-09-22 18:57:43 -0700775 submsg = upb_msg_new(subm, upb_msg_arena(frame->msg));
Paul Yang60327462017-10-09 12:39:13 -0700776 CHK(submsg);
Paul Yang6dd563a2018-03-08 17:35:22 -0800777 *(void**)submsg_slot = submsg;
Paul Yang60327462017-10-09 12:39:13 -0700778 }
779
780 upb_decode_message(d, limit, group_number, submsg, subm);
781
782 return true;
783}
784
785static bool upb_decode_varintfield(upb_decstate *d, upb_decframe *frame,
786 const char *field_start,
Paul Yang9bda1f12018-09-22 18:57:43 -0700787 const upb_msglayout_field *field) {
Paul Yang60327462017-10-09 12:39:13 -0700788 uint64_t val;
789 void *field_mem;
790
Paul Yang9bda1f12018-09-22 18:57:43 -0700791 field_mem = upb_decode_prepareslot(frame, field);
Paul Yang60327462017-10-09 12:39:13 -0700792 CHK(field_mem);
793 CHK(upb_decode_varint(&d->ptr, frame->limit, &val));
794
Paul Yang0f4ad852018-03-06 13:30:03 -0800795 switch ((upb_descriptortype_t)field->descriptortype) {
Paul Yang60327462017-10-09 12:39:13 -0700796 case UPB_DESCRIPTOR_TYPE_INT64:
797 case UPB_DESCRIPTOR_TYPE_UINT64:
798 memcpy(field_mem, &val, sizeof(val));
799 break;
800 case UPB_DESCRIPTOR_TYPE_INT32:
801 case UPB_DESCRIPTOR_TYPE_UINT32:
802 case UPB_DESCRIPTOR_TYPE_ENUM: {
803 uint32_t val32 = val;
804 memcpy(field_mem, &val32, sizeof(val32));
805 break;
806 }
807 case UPB_DESCRIPTOR_TYPE_BOOL: {
808 bool valbool = val != 0;
809 memcpy(field_mem, &valbool, sizeof(valbool));
810 break;
811 }
812 case UPB_DESCRIPTOR_TYPE_SINT32: {
813 int32_t decoded = upb_zzdecode_32(val);
814 memcpy(field_mem, &decoded, sizeof(decoded));
815 break;
816 }
817 case UPB_DESCRIPTOR_TYPE_SINT64: {
818 int64_t decoded = upb_zzdecode_64(val);
819 memcpy(field_mem, &decoded, sizeof(decoded));
820 break;
821 }
822 default:
823 return upb_append_unknown(d, frame, field_start);
824 }
825
826 upb_decode_setpresent(frame, field);
827 return true;
828}
829
830static bool upb_decode_64bitfield(upb_decstate *d, upb_decframe *frame,
831 const char *field_start,
Paul Yang9bda1f12018-09-22 18:57:43 -0700832 const upb_msglayout_field *field) {
Paul Yang60327462017-10-09 12:39:13 -0700833 void *field_mem;
834 uint64_t val;
835
Paul Yang9bda1f12018-09-22 18:57:43 -0700836 field_mem = upb_decode_prepareslot(frame, field);
Paul Yang60327462017-10-09 12:39:13 -0700837 CHK(field_mem);
838 CHK(upb_decode_64bit(&d->ptr, frame->limit, &val));
839
Paul Yang0f4ad852018-03-06 13:30:03 -0800840 switch ((upb_descriptortype_t)field->descriptortype) {
Paul Yang60327462017-10-09 12:39:13 -0700841 case UPB_DESCRIPTOR_TYPE_DOUBLE:
842 case UPB_DESCRIPTOR_TYPE_FIXED64:
843 case UPB_DESCRIPTOR_TYPE_SFIXED64:
844 memcpy(field_mem, &val, sizeof(val));
845 break;
846 default:
847 return upb_append_unknown(d, frame, field_start);
848 }
849
850 upb_decode_setpresent(frame, field);
851 return true;
852}
853
854static bool upb_decode_32bitfield(upb_decstate *d, upb_decframe *frame,
855 const char *field_start,
Paul Yang9bda1f12018-09-22 18:57:43 -0700856 const upb_msglayout_field *field) {
Paul Yang60327462017-10-09 12:39:13 -0700857 void *field_mem;
858 uint32_t val;
859
Paul Yang9bda1f12018-09-22 18:57:43 -0700860 field_mem = upb_decode_prepareslot(frame, field);
Paul Yang60327462017-10-09 12:39:13 -0700861 CHK(field_mem);
862 CHK(upb_decode_32bit(&d->ptr, frame->limit, &val));
863
Paul Yang0f4ad852018-03-06 13:30:03 -0800864 switch ((upb_descriptortype_t)field->descriptortype) {
Paul Yang60327462017-10-09 12:39:13 -0700865 case UPB_DESCRIPTOR_TYPE_FLOAT:
866 case UPB_DESCRIPTOR_TYPE_FIXED32:
867 case UPB_DESCRIPTOR_TYPE_SFIXED32:
868 memcpy(field_mem, &val, sizeof(val));
869 break;
870 default:
871 return upb_append_unknown(d, frame, field_start);
872 }
873
874 upb_decode_setpresent(frame, field);
875 return true;
876}
877
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800878static bool upb_decode_fixedpacked(upb_array *arr, upb_strview data,
Paul Yang60327462017-10-09 12:39:13 -0700879 int elem_size) {
880 int elements = data.size / elem_size;
881 void *field_mem;
882
883 CHK((size_t)(elements * elem_size) == data.size);
884 field_mem = upb_array_add(arr, elements);
885 CHK(field_mem);
886 memcpy(field_mem, data.data, data.size);
887 return true;
888}
889
890static bool upb_decode_toarray(upb_decstate *d, upb_decframe *frame,
891 const char *field_start,
Paul Yang9bda1f12018-09-22 18:57:43 -0700892 const upb_msglayout_field *field,
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800893 upb_strview val) {
Paul Yang9bda1f12018-09-22 18:57:43 -0700894 upb_array *arr = upb_getorcreatearr(frame, field);
Paul Yang60327462017-10-09 12:39:13 -0700895
896#define VARINT_CASE(ctype, decode) { \
897 const char *ptr = val.data; \
898 const char *limit = ptr + val.size; \
899 while (ptr < limit) { \
900 uint64_t val; \
901 void *field_mem; \
902 ctype decoded; \
903 CHK(upb_decode_varint(&ptr, limit, &val)); \
904 decoded = (decode)(val); \
905 field_mem = upb_array_add(arr, 1); \
906 CHK(field_mem); \
907 memcpy(field_mem, &decoded, sizeof(ctype)); \
908 } \
909 return true; \
910}
911
Paul Yang0f4ad852018-03-06 13:30:03 -0800912 switch ((upb_descriptortype_t)field->descriptortype) {
Paul Yang60327462017-10-09 12:39:13 -0700913 case UPB_DESCRIPTOR_TYPE_STRING:
914 case UPB_DESCRIPTOR_TYPE_BYTES: {
915 void *field_mem = upb_array_add(arr, 1);
916 CHK(field_mem);
917 memcpy(field_mem, &val, sizeof(val));
918 return true;
919 }
920 case UPB_DESCRIPTOR_TYPE_FLOAT:
921 case UPB_DESCRIPTOR_TYPE_FIXED32:
922 case UPB_DESCRIPTOR_TYPE_SFIXED32:
923 return upb_decode_fixedpacked(arr, val, sizeof(int32_t));
924 case UPB_DESCRIPTOR_TYPE_DOUBLE:
925 case UPB_DESCRIPTOR_TYPE_FIXED64:
926 case UPB_DESCRIPTOR_TYPE_SFIXED64:
927 return upb_decode_fixedpacked(arr, val, sizeof(int64_t));
928 case UPB_DESCRIPTOR_TYPE_INT32:
929 case UPB_DESCRIPTOR_TYPE_UINT32:
930 case UPB_DESCRIPTOR_TYPE_ENUM:
931 /* TODO: proto2 enum field that isn't in the enum. */
932 VARINT_CASE(uint32_t, uint32_t);
933 case UPB_DESCRIPTOR_TYPE_INT64:
934 case UPB_DESCRIPTOR_TYPE_UINT64:
935 VARINT_CASE(uint64_t, uint64_t);
936 case UPB_DESCRIPTOR_TYPE_BOOL:
937 VARINT_CASE(bool, bool);
938 case UPB_DESCRIPTOR_TYPE_SINT32:
939 VARINT_CASE(int32_t, upb_zzdecode_32);
940 case UPB_DESCRIPTOR_TYPE_SINT64:
941 VARINT_CASE(int64_t, upb_zzdecode_64);
Paul Yang6dd563a2018-03-08 17:35:22 -0800942 case UPB_DESCRIPTOR_TYPE_MESSAGE: {
Paul Yang9bda1f12018-09-22 18:57:43 -0700943 const upb_msglayout *subm;
Paul Yang6dd563a2018-03-08 17:35:22 -0800944 char *submsg;
945 void *field_mem;
946
Paul Yang60327462017-10-09 12:39:13 -0700947 CHK(val.size <= (size_t)(frame->limit - val.data));
Paul Yang6dd563a2018-03-08 17:35:22 -0800948 d->ptr -= val.size;
949
950 /* Create elemente message. */
Paul Yang6dd563a2018-03-08 17:35:22 -0800951 subm = frame->m->submsgs[field->submsg_index];
952 UPB_ASSERT(subm);
953
Paul Yang9bda1f12018-09-22 18:57:43 -0700954 submsg = upb_msg_new(subm, upb_msg_arena(frame->msg));
Paul Yang6dd563a2018-03-08 17:35:22 -0800955 CHK(submsg);
Paul Yang6dd563a2018-03-08 17:35:22 -0800956
957 field_mem = upb_array_add(arr, 1);
958 CHK(field_mem);
959 *(void**)field_mem = submsg;
960
961 return upb_decode_message(
962 d, val.data + val.size, frame->group_number, submsg, subm);
963 }
Paul Yang60327462017-10-09 12:39:13 -0700964 case UPB_DESCRIPTOR_TYPE_GROUP:
965 return upb_append_unknown(d, frame, field_start);
966 }
967#undef VARINT_CASE
968 UPB_UNREACHABLE();
969}
970
971static bool upb_decode_delimitedfield(upb_decstate *d, upb_decframe *frame,
972 const char *field_start,
Paul Yang9bda1f12018-09-22 18:57:43 -0700973 const upb_msglayout_field *field) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -0800974 upb_strview val;
Paul Yang60327462017-10-09 12:39:13 -0700975
976 CHK(upb_decode_string(&d->ptr, frame->limit, &val));
977
978 if (field->label == UPB_LABEL_REPEATED) {
979 return upb_decode_toarray(d, frame, field_start, field, val);
980 } else {
Paul Yang0f4ad852018-03-06 13:30:03 -0800981 switch ((upb_descriptortype_t)field->descriptortype) {
Paul Yang60327462017-10-09 12:39:13 -0700982 case UPB_DESCRIPTOR_TYPE_STRING:
983 case UPB_DESCRIPTOR_TYPE_BYTES: {
Paul Yang9bda1f12018-09-22 18:57:43 -0700984 void *field_mem = upb_decode_prepareslot(frame, field);
Paul Yang60327462017-10-09 12:39:13 -0700985 CHK(field_mem);
986 memcpy(field_mem, &val, sizeof(val));
987 break;
988 }
989 case UPB_DESCRIPTOR_TYPE_MESSAGE:
990 CHK(val.size <= (size_t)(frame->limit - val.data));
Paul Yang6dd563a2018-03-08 17:35:22 -0800991 d->ptr -= val.size;
Paul Yang60327462017-10-09 12:39:13 -0700992 CHK(upb_decode_submsg(d, frame, val.data + val.size, field, 0));
993 break;
994 default:
995 /* TODO(haberman): should we accept the last element of a packed? */
996 return upb_append_unknown(d, frame, field_start);
997 }
998 upb_decode_setpresent(frame, field);
999 return true;
1000 }
1001}
1002
Paul Yang9bda1f12018-09-22 18:57:43 -07001003static const upb_msglayout_field *upb_find_field(const upb_msglayout *l,
1004 uint32_t field_number) {
Paul Yang60327462017-10-09 12:39:13 -07001005 /* Lots of optimization opportunities here. */
1006 int i;
1007 for (i = 0; i < l->field_count; i++) {
1008 if (l->fields[i].number == field_number) {
1009 return &l->fields[i];
1010 }
1011 }
1012
1013 return NULL; /* Unknown field. */
1014}
1015
1016static bool upb_decode_field(upb_decstate *d, upb_decframe *frame) {
1017 int field_number;
1018 int wire_type;
1019 const char *field_start = d->ptr;
Paul Yang9bda1f12018-09-22 18:57:43 -07001020 const upb_msglayout_field *field;
Paul Yang60327462017-10-09 12:39:13 -07001021
1022 CHK(upb_decode_tag(&d->ptr, frame->limit, &field_number, &wire_type));
1023 field = upb_find_field(frame->m, field_number);
1024
1025 if (field) {
1026 switch (wire_type) {
1027 case UPB_WIRE_TYPE_VARINT:
1028 return upb_decode_varintfield(d, frame, field_start, field);
1029 case UPB_WIRE_TYPE_32BIT:
1030 return upb_decode_32bitfield(d, frame, field_start, field);
1031 case UPB_WIRE_TYPE_64BIT:
1032 return upb_decode_64bitfield(d, frame, field_start, field);
1033 case UPB_WIRE_TYPE_DELIMITED:
1034 return upb_decode_delimitedfield(d, frame, field_start, field);
1035 case UPB_WIRE_TYPE_START_GROUP:
Paul Yang0f4ad852018-03-06 13:30:03 -08001036 CHK(field->descriptortype == UPB_DESCRIPTOR_TYPE_GROUP);
Paul Yang60327462017-10-09 12:39:13 -07001037 return upb_decode_submsg(d, frame, frame->limit, field, field_number);
1038 case UPB_WIRE_TYPE_END_GROUP:
1039 CHK(frame->group_number == field_number)
1040 frame->limit = d->ptr;
1041 return true;
1042 default:
1043 return false;
1044 }
1045 } else {
1046 CHK(field_number != 0);
Paul Yang9bda1f12018-09-22 18:57:43 -07001047 CHK(upb_skip_unknownfielddata(d, frame, field_number, wire_type));
1048 CHK(upb_append_unknown(d, frame, field_start));
1049 return true;
Paul Yang60327462017-10-09 12:39:13 -07001050 }
1051}
1052
1053static bool upb_skip_unknowngroup(upb_decstate *d, int field_number,
1054 const char *limit) {
1055 upb_decframe frame;
1056 frame.msg = NULL;
1057 frame.m = NULL;
1058 frame.group_number = field_number;
1059 frame.limit = limit;
1060
1061 while (d->ptr < frame.limit) {
1062 int wire_type;
1063 int field_number;
1064
1065 CHK(upb_decode_tag(&d->ptr, frame.limit, &field_number, &wire_type));
1066 CHK(upb_skip_unknownfielddata(d, &frame, field_number, wire_type));
1067 }
1068
1069 return true;
1070}
1071
1072static bool upb_decode_message(upb_decstate *d, const char *limit,
1073 int group_number, char *msg,
Paul Yang9bda1f12018-09-22 18:57:43 -07001074 const upb_msglayout *l) {
Paul Yang60327462017-10-09 12:39:13 -07001075 upb_decframe frame;
1076 frame.group_number = group_number;
1077 frame.limit = limit;
1078 frame.msg = msg;
1079 frame.m = l;
1080
1081 while (d->ptr < frame.limit) {
1082 CHK(upb_decode_field(d, &frame));
1083 }
1084
1085 return true;
1086}
1087
Adam Cozzette8645d892019-03-26 14:32:20 -07001088bool upb_decode(const char *buf, size_t size, void *msg,
1089 const upb_msglayout *l) {
Paul Yang60327462017-10-09 12:39:13 -07001090 upb_decstate state;
Adam Cozzette8645d892019-03-26 14:32:20 -07001091 state.ptr = buf;
Paul Yang60327462017-10-09 12:39:13 -07001092
Adam Cozzette8645d892019-03-26 14:32:20 -07001093 return upb_decode_message(&state, buf + size, 0, msg, l);
Paul Yang60327462017-10-09 12:39:13 -07001094}
1095
1096#undef CHK
1097
1098
Paul Yange0e54662016-09-15 11:09:01 -07001099#include <ctype.h>
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001100#include <errno.h>
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001101#include <stdlib.h>
1102#include <string.h>
1103
1104typedef struct {
1105 size_t len;
1106 char str[1]; /* Null-terminated string data follows. */
1107} str_t;
1108
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001109static str_t *newstr(upb_alloc *alloc, const char *data, size_t len) {
1110 str_t *ret = upb_malloc(alloc, sizeof(*ret) + len);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001111 if (!ret) return NULL;
1112 ret->len = len;
1113 memcpy(ret->str, data, len);
1114 ret->str[len] = '\0';
1115 return ret;
1116}
1117
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001118struct upb_fielddef {
1119 const upb_filedef *file;
1120 const upb_msgdef *msgdef;
1121 const char *full_name;
1122 union {
1123 int64_t sint;
1124 uint64_t uint;
1125 double dbl;
1126 float flt;
1127 bool boolean;
1128 str_t *str;
1129 } defaultval;
1130 const upb_oneofdef *oneof;
1131 union {
1132 const upb_msgdef *msgdef;
1133 const upb_enumdef *enumdef;
1134 const google_protobuf_FieldDescriptorProto *unresolved;
1135 } sub;
1136 uint32_t number_;
1137 uint32_t index_;
1138 uint32_t selector_base; /* Used to index into a upb::Handlers table. */
1139 bool is_extension_;
1140 bool lazy_;
1141 bool packed_;
1142 upb_descriptortype_t type_;
1143 upb_label_t label_;
1144};
1145
1146struct upb_msgdef {
1147 const upb_filedef *file;
1148 const char *full_name;
1149 uint32_t selector_count;
1150 uint32_t submsg_field_count;
1151
1152 /* Tables for looking up fields by number and name. */
1153 upb_inttable itof;
1154 upb_strtable ntof;
1155
1156 const upb_fielddef *fields;
1157 const upb_oneofdef *oneofs;
1158 int field_count;
1159 int oneof_count;
1160
1161 /* Is this a map-entry message? */
1162 bool map_entry;
1163 upb_wellknowntype_t well_known_type;
1164
1165 /* TODO(haberman): proper extension ranges (there can be multiple). */
1166};
1167
1168struct upb_enumdef {
1169 const upb_filedef *file;
1170 const char *full_name;
1171 upb_strtable ntoi;
1172 upb_inttable iton;
1173 int32_t defaultval;
1174};
1175
1176struct upb_oneofdef {
1177 const upb_msgdef *parent;
1178 const char *full_name;
1179 uint32_t index;
1180 upb_strtable ntof;
1181 upb_inttable itof;
1182};
1183
1184struct upb_filedef {
1185 const char *name;
1186 const char *package;
1187 const char *phpprefix;
1188 const char *phpnamespace;
1189 upb_syntax_t syntax;
1190
1191 const upb_filedef **deps;
1192 const upb_msgdef *msgs;
1193 const upb_enumdef *enums;
1194 const upb_fielddef *exts;
1195
1196 int dep_count;
1197 int msg_count;
1198 int enum_count;
1199 int ext_count;
1200};
1201
1202struct upb_symtab {
1203 upb_arena *arena;
1204 upb_strtable syms; /* full_name -> packed def ptr */
1205 upb_strtable files; /* file_name -> upb_filedef* */
1206};
1207
1208/* Inside a symtab we store tagged pointers to specific def types. */
1209typedef enum {
1210 UPB_DEFTYPE_MSG = 0,
1211 UPB_DEFTYPE_ENUM = 1,
1212 UPB_DEFTYPE_FIELD = 2,
1213 UPB_DEFTYPE_ONEOF = 3
1214} upb_deftype_t;
1215
1216static const void *unpack_def(upb_value v, upb_deftype_t type) {
1217 uintptr_t num = (uintptr_t)upb_value_getconstptr(v);
1218 return (num & 3) == type ? (const void*)(num & ~3) : NULL;
1219}
1220
1221static upb_value pack_def(const void *ptr, upb_deftype_t type) {
1222 uintptr_t num = (uintptr_t)ptr | type;
1223 return upb_value_constptr((const void*)num);
1224}
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001225
1226/* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. */
1227static bool upb_isbetween(char c, char low, char high) {
1228 return c >= low && c <= high;
1229}
1230
1231static bool upb_isletter(char c) {
1232 return upb_isbetween(c, 'A', 'Z') || upb_isbetween(c, 'a', 'z') || c == '_';
1233}
1234
1235static bool upb_isalphanum(char c) {
1236 return upb_isletter(c) || upb_isbetween(c, '0', '9');
1237}
1238
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001239static bool upb_isident(upb_strview name, bool full, upb_status *s) {
1240 const char *str = name.data;
1241 size_t len = name.size;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001242 bool start = true;
1243 size_t i;
1244 for (i = 0; i < len; i++) {
1245 char c = str[i];
1246 if (c == '.') {
1247 if (start || !full) {
1248 upb_status_seterrf(s, "invalid name: unexpected '.' (%s)", str);
1249 return false;
1250 }
1251 start = true;
1252 } else if (start) {
1253 if (!upb_isletter(c)) {
1254 upb_status_seterrf(
1255 s, "invalid name: path components must start with a letter (%s)",
1256 str);
1257 return false;
1258 }
1259 start = false;
1260 } else {
1261 if (!upb_isalphanum(c)) {
1262 upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)",
1263 str);
1264 return false;
1265 }
1266 }
1267 }
1268 return !start;
1269}
1270
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001271static const char *shortdefname(const char *fullname) {
Paul Yange0e54662016-09-15 11:09:01 -07001272 const char *p;
1273
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001274 if (fullname == NULL) {
Paul Yange0e54662016-09-15 11:09:01 -07001275 return NULL;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001276 } else if ((p = strrchr(fullname, '.')) == NULL) {
Paul Yange0e54662016-09-15 11:09:01 -07001277 /* No '.' in the name, return the full string. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001278 return fullname;
Paul Yange0e54662016-09-15 11:09:01 -07001279 } else {
1280 /* Return one past the last '.'. */
1281 return p + 1;
1282 }
1283}
1284
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001285/* All submessage fields are lower than all other fields.
1286 * Secondly, fields are increasing in order. */
1287uint32_t field_rank(const upb_fielddef *f) {
1288 uint32_t ret = upb_fielddef_number(f);
1289 const uint32_t high_bit = 1 << 30;
Paul Yange0e54662016-09-15 11:09:01 -07001290 UPB_ASSERT(ret < high_bit);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001291 if (!upb_fielddef_issubmsg(f))
1292 ret |= high_bit;
1293 return ret;
1294}
1295
1296int cmp_fields(const void *p1, const void *p2) {
1297 const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
1298 const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
1299 return field_rank(f1) - field_rank(f2);
1300}
1301
1302static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
1303 /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the
1304 * lowest indexes, but we do not publicly guarantee this. */
1305 upb_msg_field_iter j;
Paul Yang5a3405c2017-02-06 12:40:51 -08001306 upb_msg_oneof_iter k;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001307 int i;
1308 uint32_t selector;
1309 int n = upb_msgdef_numfields(m);
Paul Yange0e54662016-09-15 11:09:01 -07001310 upb_fielddef **fields;
1311
1312 if (n == 0) {
1313 m->selector_count = UPB_STATIC_SELECTOR_COUNT;
1314 m->submsg_field_count = 0;
1315 return true;
1316 }
1317
1318 fields = upb_gmalloc(n * sizeof(*fields));
1319 if (!fields) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001320 upb_status_setoom(s);
Paul Yange0e54662016-09-15 11:09:01 -07001321 return false;
1322 }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001323
1324 m->submsg_field_count = 0;
1325 for(i = 0, upb_msg_field_begin(&j, m);
1326 !upb_msg_field_done(&j);
1327 upb_msg_field_next(&j), i++) {
1328 upb_fielddef *f = upb_msg_iter_field(&j);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001329 UPB_ASSERT(f->msgdef == m);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001330 if (upb_fielddef_issubmsg(f)) {
1331 m->submsg_field_count++;
1332 }
1333 fields[i] = f;
1334 }
1335
1336 qsort(fields, n, sizeof(*fields), cmp_fields);
1337
1338 selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
1339 for (i = 0; i < n; i++) {
1340 upb_fielddef *f = fields[i];
1341 f->index_ = i;
1342 f->selector_base = selector + upb_handlers_selectorbaseoffset(f);
1343 selector += upb_handlers_selectorcount(f);
1344 }
1345 m->selector_count = selector;
1346
1347#ifndef NDEBUG
1348 {
1349 /* Verify that all selectors for the message are distinct. */
1350#define TRY(type) \
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001351 if (upb_handlers_getselector(f, type, &sel)) { upb_inttable_insert(&t, sel, v); }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001352
1353 upb_inttable t;
1354 upb_value v;
1355 upb_selector_t sel;
1356
1357 upb_inttable_init(&t, UPB_CTYPE_BOOL);
1358 v = upb_value_bool(true);
1359 upb_inttable_insert(&t, UPB_STARTMSG_SELECTOR, v);
1360 upb_inttable_insert(&t, UPB_ENDMSG_SELECTOR, v);
Paul Yang60327462017-10-09 12:39:13 -07001361 upb_inttable_insert(&t, UPB_UNKNOWN_SELECTOR, v);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001362 for(upb_msg_field_begin(&j, m);
1363 !upb_msg_field_done(&j);
1364 upb_msg_field_next(&j)) {
1365 upb_fielddef *f = upb_msg_iter_field(&j);
1366 /* These calls will assert-fail in upb_table if the value already
1367 * exists. */
1368 TRY(UPB_HANDLER_INT32);
1369 TRY(UPB_HANDLER_INT64)
1370 TRY(UPB_HANDLER_UINT32)
1371 TRY(UPB_HANDLER_UINT64)
1372 TRY(UPB_HANDLER_FLOAT)
1373 TRY(UPB_HANDLER_DOUBLE)
1374 TRY(UPB_HANDLER_BOOL)
1375 TRY(UPB_HANDLER_STARTSTR)
1376 TRY(UPB_HANDLER_STRING)
1377 TRY(UPB_HANDLER_ENDSTR)
1378 TRY(UPB_HANDLER_STARTSUBMSG)
1379 TRY(UPB_HANDLER_ENDSUBMSG)
1380 TRY(UPB_HANDLER_STARTSEQ)
1381 TRY(UPB_HANDLER_ENDSEQ)
1382 }
1383 upb_inttable_uninit(&t);
1384 }
1385#undef TRY
1386#endif
1387
Paul Yang5a3405c2017-02-06 12:40:51 -08001388 for(upb_msg_oneof_begin(&k, m), i = 0;
1389 !upb_msg_oneof_done(&k);
1390 upb_msg_oneof_next(&k), i++) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001391 upb_oneofdef *o = (upb_oneofdef*)upb_msg_iter_oneof(&k);
Paul Yang5a3405c2017-02-06 12:40:51 -08001392 o->index = i;
1393 }
1394
Paul Yange0e54662016-09-15 11:09:01 -07001395 upb_gfree(fields);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001396 return true;
1397}
1398
Paul Yang9bda1f12018-09-22 18:57:43 -07001399static void assign_msg_wellknowntype(upb_msgdef *m) {
1400 const char *name = upb_msgdef_fullname(m);
1401 if (name == NULL) {
1402 m->well_known_type = UPB_WELLKNOWN_UNSPECIFIED;
1403 return;
1404 }
Paul Yang8faa7782018-12-26 10:36:09 -08001405 if (!strcmp(name, "google.protobuf.Any")) {
1406 m->well_known_type = UPB_WELLKNOWN_ANY;
Paul Yangc4f2a922019-01-17 10:18:43 -08001407 } else if (!strcmp(name, "google.protobuf.FieldMask")) {
1408 m->well_known_type = UPB_WELLKNOWN_FIELDMASK;
Paul Yang8faa7782018-12-26 10:36:09 -08001409 } else if (!strcmp(name, "google.protobuf.Duration")) {
Paul Yang9bda1f12018-09-22 18:57:43 -07001410 m->well_known_type = UPB_WELLKNOWN_DURATION;
1411 } else if (!strcmp(name, "google.protobuf.Timestamp")) {
1412 m->well_known_type = UPB_WELLKNOWN_TIMESTAMP;
1413 } else if (!strcmp(name, "google.protobuf.DoubleValue")) {
1414 m->well_known_type = UPB_WELLKNOWN_DOUBLEVALUE;
1415 } else if (!strcmp(name, "google.protobuf.FloatValue")) {
1416 m->well_known_type = UPB_WELLKNOWN_FLOATVALUE;
1417 } else if (!strcmp(name, "google.protobuf.Int64Value")) {
1418 m->well_known_type = UPB_WELLKNOWN_INT64VALUE;
1419 } else if (!strcmp(name, "google.protobuf.UInt64Value")) {
1420 m->well_known_type = UPB_WELLKNOWN_UINT64VALUE;
1421 } else if (!strcmp(name, "google.protobuf.Int32Value")) {
1422 m->well_known_type = UPB_WELLKNOWN_INT32VALUE;
1423 } else if (!strcmp(name, "google.protobuf.UInt32Value")) {
1424 m->well_known_type = UPB_WELLKNOWN_UINT32VALUE;
1425 } else if (!strcmp(name, "google.protobuf.BoolValue")) {
1426 m->well_known_type = UPB_WELLKNOWN_BOOLVALUE;
1427 } else if (!strcmp(name, "google.protobuf.StringValue")) {
1428 m->well_known_type = UPB_WELLKNOWN_STRINGVALUE;
1429 } else if (!strcmp(name, "google.protobuf.BytesValue")) {
1430 m->well_known_type = UPB_WELLKNOWN_BYTESVALUE;
1431 } else if (!strcmp(name, "google.protobuf.Value")) {
1432 m->well_known_type = UPB_WELLKNOWN_VALUE;
1433 } else if (!strcmp(name, "google.protobuf.ListValue")) {
1434 m->well_known_type = UPB_WELLKNOWN_LISTVALUE;
1435 } else if (!strcmp(name, "google.protobuf.Struct")) {
1436 m->well_known_type = UPB_WELLKNOWN_STRUCT;
1437 } else {
1438 m->well_known_type = UPB_WELLKNOWN_UNSPECIFIED;
1439 }
1440}
1441
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001442
1443/* upb_enumdef ****************************************************************/
1444
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001445const char *upb_enumdef_fullname(const upb_enumdef *e) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001446 return e->full_name;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001447}
1448
Paul Yange0e54662016-09-15 11:09:01 -07001449const char *upb_enumdef_name(const upb_enumdef *e) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001450 return shortdefname(e->full_name);
Paul Yange0e54662016-09-15 11:09:01 -07001451}
1452
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001453const upb_filedef *upb_enumdef_file(const upb_enumdef *e) {
1454 return e->file;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001455}
1456
1457int32_t upb_enumdef_default(const upb_enumdef *e) {
Paul Yange0e54662016-09-15 11:09:01 -07001458 UPB_ASSERT(upb_enumdef_iton(e, e->defaultval));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001459 return e->defaultval;
1460}
1461
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001462int upb_enumdef_numvals(const upb_enumdef *e) {
1463 return upb_strtable_count(&e->ntoi);
1464}
1465
1466void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
1467 /* We iterate over the ntoi table, to account for duplicate numbers. */
1468 upb_strtable_begin(i, &e->ntoi);
1469}
1470
1471void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); }
1472bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); }
1473
1474bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
1475 size_t len, int32_t *num) {
1476 upb_value v;
1477 if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
1478 return false;
1479 }
1480 if (num) *num = upb_value_getint32(v);
1481 return true;
1482}
1483
1484const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
1485 upb_value v;
1486 return upb_inttable_lookup32(&def->iton, num, &v) ?
1487 upb_value_getcstr(v) : NULL;
1488}
1489
1490const char *upb_enum_iter_name(upb_enum_iter *iter) {
1491 return upb_strtable_iter_key(iter);
1492}
1493
1494int32_t upb_enum_iter_number(upb_enum_iter *iter) {
1495 return upb_value_getint32(upb_strtable_iter_value(iter));
1496}
1497
1498
1499/* upb_fielddef ***************************************************************/
1500
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001501const char *upb_fielddef_fullname(const upb_fielddef *f) {
1502 return f->full_name;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001503}
1504
1505upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001506 switch (f->type_) {
1507 case UPB_DESCRIPTOR_TYPE_DOUBLE:
1508 return UPB_TYPE_DOUBLE;
1509 case UPB_DESCRIPTOR_TYPE_FLOAT:
1510 return UPB_TYPE_FLOAT;
1511 case UPB_DESCRIPTOR_TYPE_INT64:
1512 case UPB_DESCRIPTOR_TYPE_SINT64:
1513 case UPB_DESCRIPTOR_TYPE_SFIXED64:
1514 return UPB_TYPE_INT64;
1515 case UPB_DESCRIPTOR_TYPE_INT32:
1516 case UPB_DESCRIPTOR_TYPE_SFIXED32:
1517 case UPB_DESCRIPTOR_TYPE_SINT32:
1518 return UPB_TYPE_INT32;
1519 case UPB_DESCRIPTOR_TYPE_UINT64:
1520 case UPB_DESCRIPTOR_TYPE_FIXED64:
1521 return UPB_TYPE_UINT64;
1522 case UPB_DESCRIPTOR_TYPE_UINT32:
1523 case UPB_DESCRIPTOR_TYPE_FIXED32:
1524 return UPB_TYPE_UINT32;
1525 case UPB_DESCRIPTOR_TYPE_ENUM:
1526 return UPB_TYPE_ENUM;
1527 case UPB_DESCRIPTOR_TYPE_BOOL:
1528 return UPB_TYPE_BOOL;
1529 case UPB_DESCRIPTOR_TYPE_STRING:
1530 return UPB_TYPE_STRING;
1531 case UPB_DESCRIPTOR_TYPE_BYTES:
1532 return UPB_TYPE_BYTES;
1533 case UPB_DESCRIPTOR_TYPE_GROUP:
1534 case UPB_DESCRIPTOR_TYPE_MESSAGE:
1535 return UPB_TYPE_MESSAGE;
1536 }
1537 UPB_UNREACHABLE();
1538}
1539
1540upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001541 return f->type_;
1542}
1543
1544uint32_t upb_fielddef_index(const upb_fielddef *f) {
1545 return f->index_;
1546}
1547
1548upb_label_t upb_fielddef_label(const upb_fielddef *f) {
1549 return f->label_;
1550}
1551
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001552uint32_t upb_fielddef_number(const upb_fielddef *f) {
1553 return f->number_;
1554}
1555
1556bool upb_fielddef_isextension(const upb_fielddef *f) {
1557 return f->is_extension_;
1558}
1559
1560bool upb_fielddef_lazy(const upb_fielddef *f) {
1561 return f->lazy_;
1562}
1563
1564bool upb_fielddef_packed(const upb_fielddef *f) {
1565 return f->packed_;
1566}
1567
1568const char *upb_fielddef_name(const upb_fielddef *f) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001569 return shortdefname(f->full_name);
1570}
1571
1572uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) {
1573 return f->selector_base;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001574}
1575
Paul Yange0e54662016-09-15 11:09:01 -07001576size_t upb_fielddef_getjsonname(const upb_fielddef *f, char *buf, size_t len) {
1577 const char *name = upb_fielddef_name(f);
1578 size_t src, dst = 0;
1579 bool ucase_next = false;
1580
1581#define WRITE(byte) \
1582 ++dst; \
1583 if (dst < len) buf[dst - 1] = byte; \
1584 else if (dst == len) buf[dst - 1] = '\0'
1585
1586 if (!name) {
1587 WRITE('\0');
1588 return 0;
1589 }
1590
1591 /* Implement the transformation as described in the spec:
1592 * 1. upper case all letters after an underscore.
1593 * 2. remove all underscores.
1594 */
1595 for (src = 0; name[src]; src++) {
1596 if (name[src] == '_') {
1597 ucase_next = true;
1598 continue;
1599 }
1600
1601 if (ucase_next) {
1602 WRITE(toupper(name[src]));
1603 ucase_next = false;
1604 } else {
1605 WRITE(name[src]);
1606 }
1607 }
1608
1609 WRITE('\0');
1610 return dst;
1611
1612#undef WRITE
1613}
1614
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001615const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001616 return f->msgdef;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001617}
1618
1619const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
1620 return f->oneof;
1621}
1622
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001623static void chkdefaulttype(const upb_fielddef *f, int ctype) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001624 UPB_UNUSED(f);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001625 UPB_UNUSED(ctype);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001626}
1627
1628int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
1629 chkdefaulttype(f, UPB_TYPE_INT64);
1630 return f->defaultval.sint;
1631}
1632
1633int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001634 chkdefaulttype(f, UPB_TYPE_INT32);
1635 return f->defaultval.sint;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001636}
1637
1638uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
1639 chkdefaulttype(f, UPB_TYPE_UINT64);
1640 return f->defaultval.uint;
1641}
1642
1643uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
1644 chkdefaulttype(f, UPB_TYPE_UINT32);
1645 return f->defaultval.uint;
1646}
1647
1648bool upb_fielddef_defaultbool(const upb_fielddef *f) {
1649 chkdefaulttype(f, UPB_TYPE_BOOL);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001650 return f->defaultval.boolean;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001651}
1652
1653float upb_fielddef_defaultfloat(const upb_fielddef *f) {
1654 chkdefaulttype(f, UPB_TYPE_FLOAT);
1655 return f->defaultval.flt;
1656}
1657
1658double upb_fielddef_defaultdouble(const upb_fielddef *f) {
1659 chkdefaulttype(f, UPB_TYPE_DOUBLE);
1660 return f->defaultval.dbl;
1661}
1662
1663const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001664 str_t *str = f->defaultval.str;
Paul Yange0e54662016-09-15 11:09:01 -07001665 UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_STRING ||
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001666 upb_fielddef_type(f) == UPB_TYPE_BYTES ||
1667 upb_fielddef_type(f) == UPB_TYPE_ENUM);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001668 if (str) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001669 if (len) *len = str->len;
1670 return str->str;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001671 } else {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001672 if (len) *len = 0;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001673 return NULL;
1674 }
1675}
1676
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001677const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
1678 UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_MESSAGE);
1679 return f->sub.msgdef;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001680}
1681
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001682const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
1683 UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_ENUM);
1684 return f->sub.enumdef;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001685}
1686
1687bool upb_fielddef_issubmsg(const upb_fielddef *f) {
1688 return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
1689}
1690
1691bool upb_fielddef_isstring(const upb_fielddef *f) {
1692 return upb_fielddef_type(f) == UPB_TYPE_STRING ||
1693 upb_fielddef_type(f) == UPB_TYPE_BYTES;
1694}
1695
1696bool upb_fielddef_isseq(const upb_fielddef *f) {
1697 return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
1698}
1699
1700bool upb_fielddef_isprimitive(const upb_fielddef *f) {
1701 return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f);
1702}
1703
1704bool upb_fielddef_ismap(const upb_fielddef *f) {
1705 return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) &&
1706 upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
1707}
1708
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001709bool upb_fielddef_hassubdef(const upb_fielddef *f) {
1710 return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
1711}
1712
Paul Yange0e54662016-09-15 11:09:01 -07001713bool upb_fielddef_haspresence(const upb_fielddef *f) {
1714 if (upb_fielddef_isseq(f)) return false;
1715 if (upb_fielddef_issubmsg(f)) return true;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001716 return f->file->syntax == UPB_SYNTAX_PROTO2;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001717}
1718
1719static bool between(int32_t x, int32_t low, int32_t high) {
1720 return x >= low && x <= high;
1721}
1722
1723bool upb_fielddef_checklabel(int32_t label) { return between(label, 1, 3); }
1724bool upb_fielddef_checktype(int32_t type) { return between(type, 1, 11); }
1725bool upb_fielddef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); }
1726
1727bool upb_fielddef_checkdescriptortype(int32_t type) {
1728 return between(type, 1, 18);
1729}
1730
1731/* upb_msgdef *****************************************************************/
1732
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001733const char *upb_msgdef_fullname(const upb_msgdef *m) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001734 return m->full_name;
1735}
1736
1737const upb_filedef *upb_msgdef_file(const upb_msgdef *m) {
1738 return m->file;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001739}
1740
Paul Yange0e54662016-09-15 11:09:01 -07001741const char *upb_msgdef_name(const upb_msgdef *m) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001742 return shortdefname(m->full_name);
Paul Yange0e54662016-09-15 11:09:01 -07001743}
1744
1745upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001746 return m->file->syntax;
Paul Yange0e54662016-09-15 11:09:01 -07001747}
1748
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001749size_t upb_msgdef_selectorcount(const upb_msgdef *m) {
1750 return m->selector_count;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001751}
1752
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001753uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m) {
1754 return m->submsg_field_count;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001755}
1756
1757const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
1758 upb_value val;
1759 return upb_inttable_lookup32(&m->itof, i, &val) ?
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001760 upb_value_getconstptr(val) : NULL;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001761}
1762
1763const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
1764 size_t len) {
1765 upb_value val;
Paul Yange0e54662016-09-15 11:09:01 -07001766
1767 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
1768 return NULL;
1769 }
1770
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001771 return unpack_def(val, UPB_DEFTYPE_FIELD);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001772}
1773
1774const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
1775 size_t len) {
1776 upb_value val;
Paul Yange0e54662016-09-15 11:09:01 -07001777
1778 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
1779 return NULL;
1780 }
1781
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001782 return unpack_def(val, UPB_DEFTYPE_ONEOF);
Paul Yange0e54662016-09-15 11:09:01 -07001783}
1784
1785bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len,
1786 const upb_fielddef **f, const upb_oneofdef **o) {
1787 upb_value val;
1788
1789 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
1790 return false;
1791 }
1792
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001793 *o = unpack_def(val, UPB_DEFTYPE_ONEOF);
1794 *f = unpack_def(val, UPB_DEFTYPE_FIELD);
Paul Yange0e54662016-09-15 11:09:01 -07001795 UPB_ASSERT((*o != NULL) ^ (*f != NULL)); /* Exactly one of the two should be set. */
1796 return true;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001797}
1798
1799int upb_msgdef_numfields(const upb_msgdef *m) {
Paul Yange0e54662016-09-15 11:09:01 -07001800 /* The number table contains only fields. */
1801 return upb_inttable_count(&m->itof);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001802}
1803
1804int upb_msgdef_numoneofs(const upb_msgdef *m) {
Paul Yange0e54662016-09-15 11:09:01 -07001805 /* The name table includes oneofs, and the number table does not. */
1806 return upb_strtable_count(&m->ntof) - upb_inttable_count(&m->itof);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001807}
1808
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001809bool upb_msgdef_mapentry(const upb_msgdef *m) {
1810 return m->map_entry;
1811}
1812
Paul Yang9bda1f12018-09-22 18:57:43 -07001813upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m) {
1814 return m->well_known_type;
1815}
1816
1817bool upb_msgdef_isnumberwrapper(const upb_msgdef *m) {
1818 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
1819 return type >= UPB_WELLKNOWN_DOUBLEVALUE &&
1820 type <= UPB_WELLKNOWN_UINT32VALUE;
1821}
1822
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001823void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
1824 upb_inttable_begin(iter, &m->itof);
1825}
1826
1827void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); }
1828
1829bool upb_msg_field_done(const upb_msg_field_iter *iter) {
1830 return upb_inttable_done(iter);
1831}
1832
1833upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001834 return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001835}
1836
1837void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
1838 upb_inttable_iter_setdone(iter);
1839}
1840
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001841bool upb_msg_field_iter_isequal(const upb_msg_field_iter * iter1,
1842 const upb_msg_field_iter * iter2) {
1843 return upb_inttable_iter_isequal(iter1, iter2);
1844}
1845
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001846void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
Paul Yange0e54662016-09-15 11:09:01 -07001847 upb_strtable_begin(iter, &m->ntof);
1848 /* We need to skip past any initial fields. */
1849 while (!upb_strtable_done(iter) &&
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001850 !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)) {
Paul Yange0e54662016-09-15 11:09:01 -07001851 upb_strtable_next(iter);
1852 }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001853}
1854
Paul Yange0e54662016-09-15 11:09:01 -07001855void upb_msg_oneof_next(upb_msg_oneof_iter *iter) {
1856 /* We need to skip past fields to return only oneofs. */
1857 do {
1858 upb_strtable_next(iter);
1859 } while (!upb_strtable_done(iter) &&
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001860 !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF));
Paul Yange0e54662016-09-15 11:09:01 -07001861}
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001862
1863bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
1864 return upb_strtable_done(iter);
1865}
1866
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001867const upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
1868 return unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001869}
1870
1871void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
1872 upb_strtable_iter_setdone(iter);
1873}
1874
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001875bool upb_msg_oneof_iter_isequal(const upb_msg_oneof_iter *iter1,
1876 const upb_msg_oneof_iter *iter2) {
1877 return upb_strtable_iter_isequal(iter1, iter2);
1878}
1879
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001880/* upb_oneofdef ***************************************************************/
1881
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001882const char *upb_oneofdef_name(const upb_oneofdef *o) {
1883 return shortdefname(o->full_name);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001884}
1885
1886const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
1887 return o->parent;
1888}
1889
1890int upb_oneofdef_numfields(const upb_oneofdef *o) {
1891 return upb_strtable_count(&o->ntof);
1892}
1893
Paul Yang5a3405c2017-02-06 12:40:51 -08001894uint32_t upb_oneofdef_index(const upb_oneofdef *o) {
1895 return o->index;
1896}
1897
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001898const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
1899 const char *name, size_t length) {
1900 upb_value val;
1901 return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
1902 upb_value_getptr(val) : NULL;
1903}
1904
1905const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
1906 upb_value val;
1907 return upb_inttable_lookup32(&o->itof, num, &val) ?
1908 upb_value_getptr(val) : NULL;
1909}
1910
1911void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
1912 upb_inttable_begin(iter, &o->itof);
1913}
1914
1915void upb_oneof_next(upb_oneof_iter *iter) {
1916 upb_inttable_next(iter);
1917}
1918
1919bool upb_oneof_done(upb_oneof_iter *iter) {
1920 return upb_inttable_done(iter);
1921}
1922
1923upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001924 return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001925}
1926
1927void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
1928 upb_inttable_iter_setdone(iter);
1929}
1930
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08001931/* Code to build defs from descriptor protos. *********************************/
1932
1933/* There is a question of how much validation to do here. It will be difficult
1934 * to perfectly match the amount of validation performed by proto2. But since
1935 * this code is used to directly build defs from Ruby (for example) we do need
1936 * to validate important constraints like uniqueness of names and numbers. */
1937
1938#define CHK(x) if (!(x)) { return false; }
1939#define CHK_OOM(x) if (!(x)) { upb_status_setoom(ctx->status); return false; }
1940
1941typedef struct {
1942 const upb_symtab *symtab;
1943 upb_filedef *file; /* File we are building. */
1944 upb_alloc *alloc; /* Allocate defs here. */
1945 upb_alloc *tmp; /* Alloc for addtab and any other tmp data. */
1946 upb_strtable *addtab; /* full_name -> packed def ptr for new defs. */
1947 upb_status *status; /* Record errors here. */
1948} symtab_addctx;
1949
1950static char* strviewdup(const symtab_addctx *ctx, upb_strview view) {
1951 return upb_strdup2(view.data, view.size, ctx->alloc);
1952}
1953
1954static bool streql2(const char *a, size_t n, const char *b) {
1955 return n == strlen(b) && memcmp(a, b, n) == 0;
1956}
1957
1958static bool streql_view(upb_strview view, const char *b) {
1959 return streql2(view.data, view.size, b);
1960}
1961
1962static const char *makefullname(const symtab_addctx *ctx, const char *prefix,
1963 upb_strview name) {
1964 if (prefix) {
1965 /* ret = prefix + '.' + name; */
1966 size_t n = strlen(prefix);
1967 char *ret = upb_malloc(ctx->alloc, n + name.size + 2);
1968 CHK_OOM(ret);
1969 strcpy(ret, prefix);
1970 ret[n] = '.';
1971 memcpy(&ret[n + 1], name.data, name.size);
1972 ret[n + 1 + name.size] = '\0';
1973 return ret;
1974 } else {
1975 return strviewdup(ctx, name);
1976 }
1977}
1978
1979static bool symtab_add(const symtab_addctx *ctx, const char *name,
1980 upb_value v) {
1981 upb_value tmp;
1982 if (upb_strtable_lookup(ctx->addtab, name, &tmp) ||
1983 upb_strtable_lookup(&ctx->symtab->syms, name, &tmp)) {
1984 upb_status_seterrf(ctx->status, "duplicate symbol '%s'", name);
1985 return false;
1986 }
1987
1988 CHK_OOM(upb_strtable_insert3(ctx->addtab, name, strlen(name), v, ctx->tmp));
1989 return true;
1990}
1991
1992/* Given a symbol and the base symbol inside which it is defined, find the
1993 * symbol's definition in t. */
1994static bool resolvename(const upb_strtable *t, const upb_fielddef *f,
1995 const char *base, upb_strview sym,
1996 upb_deftype_t type, upb_status *status,
1997 const void **def) {
1998 if(sym.size == 0) return NULL;
1999 if(sym.data[0] == '.') {
2000 /* Symbols starting with '.' are absolute, so we do a single lookup.
2001 * Slice to omit the leading '.' */
2002 upb_value v;
2003 if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) {
2004 return false;
2005 }
2006
2007 *def = unpack_def(v, type);
2008
2009 if (!*def) {
2010 upb_status_seterrf(status,
2011 "type mismatch when resolving field %s, name %s",
2012 f->full_name, sym.data);
2013 return false;
2014 }
2015
2016 return true;
2017 } else {
2018 /* Remove components from base until we find an entry or run out.
2019 * TODO: This branch is totally broken, but currently not used. */
2020 (void)base;
2021 UPB_ASSERT(false);
2022 return false;
2023 }
2024}
2025
2026const void *symtab_resolve(const symtab_addctx *ctx, const upb_fielddef *f,
2027 const char *base, upb_strview sym,
2028 upb_deftype_t type) {
2029 const void *ret;
2030 if (!resolvename(ctx->addtab, f, base, sym, type, ctx->status, &ret) &&
2031 !resolvename(&ctx->symtab->syms, f, base, sym, type, ctx->status, &ret)) {
2032 if (upb_ok(ctx->status)) {
2033 upb_status_seterrf(ctx->status, "couldn't resolve name '%s'", sym.data);
2034 }
2035 return false;
2036 }
2037 return ret;
2038}
2039
2040static bool create_oneofdef(
2041 const symtab_addctx *ctx, upb_msgdef *m,
2042 const google_protobuf_OneofDescriptorProto *oneof_proto) {
2043 upb_oneofdef *o;
2044 upb_strview name = google_protobuf_OneofDescriptorProto_name(oneof_proto);
2045 upb_value v;
2046
2047 o = (upb_oneofdef*)&m->oneofs[m->oneof_count++];
2048 o->parent = m;
2049 o->full_name = makefullname(ctx, m->full_name, name);
2050
2051 v = pack_def(o, UPB_DEFTYPE_ONEOF);
2052 CHK_OOM(symtab_add(ctx, o->full_name, v));
2053 CHK_OOM(upb_strtable_insert3(&m->ntof, name.data, name.size, v, ctx->alloc));
2054
2055 CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
2056 CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc));
2057
2058 return true;
2059}
2060
2061static bool parse_default(const symtab_addctx *ctx, const char *str, size_t len,
2062 upb_fielddef *f) {
2063 char *end;
2064 char nullz[64];
2065 errno = 0;
2066
2067 switch (upb_fielddef_type(f)) {
2068 case UPB_TYPE_INT32:
2069 case UPB_TYPE_INT64:
2070 case UPB_TYPE_UINT32:
2071 case UPB_TYPE_UINT64:
2072 case UPB_TYPE_DOUBLE:
2073 case UPB_TYPE_FLOAT:
2074 /* Standard C number parsing functions expect null-terminated strings. */
2075 if (len >= sizeof(nullz) - 1) {
2076 return false;
2077 }
2078 memcpy(nullz, str, len);
2079 nullz[len] = '\0';
2080 str = nullz;
2081 break;
2082 default:
2083 break;
2084 }
2085
2086 switch (upb_fielddef_type(f)) {
2087 case UPB_TYPE_INT32: {
2088 long val = strtol(str, &end, 0);
2089 CHK(val <= INT32_MAX && val >= INT32_MIN && errno != ERANGE && !*end);
2090 f->defaultval.sint = val;
2091 break;
2092 }
2093 case UPB_TYPE_ENUM: {
2094 const upb_enumdef *e = f->sub.enumdef;
2095 int32_t val;
2096 CHK(upb_enumdef_ntoi(e, str, len, &val));
2097 f->defaultval.sint = val;
2098 break;
2099 }
2100 case UPB_TYPE_INT64: {
2101 /* XXX: Need to write our own strtoll, since it's not available in c89. */
2102 long long val = strtol(str, &end, 0);
2103 CHK(val <= INT64_MAX && val >= INT64_MIN && errno != ERANGE && !*end);
2104 f->defaultval.sint = val;
2105 break;
2106 }
2107 case UPB_TYPE_UINT32: {
2108 unsigned long val = strtoul(str, &end, 0);
2109 CHK(val <= UINT32_MAX && errno != ERANGE && !*end);
2110 f->defaultval.uint = val;
2111 break;
2112 }
2113 case UPB_TYPE_UINT64: {
2114 /* XXX: Need to write our own strtoull, since it's not available in c89. */
2115 unsigned long long val = strtoul(str, &end, 0);
2116 CHK(val <= UINT64_MAX && errno != ERANGE && !*end);
2117 f->defaultval.uint = val;
2118 break;
2119 }
2120 case UPB_TYPE_DOUBLE: {
2121 double val = strtod(str, &end);
2122 CHK(errno != ERANGE && !*end);
2123 f->defaultval.dbl = val;
2124 break;
2125 }
2126 case UPB_TYPE_FLOAT: {
2127 /* XXX: Need to write our own strtof, since it's not available in c89. */
2128 float val = strtod(str, &end);
2129 CHK(errno != ERANGE && !*end);
2130 f->defaultval.flt = val;
2131 break;
2132 }
2133 case UPB_TYPE_BOOL: {
2134 if (streql2(str, len, "false")) {
2135 f->defaultval.boolean = false;
2136 } else if (streql2(str, len, "true")) {
2137 f->defaultval.boolean = true;
2138 } else {
2139 return false;
2140 }
2141 }
2142 case UPB_TYPE_STRING:
2143 f->defaultval.str = newstr(ctx->alloc, str, len);
2144 break;
2145 case UPB_TYPE_BYTES:
2146 /* XXX: need to interpret the C-escaped value. */
2147 f->defaultval.str = newstr(ctx->alloc, str, len);
2148 break;
2149 case UPB_TYPE_MESSAGE:
2150 /* Should not have a default value. */
2151 return false;
2152 }
2153 return true;
2154}
2155
2156static void set_default_default(const symtab_addctx *ctx, upb_fielddef *f) {
2157 switch (upb_fielddef_type(f)) {
2158 case UPB_TYPE_INT32:
2159 case UPB_TYPE_INT64:
2160 case UPB_TYPE_ENUM:
2161 f->defaultval.sint = 0;
2162 break;
2163 case UPB_TYPE_UINT64:
2164 case UPB_TYPE_UINT32:
2165 f->defaultval.uint = 0;
2166 break;
2167 case UPB_TYPE_DOUBLE:
2168 case UPB_TYPE_FLOAT:
2169 f->defaultval.dbl = 0;
2170 break;
2171 case UPB_TYPE_STRING:
2172 case UPB_TYPE_BYTES:
2173 f->defaultval.str = newstr(ctx->alloc, NULL, 0);
2174 break;
2175 case UPB_TYPE_BOOL:
2176 f->defaultval.boolean = false;
2177 break;
2178 case UPB_TYPE_MESSAGE:
2179 break;
2180 }
2181}
2182
2183static bool create_fielddef(
2184 const symtab_addctx *ctx, const char *prefix, upb_msgdef *m,
2185 const google_protobuf_FieldDescriptorProto *field_proto) {
2186 upb_alloc *alloc = ctx->alloc;
2187 upb_fielddef *f;
2188 const google_protobuf_FieldOptions *options;
2189 upb_strview name;
2190 const char *full_name;
2191 const char *shortname;
2192 uint32_t field_number;
2193
2194 if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) {
2195 upb_status_seterrmsg(ctx->status, "field has no name");
2196 return false;
2197 }
2198
2199 name = google_protobuf_FieldDescriptorProto_name(field_proto);
2200 CHK(upb_isident(name, false, ctx->status));
2201 full_name = makefullname(ctx, prefix, name);
2202 shortname = shortdefname(full_name);
2203
2204 field_number = google_protobuf_FieldDescriptorProto_number(field_proto);
2205
2206 if (field_number == 0 || field_number > UPB_MAX_FIELDNUMBER) {
2207 upb_status_seterrf(ctx->status, "invalid field number (%u)", field_number);
2208 return false;
2209 }
2210
2211 if (m) {
2212 /* direct message field. */
2213 upb_value v, packed_v;
2214
2215 f = (upb_fielddef*)&m->fields[m->field_count++];
2216 f->msgdef = m;
2217 f->is_extension_ = false;
2218
2219 packed_v = pack_def(f, UPB_DEFTYPE_FIELD);
2220 v = upb_value_constptr(f);
2221
2222 if (!upb_strtable_insert3(&m->ntof, name.data, name.size, packed_v, alloc)) {
2223 upb_status_seterrf(ctx->status, "duplicate field name (%s)", shortname);
2224 return false;
2225 }
2226
2227 if (!upb_inttable_insert2(&m->itof, field_number, v, alloc)) {
2228 upb_status_seterrf(ctx->status, "duplicate field number (%u)",
2229 field_number);
2230 return false;
2231 }
2232 } else {
2233 /* extension field. */
2234 f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count];
2235 f->is_extension_ = true;
2236 CHK_OOM(symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD)));
2237 }
2238
2239 f->full_name = full_name;
2240 f->file = ctx->file;
2241 f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto);
2242 f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto);
2243 f->number_ = field_number;
2244 f->oneof = NULL;
2245
2246 /* We can't resolve the subdef or (in the case of extensions) the containing
2247 * message yet, because it may not have been defined yet. We stash a pointer
2248 * to the field_proto until later when we can properly resolve it. */
2249 f->sub.unresolved = field_proto;
2250
2251 if (f->label_ == UPB_LABEL_REQUIRED && f->file->syntax == UPB_SYNTAX_PROTO3) {
2252 upb_status_seterrf(ctx->status, "proto3 fields cannot be required (%s)",
2253 f->full_name);
2254 return false;
2255 }
2256
2257 if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) {
2258 int oneof_index =
2259 google_protobuf_FieldDescriptorProto_oneof_index(field_proto);
2260 upb_oneofdef *oneof;
2261 upb_value v = upb_value_constptr(f);
2262
2263 if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
2264 upb_status_seterrf(ctx->status,
2265 "fields in oneof must have OPTIONAL label (%s)",
2266 f->full_name);
2267 return false;
2268 }
2269
2270 if (!m) {
2271 upb_status_seterrf(ctx->status,
2272 "oneof_index provided for extension field (%s)",
2273 f->full_name);
2274 return false;
2275 }
2276
2277 if (oneof_index >= m->oneof_count) {
2278 upb_status_seterrf(ctx->status, "oneof_index out of range (%s)",
2279 f->full_name);
2280 return false;
2281 }
2282
2283 oneof = (upb_oneofdef*)&m->oneofs[oneof_index];
2284 f->oneof = oneof;
2285
2286 CHK(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc));
2287 CHK(upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc));
2288 } else {
2289 f->oneof = NULL;
2290 }
2291
2292 if (google_protobuf_FieldDescriptorProto_has_options(field_proto)) {
2293 options = google_protobuf_FieldDescriptorProto_options(field_proto);
2294 f->lazy_ = google_protobuf_FieldOptions_lazy(options);
2295 f->packed_ = google_protobuf_FieldOptions_packed(options);
2296 } else {
2297 f->lazy_ = false;
2298 f->packed_ = false;
2299 }
2300
2301 return true;
2302}
2303
2304static bool create_enumdef(
2305 const symtab_addctx *ctx, const char *prefix,
2306 const google_protobuf_EnumDescriptorProto *enum_proto) {
2307 upb_enumdef *e;
2308 const google_protobuf_EnumValueDescriptorProto *const *values;
2309 upb_strview name;
2310 size_t i, n;
2311
2312 name = google_protobuf_EnumDescriptorProto_name(enum_proto);
2313 CHK(upb_isident(name, false, ctx->status));
2314
2315 e = (upb_enumdef*)&ctx->file->enums[ctx->file->enum_count++];
2316 e->full_name = makefullname(ctx, prefix, name);
2317 CHK_OOM(symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM)));
2318
2319 CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, ctx->alloc));
2320 CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc));
2321
2322 e->file = ctx->file;
2323 e->defaultval = 0;
2324
2325 values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n);
2326
2327 if (n == 0) {
2328 upb_status_seterrf(ctx->status,
2329 "enums must contain at least one value (%s)",
2330 e->full_name);
2331 return false;
2332 }
2333
2334 for (i = 0; i < n; i++) {
2335 const google_protobuf_EnumValueDescriptorProto *value = values[i];
2336 upb_strview name = google_protobuf_EnumValueDescriptorProto_name(value);
2337 char *name2 = strviewdup(ctx, name);
2338 int32_t num = google_protobuf_EnumValueDescriptorProto_number(value);
2339 upb_value v = upb_value_int32(num);
2340
2341 if (i == 0 && e->file->syntax == UPB_SYNTAX_PROTO3 && num != 0) {
2342 upb_status_seterrf(ctx->status,
2343 "for proto3, the first enum value must be zero (%s)",
2344 e->full_name);
2345 return false;
2346 }
2347
2348 if (upb_strtable_lookup(&e->ntoi, name2, NULL)) {
2349 upb_status_seterrf(ctx->status, "duplicate enum label '%s'", name2);
2350 return false;
2351 }
2352
2353 CHK_OOM(name2)
2354 CHK_OOM(
2355 upb_strtable_insert3(&e->ntoi, name2, strlen(name2), v, ctx->alloc));
2356
2357 if (!upb_inttable_lookup(&e->iton, num, NULL)) {
2358 upb_value v = upb_value_cstr(name2);
2359 CHK_OOM(upb_inttable_insert2(&e->iton, num, v, ctx->alloc));
2360 }
2361 }
2362
2363 upb_inttable_compact2(&e->iton, ctx->alloc);
2364
2365 return true;
2366}
2367
2368static bool create_msgdef(const symtab_addctx *ctx, const char *prefix,
2369 const google_protobuf_DescriptorProto *msg_proto) {
2370 upb_msgdef *m;
2371 const google_protobuf_MessageOptions *options;
2372 const google_protobuf_OneofDescriptorProto *const *oneofs;
2373 const google_protobuf_FieldDescriptorProto *const *fields;
2374 const google_protobuf_EnumDescriptorProto *const *enums;
2375 const google_protobuf_DescriptorProto *const *msgs;
2376 size_t i, n;
2377 upb_strview name;
2378
2379 name = google_protobuf_DescriptorProto_name(msg_proto);
2380 CHK(upb_isident(name, false, ctx->status));
2381
2382 m = (upb_msgdef*)&ctx->file->msgs[ctx->file->msg_count++];
2383 m->full_name = makefullname(ctx, prefix, name);
2384 CHK_OOM(symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG)));
2385
2386 CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
2387 CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc));
2388
2389 m->file = ctx->file;
2390 m->map_entry = false;
2391
2392 options = google_protobuf_DescriptorProto_options(msg_proto);
2393
2394 if (options) {
2395 m->map_entry = google_protobuf_MessageOptions_map_entry(options);
2396 }
2397
2398 oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n);
2399 m->oneof_count = 0;
2400 m->oneofs = upb_malloc(ctx->alloc, sizeof(*m->oneofs) * n);
2401 for (i = 0; i < n; i++) {
2402 CHK(create_oneofdef(ctx, m, oneofs[i]));
2403 }
2404
2405 fields = google_protobuf_DescriptorProto_field(msg_proto, &n);
2406 m->field_count = 0;
2407 m->fields = upb_malloc(ctx->alloc, sizeof(*m->fields) * n);
2408 for (i = 0; i < n; i++) {
2409 CHK(create_fielddef(ctx, m->full_name, m, fields[i]));
2410 }
2411
2412 CHK(assign_msg_indices(m, ctx->status));
2413 assign_msg_wellknowntype(m);
2414 upb_inttable_compact2(&m->itof, ctx->alloc);
2415
2416 /* This message is built. Now build nested messages and enums. */
2417
2418 enums = google_protobuf_DescriptorProto_enum_type(msg_proto, &n);
2419 for (i = 0; i < n; i++) {
2420 CHK(create_enumdef(ctx, m->full_name, enums[i]));
2421 }
2422
2423 msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n);
2424 for (i = 0; i < n; i++) {
2425 CHK(create_msgdef(ctx, m->full_name, msgs[i]));
2426 }
2427
2428 return true;
2429}
2430
2431typedef struct {
2432 int msg_count;
2433 int enum_count;
2434 int ext_count;
2435} decl_counts;
2436
2437static void count_types_in_msg(const google_protobuf_DescriptorProto *msg_proto,
2438 decl_counts *counts) {
2439 const google_protobuf_DescriptorProto *const *msgs;
2440 size_t i, n;
2441
2442 counts->msg_count++;
2443
2444 msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n);
2445 for (i = 0; i < n; i++) {
2446 count_types_in_msg(msgs[i], counts);
2447 }
2448
2449 google_protobuf_DescriptorProto_enum_type(msg_proto, &n);
2450 counts->enum_count += n;
2451
2452 google_protobuf_DescriptorProto_extension(msg_proto, &n);
2453 counts->ext_count += n;
2454}
2455
2456static void count_types_in_file(
2457 const google_protobuf_FileDescriptorProto *file_proto,
2458 decl_counts *counts) {
2459 const google_protobuf_DescriptorProto *const *msgs;
2460 size_t i, n;
2461
2462 msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
2463 for (i = 0; i < n; i++) {
2464 count_types_in_msg(msgs[i], counts);
2465 }
2466
2467 google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
2468 counts->enum_count += n;
2469
2470 google_protobuf_FileDescriptorProto_extension(file_proto, &n);
2471 counts->ext_count += n;
2472}
2473
2474static bool resolve_fielddef(const symtab_addctx *ctx, const char *prefix,
2475 upb_fielddef *f) {
2476 upb_strview name;
2477 const google_protobuf_FieldDescriptorProto *field_proto = f->sub.unresolved;
2478
2479 if (f->is_extension_) {
2480 if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) {
2481 upb_status_seterrf(ctx->status,
2482 "extension for field '%s' had no extendee",
2483 f->full_name);
2484 return false;
2485 }
2486
2487 name = google_protobuf_FieldDescriptorProto_extendee(field_proto);
2488 f->msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG);
2489 CHK(f->msgdef);
2490 }
2491
2492 if ((upb_fielddef_issubmsg(f) || f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) &&
2493 !google_protobuf_FieldDescriptorProto_has_type_name(field_proto)) {
2494 upb_status_seterrf(ctx->status, "field '%s' is missing type name",
2495 f->full_name);
2496 return false;
2497 }
2498
2499 name = google_protobuf_FieldDescriptorProto_type_name(field_proto);
2500
2501 if (upb_fielddef_issubmsg(f)) {
2502 f->sub.msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG);
2503 CHK(f->sub.msgdef);
2504 } else if (f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) {
2505 f->sub.enumdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_ENUM);
2506 CHK(f->sub.enumdef);
2507 }
2508
2509 /* Have to delay resolving of the default value until now because of the enum
2510 * case, since enum defaults are specified with a label. */
2511 if (google_protobuf_FieldDescriptorProto_has_default_value(field_proto)) {
2512 upb_strview defaultval =
2513 google_protobuf_FieldDescriptorProto_default_value(field_proto);
2514
2515 if (f->file->syntax == UPB_SYNTAX_PROTO3) {
2516 upb_status_seterrf(ctx->status,
2517 "proto3 fields cannot have explicit defaults (%s)",
2518 f->full_name);
2519 return false;
2520 }
2521
2522 if (upb_fielddef_issubmsg(f)) {
2523 upb_status_seterrf(ctx->status,
2524 "message fields cannot have explicit defaults (%s)",
2525 f->full_name);
2526 return false;
2527 }
2528
2529 if (!parse_default(ctx, defaultval.data, defaultval.size, f)) {
2530 upb_status_seterrf(ctx->status,
2531 "couldn't parse default '" UPB_STRVIEW_FORMAT
2532 "' for field (%s)",
2533 UPB_STRVIEW_ARGS(defaultval), f->full_name);
2534 return false;
2535 }
2536 } else {
2537 set_default_default(ctx, f);
2538 }
2539
2540 return true;
2541}
2542
2543static bool build_filedef(
2544 const symtab_addctx *ctx, upb_filedef *file,
2545 const google_protobuf_FileDescriptorProto *file_proto) {
2546 upb_alloc *alloc = ctx->alloc;
2547 const google_protobuf_FileOptions *file_options_proto;
2548 const google_protobuf_DescriptorProto *const *msgs;
2549 const google_protobuf_EnumDescriptorProto *const *enums;
2550 const google_protobuf_FieldDescriptorProto *const *exts;
2551 const upb_strview* strs;
2552 size_t i, n;
2553 decl_counts counts = {0};
2554
2555 count_types_in_file(file_proto, &counts);
2556
2557 file->msgs = upb_malloc(alloc, sizeof(*file->msgs) * counts.msg_count);
2558 file->enums = upb_malloc(alloc, sizeof(*file->enums) * counts.enum_count);
2559 file->exts = upb_malloc(alloc, sizeof(*file->exts) * counts.ext_count);
2560
2561 CHK_OOM(counts.msg_count == 0 || file->msgs);
2562 CHK_OOM(counts.enum_count == 0 || file->enums);
2563 CHK_OOM(counts.ext_count == 0 || file->exts);
2564
2565 /* We increment these as defs are added. */
2566 file->msg_count = 0;
2567 file->enum_count = 0;
2568 file->ext_count = 0;
2569
2570 if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) {
2571 upb_status_seterrmsg(ctx->status, "File has no name");
2572 return false;
2573 }
2574
2575 file->name =
2576 strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto));
2577 file->phpprefix = NULL;
2578 file->phpnamespace = NULL;
2579
2580 if (google_protobuf_FileDescriptorProto_has_package(file_proto)) {
2581 upb_strview package =
2582 google_protobuf_FileDescriptorProto_package(file_proto);
2583 CHK(upb_isident(package, true, ctx->status));
2584 file->package = strviewdup(ctx, package);
2585 } else {
2586 file->package = NULL;
2587 }
2588
2589 if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) {
2590 upb_strview syntax =
2591 google_protobuf_FileDescriptorProto_syntax(file_proto);
2592
2593 if (streql_view(syntax, "proto2")) {
2594 file->syntax = UPB_SYNTAX_PROTO2;
2595 } else if (streql_view(syntax, "proto3")) {
2596 file->syntax = UPB_SYNTAX_PROTO3;
2597 } else {
2598 upb_status_seterrf(ctx->status, "Invalid syntax '%s'", syntax);
2599 return false;
2600 }
2601 } else {
2602 file->syntax = UPB_SYNTAX_PROTO2;
2603 }
2604
2605 /* Read options. */
2606 file_options_proto = google_protobuf_FileDescriptorProto_options(file_proto);
2607 if (file_options_proto) {
2608 if (google_protobuf_FileOptions_has_php_class_prefix(file_options_proto)) {
2609 file->phpprefix = strviewdup(
2610 ctx,
2611 google_protobuf_FileOptions_php_class_prefix(file_options_proto));
2612 }
2613 if (google_protobuf_FileOptions_has_php_namespace(file_options_proto)) {
2614 file->phpnamespace = strviewdup(
2615 ctx, google_protobuf_FileOptions_php_namespace(file_options_proto));
2616 }
2617 }
2618
2619 /* Verify dependencies. */
2620 strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n);
2621 file->deps = upb_malloc(alloc, sizeof(*file->deps) * n) ;
2622 CHK_OOM(n == 0 || file->deps);
2623
2624 for (i = 0; i < n; i++) {
2625 upb_strview dep_name = strs[i];
2626 upb_value v;
2627 if (!upb_strtable_lookup2(&ctx->symtab->files, dep_name.data,
2628 dep_name.size, &v)) {
2629 upb_status_seterrf(ctx->status,
2630 "Depends on file '" UPB_STRVIEW_FORMAT
2631 "', but it has not been loaded",
2632 UPB_STRVIEW_ARGS(dep_name));
2633 return false;
2634 }
2635 file->deps[i] = upb_value_getconstptr(v);
2636 }
2637
2638 /* Create messages. */
2639 msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
2640 for (i = 0; i < n; i++) {
2641 CHK(create_msgdef(ctx, file->package, msgs[i]));
2642 }
2643
2644 /* Create enums. */
2645 enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
2646 for (i = 0; i < n; i++) {
2647 CHK(create_enumdef(ctx, file->package, enums[i]));
2648 }
2649
2650 /* Create extensions. */
2651 exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n);
2652 file->exts = upb_malloc(alloc, sizeof(*file->exts) * n);
2653 CHK_OOM(n == 0 || file->exts);
2654 for (i = 0; i < n; i++) {
2655 CHK(create_fielddef(ctx, file->package, NULL, exts[i]));
2656 }
2657
2658 /* Now that all names are in the table, resolve references. */
2659 for (i = 0; i < file->ext_count; i++) {
2660 CHK(resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->exts[i]));
2661 }
2662
2663 for (i = 0; i < file->msg_count; i++) {
2664 const upb_msgdef *m = &file->msgs[i];
2665 int j;
2666 for (j = 0; j < m->field_count; j++) {
2667 CHK(resolve_fielddef(ctx, m->full_name, (upb_fielddef*)&m->fields[j]));
2668 }
2669 }
2670
2671 return true;
2672 }
2673
2674static bool upb_symtab_addtotabs(upb_symtab *s, symtab_addctx *ctx,
2675 upb_status *status) {
2676 const upb_filedef *file = ctx->file;
2677 upb_alloc *alloc = upb_arena_alloc(s->arena);
2678 upb_strtable_iter iter;
2679
2680 CHK_OOM(upb_strtable_insert3(&s->files, file->name, strlen(file->name),
2681 upb_value_constptr(file), alloc));
2682
2683 upb_strtable_begin(&iter, ctx->addtab);
2684 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
2685 const char *key = upb_strtable_iter_key(&iter);
2686 size_t keylen = upb_strtable_iter_keylength(&iter);
2687 upb_value value = upb_strtable_iter_value(&iter);
2688 CHK_OOM(upb_strtable_insert3(&s->syms, key, keylen, value, alloc));
2689 }
2690
2691 return true;
2692}
2693
Paul Yange0e54662016-09-15 11:09:01 -07002694/* upb_filedef ****************************************************************/
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07002695
Paul Yange0e54662016-09-15 11:09:01 -07002696const char *upb_filedef_name(const upb_filedef *f) {
2697 return f->name;
2698}
2699
2700const char *upb_filedef_package(const upb_filedef *f) {
2701 return f->package;
2702}
2703
Paul Yang6b27c1f2017-03-17 11:08:06 -07002704const char *upb_filedef_phpprefix(const upb_filedef *f) {
2705 return f->phpprefix;
2706}
2707
Paul Yang6f325802017-06-05 00:10:18 -07002708const char *upb_filedef_phpnamespace(const upb_filedef *f) {
2709 return f->phpnamespace;
2710}
2711
Paul Yange0e54662016-09-15 11:09:01 -07002712upb_syntax_t upb_filedef_syntax(const upb_filedef *f) {
2713 return f->syntax;
2714}
2715
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002716int upb_filedef_msgcount(const upb_filedef *f) {
2717 return f->msg_count;
Paul Yange0e54662016-09-15 11:09:01 -07002718}
2719
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002720int upb_filedef_depcount(const upb_filedef *f) {
2721 return f->dep_count;
Paul Yange0e54662016-09-15 11:09:01 -07002722}
2723
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002724int upb_filedef_enumcount(const upb_filedef *f) {
2725 return f->enum_count;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07002726}
2727
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002728const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i) {
2729 return i < 0 || i >= f->dep_count ? NULL : f->deps[i];
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07002730}
2731
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002732const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i) {
2733 return i < 0 || i >= f->msg_count ? NULL : &f->msgs[i];
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07002734}
2735
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002736const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i) {
2737 return i < 0 || i >= f->enum_count ? NULL : &f->enums[i];
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07002738}
Paul Yang5a3405c2017-02-06 12:40:51 -08002739
2740void upb_symtab_free(upb_symtab *s) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002741 upb_arena_free(s->arena);
Paul Yang5a3405c2017-02-06 12:40:51 -08002742 upb_gfree(s);
2743}
2744
2745upb_symtab *upb_symtab_new() {
2746 upb_symtab *s = upb_gmalloc(sizeof(*s));
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002747 upb_alloc *alloc;
2748
Paul Yang5a3405c2017-02-06 12:40:51 -08002749 if (!s) {
2750 return NULL;
2751 }
2752
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002753 s->arena = upb_arena_new();
2754 alloc = upb_arena_alloc(s->arena);
Paul Yang5a3405c2017-02-06 12:40:51 -08002755
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002756 if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, alloc) ||
2757 !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, alloc)) {
2758 upb_arena_free(s->arena);
2759 upb_gfree(s);
2760 s = NULL;
2761 }
2762 return s;
Paul Yang5a3405c2017-02-06 12:40:51 -08002763}
2764
2765const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
2766 upb_value v;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002767 return upb_strtable_lookup(&s->syms, sym, &v) ?
2768 unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
Paul Yang5a3405c2017-02-06 12:40:51 -08002769}
2770
Paul Yang8faa7782018-12-26 10:36:09 -08002771const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym,
2772 size_t len) {
2773 upb_value v;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002774 return upb_strtable_lookup2(&s->syms, sym, len, &v) ?
2775 unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
Paul Yang8faa7782018-12-26 10:36:09 -08002776}
2777
Paul Yang5a3405c2017-02-06 12:40:51 -08002778const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
2779 upb_value v;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002780 return upb_strtable_lookup(&s->syms, sym, &v) ?
2781 unpack_def(v, UPB_DEFTYPE_ENUM) : NULL;
Paul Yang5a3405c2017-02-06 12:40:51 -08002782}
2783
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002784const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name) {
2785 upb_value v;
2786 return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v)
2787 : NULL;
Paul Yang5a3405c2017-02-06 12:40:51 -08002788}
2789
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002790const upb_filedef *upb_symtab_addfile(
2791 upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
2792 upb_status *status) {
2793 upb_arena *tmparena = upb_arena_new();
Paul Yang5a3405c2017-02-06 12:40:51 -08002794 upb_strtable addtab;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002795 upb_alloc *alloc = upb_arena_alloc(s->arena);
2796 upb_filedef *file = upb_malloc(alloc, sizeof(*file));
2797 bool ok;
2798 symtab_addctx ctx;
Paul Yang5a3405c2017-02-06 12:40:51 -08002799
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002800 ctx.file = file;
2801 ctx.symtab = s;
2802 ctx.alloc = alloc;
2803 ctx.tmp = upb_arena_alloc(tmparena);
2804 ctx.addtab = &addtab;
2805 ctx.status = status;
2806
2807 ok = file &&
2808 upb_strtable_init2(&addtab, UPB_CTYPE_CONSTPTR, ctx.tmp) &&
2809 build_filedef(&ctx, file, file_proto) &&
2810 upb_symtab_addtotabs(s, &ctx, status);
2811
2812 upb_arena_free(tmparena);
2813 return ok ? file : NULL;
2814}
2815
2816/* Include here since we want most of this file to be stdio-free. */
2817#include <stdio.h>
2818
2819bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init) {
2820 /* Since this function should never fail (it would indicate a bug in upb) we
2821 * print errors to stderr instead of returning error status to the user. */
2822 upb_def_init **deps = init->deps;
2823 google_protobuf_FileDescriptorProto *file;
2824 upb_arena *arena;
2825 upb_status status;
2826
2827 upb_status_clear(&status);
2828
2829 if (upb_strtable_lookup(&s->files, init->filename, NULL)) {
Paul Yang5a3405c2017-02-06 12:40:51 -08002830 return true;
2831 }
2832
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002833 arena = upb_arena_new();
2834
2835 for (; *deps; deps++) {
2836 if (!_upb_symtab_loaddefinit(s, *deps)) goto err;
Paul Yang5a3405c2017-02-06 12:40:51 -08002837 }
2838
Adam Cozzette8645d892019-03-26 14:32:20 -07002839 file = google_protobuf_FileDescriptorProto_parse(
2840 init->descriptor.data, init->descriptor.size, arena);
Paul Yang5a3405c2017-02-06 12:40:51 -08002841
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002842 if (!file) {
2843 upb_status_seterrf(
2844 &status,
2845 "Failed to parse compiled-in descriptor for file '%s'. This should "
2846 "never happen.",
2847 init->filename);
Paul Yang5a3405c2017-02-06 12:40:51 -08002848 goto err;
2849 }
2850
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002851 if (!upb_symtab_addfile(s, file, &status)) goto err;
Paul Yang5a3405c2017-02-06 12:40:51 -08002852
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002853 upb_arena_free(arena);
Paul Yang5a3405c2017-02-06 12:40:51 -08002854 return true;
2855
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002856err:
2857 fprintf(stderr, "Error loading compiled-in descriptor: %s\n",
2858 upb_status_errmsg(&status));
2859 upb_arena_free(arena);
Paul Yang5a3405c2017-02-06 12:40:51 -08002860 return false;
2861}
2862
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002863#undef CHK
2864#undef CHK_OOM
Paul Yang60327462017-10-09 12:39:13 -07002865/* We encode backwards, to avoid pre-computing lengths (one-pass encode). */
2866
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08002867#include <string.h>
Paul Yang60327462017-10-09 12:39:13 -07002868
2869#define UPB_PB_VARINT_MAX_LEN 10
2870#define CHK(x) do { if (!(x)) { return false; } } while(0)
2871
2872/* Maps descriptor type -> upb field type. */
2873static const uint8_t upb_desctype_to_fieldtype2[] = {
2874 UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
2875 UPB_TYPE_DOUBLE, /* DOUBLE */
2876 UPB_TYPE_FLOAT, /* FLOAT */
2877 UPB_TYPE_INT64, /* INT64 */
2878 UPB_TYPE_UINT64, /* UINT64 */
2879 UPB_TYPE_INT32, /* INT32 */
2880 UPB_TYPE_UINT64, /* FIXED64 */
2881 UPB_TYPE_UINT32, /* FIXED32 */
2882 UPB_TYPE_BOOL, /* BOOL */
2883 UPB_TYPE_STRING, /* STRING */
2884 UPB_TYPE_MESSAGE, /* GROUP */
2885 UPB_TYPE_MESSAGE, /* MESSAGE */
2886 UPB_TYPE_BYTES, /* BYTES */
2887 UPB_TYPE_UINT32, /* UINT32 */
2888 UPB_TYPE_ENUM, /* ENUM */
2889 UPB_TYPE_INT32, /* SFIXED32 */
2890 UPB_TYPE_INT64, /* SFIXED64 */
2891 UPB_TYPE_INT32, /* SINT32 */
2892 UPB_TYPE_INT64, /* SINT64 */
2893};
2894
2895static size_t upb_encode_varint(uint64_t val, char *buf) {
2896 size_t i;
2897 if (val < 128) { buf[0] = val; return 1; }
2898 i = 0;
2899 while (val) {
2900 uint8_t byte = val & 0x7fU;
2901 val >>= 7;
2902 if (val) byte |= 0x80U;
2903 buf[i++] = byte;
2904 }
2905 return i;
2906}
2907
2908static uint32_t upb_zzencode_32(int32_t n) { return (n << 1) ^ (n >> 31); }
2909static uint64_t upb_zzencode_64(int64_t n) { return (n << 1) ^ (n >> 63); }
2910
2911typedef struct {
Paul Yang9bda1f12018-09-22 18:57:43 -07002912 upb_alloc *alloc;
Paul Yang60327462017-10-09 12:39:13 -07002913 char *buf, *ptr, *limit;
2914} upb_encstate;
2915
2916static size_t upb_roundup_pow2(size_t bytes) {
2917 size_t ret = 128;
2918 while (ret < bytes) {
2919 ret *= 2;
2920 }
2921 return ret;
2922}
2923
2924static bool upb_encode_growbuffer(upb_encstate *e, size_t bytes) {
2925 size_t old_size = e->limit - e->buf;
2926 size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr));
Paul Yang9bda1f12018-09-22 18:57:43 -07002927 char *new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size);
Paul Yang60327462017-10-09 12:39:13 -07002928 CHK(new_buf);
2929
2930 /* We want previous data at the end, realloc() put it at the beginning. */
Paul Yang6dd563a2018-03-08 17:35:22 -08002931 memmove(new_buf + new_size - old_size, e->buf, old_size);
Paul Yang60327462017-10-09 12:39:13 -07002932
2933 e->ptr = new_buf + new_size - (e->limit - e->ptr);
2934 e->limit = new_buf + new_size;
2935 e->buf = new_buf;
2936 return true;
2937}
2938
2939/* Call to ensure that at least "bytes" bytes are available for writing at
2940 * e->ptr. Returns false if the bytes could not be allocated. */
2941static bool upb_encode_reserve(upb_encstate *e, size_t bytes) {
2942 CHK(UPB_LIKELY((size_t)(e->ptr - e->buf) >= bytes) ||
2943 upb_encode_growbuffer(e, bytes));
2944
2945 e->ptr -= bytes;
2946 return true;
2947}
2948
2949/* Writes the given bytes to the buffer, handling reserve/advance. */
2950static bool upb_put_bytes(upb_encstate *e, const void *data, size_t len) {
2951 CHK(upb_encode_reserve(e, len));
2952 memcpy(e->ptr, data, len);
2953 return true;
2954}
2955
2956static bool upb_put_fixed64(upb_encstate *e, uint64_t val) {
2957 /* TODO(haberman): byte-swap for big endian. */
2958 return upb_put_bytes(e, &val, sizeof(uint64_t));
2959}
2960
2961static bool upb_put_fixed32(upb_encstate *e, uint32_t val) {
2962 /* TODO(haberman): byte-swap for big endian. */
2963 return upb_put_bytes(e, &val, sizeof(uint32_t));
2964}
2965
2966static bool upb_put_varint(upb_encstate *e, uint64_t val) {
2967 size_t len;
2968 char *start;
2969 CHK(upb_encode_reserve(e, UPB_PB_VARINT_MAX_LEN));
2970 len = upb_encode_varint(val, e->ptr);
2971 start = e->ptr + UPB_PB_VARINT_MAX_LEN - len;
2972 memmove(start, e->ptr, len);
2973 e->ptr = start;
2974 return true;
2975}
2976
2977static bool upb_put_double(upb_encstate *e, double d) {
2978 uint64_t u64;
2979 UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
2980 memcpy(&u64, &d, sizeof(uint64_t));
2981 return upb_put_fixed64(e, u64);
2982}
2983
2984static bool upb_put_float(upb_encstate *e, float d) {
2985 uint32_t u32;
2986 UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
2987 memcpy(&u32, &d, sizeof(uint32_t));
2988 return upb_put_fixed32(e, u32);
2989}
2990
Paul Yang9bda1f12018-09-22 18:57:43 -07002991static uint32_t upb_readcase(const char *msg, const upb_msglayout_field *f) {
Paul Yang60327462017-10-09 12:39:13 -07002992 uint32_t ret;
Paul Yang9bda1f12018-09-22 18:57:43 -07002993 uint32_t offset = ~f->presence;
2994 memcpy(&ret, msg + offset, sizeof(ret));
Paul Yang60327462017-10-09 12:39:13 -07002995 return ret;
2996}
2997
Paul Yang9bda1f12018-09-22 18:57:43 -07002998static bool upb_readhasbit(const char *msg, const upb_msglayout_field *f) {
2999 uint32_t hasbit = f->presence;
3000 UPB_ASSERT(f->presence > 0);
3001 return msg[hasbit / 8] & (1 << (hasbit % 8));
Paul Yang60327462017-10-09 12:39:13 -07003002}
3003
3004static bool upb_put_tag(upb_encstate *e, int field_number, int wire_type) {
3005 return upb_put_varint(e, (field_number << 3) | wire_type);
3006}
3007
3008static bool upb_put_fixedarray(upb_encstate *e, const upb_array *arr,
3009 size_t size) {
3010 size_t bytes = arr->len * size;
3011 return upb_put_bytes(e, arr->data, bytes) && upb_put_varint(e, bytes);
3012}
3013
3014bool upb_encode_message(upb_encstate *e, const char *msg,
Paul Yang9bda1f12018-09-22 18:57:43 -07003015 const upb_msglayout *m, size_t *size);
Paul Yang60327462017-10-09 12:39:13 -07003016
3017static bool upb_encode_array(upb_encstate *e, const char *field_mem,
Paul Yang9bda1f12018-09-22 18:57:43 -07003018 const upb_msglayout *m,
3019 const upb_msglayout_field *f) {
Paul Yang60327462017-10-09 12:39:13 -07003020 const upb_array *arr = *(const upb_array**)field_mem;
3021
3022 if (arr == NULL || arr->len == 0) {
3023 return true;
3024 }
3025
Paul Yang0f4ad852018-03-06 13:30:03 -08003026 UPB_ASSERT(arr->type == upb_desctype_to_fieldtype2[f->descriptortype]);
Paul Yang60327462017-10-09 12:39:13 -07003027
3028#define VARINT_CASE(ctype, encode) { \
3029 ctype *start = arr->data; \
3030 ctype *ptr = start + arr->len; \
3031 size_t pre_len = e->limit - e->ptr; \
3032 do { \
3033 ptr--; \
3034 CHK(upb_put_varint(e, encode)); \
3035 } while (ptr != start); \
3036 CHK(upb_put_varint(e, e->limit - e->ptr - pre_len)); \
3037} \
3038break; \
3039do { ; } while(0)
3040
Paul Yang0f4ad852018-03-06 13:30:03 -08003041 switch (f->descriptortype) {
Paul Yang60327462017-10-09 12:39:13 -07003042 case UPB_DESCRIPTOR_TYPE_DOUBLE:
3043 CHK(upb_put_fixedarray(e, arr, sizeof(double)));
3044 break;
3045 case UPB_DESCRIPTOR_TYPE_FLOAT:
3046 CHK(upb_put_fixedarray(e, arr, sizeof(float)));
3047 break;
3048 case UPB_DESCRIPTOR_TYPE_SFIXED64:
3049 case UPB_DESCRIPTOR_TYPE_FIXED64:
3050 CHK(upb_put_fixedarray(e, arr, sizeof(uint64_t)));
3051 break;
3052 case UPB_DESCRIPTOR_TYPE_FIXED32:
3053 case UPB_DESCRIPTOR_TYPE_SFIXED32:
3054 CHK(upb_put_fixedarray(e, arr, sizeof(uint32_t)));
3055 break;
3056 case UPB_DESCRIPTOR_TYPE_INT64:
3057 case UPB_DESCRIPTOR_TYPE_UINT64:
3058 VARINT_CASE(uint64_t, *ptr);
3059 case UPB_DESCRIPTOR_TYPE_UINT32:
Paul Yang6dd563a2018-03-08 17:35:22 -08003060 VARINT_CASE(uint32_t, *ptr);
Paul Yang60327462017-10-09 12:39:13 -07003061 case UPB_DESCRIPTOR_TYPE_INT32:
3062 case UPB_DESCRIPTOR_TYPE_ENUM:
Paul Yang6dd563a2018-03-08 17:35:22 -08003063 VARINT_CASE(int32_t, (int64_t)*ptr);
Paul Yang60327462017-10-09 12:39:13 -07003064 case UPB_DESCRIPTOR_TYPE_BOOL:
3065 VARINT_CASE(bool, *ptr);
3066 case UPB_DESCRIPTOR_TYPE_SINT32:
3067 VARINT_CASE(int32_t, upb_zzencode_32(*ptr));
3068 case UPB_DESCRIPTOR_TYPE_SINT64:
3069 VARINT_CASE(int64_t, upb_zzencode_64(*ptr));
3070 case UPB_DESCRIPTOR_TYPE_STRING:
3071 case UPB_DESCRIPTOR_TYPE_BYTES: {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003072 upb_strview *start = arr->data;
3073 upb_strview *ptr = start + arr->len;
Paul Yang60327462017-10-09 12:39:13 -07003074 do {
3075 ptr--;
3076 CHK(upb_put_bytes(e, ptr->data, ptr->size) &&
3077 upb_put_varint(e, ptr->size) &&
3078 upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
3079 } while (ptr != start);
3080 return true;
3081 }
3082 case UPB_DESCRIPTOR_TYPE_GROUP: {
3083 void **start = arr->data;
3084 void **ptr = start + arr->len;
Paul Yang9bda1f12018-09-22 18:57:43 -07003085 const upb_msglayout *subm = m->submsgs[f->submsg_index];
Paul Yang60327462017-10-09 12:39:13 -07003086 do {
3087 size_t size;
3088 ptr--;
3089 CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) &&
3090 upb_encode_message(e, *ptr, subm, &size) &&
3091 upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP));
3092 } while (ptr != start);
3093 return true;
3094 }
3095 case UPB_DESCRIPTOR_TYPE_MESSAGE: {
3096 void **start = arr->data;
3097 void **ptr = start + arr->len;
Paul Yang9bda1f12018-09-22 18:57:43 -07003098 const upb_msglayout *subm = m->submsgs[f->submsg_index];
Paul Yang60327462017-10-09 12:39:13 -07003099 do {
3100 size_t size;
3101 ptr--;
3102 CHK(upb_encode_message(e, *ptr, subm, &size) &&
3103 upb_put_varint(e, size) &&
3104 upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
3105 } while (ptr != start);
3106 return true;
3107 }
3108 }
3109#undef VARINT_CASE
3110
3111 /* We encode all primitive arrays as packed, regardless of what was specified
3112 * in the .proto file. Could special case 1-sized arrays. */
3113 CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
3114 return true;
3115}
3116
3117static bool upb_encode_scalarfield(upb_encstate *e, const char *field_mem,
Paul Yang9bda1f12018-09-22 18:57:43 -07003118 const upb_msglayout *m,
3119 const upb_msglayout_field *f,
3120 bool skip_zero_value) {
Paul Yang60327462017-10-09 12:39:13 -07003121#define CASE(ctype, type, wire_type, encodeval) do { \
3122 ctype val = *(ctype*)field_mem; \
3123 if (skip_zero_value && val == 0) { \
3124 return true; \
3125 } \
3126 return upb_put_ ## type(e, encodeval) && \
3127 upb_put_tag(e, f->number, wire_type); \
3128} while(0)
3129
Paul Yang0f4ad852018-03-06 13:30:03 -08003130 switch (f->descriptortype) {
Paul Yang60327462017-10-09 12:39:13 -07003131 case UPB_DESCRIPTOR_TYPE_DOUBLE:
3132 CASE(double, double, UPB_WIRE_TYPE_64BIT, val);
3133 case UPB_DESCRIPTOR_TYPE_FLOAT:
3134 CASE(float, float, UPB_WIRE_TYPE_32BIT, val);
3135 case UPB_DESCRIPTOR_TYPE_INT64:
3136 case UPB_DESCRIPTOR_TYPE_UINT64:
3137 CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val);
3138 case UPB_DESCRIPTOR_TYPE_UINT32:
Paul Yang6dd563a2018-03-08 17:35:22 -08003139 CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val);
Paul Yang60327462017-10-09 12:39:13 -07003140 case UPB_DESCRIPTOR_TYPE_INT32:
3141 case UPB_DESCRIPTOR_TYPE_ENUM:
Paul Yang6dd563a2018-03-08 17:35:22 -08003142 CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, (int64_t)val);
Paul Yang60327462017-10-09 12:39:13 -07003143 case UPB_DESCRIPTOR_TYPE_SFIXED64:
3144 case UPB_DESCRIPTOR_TYPE_FIXED64:
3145 CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val);
3146 case UPB_DESCRIPTOR_TYPE_FIXED32:
3147 case UPB_DESCRIPTOR_TYPE_SFIXED32:
3148 CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val);
3149 case UPB_DESCRIPTOR_TYPE_BOOL:
3150 CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val);
3151 case UPB_DESCRIPTOR_TYPE_SINT32:
3152 CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_32(val));
3153 case UPB_DESCRIPTOR_TYPE_SINT64:
3154 CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_64(val));
3155 case UPB_DESCRIPTOR_TYPE_STRING:
3156 case UPB_DESCRIPTOR_TYPE_BYTES: {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003157 upb_strview view = *(upb_strview*)field_mem;
Paul Yang60327462017-10-09 12:39:13 -07003158 if (skip_zero_value && view.size == 0) {
3159 return true;
3160 }
3161 return upb_put_bytes(e, view.data, view.size) &&
3162 upb_put_varint(e, view.size) &&
3163 upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
3164 }
3165 case UPB_DESCRIPTOR_TYPE_GROUP: {
3166 size_t size;
Paul Yang9bda1f12018-09-22 18:57:43 -07003167 void *submsg = *(void **)field_mem;
3168 const upb_msglayout *subm = m->submsgs[f->submsg_index];
3169 if (submsg == NULL) {
Paul Yang60327462017-10-09 12:39:13 -07003170 return true;
3171 }
3172 return upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) &&
3173 upb_encode_message(e, submsg, subm, &size) &&
3174 upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP);
3175 }
3176 case UPB_DESCRIPTOR_TYPE_MESSAGE: {
3177 size_t size;
Paul Yang9bda1f12018-09-22 18:57:43 -07003178 void *submsg = *(void **)field_mem;
3179 const upb_msglayout *subm = m->submsgs[f->submsg_index];
3180 if (submsg == NULL) {
Paul Yang60327462017-10-09 12:39:13 -07003181 return true;
3182 }
3183 return upb_encode_message(e, submsg, subm, &size) &&
3184 upb_put_varint(e, size) &&
3185 upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
3186 }
3187 }
3188#undef CASE
3189 UPB_UNREACHABLE();
3190}
3191
Paul Yang9bda1f12018-09-22 18:57:43 -07003192bool upb_encode_message(upb_encstate *e, const char *msg,
3193 const upb_msglayout *m, size_t *size) {
Paul Yang60327462017-10-09 12:39:13 -07003194 int i;
Paul Yang6dd563a2018-03-08 17:35:22 -08003195 size_t pre_len = e->limit - e->ptr;
Paul Yang9bda1f12018-09-22 18:57:43 -07003196 const char *unknown;
3197 size_t unknown_size;
Paul Yang60327462017-10-09 12:39:13 -07003198
3199 for (i = m->field_count - 1; i >= 0; i--) {
Paul Yang9bda1f12018-09-22 18:57:43 -07003200 const upb_msglayout_field *f = &m->fields[i];
Paul Yang60327462017-10-09 12:39:13 -07003201
3202 if (f->label == UPB_LABEL_REPEATED) {
3203 CHK(upb_encode_array(e, msg + f->offset, m, f));
3204 } else {
Paul Yang9bda1f12018-09-22 18:57:43 -07003205 bool skip_empty = false;
3206 if (f->presence == 0) {
3207 /* Proto3 presence. */
3208 skip_empty = true;
3209 } else if (f->presence > 0) {
3210 /* Proto2 presence: hasbit. */
3211 if (!upb_readhasbit(msg, f)) {
3212 continue;
3213 }
3214 } else {
3215 /* Field is in a oneof. */
3216 if (upb_readcase(msg, f) != f->number) {
3217 continue;
Paul Yang6dd563a2018-03-08 17:35:22 -08003218 }
Paul Yang60327462017-10-09 12:39:13 -07003219 }
Paul Yang9bda1f12018-09-22 18:57:43 -07003220 CHK(upb_encode_scalarfield(e, msg + f->offset, m, f, skip_empty));
Paul Yang60327462017-10-09 12:39:13 -07003221 }
3222 }
3223
Paul Yang9bda1f12018-09-22 18:57:43 -07003224 unknown = upb_msg_getunknown(msg, &unknown_size);
3225
3226 if (unknown) {
3227 upb_put_bytes(e, unknown, unknown_size);
3228 }
3229
Paul Yang6dd563a2018-03-08 17:35:22 -08003230 *size = (e->limit - e->ptr) - pre_len;
Paul Yang60327462017-10-09 12:39:13 -07003231 return true;
3232}
3233
Paul Yang9bda1f12018-09-22 18:57:43 -07003234char *upb_encode(const void *msg, const upb_msglayout *m, upb_arena *arena,
3235 size_t *size) {
Paul Yang60327462017-10-09 12:39:13 -07003236 upb_encstate e;
Paul Yang9bda1f12018-09-22 18:57:43 -07003237 e.alloc = upb_arena_alloc(arena);
Paul Yang60327462017-10-09 12:39:13 -07003238 e.buf = NULL;
3239 e.limit = NULL;
3240 e.ptr = NULL;
3241
3242 if (!upb_encode_message(&e, msg, m, size)) {
3243 *size = 0;
3244 return NULL;
3245 }
3246
3247 *size = e.limit - e.ptr;
3248
3249 if (*size == 0) {
3250 static char ch;
3251 return &ch;
3252 } else {
3253 UPB_ASSERT(e.ptr);
3254 return e.ptr;
3255 }
3256}
3257
3258#undef CHK
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003259/*
3260** TODO(haberman): it's unclear whether a lot of the consistency checks should
Paul Yange0e54662016-09-15 11:09:01 -07003261** UPB_ASSERT() or return false.
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003262*/
3263
3264
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003265#include <string.h>
3266
3267
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003268
3269struct upb_handlers {
3270 upb_handlercache *cache;
3271 const upb_msgdef *msg;
3272 const upb_handlers **sub;
3273 const void *top_closure_type;
3274 upb_handlers_tabent table[1]; /* Dynamically-sized field handler array. */
3275};
3276
3277static void *upb_calloc(upb_arena *arena, size_t size) {
3278 void *mem = upb_malloc(upb_arena_alloc(arena), size);
Paul Yange0e54662016-09-15 11:09:01 -07003279 if (mem) {
3280 memset(mem, 0, size);
3281 }
3282 return mem;
3283}
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003284
3285/* Defined for the sole purpose of having a unique pointer value for
3286 * UPB_NO_CLOSURE. */
3287char _upb_noclosure;
3288
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003289/* Given a selector for a STARTSUBMSG handler, resolves to a pointer to the
3290 * subhandlers for this submessage field. */
3291#define SUBH(h, selector) (h->sub[selector])
3292
3293/* The selector for a submessage field is the field index. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003294#define SUBH_F(h, f) SUBH(h, upb_fielddef_index(f))
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003295
3296static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f,
3297 upb_handlertype_t type) {
3298 upb_selector_t sel;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003299 bool ok;
3300
3301 ok = upb_handlers_getselector(f, type, &sel);
3302
3303 UPB_ASSERT(upb_handlers_msgdef(h) == upb_fielddef_containingtype(f));
3304 UPB_ASSERT(ok);
3305
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003306 return sel;
3307}
3308
3309static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f,
3310 upb_handlertype_t type) {
3311 int32_t sel = trygetsel(h, f, type);
Paul Yange0e54662016-09-15 11:09:01 -07003312 UPB_ASSERT(sel >= 0);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003313 return sel;
3314}
3315
3316static const void **returntype(upb_handlers *h, const upb_fielddef *f,
3317 upb_handlertype_t type) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003318 return &h->table[handlers_getsel(h, f, type)].attr.return_closure_type;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003319}
3320
3321static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
3322 upb_handlertype_t type, upb_func *func,
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003323 const upb_handlerattr *attr) {
3324 upb_handlerattr set_attr = UPB_HANDLERATTR_INIT;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003325 const void *closure_type;
3326 const void **context_closure_type;
3327
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003328 UPB_ASSERT(!h->table[sel].func);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003329
3330 if (attr) {
3331 set_attr = *attr;
3332 }
3333
3334 /* Check that the given closure type matches the closure type that has been
3335 * established for this context (if any). */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003336 closure_type = set_attr.closure_type;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003337
3338 if (type == UPB_HANDLER_STRING) {
3339 context_closure_type = returntype(h, f, UPB_HANDLER_STARTSTR);
3340 } else if (f && upb_fielddef_isseq(f) &&
3341 type != UPB_HANDLER_STARTSEQ &&
3342 type != UPB_HANDLER_ENDSEQ) {
3343 context_closure_type = returntype(h, f, UPB_HANDLER_STARTSEQ);
3344 } else {
3345 context_closure_type = &h->top_closure_type;
3346 }
3347
3348 if (closure_type && *context_closure_type &&
3349 closure_type != *context_closure_type) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003350 return false;
3351 }
3352
3353 if (closure_type)
3354 *context_closure_type = closure_type;
3355
3356 /* If this is a STARTSEQ or STARTSTR handler, check that the returned pointer
3357 * matches any pre-existing expectations about what type is expected. */
3358 if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003359 const void *return_type = set_attr.return_closure_type;
3360 const void *table_return_type = h->table[sel].attr.return_closure_type;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003361 if (return_type && table_return_type && return_type != table_return_type) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003362 return false;
3363 }
3364
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003365 if (table_return_type && !return_type) {
3366 set_attr.return_closure_type = table_return_type;
3367 }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003368 }
3369
3370 h->table[sel].func = (upb_func*)func;
3371 h->table[sel].attr = set_attr;
3372 return true;
3373}
3374
3375/* Returns the effective closure type for this handler (which will propagate
3376 * from outer frames if this frame has no START* handler). Not implemented for
3377 * UPB_HANDLER_STRING at the moment since this is not needed. Returns NULL is
3378 * the effective closure type is unspecified (either no handler was registered
3379 * to specify it or the handler that was registered did not specify the closure
3380 * type). */
3381const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
3382 upb_handlertype_t type) {
3383 const void *ret;
3384 upb_selector_t sel;
3385
Paul Yange0e54662016-09-15 11:09:01 -07003386 UPB_ASSERT(type != UPB_HANDLER_STRING);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003387 ret = h->top_closure_type;
3388
3389 if (upb_fielddef_isseq(f) &&
3390 type != UPB_HANDLER_STARTSEQ &&
3391 type != UPB_HANDLER_ENDSEQ &&
3392 h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003393 ret = h->table[sel].attr.return_closure_type;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003394 }
3395
3396 if (type == UPB_HANDLER_STRING &&
3397 h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003398 ret = h->table[sel].attr.return_closure_type;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003399 }
3400
3401 /* The effective type of the submessage; not used yet.
3402 * if (type == SUBMESSAGE &&
3403 * h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003404 * ret = h->table[sel].attr.return_closure_type;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003405 * } */
3406
3407 return ret;
3408}
3409
3410/* Checks whether the START* handler specified by f & type is missing even
3411 * though it is required to convert the established type of an outer frame
3412 * ("closure_type") into the established type of an inner frame (represented in
3413 * the return closure type of this handler's attr. */
3414bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
3415 upb_status *status) {
3416 const void *closure_type;
3417 const upb_handlerattr *attr;
3418 const void *return_closure_type;
3419
3420 upb_selector_t sel = handlers_getsel(h, f, type);
3421 if (h->table[sel].func) return true;
3422 closure_type = effective_closure_type(h, f, type);
3423 attr = &h->table[sel].attr;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003424 return_closure_type = attr->return_closure_type;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003425 if (closure_type && return_closure_type &&
3426 closure_type != return_closure_type) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003427 return false;
3428 }
3429 return true;
3430}
3431
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003432static upb_handlers *upb_handlers_new(const upb_msgdef *md,
3433 upb_handlercache *cache,
3434 upb_arena *arena) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003435 int extra;
3436 upb_handlers *h;
3437
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003438 extra = sizeof(upb_handlers_tabent) * (upb_msgdef_selectorcount(md) - 1);
3439 h = upb_calloc(arena, sizeof(*h) + extra);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003440 if (!h) return NULL;
3441
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003442 h->cache = cache;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003443 h->msg = md;
Paul Yange0e54662016-09-15 11:09:01 -07003444
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003445 if (upb_msgdef_submsgfieldcount(md) > 0) {
3446 size_t bytes = upb_msgdef_submsgfieldcount(md) * sizeof(*h->sub);
3447 h->sub = upb_calloc(arena, bytes);
3448 if (!h->sub) return NULL;
Paul Yange0e54662016-09-15 11:09:01 -07003449 } else {
3450 h->sub = 0;
3451 }
3452
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003453 /* calloc() above initialized all handlers to NULL. */
3454 return h;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003455}
3456
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003457/* Public interface ***********************************************************/
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003458
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003459#define SETTER(name, handlerctype, handlertype) \
3460 bool upb_handlers_set##name(upb_handlers *h, const upb_fielddef *f, \
3461 handlerctype func, \
3462 const upb_handlerattr *attr) { \
3463 int32_t sel = trygetsel(h, f, handlertype); \
3464 return doset(h, sel, f, handlertype, (upb_func *)func, attr); \
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003465 }
3466
3467SETTER(int32, upb_int32_handlerfunc*, UPB_HANDLER_INT32)
3468SETTER(int64, upb_int64_handlerfunc*, UPB_HANDLER_INT64)
3469SETTER(uint32, upb_uint32_handlerfunc*, UPB_HANDLER_UINT32)
3470SETTER(uint64, upb_uint64_handlerfunc*, UPB_HANDLER_UINT64)
3471SETTER(float, upb_float_handlerfunc*, UPB_HANDLER_FLOAT)
3472SETTER(double, upb_double_handlerfunc*, UPB_HANDLER_DOUBLE)
3473SETTER(bool, upb_bool_handlerfunc*, UPB_HANDLER_BOOL)
3474SETTER(startstr, upb_startstr_handlerfunc*, UPB_HANDLER_STARTSTR)
3475SETTER(string, upb_string_handlerfunc*, UPB_HANDLER_STRING)
3476SETTER(endstr, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSTR)
3477SETTER(startseq, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSEQ)
3478SETTER(startsubmsg, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSUBMSG)
3479SETTER(endsubmsg, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSUBMSG)
3480SETTER(endseq, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSEQ)
3481
3482#undef SETTER
3483
Paul Yang60327462017-10-09 12:39:13 -07003484bool upb_handlers_setunknown(upb_handlers *h, upb_unknown_handlerfunc *func,
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003485 const upb_handlerattr *attr) {
Paul Yang60327462017-10-09 12:39:13 -07003486 return doset(h, UPB_UNKNOWN_SELECTOR, NULL, UPB_HANDLER_INT32,
3487 (upb_func *)func, attr);
3488}
3489
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003490bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003491 const upb_handlerattr *attr) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003492 return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
3493 (upb_func *)func, attr);
3494}
3495
3496bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003497 const upb_handlerattr *attr) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003498 return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
3499 (upb_func *)func, attr);
3500}
3501
3502bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
3503 const upb_handlers *sub) {
Paul Yange0e54662016-09-15 11:09:01 -07003504 UPB_ASSERT(sub);
Paul Yange0e54662016-09-15 11:09:01 -07003505 UPB_ASSERT(upb_fielddef_issubmsg(f));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003506 if (SUBH_F(h, f)) return false; /* Can't reset. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003507 if (upb_handlers_msgdef(sub) != upb_fielddef_msgsubdef(f)) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003508 return false;
3509 }
3510 SUBH_F(h, f) = sub;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003511 return true;
3512}
3513
3514const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
3515 const upb_fielddef *f) {
Paul Yange0e54662016-09-15 11:09:01 -07003516 UPB_ASSERT(upb_fielddef_issubmsg(f));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003517 return SUBH_F(h, f);
3518}
3519
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003520upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s,
3521 const void **handler_data) {
3522 upb_func *ret = (upb_func *)h->table[s].func;
3523 if (ret && handler_data) {
3524 *handler_data = h->table[s].attr.handler_data;
3525 }
3526 return ret;
3527}
3528
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003529bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel,
3530 upb_handlerattr *attr) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003531 if (!upb_handlers_gethandler(h, sel, NULL))
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003532 return false;
3533 *attr = h->table[sel].attr;
3534 return true;
3535}
3536
3537const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
3538 upb_selector_t sel) {
3539 /* STARTSUBMSG selector in sel is the field's selector base. */
3540 return SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT);
3541}
3542
3543const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; }
3544
3545bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003546 return upb_handlercache_addcleanup(h->cache, p, func);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003547}
3548
3549upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
3550 switch (upb_fielddef_type(f)) {
3551 case UPB_TYPE_INT32:
3552 case UPB_TYPE_ENUM: return UPB_HANDLER_INT32;
3553 case UPB_TYPE_INT64: return UPB_HANDLER_INT64;
3554 case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32;
3555 case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64;
3556 case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT;
3557 case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE;
3558 case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL;
Paul Yange0e54662016-09-15 11:09:01 -07003559 default: UPB_ASSERT(false); return -1; /* Invalid input. */
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003560 }
3561}
3562
3563bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
3564 upb_selector_t *s) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003565 uint32_t selector_base = upb_fielddef_selectorbase(f);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003566 switch (type) {
3567 case UPB_HANDLER_INT32:
3568 case UPB_HANDLER_INT64:
3569 case UPB_HANDLER_UINT32:
3570 case UPB_HANDLER_UINT64:
3571 case UPB_HANDLER_FLOAT:
3572 case UPB_HANDLER_DOUBLE:
3573 case UPB_HANDLER_BOOL:
3574 if (!upb_fielddef_isprimitive(f) ||
3575 upb_handlers_getprimitivehandlertype(f) != type)
3576 return false;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003577 *s = selector_base;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003578 break;
3579 case UPB_HANDLER_STRING:
3580 if (upb_fielddef_isstring(f)) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003581 *s = selector_base;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003582 } else if (upb_fielddef_lazy(f)) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003583 *s = selector_base + 3;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003584 } else {
3585 return false;
3586 }
3587 break;
3588 case UPB_HANDLER_STARTSTR:
3589 if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003590 *s = selector_base + 1;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003591 } else {
3592 return false;
3593 }
3594 break;
3595 case UPB_HANDLER_ENDSTR:
3596 if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003597 *s = selector_base + 2;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003598 } else {
3599 return false;
3600 }
3601 break;
3602 case UPB_HANDLER_STARTSEQ:
3603 if (!upb_fielddef_isseq(f)) return false;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003604 *s = selector_base - 2;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003605 break;
3606 case UPB_HANDLER_ENDSEQ:
3607 if (!upb_fielddef_isseq(f)) return false;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003608 *s = selector_base - 1;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003609 break;
3610 case UPB_HANDLER_STARTSUBMSG:
3611 if (!upb_fielddef_issubmsg(f)) return false;
3612 /* Selectors for STARTSUBMSG are at the beginning of the table so that the
3613 * selector can also be used as an index into the "sub" array of
3614 * subhandlers. The indexes for the two into these two tables are the
3615 * same, except that in the handler table the static selectors come first. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003616 *s = upb_fielddef_index(f) + UPB_STATIC_SELECTOR_COUNT;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003617 break;
3618 case UPB_HANDLER_ENDSUBMSG:
3619 if (!upb_fielddef_issubmsg(f)) return false;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003620 *s = selector_base;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003621 break;
3622 }
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003623 UPB_ASSERT((size_t)*s < upb_msgdef_selectorcount(upb_fielddef_containingtype(f)));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003624 return true;
3625}
3626
3627uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
3628 return upb_fielddef_isseq(f) ? 2 : 0;
3629}
3630
3631uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
3632 uint32_t ret = 1;
3633 if (upb_fielddef_isseq(f)) ret += 2; /* STARTSEQ/ENDSEQ */
3634 if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */
3635 if (upb_fielddef_issubmsg(f)) {
3636 /* ENDSUBMSG (STARTSUBMSG is at table beginning) */
3637 ret += 0;
3638 if (upb_fielddef_lazy(f)) {
3639 /* STARTSTR/ENDSTR/STRING (for lazy) */
3640 ret += 3;
3641 }
3642 }
3643 return ret;
3644}
3645
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003646/* upb_handlercache ***********************************************************/
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003647
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003648struct upb_handlercache {
3649 upb_arena *arena;
3650 upb_inttable tab; /* maps upb_msgdef* -> upb_handlers*. */
3651 upb_handlers_callback *callback;
3652 const void *closure;
3653};
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003654
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003655const upb_handlers *upb_handlercache_get(upb_handlercache *c,
3656 const upb_msgdef *md) {
3657 upb_msg_field_iter i;
3658 upb_value v;
3659 upb_handlers *h;
3660
3661 if (upb_inttable_lookupptr(&c->tab, md, &v)) {
3662 return upb_value_getptr(v);
3663 }
3664
3665 h = upb_handlers_new(md, c, c->arena);
3666 v = upb_value_ptr(h);
3667
3668 if (!h) return NULL;
3669 if (!upb_inttable_insertptr(&c->tab, md, v)) return NULL;
3670
3671 c->callback(c->closure, h);
3672
3673 /* For each submessage field, get or create a handlers object and set it as
3674 * the subhandlers. */
3675 for(upb_msg_field_begin(&i, md);
3676 !upb_msg_field_done(&i);
3677 upb_msg_field_next(&i)) {
3678 upb_fielddef *f = upb_msg_iter_field(&i);
3679
3680 if (upb_fielddef_issubmsg(f)) {
3681 const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
3682 const upb_handlers *sub_mh = upb_handlercache_get(c, subdef);
3683
3684 if (!sub_mh) return NULL;
3685
3686 upb_handlers_setsubhandlers(h, f, sub_mh);
3687 }
3688 }
3689
3690 return h;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003691}
3692
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003693
3694upb_handlercache *upb_handlercache_new(upb_handlers_callback *callback,
3695 const void *closure) {
3696 upb_handlercache *cache = upb_gmalloc(sizeof(*cache));
3697
3698 if (!cache) return NULL;
3699
3700 cache->arena = upb_arena_new();
3701
3702 cache->callback = callback;
3703 cache->closure = closure;
3704
3705 if (!upb_inttable_init(&cache->tab, UPB_CTYPE_PTR)) goto oom;
3706
3707 return cache;
3708
3709oom:
3710 upb_gfree(cache);
3711 return NULL;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003712}
3713
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003714void upb_handlercache_free(upb_handlercache *cache) {
3715 upb_inttable_uninit(&cache->tab);
3716 upb_arena_free(cache->arena);
3717 upb_gfree(cache);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003718}
3719
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003720bool upb_handlercache_addcleanup(upb_handlercache *c, void *p,
3721 upb_handlerfree *func) {
3722 return upb_arena_addcleanup(c->arena, p, func);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003723}
3724
3725/* upb_byteshandler ***********************************************************/
3726
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003727bool upb_byteshandler_setstartstr(upb_byteshandler *h,
3728 upb_startstr_handlerfunc *func, void *d) {
3729 h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003730 h->table[UPB_STARTSTR_SELECTOR].attr.handler_data = d;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003731 return true;
3732}
3733
3734bool upb_byteshandler_setstring(upb_byteshandler *h,
3735 upb_string_handlerfunc *func, void *d) {
3736 h->table[UPB_STRING_SELECTOR].func = (upb_func*)func;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003737 h->table[UPB_STRING_SELECTOR].attr.handler_data = d;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003738 return true;
3739}
3740
3741bool upb_byteshandler_setendstr(upb_byteshandler *h,
3742 upb_endfield_handlerfunc *func, void *d) {
3743 h->table[UPB_ENDSTR_SELECTOR].func = (upb_func*)func;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003744 h->table[UPB_ENDSTR_SELECTOR].attr.handler_data = d;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07003745 return true;
3746}
Paul Yang5a3405c2017-02-06 12:40:51 -08003747
Paul Yang5a3405c2017-02-06 12:40:51 -08003748/** Handlers for upb_msg ******************************************************/
3749
3750typedef struct {
3751 size_t offset;
3752 int32_t hasbit;
3753} upb_msg_handlerdata;
3754
3755/* Fallback implementation if the handler is not specialized by the producer. */
3756#define MSG_WRITER(type, ctype) \
3757 bool upb_msg_set ## type (void *c, const void *hd, ctype val) { \
3758 uint8_t *m = c; \
3759 const upb_msg_handlerdata *d = hd; \
3760 if (d->hasbit > 0) \
3761 *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8); \
3762 *(ctype*)&m[d->offset] = val; \
3763 return true; \
3764 } \
3765
3766MSG_WRITER(double, double)
3767MSG_WRITER(float, float)
3768MSG_WRITER(int32, int32_t)
3769MSG_WRITER(int64, int64_t)
3770MSG_WRITER(uint32, uint32_t)
3771MSG_WRITER(uint64, uint64_t)
3772MSG_WRITER(bool, bool)
3773
3774bool upb_msg_setscalarhandler(upb_handlers *h, const upb_fielddef *f,
3775 size_t offset, int32_t hasbit) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003776 upb_handlerattr attr = UPB_HANDLERATTR_INIT;
Paul Yang5a3405c2017-02-06 12:40:51 -08003777 bool ok;
3778
3779 upb_msg_handlerdata *d = upb_gmalloc(sizeof(*d));
3780 if (!d) return false;
3781 d->offset = offset;
3782 d->hasbit = hasbit;
3783
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003784 attr.handler_data = d;
3785 attr.alwaysok = true;
Paul Yang5a3405c2017-02-06 12:40:51 -08003786 upb_handlers_addcleanup(h, d, upb_gfree);
3787
3788#define TYPE(u, l) \
3789 case UPB_TYPE_##u: \
3790 ok = upb_handlers_set##l(h, f, upb_msg_set##l, &attr); break;
3791
3792 ok = false;
3793
3794 switch (upb_fielddef_type(f)) {
3795 TYPE(INT64, int64);
3796 TYPE(INT32, int32);
3797 TYPE(ENUM, int32);
3798 TYPE(UINT64, uint64);
3799 TYPE(UINT32, uint32);
3800 TYPE(DOUBLE, double);
3801 TYPE(FLOAT, float);
3802 TYPE(BOOL, bool);
3803 default: UPB_ASSERT(false); break;
3804 }
3805#undef TYPE
3806
Paul Yang5a3405c2017-02-06 12:40:51 -08003807 return ok;
3808}
3809
3810bool upb_msg_getscalarhandlerdata(const upb_handlers *h,
3811 upb_selector_t s,
3812 upb_fieldtype_t *type,
3813 size_t *offset,
3814 int32_t *hasbit) {
3815 const upb_msg_handlerdata *d;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003816 const void *p;
3817 upb_func *f = upb_handlers_gethandler(h, s, &p);
Paul Yang5a3405c2017-02-06 12:40:51 -08003818
3819 if ((upb_int64_handlerfunc*)f == upb_msg_setint64) {
3820 *type = UPB_TYPE_INT64;
3821 } else if ((upb_int32_handlerfunc*)f == upb_msg_setint32) {
3822 *type = UPB_TYPE_INT32;
3823 } else if ((upb_uint64_handlerfunc*)f == upb_msg_setuint64) {
3824 *type = UPB_TYPE_UINT64;
3825 } else if ((upb_uint32_handlerfunc*)f == upb_msg_setuint32) {
3826 *type = UPB_TYPE_UINT32;
3827 } else if ((upb_double_handlerfunc*)f == upb_msg_setdouble) {
3828 *type = UPB_TYPE_DOUBLE;
3829 } else if ((upb_float_handlerfunc*)f == upb_msg_setfloat) {
3830 *type = UPB_TYPE_FLOAT;
3831 } else if ((upb_bool_handlerfunc*)f == upb_msg_setbool) {
3832 *type = UPB_TYPE_BOOL;
3833 } else {
3834 return false;
3835 }
3836
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003837 d = p;
Paul Yang5a3405c2017-02-06 12:40:51 -08003838 *offset = d->offset;
3839 *hasbit = d->hasbit;
3840 return true;
3841}
Paul Yang9bda1f12018-09-22 18:57:43 -07003842
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003843#include <string.h>
Paul Yang9bda1f12018-09-22 18:57:43 -07003844
3845bool upb_fieldtype_mapkeyok(upb_fieldtype_t type) {
3846 return type == UPB_TYPE_BOOL || type == UPB_TYPE_INT32 ||
3847 type == UPB_TYPE_UINT32 || type == UPB_TYPE_INT64 ||
3848 type == UPB_TYPE_UINT64 || type == UPB_TYPE_STRING;
3849}
3850
3851#define PTR_AT(msg, ofs, type) (type*)((char*)msg + ofs)
3852#define VOIDPTR_AT(msg, ofs) PTR_AT(msg, ofs, void)
3853#define ENCODE_MAX_NESTING 64
3854#define CHECK_TRUE(x) if (!(x)) { return false; }
3855
3856/** upb_msgval ****************************************************************/
3857
Paul Yang9bda1f12018-09-22 18:57:43 -07003858/* These functions will generate real memcpy() calls on ARM sadly, because
3859 * the compiler assumes they might not be aligned. */
3860
3861static upb_msgval upb_msgval_read(const void *p, size_t ofs,
3862 uint8_t size) {
3863 upb_msgval val;
3864 p = (char*)p + ofs;
3865 memcpy(&val, p, size);
3866 return val;
3867}
3868
3869static void upb_msgval_write(void *p, size_t ofs, upb_msgval val,
3870 uint8_t size) {
3871 p = (char*)p + ofs;
3872 memcpy(p, &val, size);
3873}
3874
3875static size_t upb_msgval_sizeof(upb_fieldtype_t type) {
3876 switch (type) {
3877 case UPB_TYPE_DOUBLE:
3878 case UPB_TYPE_INT64:
3879 case UPB_TYPE_UINT64:
3880 return 8;
3881 case UPB_TYPE_ENUM:
3882 case UPB_TYPE_INT32:
3883 case UPB_TYPE_UINT32:
3884 case UPB_TYPE_FLOAT:
3885 return 4;
3886 case UPB_TYPE_BOOL:
3887 return 1;
3888 case UPB_TYPE_MESSAGE:
3889 return sizeof(void*);
3890 case UPB_TYPE_BYTES:
3891 case UPB_TYPE_STRING:
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08003892 return sizeof(upb_strview);
Paul Yang9bda1f12018-09-22 18:57:43 -07003893 }
3894 UPB_UNREACHABLE();
3895}
3896
3897static uint8_t upb_msg_fieldsize(const upb_msglayout_field *field) {
3898 if (field->label == UPB_LABEL_REPEATED) {
3899 return sizeof(void*);
3900 } else {
3901 return upb_msgval_sizeof(upb_desctype_to_fieldtype[field->descriptortype]);
3902 }
3903}
3904
3905/* TODO(haberman): this is broken right now because upb_msgval can contain
3906 * a char* / size_t pair, which is too big for a upb_value. To fix this
3907 * we'll probably need to dynamically allocate a upb_msgval and store a
3908 * pointer to that in the tables for extensions/maps. */
3909static upb_value upb_toval(upb_msgval val) {
3910 upb_value ret;
3911 UPB_UNUSED(val);
3912 memset(&ret, 0, sizeof(upb_value)); /* XXX */
3913 return ret;
3914}
3915
3916static upb_msgval upb_msgval_fromval(upb_value val) {
3917 upb_msgval ret;
3918 UPB_UNUSED(val);
3919 memset(&ret, 0, sizeof(upb_msgval)); /* XXX */
3920 return ret;
3921}
3922
3923static upb_ctype_t upb_fieldtotabtype(upb_fieldtype_t type) {
3924 switch (type) {
3925 case UPB_TYPE_FLOAT: return UPB_CTYPE_FLOAT;
3926 case UPB_TYPE_DOUBLE: return UPB_CTYPE_DOUBLE;
3927 case UPB_TYPE_BOOL: return UPB_CTYPE_BOOL;
3928 case UPB_TYPE_BYTES:
3929 case UPB_TYPE_MESSAGE:
3930 case UPB_TYPE_STRING: return UPB_CTYPE_CONSTPTR;
3931 case UPB_TYPE_ENUM:
3932 case UPB_TYPE_INT32: return UPB_CTYPE_INT32;
3933 case UPB_TYPE_UINT32: return UPB_CTYPE_UINT32;
3934 case UPB_TYPE_INT64: return UPB_CTYPE_INT64;
3935 case UPB_TYPE_UINT64: return UPB_CTYPE_UINT64;
3936 default: UPB_ASSERT(false); return 0;
3937 }
3938}
3939
3940
3941/** upb_msg *******************************************************************/
3942
3943/* If we always read/write as a consistent type to each address, this shouldn't
3944 * violate aliasing.
3945 */
3946#define DEREF(msg, ofs, type) *PTR_AT(msg, ofs, type)
3947
3948/* Internal members of a upb_msg. We can change this without breaking binary
3949 * compatibility. We put these before the user's data. The user's upb_msg*
3950 * points after the upb_msg_internal. */
3951
3952/* Used when a message is not extendable. */
3953typedef struct {
3954 /* TODO(haberman): use pointer tagging so we we are slim when known unknown
3955 * fields are not present. */
3956 upb_arena *arena;
3957 char *unknown;
3958 size_t unknown_len;
3959 size_t unknown_size;
3960} upb_msg_internal;
3961
3962/* Used when a message is extendable. */
3963typedef struct {
3964 upb_inttable *extdict;
3965 upb_msg_internal base;
3966} upb_msg_internal_withext;
3967
3968static int upb_msg_internalsize(const upb_msglayout *l) {
3969 return sizeof(upb_msg_internal) - l->extendable * sizeof(void *);
3970}
3971
3972static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
3973 return VOIDPTR_AT(msg, -sizeof(upb_msg_internal));
3974}
3975
3976static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) {
3977 return VOIDPTR_AT(msg, -sizeof(upb_msg_internal));
3978}
3979
3980static upb_msg_internal_withext *upb_msg_getinternalwithext(
3981 upb_msg *msg, const upb_msglayout *l) {
3982 UPB_ASSERT(l->extendable);
3983 return VOIDPTR_AT(msg, -sizeof(upb_msg_internal_withext));
3984}
3985
3986void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len) {
3987 upb_msg_internal* in = upb_msg_getinternal(msg);
3988 if (len > in->unknown_size - in->unknown_len) {
3989 upb_alloc *alloc = upb_arena_alloc(in->arena);
3990 size_t need = in->unknown_size + len;
3991 size_t newsize = UPB_MAX(in->unknown_size * 2, need);
3992 in->unknown = upb_realloc(alloc, in->unknown, in->unknown_size, newsize);
3993 in->unknown_size = newsize;
3994 }
3995 memcpy(in->unknown + in->unknown_len, data, len);
3996 in->unknown_len += len;
3997}
3998
3999const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) {
4000 const upb_msg_internal* in = upb_msg_getinternal_const(msg);
4001 *len = in->unknown_len;
4002 return in->unknown;
4003}
4004
4005static const upb_msglayout_field *upb_msg_checkfield(int field_index,
4006 const upb_msglayout *l) {
4007 UPB_ASSERT(field_index >= 0 && field_index < l->field_count);
4008 return &l->fields[field_index];
4009}
4010
4011static bool upb_msg_inoneof(const upb_msglayout_field *field) {
4012 return field->presence < 0;
4013}
4014
4015static uint32_t *upb_msg_oneofcase(const upb_msg *msg, int field_index,
4016 const upb_msglayout *l) {
4017 const upb_msglayout_field *field = upb_msg_checkfield(field_index, l);
4018 UPB_ASSERT(upb_msg_inoneof(field));
4019 return PTR_AT(msg, ~field->presence, uint32_t);
4020}
4021
4022static size_t upb_msg_sizeof(const upb_msglayout *l) {
4023 return l->size + upb_msg_internalsize(l);
4024}
4025
4026upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a) {
4027 upb_alloc *alloc = upb_arena_alloc(a);
4028 void *mem = upb_malloc(alloc, upb_msg_sizeof(l));
4029 upb_msg_internal *in;
4030 upb_msg *msg;
4031
4032 if (!mem) {
4033 return NULL;
4034 }
4035
4036 msg = VOIDPTR_AT(mem, upb_msg_internalsize(l));
4037
4038 /* Initialize normal members. */
4039 memset(msg, 0, l->size);
4040
4041 /* Initialize internal members. */
4042 in = upb_msg_getinternal(msg);
4043 in->arena = a;
4044 in->unknown = NULL;
4045 in->unknown_len = 0;
4046 in->unknown_size = 0;
4047
4048 if (l->extendable) {
4049 upb_msg_getinternalwithext(msg, l)->extdict = NULL;
4050 }
4051
4052 return msg;
4053}
4054
4055upb_arena *upb_msg_arena(const upb_msg *msg) {
4056 return upb_msg_getinternal_const(msg)->arena;
4057}
4058
4059bool upb_msg_has(const upb_msg *msg,
4060 int field_index,
4061 const upb_msglayout *l) {
4062 const upb_msglayout_field *field = upb_msg_checkfield(field_index, l);
4063
4064 UPB_ASSERT(field->presence);
4065
4066 if (upb_msg_inoneof(field)) {
4067 /* Oneofs are set when the oneof number is set to this field. */
4068 return *upb_msg_oneofcase(msg, field_index, l) == field->number;
4069 } else {
4070 /* Other fields are set when their hasbit is set. */
4071 uint32_t hasbit = field->presence;
4072 return DEREF(msg, hasbit / 8, char) | (1 << (hasbit % 8));
4073 }
4074}
4075
4076upb_msgval upb_msg_get(const upb_msg *msg, int field_index,
4077 const upb_msglayout *l) {
4078 const upb_msglayout_field *field = upb_msg_checkfield(field_index, l);
4079 int size = upb_msg_fieldsize(field);
4080 return upb_msgval_read(msg, field->offset, size);
4081}
4082
4083void upb_msg_set(upb_msg *msg, int field_index, upb_msgval val,
4084 const upb_msglayout *l) {
4085 const upb_msglayout_field *field = upb_msg_checkfield(field_index, l);
4086 int size = upb_msg_fieldsize(field);
4087 upb_msgval_write(msg, field->offset, val, size);
4088}
4089
4090
4091/** upb_array *****************************************************************/
4092
4093#define DEREF_ARR(arr, i, type) ((type*)arr->data)[i]
4094
4095upb_array *upb_array_new(upb_fieldtype_t type, upb_arena *a) {
4096 upb_alloc *alloc = upb_arena_alloc(a);
4097 upb_array *ret = upb_malloc(alloc, sizeof(upb_array));
4098
4099 if (!ret) {
4100 return NULL;
4101 }
4102
4103 ret->type = type;
4104 ret->data = NULL;
4105 ret->len = 0;
4106 ret->size = 0;
4107 ret->element_size = upb_msgval_sizeof(type);
4108 ret->arena = a;
4109
4110 return ret;
4111}
4112
4113size_t upb_array_size(const upb_array *arr) {
4114 return arr->len;
4115}
4116
4117upb_fieldtype_t upb_array_type(const upb_array *arr) {
4118 return arr->type;
4119}
4120
4121upb_msgval upb_array_get(const upb_array *arr, size_t i) {
4122 UPB_ASSERT(i < arr->len);
4123 return upb_msgval_read(arr->data, i * arr->element_size, arr->element_size);
4124}
4125
4126bool upb_array_set(upb_array *arr, size_t i, upb_msgval val) {
4127 UPB_ASSERT(i <= arr->len);
4128
4129 if (i == arr->len) {
4130 /* Extending the array. */
4131
4132 if (i == arr->size) {
4133 /* Need to reallocate. */
4134 size_t new_size = UPB_MAX(arr->size * 2, 8);
4135 size_t new_bytes = new_size * arr->element_size;
4136 size_t old_bytes = arr->size * arr->element_size;
4137 upb_alloc *alloc = upb_arena_alloc(arr->arena);
4138 upb_msgval *new_data =
4139 upb_realloc(alloc, arr->data, old_bytes, new_bytes);
4140
4141 if (!new_data) {
4142 return false;
4143 }
4144
4145 arr->data = new_data;
4146 arr->size = new_size;
4147 }
4148
4149 arr->len = i + 1;
4150 }
4151
4152 upb_msgval_write(arr->data, i * arr->element_size, val, arr->element_size);
4153 return true;
4154}
4155
4156
4157/** upb_map *******************************************************************/
4158
4159struct upb_map {
4160 upb_fieldtype_t key_type;
4161 upb_fieldtype_t val_type;
4162 /* We may want to optimize this to use inttable where possible, for greater
4163 * efficiency and lower memory footprint. */
4164 upb_strtable strtab;
4165 upb_arena *arena;
4166};
4167
4168static void upb_map_tokey(upb_fieldtype_t type, upb_msgval *key,
4169 const char **out_key, size_t *out_len) {
4170 switch (type) {
4171 case UPB_TYPE_STRING:
4172 /* Point to string data of the input key. */
4173 *out_key = key->str.data;
4174 *out_len = key->str.size;
4175 return;
4176 case UPB_TYPE_BOOL:
4177 case UPB_TYPE_INT32:
4178 case UPB_TYPE_UINT32:
4179 case UPB_TYPE_INT64:
4180 case UPB_TYPE_UINT64:
4181 /* Point to the key itself. XXX: big-endian. */
4182 *out_key = (const char*)key;
4183 *out_len = upb_msgval_sizeof(type);
4184 return;
4185 case UPB_TYPE_BYTES:
4186 case UPB_TYPE_DOUBLE:
4187 case UPB_TYPE_ENUM:
4188 case UPB_TYPE_FLOAT:
4189 case UPB_TYPE_MESSAGE:
4190 break; /* Cannot be a map key. */
4191 }
4192 UPB_UNREACHABLE();
4193}
4194
4195static upb_msgval upb_map_fromkey(upb_fieldtype_t type, const char *key,
4196 size_t len) {
4197 switch (type) {
4198 case UPB_TYPE_STRING:
4199 return upb_msgval_makestr(key, len);
4200 case UPB_TYPE_BOOL:
4201 case UPB_TYPE_INT32:
4202 case UPB_TYPE_UINT32:
4203 case UPB_TYPE_INT64:
4204 case UPB_TYPE_UINT64:
4205 return upb_msgval_read(key, 0, upb_msgval_sizeof(type));
4206 case UPB_TYPE_BYTES:
4207 case UPB_TYPE_DOUBLE:
4208 case UPB_TYPE_ENUM:
4209 case UPB_TYPE_FLOAT:
4210 case UPB_TYPE_MESSAGE:
4211 break; /* Cannot be a map key. */
4212 }
4213 UPB_UNREACHABLE();
4214}
4215
4216upb_map *upb_map_new(upb_fieldtype_t ktype, upb_fieldtype_t vtype,
4217 upb_arena *a) {
4218 upb_ctype_t vtabtype = upb_fieldtotabtype(vtype);
4219 upb_alloc *alloc = upb_arena_alloc(a);
4220 upb_map *map = upb_malloc(alloc, sizeof(upb_map));
4221
4222 if (!map) {
4223 return NULL;
4224 }
4225
4226 UPB_ASSERT(upb_fieldtype_mapkeyok(ktype));
4227 map->key_type = ktype;
4228 map->val_type = vtype;
4229 map->arena = a;
4230
4231 if (!upb_strtable_init2(&map->strtab, vtabtype, alloc)) {
4232 return NULL;
4233 }
4234
4235 return map;
4236}
4237
4238size_t upb_map_size(const upb_map *map) {
4239 return upb_strtable_count(&map->strtab);
4240}
4241
4242upb_fieldtype_t upb_map_keytype(const upb_map *map) {
4243 return map->key_type;
4244}
4245
4246upb_fieldtype_t upb_map_valuetype(const upb_map *map) {
4247 return map->val_type;
4248}
4249
4250bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val) {
4251 upb_value tabval;
4252 const char *key_str;
4253 size_t key_len;
4254 bool ret;
4255
4256 upb_map_tokey(map->key_type, &key, &key_str, &key_len);
4257 ret = upb_strtable_lookup2(&map->strtab, key_str, key_len, &tabval);
4258 if (ret) {
4259 memcpy(val, &tabval, sizeof(tabval));
4260 }
4261
4262 return ret;
4263}
4264
4265bool upb_map_set(upb_map *map, upb_msgval key, upb_msgval val,
4266 upb_msgval *removed) {
4267 const char *key_str;
4268 size_t key_len;
4269 upb_value tabval = upb_toval(val);
4270 upb_value removedtabval;
4271 upb_alloc *a = upb_arena_alloc(map->arena);
4272
4273 upb_map_tokey(map->key_type, &key, &key_str, &key_len);
4274
4275 /* TODO(haberman): add overwrite operation to minimize number of lookups. */
4276 if (upb_strtable_lookup2(&map->strtab, key_str, key_len, NULL)) {
4277 upb_strtable_remove3(&map->strtab, key_str, key_len, &removedtabval, a);
4278 memcpy(&removed, &removedtabval, sizeof(removed));
4279 }
4280
4281 return upb_strtable_insert3(&map->strtab, key_str, key_len, tabval, a);
4282}
4283
4284bool upb_map_del(upb_map *map, upb_msgval key) {
4285 const char *key_str;
4286 size_t key_len;
4287 upb_alloc *a = upb_arena_alloc(map->arena);
4288
4289 upb_map_tokey(map->key_type, &key, &key_str, &key_len);
4290 return upb_strtable_remove3(&map->strtab, key_str, key_len, NULL, a);
4291}
4292
4293
4294/** upb_mapiter ***************************************************************/
4295
4296struct upb_mapiter {
4297 upb_strtable_iter iter;
4298 upb_fieldtype_t key_type;
4299};
4300
4301size_t upb_mapiter_sizeof() {
4302 return sizeof(upb_mapiter);
4303}
4304
4305void upb_mapiter_begin(upb_mapiter *i, const upb_map *map) {
4306 upb_strtable_begin(&i->iter, &map->strtab);
4307 i->key_type = map->key_type;
4308}
4309
4310upb_mapiter *upb_mapiter_new(const upb_map *t, upb_alloc *a) {
4311 upb_mapiter *ret = upb_malloc(a, upb_mapiter_sizeof());
4312
4313 if (!ret) {
4314 return NULL;
4315 }
4316
4317 upb_mapiter_begin(ret, t);
4318 return ret;
4319}
4320
4321void upb_mapiter_free(upb_mapiter *i, upb_alloc *a) {
4322 upb_free(a, i);
4323}
4324
4325void upb_mapiter_next(upb_mapiter *i) {
4326 upb_strtable_next(&i->iter);
4327}
4328
4329bool upb_mapiter_done(const upb_mapiter *i) {
4330 return upb_strtable_done(&i->iter);
4331}
4332
4333upb_msgval upb_mapiter_key(const upb_mapiter *i) {
4334 return upb_map_fromkey(i->key_type, upb_strtable_iter_key(&i->iter),
4335 upb_strtable_iter_keylength(&i->iter));
4336}
4337
4338upb_msgval upb_mapiter_value(const upb_mapiter *i) {
4339 return upb_msgval_fromval(upb_strtable_iter_value(&i->iter));
4340}
4341
4342void upb_mapiter_setdone(upb_mapiter *i) {
4343 upb_strtable_iter_setdone(&i->iter);
4344}
4345
4346bool upb_mapiter_isequal(const upb_mapiter *i1, const upb_mapiter *i2) {
4347 return upb_strtable_iter_isequal(&i1->iter, &i2->iter);
4348}
4349
4350
4351static bool is_power_of_two(size_t val) {
4352 return (val & (val - 1)) == 0;
4353}
4354
4355/* Align up to the given power of 2. */
4356static size_t align_up(size_t val, size_t align) {
4357 UPB_ASSERT(is_power_of_two(align));
4358 return (val + align - 1) & ~(align - 1);
4359}
4360
4361static size_t div_round_up(size_t n, size_t d) {
4362 return (n + d - 1) / d;
4363}
4364
4365static size_t upb_msgval_sizeof2(upb_fieldtype_t type) {
4366 switch (type) {
4367 case UPB_TYPE_DOUBLE:
4368 case UPB_TYPE_INT64:
4369 case UPB_TYPE_UINT64:
4370 return 8;
4371 case UPB_TYPE_ENUM:
4372 case UPB_TYPE_INT32:
4373 case UPB_TYPE_UINT32:
4374 case UPB_TYPE_FLOAT:
4375 return 4;
4376 case UPB_TYPE_BOOL:
4377 return 1;
4378 case UPB_TYPE_MESSAGE:
4379 return sizeof(void*);
4380 case UPB_TYPE_BYTES:
4381 case UPB_TYPE_STRING:
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08004382 return sizeof(upb_strview);
Paul Yang9bda1f12018-09-22 18:57:43 -07004383 }
4384 UPB_UNREACHABLE();
4385}
4386
4387static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) {
4388 if (upb_fielddef_isseq(f)) {
4389 return sizeof(void*);
4390 } else {
4391 return upb_msgval_sizeof2(upb_fielddef_type(f));
4392 }
4393}
4394
4395
4396/** upb_msglayout *************************************************************/
4397
4398static void upb_msglayout_free(upb_msglayout *l) {
4399 upb_gfree(l);
4400}
4401
4402static size_t upb_msglayout_place(upb_msglayout *l, size_t size) {
4403 size_t ret;
4404
4405 l->size = align_up(l->size, size);
4406 ret = l->size;
4407 l->size += size;
4408 return ret;
4409}
4410
4411static bool upb_msglayout_init(const upb_msgdef *m,
4412 upb_msglayout *l,
4413 upb_msgfactory *factory) {
4414 upb_msg_field_iter it;
4415 upb_msg_oneof_iter oit;
4416 size_t hasbit;
4417 size_t submsg_count = 0;
4418 const upb_msglayout **submsgs;
4419 upb_msglayout_field *fields;
4420
4421 for (upb_msg_field_begin(&it, m);
4422 !upb_msg_field_done(&it);
4423 upb_msg_field_next(&it)) {
4424 const upb_fielddef* f = upb_msg_iter_field(&it);
4425 if (upb_fielddef_issubmsg(f)) {
4426 submsg_count++;
4427 }
4428 }
4429
4430 memset(l, 0, sizeof(*l));
4431
4432 fields = upb_gmalloc(upb_msgdef_numfields(m) * sizeof(*fields));
4433 submsgs = upb_gmalloc(submsg_count * sizeof(*submsgs));
4434
4435 if ((!fields && upb_msgdef_numfields(m)) ||
4436 (!submsgs && submsg_count)) {
4437 /* OOM. */
4438 upb_gfree(fields);
4439 upb_gfree(submsgs);
4440 return false;
4441 }
4442
4443 l->field_count = upb_msgdef_numfields(m);
4444 l->fields = fields;
4445 l->submsgs = submsgs;
4446
4447 /* Allocate data offsets in three stages:
4448 *
4449 * 1. hasbits.
4450 * 2. regular fields.
4451 * 3. oneof fields.
4452 *
4453 * OPT: There is a lot of room for optimization here to minimize the size.
4454 */
4455
4456 /* Allocate hasbits and set basic field attributes. */
4457 submsg_count = 0;
4458 for (upb_msg_field_begin(&it, m), hasbit = 0;
4459 !upb_msg_field_done(&it);
4460 upb_msg_field_next(&it)) {
4461 const upb_fielddef* f = upb_msg_iter_field(&it);
4462 upb_msglayout_field *field = &fields[upb_fielddef_index(f)];
4463
4464 field->number = upb_fielddef_number(f);
4465 field->descriptortype = upb_fielddef_descriptortype(f);
4466 field->label = upb_fielddef_label(f);
4467
4468 if (upb_fielddef_issubmsg(f)) {
4469 const upb_msglayout *sub_layout =
4470 upb_msgfactory_getlayout(factory, upb_fielddef_msgsubdef(f));
4471 field->submsg_index = submsg_count++;
4472 submsgs[field->submsg_index] = sub_layout;
4473 }
4474
4475 if (upb_fielddef_haspresence(f) && !upb_fielddef_containingoneof(f)) {
4476 field->presence = (hasbit++);
4477 } else {
4478 field->presence = 0;
4479 }
4480 }
4481
4482 /* Account for space used by hasbits. */
4483 l->size = div_round_up(hasbit, 8);
4484
4485 /* Allocate non-oneof fields. */
4486 for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
4487 upb_msg_field_next(&it)) {
4488 const upb_fielddef* f = upb_msg_iter_field(&it);
4489 size_t field_size = upb_msg_fielddefsize(f);
4490 size_t index = upb_fielddef_index(f);
4491
4492 if (upb_fielddef_containingoneof(f)) {
4493 /* Oneofs are handled separately below. */
4494 continue;
4495 }
4496
4497 fields[index].offset = upb_msglayout_place(l, field_size);
4498 }
4499
4500 /* Allocate oneof fields. Each oneof field consists of a uint32 for the case
4501 * and space for the actual data. */
4502 for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit);
4503 upb_msg_oneof_next(&oit)) {
4504 const upb_oneofdef* o = upb_msg_iter_oneof(&oit);
4505 upb_oneof_iter fit;
4506
4507 size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. */
4508 size_t field_size = 0;
4509 uint32_t case_offset;
4510 uint32_t data_offset;
4511
4512 /* Calculate field size: the max of all field sizes. */
4513 for (upb_oneof_begin(&fit, o);
4514 !upb_oneof_done(&fit);
4515 upb_oneof_next(&fit)) {
4516 const upb_fielddef* f = upb_oneof_iter_field(&fit);
4517 field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f));
4518 }
4519
4520 /* Align and allocate case offset. */
4521 case_offset = upb_msglayout_place(l, case_size);
4522 data_offset = upb_msglayout_place(l, field_size);
4523
4524 for (upb_oneof_begin(&fit, o);
4525 !upb_oneof_done(&fit);
4526 upb_oneof_next(&fit)) {
4527 const upb_fielddef* f = upb_oneof_iter_field(&fit);
4528 fields[upb_fielddef_index(f)].offset = data_offset;
4529 fields[upb_fielddef_index(f)].presence = ~case_offset;
4530 }
4531 }
4532
4533 /* Size of the entire structure should be a multiple of its greatest
4534 * alignment. TODO: track overall alignment for real? */
4535 l->size = align_up(l->size, 8);
4536
4537 return true;
4538}
4539
4540
4541/** upb_msgfactory ************************************************************/
4542
4543struct upb_msgfactory {
4544 const upb_symtab *symtab; /* We own a ref. */
4545 upb_inttable layouts;
Paul Yang9bda1f12018-09-22 18:57:43 -07004546};
4547
4548upb_msgfactory *upb_msgfactory_new(const upb_symtab *symtab) {
4549 upb_msgfactory *ret = upb_gmalloc(sizeof(*ret));
4550
4551 ret->symtab = symtab;
4552 upb_inttable_init(&ret->layouts, UPB_CTYPE_PTR);
Paul Yang9bda1f12018-09-22 18:57:43 -07004553
4554 return ret;
4555}
4556
4557void upb_msgfactory_free(upb_msgfactory *f) {
4558 upb_inttable_iter i;
4559 upb_inttable_begin(&i, &f->layouts);
4560 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
4561 upb_msglayout *l = upb_value_getptr(upb_inttable_iter_value(&i));
4562 upb_msglayout_free(l);
4563 }
4564
Paul Yang9bda1f12018-09-22 18:57:43 -07004565 upb_inttable_uninit(&f->layouts);
Paul Yang9bda1f12018-09-22 18:57:43 -07004566 upb_gfree(f);
4567}
4568
4569const upb_symtab *upb_msgfactory_symtab(const upb_msgfactory *f) {
4570 return f->symtab;
4571}
4572
4573const upb_msglayout *upb_msgfactory_getlayout(upb_msgfactory *f,
4574 const upb_msgdef *m) {
4575 upb_value v;
4576 UPB_ASSERT(upb_symtab_lookupmsg(f->symtab, upb_msgdef_fullname(m)) == m);
4577 UPB_ASSERT(!upb_msgdef_mapentry(m));
4578
4579 if (upb_inttable_lookupptr(&f->layouts, m, &v)) {
4580 UPB_ASSERT(upb_value_getptr(v));
4581 return upb_value_getptr(v);
4582 } else {
4583 /* In case of circular dependency, layout has to be inserted first. */
4584 upb_msglayout *l = upb_gmalloc(sizeof(*l));
4585 upb_msgfactory *mutable_f = (void*)f;
4586 upb_inttable_insertptr(&mutable_f->layouts, m, upb_value_ptr(l));
4587 UPB_ASSERT(l);
4588 if (!upb_msglayout_init(m, l, f)) {
4589 upb_msglayout_free(l);
4590 }
4591 return l;
4592 }
4593}
Paul Yangc4f2a922019-01-17 10:18:43 -08004594
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08004595#ifndef UINTPTR_MAX
4596#error must include stdint.h first
4597#endif
4598
Paul Yangc4f2a922019-01-17 10:18:43 -08004599#if UINTPTR_MAX == 0xffffffff
4600#define UPB_SIZE(size32, size64) size32
4601#else
4602#define UPB_SIZE(size32, size64) size64
4603#endif
4604
4605#define UPB_FIELD_AT(msg, fieldtype, offset) \
4606 *(fieldtype*)((const char*)(msg) + offset)
4607
4608#define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \
4609 UPB_FIELD_AT(msg, int, case_offset) == case_val \
4610 ? UPB_FIELD_AT(msg, fieldtype, offset) \
4611 : default
4612
4613#define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \
4614 UPB_FIELD_AT(msg, int, case_offset) = case_val; \
4615 UPB_FIELD_AT(msg, fieldtype, offset) = value;
4616
4617#undef UPB_SIZE
4618#undef UPB_FIELD_AT
4619#undef UPB_READ_ONEOF
4620#undef UPB_WRITE_ONEOF
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004621
4622
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08004623bool upb_bufsrc_putbuf(const char *buf, size_t len, upb_bytessink sink) {
Paul Yang5a3405c2017-02-06 12:40:51 -08004624 void *subc;
Paul Yange0e54662016-09-15 11:09:01 -07004625 bool ret;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08004626 upb_bufhandle handle = UPB_BUFHANDLE_INIT;
4627 handle.buf = buf;
Paul Yang5a3405c2017-02-06 12:40:51 -08004628 ret = upb_bytessink_start(sink, len, &subc);
4629 if (ret && len != 0) {
4630 ret = (upb_bytessink_putbuf(sink, subc, buf, len, &handle) >= len);
Paul Yange0e54662016-09-15 11:09:01 -07004631 }
Paul Yang5a3405c2017-02-06 12:40:51 -08004632 if (ret) {
4633 ret = upb_bytessink_end(sink);
Paul Yange0e54662016-09-15 11:09:01 -07004634 }
Paul Yange0e54662016-09-15 11:09:01 -07004635 return ret;
4636}
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004637/*
4638** upb_table Implementation
4639**
4640** Implementation is heavily inspired by Lua's ltable.c.
4641*/
4642
4643
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004644#include <string.h>
4645
4646#define UPB_MAXARRSIZE 16 /* 64k. */
4647
4648/* From Chromium. */
4649#define ARRAY_SIZE(x) \
4650 ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))
4651
Paul Yange0e54662016-09-15 11:09:01 -07004652static void upb_check_alloc(upb_table *t, upb_alloc *a) {
4653 UPB_UNUSED(t);
4654 UPB_UNUSED(a);
4655 UPB_ASSERT_DEBUGVAR(t->alloc == a);
4656}
4657
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004658static const double MAX_LOAD = 0.85;
4659
4660/* The minimum utilization of the array part of a mixed hash/array table. This
4661 * is a speed/memory-usage tradeoff (though it's not straightforward because of
4662 * cache effects). The lower this is, the more memory we'll use. */
4663static const double MIN_DENSITY = 0.1;
4664
4665bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; }
4666
4667int log2ceil(uint64_t v) {
4668 int ret = 0;
4669 bool pow2 = is_pow2(v);
4670 while (v >>= 1) ret++;
4671 ret = pow2 ? ret : ret + 1; /* Ceiling. */
4672 return UPB_MIN(UPB_MAXARRSIZE, ret);
4673}
4674
Paul Yange0e54662016-09-15 11:09:01 -07004675char *upb_strdup(const char *s, upb_alloc *a) {
4676 return upb_strdup2(s, strlen(s), a);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004677}
4678
Paul Yange0e54662016-09-15 11:09:01 -07004679char *upb_strdup2(const char *s, size_t len, upb_alloc *a) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004680 size_t n;
4681 char *p;
4682
4683 /* Prevent overflow errors. */
4684 if (len == SIZE_MAX) return NULL;
4685 /* Always null-terminate, even if binary data; but don't rely on the input to
4686 * have a null-terminating byte since it may be a raw binary buffer. */
4687 n = len + 1;
Paul Yange0e54662016-09-15 11:09:01 -07004688 p = upb_malloc(a, n);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004689 if (p) {
4690 memcpy(p, s, len);
4691 p[len] = 0;
4692 }
4693 return p;
4694}
4695
4696/* A type to represent the lookup key of either a strtable or an inttable. */
4697typedef union {
4698 uintptr_t num;
4699 struct {
4700 const char *str;
4701 size_t len;
4702 } str;
4703} lookupkey_t;
4704
4705static lookupkey_t strkey2(const char *str, size_t len) {
4706 lookupkey_t k;
4707 k.str.str = str;
4708 k.str.len = len;
4709 return k;
4710}
4711
4712static lookupkey_t intkey(uintptr_t key) {
4713 lookupkey_t k;
4714 k.num = key;
4715 return k;
4716}
4717
4718typedef uint32_t hashfunc_t(upb_tabkey key);
4719typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
4720
4721/* Base table (shared code) ***************************************************/
4722
4723/* For when we need to cast away const. */
4724static upb_tabent *mutable_entries(upb_table *t) {
4725 return (upb_tabent*)t->entries;
4726}
4727
4728static bool isfull(upb_table *t) {
Paul Yange0e54662016-09-15 11:09:01 -07004729 if (upb_table_size(t) == 0) {
4730 return true;
4731 } else {
4732 return ((double)(t->count + 1) / upb_table_size(t)) > MAX_LOAD;
4733 }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004734}
4735
Paul Yange0e54662016-09-15 11:09:01 -07004736static bool init(upb_table *t, upb_ctype_t ctype, uint8_t size_lg2,
4737 upb_alloc *a) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004738 size_t bytes;
4739
4740 t->count = 0;
4741 t->ctype = ctype;
4742 t->size_lg2 = size_lg2;
4743 t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
Paul Yange0e54662016-09-15 11:09:01 -07004744#ifndef NDEBUG
4745 t->alloc = a;
4746#endif
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004747 bytes = upb_table_size(t) * sizeof(upb_tabent);
4748 if (bytes > 0) {
Paul Yange0e54662016-09-15 11:09:01 -07004749 t->entries = upb_malloc(a, bytes);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004750 if (!t->entries) return false;
4751 memset(mutable_entries(t), 0, bytes);
4752 } else {
4753 t->entries = NULL;
4754 }
4755 return true;
4756}
4757
Paul Yange0e54662016-09-15 11:09:01 -07004758static void uninit(upb_table *t, upb_alloc *a) {
4759 upb_check_alloc(t, a);
4760 upb_free(a, mutable_entries(t));
4761}
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004762
4763static upb_tabent *emptyent(upb_table *t) {
4764 upb_tabent *e = mutable_entries(t) + upb_table_size(t);
Paul Yange0e54662016-09-15 11:09:01 -07004765 while (1) { if (upb_tabent_isempty(--e)) return e; UPB_ASSERT(e > t->entries); }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004766}
4767
4768static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
4769 return (upb_tabent*)upb_getentry(t, hash);
4770}
4771
4772static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
4773 uint32_t hash, eqlfunc_t *eql) {
4774 const upb_tabent *e;
4775
4776 if (t->size_lg2 == 0) return NULL;
4777 e = upb_getentry(t, hash);
4778 if (upb_tabent_isempty(e)) return NULL;
4779 while (1) {
4780 if (eql(e->key, key)) return e;
4781 if ((e = e->next) == NULL) return NULL;
4782 }
4783}
4784
4785static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
4786 uint32_t hash, eqlfunc_t *eql) {
4787 return (upb_tabent*)findentry(t, key, hash, eql);
4788}
4789
4790static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
4791 uint32_t hash, eqlfunc_t *eql) {
4792 const upb_tabent *e = findentry(t, key, hash, eql);
4793 if (e) {
4794 if (v) {
4795 _upb_value_setval(v, e->val.val, t->ctype);
4796 }
4797 return true;
4798 } else {
4799 return false;
4800 }
4801}
4802
4803/* The given key must not already exist in the table. */
4804static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
4805 upb_value val, uint32_t hash,
4806 hashfunc_t *hashfunc, eqlfunc_t *eql) {
4807 upb_tabent *mainpos_e;
4808 upb_tabent *our_e;
4809
Paul Yange0e54662016-09-15 11:09:01 -07004810 UPB_ASSERT(findentry(t, key, hash, eql) == NULL);
4811 UPB_ASSERT_DEBUGVAR(val.ctype == t->ctype);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004812
4813 t->count++;
4814 mainpos_e = getentry_mutable(t, hash);
4815 our_e = mainpos_e;
4816
4817 if (upb_tabent_isempty(mainpos_e)) {
4818 /* Our main position is empty; use it. */
4819 our_e->next = NULL;
4820 } else {
4821 /* Collision. */
4822 upb_tabent *new_e = emptyent(t);
4823 /* Head of collider's chain. */
4824 upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
4825 if (chain == mainpos_e) {
4826 /* Existing ent is in its main posisiton (it has the same hash as us, and
4827 * is the head of our chain). Insert to new ent and append to this chain. */
4828 new_e->next = mainpos_e->next;
4829 mainpos_e->next = new_e;
4830 our_e = new_e;
4831 } else {
4832 /* Existing ent is not in its main position (it is a node in some other
4833 * chain). This implies that no existing ent in the table has our hash.
4834 * Evict it (updating its chain) and use its ent for head of our chain. */
4835 *new_e = *mainpos_e; /* copies next. */
4836 while (chain->next != mainpos_e) {
4837 chain = (upb_tabent*)chain->next;
Paul Yange0e54662016-09-15 11:09:01 -07004838 UPB_ASSERT(chain);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004839 }
4840 chain->next = new_e;
4841 our_e = mainpos_e;
4842 our_e->next = NULL;
4843 }
4844 }
4845 our_e->key = tabkey;
4846 our_e->val.val = val.val;
Paul Yange0e54662016-09-15 11:09:01 -07004847 UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004848}
4849
4850static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
4851 upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
4852 upb_tabent *chain = getentry_mutable(t, hash);
4853 if (upb_tabent_isempty(chain)) return false;
4854 if (eql(chain->key, key)) {
4855 /* Element to remove is at the head of its chain. */
4856 t->count--;
Paul Yange0e54662016-09-15 11:09:01 -07004857 if (val) _upb_value_setval(val, chain->val.val, t->ctype);
4858 if (removed) *removed = chain->key;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004859 if (chain->next) {
4860 upb_tabent *move = (upb_tabent*)chain->next;
4861 *chain = *move;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004862 move->key = 0; /* Make the slot empty. */
4863 } else {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004864 chain->key = 0; /* Make the slot empty. */
4865 }
4866 return true;
4867 } else {
4868 /* Element to remove is either in a non-head position or not in the
4869 * table. */
Paul Yange0e54662016-09-15 11:09:01 -07004870 while (chain->next && !eql(chain->next->key, key)) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004871 chain = (upb_tabent*)chain->next;
Paul Yange0e54662016-09-15 11:09:01 -07004872 }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004873 if (chain->next) {
4874 /* Found element to remove. */
Paul Yange0e54662016-09-15 11:09:01 -07004875 upb_tabent *rm = (upb_tabent*)chain->next;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004876 t->count--;
Paul Yange0e54662016-09-15 11:09:01 -07004877 if (val) _upb_value_setval(val, chain->next->val.val, t->ctype);
4878 if (removed) *removed = rm->key;
4879 rm->key = 0; /* Make the slot empty. */
4880 chain->next = rm->next;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004881 return true;
4882 } else {
Paul Yange0e54662016-09-15 11:09:01 -07004883 /* Element to remove is not in the table. */
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004884 return false;
4885 }
4886 }
4887}
4888
4889static size_t next(const upb_table *t, size_t i) {
4890 do {
4891 if (++i >= upb_table_size(t))
4892 return SIZE_MAX;
4893 } while(upb_tabent_isempty(&t->entries[i]));
4894
4895 return i;
4896}
4897
4898static size_t begin(const upb_table *t) {
4899 return next(t, -1);
4900}
4901
4902
4903/* upb_strtable ***************************************************************/
4904
4905/* A simple "subclass" of upb_table that only adds a hash function for strings. */
4906
Paul Yange0e54662016-09-15 11:09:01 -07004907static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) {
Paul Yang9bda1f12018-09-22 18:57:43 -07004908 uint32_t len = (uint32_t) k2.str.len;
Paul Yange0e54662016-09-15 11:09:01 -07004909 char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004910 if (str == NULL) return 0;
Paul Yang9bda1f12018-09-22 18:57:43 -07004911 memcpy(str, &len, sizeof(uint32_t));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004912 memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len + 1);
4913 return (uintptr_t)str;
4914}
4915
4916static uint32_t strhash(upb_tabkey key) {
4917 uint32_t len;
4918 char *str = upb_tabstr(key, &len);
4919 return MurmurHash2(str, len, 0);
4920}
4921
4922static bool streql(upb_tabkey k1, lookupkey_t k2) {
4923 uint32_t len;
4924 char *str = upb_tabstr(k1, &len);
4925 return len == k2.str.len && memcmp(str, k2.str.str, len) == 0;
4926}
4927
Paul Yange0e54662016-09-15 11:09:01 -07004928bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, upb_alloc *a) {
4929 return init(&t->t, ctype, 2, a);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004930}
4931
Paul Yange0e54662016-09-15 11:09:01 -07004932void upb_strtable_uninit2(upb_strtable *t, upb_alloc *a) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004933 size_t i;
4934 for (i = 0; i < upb_table_size(&t->t); i++)
Paul Yange0e54662016-09-15 11:09:01 -07004935 upb_free(a, (void*)t->t.entries[i].key);
4936 uninit(&t->t, a);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004937}
4938
Paul Yange0e54662016-09-15 11:09:01 -07004939bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004940 upb_strtable new_table;
4941 upb_strtable_iter i;
4942
Paul Yange0e54662016-09-15 11:09:01 -07004943 upb_check_alloc(&t->t, a);
4944
4945 if (!init(&new_table.t, t->t.ctype, size_lg2, a))
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004946 return false;
4947 upb_strtable_begin(&i, t);
4948 for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
Paul Yange0e54662016-09-15 11:09:01 -07004949 upb_strtable_insert3(
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004950 &new_table,
4951 upb_strtable_iter_key(&i),
4952 upb_strtable_iter_keylength(&i),
Paul Yange0e54662016-09-15 11:09:01 -07004953 upb_strtable_iter_value(&i),
4954 a);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004955 }
Paul Yange0e54662016-09-15 11:09:01 -07004956 upb_strtable_uninit2(t, a);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004957 *t = new_table;
4958 return true;
4959}
4960
Paul Yange0e54662016-09-15 11:09:01 -07004961bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len,
4962 upb_value v, upb_alloc *a) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004963 lookupkey_t key;
4964 upb_tabkey tabkey;
4965 uint32_t hash;
4966
Paul Yange0e54662016-09-15 11:09:01 -07004967 upb_check_alloc(&t->t, a);
4968
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004969 if (isfull(&t->t)) {
4970 /* Need to resize. New table of double the size, add old elements to it. */
Paul Yange0e54662016-09-15 11:09:01 -07004971 if (!upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004972 return false;
4973 }
4974 }
4975
4976 key = strkey2(k, len);
Paul Yange0e54662016-09-15 11:09:01 -07004977 tabkey = strcopy(key, a);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004978 if (tabkey == 0) return false;
4979
4980 hash = MurmurHash2(key.str.str, key.str.len, 0);
4981 insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
4982 return true;
4983}
4984
4985bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
4986 upb_value *v) {
4987 uint32_t hash = MurmurHash2(key, len, 0);
4988 return lookup(&t->t, strkey2(key, len), v, hash, &streql);
4989}
4990
Paul Yange0e54662016-09-15 11:09:01 -07004991bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
4992 upb_value *val, upb_alloc *alloc) {
4993 uint32_t hash = MurmurHash2(key, len, 0);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004994 upb_tabkey tabkey;
4995 if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
Paul Yange0e54662016-09-15 11:09:01 -07004996 upb_free(alloc, (void*)tabkey);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07004997 return true;
4998 } else {
4999 return false;
5000 }
5001}
5002
5003/* Iteration */
5004
5005static const upb_tabent *str_tabent(const upb_strtable_iter *i) {
5006 return &i->t->t.entries[i->index];
5007}
5008
5009void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
5010 i->t = t;
5011 i->index = begin(&t->t);
5012}
5013
5014void upb_strtable_next(upb_strtable_iter *i) {
5015 i->index = next(&i->t->t, i->index);
5016}
5017
5018bool upb_strtable_done(const upb_strtable_iter *i) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08005019 if (!i->t) return true;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005020 return i->index >= upb_table_size(&i->t->t) ||
5021 upb_tabent_isempty(str_tabent(i));
5022}
5023
Paul Yange0e54662016-09-15 11:09:01 -07005024const char *upb_strtable_iter_key(const upb_strtable_iter *i) {
5025 UPB_ASSERT(!upb_strtable_done(i));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005026 return upb_tabstr(str_tabent(i)->key, NULL);
5027}
5028
Paul Yange0e54662016-09-15 11:09:01 -07005029size_t upb_strtable_iter_keylength(const upb_strtable_iter *i) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005030 uint32_t len;
Paul Yange0e54662016-09-15 11:09:01 -07005031 UPB_ASSERT(!upb_strtable_done(i));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005032 upb_tabstr(str_tabent(i)->key, &len);
5033 return len;
5034}
5035
5036upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
Paul Yange0e54662016-09-15 11:09:01 -07005037 UPB_ASSERT(!upb_strtable_done(i));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005038 return _upb_value_val(str_tabent(i)->val.val, i->t->t.ctype);
5039}
5040
5041void upb_strtable_iter_setdone(upb_strtable_iter *i) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08005042 i->t = NULL;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005043 i->index = SIZE_MAX;
5044}
5045
5046bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
5047 const upb_strtable_iter *i2) {
5048 if (upb_strtable_done(i1) && upb_strtable_done(i2))
5049 return true;
5050 return i1->t == i2->t && i1->index == i2->index;
5051}
5052
5053
5054/* upb_inttable ***************************************************************/
5055
5056/* For inttables we use a hybrid structure where small keys are kept in an
5057 * array and large keys are put in the hash table. */
5058
5059static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); }
5060
5061static bool inteql(upb_tabkey k1, lookupkey_t k2) {
5062 return k1 == k2.num;
5063}
5064
5065static upb_tabval *mutable_array(upb_inttable *t) {
5066 return (upb_tabval*)t->array;
5067}
5068
5069static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
5070 if (key < t->array_size) {
5071 return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
5072 } else {
5073 upb_tabent *e =
5074 findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
5075 return e ? &e->val : NULL;
5076 }
5077}
5078
5079static const upb_tabval *inttable_val_const(const upb_inttable *t,
5080 uintptr_t key) {
5081 return inttable_val((upb_inttable*)t, key);
5082}
5083
5084size_t upb_inttable_count(const upb_inttable *t) {
5085 return t->t.count + t->array_count;
5086}
5087
5088static void check(upb_inttable *t) {
5089 UPB_UNUSED(t);
5090#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
5091 {
5092 /* This check is very expensive (makes inserts/deletes O(N)). */
5093 size_t count = 0;
5094 upb_inttable_iter i;
5095 upb_inttable_begin(&i, t);
5096 for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) {
Paul Yange0e54662016-09-15 11:09:01 -07005097 UPB_ASSERT(upb_inttable_lookup(t, upb_inttable_iter_key(&i), NULL));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005098 }
Paul Yange0e54662016-09-15 11:09:01 -07005099 UPB_ASSERT(count == upb_inttable_count(t));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005100 }
5101#endif
5102}
5103
5104bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t ctype,
Paul Yange0e54662016-09-15 11:09:01 -07005105 size_t asize, int hsize_lg2, upb_alloc *a) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005106 size_t array_bytes;
5107
Paul Yange0e54662016-09-15 11:09:01 -07005108 if (!init(&t->t, ctype, hsize_lg2, a)) return false;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005109 /* Always make the array part at least 1 long, so that we know key 0
5110 * won't be in the hash part, which simplifies things. */
5111 t->array_size = UPB_MAX(1, asize);
5112 t->array_count = 0;
5113 array_bytes = t->array_size * sizeof(upb_value);
Paul Yange0e54662016-09-15 11:09:01 -07005114 t->array = upb_malloc(a, array_bytes);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005115 if (!t->array) {
Paul Yange0e54662016-09-15 11:09:01 -07005116 uninit(&t->t, a);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005117 return false;
5118 }
5119 memset(mutable_array(t), 0xff, array_bytes);
5120 check(t);
5121 return true;
5122}
5123
Paul Yange0e54662016-09-15 11:09:01 -07005124bool upb_inttable_init2(upb_inttable *t, upb_ctype_t ctype, upb_alloc *a) {
5125 return upb_inttable_sizedinit(t, ctype, 0, 4, a);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005126}
5127
Paul Yange0e54662016-09-15 11:09:01 -07005128void upb_inttable_uninit2(upb_inttable *t, upb_alloc *a) {
5129 uninit(&t->t, a);
5130 upb_free(a, mutable_array(t));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005131}
5132
Paul Yange0e54662016-09-15 11:09:01 -07005133bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val,
5134 upb_alloc *a) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005135 upb_tabval tabval;
5136 tabval.val = val.val;
Paul Yange0e54662016-09-15 11:09:01 -07005137 UPB_ASSERT(upb_arrhas(tabval)); /* This will reject (uint64_t)-1. Fix this. */
5138
5139 upb_check_alloc(&t->t, a);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005140
5141 if (key < t->array_size) {
Paul Yange0e54662016-09-15 11:09:01 -07005142 UPB_ASSERT(!upb_arrhas(t->array[key]));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005143 t->array_count++;
5144 mutable_array(t)[key].val = val.val;
5145 } else {
5146 if (isfull(&t->t)) {
5147 /* Need to resize the hash part, but we re-use the array part. */
5148 size_t i;
5149 upb_table new_table;
Paul Yange0e54662016-09-15 11:09:01 -07005150
5151 if (!init(&new_table, t->t.ctype, t->t.size_lg2 + 1, a)) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005152 return false;
Paul Yange0e54662016-09-15 11:09:01 -07005153 }
5154
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005155 for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) {
5156 const upb_tabent *e = &t->t.entries[i];
5157 uint32_t hash;
5158 upb_value v;
5159
5160 _upb_value_setval(&v, e->val.val, t->t.ctype);
5161 hash = upb_inthash(e->key);
5162 insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
5163 }
5164
Paul Yange0e54662016-09-15 11:09:01 -07005165 UPB_ASSERT(t->t.count == new_table.count);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005166
Paul Yange0e54662016-09-15 11:09:01 -07005167 uninit(&t->t, a);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005168 t->t = new_table;
5169 }
5170 insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
5171 }
5172 check(t);
5173 return true;
5174}
5175
5176bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
5177 const upb_tabval *table_v = inttable_val_const(t, key);
5178 if (!table_v) return false;
5179 if (v) _upb_value_setval(v, table_v->val, t->t.ctype);
5180 return true;
5181}
5182
5183bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
5184 upb_tabval *table_v = inttable_val(t, key);
5185 if (!table_v) return false;
5186 table_v->val = val.val;
5187 return true;
5188}
5189
5190bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
5191 bool success;
5192 if (key < t->array_size) {
5193 if (upb_arrhas(t->array[key])) {
5194 upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
5195 t->array_count--;
5196 if (val) {
5197 _upb_value_setval(val, t->array[key].val, t->t.ctype);
5198 }
5199 mutable_array(t)[key] = empty;
5200 success = true;
5201 } else {
5202 success = false;
5203 }
5204 } else {
Paul Yange0e54662016-09-15 11:09:01 -07005205 success = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005206 }
5207 check(t);
5208 return success;
5209}
5210
Paul Yange0e54662016-09-15 11:09:01 -07005211bool upb_inttable_push2(upb_inttable *t, upb_value val, upb_alloc *a) {
5212 upb_check_alloc(&t->t, a);
5213 return upb_inttable_insert2(t, upb_inttable_count(t), val, a);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005214}
5215
5216upb_value upb_inttable_pop(upb_inttable *t) {
5217 upb_value val;
5218 bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
Paul Yange0e54662016-09-15 11:09:01 -07005219 UPB_ASSERT(ok);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005220 return val;
5221}
5222
Paul Yange0e54662016-09-15 11:09:01 -07005223bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
5224 upb_alloc *a) {
5225 upb_check_alloc(&t->t, a);
5226 return upb_inttable_insert2(t, (uintptr_t)key, val, a);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005227}
5228
5229bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
5230 upb_value *v) {
5231 return upb_inttable_lookup(t, (uintptr_t)key, v);
5232}
5233
5234bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
5235 return upb_inttable_remove(t, (uintptr_t)key, val);
5236}
5237
Paul Yange0e54662016-09-15 11:09:01 -07005238void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) {
5239 /* A power-of-two histogram of the table keys. */
5240 size_t counts[UPB_MAXARRSIZE + 1] = {0};
5241
5242 /* The max key in each bucket. */
5243 uintptr_t max[UPB_MAXARRSIZE + 1] = {0};
5244
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005245 upb_inttable_iter i;
Paul Yange0e54662016-09-15 11:09:01 -07005246 size_t arr_count;
5247 int size_lg2;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005248 upb_inttable new_t;
5249
Paul Yange0e54662016-09-15 11:09:01 -07005250 upb_check_alloc(&t->t, a);
5251
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005252 upb_inttable_begin(&i, t);
5253 for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
5254 uintptr_t key = upb_inttable_iter_key(&i);
Paul Yange0e54662016-09-15 11:09:01 -07005255 int bucket = log2ceil(key);
5256 max[bucket] = UPB_MAX(max[bucket], key);
5257 counts[bucket]++;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005258 }
5259
Paul Yange0e54662016-09-15 11:09:01 -07005260 /* Find the largest power of two that satisfies the MIN_DENSITY
5261 * definition (while actually having some keys). */
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005262 arr_count = upb_inttable_count(t);
5263
Paul Yange0e54662016-09-15 11:09:01 -07005264 for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
5265 if (counts[size_lg2] == 0) {
5266 /* We can halve again without losing any entries. */
5267 continue;
5268 } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
5269 break;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005270 }
Paul Yange0e54662016-09-15 11:09:01 -07005271
5272 arr_count -= counts[size_lg2];
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005273 }
5274
Paul Yange0e54662016-09-15 11:09:01 -07005275 UPB_ASSERT(arr_count <= upb_inttable_count(t));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005276
5277 {
5278 /* Insert all elements into new, perfectly-sized table. */
Paul Yange0e54662016-09-15 11:09:01 -07005279 size_t arr_size = max[size_lg2] + 1; /* +1 so arr[max] will fit. */
5280 size_t hash_count = upb_inttable_count(t) - arr_count;
5281 size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
5282 size_t hashsize_lg2 = log2ceil(hash_size);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005283
Paul Yange0e54662016-09-15 11:09:01 -07005284 upb_inttable_sizedinit(&new_t, t->t.ctype, arr_size, hashsize_lg2, a);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005285 upb_inttable_begin(&i, t);
5286 for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
5287 uintptr_t k = upb_inttable_iter_key(&i);
Paul Yange0e54662016-09-15 11:09:01 -07005288 upb_inttable_insert2(&new_t, k, upb_inttable_iter_value(&i), a);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005289 }
Paul Yange0e54662016-09-15 11:09:01 -07005290 UPB_ASSERT(new_t.array_size == arr_size);
5291 UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005292 }
Paul Yange0e54662016-09-15 11:09:01 -07005293 upb_inttable_uninit2(t, a);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005294 *t = new_t;
5295}
5296
5297/* Iteration. */
5298
5299static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
Paul Yange0e54662016-09-15 11:09:01 -07005300 UPB_ASSERT(!i->array_part);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005301 return &i->t->t.entries[i->index];
5302}
5303
5304static upb_tabval int_arrent(const upb_inttable_iter *i) {
Paul Yange0e54662016-09-15 11:09:01 -07005305 UPB_ASSERT(i->array_part);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005306 return i->t->array[i->index];
5307}
5308
5309void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
5310 i->t = t;
5311 i->index = -1;
5312 i->array_part = true;
5313 upb_inttable_next(i);
5314}
5315
5316void upb_inttable_next(upb_inttable_iter *iter) {
5317 const upb_inttable *t = iter->t;
5318 if (iter->array_part) {
5319 while (++iter->index < t->array_size) {
5320 if (upb_arrhas(int_arrent(iter))) {
5321 return;
5322 }
5323 }
5324 iter->array_part = false;
5325 iter->index = begin(&t->t);
5326 } else {
5327 iter->index = next(&t->t, iter->index);
5328 }
5329}
5330
5331bool upb_inttable_done(const upb_inttable_iter *i) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08005332 if (!i->t) return true;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005333 if (i->array_part) {
5334 return i->index >= i->t->array_size ||
5335 !upb_arrhas(int_arrent(i));
5336 } else {
5337 return i->index >= upb_table_size(&i->t->t) ||
5338 upb_tabent_isempty(int_tabent(i));
5339 }
5340}
5341
5342uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
Paul Yange0e54662016-09-15 11:09:01 -07005343 UPB_ASSERT(!upb_inttable_done(i));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005344 return i->array_part ? i->index : int_tabent(i)->key;
5345}
5346
5347upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
Paul Yange0e54662016-09-15 11:09:01 -07005348 UPB_ASSERT(!upb_inttable_done(i));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005349 return _upb_value_val(
5350 i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val,
5351 i->t->t.ctype);
5352}
5353
5354void upb_inttable_iter_setdone(upb_inttable_iter *i) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08005355 i->t = NULL;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005356 i->index = SIZE_MAX;
5357 i->array_part = false;
5358}
5359
5360bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
5361 const upb_inttable_iter *i2) {
5362 if (upb_inttable_done(i1) && upb_inttable_done(i2))
5363 return true;
5364 return i1->t == i2->t && i1->index == i2->index &&
5365 i1->array_part == i2->array_part;
5366}
5367
Adam Cozzette8645d892019-03-26 14:32:20 -07005368#if defined(UPB_UNALIGNED_READS_OK) || defined(__s390x__)
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005369/* -----------------------------------------------------------------------------
5370 * MurmurHash2, by Austin Appleby (released as public domain).
5371 * Reformatted and C99-ified by Joshua Haberman.
5372 * Note - This code makes a few assumptions about how your machine behaves -
5373 * 1. We can read a 4-byte value from any address without crashing
5374 * 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t
5375 * And it has a few limitations -
5376 * 1. It will not work incrementally.
5377 * 2. It will not produce the same results on little-endian and big-endian
5378 * machines. */
5379uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
5380 /* 'm' and 'r' are mixing constants generated offline.
5381 * They're not really 'magic', they just happen to work well. */
5382 const uint32_t m = 0x5bd1e995;
5383 const int32_t r = 24;
5384
5385 /* Initialize the hash to a 'random' value */
5386 uint32_t h = seed ^ len;
5387
5388 /* Mix 4 bytes at a time into the hash */
5389 const uint8_t * data = (const uint8_t *)key;
5390 while(len >= 4) {
5391 uint32_t k = *(uint32_t *)data;
5392
5393 k *= m;
5394 k ^= k >> r;
5395 k *= m;
5396
5397 h *= m;
5398 h ^= k;
5399
5400 data += 4;
5401 len -= 4;
5402 }
5403
5404 /* Handle the last few bytes of the input array */
5405 switch(len) {
5406 case 3: h ^= data[2] << 16;
5407 case 2: h ^= data[1] << 8;
5408 case 1: h ^= data[0]; h *= m;
5409 };
5410
5411 /* Do a few final mixes of the hash to ensure the last few
5412 * bytes are well-incorporated. */
5413 h ^= h >> 13;
5414 h *= m;
5415 h ^= h >> 15;
5416
5417 return h;
5418}
5419
5420#else /* !UPB_UNALIGNED_READS_OK */
5421
5422/* -----------------------------------------------------------------------------
5423 * MurmurHashAligned2, by Austin Appleby
5424 * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
5425 * on certain platforms.
5426 * Performance will be lower than MurmurHash2 */
5427
5428#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
5429
5430uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
5431 const uint32_t m = 0x5bd1e995;
5432 const int32_t r = 24;
5433 const uint8_t * data = (const uint8_t *)key;
5434 uint32_t h = seed ^ len;
5435 uint8_t align = (uintptr_t)data & 3;
5436
5437 if(align && (len >= 4)) {
5438 /* Pre-load the temp registers */
5439 uint32_t t = 0, d = 0;
5440 int32_t sl;
5441 int32_t sr;
5442
5443 switch(align) {
5444 case 1: t |= data[2] << 16;
5445 case 2: t |= data[1] << 8;
5446 case 3: t |= data[0];
5447 }
5448
5449 t <<= (8 * align);
5450
5451 data += 4-align;
5452 len -= 4-align;
5453
5454 sl = 8 * (4-align);
5455 sr = 8 * align;
5456
5457 /* Mix */
5458
5459 while(len >= 4) {
5460 uint32_t k;
5461
5462 d = *(uint32_t *)data;
5463 t = (t >> sr) | (d << sl);
5464
5465 k = t;
5466
5467 MIX(h,k,m);
5468
5469 t = d;
5470
5471 data += 4;
5472 len -= 4;
5473 }
5474
5475 /* Handle leftover data in temp registers */
5476
5477 d = 0;
5478
5479 if(len >= align) {
5480 uint32_t k;
5481
5482 switch(align) {
5483 case 3: d |= data[2] << 16;
5484 case 2: d |= data[1] << 8;
5485 case 1: d |= data[0];
5486 }
5487
5488 k = (t >> sr) | (d << sl);
5489 MIX(h,k,m);
5490
5491 data += align;
5492 len -= align;
5493
5494 /* ----------
5495 * Handle tail bytes */
5496
5497 switch(len) {
5498 case 3: h ^= data[2] << 16;
5499 case 2: h ^= data[1] << 8;
5500 case 1: h ^= data[0]; h *= m;
5501 };
5502 } else {
5503 switch(len) {
5504 case 3: d |= data[2] << 16;
5505 case 2: d |= data[1] << 8;
5506 case 1: d |= data[0];
5507 case 0: h ^= (t >> sr) | (d << sl); h *= m;
5508 }
5509 }
5510
5511 h ^= h >> 13;
5512 h *= m;
5513 h ^= h >> 15;
5514
5515 return h;
5516 } else {
5517 while(len >= 4) {
5518 uint32_t k = *(uint32_t *)data;
5519
5520 MIX(h,k,m);
5521
5522 data += 4;
5523 len -= 4;
5524 }
5525
5526 /* ----------
5527 * Handle tail bytes */
5528
5529 switch(len) {
5530 case 3: h ^= data[2] << 16;
5531 case 2: h ^= data[1] << 8;
5532 case 1: h ^= data[0]; h *= m;
5533 };
5534
5535 h ^= h >> 13;
5536 h *= m;
5537 h ^= h >> 15;
5538
5539 return h;
5540 }
5541}
5542#undef MIX
5543
5544#endif /* UPB_UNALIGNED_READS_OK */
5545
5546#include <errno.h>
5547#include <stdarg.h>
5548#include <stddef.h>
5549#include <stdint.h>
5550#include <stdio.h>
5551#include <stdlib.h>
5552#include <string.h>
5553
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005554/* Guarantee null-termination and provide ellipsis truncation.
5555 * It may be tempting to "optimize" this by initializing these final
5556 * four bytes up-front and then being careful never to overwrite them,
5557 * this is safer and simpler. */
5558static void nullz(upb_status *status) {
5559 const char *ellipsis = "...";
5560 size_t len = strlen(ellipsis);
Paul Yange0e54662016-09-15 11:09:01 -07005561 UPB_ASSERT(sizeof(status->msg) > len);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005562 memcpy(status->msg + sizeof(status->msg) - len, ellipsis, len);
5563}
5564
Paul Yange0e54662016-09-15 11:09:01 -07005565/* upb_status *****************************************************************/
5566
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005567void upb_status_clear(upb_status *status) {
5568 if (!status) return;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08005569 status->ok = true;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005570 status->msg[0] = '\0';
5571}
5572
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08005573bool upb_ok(const upb_status *status) { return status->ok; }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005574
5575const char *upb_status_errmsg(const upb_status *status) { return status->msg; }
5576
5577void upb_status_seterrmsg(upb_status *status, const char *msg) {
5578 if (!status) return;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08005579 status->ok = false;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005580 strncpy(status->msg, msg, sizeof(status->msg));
5581 nullz(status);
5582}
5583
5584void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
5585 va_list args;
5586 va_start(args, fmt);
5587 upb_status_vseterrf(status, fmt, args);
5588 va_end(args);
5589}
5590
5591void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
5592 if (!status) return;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08005593 status->ok = false;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005594 _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
5595 nullz(status);
5596}
5597
Paul Yange0e54662016-09-15 11:09:01 -07005598/* upb_alloc ******************************************************************/
5599
5600static void *upb_global_allocfunc(upb_alloc *alloc, void *ptr, size_t oldsize,
5601 size_t size) {
5602 UPB_UNUSED(alloc);
5603 UPB_UNUSED(oldsize);
5604 if (size == 0) {
5605 free(ptr);
5606 return NULL;
5607 } else {
5608 return realloc(ptr, size);
5609 }
5610}
5611
5612upb_alloc upb_alloc_global = {&upb_global_allocfunc};
5613
Paul Yange0e54662016-09-15 11:09:01 -07005614/* upb_arena ******************************************************************/
5615
5616/* Be conservative and choose 16 in case anyone is using SSE. */
5617static const size_t maxalign = 16;
5618
Paul Yang5a3405c2017-02-06 12:40:51 -08005619static size_t align_up_max(size_t size) {
Paul Yange0e54662016-09-15 11:09:01 -07005620 return ((size + maxalign - 1) / maxalign) * maxalign;
5621}
5622
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08005623struct upb_arena {
5624 /* We implement the allocator interface.
5625 * This must be the first member of upb_arena! */
5626 upb_alloc alloc;
5627
5628 /* Allocator to allocate arena blocks. We are responsible for freeing these
5629 * when we are destroyed. */
5630 upb_alloc *block_alloc;
5631
5632 size_t bytes_allocated;
5633 size_t next_block_size;
5634 size_t max_block_size;
5635
5636 /* Linked list of blocks. Points to an arena_block, defined in env.c */
5637 void *block_head;
5638
5639 /* Cleanup entries. Pointer to a cleanup_ent, defined in env.c */
5640 void *cleanup_head;
5641};
5642
Paul Yange0e54662016-09-15 11:09:01 -07005643typedef struct mem_block {
5644 struct mem_block *next;
5645 size_t size;
5646 size_t used;
5647 bool owned;
5648 /* Data follows. */
5649} mem_block;
5650
5651typedef struct cleanup_ent {
5652 struct cleanup_ent *next;
5653 upb_cleanup_func *cleanup;
5654 void *ud;
5655} cleanup_ent;
5656
5657static void upb_arena_addblock(upb_arena *a, void *ptr, size_t size,
5658 bool owned) {
5659 mem_block *block = ptr;
5660
5661 block->next = a->block_head;
5662 block->size = size;
Paul Yang5a3405c2017-02-06 12:40:51 -08005663 block->used = align_up_max(sizeof(mem_block));
Paul Yange0e54662016-09-15 11:09:01 -07005664 block->owned = owned;
5665
5666 a->block_head = block;
5667
5668 /* TODO(haberman): ASAN poison. */
5669}
5670
Paul Yange0e54662016-09-15 11:09:01 -07005671static mem_block *upb_arena_allocblock(upb_arena *a, size_t size) {
5672 size_t block_size = UPB_MAX(size, a->next_block_size) + sizeof(mem_block);
5673 mem_block *block = upb_malloc(a->block_alloc, block_size);
5674
5675 if (!block) {
5676 return NULL;
5677 }
5678
5679 upb_arena_addblock(a, block, block_size, true);
5680 a->next_block_size = UPB_MIN(block_size * 2, a->max_block_size);
5681
5682 return block;
5683}
5684
5685static void *upb_arena_doalloc(upb_alloc *alloc, void *ptr, size_t oldsize,
5686 size_t size) {
5687 upb_arena *a = (upb_arena*)alloc; /* upb_alloc is initial member. */
5688 mem_block *block = a->block_head;
5689 void *ret;
5690
5691 if (size == 0) {
5692 return NULL; /* We are an arena, don't need individual frees. */
5693 }
5694
Paul Yang5a3405c2017-02-06 12:40:51 -08005695 size = align_up_max(size);
Paul Yange0e54662016-09-15 11:09:01 -07005696
5697 /* TODO(haberman): special-case if this is a realloc of the last alloc? */
5698
5699 if (!block || block->size - block->used < size) {
5700 /* Slow path: have to allocate a new block. */
5701 block = upb_arena_allocblock(a, size);
5702
5703 if (!block) {
5704 return NULL; /* Out of memory. */
5705 }
5706 }
5707
5708 ret = (char*)block + block->used;
5709 block->used += size;
5710
5711 if (oldsize > 0) {
5712 memcpy(ret, ptr, oldsize); /* Preserve existing data. */
5713 }
5714
5715 /* TODO(haberman): ASAN unpoison. */
5716
5717 a->bytes_allocated += size;
5718 return ret;
5719}
5720
5721/* Public Arena API ***********************************************************/
5722
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08005723#define upb_alignof(type) offsetof (struct { char c; type member; }, member)
5724
5725upb_arena *upb_arena_init(void *mem, size_t n, upb_alloc *alloc) {
5726 const size_t first_block_overhead = sizeof(upb_arena) + sizeof(mem_block);
5727 upb_arena *a;
5728 bool owned = false;
5729
5730 /* Round block size down to alignof(*a) since we will allocate the arena
5731 * itself at the end. */
5732 n &= ~(upb_alignof(upb_arena) - 1);
5733
5734 if (n < first_block_overhead) {
5735 /* We need to malloc the initial block. */
5736 n = first_block_overhead + 256;
5737 owned = true;
5738 if (!alloc || !(mem = upb_malloc(alloc, n))) {
5739 return NULL;
5740 }
5741 }
5742
5743 a = (void*)((char*)mem + n - sizeof(*a));
5744 n -= sizeof(*a);
5745
Paul Yange0e54662016-09-15 11:09:01 -07005746 a->alloc.func = &upb_arena_doalloc;
5747 a->block_alloc = &upb_alloc_global;
5748 a->bytes_allocated = 0;
5749 a->next_block_size = 256;
5750 a->max_block_size = 16384;
5751 a->cleanup_head = NULL;
5752 a->block_head = NULL;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08005753 a->block_alloc = alloc;
5754
5755 upb_arena_addblock(a, mem, n, owned);
5756
5757 return a;
Paul Yange0e54662016-09-15 11:09:01 -07005758}
5759
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08005760#undef upb_alignof
Paul Yange0e54662016-09-15 11:09:01 -07005761
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08005762void upb_arena_free(upb_arena *a) {
Paul Yange0e54662016-09-15 11:09:01 -07005763 cleanup_ent *ent = a->cleanup_head;
5764 mem_block *block = a->block_head;
5765
5766 while (ent) {
5767 ent->cleanup(ent->ud);
5768 ent = ent->next;
5769 }
5770
5771 /* Must do this after running cleanup functions, because this will delete
5772 * the memory we store our cleanup entries in! */
5773 while (block) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08005774 /* Load first since we are deleting block. */
Paul Yange0e54662016-09-15 11:09:01 -07005775 mem_block *next = block->next;
5776
5777 if (block->owned) {
5778 upb_free(a->block_alloc, block);
5779 }
5780
5781 block = next;
5782 }
Paul Yange0e54662016-09-15 11:09:01 -07005783}
5784
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08005785bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) {
Paul Yange0e54662016-09-15 11:09:01 -07005786 cleanup_ent *ent = upb_malloc(&a->alloc, sizeof(cleanup_ent));
5787 if (!ent) {
5788 return false; /* Out of memory. */
5789 }
5790
5791 ent->cleanup = func;
5792 ent->ud = ud;
5793 ent->next = a->cleanup_head;
5794 a->cleanup_head = ent;
5795
5796 return true;
5797}
5798
5799size_t upb_arena_bytesallocated(const upb_arena *a) {
5800 return a->bytes_allocated;
5801}
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005802/*
5803** protobuf decoder bytecode compiler
5804**
5805** Code to compile a upb::Handlers into bytecode for decoding a protobuf
5806** according to that specific schema and destination handlers.
5807**
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005808** Bytecode definition is in decoder.int.h.
5809*/
5810
5811#include <stdarg.h>
5812
5813#ifdef UPB_DUMP_BYTECODE
5814#include <stdio.h>
5815#endif
5816
5817#define MAXLABEL 5
5818#define EMPTYLABEL -1
5819
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005820/* upb_pbdecodermethod ********************************************************/
5821
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08005822static void freemethod(upb_pbdecodermethod *method) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005823 upb_inttable_uninit(&method->dispatch);
Paul Yange0e54662016-09-15 11:09:01 -07005824 upb_gfree(method);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005825}
5826
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005827static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
5828 mgroup *group) {
Paul Yange0e54662016-09-15 11:09:01 -07005829 upb_pbdecodermethod *ret = upb_gmalloc(sizeof(*ret));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005830 upb_byteshandler_init(&ret->input_handler_);
5831
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08005832 ret->group = group;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005833 ret->dest_handlers_ = dest_handlers;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005834 upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
5835
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005836 return ret;
5837}
5838
5839const upb_handlers *upb_pbdecodermethod_desthandlers(
5840 const upb_pbdecodermethod *m) {
5841 return m->dest_handlers_;
5842}
5843
5844const upb_byteshandler *upb_pbdecodermethod_inputhandler(
5845 const upb_pbdecodermethod *m) {
5846 return &m->input_handler_;
5847}
5848
5849bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
5850 return m->is_native_;
5851}
5852
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005853
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08005854/* mgroup *********************************************************************/
5855
5856static void freegroup(mgroup *g) {
5857 upb_inttable_iter i;
5858
5859 upb_inttable_begin(&i, &g->methods);
5860 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
5861 freemethod(upb_value_getptr(upb_inttable_iter_value(&i)));
5862 }
5863
5864 upb_inttable_uninit(&g->methods);
5865 upb_gfree(g->bytecode);
5866 upb_gfree(g);
5867}
5868
5869mgroup *newgroup() {
5870 mgroup *g = upb_gmalloc(sizeof(*g));
5871 upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
5872 g->bytecode = NULL;
5873 g->bytecode_end = NULL;
5874 return g;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005875}
5876
5877
5878/* bytecode compiler **********************************************************/
5879
5880/* Data used only at compilation time. */
5881typedef struct {
5882 mgroup *group;
5883
5884 uint32_t *pc;
5885 int fwd_labels[MAXLABEL];
5886 int back_labels[MAXLABEL];
5887
5888 /* For fields marked "lazy", parse them lazily or eagerly? */
5889 bool lazy;
5890} compiler;
5891
5892static compiler *newcompiler(mgroup *group, bool lazy) {
Paul Yange0e54662016-09-15 11:09:01 -07005893 compiler *ret = upb_gmalloc(sizeof(*ret));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005894 int i;
5895
5896 ret->group = group;
5897 ret->lazy = lazy;
5898 for (i = 0; i < MAXLABEL; i++) {
5899 ret->fwd_labels[i] = EMPTYLABEL;
5900 ret->back_labels[i] = EMPTYLABEL;
5901 }
5902 return ret;
5903}
5904
5905static void freecompiler(compiler *c) {
Paul Yange0e54662016-09-15 11:09:01 -07005906 upb_gfree(c);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005907}
5908
5909const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
5910
5911/* How many words an instruction is. */
5912static int instruction_len(uint32_t instr) {
5913 switch (getop(instr)) {
5914 case OP_SETDISPATCH: return 1 + ptr_words;
5915 case OP_TAGN: return 3;
5916 case OP_SETBIGGROUPNUM: return 2;
5917 default: return 1;
5918 }
5919}
5920
5921bool op_has_longofs(int32_t instruction) {
5922 switch (getop(instruction)) {
5923 case OP_CALL:
5924 case OP_BRANCH:
5925 case OP_CHECKDELIM:
5926 return true;
5927 /* The "tag" instructions only have 8 bytes available for the jump target,
5928 * but that is ok because these opcodes only require short jumps. */
5929 case OP_TAG1:
5930 case OP_TAG2:
5931 case OP_TAGN:
5932 return false;
5933 default:
Paul Yange0e54662016-09-15 11:09:01 -07005934 UPB_ASSERT(false);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005935 return false;
5936 }
5937}
5938
5939static int32_t getofs(uint32_t instruction) {
5940 if (op_has_longofs(instruction)) {
5941 return (int32_t)instruction >> 8;
5942 } else {
5943 return (int8_t)(instruction >> 8);
5944 }
5945}
5946
5947static void setofs(uint32_t *instruction, int32_t ofs) {
5948 if (op_has_longofs(*instruction)) {
5949 *instruction = getop(*instruction) | ofs << 8;
5950 } else {
5951 *instruction = (*instruction & ~0xff00) | ((ofs & 0xff) << 8);
5952 }
Paul Yange0e54662016-09-15 11:09:01 -07005953 UPB_ASSERT(getofs(*instruction) == ofs); /* Would fail in cases of overflow. */
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005954}
5955
5956static uint32_t pcofs(compiler *c) { return c->pc - c->group->bytecode; }
5957
5958/* Defines a local label at the current PC location. All previous forward
5959 * references are updated to point to this location. The location is noted
5960 * for any future backward references. */
5961static void label(compiler *c, unsigned int label) {
5962 int val;
5963 uint32_t *codep;
5964
Paul Yange0e54662016-09-15 11:09:01 -07005965 UPB_ASSERT(label < MAXLABEL);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005966 val = c->fwd_labels[label];
5967 codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
5968 while (codep) {
5969 int ofs = getofs(*codep);
5970 setofs(codep, c->pc - codep - instruction_len(*codep));
5971 codep = ofs ? codep + ofs : NULL;
5972 }
5973 c->fwd_labels[label] = EMPTYLABEL;
5974 c->back_labels[label] = pcofs(c);
5975}
5976
5977/* Creates a reference to a numbered label; either a forward reference
5978 * (positive arg) or backward reference (negative arg). For forward references
5979 * the value returned now is actually a "next" pointer into a linked list of all
5980 * instructions that use this label and will be patched later when the label is
5981 * defined with label().
5982 *
5983 * The returned value is the offset that should be written into the instruction.
5984 */
5985static int32_t labelref(compiler *c, int label) {
Paul Yange0e54662016-09-15 11:09:01 -07005986 UPB_ASSERT(label < MAXLABEL);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07005987 if (label == LABEL_DISPATCH) {
5988 /* No resolving required. */
5989 return 0;
5990 } else if (label < 0) {
5991 /* Backward local label. Relative to the next instruction. */
5992 uint32_t from = (c->pc + 1) - c->group->bytecode;
5993 return c->back_labels[-label] - from;
5994 } else {
5995 /* Forward local label: prepend to (possibly-empty) linked list. */
5996 int *lptr = &c->fwd_labels[label];
5997 int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
5998 *lptr = pcofs(c);
5999 return ret;
6000 }
6001}
6002
6003static void put32(compiler *c, uint32_t v) {
6004 mgroup *g = c->group;
6005 if (c->pc == g->bytecode_end) {
6006 int ofs = pcofs(c);
6007 size_t oldsize = g->bytecode_end - g->bytecode;
6008 size_t newsize = UPB_MAX(oldsize * 2, 64);
6009 /* TODO(haberman): handle OOM. */
Paul Yange0e54662016-09-15 11:09:01 -07006010 g->bytecode = upb_grealloc(g->bytecode, oldsize * sizeof(uint32_t),
6011 newsize * sizeof(uint32_t));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006012 g->bytecode_end = g->bytecode + newsize;
6013 c->pc = g->bytecode + ofs;
6014 }
6015 *c->pc++ = v;
6016}
6017
Paul Yang60327462017-10-09 12:39:13 -07006018static void putop(compiler *c, int op, ...) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006019 va_list ap;
6020 va_start(ap, op);
6021
6022 switch (op) {
6023 case OP_SETDISPATCH: {
6024 uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
6025 put32(c, OP_SETDISPATCH);
6026 put32(c, ptr);
6027 if (sizeof(uintptr_t) > sizeof(uint32_t))
6028 put32(c, (uint64_t)ptr >> 32);
6029 break;
6030 }
6031 case OP_STARTMSG:
6032 case OP_ENDMSG:
6033 case OP_PUSHLENDELIM:
6034 case OP_POP:
6035 case OP_SETDELIM:
6036 case OP_HALT:
6037 case OP_RET:
6038 case OP_DISPATCH:
6039 put32(c, op);
6040 break;
6041 case OP_PARSE_DOUBLE:
6042 case OP_PARSE_FLOAT:
6043 case OP_PARSE_INT64:
6044 case OP_PARSE_UINT64:
6045 case OP_PARSE_INT32:
6046 case OP_PARSE_FIXED64:
6047 case OP_PARSE_FIXED32:
6048 case OP_PARSE_BOOL:
6049 case OP_PARSE_UINT32:
6050 case OP_PARSE_SFIXED32:
6051 case OP_PARSE_SFIXED64:
6052 case OP_PARSE_SINT32:
6053 case OP_PARSE_SINT64:
6054 case OP_STARTSEQ:
6055 case OP_ENDSEQ:
6056 case OP_STARTSUBMSG:
6057 case OP_ENDSUBMSG:
6058 case OP_STARTSTR:
6059 case OP_STRING:
6060 case OP_ENDSTR:
6061 case OP_PUSHTAGDELIM:
6062 put32(c, op | va_arg(ap, upb_selector_t) << 8);
6063 break;
6064 case OP_SETBIGGROUPNUM:
6065 put32(c, op);
6066 put32(c, va_arg(ap, int));
6067 break;
6068 case OP_CALL: {
6069 const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
6070 put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
6071 break;
6072 }
6073 case OP_CHECKDELIM:
6074 case OP_BRANCH: {
6075 uint32_t instruction = op;
6076 int label = va_arg(ap, int);
6077 setofs(&instruction, labelref(c, label));
6078 put32(c, instruction);
6079 break;
6080 }
6081 case OP_TAG1:
6082 case OP_TAG2: {
6083 int label = va_arg(ap, int);
6084 uint64_t tag = va_arg(ap, uint64_t);
6085 uint32_t instruction = op | (tag << 16);
Paul Yange0e54662016-09-15 11:09:01 -07006086 UPB_ASSERT(tag <= 0xffff);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006087 setofs(&instruction, labelref(c, label));
6088 put32(c, instruction);
6089 break;
6090 }
6091 case OP_TAGN: {
6092 int label = va_arg(ap, int);
6093 uint64_t tag = va_arg(ap, uint64_t);
6094 uint32_t instruction = op | (upb_value_size(tag) << 16);
6095 setofs(&instruction, labelref(c, label));
6096 put32(c, instruction);
6097 put32(c, tag);
6098 put32(c, tag >> 32);
6099 break;
6100 }
6101 }
6102
6103 va_end(ap);
6104}
6105
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006106#if defined(UPB_DUMP_BYTECODE)
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006107
6108const char *upb_pbdecoder_getopname(unsigned int op) {
6109#define QUOTE(x) #x
6110#define EXPAND_AND_QUOTE(x) QUOTE(x)
6111#define OPNAME(x) OP_##x
6112#define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
6113#define T(x) OP(PARSE_##x)
6114 /* Keep in sync with list in decoder.int.h. */
6115 switch ((opcode)op) {
6116 T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
6117 T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
6118 OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
6119 OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
6120 OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
6121 OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
6122 OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
6123 }
6124 return "<unknown op>";
6125#undef OP
6126#undef T
6127}
6128
6129#endif
6130
6131#ifdef UPB_DUMP_BYTECODE
6132
6133static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
6134
6135 uint32_t *begin = p;
6136
6137 while (p < end) {
6138 fprintf(f, "%p %8tx", p, p - begin);
6139 uint32_t instr = *p++;
6140 uint8_t op = getop(instr);
6141 fprintf(f, " %s", upb_pbdecoder_getopname(op));
6142 switch ((opcode)op) {
6143 case OP_SETDISPATCH: {
6144 const upb_inttable *dispatch;
6145 memcpy(&dispatch, p, sizeof(void*));
6146 p += ptr_words;
6147 const upb_pbdecodermethod *method =
6148 (void *)((char *)dispatch -
6149 offsetof(upb_pbdecodermethod, dispatch));
6150 fprintf(f, " %s", upb_msgdef_fullname(
6151 upb_handlers_msgdef(method->dest_handlers_)));
6152 break;
6153 }
6154 case OP_DISPATCH:
6155 case OP_STARTMSG:
6156 case OP_ENDMSG:
6157 case OP_PUSHLENDELIM:
6158 case OP_POP:
6159 case OP_SETDELIM:
6160 case OP_HALT:
6161 case OP_RET:
6162 break;
6163 case OP_PARSE_DOUBLE:
6164 case OP_PARSE_FLOAT:
6165 case OP_PARSE_INT64:
6166 case OP_PARSE_UINT64:
6167 case OP_PARSE_INT32:
6168 case OP_PARSE_FIXED64:
6169 case OP_PARSE_FIXED32:
6170 case OP_PARSE_BOOL:
6171 case OP_PARSE_UINT32:
6172 case OP_PARSE_SFIXED32:
6173 case OP_PARSE_SFIXED64:
6174 case OP_PARSE_SINT32:
6175 case OP_PARSE_SINT64:
6176 case OP_STARTSEQ:
6177 case OP_ENDSEQ:
6178 case OP_STARTSUBMSG:
6179 case OP_ENDSUBMSG:
6180 case OP_STARTSTR:
6181 case OP_STRING:
6182 case OP_ENDSTR:
6183 case OP_PUSHTAGDELIM:
6184 fprintf(f, " %d", instr >> 8);
6185 break;
6186 case OP_SETBIGGROUPNUM:
6187 fprintf(f, " %d", *p++);
6188 break;
6189 case OP_CHECKDELIM:
6190 case OP_CALL:
6191 case OP_BRANCH:
6192 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6193 break;
6194 case OP_TAG1:
6195 case OP_TAG2: {
6196 fprintf(f, " tag:0x%x", instr >> 16);
6197 if (getofs(instr)) {
6198 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6199 }
6200 break;
6201 }
6202 case OP_TAGN: {
6203 uint64_t tag = *p++;
6204 tag |= (uint64_t)*p++ << 32;
6205 fprintf(f, " tag:0x%llx", (long long)tag);
6206 fprintf(f, " n:%d", instr >> 16);
6207 if (getofs(instr)) {
6208 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6209 }
6210 break;
6211 }
6212 }
6213 fputs("\n", f);
6214 }
6215}
6216
6217#endif
6218
6219static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
6220 uint32_t tag = (upb_fielddef_number(f) << 3) | wire_type;
6221 uint64_t encoded_tag = upb_vencode32(tag);
6222 /* No tag should be greater than 5 bytes. */
Paul Yange0e54662016-09-15 11:09:01 -07006223 UPB_ASSERT(encoded_tag <= 0xffffffffff);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006224 return encoded_tag;
6225}
6226
6227static void putchecktag(compiler *c, const upb_fielddef *f,
6228 int wire_type, int dest) {
6229 uint64_t tag = get_encoded_tag(f, wire_type);
6230 switch (upb_value_size(tag)) {
6231 case 1:
6232 putop(c, OP_TAG1, dest, tag);
6233 break;
6234 case 2:
6235 putop(c, OP_TAG2, dest, tag);
6236 break;
6237 default:
6238 putop(c, OP_TAGN, dest, tag);
6239 break;
6240 }
6241}
6242
6243static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
6244 upb_selector_t selector;
6245 bool ok = upb_handlers_getselector(f, type, &selector);
Paul Yange0e54662016-09-15 11:09:01 -07006246 UPB_ASSERT(ok);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006247 return selector;
6248}
6249
6250/* Takes an existing, primary dispatch table entry and repacks it with a
6251 * different alternate wire type. Called when we are inserting a secondary
6252 * dispatch table entry for an alternate wire type. */
6253static uint64_t repack(uint64_t dispatch, int new_wt2) {
6254 uint64_t ofs;
6255 uint8_t wt1;
6256 uint8_t old_wt2;
6257 upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
Paul Yange0e54662016-09-15 11:09:01 -07006258 UPB_ASSERT(old_wt2 == NO_WIRE_TYPE); /* wt2 should not be set yet. */
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006259 return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
6260}
6261
6262/* Marks the current bytecode position as the dispatch target for this message,
6263 * field, and wire type. */
6264static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
6265 const upb_fielddef *f, int wire_type) {
6266 /* Offset is relative to msg base. */
6267 uint64_t ofs = pcofs(c) - method->code_base.ofs;
6268 uint32_t fn = upb_fielddef_number(f);
6269 upb_inttable *d = &method->dispatch;
6270 upb_value v;
6271 if (upb_inttable_remove(d, fn, &v)) {
6272 /* TODO: prioritize based on packed setting in .proto file. */
6273 uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
6274 upb_inttable_insert(d, fn, upb_value_uint64(repacked));
6275 upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
6276 } else {
6277 uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
6278 upb_inttable_insert(d, fn, upb_value_uint64(val));
6279 }
6280}
6281
6282static void putpush(compiler *c, const upb_fielddef *f) {
6283 if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
6284 putop(c, OP_PUSHLENDELIM);
6285 } else {
6286 uint32_t fn = upb_fielddef_number(f);
6287 if (fn >= 1 << 24) {
6288 putop(c, OP_PUSHTAGDELIM, 0);
6289 putop(c, OP_SETBIGGROUPNUM, fn);
6290 } else {
6291 putop(c, OP_PUSHTAGDELIM, fn);
6292 }
6293 }
6294}
6295
6296static upb_pbdecodermethod *find_submethod(const compiler *c,
6297 const upb_pbdecodermethod *method,
6298 const upb_fielddef *f) {
6299 const upb_handlers *sub =
6300 upb_handlers_getsubhandlers(method->dest_handlers_, f);
6301 upb_value v;
6302 return upb_inttable_lookupptr(&c->group->methods, sub, &v)
6303 ? upb_value_getptr(v)
6304 : NULL;
6305}
6306
6307static void putsel(compiler *c, opcode op, upb_selector_t sel,
6308 const upb_handlers *h) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006309 if (upb_handlers_gethandler(h, sel, NULL)) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006310 putop(c, op, sel);
6311 }
6312}
6313
6314/* Puts an opcode to call a callback, but only if a callback actually exists for
6315 * this field and handler type. */
6316static void maybeput(compiler *c, opcode op, const upb_handlers *h,
6317 const upb_fielddef *f, upb_handlertype_t type) {
6318 putsel(c, op, getsel(f, type), h);
6319}
6320
6321static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
6322 if (!upb_fielddef_lazy(f))
6323 return false;
6324
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006325 return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR), NULL) ||
6326 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING), NULL) ||
6327 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR), NULL);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006328}
6329
6330
6331/* bytecode compiler code generation ******************************************/
6332
6333/* Symbolic names for our local labels. */
6334#define LABEL_LOOPSTART 1 /* Top of a repeated field loop. */
6335#define LABEL_LOOPBREAK 2 /* To jump out of a repeated loop */
6336#define LABEL_FIELD 3 /* Jump backward to find the most recent field. */
6337#define LABEL_ENDMSG 4 /* To reach the OP_ENDMSG instr for this msg. */
6338
6339/* Generates bytecode to parse a single non-lazy message field. */
6340static void generate_msgfield(compiler *c, const upb_fielddef *f,
6341 upb_pbdecodermethod *method) {
6342 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
6343 const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
6344 int wire_type;
6345
6346 if (!sub_m) {
6347 /* Don't emit any code for this field at all; it will be parsed as an
Paul Yange0e54662016-09-15 11:09:01 -07006348 * unknown field.
6349 *
6350 * TODO(haberman): we should change this to parse it as a string field
6351 * instead. It will probably be faster, but more importantly, once we
6352 * start vending unknown fields, a field shouldn't be treated as unknown
6353 * just because it doesn't have subhandlers registered. */
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006354 return;
6355 }
6356
6357 label(c, LABEL_FIELD);
6358
6359 wire_type =
6360 (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
6361 ? UPB_WIRE_TYPE_DELIMITED
6362 : UPB_WIRE_TYPE_START_GROUP;
6363
6364 if (upb_fielddef_isseq(f)) {
6365 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6366 putchecktag(c, f, wire_type, LABEL_DISPATCH);
6367 dispatchtarget(c, method, f, wire_type);
6368 putop(c, OP_PUSHTAGDELIM, 0);
6369 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
6370 label(c, LABEL_LOOPSTART);
6371 putpush(c, f);
6372 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
6373 putop(c, OP_CALL, sub_m);
6374 putop(c, OP_POP);
6375 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
6376 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
6377 putop(c, OP_SETDELIM);
6378 }
6379 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
6380 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
6381 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
6382 label(c, LABEL_LOOPBREAK);
6383 putop(c, OP_POP);
6384 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
6385 } else {
6386 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6387 putchecktag(c, f, wire_type, LABEL_DISPATCH);
6388 dispatchtarget(c, method, f, wire_type);
6389 putpush(c, f);
6390 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
6391 putop(c, OP_CALL, sub_m);
6392 putop(c, OP_POP);
6393 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
6394 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
6395 putop(c, OP_SETDELIM);
6396 }
6397 }
6398}
6399
6400/* Generates bytecode to parse a single string or lazy submessage field. */
6401static void generate_delimfield(compiler *c, const upb_fielddef *f,
6402 upb_pbdecodermethod *method) {
6403 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
6404
6405 label(c, LABEL_FIELD);
6406 if (upb_fielddef_isseq(f)) {
6407 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6408 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
6409 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
6410 putop(c, OP_PUSHTAGDELIM, 0);
6411 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
6412 label(c, LABEL_LOOPSTART);
6413 putop(c, OP_PUSHLENDELIM);
6414 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
6415 /* Need to emit even if no handler to skip past the string. */
6416 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006417 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
Paul Yangc4f2a922019-01-17 10:18:43 -08006418 putop(c, OP_POP);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006419 putop(c, OP_SETDELIM);
6420 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
6421 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
6422 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
6423 label(c, LABEL_LOOPBREAK);
6424 putop(c, OP_POP);
6425 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
6426 } else {
6427 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6428 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
6429 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
6430 putop(c, OP_PUSHLENDELIM);
6431 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
6432 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006433 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
Paul Yangc4f2a922019-01-17 10:18:43 -08006434 putop(c, OP_POP);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006435 putop(c, OP_SETDELIM);
6436 }
6437}
6438
6439/* Generates bytecode to parse a single primitive field. */
6440static void generate_primitivefield(compiler *c, const upb_fielddef *f,
6441 upb_pbdecodermethod *method) {
6442 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
6443 upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
6444 opcode parse_type;
6445 upb_selector_t sel;
6446 int wire_type;
6447
6448 label(c, LABEL_FIELD);
6449
6450 /* From a decoding perspective, ENUM is the same as INT32. */
6451 if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
6452 descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
6453
6454 parse_type = (opcode)descriptor_type;
6455
6456 /* TODO(haberman): generate packed or non-packed first depending on "packed"
6457 * setting in the fielddef. This will favor (in speed) whichever was
6458 * specified. */
6459
Paul Yange0e54662016-09-15 11:09:01 -07006460 UPB_ASSERT((int)parse_type >= 0 && parse_type <= OP_MAX);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006461 sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
6462 wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
6463 if (upb_fielddef_isseq(f)) {
6464 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6465 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
6466 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
6467 putop(c, OP_PUSHLENDELIM);
6468 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Packed */
6469 label(c, LABEL_LOOPSTART);
6470 putop(c, parse_type, sel);
6471 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
6472 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
6473 dispatchtarget(c, method, f, wire_type);
6474 putop(c, OP_PUSHTAGDELIM, 0);
6475 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Non-packed */
6476 label(c, LABEL_LOOPSTART);
6477 putop(c, parse_type, sel);
6478 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
6479 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
6480 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
6481 label(c, LABEL_LOOPBREAK);
6482 putop(c, OP_POP); /* Packed and non-packed join. */
6483 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
6484 putop(c, OP_SETDELIM); /* Could remove for non-packed by dup ENDSEQ. */
6485 } else {
6486 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6487 putchecktag(c, f, wire_type, LABEL_DISPATCH);
6488 dispatchtarget(c, method, f, wire_type);
6489 putop(c, parse_type, sel);
6490 }
6491}
6492
6493/* Adds bytecode for parsing the given message to the given decoderplan,
6494 * while adding all dispatch targets to this message's dispatch table. */
6495static void compile_method(compiler *c, upb_pbdecodermethod *method) {
6496 const upb_handlers *h;
6497 const upb_msgdef *md;
6498 uint32_t* start_pc;
6499 upb_msg_field_iter i;
6500 upb_value val;
6501
Paul Yange0e54662016-09-15 11:09:01 -07006502 UPB_ASSERT(method);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006503
6504 /* Clear all entries in the dispatch table. */
6505 upb_inttable_uninit(&method->dispatch);
6506 upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
6507
6508 h = upb_pbdecodermethod_desthandlers(method);
6509 md = upb_handlers_msgdef(h);
6510
6511 method->code_base.ofs = pcofs(c);
6512 putop(c, OP_SETDISPATCH, &method->dispatch);
6513 putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
6514 label(c, LABEL_FIELD);
6515 start_pc = c->pc;
6516 for(upb_msg_field_begin(&i, md);
6517 !upb_msg_field_done(&i);
6518 upb_msg_field_next(&i)) {
6519 const upb_fielddef *f = upb_msg_iter_field(&i);
6520 upb_fieldtype_t type = upb_fielddef_type(f);
6521
6522 if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
6523 generate_msgfield(c, f, method);
6524 } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
6525 type == UPB_TYPE_MESSAGE) {
6526 generate_delimfield(c, f, method);
6527 } else {
6528 generate_primitivefield(c, f, method);
6529 }
6530 }
6531
6532 /* If there were no fields, or if no handlers were defined, we need to
6533 * generate a non-empty loop body so that we can at least dispatch for unknown
6534 * fields and check for the end of the message. */
6535 if (c->pc == start_pc) {
6536 /* Check for end-of-message. */
6537 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6538 /* Unconditionally dispatch. */
6539 putop(c, OP_DISPATCH, 0);
6540 }
6541
6542 /* For now we just loop back to the last field of the message (or if none,
6543 * the DISPATCH opcode for the message). */
6544 putop(c, OP_BRANCH, -LABEL_FIELD);
6545
6546 /* Insert both a label and a dispatch table entry for this end-of-msg. */
6547 label(c, LABEL_ENDMSG);
6548 val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
6549 upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
6550
6551 putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
6552 putop(c, OP_RET);
6553
6554 upb_inttable_compact(&method->dispatch);
6555}
6556
6557/* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
6558 * Returns the method for these handlers.
6559 *
6560 * Generates a new method for every destination handlers reachable from "h". */
6561static void find_methods(compiler *c, const upb_handlers *h) {
6562 upb_value v;
6563 upb_msg_field_iter i;
6564 const upb_msgdef *md;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006565 upb_pbdecodermethod *method;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006566
6567 if (upb_inttable_lookupptr(&c->group->methods, h, &v))
6568 return;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006569
6570 method = newmethod(h, c->group);
6571 upb_inttable_insertptr(&c->group->methods, h, upb_value_ptr(method));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006572
6573 /* Find submethods. */
6574 md = upb_handlers_msgdef(h);
6575 for(upb_msg_field_begin(&i, md);
6576 !upb_msg_field_done(&i);
6577 upb_msg_field_next(&i)) {
6578 const upb_fielddef *f = upb_msg_iter_field(&i);
6579 const upb_handlers *sub_h;
6580 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
6581 (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
6582 /* We only generate a decoder method for submessages with handlers.
6583 * Others will be parsed as unknown fields. */
6584 find_methods(c, sub_h);
6585 }
6586 }
6587}
6588
6589/* (Re-)compile bytecode for all messages in "msgs."
6590 * Overwrites any existing bytecode in "c". */
6591static void compile_methods(compiler *c) {
6592 upb_inttable_iter i;
6593
6594 /* Start over at the beginning of the bytecode. */
6595 c->pc = c->group->bytecode;
6596
6597 upb_inttable_begin(&i, &c->group->methods);
6598 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
6599 upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
6600 compile_method(c, method);
6601 }
6602}
6603
6604static void set_bytecode_handlers(mgroup *g) {
6605 upb_inttable_iter i;
6606 upb_inttable_begin(&i, &g->methods);
6607 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
6608 upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&i));
6609 upb_byteshandler *h = &m->input_handler_;
6610
6611 m->code_base.ptr = g->bytecode + m->code_base.ofs;
6612
6613 upb_byteshandler_setstartstr(h, upb_pbdecoder_startbc, m->code_base.ptr);
6614 upb_byteshandler_setstring(h, upb_pbdecoder_decode, g);
6615 upb_byteshandler_setendstr(h, upb_pbdecoder_end, m);
6616 }
6617}
6618
6619
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006620/* TODO(haberman): allow this to be constructed for an arbitrary set of dest
6621 * handlers and other mgroups (but verify we have a transitive closure). */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006622const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006623 mgroup *g;
6624 compiler *c;
6625
6626 UPB_UNUSED(allowjit);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006627
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006628 g = newgroup();
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006629 c = newcompiler(g, lazy);
6630 find_methods(c, dest);
6631
6632 /* We compile in two passes:
6633 * 1. all messages are assigned relative offsets from the beginning of the
6634 * bytecode (saved in method->code_base).
6635 * 2. forwards OP_CALL instructions can be correctly linked since message
6636 * offsets have been previously assigned.
6637 *
6638 * Could avoid the second pass by linking OP_CALL instructions somehow. */
6639 compile_methods(c);
6640 compile_methods(c);
6641 g->bytecode_end = c->pc;
6642 freecompiler(c);
6643
6644#ifdef UPB_DUMP_BYTECODE
6645 {
Paul Yange0e54662016-09-15 11:09:01 -07006646 FILE *f = fopen("/tmp/upb-bytecode", "w");
6647 UPB_ASSERT(f);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006648 dumpbc(g->bytecode, g->bytecode_end, stderr);
6649 dumpbc(g->bytecode, g->bytecode_end, f);
6650 fclose(f);
Paul Yange0e54662016-09-15 11:09:01 -07006651
6652 f = fopen("/tmp/upb-bytecode.bin", "wb");
6653 UPB_ASSERT(f);
6654 fwrite(g->bytecode, 1, g->bytecode_end - g->bytecode, f);
6655 fclose(f);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006656 }
6657#endif
6658
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006659 set_bytecode_handlers(g);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006660 return g;
6661}
6662
6663
6664/* upb_pbcodecache ************************************************************/
6665
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006666upb_pbcodecache *upb_pbcodecache_new(upb_handlercache *dest) {
6667 upb_pbcodecache *c = upb_gmalloc(sizeof(*c));
6668
6669 if (!c) return NULL;
6670
6671 c->dest = dest;
6672 c->allow_jit = true;
6673 c->lazy = false;
6674
6675 c->arena = upb_arena_new();
6676 if (!upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR)) return NULL;
6677
6678 return c;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006679}
6680
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006681void upb_pbcodecache_free(upb_pbcodecache *c) {
6682 size_t i;
6683
6684 for (i = 0; i < upb_inttable_count(&c->groups); i++) {
6685 upb_value v;
6686 bool ok = upb_inttable_lookup(&c->groups, i, &v);
6687 UPB_ASSERT(ok);
6688 freegroup((void*)upb_value_getconstptr(v));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006689 }
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006690
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006691 upb_inttable_uninit(&c->groups);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006692 upb_arena_free(c->arena);
6693 upb_gfree(c);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006694}
6695
6696bool upb_pbcodecache_allowjit(const upb_pbcodecache *c) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006697 return c->allow_jit;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006698}
6699
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006700void upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) {
6701 UPB_ASSERT(upb_inttable_count(&c->groups) == 0);
6702 c->allow_jit = allow;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006703}
6704
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006705void upb_pbdecodermethodopts_setlazy(upb_pbcodecache *c, bool lazy) {
6706 UPB_ASSERT(upb_inttable_count(&c->groups) == 0);
6707 c->lazy = lazy;
6708}
6709
6710const upb_pbdecodermethod *upb_pbcodecache_get(upb_pbcodecache *c,
6711 const upb_msgdef *md) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006712 upb_value v;
6713 bool ok;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006714 const upb_handlers *h;
6715 const mgroup *g;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006716
6717 /* Right now we build a new DecoderMethod every time.
6718 * TODO(haberman): properly cache methods by their true key. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006719 h = upb_handlercache_get(c->dest, md);
6720 g = mgroup_new(h, c->allow_jit, c->lazy);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006721 upb_inttable_push(&c->groups, upb_value_constptr(g));
6722
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006723 ok = upb_inttable_lookupptr(&g->methods, h, &v);
Paul Yange0e54662016-09-15 11:09:01 -07006724 UPB_ASSERT(ok);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006725 return upb_value_getptr(v);
6726}
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006727/*
6728** upb::Decoder (Bytecode Decoder VM)
6729**
6730** Bytecode must previously have been generated using the bytecode compiler in
6731** compile_decoder.c. This decoder then walks through the bytecode op-by-op to
6732** parse the input.
6733**
6734** Decoding is fully resumable; we just keep a pointer to the current bytecode
6735** instruction and resume from there. A fair amount of the logic here is to
6736** handle the fact that values can span buffer seams and we have to be able to
6737** be capable of suspending/resuming from any byte in the stream. This
6738** sometimes requires keeping a few trailing bytes from the last buffer around
6739** in the "residual" buffer.
6740*/
6741
6742#include <inttypes.h>
6743#include <stddef.h>
6744
6745#ifdef UPB_DUMP_BYTECODE
6746#include <stdio.h>
6747#endif
6748
6749#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);
6750
6751/* Error messages that are shared between the bytecode and JIT decoders. */
6752const char *kPbDecoderStackOverflow = "Nesting too deep.";
6753const char *kPbDecoderSubmessageTooLong =
6754 "Submessage end extends past enclosing submessage.";
6755
6756/* Error messages shared within this file. */
6757static const char *kUnterminatedVarint = "Unterminated varint.";
6758
6759/* upb_pbdecoder **************************************************************/
6760
6761static opcode halt = OP_HALT;
6762
Paul Yange0e54662016-09-15 11:09:01 -07006763/* A dummy character we can point to when the user passes us a NULL buffer.
6764 * We need this because in C (NULL + 0) and (NULL - NULL) are undefined
6765 * behavior, which would invalidate functions like curbufleft(). */
6766static const char dummy_char;
6767
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006768/* Whether an op consumes any of the input buffer. */
6769static bool consumes_input(opcode op) {
6770 switch (op) {
6771 case OP_SETDISPATCH:
6772 case OP_STARTMSG:
6773 case OP_ENDMSG:
6774 case OP_STARTSEQ:
6775 case OP_ENDSEQ:
6776 case OP_STARTSUBMSG:
6777 case OP_ENDSUBMSG:
6778 case OP_STARTSTR:
6779 case OP_ENDSTR:
6780 case OP_PUSHTAGDELIM:
6781 case OP_POP:
6782 case OP_SETDELIM:
6783 case OP_SETBIGGROUPNUM:
6784 case OP_CHECKDELIM:
6785 case OP_CALL:
6786 case OP_RET:
6787 case OP_BRANCH:
6788 return false;
6789 default:
6790 return true;
6791 }
6792}
6793
6794static size_t stacksize(upb_pbdecoder *d, size_t entries) {
6795 UPB_UNUSED(d);
6796 return entries * sizeof(upb_pbdecoder_frame);
6797}
6798
6799static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
6800 UPB_UNUSED(d);
6801
6802#ifdef UPB_USE_JIT_X64
6803 if (d->method_->is_native_) {
6804 /* Each native stack frame needs two pointers, plus we need a few frames for
6805 * the enter/exit trampolines. */
6806 size_t ret = entries * sizeof(void*) * 2;
6807 ret += sizeof(void*) * 10;
6808 return ret;
6809 }
6810#endif
6811
6812 return entries * sizeof(uint32_t*);
6813}
6814
6815
6816static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
6817
6818/* It's unfortunate that we have to micro-manage the compiler with
6819 * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
6820 * specific to one hardware configuration. But empirically on a Core i7,
6821 * performance increases 30-50% with these annotations. Every instance where
6822 * these appear, gcc 4.2.1 made the wrong decision and degraded performance in
6823 * benchmarks. */
6824
6825static void seterr(upb_pbdecoder *d, const char *msg) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08006826 upb_status_seterrmsg(d->status, msg);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006827}
6828
6829void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
6830 seterr(d, msg);
6831}
6832
6833
6834/* Buffering ******************************************************************/
6835
6836/* We operate on one buffer at a time, which is either the user's buffer passed
6837 * to our "decode" callback or some residual bytes from the previous buffer. */
6838
6839/* How many bytes can be safely read from d->ptr without reading past end-of-buf
6840 * or past the current delimited end. */
6841static size_t curbufleft(const upb_pbdecoder *d) {
Paul Yange0e54662016-09-15 11:09:01 -07006842 UPB_ASSERT(d->data_end >= d->ptr);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006843 return d->data_end - d->ptr;
6844}
6845
6846/* How many bytes are available before end-of-buffer. */
6847static size_t bufleft(const upb_pbdecoder *d) {
6848 return d->end - d->ptr;
6849}
6850
6851/* Overall stream offset of d->ptr. */
6852uint64_t offset(const upb_pbdecoder *d) {
6853 return d->bufstart_ofs + (d->ptr - d->buf);
6854}
6855
6856/* How many bytes are available before the end of this delimited region. */
6857size_t delim_remaining(const upb_pbdecoder *d) {
6858 return d->top->end_ofs - offset(d);
6859}
6860
6861/* Advances d->ptr. */
6862static void advance(upb_pbdecoder *d, size_t len) {
Paul Yange0e54662016-09-15 11:09:01 -07006863 UPB_ASSERT(curbufleft(d) >= len);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006864 d->ptr += len;
6865}
6866
6867static bool in_buf(const char *p, const char *buf, const char *end) {
6868 return p >= buf && p <= end;
6869}
6870
6871static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
6872 return in_buf(p, d->residual, d->residual_end);
6873}
6874
6875/* Calculates the delim_end value, which is affected by both the current buffer
6876 * and the parsing stack, so must be called whenever either is updated. */
6877static void set_delim_end(upb_pbdecoder *d) {
6878 size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
6879 if (delim_ofs <= (size_t)(d->end - d->buf)) {
6880 d->delim_end = d->buf + delim_ofs;
6881 d->data_end = d->delim_end;
6882 } else {
6883 d->data_end = d->end;
6884 d->delim_end = NULL;
6885 }
6886}
6887
6888static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
6889 d->ptr = buf;
6890 d->buf = buf;
6891 d->end = end;
6892 set_delim_end(d);
6893}
6894
6895static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
Paul Yange0e54662016-09-15 11:09:01 -07006896 UPB_ASSERT(curbufleft(d) == 0);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006897 d->bufstart_ofs += (d->end - d->buf);
6898 switchtobuf(d, buf, buf + len);
6899}
6900
6901static void checkpoint(upb_pbdecoder *d) {
6902 /* The assertion here is in the interests of efficiency, not correctness.
6903 * We are trying to ensure that we don't checkpoint() more often than
6904 * necessary. */
Paul Yange0e54662016-09-15 11:09:01 -07006905 UPB_ASSERT(d->checkpoint != d->ptr);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006906 d->checkpoint = d->ptr;
6907}
6908
6909/* Skips "bytes" bytes in the stream, which may be more than available. If we
6910 * skip more bytes than are available, we return a long read count to the caller
6911 * indicating how many bytes can be skipped over before passing actual data
6912 * again. Skipped bytes can pass a NULL buffer and the decoder guarantees they
6913 * won't actually be read.
6914 */
6915static int32_t skip(upb_pbdecoder *d, size_t bytes) {
Paul Yange0e54662016-09-15 11:09:01 -07006916 UPB_ASSERT(!in_residual_buf(d, d->ptr) || d->size_param == 0);
6917 UPB_ASSERT(d->skip == 0);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006918 if (bytes > delim_remaining(d)) {
6919 seterr(d, "Skipped value extended beyond enclosing submessage.");
6920 return upb_pbdecoder_suspend(d);
Paul Yange0e54662016-09-15 11:09:01 -07006921 } else if (bufleft(d) >= bytes) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006922 /* Skipped data is all in current buffer, and more is still available. */
6923 advance(d, bytes);
6924 d->skip = 0;
6925 return DECODE_OK;
6926 } else {
6927 /* Skipped data extends beyond currently available buffers. */
6928 d->pc = d->last;
6929 d->skip = bytes - curbufleft(d);
6930 d->bufstart_ofs += (d->end - d->buf);
6931 d->residual_end = d->residual;
6932 switchtobuf(d, d->residual, d->residual_end);
6933 return d->size_param + d->skip;
6934 }
6935}
6936
6937
6938/* Resumes the decoder from an initial state or from a previous suspend. */
6939int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
6940 size_t size, const upb_bufhandle *handle) {
6941 UPB_UNUSED(p); /* Useless; just for the benefit of the JIT. */
6942
Paul Yange0e54662016-09-15 11:09:01 -07006943 /* d->skip and d->residual_end could probably elegantly be represented
6944 * as a single variable, to more easily represent this invariant. */
6945 UPB_ASSERT(!(d->skip && d->residual_end > d->residual));
6946
6947 /* We need to remember the original size_param, so that the value we return
6948 * is relative to it, even if we do some skipping first. */
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006949 d->size_param = size;
6950 d->handle = handle;
6951
Paul Yange0e54662016-09-15 11:09:01 -07006952 /* Have to handle this case specially (ie. not with skip()) because the user
6953 * is allowed to pass a NULL buffer here, which won't allow us to safely
6954 * calculate a d->end or use our normal functions like curbufleft(). */
6955 if (d->skip && d->skip >= size) {
6956 d->skip -= size;
6957 d->bufstart_ofs += size;
6958 buf = &dummy_char;
6959 size = 0;
6960
6961 /* We can't just return now, because we might need to execute some ops
6962 * like CHECKDELIM, which could call some callbacks and pop the stack. */
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006963 }
6964
Paul Yange0e54662016-09-15 11:09:01 -07006965 /* We need to pretend that this was the actual buffer param, since some of the
6966 * calculations assume that d->ptr/d->buf is relative to this. */
6967 d->buf_param = buf;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006968
6969 if (!buf) {
6970 /* NULL buf is ok if its entire span is covered by the "skip" above, but
6971 * by this point we know that "skip" doesn't cover the buffer. */
6972 seterr(d, "Passed NULL buffer over non-skippable region.");
6973 return upb_pbdecoder_suspend(d);
6974 }
6975
Paul Yange0e54662016-09-15 11:09:01 -07006976 if (d->residual_end > d->residual) {
6977 /* We have residual bytes from the last buffer. */
6978 UPB_ASSERT(d->ptr == d->residual);
6979 } else {
6980 switchtobuf(d, buf, buf + size);
6981 }
6982
6983 d->checkpoint = d->ptr;
6984
6985 /* Handle skips that don't cover the whole buffer (as above). */
6986 if (d->skip) {
6987 size_t skip_bytes = d->skip;
6988 d->skip = 0;
6989 CHECK_RETURN(skip(d, skip_bytes));
6990 checkpoint(d);
6991 }
6992
6993 /* If we're inside an unknown group, continue to parse unknown values. */
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006994 if (d->top->groupnum < 0) {
6995 CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
Paul Yange0e54662016-09-15 11:09:01 -07006996 checkpoint(d);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07006997 }
6998
6999 return DECODE_OK;
7000}
7001
7002/* Suspends the decoder at the last checkpoint, without saving any residual
7003 * bytes. If there are any unconsumed bytes, returns a short byte count. */
7004size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
7005 d->pc = d->last;
7006 if (d->checkpoint == d->residual) {
7007 /* Checkpoint was in residual buf; no user bytes were consumed. */
7008 d->ptr = d->residual;
7009 return 0;
7010 } else {
Paul Yange0e54662016-09-15 11:09:01 -07007011 size_t ret = d->size_param - (d->end - d->checkpoint);
7012 UPB_ASSERT(!in_residual_buf(d, d->checkpoint));
7013 UPB_ASSERT(d->buf == d->buf_param || d->buf == &dummy_char);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007014
Paul Yange0e54662016-09-15 11:09:01 -07007015 d->bufstart_ofs += (d->checkpoint - d->buf);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007016 d->residual_end = d->residual;
7017 switchtobuf(d, d->residual, d->residual_end);
Paul Yange0e54662016-09-15 11:09:01 -07007018 return ret;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007019 }
7020}
7021
7022/* Suspends the decoder at the last checkpoint, and saves any unconsumed
7023 * bytes in our residual buffer. This is necessary if we need more user
7024 * bytes to form a complete value, which might not be contiguous in the
7025 * user's buffers. Always consumes all user bytes. */
7026static size_t suspend_save(upb_pbdecoder *d) {
7027 /* We hit end-of-buffer before we could parse a full value.
7028 * Save any unconsumed bytes (if any) to the residual buffer. */
7029 d->pc = d->last;
7030
7031 if (d->checkpoint == d->residual) {
7032 /* Checkpoint was in residual buf; append user byte(s) to residual buf. */
Paul Yange0e54662016-09-15 11:09:01 -07007033 UPB_ASSERT((d->residual_end - d->residual) + d->size_param <=
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007034 sizeof(d->residual));
7035 if (!in_residual_buf(d, d->ptr)) {
7036 d->bufstart_ofs -= (d->residual_end - d->residual);
7037 }
7038 memcpy(d->residual_end, d->buf_param, d->size_param);
7039 d->residual_end += d->size_param;
7040 } else {
7041 /* Checkpoint was in user buf; old residual bytes not needed. */
7042 size_t save;
Paul Yange0e54662016-09-15 11:09:01 -07007043 UPB_ASSERT(!in_residual_buf(d, d->checkpoint));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007044
7045 d->ptr = d->checkpoint;
7046 save = curbufleft(d);
Paul Yange0e54662016-09-15 11:09:01 -07007047 UPB_ASSERT(save <= sizeof(d->residual));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007048 memcpy(d->residual, d->ptr, save);
7049 d->residual_end = d->residual + save;
7050 d->bufstart_ofs = offset(d);
7051 }
7052
7053 switchtobuf(d, d->residual, d->residual_end);
7054 return d->size_param;
7055}
7056
7057/* Copies the next "bytes" bytes into "buf" and advances the stream.
7058 * Requires that this many bytes are available in the current buffer. */
7059UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
7060 size_t bytes) {
Paul Yange0e54662016-09-15 11:09:01 -07007061 UPB_ASSERT(bytes <= curbufleft(d));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007062 memcpy(buf, d->ptr, bytes);
7063 advance(d, bytes);
7064}
7065
7066/* Slow path for getting the next "bytes" bytes, regardless of whether they are
7067 * available in the current buffer or not. Returns a status code as described
7068 * in decoder.int.h. */
7069UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
7070 size_t bytes) {
7071 const size_t avail = curbufleft(d);
7072 consumebytes(d, buf, avail);
7073 bytes -= avail;
Paul Yange0e54662016-09-15 11:09:01 -07007074 UPB_ASSERT(bytes > 0);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007075 if (in_residual_buf(d, d->ptr)) {
7076 advancetobuf(d, d->buf_param, d->size_param);
7077 }
7078 if (curbufleft(d) >= bytes) {
7079 consumebytes(d, (char *)buf + avail, bytes);
7080 return DECODE_OK;
7081 } else if (d->data_end == d->delim_end) {
7082 seterr(d, "Submessage ended in the middle of a value or group");
7083 return upb_pbdecoder_suspend(d);
7084 } else {
7085 return suspend_save(d);
7086 }
7087}
7088
7089/* Gets the next "bytes" bytes, regardless of whether they are available in the
7090 * current buffer or not. Returns a status code as described in decoder.int.h.
7091 */
7092UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
7093 size_t bytes) {
7094 if (curbufleft(d) >= bytes) {
7095 /* Buffer has enough data to satisfy. */
7096 consumebytes(d, buf, bytes);
7097 return DECODE_OK;
7098 } else {
7099 return getbytes_slow(d, buf, bytes);
7100 }
7101}
7102
7103UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
7104 size_t bytes) {
7105 size_t ret = curbufleft(d);
7106 memcpy(buf, d->ptr, ret);
7107 if (in_residual_buf(d, d->ptr)) {
7108 size_t copy = UPB_MIN(bytes - ret, d->size_param);
7109 memcpy((char *)buf + ret, d->buf_param, copy);
7110 ret += copy;
7111 }
7112 return ret;
7113}
7114
7115UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
7116 size_t bytes) {
7117 if (curbufleft(d) >= bytes) {
7118 memcpy(buf, d->ptr, bytes);
7119 return bytes;
7120 } else {
7121 return peekbytes_slow(d, buf, bytes);
7122 }
7123}
7124
7125
7126/* Decoding of wire types *****************************************************/
7127
7128/* Slow path for decoding a varint from the current buffer position.
7129 * Returns a status code as described in decoder.int.h. */
7130UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
7131 uint64_t *u64) {
7132 uint8_t byte = 0x80;
7133 int bitpos;
7134 *u64 = 0;
7135 for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
Paul Yange0e54662016-09-15 11:09:01 -07007136 CHECK_RETURN(getbytes(d, &byte, 1));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007137 *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
7138 }
7139 if(bitpos == 70 && (byte & 0x80)) {
7140 seterr(d, kUnterminatedVarint);
7141 return upb_pbdecoder_suspend(d);
7142 }
7143 return DECODE_OK;
7144}
7145
7146/* Decodes a varint from the current buffer position.
7147 * Returns a status code as described in decoder.int.h. */
7148UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
7149 if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
7150 *u64 = *d->ptr;
7151 advance(d, 1);
7152 return DECODE_OK;
7153 } else if (curbufleft(d) >= 10) {
7154 /* Fast case. */
7155 upb_decoderet r = upb_vdecode_fast(d->ptr);
7156 if (r.p == NULL) {
7157 seterr(d, kUnterminatedVarint);
7158 return upb_pbdecoder_suspend(d);
7159 }
7160 advance(d, r.p - d->ptr);
7161 *u64 = r.val;
7162 return DECODE_OK;
7163 } else {
7164 /* Slow case -- varint spans buffer seam. */
7165 return upb_pbdecoder_decode_varint_slow(d, u64);
7166 }
7167}
7168
7169/* Decodes a 32-bit varint from the current buffer position.
7170 * Returns a status code as described in decoder.int.h. */
7171UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
7172 uint64_t u64;
7173 int32_t ret = decode_varint(d, &u64);
7174 if (ret >= 0) return ret;
7175 if (u64 > UINT32_MAX) {
7176 seterr(d, "Unterminated 32-bit varint");
7177 /* TODO(haberman) guarantee that this function return is >= 0 somehow,
7178 * so we know this path will always be treated as error by our caller.
7179 * Right now the size_t -> int32_t can overflow and produce negative values.
7180 */
7181 *u32 = 0;
7182 return upb_pbdecoder_suspend(d);
7183 }
7184 *u32 = u64;
7185 return DECODE_OK;
7186}
7187
7188/* Decodes a fixed32 from the current buffer position.
7189 * Returns a status code as described in decoder.int.h.
7190 * TODO: proper byte swapping for big-endian machines. */
7191UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
7192 return getbytes(d, u32, 4);
7193}
7194
7195/* Decodes a fixed64 from the current buffer position.
7196 * Returns a status code as described in decoder.int.h.
7197 * TODO: proper byte swapping for big-endian machines. */
7198UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
7199 return getbytes(d, u64, 8);
7200}
7201
7202/* Non-static versions of the above functions.
7203 * These are called by the JIT for fallback paths. */
7204int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
7205 return decode_fixed32(d, u32);
7206}
7207
7208int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
7209 return decode_fixed64(d, u64);
7210}
7211
7212static double as_double(uint64_t n) { double d; memcpy(&d, &n, 8); return d; }
7213static float as_float(uint32_t n) { float f; memcpy(&f, &n, 4); return f; }
7214
7215/* Pushes a frame onto the decoder stack. */
7216static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
7217 upb_pbdecoder_frame *fr = d->top;
7218
7219 if (end > fr->end_ofs) {
7220 seterr(d, kPbDecoderSubmessageTooLong);
7221 return false;
7222 } else if (fr == d->limit) {
7223 seterr(d, kPbDecoderStackOverflow);
7224 return false;
7225 }
7226
7227 fr++;
7228 fr->end_ofs = end;
7229 fr->dispatch = NULL;
7230 fr->groupnum = 0;
7231 d->top = fr;
7232 return true;
7233}
7234
7235static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
7236 /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence
7237 * field number) prior to hitting any enclosing submessage end, pushing our
7238 * existing delim end prevents us from continuing to parse values from a
7239 * corrupt proto that doesn't give us an END tag in time. */
7240 if (!decoder_push(d, d->top->end_ofs))
7241 return false;
7242 d->top->groupnum = arg;
7243 return true;
7244}
7245
7246/* Pops a frame from the decoder stack. */
7247static void decoder_pop(upb_pbdecoder *d) { d->top--; }
7248
7249UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
7250 uint64_t expected) {
7251 uint64_t data = 0;
7252 size_t bytes = upb_value_size(expected);
7253 size_t read = peekbytes(d, &data, bytes);
7254 if (read == bytes && data == expected) {
7255 /* Advance past matched bytes. */
7256 int32_t ok = getbytes(d, &data, read);
Paul Yange0e54662016-09-15 11:09:01 -07007257 UPB_ASSERT(ok < 0);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007258 return DECODE_OK;
7259 } else if (read < bytes && memcmp(&data, &expected, read) == 0) {
7260 return suspend_save(d);
7261 } else {
7262 return DECODE_MISMATCH;
7263 }
7264}
7265
7266int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
7267 uint8_t wire_type) {
7268 if (fieldnum >= 0)
7269 goto have_tag;
7270
7271 while (true) {
7272 uint32_t tag;
7273 CHECK_RETURN(decode_v32(d, &tag));
7274 wire_type = tag & 0x7;
7275 fieldnum = tag >> 3;
7276
7277have_tag:
7278 if (fieldnum == 0) {
7279 seterr(d, "Saw invalid field number (0)");
7280 return upb_pbdecoder_suspend(d);
7281 }
7282
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007283 switch (wire_type) {
7284 case UPB_WIRE_TYPE_32BIT:
7285 CHECK_RETURN(skip(d, 4));
7286 break;
7287 case UPB_WIRE_TYPE_64BIT:
7288 CHECK_RETURN(skip(d, 8));
7289 break;
7290 case UPB_WIRE_TYPE_VARINT: {
7291 uint64_t u64;
7292 CHECK_RETURN(decode_varint(d, &u64));
7293 break;
7294 }
7295 case UPB_WIRE_TYPE_DELIMITED: {
7296 uint32_t len;
7297 CHECK_RETURN(decode_v32(d, &len));
7298 CHECK_RETURN(skip(d, len));
7299 break;
7300 }
7301 case UPB_WIRE_TYPE_START_GROUP:
7302 CHECK_SUSPEND(pushtagdelim(d, -fieldnum));
7303 break;
7304 case UPB_WIRE_TYPE_END_GROUP:
7305 if (fieldnum == -d->top->groupnum) {
7306 decoder_pop(d);
7307 } else if (fieldnum == d->top->groupnum) {
7308 return DECODE_ENDGROUP;
7309 } else {
7310 seterr(d, "Unmatched ENDGROUP tag.");
7311 return upb_pbdecoder_suspend(d);
7312 }
7313 break;
7314 default:
7315 seterr(d, "Invalid wire type");
7316 return upb_pbdecoder_suspend(d);
7317 }
7318
7319 if (d->top->groupnum >= 0) {
Paul Yang60327462017-10-09 12:39:13 -07007320 /* TODO: More code needed for handling unknown groups. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007321 upb_sink_putunknown(d->top->sink, d->checkpoint, d->ptr - d->checkpoint);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007322 return DECODE_OK;
7323 }
7324
7325 /* Unknown group -- continue looping over unknown fields. */
7326 checkpoint(d);
7327 }
7328}
7329
7330static void goto_endmsg(upb_pbdecoder *d) {
7331 upb_value v;
7332 bool found = upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &v);
Paul Yange0e54662016-09-15 11:09:01 -07007333 UPB_ASSERT(found);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007334 d->pc = d->top->base + upb_value_getuint64(v);
7335}
7336
7337/* Parses a tag and jumps to the corresponding bytecode instruction for this
7338 * field.
7339 *
7340 * If the tag is unknown (or the wire type doesn't match), parses the field as
7341 * unknown. If the tag is a valid ENDGROUP tag, jumps to the bytecode
7342 * instruction for the end of message. */
7343static int32_t dispatch(upb_pbdecoder *d) {
7344 upb_inttable *dispatch = d->top->dispatch;
7345 uint32_t tag;
7346 uint8_t wire_type;
7347 uint32_t fieldnum;
7348 upb_value val;
7349 int32_t retval;
7350
7351 /* Decode tag. */
7352 CHECK_RETURN(decode_v32(d, &tag));
7353 wire_type = tag & 0x7;
7354 fieldnum = tag >> 3;
7355
7356 /* Lookup tag. Because of packed/non-packed compatibility, we have to
7357 * check the wire type against two possibilities. */
7358 if (fieldnum != DISPATCH_ENDMSG &&
7359 upb_inttable_lookup32(dispatch, fieldnum, &val)) {
7360 uint64_t v = upb_value_getuint64(val);
7361 if (wire_type == (v & 0xff)) {
7362 d->pc = d->top->base + (v >> 16);
7363 return DECODE_OK;
7364 } else if (wire_type == ((v >> 8) & 0xff)) {
7365 bool found =
7366 upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val);
Paul Yange0e54662016-09-15 11:09:01 -07007367 UPB_ASSERT(found);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007368 d->pc = d->top->base + upb_value_getuint64(val);
7369 return DECODE_OK;
7370 }
7371 }
7372
7373 /* We have some unknown fields (or ENDGROUP) to parse. The DISPATCH or TAG
7374 * bytecode that triggered this is preceded by a CHECKDELIM bytecode which
7375 * we need to back up to, so that when we're done skipping unknown data we
7376 * can re-check the delimited end. */
7377 d->last--; /* Necessary if we get suspended */
7378 d->pc = d->last;
Paul Yange0e54662016-09-15 11:09:01 -07007379 UPB_ASSERT(getop(*d->last) == OP_CHECKDELIM);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007380
7381 /* Unknown field or ENDGROUP. */
7382 retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
7383
7384 CHECK_RETURN(retval);
7385
7386 if (retval == DECODE_ENDGROUP) {
7387 goto_endmsg(d);
7388 return DECODE_OK;
7389 }
7390
7391 return DECODE_OK;
7392}
7393
7394/* Callers know that the stack is more than one deep because the opcodes that
7395 * call this only occur after PUSH operations. */
7396upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
Paul Yange0e54662016-09-15 11:09:01 -07007397 UPB_ASSERT(d->top != d->stack);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007398 return d->top - 1;
7399}
7400
7401
7402/* The main decoding loop *****************************************************/
7403
7404/* The main decoder VM function. Uses traditional bytecode dispatch loop with a
7405 * switch() statement. */
7406size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
7407 const upb_bufhandle* handle) {
7408
7409#define VMCASE(op, code) \
7410 case op: { code; if (consumes_input(op)) checkpoint(d); break; }
7411#define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \
7412 VMCASE(OP_PARSE_ ## type, { \
7413 ctype val; \
7414 CHECK_RETURN(decode_ ## wt(d, &val)); \
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007415 upb_sink_put ## name(d->top->sink, arg, (convfunc)(val)); \
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007416 })
7417
7418 while(1) {
7419 int32_t instruction;
7420 opcode op;
7421 uint32_t arg;
7422 int32_t longofs;
7423
7424 d->last = d->pc;
7425 instruction = *d->pc++;
7426 op = getop(instruction);
7427 arg = instruction >> 8;
7428 longofs = arg;
Paul Yange0e54662016-09-15 11:09:01 -07007429 UPB_ASSERT(d->ptr != d->residual_end);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007430 UPB_UNUSED(group);
7431#ifdef UPB_DUMP_BYTECODE
7432 fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
7433 "%x %s (%d)\n",
7434 (int)offset(d),
7435 (int)(d->ptr - d->buf),
7436 (int)(d->data_end - d->ptr),
7437 (int)(d->end - d->ptr),
7438 (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)),
7439 (int)(d->pc - 1 - group->bytecode),
7440 upb_pbdecoder_getopname(op),
7441 arg);
7442#endif
7443 switch (op) {
7444 /* Technically, we are losing data if we see a 32-bit varint that is not
7445 * properly sign-extended. We could detect this and error about the data
7446 * loss, but proto2 does not do this, so we pass. */
7447 PRIMITIVE_OP(INT32, varint, int32, int32_t, uint64_t)
7448 PRIMITIVE_OP(INT64, varint, int64, int64_t, uint64_t)
7449 PRIMITIVE_OP(UINT32, varint, uint32, uint32_t, uint64_t)
7450 PRIMITIVE_OP(UINT64, varint, uint64, uint64_t, uint64_t)
7451 PRIMITIVE_OP(FIXED32, fixed32, uint32, uint32_t, uint32_t)
7452 PRIMITIVE_OP(FIXED64, fixed64, uint64, uint64_t, uint64_t)
7453 PRIMITIVE_OP(SFIXED32, fixed32, int32, int32_t, uint32_t)
7454 PRIMITIVE_OP(SFIXED64, fixed64, int64, int64_t, uint64_t)
7455 PRIMITIVE_OP(BOOL, varint, bool, bool, uint64_t)
7456 PRIMITIVE_OP(DOUBLE, fixed64, double, as_double, uint64_t)
7457 PRIMITIVE_OP(FLOAT, fixed32, float, as_float, uint32_t)
7458 PRIMITIVE_OP(SINT32, varint, int32, upb_zzdec_32, uint64_t)
7459 PRIMITIVE_OP(SINT64, varint, int64, upb_zzdec_64, uint64_t)
7460
7461 VMCASE(OP_SETDISPATCH,
7462 d->top->base = d->pc - 1;
7463 memcpy(&d->top->dispatch, d->pc, sizeof(void*));
7464 d->pc += sizeof(void*) / sizeof(uint32_t);
7465 )
7466 VMCASE(OP_STARTMSG,
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007467 CHECK_SUSPEND(upb_sink_startmsg(d->top->sink));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007468 )
7469 VMCASE(OP_ENDMSG,
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007470 CHECK_SUSPEND(upb_sink_endmsg(d->top->sink, d->status));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007471 )
7472 VMCASE(OP_STARTSEQ,
7473 upb_pbdecoder_frame *outer = outer_frame(d);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007474 CHECK_SUSPEND(upb_sink_startseq(outer->sink, arg, &d->top->sink));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007475 )
7476 VMCASE(OP_ENDSEQ,
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007477 CHECK_SUSPEND(upb_sink_endseq(d->top->sink, arg));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007478 )
7479 VMCASE(OP_STARTSUBMSG,
7480 upb_pbdecoder_frame *outer = outer_frame(d);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007481 CHECK_SUSPEND(upb_sink_startsubmsg(outer->sink, arg, &d->top->sink));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007482 )
7483 VMCASE(OP_ENDSUBMSG,
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007484 CHECK_SUSPEND(upb_sink_endsubmsg(d->top->sink, arg));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007485 )
7486 VMCASE(OP_STARTSTR,
7487 uint32_t len = delim_remaining(d);
7488 upb_pbdecoder_frame *outer = outer_frame(d);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007489 CHECK_SUSPEND(upb_sink_startstr(outer->sink, arg, len, &d->top->sink));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007490 if (len == 0) {
7491 d->pc++; /* Skip OP_STRING. */
7492 }
7493 )
7494 VMCASE(OP_STRING,
7495 uint32_t len = curbufleft(d);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007496 size_t n = upb_sink_putstring(d->top->sink, arg, d->ptr, len, handle);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007497 if (n > len) {
7498 if (n > delim_remaining(d)) {
7499 seterr(d, "Tried to skip past end of string.");
7500 return upb_pbdecoder_suspend(d);
7501 } else {
7502 int32_t ret = skip(d, n);
7503 /* This shouldn't return DECODE_OK, because n > len. */
Paul Yange0e54662016-09-15 11:09:01 -07007504 UPB_ASSERT(ret >= 0);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007505 return ret;
7506 }
7507 }
7508 advance(d, n);
7509 if (n < len || d->delim_end == NULL) {
7510 /* We aren't finished with this string yet. */
7511 d->pc--; /* Repeat OP_STRING. */
7512 if (n > 0) checkpoint(d);
7513 return upb_pbdecoder_suspend(d);
7514 }
7515 )
7516 VMCASE(OP_ENDSTR,
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007517 CHECK_SUSPEND(upb_sink_endstr(d->top->sink, arg));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007518 )
7519 VMCASE(OP_PUSHTAGDELIM,
7520 CHECK_SUSPEND(pushtagdelim(d, arg));
7521 )
7522 VMCASE(OP_SETBIGGROUPNUM,
7523 d->top->groupnum = *d->pc++;
7524 )
7525 VMCASE(OP_POP,
Paul Yange0e54662016-09-15 11:09:01 -07007526 UPB_ASSERT(d->top > d->stack);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007527 decoder_pop(d);
7528 )
7529 VMCASE(OP_PUSHLENDELIM,
7530 uint32_t len;
7531 CHECK_RETURN(decode_v32(d, &len));
7532 CHECK_SUSPEND(decoder_push(d, offset(d) + len));
7533 set_delim_end(d);
7534 )
7535 VMCASE(OP_SETDELIM,
7536 set_delim_end(d);
7537 )
7538 VMCASE(OP_CHECKDELIM,
7539 /* We are guaranteed of this assert because we never allow ourselves to
7540 * consume bytes beyond data_end, which covers delim_end when non-NULL.
7541 */
Paul Yange0e54662016-09-15 11:09:01 -07007542 UPB_ASSERT(!(d->delim_end && d->ptr > d->delim_end));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007543 if (d->ptr == d->delim_end)
7544 d->pc += longofs;
7545 )
7546 VMCASE(OP_CALL,
7547 d->callstack[d->call_len++] = d->pc;
7548 d->pc += longofs;
7549 )
7550 VMCASE(OP_RET,
Paul Yange0e54662016-09-15 11:09:01 -07007551 UPB_ASSERT(d->call_len > 0);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007552 d->pc = d->callstack[--d->call_len];
7553 )
7554 VMCASE(OP_BRANCH,
7555 d->pc += longofs;
7556 )
7557 VMCASE(OP_TAG1,
7558 uint8_t expected;
7559 CHECK_SUSPEND(curbufleft(d) > 0);
7560 expected = (arg >> 8) & 0xff;
7561 if (*d->ptr == expected) {
7562 advance(d, 1);
7563 } else {
7564 int8_t shortofs;
7565 badtag:
7566 shortofs = arg;
7567 if (shortofs == LABEL_DISPATCH) {
7568 CHECK_RETURN(dispatch(d));
7569 } else {
7570 d->pc += shortofs;
7571 break; /* Avoid checkpoint(). */
7572 }
7573 }
7574 )
7575 VMCASE(OP_TAG2,
7576 uint16_t expected;
7577 CHECK_SUSPEND(curbufleft(d) > 0);
7578 expected = (arg >> 8) & 0xffff;
7579 if (curbufleft(d) >= 2) {
7580 uint16_t actual;
7581 memcpy(&actual, d->ptr, 2);
7582 if (expected == actual) {
7583 advance(d, 2);
7584 } else {
7585 goto badtag;
7586 }
7587 } else {
7588 int32_t result = upb_pbdecoder_checktag_slow(d, expected);
7589 if (result == DECODE_MISMATCH) goto badtag;
7590 if (result >= 0) return result;
7591 }
7592 )
7593 VMCASE(OP_TAGN, {
7594 uint64_t expected;
7595 int32_t result;
7596 memcpy(&expected, d->pc, 8);
7597 d->pc += 2;
7598 result = upb_pbdecoder_checktag_slow(d, expected);
7599 if (result == DECODE_MISMATCH) goto badtag;
7600 if (result >= 0) return result;
7601 })
7602 VMCASE(OP_DISPATCH, {
7603 CHECK_RETURN(dispatch(d));
7604 })
7605 VMCASE(OP_HALT, {
7606 return d->size_param;
7607 })
7608 }
7609 }
7610}
7611
7612
7613/* BytesHandler handlers ******************************************************/
7614
7615void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
7616 upb_pbdecoder *d = closure;
7617 UPB_UNUSED(size_hint);
7618 d->top->end_ofs = UINT64_MAX;
7619 d->bufstart_ofs = 0;
7620 d->call_len = 1;
7621 d->callstack[0] = &halt;
7622 d->pc = pc;
7623 d->skip = 0;
7624 return d;
7625}
7626
7627void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
7628 upb_pbdecoder *d = closure;
7629 UPB_UNUSED(hd);
7630 UPB_UNUSED(size_hint);
7631 d->top->end_ofs = UINT64_MAX;
7632 d->bufstart_ofs = 0;
7633 d->call_len = 0;
7634 d->skip = 0;
7635 return d;
7636}
7637
7638bool upb_pbdecoder_end(void *closure, const void *handler_data) {
7639 upb_pbdecoder *d = closure;
7640 const upb_pbdecodermethod *method = handler_data;
7641 uint64_t end;
7642 char dummy;
7643
7644 if (d->residual_end > d->residual) {
7645 seterr(d, "Unexpected EOF: decoder still has buffered unparsed data");
7646 return false;
7647 }
7648
7649 if (d->skip) {
7650 seterr(d, "Unexpected EOF inside skipped data");
7651 return false;
7652 }
7653
7654 if (d->top->end_ofs != UINT64_MAX) {
7655 seterr(d, "Unexpected EOF inside delimited string");
7656 return false;
7657 }
7658
7659 /* The user's end() call indicates that the message ends here. */
7660 end = offset(d);
7661 d->top->end_ofs = end;
7662
7663#ifdef UPB_USE_JIT_X64
7664 if (method->is_native_) {
7665 const mgroup *group = (const mgroup*)method->group;
7666 if (d->top != d->stack)
7667 d->stack->end_ofs = 0;
7668 group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
7669 } else
7670#endif
7671 {
7672 const uint32_t *p = d->pc;
7673 d->stack->end_ofs = end;
7674 /* Check the previous bytecode, but guard against beginning. */
7675 if (p != method->code_base.ptr) p--;
7676 if (getop(*p) == OP_CHECKDELIM) {
7677 /* Rewind from OP_TAG* to OP_CHECKDELIM. */
Paul Yange0e54662016-09-15 11:09:01 -07007678 UPB_ASSERT(getop(*d->pc) == OP_TAG1 ||
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007679 getop(*d->pc) == OP_TAG2 ||
7680 getop(*d->pc) == OP_TAGN ||
7681 getop(*d->pc) == OP_DISPATCH);
7682 d->pc = p;
7683 }
7684 upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
7685 }
7686
7687 if (d->call_len != 0) {
7688 seterr(d, "Unexpected EOF inside submessage or group");
7689 return false;
7690 }
7691
7692 return true;
7693}
7694
7695size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf,
7696 size_t size, const upb_bufhandle *handle) {
7697 int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle);
7698
7699 if (result == DECODE_ENDGROUP) goto_endmsg(decoder);
7700 CHECK_RETURN(result);
7701
7702 return run_decoder_vm(decoder, group, handle);
7703}
7704
7705
7706/* Public API *****************************************************************/
7707
7708void upb_pbdecoder_reset(upb_pbdecoder *d) {
7709 d->top = d->stack;
7710 d->top->groupnum = 0;
7711 d->ptr = d->residual;
7712 d->buf = d->residual;
7713 d->end = d->residual;
7714 d->residual_end = d->residual;
7715}
7716
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007717upb_pbdecoder *upb_pbdecoder_create(upb_arena *a, const upb_pbdecodermethod *m,
7718 upb_sink sink, upb_status *status) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007719 const size_t default_max_nesting = 64;
7720#ifndef NDEBUG
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007721 size_t size_before = upb_arena_bytesallocated(a);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007722#endif
7723
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007724 upb_pbdecoder *d = upb_arena_malloc(a, sizeof(upb_pbdecoder));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007725 if (!d) return NULL;
7726
7727 d->method_ = m;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007728 d->callstack = upb_arena_malloc(a, callstacksize(d, default_max_nesting));
7729 d->stack = upb_arena_malloc(a, stacksize(d, default_max_nesting));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007730 if (!d->stack || !d->callstack) {
7731 return NULL;
7732 }
7733
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007734 d->arena = a;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007735 d->limit = d->stack + default_max_nesting - 1;
7736 d->stack_size = default_max_nesting;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007737 d->status = status;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007738
7739 upb_pbdecoder_reset(d);
7740 upb_bytessink_reset(&d->input_, &m->input_handler_, d);
7741
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007742 if (d->method_->dest_handlers_) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007743 if (sink.handlers != d->method_->dest_handlers_)
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007744 return NULL;
7745 }
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007746 d->top->sink = sink;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007747
7748 /* If this fails, increase the value in decoder.h. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007749 UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(a) - size_before <=
Paul Yange0e54662016-09-15 11:09:01 -07007750 UPB_PB_DECODER_SIZE);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007751 return d;
7752}
7753
7754uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
7755 return offset(d);
7756}
7757
7758const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
7759 return d->method_;
7760}
7761
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007762upb_bytessink upb_pbdecoder_input(upb_pbdecoder *d) {
7763 return d->input_;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007764}
7765
7766size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
7767 return d->stack_size;
7768}
7769
7770bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
Paul Yange0e54662016-09-15 11:09:01 -07007771 UPB_ASSERT(d->top >= d->stack);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007772
7773 if (max < (size_t)(d->top - d->stack)) {
7774 /* Can't set a limit smaller than what we are currently at. */
7775 return false;
7776 }
7777
7778 if (max > d->stack_size) {
7779 /* Need to reallocate stack and callstack to accommodate. */
7780 size_t old_size = stacksize(d, d->stack_size);
7781 size_t new_size = stacksize(d, max);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007782 void *p = upb_arena_realloc(d->arena, d->stack, old_size, new_size);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007783 if (!p) {
7784 return false;
7785 }
7786 d->stack = p;
7787
7788 old_size = callstacksize(d, d->stack_size);
7789 new_size = callstacksize(d, max);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007790 p = upb_arena_realloc(d->arena, d->callstack, old_size, new_size);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007791 if (!p) {
7792 return false;
7793 }
7794 d->callstack = p;
7795
7796 d->stack_size = max;
7797 }
7798
7799 d->limit = d->stack + max - 1;
7800 return true;
7801}
7802/*
7803** upb::Encoder
7804**
7805** Since we are implementing pure handlers (ie. without any out-of-band access
7806** to pre-computed lengths), we have to buffer all submessages before we can
7807** emit even their first byte.
7808**
7809** Not knowing the size of submessages also means we can't write a perfect
7810** zero-copy implementation, even with buffering. Lengths are stored as
7811** varints, which means that we don't know how many bytes to reserve for the
7812** length until we know what the length is.
7813**
7814** This leaves us with three main choices:
7815**
7816** 1. buffer all submessage data in a temporary buffer, then copy it exactly
7817** once into the output buffer.
7818**
7819** 2. attempt to buffer data directly into the output buffer, estimating how
7820** many bytes each length will take. When our guesses are wrong, use
7821** memmove() to grow or shrink the allotted space.
7822**
7823** 3. buffer directly into the output buffer, allocating a max length
7824** ahead-of-time for each submessage length. If we overallocated, we waste
7825** space, but no memcpy() or memmove() is required. This approach requires
7826** defining a maximum size for submessages and rejecting submessages that
7827** exceed that size.
7828**
7829** (2) and (3) have the potential to have better performance, but they are more
7830** complicated and subtle to implement:
7831**
7832** (3) requires making an arbitrary choice of the maximum message size; it
7833** wastes space when submessages are shorter than this and fails
7834** completely when they are longer. This makes it more finicky and
7835** requires configuration based on the input. It also makes it impossible
7836** to perfectly match the output of reference encoders that always use the
7837** optimal amount of space for each length.
7838**
7839** (2) requires guessing the the size upfront, and if multiple lengths are
7840** guessed wrong the minimum required number of memmove() operations may
7841** be complicated to compute correctly. Implemented properly, it may have
7842** a useful amortized or average cost, but more investigation is required
7843** to determine this and what the optimal algorithm is to achieve it.
7844**
7845** (1) makes you always pay for exactly one copy, but its implementation is
7846** the simplest and its performance is predictable.
7847**
7848** So for now, we implement (1) only. If we wish to optimize later, we should
7849** be able to do it without affecting users.
7850**
7851** The strategy is to buffer the segments of data that do *not* depend on
7852** unknown lengths in one buffer, and keep a separate buffer of segment pointers
7853** and lengths. When the top-level submessage ends, we can go beginning to end,
7854** alternating the writing of lengths with memcpy() of the rest of the data.
7855** At the top level though, no buffering is required.
7856*/
7857
7858
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007859
7860/* The output buffer is divided into segments; a segment is a string of data
7861 * that is "ready to go" -- it does not need any varint lengths inserted into
7862 * the middle. The seams between segments are where varints will be inserted
7863 * once they are known.
7864 *
7865 * We also use the concept of a "run", which is a range of encoded bytes that
7866 * occur at a single submessage level. Every segment contains one or more runs.
7867 *
7868 * A segment can span messages. Consider:
7869 *
7870 * .--Submessage lengths---------.
7871 * | | |
7872 * | V V
7873 * V | |--------------- | |-----------------
7874 * Submessages: | |-----------------------------------------------
7875 * Top-level msg: ------------------------------------------------------------
7876 *
7877 * Segments: ----- ------------------- -----------------
7878 * Runs: *---- *--------------*--- *----------------
7879 * (* marks the start)
7880 *
7881 * Note that the top-level menssage is not in any segment because it does not
7882 * have any length preceding it.
7883 *
7884 * A segment is only interrupted when another length needs to be inserted. So
7885 * observe how the second segment spans both the inner submessage and part of
7886 * the next enclosing message. */
7887typedef struct {
7888 uint32_t msglen; /* The length to varint-encode before this segment. */
7889 uint32_t seglen; /* Length of the segment. */
7890} upb_pb_encoder_segment;
7891
7892struct upb_pb_encoder {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007893 upb_arena *arena;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007894
7895 /* Our input and output. */
7896 upb_sink input_;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007897 upb_bytessink output_;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007898
7899 /* The "subclosure" -- used as the inner closure as part of the bytessink
7900 * protocol. */
7901 void *subc;
7902
7903 /* The output buffer and limit, and our current write position. "buf"
7904 * initially points to "initbuf", but is dynamically allocated if we need to
7905 * grow beyond the initial size. */
7906 char *buf, *ptr, *limit;
7907
7908 /* The beginning of the current run, or undefined if we are at the top
7909 * level. */
7910 char *runbegin;
7911
7912 /* The list of segments we are accumulating. */
7913 upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
7914
7915 /* The stack of enclosing submessages. Each entry in the stack points to the
7916 * segment where this submessage's length is being accumulated. */
7917 int *stack, *top, *stacklimit;
7918
7919 /* Depth of startmsg/endmsg calls. */
7920 int depth;
7921};
7922
7923/* low-level buffering ********************************************************/
7924
7925/* Low-level functions for interacting with the output buffer. */
7926
7927/* TODO(haberman): handle pushback */
7928static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
7929 size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
Paul Yange0e54662016-09-15 11:09:01 -07007930 UPB_ASSERT(n == len);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007931}
7932
7933static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
7934 return &e->segbuf[*e->top];
7935}
7936
7937/* Call to ensure that at least "bytes" bytes are available for writing at
7938 * e->ptr. Returns false if the bytes could not be allocated. */
7939static bool reserve(upb_pb_encoder *e, size_t bytes) {
7940 if ((size_t)(e->limit - e->ptr) < bytes) {
7941 /* Grow buffer. */
7942 char *new_buf;
7943 size_t needed = bytes + (e->ptr - e->buf);
7944 size_t old_size = e->limit - e->buf;
7945
7946 size_t new_size = old_size;
7947
7948 while (new_size < needed) {
7949 new_size *= 2;
7950 }
7951
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08007952 new_buf = upb_arena_realloc(e->arena, e->buf, old_size, new_size);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007953
7954 if (new_buf == NULL) {
7955 return false;
7956 }
7957
7958 e->ptr = new_buf + (e->ptr - e->buf);
7959 e->runbegin = new_buf + (e->runbegin - e->buf);
7960 e->limit = new_buf + new_size;
7961 e->buf = new_buf;
7962 }
7963
7964 return true;
7965}
7966
7967/* Call when "bytes" bytes have been writte at e->ptr. The caller *must* have
7968 * previously called reserve() with at least this many bytes. */
7969static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
Paul Yange0e54662016-09-15 11:09:01 -07007970 UPB_ASSERT((size_t)(e->limit - e->ptr) >= bytes);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07007971 e->ptr += bytes;
7972}
7973
7974/* Call when all of the bytes for a handler have been written. Flushes the
7975 * bytes if possible and necessary, returning false if this failed. */
7976static bool commit(upb_pb_encoder *e) {
7977 if (!e->top) {
7978 /* We aren't inside a delimited region. Flush our accumulated bytes to
7979 * the output.
7980 *
7981 * TODO(haberman): in the future we may want to delay flushing for
7982 * efficiency reasons. */
7983 putbuf(e, e->buf, e->ptr - e->buf);
7984 e->ptr = e->buf;
7985 }
7986
7987 return true;
7988}
7989
7990/* Writes the given bytes to the buffer, handling reserve/advance. */
7991static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
7992 if (!reserve(e, len)) {
7993 return false;
7994 }
7995
7996 memcpy(e->ptr, data, len);
7997 encoder_advance(e, len);
7998 return true;
7999}
8000
8001/* Finish the current run by adding the run totals to the segment and message
8002 * length. */
8003static void accumulate(upb_pb_encoder *e) {
8004 size_t run_len;
Paul Yange0e54662016-09-15 11:09:01 -07008005 UPB_ASSERT(e->ptr >= e->runbegin);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008006 run_len = e->ptr - e->runbegin;
8007 e->segptr->seglen += run_len;
8008 top(e)->msglen += run_len;
8009 e->runbegin = e->ptr;
8010}
8011
8012/* Call to indicate the start of delimited region for which the full length is
8013 * not yet known. All data will be buffered until the length is known.
8014 * Delimited regions may be nested; their lengths will all be tracked properly. */
8015static bool start_delim(upb_pb_encoder *e) {
8016 if (e->top) {
8017 /* We are already buffering, advance to the next segment and push it on the
8018 * stack. */
8019 accumulate(e);
8020
8021 if (++e->top == e->stacklimit) {
8022 /* TODO(haberman): grow stack? */
8023 return false;
8024 }
8025
8026 if (++e->segptr == e->seglimit) {
8027 /* Grow segment buffer. */
8028 size_t old_size =
8029 (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
8030 size_t new_size = old_size * 2;
8031 upb_pb_encoder_segment *new_buf =
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008032 upb_arena_realloc(e->arena, e->segbuf, old_size, new_size);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008033
8034 if (new_buf == NULL) {
8035 return false;
8036 }
8037
8038 e->segptr = new_buf + (e->segptr - e->segbuf);
8039 e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
8040 e->segbuf = new_buf;
8041 }
8042 } else {
8043 /* We were previously at the top level, start buffering. */
8044 e->segptr = e->segbuf;
8045 e->top = e->stack;
8046 e->runbegin = e->ptr;
8047 }
8048
8049 *e->top = e->segptr - e->segbuf;
8050 e->segptr->seglen = 0;
8051 e->segptr->msglen = 0;
8052
8053 return true;
8054}
8055
8056/* Call to indicate the end of a delimited region. We now know the length of
8057 * the delimited region. If we are not nested inside any other delimited
8058 * regions, we can now emit all of the buffered data we accumulated. */
8059static bool end_delim(upb_pb_encoder *e) {
8060 size_t msglen;
8061 accumulate(e);
8062 msglen = top(e)->msglen;
8063
8064 if (e->top == e->stack) {
8065 /* All lengths are now available, emit all buffered data. */
8066 char buf[UPB_PB_VARINT_MAX_LEN];
8067 upb_pb_encoder_segment *s;
8068 const char *ptr = e->buf;
8069 for (s = e->segbuf; s <= e->segptr; s++) {
8070 size_t lenbytes = upb_vencode64(s->msglen, buf);
8071 putbuf(e, buf, lenbytes);
8072 putbuf(e, ptr, s->seglen);
8073 ptr += s->seglen;
8074 }
8075
8076 e->ptr = e->buf;
8077 e->top = NULL;
8078 } else {
8079 /* Need to keep buffering; propagate length info into enclosing
8080 * submessages. */
8081 --e->top;
8082 top(e)->msglen += msglen + upb_varint_size(msglen);
8083 }
8084
8085 return true;
8086}
8087
8088
8089/* tag_t **********************************************************************/
8090
8091/* A precomputed (pre-encoded) tag and length. */
8092
8093typedef struct {
8094 uint8_t bytes;
8095 char tag[7];
8096} tag_t;
8097
8098/* Allocates a new tag for this field, and sets it in these handlerattr. */
8099static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
8100 upb_handlerattr *attr) {
8101 uint32_t n = upb_fielddef_number(f);
8102
Paul Yange0e54662016-09-15 11:09:01 -07008103 tag_t *tag = upb_gmalloc(sizeof(tag_t));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008104 tag->bytes = upb_vencode64((n << 3) | wt, tag->tag);
8105
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008106 attr->handler_data = tag;
Paul Yange0e54662016-09-15 11:09:01 -07008107 upb_handlers_addcleanup(h, tag, upb_gfree);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008108}
8109
8110static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
8111 return encode_bytes(e, tag->tag, tag->bytes);
8112}
8113
8114
8115/* encoding of wire types *****************************************************/
8116
8117static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
8118 /* TODO(haberman): byte-swap for big endian. */
8119 return encode_bytes(e, &val, sizeof(uint64_t));
8120}
8121
8122static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
8123 /* TODO(haberman): byte-swap for big endian. */
8124 return encode_bytes(e, &val, sizeof(uint32_t));
8125}
8126
8127static bool encode_varint(upb_pb_encoder *e, uint64_t val) {
8128 if (!reserve(e, UPB_PB_VARINT_MAX_LEN)) {
8129 return false;
8130 }
8131
8132 encoder_advance(e, upb_vencode64(val, e->ptr));
8133 return true;
8134}
8135
8136static uint64_t dbl2uint64(double d) {
8137 uint64_t ret;
8138 memcpy(&ret, &d, sizeof(uint64_t));
8139 return ret;
8140}
8141
8142static uint32_t flt2uint32(float d) {
8143 uint32_t ret;
8144 memcpy(&ret, &d, sizeof(uint32_t));
8145 return ret;
8146}
8147
8148
8149/* encoding of proto types ****************************************************/
8150
8151static bool startmsg(void *c, const void *hd) {
8152 upb_pb_encoder *e = c;
8153 UPB_UNUSED(hd);
8154 if (e->depth++ == 0) {
8155 upb_bytessink_start(e->output_, 0, &e->subc);
8156 }
8157 return true;
8158}
8159
8160static bool endmsg(void *c, const void *hd, upb_status *status) {
8161 upb_pb_encoder *e = c;
8162 UPB_UNUSED(hd);
8163 UPB_UNUSED(status);
8164 if (--e->depth == 0) {
8165 upb_bytessink_end(e->output_);
8166 }
8167 return true;
8168}
8169
8170static void *encode_startdelimfield(void *c, const void *hd) {
8171 bool ok = encode_tag(c, hd) && commit(c) && start_delim(c);
8172 return ok ? c : UPB_BREAK;
8173}
8174
Paul Yang60327462017-10-09 12:39:13 -07008175static bool encode_unknown(void *c, const void *hd, const char *buf,
8176 size_t len) {
8177 UPB_UNUSED(hd);
8178 return encode_bytes(c, buf, len) && commit(c);
8179}
8180
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008181static bool encode_enddelimfield(void *c, const void *hd) {
8182 UPB_UNUSED(hd);
8183 return end_delim(c);
8184}
8185
8186static void *encode_startgroup(void *c, const void *hd) {
8187 return (encode_tag(c, hd) && commit(c)) ? c : UPB_BREAK;
8188}
8189
8190static bool encode_endgroup(void *c, const void *hd) {
8191 return encode_tag(c, hd) && commit(c);
8192}
8193
8194static void *encode_startstr(void *c, const void *hd, size_t size_hint) {
8195 UPB_UNUSED(size_hint);
8196 return encode_startdelimfield(c, hd);
8197}
8198
8199static size_t encode_strbuf(void *c, const void *hd, const char *buf,
8200 size_t len, const upb_bufhandle *h) {
8201 UPB_UNUSED(hd);
8202 UPB_UNUSED(h);
8203 return encode_bytes(c, buf, len) ? len : 0;
8204}
8205
8206#define T(type, ctype, convert, encode) \
8207 static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \
8208 return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e); \
8209 } \
8210 static bool encode_packed_##type(void *e, const void *hd, ctype val) { \
8211 UPB_UNUSED(hd); \
8212 return encode(e, (convert)(val)); \
8213 }
8214
8215T(double, double, dbl2uint64, encode_fixed64)
8216T(float, float, flt2uint32, encode_fixed32)
8217T(int64, int64_t, uint64_t, encode_varint)
Paul Yangf23869c2017-02-07 21:33:28 -08008218T(int32, int32_t, int64_t, encode_varint)
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008219T(fixed64, uint64_t, uint64_t, encode_fixed64)
8220T(fixed32, uint32_t, uint32_t, encode_fixed32)
8221T(bool, bool, bool, encode_varint)
8222T(uint32, uint32_t, uint32_t, encode_varint)
8223T(uint64, uint64_t, uint64_t, encode_varint)
8224T(enum, int32_t, uint32_t, encode_varint)
8225T(sfixed32, int32_t, uint32_t, encode_fixed32)
8226T(sfixed64, int64_t, uint64_t, encode_fixed64)
8227T(sint32, int32_t, upb_zzenc_32, encode_varint)
8228T(sint64, int64_t, upb_zzenc_64, encode_varint)
8229
8230#undef T
8231
8232
8233/* code to build the handlers *************************************************/
8234
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008235#include <stdio.h>
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008236static void newhandlers_callback(const void *closure, upb_handlers *h) {
8237 const upb_msgdef *m;
8238 upb_msg_field_iter i;
8239
8240 UPB_UNUSED(closure);
8241
8242 upb_handlers_setstartmsg(h, startmsg, NULL);
8243 upb_handlers_setendmsg(h, endmsg, NULL);
Paul Yang60327462017-10-09 12:39:13 -07008244 upb_handlers_setunknown(h, encode_unknown, NULL);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008245
8246 m = upb_handlers_msgdef(h);
8247 for(upb_msg_field_begin(&i, m);
8248 !upb_msg_field_done(&i);
8249 upb_msg_field_next(&i)) {
8250 const upb_fielddef *f = upb_msg_iter_field(&i);
8251 bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
8252 upb_fielddef_packed(f);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008253 upb_handlerattr attr = UPB_HANDLERATTR_INIT;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008254 upb_wiretype_t wt =
8255 packed ? UPB_WIRE_TYPE_DELIMITED
8256 : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
8257
8258 /* Pre-encode the tag for this field. */
8259 new_tag(h, f, wt, &attr);
8260
8261 if (packed) {
8262 upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr);
8263 upb_handlers_setendseq(h, f, encode_enddelimfield, &attr);
8264 }
8265
8266#define T(upper, lower, upbtype) \
8267 case UPB_DESCRIPTOR_TYPE_##upper: \
8268 if (packed) { \
8269 upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \
8270 } else { \
8271 upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \
8272 } \
8273 break;
8274
8275 switch (upb_fielddef_descriptortype(f)) {
8276 T(DOUBLE, double, double);
8277 T(FLOAT, float, float);
8278 T(INT64, int64, int64);
8279 T(INT32, int32, int32);
8280 T(FIXED64, fixed64, uint64);
8281 T(FIXED32, fixed32, uint32);
8282 T(BOOL, bool, bool);
8283 T(UINT32, uint32, uint32);
8284 T(UINT64, uint64, uint64);
8285 T(ENUM, enum, int32);
8286 T(SFIXED32, sfixed32, int32);
8287 T(SFIXED64, sfixed64, int64);
8288 T(SINT32, sint32, int32);
8289 T(SINT64, sint64, int64);
8290 case UPB_DESCRIPTOR_TYPE_STRING:
8291 case UPB_DESCRIPTOR_TYPE_BYTES:
8292 upb_handlers_setstartstr(h, f, encode_startstr, &attr);
8293 upb_handlers_setendstr(h, f, encode_enddelimfield, &attr);
8294 upb_handlers_setstring(h, f, encode_strbuf, &attr);
8295 break;
8296 case UPB_DESCRIPTOR_TYPE_MESSAGE:
8297 upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr);
8298 upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
8299 break;
8300 case UPB_DESCRIPTOR_TYPE_GROUP: {
8301 /* Endgroup takes a different tag (wire_type = END_GROUP). */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008302 upb_handlerattr attr2 = UPB_HANDLERATTR_INIT;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008303 new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);
8304
8305 upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr);
8306 upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2);
8307
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008308 break;
8309 }
8310 }
8311
8312#undef T
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008313 }
8314}
8315
8316void upb_pb_encoder_reset(upb_pb_encoder *e) {
8317 e->segptr = NULL;
8318 e->top = NULL;
8319 e->depth = 0;
8320}
8321
8322
8323/* public API *****************************************************************/
8324
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008325upb_handlercache *upb_pb_encoder_newcache() {
8326 return upb_handlercache_new(newhandlers_callback, NULL);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008327}
8328
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008329upb_pb_encoder *upb_pb_encoder_create(upb_arena *arena, const upb_handlers *h,
8330 upb_bytessink output) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008331 const size_t initial_bufsize = 256;
8332 const size_t initial_segbufsize = 16;
8333 /* TODO(haberman): make this configurable. */
8334 const size_t stack_size = 64;
8335#ifndef NDEBUG
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008336 const size_t size_before = upb_arena_bytesallocated(arena);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008337#endif
8338
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008339 upb_pb_encoder *e = upb_arena_malloc(arena, sizeof(upb_pb_encoder));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008340 if (!e) return NULL;
8341
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008342 e->buf = upb_arena_malloc(arena, initial_bufsize);
8343 e->segbuf = upb_arena_malloc(arena, initial_segbufsize * sizeof(*e->segbuf));
8344 e->stack = upb_arena_malloc(arena, stack_size * sizeof(*e->stack));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008345
8346 if (!e->buf || !e->segbuf || !e->stack) {
8347 return NULL;
8348 }
8349
8350 e->limit = e->buf + initial_bufsize;
8351 e->seglimit = e->segbuf + initial_segbufsize;
8352 e->stacklimit = e->stack + stack_size;
8353
8354 upb_pb_encoder_reset(e);
8355 upb_sink_reset(&e->input_, h, e);
8356
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008357 e->arena = arena;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008358 e->output_ = output;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008359 e->subc = output.closure;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008360 e->ptr = e->buf;
8361
8362 /* If this fails, increase the value in encoder.h. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008363 UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(arena) - size_before <=
Paul Yange0e54662016-09-15 11:09:01 -07008364 UPB_PB_ENCODER_SIZE);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008365 return e;
8366}
8367
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008368upb_sink upb_pb_encoder_input(upb_pb_encoder *e) { return e->input_; }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008369/*
8370 * upb::pb::TextPrinter
8371 *
8372 * OPT: This is not optimized at all. It uses printf() which parses the format
8373 * string every time, and it allocates memory for every put.
8374 */
8375
8376
8377#include <ctype.h>
8378#include <float.h>
8379#include <inttypes.h>
8380#include <stdarg.h>
8381#include <stdio.h>
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008382#include <string.h>
8383
8384
8385struct upb_textprinter {
8386 upb_sink input_;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008387 upb_bytessink output_;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008388 int indent_depth_;
8389 bool single_line_;
8390 void *subc;
8391};
8392
8393#define CHECK(x) if ((x) < 0) goto err;
8394
8395static const char *shortname(const char *longname) {
8396 const char *last = strrchr(longname, '.');
8397 return last ? last + 1 : longname;
8398}
8399
8400static int indent(upb_textprinter *p) {
8401 int i;
8402 if (!p->single_line_)
8403 for (i = 0; i < p->indent_depth_; i++)
8404 upb_bytessink_putbuf(p->output_, p->subc, " ", 2, NULL);
8405 return 0;
8406}
8407
8408static int endfield(upb_textprinter *p) {
8409 const char ch = (p->single_line_ ? ' ' : '\n');
8410 upb_bytessink_putbuf(p->output_, p->subc, &ch, 1, NULL);
8411 return 0;
8412}
8413
8414static int putescaped(upb_textprinter *p, const char *buf, size_t len,
8415 bool preserve_utf8) {
8416 /* Based on CEscapeInternal() from Google's protobuf release. */
8417 char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
8418 const char *end = buf + len;
8419
8420 /* I think hex is prettier and more useful, but proto2 uses octal; should
8421 * investigate whether it can parse hex also. */
8422 const bool use_hex = false;
8423 bool last_hex_escape = false; /* true if last output char was \xNN */
8424
8425 for (; buf < end; buf++) {
8426 bool is_hex_escape;
8427
8428 if (dstend - dst < 4) {
8429 upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
8430 dst = dstbuf;
8431 }
8432
8433 is_hex_escape = false;
8434 switch (*buf) {
8435 case '\n': *(dst++) = '\\'; *(dst++) = 'n'; break;
8436 case '\r': *(dst++) = '\\'; *(dst++) = 'r'; break;
8437 case '\t': *(dst++) = '\\'; *(dst++) = 't'; break;
8438 case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
8439 case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
8440 case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
8441 default:
8442 /* Note that if we emit \xNN and the buf character after that is a hex
8443 * digit then that digit must be escaped too to prevent it being
8444 * interpreted as part of the character code by C. */
8445 if ((!preserve_utf8 || (uint8_t)*buf < 0x80) &&
8446 (!isprint(*buf) || (last_hex_escape && isxdigit(*buf)))) {
8447 sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*buf);
8448 is_hex_escape = use_hex;
8449 dst += 4;
8450 } else {
8451 *(dst++) = *buf; break;
8452 }
8453 }
8454 last_hex_escape = is_hex_escape;
8455 }
8456 /* Flush remaining data. */
8457 upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
8458 return 0;
8459}
8460
8461bool putf(upb_textprinter *p, const char *fmt, ...) {
8462 va_list args;
8463 va_list args_copy;
8464 char *str;
8465 int written;
8466 int len;
8467 bool ok;
8468
8469 va_start(args, fmt);
8470
8471 /* Run once to get the length of the string. */
8472 _upb_va_copy(args_copy, args);
8473 len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
8474 va_end(args_copy);
8475
8476 /* + 1 for NULL terminator (vsprintf() requires it even if we don't). */
Paul Yange0e54662016-09-15 11:09:01 -07008477 str = upb_gmalloc(len + 1);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008478 if (!str) return false;
8479 written = vsprintf(str, fmt, args);
8480 va_end(args);
Paul Yange0e54662016-09-15 11:09:01 -07008481 UPB_ASSERT(written == len);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008482
8483 ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
Paul Yange0e54662016-09-15 11:09:01 -07008484 upb_gfree(str);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008485 return ok;
8486}
8487
8488
8489/* handlers *******************************************************************/
8490
8491static bool textprinter_startmsg(void *c, const void *hd) {
8492 upb_textprinter *p = c;
8493 UPB_UNUSED(hd);
8494 if (p->indent_depth_ == 0) {
8495 upb_bytessink_start(p->output_, 0, &p->subc);
8496 }
8497 return true;
8498}
8499
8500static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
8501 upb_textprinter *p = c;
8502 UPB_UNUSED(hd);
8503 UPB_UNUSED(s);
8504 if (p->indent_depth_ == 0) {
8505 upb_bytessink_end(p->output_);
8506 }
8507 return true;
8508}
8509
8510#define TYPE(name, ctype, fmt) \
8511 static bool textprinter_put ## name(void *closure, const void *handler_data, \
8512 ctype val) { \
8513 upb_textprinter *p = closure; \
8514 const upb_fielddef *f = handler_data; \
8515 CHECK(indent(p)); \
8516 putf(p, "%s: " fmt, upb_fielddef_name(f), val); \
8517 CHECK(endfield(p)); \
8518 return true; \
8519 err: \
8520 return false; \
8521}
8522
8523static bool textprinter_putbool(void *closure, const void *handler_data,
8524 bool val) {
8525 upb_textprinter *p = closure;
8526 const upb_fielddef *f = handler_data;
8527 CHECK(indent(p));
8528 putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false");
8529 CHECK(endfield(p));
8530 return true;
8531err:
8532 return false;
8533}
8534
8535#define STRINGIFY_HELPER(x) #x
8536#define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x)
8537
8538TYPE(int32, int32_t, "%" PRId32)
8539TYPE(int64, int64_t, "%" PRId64)
8540TYPE(uint32, uint32_t, "%" PRIu32)
8541TYPE(uint64, uint64_t, "%" PRIu64)
8542TYPE(float, float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
8543TYPE(double, double, "%." STRINGIFY_MACROVAL(DBL_DIG) "g")
8544
8545#undef TYPE
8546
8547/* Output a symbolic value from the enum if found, else just print as int32. */
8548static bool textprinter_putenum(void *closure, const void *handler_data,
8549 int32_t val) {
8550 upb_textprinter *p = closure;
8551 const upb_fielddef *f = handler_data;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008552 const upb_enumdef *enum_def = upb_fielddef_enumsubdef(f);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008553 const char *label = upb_enumdef_iton(enum_def, val);
8554 if (label) {
8555 indent(p);
8556 putf(p, "%s: %s", upb_fielddef_name(f), label);
8557 endfield(p);
8558 } else {
8559 if (!textprinter_putint32(closure, handler_data, val))
8560 return false;
8561 }
8562 return true;
8563}
8564
8565static void *textprinter_startstr(void *closure, const void *handler_data,
8566 size_t size_hint) {
8567 upb_textprinter *p = closure;
8568 const upb_fielddef *f = handler_data;
8569 UPB_UNUSED(size_hint);
8570 indent(p);
8571 putf(p, "%s: \"", upb_fielddef_name(f));
8572 return p;
8573}
8574
8575static bool textprinter_endstr(void *closure, const void *handler_data) {
8576 upb_textprinter *p = closure;
8577 UPB_UNUSED(handler_data);
8578 putf(p, "\"");
8579 endfield(p);
8580 return true;
8581}
8582
8583static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
8584 size_t len, const upb_bufhandle *handle) {
8585 upb_textprinter *p = closure;
8586 const upb_fielddef *f = hd;
8587 UPB_UNUSED(handle);
8588 CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
8589 return len;
8590err:
8591 return 0;
8592}
8593
8594static void *textprinter_startsubmsg(void *closure, const void *handler_data) {
8595 upb_textprinter *p = closure;
8596 const char *name = handler_data;
8597 CHECK(indent(p));
8598 putf(p, "%s {%c", name, p->single_line_ ? ' ' : '\n');
8599 p->indent_depth_++;
8600 return p;
8601err:
8602 return UPB_BREAK;
8603}
8604
8605static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
8606 upb_textprinter *p = closure;
8607 UPB_UNUSED(handler_data);
8608 p->indent_depth_--;
8609 CHECK(indent(p));
8610 upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL);
8611 CHECK(endfield(p));
8612 return true;
8613err:
8614 return false;
8615}
8616
8617static void onmreg(const void *c, upb_handlers *h) {
8618 const upb_msgdef *m = upb_handlers_msgdef(h);
8619 upb_msg_field_iter i;
8620 UPB_UNUSED(c);
8621
8622 upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
8623 upb_handlers_setendmsg(h, textprinter_endmsg, NULL);
8624
8625 for(upb_msg_field_begin(&i, m);
8626 !upb_msg_field_done(&i);
8627 upb_msg_field_next(&i)) {
8628 upb_fielddef *f = upb_msg_iter_field(&i);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008629 upb_handlerattr attr = UPB_HANDLERATTR_INIT;
8630 attr.handler_data = f;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008631 switch (upb_fielddef_type(f)) {
8632 case UPB_TYPE_INT32:
8633 upb_handlers_setint32(h, f, textprinter_putint32, &attr);
8634 break;
8635 case UPB_TYPE_INT64:
8636 upb_handlers_setint64(h, f, textprinter_putint64, &attr);
8637 break;
8638 case UPB_TYPE_UINT32:
8639 upb_handlers_setuint32(h, f, textprinter_putuint32, &attr);
8640 break;
8641 case UPB_TYPE_UINT64:
8642 upb_handlers_setuint64(h, f, textprinter_putuint64, &attr);
8643 break;
8644 case UPB_TYPE_FLOAT:
8645 upb_handlers_setfloat(h, f, textprinter_putfloat, &attr);
8646 break;
8647 case UPB_TYPE_DOUBLE:
8648 upb_handlers_setdouble(h, f, textprinter_putdouble, &attr);
8649 break;
8650 case UPB_TYPE_BOOL:
8651 upb_handlers_setbool(h, f, textprinter_putbool, &attr);
8652 break;
8653 case UPB_TYPE_STRING:
8654 case UPB_TYPE_BYTES:
8655 upb_handlers_setstartstr(h, f, textprinter_startstr, &attr);
8656 upb_handlers_setstring(h, f, textprinter_putstr, &attr);
8657 upb_handlers_setendstr(h, f, textprinter_endstr, &attr);
8658 break;
8659 case UPB_TYPE_MESSAGE: {
8660 const char *name =
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008661 upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_GROUP
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008662 ? shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(f)))
8663 : upb_fielddef_name(f);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008664 attr.handler_data = name;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008665 upb_handlers_setstartsubmsg(h, f, textprinter_startsubmsg, &attr);
8666 upb_handlers_setendsubmsg(h, f, textprinter_endsubmsg, &attr);
8667 break;
8668 }
8669 case UPB_TYPE_ENUM:
8670 upb_handlers_setint32(h, f, textprinter_putenum, &attr);
8671 break;
8672 }
8673 }
8674}
8675
8676static void textprinter_reset(upb_textprinter *p, bool single_line) {
8677 p->single_line_ = single_line;
8678 p->indent_depth_ = 0;
8679}
8680
8681
8682/* Public API *****************************************************************/
8683
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008684upb_textprinter *upb_textprinter_create(upb_arena *arena, const upb_handlers *h,
8685 upb_bytessink output) {
8686 upb_textprinter *p = upb_arena_malloc(arena, sizeof(upb_textprinter));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008687 if (!p) return NULL;
8688
8689 p->output_ = output;
8690 upb_sink_reset(&p->input_, h, p);
8691 textprinter_reset(p, false);
8692
8693 return p;
8694}
8695
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008696upb_handlercache *upb_textprinter_newcache() {
8697 return upb_handlercache_new(&onmreg, NULL);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008698}
8699
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008700upb_sink upb_textprinter_input(upb_textprinter *p) { return p->input_; }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008701
8702void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
8703 p->single_line_ = single_line;
8704}
8705
8706
8707/* Index is descriptor type. */
8708const uint8_t upb_pb_native_wire_types[] = {
8709 UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
8710 UPB_WIRE_TYPE_64BIT, /* DOUBLE */
8711 UPB_WIRE_TYPE_32BIT, /* FLOAT */
8712 UPB_WIRE_TYPE_VARINT, /* INT64 */
8713 UPB_WIRE_TYPE_VARINT, /* UINT64 */
8714 UPB_WIRE_TYPE_VARINT, /* INT32 */
8715 UPB_WIRE_TYPE_64BIT, /* FIXED64 */
8716 UPB_WIRE_TYPE_32BIT, /* FIXED32 */
8717 UPB_WIRE_TYPE_VARINT, /* BOOL */
8718 UPB_WIRE_TYPE_DELIMITED, /* STRING */
8719 UPB_WIRE_TYPE_START_GROUP, /* GROUP */
8720 UPB_WIRE_TYPE_DELIMITED, /* MESSAGE */
8721 UPB_WIRE_TYPE_DELIMITED, /* BYTES */
8722 UPB_WIRE_TYPE_VARINT, /* UINT32 */
8723 UPB_WIRE_TYPE_VARINT, /* ENUM */
8724 UPB_WIRE_TYPE_32BIT, /* SFIXED32 */
8725 UPB_WIRE_TYPE_64BIT, /* SFIXED64 */
8726 UPB_WIRE_TYPE_VARINT, /* SINT32 */
8727 UPB_WIRE_TYPE_VARINT, /* SINT64 */
8728};
8729
8730/* A basic branch-based decoder, uses 32-bit values to get good performance
8731 * on 32-bit architectures (but performs well on 64-bits also).
8732 * This scheme comes from the original Google Protobuf implementation
8733 * (proto2). */
8734upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
8735 upb_decoderet err = {NULL, 0};
8736 const char *p = r.p;
8737 uint32_t low = (uint32_t)r.val;
8738 uint32_t high = 0;
8739 uint32_t b;
8740 b = *(p++); low |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
8741 b = *(p++); low |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
8742 b = *(p++); low |= (b & 0x7fU) << 28;
8743 high = (b & 0x7fU) >> 4; if (!(b & 0x80)) goto done;
8744 b = *(p++); high |= (b & 0x7fU) << 3; if (!(b & 0x80)) goto done;
8745 b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done;
8746 b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done;
8747 b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done;
8748 b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done;
8749 return err;
8750
8751done:
8752 r.val = ((uint64_t)high << 32) | low;
8753 r.p = p;
8754 return r;
8755}
8756
8757/* Like the previous, but uses 64-bit values. */
8758upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
8759 const char *p = r.p;
8760 uint64_t val = r.val;
8761 uint64_t b;
8762 upb_decoderet err = {NULL, 0};
8763 b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
8764 b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
8765 b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done;
8766 b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done;
8767 b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done;
8768 b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done;
8769 b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done;
8770 b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done;
8771 return err;
8772
8773done:
8774 r.val = val;
8775 r.p = p;
8776 return r;
8777}
8778
Paul Yang8faa7782018-12-26 10:36:09 -08008779#line 1 "upb/json/parser.rl"
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008780/*
8781** upb::json::Parser (upb_json_parser)
8782**
8783** A parser that uses the Ragel State Machine Compiler to generate
8784** the finite automata.
8785**
8786** Ragel only natively handles regular languages, but we can manually
8787** program it a bit to handle context-free languages like JSON, by using
8788** the "fcall" and "fret" constructs.
8789**
8790** This parser can handle the basics, but needs several things to be fleshed
8791** out:
8792**
8793** - handling of unicode escape sequences (including high surrogate pairs).
8794** - properly check and report errors for unknown fields, stack overflow,
8795** improper array nesting (or lack of nesting).
8796** - handling of base64 sequences with padding characters.
8797** - handling of push-back (non-success returns from sink functions).
8798** - handling of keys/escape-sequences/etc that span input buffers.
8799*/
8800
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008801#include <ctype.h>
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008802#include <errno.h>
Paul Yang60327462017-10-09 12:39:13 -07008803#include <float.h>
8804#include <math.h>
Paul Yange0e54662016-09-15 11:09:01 -07008805#include <stdint.h>
Paul Yang9bda1f12018-09-22 18:57:43 -07008806#include <stdio.h>
Paul Yange0e54662016-09-15 11:09:01 -07008807#include <stdlib.h>
8808#include <string.h>
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008809
Paul Yang9bda1f12018-09-22 18:57:43 -07008810#include <time.h>
8811
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008812
8813#define UPB_JSON_MAX_DEPTH 64
8814
Paul Yang9bda1f12018-09-22 18:57:43 -07008815/* Type of value message */
8816enum {
8817 VALUE_NULLVALUE = 0,
8818 VALUE_NUMBERVALUE = 1,
8819 VALUE_STRINGVALUE = 2,
8820 VALUE_BOOLVALUE = 3,
8821 VALUE_STRUCTVALUE = 4,
8822 VALUE_LISTVALUE = 5
8823};
Bo Yange3ee7162018-08-10 18:09:02 +00008824
8825/* Forward declare */
8826static bool is_top_level(upb_json_parser *p);
Paul Yang9bda1f12018-09-22 18:57:43 -07008827static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type);
8828static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type);
Bo Yange3ee7162018-08-10 18:09:02 +00008829
8830static bool is_number_wrapper_object(upb_json_parser *p);
8831static bool does_number_wrapper_start(upb_json_parser *p);
8832static bool does_number_wrapper_end(upb_json_parser *p);
8833
8834static bool is_string_wrapper_object(upb_json_parser *p);
8835static bool does_string_wrapper_start(upb_json_parser *p);
8836static bool does_string_wrapper_end(upb_json_parser *p);
8837
Paul Yangc4f2a922019-01-17 10:18:43 -08008838static bool does_fieldmask_start(upb_json_parser *p);
8839static bool does_fieldmask_end(upb_json_parser *p);
8840static void start_fieldmask_object(upb_json_parser *p);
8841static void end_fieldmask_object(upb_json_parser *p);
8842
Bo Yange3ee7162018-08-10 18:09:02 +00008843static void start_wrapper_object(upb_json_parser *p);
8844static void end_wrapper_object(upb_json_parser *p);
8845
Paul Yang9bda1f12018-09-22 18:57:43 -07008846static void start_value_object(upb_json_parser *p, int value_type);
8847static void end_value_object(upb_json_parser *p);
8848
8849static void start_listvalue_object(upb_json_parser *p);
8850static void end_listvalue_object(upb_json_parser *p);
8851
8852static void start_structvalue_object(upb_json_parser *p);
8853static void end_structvalue_object(upb_json_parser *p);
8854
8855static void start_object(upb_json_parser *p);
8856static void end_object(upb_json_parser *p);
8857
Paul Yang8faa7782018-12-26 10:36:09 -08008858static void start_any_object(upb_json_parser *p, const char *ptr);
8859static bool end_any_object(upb_json_parser *p, const char *ptr);
8860
Bo Yange3ee7162018-08-10 18:09:02 +00008861static bool start_subobject(upb_json_parser *p);
8862static void end_subobject(upb_json_parser *p);
8863
Paul Yang9bda1f12018-09-22 18:57:43 -07008864static void start_member(upb_json_parser *p);
8865static void end_member(upb_json_parser *p);
8866static bool end_membername(upb_json_parser *p);
8867
Paul Yang8faa7782018-12-26 10:36:09 -08008868static void start_any_member(upb_json_parser *p, const char *ptr);
8869static void end_any_member(upb_json_parser *p, const char *ptr);
8870static bool end_any_membername(upb_json_parser *p);
8871
8872size_t parse(void *closure, const void *hd, const char *buf, size_t size,
8873 const upb_bufhandle *handle);
8874static bool end(void *closure, const void *hd);
8875
Bo Yange3ee7162018-08-10 18:09:02 +00008876static const char eof_ch = 'e';
8877
Paul Yang8faa7782018-12-26 10:36:09 -08008878/* stringsink */
8879typedef struct {
8880 upb_byteshandler handler;
8881 upb_bytessink sink;
8882 char *ptr;
8883 size_t len, size;
8884} upb_stringsink;
8885
8886
8887static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
8888 upb_stringsink *sink = _sink;
8889 sink->len = 0;
8890 UPB_UNUSED(hd);
8891 UPB_UNUSED(size_hint);
8892 return sink;
8893}
8894
8895static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
8896 size_t len, const upb_bufhandle *handle) {
8897 upb_stringsink *sink = _sink;
8898 size_t new_size = sink->size;
8899
8900 UPB_UNUSED(hd);
8901 UPB_UNUSED(handle);
8902
8903 while (sink->len + len > new_size) {
8904 new_size *= 2;
8905 }
8906
8907 if (new_size != sink->size) {
8908 sink->ptr = realloc(sink->ptr, new_size);
8909 sink->size = new_size;
8910 }
8911
8912 memcpy(sink->ptr + sink->len, ptr, len);
8913 sink->len += len;
8914
8915 return len;
8916}
8917
8918void upb_stringsink_init(upb_stringsink *sink) {
8919 upb_byteshandler_init(&sink->handler);
8920 upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL);
8921 upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL);
8922
8923 upb_bytessink_reset(&sink->sink, &sink->handler, sink);
8924
8925 sink->size = 32;
8926 sink->ptr = malloc(sink->size);
8927 sink->len = 0;
8928}
8929
8930void upb_stringsink_uninit(upb_stringsink *sink) { free(sink->ptr); }
8931
8932typedef struct {
8933 /* For encoding Any value field in binary format. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008934 upb_handlercache *encoder_handlercache;
Paul Yang8faa7782018-12-26 10:36:09 -08008935 upb_stringsink stringsink;
8936
8937 /* For decoding Any value field in json format. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008938 upb_json_codecache *parser_codecache;
Paul Yang8faa7782018-12-26 10:36:09 -08008939 upb_sink sink;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008940 upb_json_parser *parser;
Paul Yang8faa7782018-12-26 10:36:09 -08008941
8942 /* Mark the range of uninterpreted values in json input before type url. */
8943 const char *before_type_url_start;
8944 const char *before_type_url_end;
8945
8946 /* Mark the range of uninterpreted values in json input after type url. */
8947 const char *after_type_url_start;
8948} upb_jsonparser_any_frame;
8949
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008950typedef struct {
8951 upb_sink sink;
8952
8953 /* The current message in which we're parsing, and the field whose value we're
8954 * expecting next. */
8955 const upb_msgdef *m;
8956 const upb_fielddef *f;
8957
Paul Yange0e54662016-09-15 11:09:01 -07008958 /* The table mapping json name to fielddef for this message. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08008959 const upb_strtable *name_table;
Paul Yange0e54662016-09-15 11:09:01 -07008960
Paul Yang4b145b12019-03-12 10:56:58 -07008961 /* We are in a repeated-field context. We need this flag to decide whether to
8962 * handle the array as a normal repeated field or a
8963 * google.protobuf.ListValue/google.protobuf.Value. */
8964 bool is_repeated;
8965
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008966 /* We are in a repeated-field context, ready to emit mapentries as
8967 * submessages. This flag alters the start-of-object (open-brace) behavior to
8968 * begin a sequence of mapentry messages rather than a single submessage. */
8969 bool is_map;
8970
8971 /* We are in a map-entry message context. This flag is set when parsing the
8972 * value field of a single map entry and indicates to all value-field parsers
8973 * (subobjects, strings, numbers, and bools) that the map-entry submessage
8974 * should end as soon as the value is parsed. */
8975 bool is_mapentry;
8976
8977 /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
8978 * message's map field that we're currently parsing. This differs from |f|
8979 * because |f| is the field in the *current* message (i.e., the map-entry
8980 * message itself), not the parent's field that leads to this map. */
8981 const upb_fielddef *mapfield;
Paul Yangd2d4b402018-10-12 13:46:26 -07008982
Paul Yang8faa7782018-12-26 10:36:09 -08008983 /* We are in an Any message context. This flag is set when parsing the Any
8984 * message and indicates to all field parsers (subobjects, strings, numbers,
8985 * and bools) that the parsed field should be serialized as binary data or
8986 * cached (type url not found yet). */
8987 bool is_any;
8988
8989 /* The type of packed message in Any. */
8990 upb_jsonparser_any_frame *any_frame;
8991
Paul Yangd2d4b402018-10-12 13:46:26 -07008992 /* True if the field to be parsed is unknown. */
8993 bool is_unknown_field;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07008994} upb_jsonparser_frame;
8995
Adam Cozzette8645d892019-03-26 14:32:20 -07008996static void init_frame(upb_jsonparser_frame* frame) {
8997 frame->m = NULL;
8998 frame->f = NULL;
8999 frame->name_table = NULL;
9000 frame->is_repeated = false;
9001 frame->is_map = false;
9002 frame->is_mapentry = false;
9003 frame->mapfield = NULL;
9004 frame->is_any = false;
9005 frame->any_frame = NULL;
9006 frame->is_unknown_field = false;
9007}
9008
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009009struct upb_json_parser {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009010 upb_arena *arena;
Paul Yange0e54662016-09-15 11:09:01 -07009011 const upb_json_parsermethod *method;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009012 upb_bytessink input_;
9013
9014 /* Stack to track the JSON scopes we are in. */
9015 upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
9016 upb_jsonparser_frame *top;
9017 upb_jsonparser_frame *limit;
9018
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009019 upb_status *status;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009020
9021 /* Ragel's internal parsing stack for the parsing state machine. */
9022 int current_state;
9023 int parser_stack[UPB_JSON_MAX_DEPTH];
9024 int parser_top;
9025
9026 /* The handle for the current buffer. */
9027 const upb_bufhandle *handle;
9028
9029 /* Accumulate buffer. See details in parser.rl. */
9030 const char *accumulated;
9031 size_t accumulated_len;
9032 char *accumulate_buf;
9033 size_t accumulate_buf_size;
9034
9035 /* Multi-part text data. See details in parser.rl. */
9036 int multipart_state;
9037 upb_selector_t string_selector;
9038
9039 /* Input capture. See details in parser.rl. */
9040 const char *capture;
9041
9042 /* Intermediate result of parsing a unicode escape sequence. */
9043 uint32_t digit;
Paul Yang26eeec92018-07-09 14:29:23 -07009044
Paul Yang8faa7782018-12-26 10:36:09 -08009045 /* For resolve type url in Any. */
9046 const upb_symtab *symtab;
9047
Paul Yang26eeec92018-07-09 14:29:23 -07009048 /* Whether to proceed if unknown field is met. */
9049 bool ignore_json_unknown;
Paul Yang9bda1f12018-09-22 18:57:43 -07009050
9051 /* Cache for parsing timestamp due to base and zone are handled in different
9052 * handlers. */
9053 struct tm tm;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009054};
9055
Adam Cozzette8645d892019-03-26 14:32:20 -07009056static upb_jsonparser_frame* start_jsonparser_frame(upb_json_parser *p) {
9057 upb_jsonparser_frame *inner;
9058 inner = p->top + 1;
9059 init_frame(inner);
9060 return inner;
9061}
Paul Yange0e54662016-09-15 11:09:01 -07009062
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009063struct upb_json_codecache {
9064 upb_arena *arena;
9065 upb_inttable methods; /* upb_msgdef* -> upb_json_parsermethod* */
9066};
Paul Yange0e54662016-09-15 11:09:01 -07009067
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009068struct upb_json_parsermethod {
9069 const upb_json_codecache *cache;
Paul Yange0e54662016-09-15 11:09:01 -07009070 upb_byteshandler input_handler_;
9071
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009072 /* Maps json_name -> fielddef */
9073 upb_strtable name_table;
Paul Yange0e54662016-09-15 11:09:01 -07009074};
9075
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009076#define PARSER_CHECK_RETURN(x) if (!(x)) return false
9077
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009078static upb_jsonparser_any_frame *json_parser_any_frame_new(
9079 upb_json_parser *p) {
9080 upb_jsonparser_any_frame *frame;
9081
9082 frame = upb_arena_malloc(p->arena, sizeof(upb_jsonparser_any_frame));
9083
9084 frame->encoder_handlercache = upb_pb_encoder_newcache();
9085 frame->parser_codecache = upb_json_codecache_new();
Paul Yang8faa7782018-12-26 10:36:09 -08009086 frame->parser = NULL;
9087 frame->before_type_url_start = NULL;
9088 frame->before_type_url_end = NULL;
9089 frame->after_type_url_start = NULL;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009090
9091 upb_stringsink_init(&frame->stringsink);
9092
9093 return frame;
Paul Yang8faa7782018-12-26 10:36:09 -08009094}
9095
9096static void json_parser_any_frame_set_payload_type(
9097 upb_json_parser *p,
9098 upb_jsonparser_any_frame *frame,
9099 const upb_msgdef *payload_type) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009100 const upb_handlers *h;
9101 const upb_json_parsermethod *parser_method;
9102 upb_pb_encoder *encoder;
9103
Paul Yang8faa7782018-12-26 10:36:09 -08009104 /* Initialize encoder. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009105 h = upb_handlercache_get(frame->encoder_handlercache, payload_type);
9106 encoder = upb_pb_encoder_create(p->arena, h, frame->stringsink.sink);
Paul Yang8faa7782018-12-26 10:36:09 -08009107
9108 /* Initialize parser. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009109 parser_method = upb_json_codecache_get(frame->parser_codecache, payload_type);
9110 upb_sink_reset(&frame->sink, h, encoder);
Paul Yang8faa7782018-12-26 10:36:09 -08009111 frame->parser =
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009112 upb_json_parser_create(p->arena, parser_method, p->symtab, frame->sink,
9113 p->status, p->ignore_json_unknown);
Paul Yang8faa7782018-12-26 10:36:09 -08009114}
9115
9116static void json_parser_any_frame_free(upb_jsonparser_any_frame *frame) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009117 upb_handlercache_free(frame->encoder_handlercache);
9118 upb_json_codecache_free(frame->parser_codecache);
Paul Yang8faa7782018-12-26 10:36:09 -08009119 upb_stringsink_uninit(&frame->stringsink);
9120}
9121
9122static bool json_parser_any_frame_has_type_url(
9123 upb_jsonparser_any_frame *frame) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009124 return frame->parser != NULL;
Paul Yang8faa7782018-12-26 10:36:09 -08009125}
9126
9127static bool json_parser_any_frame_has_value_before_type_url(
9128 upb_jsonparser_any_frame *frame) {
9129 return frame->before_type_url_start != frame->before_type_url_end;
9130}
9131
9132static bool json_parser_any_frame_has_value_after_type_url(
9133 upb_jsonparser_any_frame *frame) {
9134 return frame->after_type_url_start != NULL;
9135}
9136
9137static bool json_parser_any_frame_has_value(
9138 upb_jsonparser_any_frame *frame) {
9139 return json_parser_any_frame_has_value_before_type_url(frame) ||
9140 json_parser_any_frame_has_value_after_type_url(frame);
9141}
9142
9143static void json_parser_any_frame_set_before_type_url_end(
9144 upb_jsonparser_any_frame *frame,
9145 const char *ptr) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009146 if (frame->parser == NULL) {
Paul Yang8faa7782018-12-26 10:36:09 -08009147 frame->before_type_url_end = ptr;
9148 }
9149}
9150
9151static void json_parser_any_frame_set_after_type_url_start_once(
9152 upb_jsonparser_any_frame *frame,
9153 const char *ptr) {
9154 if (json_parser_any_frame_has_type_url(frame) &&
9155 frame->after_type_url_start == NULL) {
9156 frame->after_type_url_start = ptr;
9157 }
9158}
9159
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009160/* Used to signal that a capture has been suspended. */
9161static char suspend_capture;
9162
9163static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
9164 upb_handlertype_t type) {
9165 upb_selector_t sel;
9166 bool ok = upb_handlers_getselector(p->top->f, type, &sel);
Paul Yange0e54662016-09-15 11:09:01 -07009167 UPB_ASSERT(ok);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009168 return sel;
9169}
9170
9171static upb_selector_t parser_getsel(upb_json_parser *p) {
9172 return getsel_for_handlertype(
9173 p, upb_handlers_getprimitivehandlertype(p->top->f));
9174}
9175
9176static bool check_stack(upb_json_parser *p) {
9177 if ((p->top + 1) == p->limit) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009178 upb_status_seterrmsg(p->status, "Nesting too deep");
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009179 return false;
9180 }
9181
9182 return true;
9183}
9184
Paul Yange0e54662016-09-15 11:09:01 -07009185static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) {
9186 upb_value v;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009187 const upb_json_codecache *cache = p->method->cache;
9188 bool ok;
9189 const upb_json_parsermethod *method;
9190
9191 ok = upb_inttable_lookupptr(&cache->methods, frame->m, &v);
Paul Yange0e54662016-09-15 11:09:01 -07009192 UPB_ASSERT(ok);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009193 method = upb_value_getconstptr(v);
9194
9195 frame->name_table = &method->name_table;
Paul Yange0e54662016-09-15 11:09:01 -07009196}
9197
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009198/* There are GCC/Clang built-ins for overflow checking which we could start
9199 * using if there was any performance benefit to it. */
9200
9201static bool checked_add(size_t a, size_t b, size_t *c) {
9202 if (SIZE_MAX - a < b) return false;
9203 *c = a + b;
9204 return true;
9205}
9206
9207static size_t saturating_multiply(size_t a, size_t b) {
9208 /* size_t is unsigned, so this is defined behavior even on overflow. */
9209 size_t ret = a * b;
9210 if (b != 0 && ret / b != a) {
9211 ret = SIZE_MAX;
9212 }
9213 return ret;
9214}
9215
9216
9217/* Base64 decoding ************************************************************/
9218
9219/* TODO(haberman): make this streaming. */
9220
9221static const signed char b64table[] = {
9222 -1, -1, -1, -1, -1, -1, -1, -1,
9223 -1, -1, -1, -1, -1, -1, -1, -1,
9224 -1, -1, -1, -1, -1, -1, -1, -1,
9225 -1, -1, -1, -1, -1, -1, -1, -1,
9226 -1, -1, -1, -1, -1, -1, -1, -1,
9227 -1, -1, -1, 62/*+*/, -1, -1, -1, 63/*/ */,
9228 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
9229 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1,
9230 -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/,
9231 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
9232 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
9233 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1,
9234 -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
9235 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
9236 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
9237 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1,
9238 -1, -1, -1, -1, -1, -1, -1, -1,
9239 -1, -1, -1, -1, -1, -1, -1, -1,
9240 -1, -1, -1, -1, -1, -1, -1, -1,
9241 -1, -1, -1, -1, -1, -1, -1, -1,
9242 -1, -1, -1, -1, -1, -1, -1, -1,
9243 -1, -1, -1, -1, -1, -1, -1, -1,
9244 -1, -1, -1, -1, -1, -1, -1, -1,
9245 -1, -1, -1, -1, -1, -1, -1, -1,
9246 -1, -1, -1, -1, -1, -1, -1, -1,
9247 -1, -1, -1, -1, -1, -1, -1, -1,
9248 -1, -1, -1, -1, -1, -1, -1, -1,
9249 -1, -1, -1, -1, -1, -1, -1, -1,
9250 -1, -1, -1, -1, -1, -1, -1, -1,
9251 -1, -1, -1, -1, -1, -1, -1, -1,
9252 -1, -1, -1, -1, -1, -1, -1, -1,
9253 -1, -1, -1, -1, -1, -1, -1, -1
9254};
9255
9256/* Returns the table value sign-extended to 32 bits. Knowing that the upper
9257 * bits will be 1 for unrecognized characters makes it easier to check for
9258 * this error condition later (see below). */
9259int32_t b64lookup(unsigned char ch) { return b64table[ch]; }
9260
9261/* Returns true if the given character is not a valid base64 character or
9262 * padding. */
9263bool nonbase64(unsigned char ch) { return b64lookup(ch) == -1 && ch != '='; }
9264
9265static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
9266 size_t len) {
9267 const char *limit = ptr + len;
9268 for (; ptr < limit; ptr += 4) {
9269 uint32_t val;
9270 char output[3];
9271
9272 if (limit - ptr < 4) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009273 upb_status_seterrf(p->status,
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009274 "Base64 input for bytes field not a multiple of 4: %s",
9275 upb_fielddef_name(p->top->f));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009276 return false;
9277 }
9278
9279 val = b64lookup(ptr[0]) << 18 |
9280 b64lookup(ptr[1]) << 12 |
9281 b64lookup(ptr[2]) << 6 |
9282 b64lookup(ptr[3]);
9283
9284 /* Test the upper bit; returns true if any of the characters returned -1. */
9285 if (val & 0x80000000) {
9286 goto otherchar;
9287 }
9288
9289 output[0] = val >> 16;
9290 output[1] = (val >> 8) & 0xff;
9291 output[2] = val & 0xff;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009292 upb_sink_putstring(p->top->sink, sel, output, 3, NULL);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009293 }
9294 return true;
9295
9296otherchar:
9297 if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) ||
9298 nonbase64(ptr[3]) ) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009299 upb_status_seterrf(p->status,
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009300 "Non-base64 characters in bytes field: %s",
9301 upb_fielddef_name(p->top->f));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009302 return false;
9303 } if (ptr[2] == '=') {
9304 uint32_t val;
9305 char output;
9306
9307 /* Last group contains only two input bytes, one output byte. */
9308 if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
9309 goto badpadding;
9310 }
9311
9312 val = b64lookup(ptr[0]) << 18 |
9313 b64lookup(ptr[1]) << 12;
9314
Paul Yange0e54662016-09-15 11:09:01 -07009315 UPB_ASSERT(!(val & 0x80000000));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009316 output = val >> 16;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009317 upb_sink_putstring(p->top->sink, sel, &output, 1, NULL);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009318 return true;
9319 } else {
9320 uint32_t val;
9321 char output[2];
9322
9323 /* Last group contains only three input bytes, two output bytes. */
9324 if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
9325 goto badpadding;
9326 }
9327
9328 val = b64lookup(ptr[0]) << 18 |
9329 b64lookup(ptr[1]) << 12 |
9330 b64lookup(ptr[2]) << 6;
9331
9332 output[0] = val >> 16;
9333 output[1] = (val >> 8) & 0xff;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009334 upb_sink_putstring(p->top->sink, sel, output, 2, NULL);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009335 return true;
9336 }
9337
9338badpadding:
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009339 upb_status_seterrf(p->status,
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009340 "Incorrect base64 padding for field: %s (%.*s)",
9341 upb_fielddef_name(p->top->f),
9342 4, ptr);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009343 return false;
9344}
9345
9346
9347/* Accumulate buffer **********************************************************/
9348
9349/* Functionality for accumulating a buffer.
9350 *
9351 * Some parts of the parser need an entire value as a contiguous string. For
9352 * example, to look up a member name in a hash table, or to turn a string into
9353 * a number, the relevant library routines need the input string to be in
9354 * contiguous memory, even if the value spanned two or more buffers in the
9355 * input. These routines handle that.
9356 *
9357 * In the common case we can just point to the input buffer to get this
9358 * contiguous string and avoid any actual copy. So we optimistically begin
9359 * this way. But there are a few cases where we must instead copy into a
9360 * separate buffer:
9361 *
9362 * 1. The string was not contiguous in the input (it spanned buffers).
9363 *
9364 * 2. The string included escape sequences that need to be interpreted to get
9365 * the true value in a contiguous buffer. */
9366
9367static void assert_accumulate_empty(upb_json_parser *p) {
Paul Yange0e54662016-09-15 11:09:01 -07009368 UPB_ASSERT(p->accumulated == NULL);
9369 UPB_ASSERT(p->accumulated_len == 0);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009370}
9371
9372static void accumulate_clear(upb_json_parser *p) {
9373 p->accumulated = NULL;
9374 p->accumulated_len = 0;
9375}
9376
9377/* Used internally by accumulate_append(). */
9378static bool accumulate_realloc(upb_json_parser *p, size_t need) {
9379 void *mem;
9380 size_t old_size = p->accumulate_buf_size;
9381 size_t new_size = UPB_MAX(old_size, 128);
9382 while (new_size < need) {
9383 new_size = saturating_multiply(new_size, 2);
9384 }
9385
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009386 mem = upb_arena_realloc(p->arena, p->accumulate_buf, old_size, new_size);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009387 if (!mem) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009388 upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009389 return false;
9390 }
9391
9392 p->accumulate_buf = mem;
9393 p->accumulate_buf_size = new_size;
9394 return true;
9395}
9396
9397/* Logically appends the given data to the append buffer.
9398 * If "can_alias" is true, we will try to avoid actually copying, but the buffer
9399 * must be valid until the next accumulate_append() call (if any). */
9400static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
9401 bool can_alias) {
9402 size_t need;
9403
9404 if (!p->accumulated && can_alias) {
9405 p->accumulated = buf;
9406 p->accumulated_len = len;
9407 return true;
9408 }
9409
9410 if (!checked_add(p->accumulated_len, len, &need)) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009411 upb_status_seterrmsg(p->status, "Integer overflow.");
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009412 return false;
9413 }
9414
9415 if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
9416 return false;
9417 }
9418
9419 if (p->accumulated != p->accumulate_buf) {
9420 memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
9421 p->accumulated = p->accumulate_buf;
9422 }
9423
9424 memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
9425 p->accumulated_len += len;
9426 return true;
9427}
9428
9429/* Returns a pointer to the data accumulated since the last accumulate_clear()
9430 * call, and writes the length to *len. This with point either to the input
9431 * buffer or a temporary accumulate buffer. */
9432static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
Paul Yange0e54662016-09-15 11:09:01 -07009433 UPB_ASSERT(p->accumulated);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009434 *len = p->accumulated_len;
9435 return p->accumulated;
9436}
9437
9438
9439/* Mult-part text data ********************************************************/
9440
9441/* When we have text data in the input, it can often come in multiple segments.
9442 * For example, there may be some raw string data followed by an escape
9443 * sequence. The two segments are processed with different logic. Also buffer
9444 * seams in the input can cause multiple segments.
9445 *
9446 * As we see segments, there are two main cases for how we want to process them:
9447 *
9448 * 1. we want to push the captured input directly to string handlers.
9449 *
9450 * 2. we need to accumulate all the parts into a contiguous buffer for further
9451 * processing (field name lookup, string->number conversion, etc). */
9452
9453/* This is the set of states for p->multipart_state. */
9454enum {
9455 /* We are not currently processing multipart data. */
9456 MULTIPART_INACTIVE = 0,
9457
9458 /* We are processing multipart data by accumulating it into a contiguous
9459 * buffer. */
9460 MULTIPART_ACCUMULATE = 1,
9461
9462 /* We are processing multipart data by pushing each part directly to the
9463 * current string handlers. */
9464 MULTIPART_PUSHEAGERLY = 2
9465};
9466
9467/* Start a multi-part text value where we accumulate the data for processing at
9468 * the end. */
9469static void multipart_startaccum(upb_json_parser *p) {
9470 assert_accumulate_empty(p);
Paul Yange0e54662016-09-15 11:09:01 -07009471 UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009472 p->multipart_state = MULTIPART_ACCUMULATE;
9473}
9474
9475/* Start a multi-part text value where we immediately push text data to a string
9476 * value with the given selector. */
9477static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
9478 assert_accumulate_empty(p);
Paul Yange0e54662016-09-15 11:09:01 -07009479 UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009480 p->multipart_state = MULTIPART_PUSHEAGERLY;
9481 p->string_selector = sel;
9482}
9483
9484static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
9485 bool can_alias) {
9486 switch (p->multipart_state) {
9487 case MULTIPART_INACTIVE:
9488 upb_status_seterrmsg(
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009489 p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009490 return false;
9491
9492 case MULTIPART_ACCUMULATE:
9493 if (!accumulate_append(p, buf, len, can_alias)) {
9494 return false;
9495 }
9496 break;
9497
9498 case MULTIPART_PUSHEAGERLY: {
9499 const upb_bufhandle *handle = can_alias ? p->handle : NULL;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009500 upb_sink_putstring(p->top->sink, p->string_selector, buf, len, handle);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009501 break;
9502 }
9503 }
9504
9505 return true;
9506}
9507
9508/* Note: this invalidates the accumulate buffer! Call only after reading its
9509 * contents. */
9510static void multipart_end(upb_json_parser *p) {
Paul Yange0e54662016-09-15 11:09:01 -07009511 UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009512 p->multipart_state = MULTIPART_INACTIVE;
9513 accumulate_clear(p);
9514}
9515
9516
9517/* Input capture **************************************************************/
9518
9519/* Functionality for capturing a region of the input as text. Gracefully
9520 * handles the case where a buffer seam occurs in the middle of the captured
9521 * region. */
9522
9523static void capture_begin(upb_json_parser *p, const char *ptr) {
Paul Yange0e54662016-09-15 11:09:01 -07009524 UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE);
9525 UPB_ASSERT(p->capture == NULL);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009526 p->capture = ptr;
9527}
9528
9529static bool capture_end(upb_json_parser *p, const char *ptr) {
Paul Yange0e54662016-09-15 11:09:01 -07009530 UPB_ASSERT(p->capture);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009531 if (multipart_text(p, p->capture, ptr - p->capture, true)) {
9532 p->capture = NULL;
9533 return true;
9534 } else {
9535 return false;
9536 }
9537}
9538
9539/* This is called at the end of each input buffer (ie. when we have hit a
9540 * buffer seam). If we are in the middle of capturing the input, this
9541 * processes the unprocessed capture region. */
9542static void capture_suspend(upb_json_parser *p, const char **ptr) {
9543 if (!p->capture) return;
9544
9545 if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
9546 /* We use this as a signal that we were in the middle of capturing, and
9547 * that capturing should resume at the beginning of the next buffer.
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009548 *
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009549 * We can't use *ptr here, because we have no guarantee that this pointer
9550 * will be valid when we resume (if the underlying memory is freed, then
9551 * using the pointer at all, even to compare to NULL, is likely undefined
9552 * behavior). */
9553 p->capture = &suspend_capture;
9554 } else {
9555 /* Need to back up the pointer to the beginning of the capture, since
9556 * we were not able to actually preserve it. */
9557 *ptr = p->capture;
9558 }
9559}
9560
9561static void capture_resume(upb_json_parser *p, const char *ptr) {
9562 if (p->capture) {
Paul Yange0e54662016-09-15 11:09:01 -07009563 UPB_ASSERT(p->capture == &suspend_capture);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009564 p->capture = ptr;
9565 }
9566}
9567
9568
9569/* Callbacks from the parser **************************************************/
9570
9571/* These are the functions called directly from the parser itself.
9572 * We define these in the same order as their declarations in the parser. */
9573
9574static char escape_char(char in) {
9575 switch (in) {
9576 case 'r': return '\r';
9577 case 't': return '\t';
9578 case 'n': return '\n';
9579 case 'f': return '\f';
9580 case 'b': return '\b';
9581 case '/': return '/';
9582 case '"': return '"';
9583 case '\\': return '\\';
9584 default:
Paul Yange0e54662016-09-15 11:09:01 -07009585 UPB_ASSERT(0);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009586 return 'x';
9587 }
9588}
9589
9590static bool escape(upb_json_parser *p, const char *ptr) {
9591 char ch = escape_char(*ptr);
9592 return multipart_text(p, &ch, 1, false);
9593}
9594
9595static void start_hex(upb_json_parser *p) {
9596 p->digit = 0;
9597}
9598
9599static void hexdigit(upb_json_parser *p, const char *ptr) {
9600 char ch = *ptr;
9601
9602 p->digit <<= 4;
9603
9604 if (ch >= '0' && ch <= '9') {
9605 p->digit += (ch - '0');
9606 } else if (ch >= 'a' && ch <= 'f') {
9607 p->digit += ((ch - 'a') + 10);
9608 } else {
Paul Yange0e54662016-09-15 11:09:01 -07009609 UPB_ASSERT(ch >= 'A' && ch <= 'F');
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009610 p->digit += ((ch - 'A') + 10);
9611 }
9612}
9613
9614static bool end_hex(upb_json_parser *p) {
9615 uint32_t codepoint = p->digit;
9616
9617 /* emit the codepoint as UTF-8. */
9618 char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */
9619 int length = 0;
9620 if (codepoint <= 0x7F) {
9621 utf8[0] = codepoint;
9622 length = 1;
9623 } else if (codepoint <= 0x07FF) {
9624 utf8[1] = (codepoint & 0x3F) | 0x80;
9625 codepoint >>= 6;
9626 utf8[0] = (codepoint & 0x1F) | 0xC0;
9627 length = 2;
9628 } else /* codepoint <= 0xFFFF */ {
9629 utf8[2] = (codepoint & 0x3F) | 0x80;
9630 codepoint >>= 6;
9631 utf8[1] = (codepoint & 0x3F) | 0x80;
9632 codepoint >>= 6;
9633 utf8[0] = (codepoint & 0x0F) | 0xE0;
9634 length = 3;
9635 }
9636 /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
9637 * we have to wait for the next escape to get the full code point). */
9638
9639 return multipart_text(p, utf8, length, false);
9640}
9641
9642static void start_text(upb_json_parser *p, const char *ptr) {
9643 capture_begin(p, ptr);
9644}
9645
9646static bool end_text(upb_json_parser *p, const char *ptr) {
9647 return capture_end(p, ptr);
9648}
9649
Bo Yange3ee7162018-08-10 18:09:02 +00009650static bool start_number(upb_json_parser *p, const char *ptr) {
9651 if (is_top_level(p)) {
Paul Yang9bda1f12018-09-22 18:57:43 -07009652 if (is_number_wrapper_object(p)) {
9653 start_wrapper_object(p);
9654 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
9655 start_value_object(p, VALUE_NUMBERVALUE);
9656 } else {
Bo Yange3ee7162018-08-10 18:09:02 +00009657 return false;
9658 }
Bo Yange3ee7162018-08-10 18:09:02 +00009659 } else if (does_number_wrapper_start(p)) {
9660 if (!start_subobject(p)) {
9661 return false;
9662 }
9663 start_wrapper_object(p);
Paul Yang9bda1f12018-09-22 18:57:43 -07009664 } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
9665 if (!start_subobject(p)) {
9666 return false;
9667 }
9668 start_value_object(p, VALUE_NUMBERVALUE);
Bo Yange3ee7162018-08-10 18:09:02 +00009669 }
9670
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009671 multipart_startaccum(p);
9672 capture_begin(p, ptr);
Bo Yange3ee7162018-08-10 18:09:02 +00009673 return true;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009674}
9675
Paul Yang60327462017-10-09 12:39:13 -07009676static bool parse_number(upb_json_parser *p, bool is_quoted);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009677
Bo Yange3ee7162018-08-10 18:09:02 +00009678static bool end_number_nontop(upb_json_parser *p, const char *ptr) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009679 if (!capture_end(p, ptr)) {
9680 return false;
9681 }
9682
Paul Yang26eeec92018-07-09 14:29:23 -07009683 if (p->top->f == NULL) {
9684 multipart_end(p);
9685 return true;
9686 }
9687
Paul Yang60327462017-10-09 12:39:13 -07009688 return parse_number(p, false);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009689}
9690
Bo Yange3ee7162018-08-10 18:09:02 +00009691static bool end_number(upb_json_parser *p, const char *ptr) {
9692 if (!end_number_nontop(p, ptr)) {
9693 return false;
9694 }
9695
9696 if (does_number_wrapper_end(p)) {
9697 end_wrapper_object(p);
9698 if (!is_top_level(p)) {
9699 end_subobject(p);
9700 }
Paul Yang9bda1f12018-09-22 18:57:43 -07009701 return true;
9702 }
9703
9704 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
9705 end_value_object(p);
9706 if (!is_top_level(p)) {
9707 end_subobject(p);
9708 }
9709 return true;
Bo Yange3ee7162018-08-10 18:09:02 +00009710 }
9711
9712 return true;
9713}
9714
Paul Yang60327462017-10-09 12:39:13 -07009715/* |buf| is NULL-terminated. |buf| itself will never include quotes;
9716 * |is_quoted| tells us whether this text originally appeared inside quotes. */
9717static bool parse_number_from_buffer(upb_json_parser *p, const char *buf,
9718 bool is_quoted) {
9719 size_t len = strlen(buf);
9720 const char *bufend = buf + len;
9721 char *end;
9722 upb_fieldtype_t type = upb_fielddef_type(p->top->f);
9723 double val;
9724 double dummy;
9725 double inf = 1.0 / 0.0; /* C89 does not have an INFINITY macro. */
9726
9727 errno = 0;
9728
9729 if (len == 0 || buf[0] == ' ') {
9730 return false;
9731 }
9732
9733 /* For integer types, first try parsing with integer-specific routines.
9734 * If these succeed, they will be more accurate for int64/uint64 than
9735 * strtod().
9736 */
9737 switch (type) {
9738 case UPB_TYPE_ENUM:
9739 case UPB_TYPE_INT32: {
9740 long val = strtol(buf, &end, 0);
9741 if (errno == ERANGE || end != bufend) {
9742 break;
9743 } else if (val > INT32_MAX || val < INT32_MIN) {
9744 return false;
9745 } else {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009746 upb_sink_putint32(p->top->sink, parser_getsel(p), val);
Paul Yang60327462017-10-09 12:39:13 -07009747 return true;
9748 }
9749 }
9750 case UPB_TYPE_UINT32: {
9751 unsigned long val = strtoul(buf, &end, 0);
9752 if (end != bufend) {
9753 break;
9754 } else if (val > UINT32_MAX || errno == ERANGE) {
9755 return false;
9756 } else {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009757 upb_sink_putuint32(p->top->sink, parser_getsel(p), val);
Paul Yang60327462017-10-09 12:39:13 -07009758 return true;
9759 }
9760 }
9761 /* XXX: We can't handle [u]int64 properly on 32-bit machines because
9762 * strto[u]ll isn't in C89. */
9763 case UPB_TYPE_INT64: {
9764 long val = strtol(buf, &end, 0);
9765 if (errno == ERANGE || end != bufend) {
9766 break;
9767 } else {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009768 upb_sink_putint64(p->top->sink, parser_getsel(p), val);
Paul Yang60327462017-10-09 12:39:13 -07009769 return true;
9770 }
9771 }
9772 case UPB_TYPE_UINT64: {
9773 unsigned long val = strtoul(p->accumulated, &end, 0);
9774 if (end != bufend) {
9775 break;
9776 } else if (errno == ERANGE) {
9777 return false;
9778 } else {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009779 upb_sink_putuint64(p->top->sink, parser_getsel(p), val);
Paul Yang60327462017-10-09 12:39:13 -07009780 return true;
9781 }
9782 }
9783 default:
9784 break;
9785 }
9786
9787 if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) {
9788 /* Quoted numbers for integer types are not allowed to be in double form. */
9789 return false;
9790 }
9791
9792 if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) {
9793 /* C89 does not have an INFINITY macro. */
9794 val = inf;
9795 } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) {
9796 val = -inf;
9797 } else {
9798 val = strtod(buf, &end);
9799 if (errno == ERANGE || end != bufend) {
9800 return false;
9801 }
9802 }
9803
9804 switch (type) {
9805#define CASE(capitaltype, smalltype, ctype, min, max) \
9806 case UPB_TYPE_ ## capitaltype: { \
9807 if (modf(val, &dummy) != 0 || val > max || val < min) { \
9808 return false; \
9809 } else { \
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009810 upb_sink_put ## smalltype(p->top->sink, parser_getsel(p), \
Paul Yang60327462017-10-09 12:39:13 -07009811 (ctype)val); \
9812 return true; \
9813 } \
9814 break; \
9815 }
9816 case UPB_TYPE_ENUM:
9817 CASE(INT32, int32, int32_t, INT32_MIN, INT32_MAX);
9818 CASE(INT64, int64, int64_t, INT64_MIN, INT64_MAX);
9819 CASE(UINT32, uint32, uint32_t, 0, UINT32_MAX);
9820 CASE(UINT64, uint64, uint64_t, 0, UINT64_MAX);
9821#undef CASE
9822
9823 case UPB_TYPE_DOUBLE:
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009824 upb_sink_putdouble(p->top->sink, parser_getsel(p), val);
Paul Yang60327462017-10-09 12:39:13 -07009825 return true;
9826 case UPB_TYPE_FLOAT:
9827 if ((val > FLT_MAX || val < -FLT_MAX) && val != inf && val != -inf) {
9828 return false;
9829 } else {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009830 upb_sink_putfloat(p->top->sink, parser_getsel(p), val);
Paul Yang60327462017-10-09 12:39:13 -07009831 return true;
9832 }
9833 default:
9834 return false;
9835 }
9836}
9837
9838static bool parse_number(upb_json_parser *p, bool is_quoted) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009839 size_t len;
9840 const char *buf;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009841
9842 /* strtol() and friends unfortunately do not support specifying the length of
9843 * the input string, so we need to force a copy into a NULL-terminated buffer. */
9844 if (!multipart_text(p, "\0", 1, false)) {
9845 return false;
9846 }
9847
9848 buf = accumulate_getptr(p, &len);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009849
Paul Yang60327462017-10-09 12:39:13 -07009850 if (parse_number_from_buffer(p, buf, is_quoted)) {
9851 multipart_end(p);
9852 return true;
9853 } else {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009854 upb_status_seterrf(p->status, "error parsing number: %s", buf);
Paul Yang60327462017-10-09 12:39:13 -07009855 multipart_end(p);
9856 return false;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009857 }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009858}
9859
9860static bool parser_putbool(upb_json_parser *p, bool val) {
9861 bool ok;
9862
Paul Yang26eeec92018-07-09 14:29:23 -07009863 if (p->top->f == NULL) {
9864 return true;
9865 }
9866
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009867 if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009868 upb_status_seterrf(p->status,
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009869 "Boolean value specified for non-bool field: %s",
9870 upb_fielddef_name(p->top->f));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009871 return false;
9872 }
9873
Joshua Habermanf5e8ee42019-03-06 12:04:17 -08009874 ok = upb_sink_putbool(p->top->sink, parser_getsel(p), val);
Paul Yange0e54662016-09-15 11:09:01 -07009875 UPB_ASSERT(ok);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009876
9877 return true;
9878}
9879
Bo Yange3ee7162018-08-10 18:09:02 +00009880static bool end_bool(upb_json_parser *p, bool val) {
9881 if (is_top_level(p)) {
Paul Yang9bda1f12018-09-22 18:57:43 -07009882 if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) {
9883 start_wrapper_object(p);
9884 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
9885 start_value_object(p, VALUE_BOOLVALUE);
9886 } else {
Bo Yange3ee7162018-08-10 18:09:02 +00009887 return false;
9888 }
Paul Yang9bda1f12018-09-22 18:57:43 -07009889 } else if (is_wellknown_field(p, UPB_WELLKNOWN_BOOLVALUE)) {
Bo Yange3ee7162018-08-10 18:09:02 +00009890 if (!start_subobject(p)) {
9891 return false;
9892 }
9893 start_wrapper_object(p);
Paul Yang9bda1f12018-09-22 18:57:43 -07009894 } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
9895 if (!start_subobject(p)) {
9896 return false;
9897 }
9898 start_value_object(p, VALUE_BOOLVALUE);
Bo Yange3ee7162018-08-10 18:09:02 +00009899 }
9900
Paul Yangd2d4b402018-10-12 13:46:26 -07009901 if (p->top->is_unknown_field) {
9902 return true;
9903 }
9904
Bo Yange3ee7162018-08-10 18:09:02 +00009905 if (!parser_putbool(p, val)) {
9906 return false;
9907 }
9908
Paul Yang9bda1f12018-09-22 18:57:43 -07009909 if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) {
Bo Yange3ee7162018-08-10 18:09:02 +00009910 end_wrapper_object(p);
9911 if (!is_top_level(p)) {
9912 end_subobject(p);
9913 }
Paul Yang9bda1f12018-09-22 18:57:43 -07009914 return true;
9915 }
9916
9917 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
9918 end_value_object(p);
9919 if (!is_top_level(p)) {
9920 end_subobject(p);
9921 }
9922 return true;
9923 }
9924
9925 return true;
9926}
9927
9928static bool end_null(upb_json_parser *p) {
9929 const char *zero_ptr = "0";
9930
9931 if (is_top_level(p)) {
9932 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
9933 start_value_object(p, VALUE_NULLVALUE);
9934 } else {
9935 return true;
9936 }
9937 } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
9938 if (!start_subobject(p)) {
9939 return false;
9940 }
9941 start_value_object(p, VALUE_NULLVALUE);
9942 } else {
9943 return true;
9944 }
9945
9946 /* Fill null_value field. */
9947 multipart_startaccum(p);
9948 capture_begin(p, zero_ptr);
9949 capture_end(p, zero_ptr + 1);
9950 parse_number(p, false);
9951
9952 end_value_object(p);
9953 if (!is_top_level(p)) {
9954 end_subobject(p);
Bo Yange3ee7162018-08-10 18:09:02 +00009955 }
9956
9957 return true;
9958}
9959
Paul Yang8faa7782018-12-26 10:36:09 -08009960static bool start_any_stringval(upb_json_parser *p) {
9961 multipart_startaccum(p);
9962 return true;
9963}
9964
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07009965static bool start_stringval(upb_json_parser *p) {
Bo Yange3ee7162018-08-10 18:09:02 +00009966 if (is_top_level(p)) {
Paul Yang9bda1f12018-09-22 18:57:43 -07009967 if (is_string_wrapper_object(p)) {
9968 start_wrapper_object(p);
Paul Yangc4f2a922019-01-17 10:18:43 -08009969 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) {
9970 start_fieldmask_object(p);
9971 return true;
Paul Yang9bda1f12018-09-22 18:57:43 -07009972 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
9973 is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) {
9974 start_object(p);
9975 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
9976 start_value_object(p, VALUE_STRINGVALUE);
9977 } else {
Bo Yange3ee7162018-08-10 18:09:02 +00009978 return false;
9979 }
Bo Yange3ee7162018-08-10 18:09:02 +00009980 } else if (does_string_wrapper_start(p)) {
9981 if (!start_subobject(p)) {
9982 return false;
9983 }
9984 start_wrapper_object(p);
Paul Yangc4f2a922019-01-17 10:18:43 -08009985 } else if (does_fieldmask_start(p)) {
9986 if (!start_subobject(p)) {
9987 return false;
9988 }
9989 start_fieldmask_object(p);
9990 return true;
Paul Yang9bda1f12018-09-22 18:57:43 -07009991 } else if (is_wellknown_field(p, UPB_WELLKNOWN_TIMESTAMP) ||
9992 is_wellknown_field(p, UPB_WELLKNOWN_DURATION)) {
9993 if (!start_subobject(p)) {
9994 return false;
9995 }
9996 start_object(p);
9997 } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
9998 if (!start_subobject(p)) {
9999 return false;
10000 }
10001 start_value_object(p, VALUE_STRINGVALUE);
Bo Yange3ee7162018-08-10 18:09:02 +000010002 }
10003
Paul Yang26eeec92018-07-09 14:29:23 -070010004 if (p->top->f == NULL) {
10005 multipart_startaccum(p);
10006 return true;
10007 }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010008
Paul Yang8faa7782018-12-26 10:36:09 -080010009 if (p->top->is_any) {
10010 return start_any_stringval(p);
10011 }
10012
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010013 if (upb_fielddef_isstring(p->top->f)) {
10014 upb_jsonparser_frame *inner;
10015 upb_selector_t sel;
10016
10017 if (!check_stack(p)) return false;
10018
10019 /* Start a new parser frame: parser frames correspond one-to-one with
10020 * handler frames, and string events occur in a sub-frame. */
Adam Cozzette8645d892019-03-26 14:32:20 -070010021 inner = start_jsonparser_frame(p);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010022 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010023 upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010024 inner->m = p->top->m;
10025 inner->f = p->top->f;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010026 p->top = inner;
10027
10028 if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
10029 /* For STRING fields we push data directly to the handlers as it is
10030 * parsed. We don't do this yet for BYTES fields, because our base64
10031 * decoder is not streaming.
10032 *
10033 * TODO(haberman): make base64 decoding streaming also. */
10034 multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
10035 return true;
10036 } else {
10037 multipart_startaccum(p);
10038 return true;
10039 }
Paul Yang60327462017-10-09 12:39:13 -070010040 } else if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL &&
10041 upb_fielddef_type(p->top->f) != UPB_TYPE_MESSAGE) {
10042 /* No need to push a frame -- numeric values in quotes remain in the
10043 * current parser frame. These values must accmulate so we can convert
10044 * them all at once at the end. */
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010045 multipart_startaccum(p);
10046 return true;
10047 } else {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010048 upb_status_seterrf(p->status,
Paul Yang60327462017-10-09 12:39:13 -070010049 "String specified for bool or submessage field: %s",
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010050 upb_fielddef_name(p->top->f));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010051 return false;
10052 }
10053}
10054
Paul Yang8faa7782018-12-26 10:36:09 -080010055static bool end_any_stringval(upb_json_parser *p) {
10056 size_t len;
10057 const char *buf = accumulate_getptr(p, &len);
10058
10059 /* Set type_url */
10060 upb_selector_t sel;
10061 upb_jsonparser_frame *inner;
10062 if (!check_stack(p)) return false;
10063 inner = p->top + 1;
10064
10065 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010066 upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
Paul Yang8faa7782018-12-26 10:36:09 -080010067 sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010068 upb_sink_putstring(inner->sink, sel, buf, len, NULL);
Paul Yang8faa7782018-12-26 10:36:09 -080010069 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010070 upb_sink_endstr(inner->sink, sel);
Paul Yang8faa7782018-12-26 10:36:09 -080010071
10072 multipart_end(p);
10073
10074 /* Resolve type url */
10075 if (strncmp(buf, "type.googleapis.com/", 20) == 0 && len > 20) {
10076 const upb_msgdef *payload_type = NULL;
10077 buf += 20;
10078 len -= 20;
10079
10080 payload_type = upb_symtab_lookupmsg2(p->symtab, buf, len);
10081 if (payload_type == NULL) {
10082 upb_status_seterrf(
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010083 p->status, "Cannot find packed type: %.*s\n", (int)len, buf);
Paul Yang8faa7782018-12-26 10:36:09 -080010084 return false;
10085 }
10086
10087 json_parser_any_frame_set_payload_type(p, p->top->any_frame, payload_type);
Xiang Daie4794102019-02-21 11:28:50 +080010088
Paul Yang8faa7782018-12-26 10:36:09 -080010089 return true;
10090 } else {
10091 upb_status_seterrf(
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010092 p->status, "Invalid type url: %.*s\n", (int)len, buf);
Paul Yang8faa7782018-12-26 10:36:09 -080010093 return false;
10094 }
10095}
10096
Bo Yange3ee7162018-08-10 18:09:02 +000010097static bool end_stringval_nontop(upb_json_parser *p) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010098 bool ok = true;
10099
Paul Yang9bda1f12018-09-22 18:57:43 -070010100 if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
10101 is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) {
10102 multipart_end(p);
10103 return true;
10104 }
10105
Paul Yang26eeec92018-07-09 14:29:23 -070010106 if (p->top->f == NULL) {
10107 multipart_end(p);
10108 return true;
10109 }
10110
Paul Yang8faa7782018-12-26 10:36:09 -080010111 if (p->top->is_any) {
10112 return end_any_stringval(p);
10113 }
10114
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010115 switch (upb_fielddef_type(p->top->f)) {
10116 case UPB_TYPE_BYTES:
10117 if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
10118 p->accumulated, p->accumulated_len)) {
10119 return false;
10120 }
10121 /* Fall through. */
10122
10123 case UPB_TYPE_STRING: {
10124 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010125 upb_sink_endstr(p->top->sink, sel);
Paul Yangc4f2a922019-01-17 10:18:43 -080010126 p->top--;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010127 break;
10128 }
10129
10130 case UPB_TYPE_ENUM: {
10131 /* Resolve enum symbolic name to integer value. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010132 const upb_enumdef *enumdef = upb_fielddef_enumsubdef(p->top->f);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010133
10134 size_t len;
10135 const char *buf = accumulate_getptr(p, &len);
10136
10137 int32_t int_val = 0;
10138 ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);
10139
10140 if (ok) {
10141 upb_selector_t sel = parser_getsel(p);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010142 upb_sink_putint32(p->top->sink, sel, int_val);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010143 } else {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010144 upb_status_seterrf(p->status, "Enum value unknown: '%.*s'", len, buf);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010145 }
10146
10147 break;
10148 }
10149
Paul Yang60327462017-10-09 12:39:13 -070010150 case UPB_TYPE_INT32:
10151 case UPB_TYPE_INT64:
10152 case UPB_TYPE_UINT32:
10153 case UPB_TYPE_UINT64:
10154 case UPB_TYPE_DOUBLE:
10155 case UPB_TYPE_FLOAT:
10156 ok = parse_number(p, true);
10157 break;
10158
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010159 default:
Paul Yange0e54662016-09-15 11:09:01 -070010160 UPB_ASSERT(false);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010161 upb_status_seterrmsg(p->status, "Internal error in JSON decoder");
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010162 ok = false;
10163 break;
10164 }
10165
10166 multipart_end(p);
10167
10168 return ok;
10169}
10170
Bo Yange3ee7162018-08-10 18:09:02 +000010171static bool end_stringval(upb_json_parser *p) {
Paul Yangc4f2a922019-01-17 10:18:43 -080010172 /* FieldMask's stringvals have been ended when handling them. Only need to
10173 * close FieldMask here.*/
10174 if (does_fieldmask_end(p)) {
10175 end_fieldmask_object(p);
10176 if (!is_top_level(p)) {
10177 end_subobject(p);
10178 }
10179 return true;
10180 }
10181
Bo Yange3ee7162018-08-10 18:09:02 +000010182 if (!end_stringval_nontop(p)) {
10183 return false;
10184 }
10185
10186 if (does_string_wrapper_end(p)) {
10187 end_wrapper_object(p);
10188 if (!is_top_level(p)) {
10189 end_subobject(p);
10190 }
Paul Yang9bda1f12018-09-22 18:57:43 -070010191 return true;
Bo Yange3ee7162018-08-10 18:09:02 +000010192 }
10193
Paul Yang9bda1f12018-09-22 18:57:43 -070010194 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
10195 end_value_object(p);
10196 if (!is_top_level(p)) {
10197 end_subobject(p);
10198 }
10199 return true;
10200 }
10201
10202 if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
Paul Yangc4f2a922019-01-17 10:18:43 -080010203 is_wellknown_msg(p, UPB_WELLKNOWN_DURATION) ||
10204 is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) {
Paul Yang9bda1f12018-09-22 18:57:43 -070010205 end_object(p);
10206 if (!is_top_level(p)) {
10207 end_subobject(p);
10208 }
10209 return true;
10210 }
10211
10212 return true;
10213}
10214
10215static void start_duration_base(upb_json_parser *p, const char *ptr) {
10216 capture_begin(p, ptr);
10217}
10218
10219static bool end_duration_base(upb_json_parser *p, const char *ptr) {
10220 size_t len;
10221 const char *buf;
10222 char seconds_buf[14];
10223 char nanos_buf[12];
10224 char *end;
10225 int64_t seconds = 0;
10226 int32_t nanos = 0;
10227 double val = 0.0;
10228 const char *seconds_membername = "seconds";
10229 const char *nanos_membername = "nanos";
10230 size_t fraction_start;
10231
10232 if (!capture_end(p, ptr)) {
10233 return false;
10234 }
10235
10236 buf = accumulate_getptr(p, &len);
10237
10238 memset(seconds_buf, 0, 14);
10239 memset(nanos_buf, 0, 12);
10240
10241 /* Find out base end. The maximus duration is 315576000000, which cannot be
10242 * represented by double without losing precision. Thus, we need to handle
10243 * fraction and base separately. */
10244 for (fraction_start = 0; fraction_start < len && buf[fraction_start] != '.';
10245 fraction_start++);
10246
10247 /* Parse base */
10248 memcpy(seconds_buf, buf, fraction_start);
10249 seconds = strtol(seconds_buf, &end, 10);
10250 if (errno == ERANGE || end != seconds_buf + fraction_start) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010251 upb_status_seterrf(p->status, "error parsing duration: %s",
Paul Yang9bda1f12018-09-22 18:57:43 -070010252 seconds_buf);
Paul Yang9bda1f12018-09-22 18:57:43 -070010253 return false;
10254 }
10255
10256 if (seconds > 315576000000) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010257 upb_status_seterrf(p->status, "error parsing duration: "
Paul Yang9bda1f12018-09-22 18:57:43 -070010258 "maximum acceptable value is "
10259 "315576000000");
Paul Yang9bda1f12018-09-22 18:57:43 -070010260 return false;
10261 }
10262
10263 if (seconds < -315576000000) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010264 upb_status_seterrf(p->status, "error parsing duration: "
Paul Yang9bda1f12018-09-22 18:57:43 -070010265 "minimum acceptable value is "
10266 "-315576000000");
Paul Yang9bda1f12018-09-22 18:57:43 -070010267 return false;
10268 }
10269
10270 /* Parse fraction */
10271 nanos_buf[0] = '0';
10272 memcpy(nanos_buf + 1, buf + fraction_start, len - fraction_start);
10273 val = strtod(nanos_buf, &end);
10274 if (errno == ERANGE || end != nanos_buf + len - fraction_start + 1) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010275 upb_status_seterrf(p->status, "error parsing duration: %s",
Paul Yang9bda1f12018-09-22 18:57:43 -070010276 nanos_buf);
Paul Yang9bda1f12018-09-22 18:57:43 -070010277 return false;
10278 }
10279
10280 nanos = val * 1000000000;
10281 if (seconds < 0) nanos = -nanos;
10282
10283 /* Clean up buffer */
10284 multipart_end(p);
10285
10286 /* Set seconds */
10287 start_member(p);
10288 capture_begin(p, seconds_membername);
10289 capture_end(p, seconds_membername + 7);
10290 end_membername(p);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010291 upb_sink_putint64(p->top->sink, parser_getsel(p), seconds);
Paul Yang9bda1f12018-09-22 18:57:43 -070010292 end_member(p);
10293
10294 /* Set nanos */
10295 start_member(p);
10296 capture_begin(p, nanos_membername);
10297 capture_end(p, nanos_membername + 5);
10298 end_membername(p);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010299 upb_sink_putint32(p->top->sink, parser_getsel(p), nanos);
Paul Yang9bda1f12018-09-22 18:57:43 -070010300 end_member(p);
10301
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010302 /* Continue previous arena */
Paul Yang9bda1f12018-09-22 18:57:43 -070010303 multipart_startaccum(p);
10304
10305 return true;
10306}
10307
Paul Yang57b65972019-03-19 22:27:13 -070010308static int parse_timestamp_number(upb_json_parser *p) {
Paul Yang9bda1f12018-09-22 18:57:43 -070010309 size_t len;
10310 const char *buf;
Paul Yang57b65972019-03-19 22:27:13 -070010311 char *end;
10312 int val;
Paul Yang9bda1f12018-09-22 18:57:43 -070010313
Paul Yang57b65972019-03-19 22:27:13 -070010314 /* atoi() and friends unfortunately do not support specifying the length of
10315 * the input string, so we need to force a copy into a NULL-terminated buffer. */
10316 multipart_text(p, "\0", 1, false);
Paul Yang9bda1f12018-09-22 18:57:43 -070010317
10318 buf = accumulate_getptr(p, &len);
Paul Yang57b65972019-03-19 22:27:13 -070010319 val = atoi(buf);
Paul Yang9bda1f12018-09-22 18:57:43 -070010320 multipart_end(p);
10321 multipart_startaccum(p);
10322
Paul Yang57b65972019-03-19 22:27:13 -070010323 return val;
10324}
10325
10326static void start_year(upb_json_parser *p, const char *ptr) {
10327 capture_begin(p, ptr);
10328}
10329
10330static bool end_year(upb_json_parser *p, const char *ptr) {
10331 if (!capture_end(p, ptr)) {
10332 return false;
10333 }
10334 p->tm.tm_year = parse_timestamp_number(p) - 1900;
Paul Yang9bda1f12018-09-22 18:57:43 -070010335 return true;
10336}
10337
Paul Yang57b65972019-03-19 22:27:13 -070010338static void start_month(upb_json_parser *p, const char *ptr) {
10339 capture_begin(p, ptr);
10340}
10341
10342static bool end_month(upb_json_parser *p, const char *ptr) {
10343 if (!capture_end(p, ptr)) {
10344 return false;
10345 }
10346 p->tm.tm_mon = parse_timestamp_number(p) - 1;
10347 return true;
10348}
10349
10350static void start_day(upb_json_parser *p, const char *ptr) {
10351 capture_begin(p, ptr);
10352}
10353
10354static bool end_day(upb_json_parser *p, const char *ptr) {
10355 if (!capture_end(p, ptr)) {
10356 return false;
10357 }
10358 p->tm.tm_mday = parse_timestamp_number(p);
10359 return true;
10360}
10361
10362static void start_hour(upb_json_parser *p, const char *ptr) {
10363 capture_begin(p, ptr);
10364}
10365
10366static bool end_hour(upb_json_parser *p, const char *ptr) {
10367 if (!capture_end(p, ptr)) {
10368 return false;
10369 }
10370 p->tm.tm_hour = parse_timestamp_number(p);
10371 return true;
10372}
10373
10374static void start_minute(upb_json_parser *p, const char *ptr) {
10375 capture_begin(p, ptr);
10376}
10377
10378static bool end_minute(upb_json_parser *p, const char *ptr) {
10379 if (!capture_end(p, ptr)) {
10380 return false;
10381 }
10382 p->tm.tm_min = parse_timestamp_number(p);
10383 return true;
10384}
10385
10386static void start_second(upb_json_parser *p, const char *ptr) {
10387 capture_begin(p, ptr);
10388}
10389
10390static bool end_second(upb_json_parser *p, const char *ptr) {
10391 if (!capture_end(p, ptr)) {
10392 return false;
10393 }
10394 p->tm.tm_sec = parse_timestamp_number(p);
10395 return true;
10396}
10397
10398static void start_timestamp_base(upb_json_parser *p) {
10399 memset(&p->tm, 0, sizeof(struct tm));
10400}
10401
Paul Yang9bda1f12018-09-22 18:57:43 -070010402static void start_timestamp_fraction(upb_json_parser *p, const char *ptr) {
10403 capture_begin(p, ptr);
10404}
10405
10406static bool end_timestamp_fraction(upb_json_parser *p, const char *ptr) {
10407 size_t len;
10408 const char *buf;
10409 char nanos_buf[12];
10410 char *end;
10411 double val = 0.0;
10412 int32_t nanos;
10413 const char *nanos_membername = "nanos";
10414
10415 memset(nanos_buf, 0, 12);
10416
10417 if (!capture_end(p, ptr)) {
10418 return false;
10419 }
10420
10421 buf = accumulate_getptr(p, &len);
10422
10423 if (len > 10) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010424 upb_status_seterrf(p->status,
Paul Yang9bda1f12018-09-22 18:57:43 -070010425 "error parsing timestamp: at most 9-digit fraction.");
Paul Yang9bda1f12018-09-22 18:57:43 -070010426 return false;
10427 }
10428
10429 /* Parse nanos */
10430 nanos_buf[0] = '0';
10431 memcpy(nanos_buf + 1, buf, len);
10432 val = strtod(nanos_buf, &end);
10433
10434 if (errno == ERANGE || end != nanos_buf + len + 1) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010435 upb_status_seterrf(p->status, "error parsing timestamp nanos: %s",
Paul Yang9bda1f12018-09-22 18:57:43 -070010436 nanos_buf);
Paul Yang9bda1f12018-09-22 18:57:43 -070010437 return false;
10438 }
10439
10440 nanos = val * 1000000000;
10441
10442 /* Clean up previous environment */
10443 multipart_end(p);
10444
10445 /* Set nanos */
10446 start_member(p);
10447 capture_begin(p, nanos_membername);
10448 capture_end(p, nanos_membername + 5);
10449 end_membername(p);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010450 upb_sink_putint32(p->top->sink, parser_getsel(p), nanos);
Paul Yang9bda1f12018-09-22 18:57:43 -070010451 end_member(p);
10452
10453 /* Continue previous environment */
10454 multipart_startaccum(p);
10455
10456 return true;
10457}
10458
10459static void start_timestamp_zone(upb_json_parser *p, const char *ptr) {
10460 capture_begin(p, ptr);
10461}
10462
Paul Yang7597f8a2019-05-09 20:51:35 -070010463#define EPOCH_YEAR 1970
10464#define TM_YEAR_BASE 1900
10465
10466static bool isleap(int year) {
10467 return (year % 4) == 0 && (year % 100 != 0 || (year % 400) == 0);
10468}
10469
10470const unsigned short int __mon_yday[2][13] = {
10471 /* Normal years. */
10472 { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
10473 /* Leap years. */
10474 { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
10475};
10476
10477int64_t epoch(int year, int yday, int hour, int min, int sec) {
10478 int64_t years = year - EPOCH_YEAR;
10479
10480 int64_t leap_days = years / 4 - years / 100 + years / 400;
10481
10482 int64_t days = years * 365 + yday + leap_days;
10483 int64_t hours = days * 24 + hour;
10484 int64_t mins = hours * 60 + min;
10485 int64_t secs = mins * 60 + sec;
10486 return secs;
10487}
10488
10489
10490static int64_t upb_mktime(const struct tm *tp) {
10491 int sec = tp->tm_sec;
10492 int min = tp->tm_min;
10493 int hour = tp->tm_hour;
10494 int mday = tp->tm_mday;
10495 int mon = tp->tm_mon;
10496 int year = tp->tm_year + TM_YEAR_BASE;
10497
10498 /* Calculate day of year from year, month, and day of month. */
10499 int mon_yday = ((__mon_yday[isleap(year)][mon]) - 1);
10500 int yday = mon_yday + mday;
10501
10502 return epoch(year, yday, hour, min, sec);
10503}
10504
Paul Yang9bda1f12018-09-22 18:57:43 -070010505static bool end_timestamp_zone(upb_json_parser *p, const char *ptr) {
10506 size_t len;
10507 const char *buf;
10508 int hours;
10509 int64_t seconds;
10510 const char *seconds_membername = "seconds";
10511
10512 if (!capture_end(p, ptr)) {
10513 return false;
10514 }
10515
10516 buf = accumulate_getptr(p, &len);
10517
10518 if (buf[0] != 'Z') {
10519 if (sscanf(buf + 1, "%2d:00", &hours) != 1) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010520 upb_status_seterrf(p->status, "error parsing timestamp offset");
Paul Yang9bda1f12018-09-22 18:57:43 -070010521 return false;
10522 }
10523
10524 if (buf[0] == '+') {
10525 hours = -hours;
10526 }
10527
10528 p->tm.tm_hour += hours;
10529 }
10530
10531 /* Normalize tm */
Paul Yang7597f8a2019-05-09 20:51:35 -070010532 seconds = upb_mktime(&p->tm);
Paul Yang9bda1f12018-09-22 18:57:43 -070010533
10534 /* Check timestamp boundary */
10535 if (seconds < -62135596800) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010536 upb_status_seterrf(p->status, "error parsing timestamp: "
Paul Yang9bda1f12018-09-22 18:57:43 -070010537 "minimum acceptable value is "
10538 "0001-01-01T00:00:00Z");
Paul Yang9bda1f12018-09-22 18:57:43 -070010539 return false;
10540 }
10541
10542 /* Clean up previous environment */
10543 multipart_end(p);
10544
10545 /* Set seconds */
10546 start_member(p);
10547 capture_begin(p, seconds_membername);
10548 capture_end(p, seconds_membername + 7);
10549 end_membername(p);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010550 upb_sink_putint64(p->top->sink, parser_getsel(p), seconds);
Paul Yang9bda1f12018-09-22 18:57:43 -070010551 end_member(p);
10552
10553 /* Continue previous environment */
10554 multipart_startaccum(p);
10555
Bo Yange3ee7162018-08-10 18:09:02 +000010556 return true;
10557}
10558
Paul Yangc4f2a922019-01-17 10:18:43 -080010559static void start_fieldmask_path_text(upb_json_parser *p, const char *ptr) {
10560 capture_begin(p, ptr);
10561}
10562
10563static bool end_fieldmask_path_text(upb_json_parser *p, const char *ptr) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010564 return capture_end(p, ptr);
Paul Yangc4f2a922019-01-17 10:18:43 -080010565}
10566
10567static bool start_fieldmask_path(upb_json_parser *p) {
10568 upb_jsonparser_frame *inner;
10569 upb_selector_t sel;
10570
10571 if (!check_stack(p)) return false;
10572
10573 /* Start a new parser frame: parser frames correspond one-to-one with
10574 * handler frames, and string events occur in a sub-frame. */
Adam Cozzette8645d892019-03-26 14:32:20 -070010575 inner = start_jsonparser_frame(p);
Paul Yangc4f2a922019-01-17 10:18:43 -080010576 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010577 upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
Paul Yangc4f2a922019-01-17 10:18:43 -080010578 inner->m = p->top->m;
10579 inner->f = p->top->f;
Paul Yangc4f2a922019-01-17 10:18:43 -080010580 p->top = inner;
10581
10582 multipart_startaccum(p);
10583 return true;
10584}
10585
10586static bool lower_camel_push(
10587 upb_json_parser *p, upb_selector_t sel, const char *ptr, size_t len) {
10588 const char *limit = ptr + len;
10589 bool first = true;
10590 for (;ptr < limit; ptr++) {
10591 if (*ptr >= 'A' && *ptr <= 'Z' && !first) {
10592 char lower = tolower(*ptr);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010593 upb_sink_putstring(p->top->sink, sel, "_", 1, NULL);
10594 upb_sink_putstring(p->top->sink, sel, &lower, 1, NULL);
Paul Yangc4f2a922019-01-17 10:18:43 -080010595 } else {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010596 upb_sink_putstring(p->top->sink, sel, ptr, 1, NULL);
Paul Yangc4f2a922019-01-17 10:18:43 -080010597 }
10598 first = false;
10599 }
10600 return true;
10601}
10602
10603static bool end_fieldmask_path(upb_json_parser *p) {
10604 upb_selector_t sel;
10605
10606 if (!lower_camel_push(
10607 p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
10608 p->accumulated, p->accumulated_len)) {
10609 return false;
10610 }
10611
10612 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010613 upb_sink_endstr(p->top->sink, sel);
Paul Yangc4f2a922019-01-17 10:18:43 -080010614 p->top--;
10615
10616 multipart_end(p);
10617 return true;
10618}
10619
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010620static void start_member(upb_json_parser *p) {
Paul Yange0e54662016-09-15 11:09:01 -070010621 UPB_ASSERT(!p->top->f);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010622 multipart_startaccum(p);
10623}
10624
10625/* Helper: invoked during parse_mapentry() to emit the mapentry message's key
10626 * field based on the current contents of the accumulate buffer. */
10627static bool parse_mapentry_key(upb_json_parser *p) {
10628
10629 size_t len;
10630 const char *buf = accumulate_getptr(p, &len);
10631
10632 /* Emit the key field. We do a bit of ad-hoc parsing here because the
10633 * parser state machine has already decided that this is a string field
10634 * name, and we are reinterpreting it as some arbitrary key type. In
10635 * particular, integer and bool keys are quoted, so we need to parse the
10636 * quoted string contents here. */
10637
10638 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
10639 if (p->top->f == NULL) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010640 upb_status_seterrmsg(p->status, "mapentry message has no key");
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010641 return false;
10642 }
10643 switch (upb_fielddef_type(p->top->f)) {
10644 case UPB_TYPE_INT32:
10645 case UPB_TYPE_INT64:
10646 case UPB_TYPE_UINT32:
10647 case UPB_TYPE_UINT64:
10648 /* Invoke end_number. The accum buffer has the number's text already. */
Paul Yang60327462017-10-09 12:39:13 -070010649 if (!parse_number(p, true)) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010650 return false;
10651 }
10652 break;
10653 case UPB_TYPE_BOOL:
10654 if (len == 4 && !strncmp(buf, "true", 4)) {
10655 if (!parser_putbool(p, true)) {
10656 return false;
10657 }
10658 } else if (len == 5 && !strncmp(buf, "false", 5)) {
10659 if (!parser_putbool(p, false)) {
10660 return false;
10661 }
10662 } else {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010663 upb_status_seterrmsg(p->status,
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010664 "Map bool key not 'true' or 'false'");
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010665 return false;
10666 }
10667 multipart_end(p);
10668 break;
10669 case UPB_TYPE_STRING:
10670 case UPB_TYPE_BYTES: {
10671 upb_sink subsink;
10672 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010673 upb_sink_startstr(p->top->sink, sel, len, &subsink);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010674 sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010675 upb_sink_putstring(subsink, sel, buf, len, NULL);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010676 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010677 upb_sink_endstr(subsink, sel);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010678 multipart_end(p);
10679 break;
10680 }
10681 default:
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010682 upb_status_seterrmsg(p->status, "Invalid field type for map key");
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010683 return false;
10684 }
10685
10686 return true;
10687}
10688
10689/* Helper: emit one map entry (as a submessage in the map field sequence). This
10690 * is invoked from end_membername(), at the end of the map entry's key string,
10691 * with the map key in the accumulate buffer. It parses the key from that
10692 * buffer, emits the handler calls to start the mapentry submessage (setting up
10693 * its subframe in the process), and sets up state in the subframe so that the
10694 * value parser (invoked next) will emit the mapentry's value field and then
10695 * end the mapentry message. */
10696
10697static bool handle_mapentry(upb_json_parser *p) {
10698 const upb_fielddef *mapfield;
10699 const upb_msgdef *mapentrymsg;
10700 upb_jsonparser_frame *inner;
10701 upb_selector_t sel;
10702
10703 /* Map entry: p->top->sink is the seq frame, so we need to start a frame
10704 * for the mapentry itself, and then set |f| in that frame so that the map
10705 * value field is parsed, and also set a flag to end the frame after the
10706 * map-entry value is parsed. */
10707 if (!check_stack(p)) return false;
10708
10709 mapfield = p->top->mapfield;
10710 mapentrymsg = upb_fielddef_msgsubdef(mapfield);
10711
Adam Cozzette8645d892019-03-26 14:32:20 -070010712 inner = start_jsonparser_frame(p);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010713 p->top->f = mapfield;
10714 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010715 upb_sink_startsubmsg(p->top->sink, sel, &inner->sink);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010716 inner->m = mapentrymsg;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010717 inner->mapfield = mapfield;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010718
10719 /* Don't set this to true *yet* -- we reuse parsing handlers below to push
10720 * the key field value to the sink, and these handlers will pop the frame
10721 * if they see is_mapentry (when invoked by the parser state machine, they
10722 * would have just seen the map-entry value, not key). */
10723 inner->is_mapentry = false;
10724 p->top = inner;
10725
10726 /* send STARTMSG in submsg frame. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010727 upb_sink_startmsg(p->top->sink);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010728
10729 parse_mapentry_key(p);
10730
10731 /* Set up the value field to receive the map-entry value. */
10732 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
10733 p->top->is_mapentry = true; /* set up to pop frame after value is parsed. */
10734 p->top->mapfield = mapfield;
10735 if (p->top->f == NULL) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010736 upb_status_seterrmsg(p->status, "mapentry message has no value");
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010737 return false;
10738 }
10739
10740 return true;
10741}
10742
10743static bool end_membername(upb_json_parser *p) {
Paul Yange0e54662016-09-15 11:09:01 -070010744 UPB_ASSERT(!p->top->f);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010745
Paul Yang26eeec92018-07-09 14:29:23 -070010746 if (!p->top->m) {
Paul Yangd2d4b402018-10-12 13:46:26 -070010747 p->top->is_unknown_field = true;
10748 multipart_end(p);
Paul Yang26eeec92018-07-09 14:29:23 -070010749 return true;
10750 }
10751
Paul Yang8faa7782018-12-26 10:36:09 -080010752 if (p->top->is_any) {
10753 return end_any_membername(p);
10754 } else if (p->top->is_map) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010755 return handle_mapentry(p);
10756 } else {
10757 size_t len;
10758 const char *buf = accumulate_getptr(p, &len);
Paul Yange0e54662016-09-15 11:09:01 -070010759 upb_value v;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010760
Paul Yange0e54662016-09-15 11:09:01 -070010761 if (upb_strtable_lookup2(p->top->name_table, buf, len, &v)) {
10762 p->top->f = upb_value_getconstptr(v);
10763 multipart_end(p);
10764
10765 return true;
Paul Yang26eeec92018-07-09 14:29:23 -070010766 } else if (p->ignore_json_unknown) {
Paul Yangd2d4b402018-10-12 13:46:26 -070010767 p->top->is_unknown_field = true;
Paul Yang26eeec92018-07-09 14:29:23 -070010768 multipart_end(p);
10769 return true;
Paul Yange0e54662016-09-15 11:09:01 -070010770 } else {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010771 upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010772 return false;
10773 }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010774 }
10775}
10776
Paul Yang8faa7782018-12-26 10:36:09 -080010777static bool end_any_membername(upb_json_parser *p) {
10778 size_t len;
10779 const char *buf = accumulate_getptr(p, &len);
10780 upb_value v;
10781
10782 if (len == 5 && strncmp(buf, "@type", len) == 0) {
10783 upb_strtable_lookup2(p->top->name_table, "type_url", 8, &v);
10784 p->top->f = upb_value_getconstptr(v);
10785 multipart_end(p);
10786 return true;
10787 } else {
10788 p->top->is_unknown_field = true;
10789 multipart_end(p);
10790 return true;
10791 }
10792}
10793
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010794static void end_member(upb_json_parser *p) {
10795 /* If we just parsed a map-entry value, end that frame too. */
10796 if (p->top->is_mapentry) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010797 upb_selector_t sel;
10798 bool ok;
10799 const upb_fielddef *mapfield;
10800
Paul Yange0e54662016-09-15 11:09:01 -070010801 UPB_ASSERT(p->top > p->stack);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010802 /* send ENDMSG on submsg. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010803 upb_sink_endmsg(p->top->sink, p->status);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010804 mapfield = p->top->mapfield;
10805
10806 /* send ENDSUBMSG in repeated-field-of-mapentries frame. */
10807 p->top--;
10808 ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel);
Paul Yange0e54662016-09-15 11:09:01 -070010809 UPB_ASSERT(ok);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010810 upb_sink_endsubmsg(p->top->sink, sel);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010811 }
10812
10813 p->top->f = NULL;
Paul Yangd2d4b402018-10-12 13:46:26 -070010814 p->top->is_unknown_field = false;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010815}
10816
Paul Yang8faa7782018-12-26 10:36:09 -080010817static void start_any_member(upb_json_parser *p, const char *ptr) {
10818 start_member(p);
10819 json_parser_any_frame_set_after_type_url_start_once(p->top->any_frame, ptr);
10820}
10821
10822static void end_any_member(upb_json_parser *p, const char *ptr) {
10823 json_parser_any_frame_set_before_type_url_end(p->top->any_frame, ptr);
10824 end_member(p);
10825}
10826
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010827static bool start_subobject(upb_json_parser *p) {
Paul Yangd2d4b402018-10-12 13:46:26 -070010828 if (p->top->is_unknown_field) {
Paul Yang26eeec92018-07-09 14:29:23 -070010829 upb_jsonparser_frame *inner;
10830 if (!check_stack(p)) return false;
10831
Adam Cozzette8645d892019-03-26 14:32:20 -070010832 p->top = start_jsonparser_frame(p);
Paul Yang26eeec92018-07-09 14:29:23 -070010833 return true;
10834 }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010835
10836 if (upb_fielddef_ismap(p->top->f)) {
10837 upb_jsonparser_frame *inner;
10838 upb_selector_t sel;
10839
10840 /* Beginning of a map. Start a new parser frame in a repeated-field
10841 * context. */
10842 if (!check_stack(p)) return false;
10843
Adam Cozzette8645d892019-03-26 14:32:20 -070010844 inner = start_jsonparser_frame(p);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010845 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010846 upb_sink_startseq(p->top->sink, sel, &inner->sink);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010847 inner->m = upb_fielddef_msgsubdef(p->top->f);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010848 inner->mapfield = p->top->f;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010849 inner->is_map = true;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010850 p->top = inner;
10851
10852 return true;
10853 } else if (upb_fielddef_issubmsg(p->top->f)) {
10854 upb_jsonparser_frame *inner;
10855 upb_selector_t sel;
10856
10857 /* Beginning of a subobject. Start a new parser frame in the submsg
10858 * context. */
10859 if (!check_stack(p)) return false;
10860
Adam Cozzette8645d892019-03-26 14:32:20 -070010861 inner = start_jsonparser_frame(p);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010862 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010863 upb_sink_startsubmsg(p->top->sink, sel, &inner->sink);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010864 inner->m = upb_fielddef_msgsubdef(p->top->f);
Paul Yange0e54662016-09-15 11:09:01 -070010865 set_name_table(p, inner);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010866 p->top = inner;
10867
Paul Yang8faa7782018-12-26 10:36:09 -080010868 if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) {
10869 p->top->is_any = true;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010870 p->top->any_frame = json_parser_any_frame_new(p);
Paul Yang8faa7782018-12-26 10:36:09 -080010871 } else {
10872 p->top->is_any = false;
10873 p->top->any_frame = NULL;
10874 }
10875
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010876 return true;
10877 } else {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010878 upb_status_seterrf(p->status,
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010879 "Object specified for non-message/group field: %s",
10880 upb_fielddef_name(p->top->f));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010881 return false;
10882 }
10883}
10884
Paul Yang9bda1f12018-09-22 18:57:43 -070010885static bool start_subobject_full(upb_json_parser *p) {
10886 if (is_top_level(p)) {
10887 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
10888 start_value_object(p, VALUE_STRUCTVALUE);
10889 if (!start_subobject(p)) return false;
10890 start_structvalue_object(p);
10891 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) {
10892 start_structvalue_object(p);
10893 } else {
10894 return true;
10895 }
10896 } else if (is_wellknown_field(p, UPB_WELLKNOWN_STRUCT)) {
10897 if (!start_subobject(p)) return false;
10898 start_structvalue_object(p);
10899 } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
10900 if (!start_subobject(p)) return false;
10901 start_value_object(p, VALUE_STRUCTVALUE);
10902 if (!start_subobject(p)) return false;
10903 start_structvalue_object(p);
10904 }
10905
10906 return start_subobject(p);
10907}
10908
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010909static void end_subobject(upb_json_parser *p) {
Bo Yange3ee7162018-08-10 18:09:02 +000010910 if (is_top_level(p)) {
10911 return;
10912 }
10913
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010914 if (p->top->is_map) {
10915 upb_selector_t sel;
10916 p->top--;
10917 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010918 upb_sink_endseq(p->top->sink, sel);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010919 } else {
10920 upb_selector_t sel;
Paul Yang26eeec92018-07-09 14:29:23 -070010921 bool is_unknown = p->top->m == NULL;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010922 p->top--;
Paul Yang26eeec92018-07-09 14:29:23 -070010923 if (!is_unknown) {
10924 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010925 upb_sink_endsubmsg(p->top->sink, sel);
Paul Yang26eeec92018-07-09 14:29:23 -070010926 }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010927 }
10928}
10929
Paul Yang9bda1f12018-09-22 18:57:43 -070010930static void end_subobject_full(upb_json_parser *p) {
10931 end_subobject(p);
10932
10933 if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) {
10934 end_structvalue_object(p);
10935 if (!is_top_level(p)) {
10936 end_subobject(p);
10937 }
10938 }
10939
10940 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
10941 end_value_object(p);
10942 if (!is_top_level(p)) {
10943 end_subobject(p);
10944 }
10945 }
10946}
10947
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010948static bool start_array(upb_json_parser *p) {
10949 upb_jsonparser_frame *inner;
10950 upb_selector_t sel;
10951
Paul Yang9bda1f12018-09-22 18:57:43 -070010952 if (is_top_level(p)) {
10953 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
10954 start_value_object(p, VALUE_LISTVALUE);
10955 if (!start_subobject(p)) return false;
10956 start_listvalue_object(p);
10957 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) {
10958 start_listvalue_object(p);
10959 } else {
10960 return false;
10961 }
Paul Yang4b145b12019-03-12 10:56:58 -070010962 } else if (is_wellknown_field(p, UPB_WELLKNOWN_LISTVALUE) &&
10963 (!upb_fielddef_isseq(p->top->f) ||
10964 p->top->is_repeated)) {
Paul Yang9bda1f12018-09-22 18:57:43 -070010965 if (!start_subobject(p)) return false;
10966 start_listvalue_object(p);
Paul Yang4b145b12019-03-12 10:56:58 -070010967 } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE) &&
10968 (!upb_fielddef_isseq(p->top->f) ||
10969 p->top->is_repeated)) {
Paul Yang9bda1f12018-09-22 18:57:43 -070010970 if (!start_subobject(p)) return false;
10971 start_value_object(p, VALUE_LISTVALUE);
10972 if (!start_subobject(p)) return false;
10973 start_listvalue_object(p);
10974 }
10975
Paul Yangd2d4b402018-10-12 13:46:26 -070010976 if (p->top->is_unknown_field) {
Adam Cozzette8645d892019-03-26 14:32:20 -070010977 inner = start_jsonparser_frame(p);
Paul Yangd2d4b402018-10-12 13:46:26 -070010978 inner->is_unknown_field = true;
10979 p->top = inner;
10980
10981 return true;
10982 }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010983
10984 if (!upb_fielddef_isseq(p->top->f)) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010985 upb_status_seterrf(p->status,
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010986 "Array specified for non-repeated field: %s",
10987 upb_fielddef_name(p->top->f));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010988 return false;
10989 }
10990
10991 if (!check_stack(p)) return false;
10992
Adam Cozzette8645d892019-03-26 14:32:20 -070010993 inner = start_jsonparser_frame(p);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010994 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080010995 upb_sink_startseq(p->top->sink, sel, &inner->sink);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010996 inner->m = p->top->m;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010997 inner->f = p->top->f;
Paul Yang4b145b12019-03-12 10:56:58 -070010998 inner->is_repeated = true;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070010999 p->top = inner;
11000
11001 return true;
11002}
11003
11004static void end_array(upb_json_parser *p) {
11005 upb_selector_t sel;
11006
Paul Yange0e54662016-09-15 11:09:01 -070011007 UPB_ASSERT(p->top > p->stack);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011008
11009 p->top--;
Paul Yangd2d4b402018-10-12 13:46:26 -070011010
11011 if (p->top->is_unknown_field) {
11012 return;
11013 }
11014
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011015 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011016 upb_sink_endseq(p->top->sink, sel);
Paul Yang9bda1f12018-09-22 18:57:43 -070011017
11018 if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) {
11019 end_listvalue_object(p);
11020 if (!is_top_level(p)) {
11021 end_subobject(p);
11022 }
11023 }
11024
11025 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
11026 end_value_object(p);
11027 if (!is_top_level(p)) {
11028 end_subobject(p);
11029 }
11030 }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011031}
11032
11033static void start_object(upb_json_parser *p) {
Paul Yangd2d4b402018-10-12 13:46:26 -070011034 if (!p->top->is_map && p->top->m != NULL) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011035 upb_sink_startmsg(p->top->sink);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011036 }
11037}
11038
11039static void end_object(upb_json_parser *p) {
Paul Yangd2d4b402018-10-12 13:46:26 -070011040 if (!p->top->is_map && p->top->m != NULL) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011041 upb_sink_endmsg(p->top->sink, p->status);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011042 }
11043}
11044
Paul Yang8faa7782018-12-26 10:36:09 -080011045static void start_any_object(upb_json_parser *p, const char *ptr) {
11046 start_object(p);
11047 p->top->any_frame->before_type_url_start = ptr;
11048 p->top->any_frame->before_type_url_end = ptr;
11049}
11050
11051static bool end_any_object(upb_json_parser *p, const char *ptr) {
11052 const char *value_membername = "value";
11053 bool is_well_known_packed = false;
11054 const char *packed_end = ptr + 1;
11055 upb_selector_t sel;
11056 upb_jsonparser_frame *inner;
11057
11058 if (json_parser_any_frame_has_value(p->top->any_frame) &&
11059 !json_parser_any_frame_has_type_url(p->top->any_frame)) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011060 upb_status_seterrmsg(p->status, "No valid type url");
Paul Yang8faa7782018-12-26 10:36:09 -080011061 return false;
11062 }
11063
11064 /* Well known types data is represented as value field. */
11065 if (upb_msgdef_wellknowntype(p->top->any_frame->parser->top->m) !=
11066 UPB_WELLKNOWN_UNSPECIFIED) {
11067 is_well_known_packed = true;
11068
11069 if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) {
11070 p->top->any_frame->before_type_url_start =
11071 memchr(p->top->any_frame->before_type_url_start, ':',
11072 p->top->any_frame->before_type_url_end -
11073 p->top->any_frame->before_type_url_start);
11074 if (p->top->any_frame->before_type_url_start == NULL) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011075 upb_status_seterrmsg(p->status, "invalid data for well known type.");
Paul Yang8faa7782018-12-26 10:36:09 -080011076 return false;
11077 }
11078 p->top->any_frame->before_type_url_start++;
11079 }
11080
11081 if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
11082 p->top->any_frame->after_type_url_start =
11083 memchr(p->top->any_frame->after_type_url_start, ':',
11084 (ptr + 1) -
11085 p->top->any_frame->after_type_url_start);
11086 if (p->top->any_frame->after_type_url_start == NULL) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011087 upb_status_seterrmsg(p->status, "Invalid data for well known type.");
Paul Yang8faa7782018-12-26 10:36:09 -080011088 return false;
11089 }
11090 p->top->any_frame->after_type_url_start++;
11091 packed_end = ptr;
11092 }
11093 }
11094
11095 if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) {
11096 if (!parse(p->top->any_frame->parser, NULL,
11097 p->top->any_frame->before_type_url_start,
11098 p->top->any_frame->before_type_url_end -
11099 p->top->any_frame->before_type_url_start, NULL)) {
11100 return false;
11101 }
11102 } else {
11103 if (!is_well_known_packed) {
11104 if (!parse(p->top->any_frame->parser, NULL, "{", 1, NULL)) {
11105 return false;
11106 }
11107 }
11108 }
11109
11110 if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame) &&
11111 json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
11112 if (!parse(p->top->any_frame->parser, NULL, ",", 1, NULL)) {
11113 return false;
11114 }
11115 }
11116
11117 if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
11118 if (!parse(p->top->any_frame->parser, NULL,
11119 p->top->any_frame->after_type_url_start,
11120 packed_end - p->top->any_frame->after_type_url_start, NULL)) {
11121 return false;
11122 }
11123 } else {
11124 if (!is_well_known_packed) {
11125 if (!parse(p->top->any_frame->parser, NULL, "}", 1, NULL)) {
11126 return false;
11127 }
11128 }
11129 }
11130
11131 if (!end(p->top->any_frame->parser, NULL)) {
11132 return false;
11133 }
11134
11135 p->top->is_any = false;
11136
11137 /* Set value */
11138 start_member(p);
11139 capture_begin(p, value_membername);
11140 capture_end(p, value_membername + 5);
11141 end_membername(p);
11142
11143 if (!check_stack(p)) return false;
11144 inner = p->top + 1;
11145
11146 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011147 upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
Paul Yang8faa7782018-12-26 10:36:09 -080011148 sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011149 upb_sink_putstring(inner->sink, sel, p->top->any_frame->stringsink.ptr,
Paul Yang8faa7782018-12-26 10:36:09 -080011150 p->top->any_frame->stringsink.len, NULL);
11151 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011152 upb_sink_endstr(inner->sink, sel);
Paul Yang8faa7782018-12-26 10:36:09 -080011153
11154 end_member(p);
11155
11156 end_object(p);
11157
11158 /* Deallocate any parse frame. */
11159 json_parser_any_frame_free(p->top->any_frame);
Paul Yang8faa7782018-12-26 10:36:09 -080011160
11161 return true;
11162}
11163
Bo Yange3ee7162018-08-10 18:09:02 +000011164static bool is_string_wrapper(const upb_msgdef *m) {
Paul Yang9bda1f12018-09-22 18:57:43 -070011165 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
11166 return type == UPB_WELLKNOWN_STRINGVALUE ||
11167 type == UPB_WELLKNOWN_BYTESVALUE;
Bo Yange3ee7162018-08-10 18:09:02 +000011168}
11169
Paul Yangc4f2a922019-01-17 10:18:43 -080011170static bool is_fieldmask(const upb_msgdef *m) {
11171 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
11172 return type == UPB_WELLKNOWN_FIELDMASK;
11173}
11174
11175static void start_fieldmask_object(upb_json_parser *p) {
11176 const char *membername = "paths";
11177
11178 start_object(p);
11179
11180 /* Set up context for parsing value */
11181 start_member(p);
11182 capture_begin(p, membername);
11183 capture_end(p, membername + 5);
11184 end_membername(p);
11185
11186 start_array(p);
11187}
11188
11189static void end_fieldmask_object(upb_json_parser *p) {
11190 end_array(p);
11191 end_member(p);
11192 end_object(p);
11193}
11194
Bo Yange3ee7162018-08-10 18:09:02 +000011195static void start_wrapper_object(upb_json_parser *p) {
11196 const char *membername = "value";
11197
11198 start_object(p);
11199
11200 /* Set up context for parsing value */
11201 start_member(p);
11202 capture_begin(p, membername);
11203 capture_end(p, membername + 5);
11204 end_membername(p);
11205}
11206
11207static void end_wrapper_object(upb_json_parser *p) {
11208 end_member(p);
11209 end_object(p);
11210}
11211
Paul Yang9bda1f12018-09-22 18:57:43 -070011212static void start_value_object(upb_json_parser *p, int value_type) {
11213 const char *nullmember = "null_value";
11214 const char *numbermember = "number_value";
11215 const char *stringmember = "string_value";
11216 const char *boolmember = "bool_value";
11217 const char *structmember = "struct_value";
11218 const char *listmember = "list_value";
11219 const char *membername = "";
11220
11221 switch (value_type) {
11222 case VALUE_NULLVALUE:
11223 membername = nullmember;
11224 break;
11225 case VALUE_NUMBERVALUE:
11226 membername = numbermember;
11227 break;
11228 case VALUE_STRINGVALUE:
11229 membername = stringmember;
11230 break;
11231 case VALUE_BOOLVALUE:
11232 membername = boolmember;
11233 break;
11234 case VALUE_STRUCTVALUE:
11235 membername = structmember;
11236 break;
11237 case VALUE_LISTVALUE:
11238 membername = listmember;
11239 break;
11240 }
11241
11242 start_object(p);
11243
11244 /* Set up context for parsing value */
11245 start_member(p);
11246 capture_begin(p, membername);
11247 capture_end(p, membername + strlen(membername));
11248 end_membername(p);
11249}
11250
11251static void end_value_object(upb_json_parser *p) {
11252 end_member(p);
11253 end_object(p);
11254}
11255
11256static void start_listvalue_object(upb_json_parser *p) {
11257 const char *membername = "values";
11258
11259 start_object(p);
11260
11261 /* Set up context for parsing value */
11262 start_member(p);
11263 capture_begin(p, membername);
11264 capture_end(p, membername + strlen(membername));
11265 end_membername(p);
11266}
11267
11268static void end_listvalue_object(upb_json_parser *p) {
11269 end_member(p);
11270 end_object(p);
11271}
11272
11273static void start_structvalue_object(upb_json_parser *p) {
11274 const char *membername = "fields";
11275
11276 start_object(p);
11277
11278 /* Set up context for parsing value */
11279 start_member(p);
11280 capture_begin(p, membername);
11281 capture_end(p, membername + strlen(membername));
11282 end_membername(p);
11283}
11284
11285static void end_structvalue_object(upb_json_parser *p) {
11286 end_member(p);
11287 end_object(p);
11288}
11289
Bo Yange3ee7162018-08-10 18:09:02 +000011290static bool is_top_level(upb_json_parser *p) {
Paul Yangd2d4b402018-10-12 13:46:26 -070011291 return p->top == p->stack && p->top->f == NULL && !p->top->is_unknown_field;
Bo Yange3ee7162018-08-10 18:09:02 +000011292}
11293
Paul Yang9bda1f12018-09-22 18:57:43 -070011294static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type) {
11295 return p->top->m != NULL && upb_msgdef_wellknowntype(p->top->m) == type;
11296}
11297
11298static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type) {
11299 return p->top->f != NULL &&
11300 upb_fielddef_issubmsg(p->top->f) &&
11301 (upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(p->top->f))
11302 == type);
11303}
11304
Bo Yange3ee7162018-08-10 18:09:02 +000011305static bool does_number_wrapper_start(upb_json_parser *p) {
11306 return p->top->f != NULL &&
11307 upb_fielddef_issubmsg(p->top->f) &&
Paul Yang9bda1f12018-09-22 18:57:43 -070011308 upb_msgdef_isnumberwrapper(upb_fielddef_msgsubdef(p->top->f));
Bo Yange3ee7162018-08-10 18:09:02 +000011309}
11310
11311static bool does_number_wrapper_end(upb_json_parser *p) {
Paul Yang9bda1f12018-09-22 18:57:43 -070011312 return p->top->m != NULL && upb_msgdef_isnumberwrapper(p->top->m);
Bo Yange3ee7162018-08-10 18:09:02 +000011313}
11314
11315static bool is_number_wrapper_object(upb_json_parser *p) {
Paul Yang9bda1f12018-09-22 18:57:43 -070011316 return p->top->m != NULL && upb_msgdef_isnumberwrapper(p->top->m);
Bo Yange3ee7162018-08-10 18:09:02 +000011317}
11318
11319static bool does_string_wrapper_start(upb_json_parser *p) {
11320 return p->top->f != NULL &&
11321 upb_fielddef_issubmsg(p->top->f) &&
11322 is_string_wrapper(upb_fielddef_msgsubdef(p->top->f));
11323}
11324
11325static bool does_string_wrapper_end(upb_json_parser *p) {
11326 return p->top->m != NULL && is_string_wrapper(p->top->m);
11327}
11328
11329static bool is_string_wrapper_object(upb_json_parser *p) {
11330 return p->top->m != NULL && is_string_wrapper(p->top->m);
11331}
11332
Paul Yangc4f2a922019-01-17 10:18:43 -080011333static bool does_fieldmask_start(upb_json_parser *p) {
11334 return p->top->f != NULL &&
11335 upb_fielddef_issubmsg(p->top->f) &&
11336 is_fieldmask(upb_fielddef_msgsubdef(p->top->f));
11337}
11338
11339static bool does_fieldmask_end(upb_json_parser *p) {
11340 return p->top->m != NULL && is_fieldmask(p->top->m);
11341}
11342
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011343#define CHECK_RETURN_TOP(x) if (!(x)) goto error
11344
11345
11346/* The actual parser **********************************************************/
11347
11348/* What follows is the Ragel parser itself. The language is specified in Ragel
11349 * and the actions call our C functions above.
11350 *
11351 * Ragel has an extensive set of functionality, and we use only a small part of
11352 * it. There are many action types but we only use a few:
11353 *
11354 * ">" -- transition into a machine
11355 * "%" -- transition out of a machine
11356 * "@" -- transition into a final state of a machine.
11357 *
11358 * "@" transitions are tricky because a machine can transition into a final
11359 * state repeatedly. But in some cases we know this can't happen, for example
11360 * a string which is delimited by a final '"' can only transition into its
11361 * final state once, when the closing '"' is seen. */
11362
11363
Adam Cozzette8645d892019-03-26 14:32:20 -070011364#line 2749 "upb/json/parser.rl"
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011365
11366
11367
Adam Cozzette8645d892019-03-26 14:32:20 -070011368#line 2552 "upb/json/parser.c"
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011369static const char _json_actions[] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011370 0, 1, 0, 1, 1, 1, 3, 1,
11371 4, 1, 6, 1, 7, 1, 8, 1,
Paul Yang57b65972019-03-19 22:27:13 -070011372 9, 1, 11, 1, 12, 1, 13, 1,
11373 14, 1, 15, 1, 16, 1, 17, 1,
11374 18, 1, 19, 1, 20, 1, 22, 1,
11375 23, 1, 24, 1, 35, 1, 37, 1,
11376 39, 1, 40, 1, 42, 1, 43, 1,
11377 44, 1, 46, 1, 48, 1, 49, 1,
11378 50, 1, 51, 1, 53, 1, 54, 2,
11379 4, 9, 2, 5, 6, 2, 7, 3,
11380 2, 7, 9, 2, 21, 26, 2, 25,
11381 10, 2, 27, 28, 2, 29, 30, 2,
11382 32, 34, 2, 33, 31, 2, 38, 36,
11383 2, 40, 42, 2, 45, 2, 2, 46,
11384 54, 2, 47, 36, 2, 49, 54, 2,
11385 50, 54, 2, 51, 54, 2, 52, 41,
11386 2, 53, 54, 3, 32, 34, 35, 4,
11387 21, 26, 27, 28
Bo Yange3ee7162018-08-10 18:09:02 +000011388};
11389
Paul Yang7f42d6d2019-01-22 15:35:12 -080011390static const short _json_key_offsets[] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011391 0, 0, 12, 13, 18, 23, 28, 29,
11392 30, 31, 32, 33, 34, 35, 36, 37,
11393 38, 43, 44, 48, 53, 58, 63, 67,
11394 71, 74, 77, 79, 83, 87, 89, 91,
11395 96, 98, 100, 109, 115, 121, 127, 133,
11396 135, 139, 142, 144, 146, 149, 150, 154,
11397 156, 158, 160, 162, 163, 165, 167, 168,
11398 170, 172, 173, 175, 177, 178, 180, 182,
11399 183, 185, 187, 191, 193, 195, 196, 197,
11400 198, 199, 201, 206, 208, 210, 212, 221,
11401 222, 222, 222, 227, 232, 237, 238, 239,
11402 240, 241, 241, 242, 243, 244, 244, 245,
11403 246, 247, 247, 252, 253, 257, 262, 267,
11404 272, 276, 276, 279, 282, 285, 288, 291,
Paul Yang7f42d6d2019-01-22 15:35:12 -080011405 294, 294, 294, 294, 294, 294
11406};
11407
11408static const char _json_trans_keys[] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011409 32, 34, 45, 91, 102, 110, 116, 123,
11410 9, 13, 48, 57, 34, 32, 93, 125,
11411 9, 13, 32, 44, 93, 9, 13, 32,
11412 93, 125, 9, 13, 97, 108, 115, 101,
11413 117, 108, 108, 114, 117, 101, 32, 34,
11414 125, 9, 13, 34, 32, 58, 9, 13,
11415 32, 93, 125, 9, 13, 32, 44, 125,
11416 9, 13, 32, 44, 125, 9, 13, 32,
11417 34, 9, 13, 45, 48, 49, 57, 48,
11418 49, 57, 46, 69, 101, 48, 57, 69,
11419 101, 48, 57, 43, 45, 48, 57, 48,
11420 57, 48, 57, 46, 69, 101, 48, 57,
11421 34, 92, 34, 92, 34, 47, 92, 98,
11422 102, 110, 114, 116, 117, 48, 57, 65,
11423 70, 97, 102, 48, 57, 65, 70, 97,
11424 102, 48, 57, 65, 70, 97, 102, 48,
11425 57, 65, 70, 97, 102, 34, 92, 45,
11426 48, 49, 57, 48, 49, 57, 46, 115,
11427 48, 57, 115, 48, 57, 34, 46, 115,
11428 48, 57, 48, 57, 48, 57, 48, 57,
11429 48, 57, 45, 48, 57, 48, 57, 45,
11430 48, 57, 48, 57, 84, 48, 57, 48,
11431 57, 58, 48, 57, 48, 57, 58, 48,
11432 57, 48, 57, 43, 45, 46, 90, 48,
11433 57, 48, 57, 58, 48, 48, 34, 48,
11434 57, 43, 45, 90, 48, 57, 34, 44,
11435 34, 44, 34, 44, 34, 45, 91, 102,
11436 110, 116, 123, 48, 57, 34, 32, 93,
11437 125, 9, 13, 32, 44, 93, 9, 13,
11438 32, 93, 125, 9, 13, 97, 108, 115,
11439 101, 117, 108, 108, 114, 117, 101, 32,
11440 34, 125, 9, 13, 34, 32, 58, 9,
11441 13, 32, 93, 125, 9, 13, 32, 44,
11442 125, 9, 13, 32, 44, 125, 9, 13,
11443 32, 34, 9, 13, 32, 9, 13, 32,
11444 9, 13, 32, 9, 13, 32, 9, 13,
Paul Yang7f42d6d2019-01-22 15:35:12 -080011445 32, 9, 13, 32, 9, 13, 0
11446};
11447
11448static const char _json_single_lengths[] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011449 0, 8, 1, 3, 3, 3, 1, 1,
11450 1, 1, 1, 1, 1, 1, 1, 1,
11451 3, 1, 2, 3, 3, 3, 2, 2,
11452 1, 3, 0, 2, 2, 0, 0, 3,
11453 2, 2, 9, 0, 0, 0, 0, 2,
11454 2, 1, 2, 0, 1, 1, 2, 0,
11455 0, 0, 0, 1, 0, 0, 1, 0,
11456 0, 1, 0, 0, 1, 0, 0, 1,
11457 0, 0, 4, 0, 0, 1, 1, 1,
11458 1, 0, 3, 2, 2, 2, 7, 1,
11459 0, 0, 3, 3, 3, 1, 1, 1,
11460 1, 0, 1, 1, 1, 0, 1, 1,
11461 1, 0, 3, 1, 2, 3, 3, 3,
11462 2, 0, 1, 1, 1, 1, 1, 1,
Paul Yang7f42d6d2019-01-22 15:35:12 -080011463 0, 0, 0, 0, 0, 0
11464};
11465
11466static const char _json_range_lengths[] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011467 0, 2, 0, 1, 1, 1, 0, 0,
11468 0, 0, 0, 0, 0, 0, 0, 0,
11469 1, 0, 1, 1, 1, 1, 1, 1,
11470 1, 0, 1, 1, 1, 1, 1, 1,
11471 0, 0, 0, 3, 3, 3, 3, 0,
11472 1, 1, 0, 1, 1, 0, 1, 1,
11473 1, 1, 1, 0, 1, 1, 0, 1,
11474 1, 0, 1, 1, 0, 1, 1, 0,
11475 1, 1, 0, 1, 1, 0, 0, 0,
11476 0, 1, 1, 0, 0, 0, 1, 0,
11477 0, 0, 1, 1, 1, 0, 0, 0,
11478 0, 0, 0, 0, 0, 0, 0, 0,
11479 0, 0, 1, 0, 1, 1, 1, 1,
11480 1, 0, 1, 1, 1, 1, 1, 1,
Paul Yang7f42d6d2019-01-22 15:35:12 -080011481 0, 0, 0, 0, 0, 0
11482};
11483
11484static const short _json_index_offsets[] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011485 0, 0, 11, 13, 18, 23, 28, 30,
11486 32, 34, 36, 38, 40, 42, 44, 46,
11487 48, 53, 55, 59, 64, 69, 74, 78,
11488 82, 85, 89, 91, 95, 99, 101, 103,
11489 108, 111, 114, 124, 128, 132, 136, 140,
11490 143, 147, 150, 153, 155, 158, 160, 164,
11491 166, 168, 170, 172, 174, 176, 178, 180,
11492 182, 184, 186, 188, 190, 192, 194, 196,
11493 198, 200, 202, 207, 209, 211, 213, 215,
11494 217, 219, 221, 226, 229, 232, 235, 244,
11495 246, 247, 248, 253, 258, 263, 265, 267,
11496 269, 271, 272, 274, 276, 278, 279, 281,
11497 283, 285, 286, 291, 293, 297, 302, 307,
11498 312, 316, 317, 320, 323, 326, 329, 332,
Paul Yang7f42d6d2019-01-22 15:35:12 -080011499 335, 336, 337, 338, 339, 340
11500};
11501
11502static const unsigned char _json_indicies[] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011503 0, 2, 3, 4, 5, 6, 7, 8,
11504 0, 3, 1, 9, 1, 11, 12, 1,
11505 11, 10, 13, 14, 12, 13, 1, 14,
11506 1, 1, 14, 10, 15, 1, 16, 1,
11507 17, 1, 18, 1, 19, 1, 20, 1,
11508 21, 1, 22, 1, 23, 1, 24, 1,
11509 25, 26, 27, 25, 1, 28, 1, 29,
11510 30, 29, 1, 30, 1, 1, 30, 31,
11511 32, 33, 34, 32, 1, 35, 36, 27,
11512 35, 1, 36, 26, 36, 1, 37, 38,
11513 39, 1, 38, 39, 1, 41, 42, 42,
11514 40, 43, 1, 42, 42, 43, 40, 44,
11515 44, 45, 1, 45, 1, 45, 40, 41,
11516 42, 42, 39, 40, 47, 48, 46, 50,
11517 51, 49, 52, 52, 52, 52, 52, 52,
11518 52, 52, 53, 1, 54, 54, 54, 1,
11519 55, 55, 55, 1, 56, 56, 56, 1,
11520 57, 57, 57, 1, 59, 60, 58, 61,
11521 62, 63, 1, 64, 65, 1, 66, 67,
11522 1, 68, 1, 67, 68, 1, 69, 1,
11523 66, 67, 65, 1, 70, 1, 71, 1,
11524 72, 1, 73, 1, 74, 1, 75, 1,
11525 76, 1, 77, 1, 78, 1, 79, 1,
11526 80, 1, 81, 1, 82, 1, 83, 1,
11527 84, 1, 85, 1, 86, 1, 87, 1,
11528 88, 1, 89, 89, 90, 91, 1, 92,
11529 1, 93, 1, 94, 1, 95, 1, 96,
11530 1, 97, 1, 98, 1, 99, 99, 100,
11531 98, 1, 102, 1, 101, 104, 105, 103,
11532 1, 1, 101, 106, 107, 108, 109, 110,
11533 111, 112, 107, 1, 113, 1, 114, 115,
11534 117, 118, 1, 117, 116, 119, 120, 118,
11535 119, 1, 120, 1, 1, 120, 116, 121,
11536 1, 122, 1, 123, 1, 124, 1, 125,
11537 126, 1, 127, 1, 128, 1, 129, 130,
11538 1, 131, 1, 132, 1, 133, 134, 135,
11539 136, 134, 1, 137, 1, 138, 139, 138,
11540 1, 139, 1, 1, 139, 140, 141, 142,
11541 143, 141, 1, 144, 145, 136, 144, 1,
11542 145, 135, 145, 1, 146, 147, 147, 1,
11543 148, 148, 1, 149, 149, 1, 150, 150,
11544 1, 151, 151, 1, 152, 152, 1, 1,
Paul Yang7f42d6d2019-01-22 15:35:12 -080011545 1, 1, 1, 1, 1, 0
11546};
11547
11548static const char _json_trans_targs[] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011549 1, 0, 2, 107, 3, 6, 10, 13,
11550 16, 106, 4, 3, 106, 4, 5, 7,
11551 8, 9, 108, 11, 12, 109, 14, 15,
11552 110, 16, 17, 111, 18, 18, 19, 20,
11553 21, 22, 111, 21, 22, 24, 25, 31,
11554 112, 26, 28, 27, 29, 30, 33, 113,
11555 34, 33, 113, 34, 32, 35, 36, 37,
11556 38, 39, 33, 113, 34, 41, 42, 46,
11557 42, 46, 43, 45, 44, 114, 48, 49,
11558 50, 51, 52, 53, 54, 55, 56, 57,
11559 58, 59, 60, 61, 62, 63, 64, 65,
11560 66, 67, 73, 72, 68, 69, 70, 71,
11561 72, 115, 74, 67, 72, 76, 116, 76,
11562 116, 77, 79, 81, 82, 85, 90, 94,
11563 98, 80, 117, 117, 83, 82, 80, 83,
11564 84, 86, 87, 88, 89, 117, 91, 92,
11565 93, 117, 95, 96, 97, 117, 98, 99,
11566 105, 100, 100, 101, 102, 103, 104, 105,
11567 103, 104, 117, 106, 106, 106, 106, 106,
Paul Yang7f42d6d2019-01-22 15:35:12 -080011568 106
11569};
11570
Paul Yang57b65972019-03-19 22:27:13 -070011571static const unsigned char _json_trans_actions[] = {
11572 0, 0, 113, 107, 53, 0, 0, 0,
11573 125, 59, 45, 0, 55, 0, 0, 0,
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011574 0, 0, 0, 0, 0, 0, 0, 0,
Paul Yang57b65972019-03-19 22:27:13 -070011575 0, 0, 101, 51, 47, 0, 0, 45,
11576 49, 49, 104, 0, 0, 0, 0, 0,
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011577 3, 0, 0, 0, 0, 0, 5, 15,
Paul Yang57b65972019-03-19 22:27:13 -070011578 0, 0, 71, 7, 13, 0, 74, 9,
11579 9, 9, 77, 80, 11, 37, 37, 37,
11580 0, 0, 0, 39, 0, 41, 86, 0,
11581 0, 0, 17, 19, 0, 21, 23, 0,
11582 25, 27, 0, 29, 31, 0, 33, 35,
11583 0, 135, 83, 135, 0, 0, 0, 0,
11584 0, 92, 0, 89, 89, 98, 43, 0,
11585 131, 95, 113, 107, 53, 0, 0, 0,
11586 125, 59, 69, 110, 45, 0, 55, 0,
11587 0, 0, 0, 0, 0, 119, 0, 0,
11588 0, 122, 0, 0, 0, 116, 0, 101,
11589 51, 47, 0, 0, 45, 49, 49, 104,
11590 0, 0, 128, 0, 57, 63, 65, 61,
11591 67
Paul Yang7f42d6d2019-01-22 15:35:12 -080011592};
11593
Paul Yang57b65972019-03-19 22:27:13 -070011594static const unsigned char _json_eof_actions[] = {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011595 0, 0, 0, 0, 0, 0, 0, 0,
11596 0, 0, 0, 0, 0, 0, 0, 0,
11597 0, 0, 0, 0, 0, 0, 0, 0,
11598 0, 1, 0, 1, 0, 0, 1, 1,
11599 0, 0, 0, 0, 0, 0, 0, 0,
11600 0, 0, 0, 0, 0, 0, 0, 0,
11601 0, 0, 0, 0, 0, 0, 0, 0,
11602 0, 0, 0, 0, 0, 0, 0, 0,
11603 0, 0, 0, 0, 0, 0, 0, 0,
11604 0, 0, 0, 0, 0, 0, 0, 0,
11605 0, 0, 0, 0, 0, 0, 0, 0,
11606 0, 0, 0, 0, 0, 0, 0, 0,
11607 0, 0, 0, 0, 0, 0, 0, 0,
Paul Yang57b65972019-03-19 22:27:13 -070011608 0, 0, 0, 57, 63, 65, 61, 67,
Paul Yangc4f2a922019-01-17 10:18:43 -080011609 0, 0, 0, 0, 0, 0
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011610};
11611
11612static const int json_start = 1;
11613
Paul Yang8faa7782018-12-26 10:36:09 -080011614static const int json_en_number_machine = 23;
11615static const int json_en_string_machine = 32;
11616static const int json_en_duration_machine = 40;
11617static const int json_en_timestamp_machine = 47;
Paul Yangc4f2a922019-01-17 10:18:43 -080011618static const int json_en_fieldmask_machine = 75;
11619static const int json_en_value_machine = 78;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011620static const int json_en_main = 1;
11621
11622
Adam Cozzette8645d892019-03-26 14:32:20 -070011623#line 2752 "upb/json/parser.rl"
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011624
11625size_t parse(void *closure, const void *hd, const char *buf, size_t size,
11626 const upb_bufhandle *handle) {
11627 upb_json_parser *parser = closure;
11628
11629 /* Variables used by Ragel's generated code. */
11630 int cs = parser->current_state;
11631 int *stack = parser->parser_stack;
11632 int top = parser->parser_top;
11633
11634 const char *p = buf;
11635 const char *pe = buf + size;
Bo Yange3ee7162018-08-10 18:09:02 +000011636 const char *eof = &eof_ch;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011637
11638 parser->handle = handle;
11639
11640 UPB_UNUSED(hd);
11641 UPB_UNUSED(handle);
11642
11643 capture_resume(parser, buf);
11644
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080011645
Adam Cozzette8645d892019-03-26 14:32:20 -070011646#line 2830 "upb/json/parser.c"
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011647 {
Paul Yang7f42d6d2019-01-22 15:35:12 -080011648 int _klen;
11649 unsigned int _trans;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011650 const char *_acts;
11651 unsigned int _nacts;
Paul Yang7f42d6d2019-01-22 15:35:12 -080011652 const char *_keys;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011653
11654 if ( p == pe )
11655 goto _test_eof;
11656 if ( cs == 0 )
11657 goto _out;
11658_resume:
Paul Yang7f42d6d2019-01-22 15:35:12 -080011659 _keys = _json_trans_keys + _json_key_offsets[cs];
11660 _trans = _json_index_offsets[cs];
11661
11662 _klen = _json_single_lengths[cs];
11663 if ( _klen > 0 ) {
11664 const char *_lower = _keys;
11665 const char *_mid;
11666 const char *_upper = _keys + _klen - 1;
11667 while (1) {
11668 if ( _upper < _lower )
11669 break;
11670
11671 _mid = _lower + ((_upper-_lower) >> 1);
11672 if ( (*p) < *_mid )
11673 _upper = _mid - 1;
11674 else if ( (*p) > *_mid )
11675 _lower = _mid + 1;
11676 else {
11677 _trans += (unsigned int)(_mid - _keys);
11678 goto _match;
11679 }
11680 }
11681 _keys += _klen;
11682 _trans += _klen;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011683 }
11684
Paul Yang7f42d6d2019-01-22 15:35:12 -080011685 _klen = _json_range_lengths[cs];
11686 if ( _klen > 0 ) {
11687 const char *_lower = _keys;
11688 const char *_mid;
11689 const char *_upper = _keys + (_klen<<1) - 2;
11690 while (1) {
11691 if ( _upper < _lower )
11692 break;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011693
Paul Yang7f42d6d2019-01-22 15:35:12 -080011694 _mid = _lower + (((_upper-_lower) >> 1) & ~1);
11695 if ( (*p) < _mid[0] )
11696 _upper = _mid - 2;
11697 else if ( (*p) > _mid[1] )
11698 _lower = _mid + 2;
11699 else {
11700 _trans += (unsigned int)((_mid - _keys)>>1);
11701 goto _match;
11702 }
11703 }
11704 _trans += _klen;
11705 }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011706
Paul Yang7f42d6d2019-01-22 15:35:12 -080011707_match:
11708 _trans = _json_indicies[_trans];
11709 cs = _json_trans_targs[_trans];
11710
11711 if ( _json_trans_actions[_trans] == 0 )
11712 goto _again;
11713
11714 _acts = _json_actions + _json_trans_actions[_trans];
11715 _nacts = (unsigned int) *_acts++;
11716 while ( _nacts-- > 0 )
11717 {
11718 switch ( *_acts++ )
11719 {
Paul Yange0e54662016-09-15 11:09:01 -070011720 case 1:
Adam Cozzette8645d892019-03-26 14:32:20 -070011721#line 2557 "upb/json/parser.rl"
Paul Yang7f42d6d2019-01-22 15:35:12 -080011722 { p--; {cs = stack[--top]; goto _again;} }
Paul Yange0e54662016-09-15 11:09:01 -070011723 break;
11724 case 2:
Adam Cozzette8645d892019-03-26 14:32:20 -070011725#line 2559 "upb/json/parser.rl"
Paul Yangc4f2a922019-01-17 10:18:43 -080011726 { p--; {stack[top++] = cs; cs = 23;goto _again;} }
Paul Yange0e54662016-09-15 11:09:01 -070011727 break;
11728 case 3:
Adam Cozzette8645d892019-03-26 14:32:20 -070011729#line 2563 "upb/json/parser.rl"
Bo Yange3ee7162018-08-10 18:09:02 +000011730 { start_text(parser, p); }
Paul Yange0e54662016-09-15 11:09:01 -070011731 break;
11732 case 4:
Adam Cozzette8645d892019-03-26 14:32:20 -070011733#line 2564 "upb/json/parser.rl"
Bo Yange3ee7162018-08-10 18:09:02 +000011734 { CHECK_RETURN_TOP(end_text(parser, p)); }
Paul Yange0e54662016-09-15 11:09:01 -070011735 break;
11736 case 5:
Adam Cozzette8645d892019-03-26 14:32:20 -070011737#line 2570 "upb/json/parser.rl"
Bo Yange3ee7162018-08-10 18:09:02 +000011738 { start_hex(parser); }
Paul Yange0e54662016-09-15 11:09:01 -070011739 break;
11740 case 6:
Adam Cozzette8645d892019-03-26 14:32:20 -070011741#line 2571 "upb/json/parser.rl"
Bo Yange3ee7162018-08-10 18:09:02 +000011742 { hexdigit(parser, p); }
Paul Yange0e54662016-09-15 11:09:01 -070011743 break;
11744 case 7:
Adam Cozzette8645d892019-03-26 14:32:20 -070011745#line 2572 "upb/json/parser.rl"
Bo Yange3ee7162018-08-10 18:09:02 +000011746 { CHECK_RETURN_TOP(end_hex(parser)); }
Paul Yange0e54662016-09-15 11:09:01 -070011747 break;
11748 case 8:
Adam Cozzette8645d892019-03-26 14:32:20 -070011749#line 2578 "upb/json/parser.rl"
Bo Yange3ee7162018-08-10 18:09:02 +000011750 { CHECK_RETURN_TOP(escape(parser, p)); }
Paul Yange0e54662016-09-15 11:09:01 -070011751 break;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011752 case 9:
Adam Cozzette8645d892019-03-26 14:32:20 -070011753#line 2584 "upb/json/parser.rl"
Paul Yang7f42d6d2019-01-22 15:35:12 -080011754 { p--; {cs = stack[--top]; goto _again;} }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011755 break;
Bo Yange3ee7162018-08-10 18:09:02 +000011756 case 10:
Adam Cozzette8645d892019-03-26 14:32:20 -070011757#line 2589 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011758 { start_year(parser, p); }
Bo Yange3ee7162018-08-10 18:09:02 +000011759 break;
11760 case 11:
Adam Cozzette8645d892019-03-26 14:32:20 -070011761#line 2590 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011762 { CHECK_RETURN_TOP(end_year(parser, p)); }
Bo Yange3ee7162018-08-10 18:09:02 +000011763 break;
11764 case 12:
Adam Cozzette8645d892019-03-26 14:32:20 -070011765#line 2594 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011766 { start_month(parser, p); }
Bo Yange3ee7162018-08-10 18:09:02 +000011767 break;
Paul Yang9bda1f12018-09-22 18:57:43 -070011768 case 13:
Adam Cozzette8645d892019-03-26 14:32:20 -070011769#line 2595 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011770 { CHECK_RETURN_TOP(end_month(parser, p)); }
Paul Yang9bda1f12018-09-22 18:57:43 -070011771 break;
11772 case 14:
Adam Cozzette8645d892019-03-26 14:32:20 -070011773#line 2599 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011774 { start_day(parser, p); }
Paul Yang9bda1f12018-09-22 18:57:43 -070011775 break;
11776 case 15:
Adam Cozzette8645d892019-03-26 14:32:20 -070011777#line 2600 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011778 { CHECK_RETURN_TOP(end_day(parser, p)); }
Paul Yang9bda1f12018-09-22 18:57:43 -070011779 break;
11780 case 16:
Adam Cozzette8645d892019-03-26 14:32:20 -070011781#line 2604 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011782 { start_hour(parser, p); }
Paul Yang9bda1f12018-09-22 18:57:43 -070011783 break;
11784 case 17:
Adam Cozzette8645d892019-03-26 14:32:20 -070011785#line 2605 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011786 { CHECK_RETURN_TOP(end_hour(parser, p)); }
Paul Yang9bda1f12018-09-22 18:57:43 -070011787 break;
11788 case 18:
Adam Cozzette8645d892019-03-26 14:32:20 -070011789#line 2609 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011790 { start_minute(parser, p); }
Paul Yang9bda1f12018-09-22 18:57:43 -070011791 break;
11792 case 19:
Adam Cozzette8645d892019-03-26 14:32:20 -070011793#line 2610 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011794 { CHECK_RETURN_TOP(end_minute(parser, p)); }
Paul Yang9bda1f12018-09-22 18:57:43 -070011795 break;
11796 case 20:
Adam Cozzette8645d892019-03-26 14:32:20 -070011797#line 2614 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011798 { start_second(parser, p); }
Paul Yangc4f2a922019-01-17 10:18:43 -080011799 break;
11800 case 21:
Adam Cozzette8645d892019-03-26 14:32:20 -070011801#line 2615 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011802 { CHECK_RETURN_TOP(end_second(parser, p)); }
Paul Yangc4f2a922019-01-17 10:18:43 -080011803 break;
11804 case 22:
Adam Cozzette8645d892019-03-26 14:32:20 -070011805#line 2620 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011806 { start_duration_base(parser, p); }
Paul Yangc4f2a922019-01-17 10:18:43 -080011807 break;
11808 case 23:
Adam Cozzette8645d892019-03-26 14:32:20 -070011809#line 2621 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011810 { CHECK_RETURN_TOP(end_duration_base(parser, p)); }
Paul Yangc4f2a922019-01-17 10:18:43 -080011811 break;
11812 case 24:
Adam Cozzette8645d892019-03-26 14:32:20 -070011813#line 2623 "upb/json/parser.rl"
Paul Yang7f42d6d2019-01-22 15:35:12 -080011814 { p--; {cs = stack[--top]; goto _again;} }
Paul Yangc4f2a922019-01-17 10:18:43 -080011815 break;
11816 case 25:
Adam Cozzette8645d892019-03-26 14:32:20 -070011817#line 2628 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011818 { start_timestamp_base(parser); }
11819 break;
11820 case 26:
Adam Cozzette8645d892019-03-26 14:32:20 -070011821#line 2630 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011822 { start_timestamp_fraction(parser, p); }
11823 break;
11824 case 27:
Adam Cozzette8645d892019-03-26 14:32:20 -070011825#line 2631 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011826 { CHECK_RETURN_TOP(end_timestamp_fraction(parser, p)); }
11827 break;
11828 case 28:
Adam Cozzette8645d892019-03-26 14:32:20 -070011829#line 2633 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011830 { start_timestamp_zone(parser, p); }
11831 break;
11832 case 29:
Adam Cozzette8645d892019-03-26 14:32:20 -070011833#line 2634 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011834 { CHECK_RETURN_TOP(end_timestamp_zone(parser, p)); }
11835 break;
11836 case 30:
Adam Cozzette8645d892019-03-26 14:32:20 -070011837#line 2636 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011838 { p--; {cs = stack[--top]; goto _again;} }
11839 break;
11840 case 31:
Adam Cozzette8645d892019-03-26 14:32:20 -070011841#line 2641 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011842 { start_fieldmask_path_text(parser, p); }
11843 break;
11844 case 32:
Adam Cozzette8645d892019-03-26 14:32:20 -070011845#line 2642 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011846 { end_fieldmask_path_text(parser, p); }
11847 break;
11848 case 33:
Adam Cozzette8645d892019-03-26 14:32:20 -070011849#line 2647 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011850 { start_fieldmask_path(parser); }
11851 break;
11852 case 34:
Adam Cozzette8645d892019-03-26 14:32:20 -070011853#line 2648 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011854 { end_fieldmask_path(parser); }
11855 break;
11856 case 35:
Adam Cozzette8645d892019-03-26 14:32:20 -070011857#line 2654 "upb/json/parser.rl"
Paul Yang57b65972019-03-19 22:27:13 -070011858 { p--; {cs = stack[--top]; goto _again;} }
11859 break;
11860 case 36:
Adam Cozzette8645d892019-03-26 14:32:20 -070011861#line 2659 "upb/json/parser.rl"
Paul Yang9bda1f12018-09-22 18:57:43 -070011862 {
11863 if (is_wellknown_msg(parser, UPB_WELLKNOWN_TIMESTAMP)) {
Paul Yangc4f2a922019-01-17 10:18:43 -080011864 {stack[top++] = cs; cs = 47;goto _again;}
Paul Yang9bda1f12018-09-22 18:57:43 -070011865 } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_DURATION)) {
Paul Yangc4f2a922019-01-17 10:18:43 -080011866 {stack[top++] = cs; cs = 40;goto _again;}
11867 } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_FIELDMASK)) {
11868 {stack[top++] = cs; cs = 75;goto _again;}
Paul Yang9bda1f12018-09-22 18:57:43 -070011869 } else {
Paul Yangc4f2a922019-01-17 10:18:43 -080011870 {stack[top++] = cs; cs = 32;goto _again;}
Paul Yang9bda1f12018-09-22 18:57:43 -070011871 }
11872 }
11873 break;
Paul Yang57b65972019-03-19 22:27:13 -070011874 case 37:
Adam Cozzette8645d892019-03-26 14:32:20 -070011875#line 2672 "upb/json/parser.rl"
Paul Yangc4f2a922019-01-17 10:18:43 -080011876 { p--; {stack[top++] = cs; cs = 78;goto _again;} }
Paul Yang9bda1f12018-09-22 18:57:43 -070011877 break;
Paul Yang57b65972019-03-19 22:27:13 -070011878 case 38:
Adam Cozzette8645d892019-03-26 14:32:20 -070011879#line 2677 "upb/json/parser.rl"
Paul Yang8faa7782018-12-26 10:36:09 -080011880 {
11881 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
11882 start_any_member(parser, p);
11883 } else {
11884 start_member(parser);
11885 }
11886 }
Paul Yang9bda1f12018-09-22 18:57:43 -070011887 break;
Paul Yang57b65972019-03-19 22:27:13 -070011888 case 39:
Adam Cozzette8645d892019-03-26 14:32:20 -070011889#line 2684 "upb/json/parser.rl"
Paul Yang9bda1f12018-09-22 18:57:43 -070011890 { CHECK_RETURN_TOP(end_membername(parser)); }
11891 break;
Paul Yang57b65972019-03-19 22:27:13 -070011892 case 40:
Adam Cozzette8645d892019-03-26 14:32:20 -070011893#line 2687 "upb/json/parser.rl"
Paul Yang8faa7782018-12-26 10:36:09 -080011894 {
11895 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
11896 end_any_member(parser, p);
11897 } else {
11898 end_member(parser);
11899 }
11900 }
Paul Yang9bda1f12018-09-22 18:57:43 -070011901 break;
Paul Yang57b65972019-03-19 22:27:13 -070011902 case 41:
Adam Cozzette8645d892019-03-26 14:32:20 -070011903#line 2698 "upb/json/parser.rl"
Paul Yang8faa7782018-12-26 10:36:09 -080011904 {
11905 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
11906 start_any_object(parser, p);
11907 } else {
11908 start_object(parser);
11909 }
11910 }
Paul Yang9bda1f12018-09-22 18:57:43 -070011911 break;
Paul Yang57b65972019-03-19 22:27:13 -070011912 case 42:
Adam Cozzette8645d892019-03-26 14:32:20 -070011913#line 2707 "upb/json/parser.rl"
Paul Yang8faa7782018-12-26 10:36:09 -080011914 {
11915 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
11916 CHECK_RETURN_TOP(end_any_object(parser, p));
11917 } else {
11918 end_object(parser);
11919 }
11920 }
Paul Yang9bda1f12018-09-22 18:57:43 -070011921 break;
Paul Yang57b65972019-03-19 22:27:13 -070011922 case 43:
Adam Cozzette8645d892019-03-26 14:32:20 -070011923#line 2719 "upb/json/parser.rl"
Paul Yang9bda1f12018-09-22 18:57:43 -070011924 { CHECK_RETURN_TOP(start_array(parser)); }
11925 break;
Paul Yang57b65972019-03-19 22:27:13 -070011926 case 44:
Adam Cozzette8645d892019-03-26 14:32:20 -070011927#line 2723 "upb/json/parser.rl"
Paul Yang9bda1f12018-09-22 18:57:43 -070011928 { end_array(parser); }
11929 break;
Paul Yang57b65972019-03-19 22:27:13 -070011930 case 45:
Adam Cozzette8645d892019-03-26 14:32:20 -070011931#line 2728 "upb/json/parser.rl"
Paul Yang9bda1f12018-09-22 18:57:43 -070011932 { CHECK_RETURN_TOP(start_number(parser, p)); }
11933 break;
Paul Yang57b65972019-03-19 22:27:13 -070011934 case 46:
Adam Cozzette8645d892019-03-26 14:32:20 -070011935#line 2729 "upb/json/parser.rl"
Paul Yang9bda1f12018-09-22 18:57:43 -070011936 { CHECK_RETURN_TOP(end_number(parser, p)); }
11937 break;
Paul Yang57b65972019-03-19 22:27:13 -070011938 case 47:
Adam Cozzette8645d892019-03-26 14:32:20 -070011939#line 2731 "upb/json/parser.rl"
Paul Yang9bda1f12018-09-22 18:57:43 -070011940 { CHECK_RETURN_TOP(start_stringval(parser)); }
11941 break;
Paul Yang57b65972019-03-19 22:27:13 -070011942 case 48:
Adam Cozzette8645d892019-03-26 14:32:20 -070011943#line 2732 "upb/json/parser.rl"
Paul Yang9bda1f12018-09-22 18:57:43 -070011944 { CHECK_RETURN_TOP(end_stringval(parser)); }
11945 break;
Paul Yang57b65972019-03-19 22:27:13 -070011946 case 49:
Adam Cozzette8645d892019-03-26 14:32:20 -070011947#line 2734 "upb/json/parser.rl"
Paul Yang9bda1f12018-09-22 18:57:43 -070011948 { CHECK_RETURN_TOP(end_bool(parser, true)); }
11949 break;
Paul Yang57b65972019-03-19 22:27:13 -070011950 case 50:
Adam Cozzette8645d892019-03-26 14:32:20 -070011951#line 2736 "upb/json/parser.rl"
Paul Yang9bda1f12018-09-22 18:57:43 -070011952 { CHECK_RETURN_TOP(end_bool(parser, false)); }
11953 break;
Paul Yang57b65972019-03-19 22:27:13 -070011954 case 51:
Adam Cozzette8645d892019-03-26 14:32:20 -070011955#line 2738 "upb/json/parser.rl"
Paul Yang9bda1f12018-09-22 18:57:43 -070011956 { CHECK_RETURN_TOP(end_null(parser)); }
11957 break;
Paul Yang57b65972019-03-19 22:27:13 -070011958 case 52:
Adam Cozzette8645d892019-03-26 14:32:20 -070011959#line 2740 "upb/json/parser.rl"
Paul Yang9bda1f12018-09-22 18:57:43 -070011960 { CHECK_RETURN_TOP(start_subobject_full(parser)); }
11961 break;
Paul Yang57b65972019-03-19 22:27:13 -070011962 case 53:
Adam Cozzette8645d892019-03-26 14:32:20 -070011963#line 2741 "upb/json/parser.rl"
Paul Yang9bda1f12018-09-22 18:57:43 -070011964 { end_subobject_full(parser); }
11965 break;
Paul Yang57b65972019-03-19 22:27:13 -070011966 case 54:
Adam Cozzette8645d892019-03-26 14:32:20 -070011967#line 2746 "upb/json/parser.rl"
Paul Yang7f42d6d2019-01-22 15:35:12 -080011968 { p--; {cs = stack[--top]; goto _again;} }
Paul Yang9bda1f12018-09-22 18:57:43 -070011969 break;
Adam Cozzette8645d892019-03-26 14:32:20 -070011970#line 3154 "upb/json/parser.c"
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070011971 }
11972 }
11973
11974_again:
11975 if ( cs == 0 )
11976 goto _out;
11977 if ( ++p != pe )
11978 goto _resume;
11979 _test_eof: {}
Bo Yange3ee7162018-08-10 18:09:02 +000011980 if ( p == eof )
11981 {
11982 const char *__acts = _json_actions + _json_eof_actions[cs];
11983 unsigned int __nacts = (unsigned int) *__acts++;
11984 while ( __nacts-- > 0 ) {
11985 switch ( *__acts++ ) {
11986 case 0:
Adam Cozzette8645d892019-03-26 14:32:20 -070011987#line 2555 "upb/json/parser.rl"
Paul Yang7f42d6d2019-01-22 15:35:12 -080011988 { p--; {cs = stack[--top]; if ( p == pe )
Paul Yangc4f2a922019-01-17 10:18:43 -080011989 goto _test_eof;
11990goto _again;} }
Bo Yange3ee7162018-08-10 18:09:02 +000011991 break;
Paul Yang57b65972019-03-19 22:27:13 -070011992 case 46:
Adam Cozzette8645d892019-03-26 14:32:20 -070011993#line 2729 "upb/json/parser.rl"
Paul Yangc4f2a922019-01-17 10:18:43 -080011994 { CHECK_RETURN_TOP(end_number(parser, p)); }
11995 break;
Paul Yang57b65972019-03-19 22:27:13 -070011996 case 49:
Adam Cozzette8645d892019-03-26 14:32:20 -070011997#line 2734 "upb/json/parser.rl"
Paul Yangc4f2a922019-01-17 10:18:43 -080011998 { CHECK_RETURN_TOP(end_bool(parser, true)); }
11999 break;
Paul Yang57b65972019-03-19 22:27:13 -070012000 case 50:
Adam Cozzette8645d892019-03-26 14:32:20 -070012001#line 2736 "upb/json/parser.rl"
Paul Yangc4f2a922019-01-17 10:18:43 -080012002 { CHECK_RETURN_TOP(end_bool(parser, false)); }
12003 break;
Paul Yang57b65972019-03-19 22:27:13 -070012004 case 51:
Adam Cozzette8645d892019-03-26 14:32:20 -070012005#line 2738 "upb/json/parser.rl"
Paul Yang9bda1f12018-09-22 18:57:43 -070012006 { CHECK_RETURN_TOP(end_null(parser)); }
Bo Yange3ee7162018-08-10 18:09:02 +000012007 break;
Paul Yang57b65972019-03-19 22:27:13 -070012008 case 53:
Adam Cozzette8645d892019-03-26 14:32:20 -070012009#line 2741 "upb/json/parser.rl"
Paul Yang9bda1f12018-09-22 18:57:43 -070012010 { end_subobject_full(parser); }
Bo Yange3ee7162018-08-10 18:09:02 +000012011 break;
Adam Cozzette8645d892019-03-26 14:32:20 -070012012#line 3196 "upb/json/parser.c"
Bo Yange3ee7162018-08-10 18:09:02 +000012013 }
12014 }
12015 }
12016
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012017 _out: {}
12018 }
12019
Adam Cozzette8645d892019-03-26 14:32:20 -070012020#line 2774 "upb/json/parser.rl"
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012021
12022 if (p != pe) {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012023 upb_status_seterrf(parser->status, "Parse error at '%.*s'\n", pe - p, p);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012024 } else {
12025 capture_suspend(parser, &p);
12026 }
12027
12028error:
12029 /* Save parsing state back to parser. */
12030 parser->current_state = cs;
12031 parser->parser_top = top;
12032
12033 return p - buf;
12034}
12035
Paul Yang8faa7782018-12-26 10:36:09 -080012036static bool end(void *closure, const void *hd) {
Bo Yange3ee7162018-08-10 18:09:02 +000012037 upb_json_parser *parser = closure;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012038
12039 /* Prevent compile warning on unused static constants. */
12040 UPB_UNUSED(json_start);
Paul Yang9bda1f12018-09-22 18:57:43 -070012041 UPB_UNUSED(json_en_duration_machine);
Paul Yangc4f2a922019-01-17 10:18:43 -080012042 UPB_UNUSED(json_en_fieldmask_machine);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012043 UPB_UNUSED(json_en_number_machine);
12044 UPB_UNUSED(json_en_string_machine);
Paul Yang9bda1f12018-09-22 18:57:43 -070012045 UPB_UNUSED(json_en_timestamp_machine);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012046 UPB_UNUSED(json_en_value_machine);
12047 UPB_UNUSED(json_en_main);
Bo Yange3ee7162018-08-10 18:09:02 +000012048
12049 parse(parser, hd, &eof_ch, 0, NULL);
12050
Paul Yangc4f2a922019-01-17 10:18:43 -080012051 return parser->current_state >= 106;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012052}
12053
12054static void json_parser_reset(upb_json_parser *p) {
12055 int cs;
12056 int top;
12057
12058 p->top = p->stack;
Adam Cozzette8645d892019-03-26 14:32:20 -070012059 init_frame(p->top);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012060
12061 /* Emit Ragel initialization of the parser. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012062
Adam Cozzette8645d892019-03-26 14:32:20 -070012063#line 3247 "upb/json/parser.c"
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012064 {
12065 cs = json_start;
12066 top = 0;
12067 }
12068
Adam Cozzette8645d892019-03-26 14:32:20 -070012069#line 2816 "upb/json/parser.rl"
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012070 p->current_state = cs;
12071 p->parser_top = top;
12072 accumulate_clear(p);
12073 p->multipart_state = MULTIPART_INACTIVE;
12074 p->capture = NULL;
12075 p->accumulated = NULL;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012076}
12077
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012078static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c,
12079 const upb_msgdef *md) {
Paul Yange0e54662016-09-15 11:09:01 -070012080 upb_msg_field_iter i;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012081 upb_alloc *alloc = upb_arena_alloc(c->arena);
Paul Yange0e54662016-09-15 11:09:01 -070012082
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012083 upb_json_parsermethod *m = upb_malloc(alloc, sizeof(*m));
Paul Yange0e54662016-09-15 11:09:01 -070012084
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012085 m->cache = c;
Paul Yange0e54662016-09-15 11:09:01 -070012086
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012087 upb_byteshandler_init(&m->input_handler_);
12088 upb_byteshandler_setstring(&m->input_handler_, parse, m);
12089 upb_byteshandler_setendstr(&m->input_handler_, end, m);
12090
12091 upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, alloc);
12092
12093 /* Build name_table */
Paul Yange0e54662016-09-15 11:09:01 -070012094
12095 for(upb_msg_field_begin(&i, md);
12096 !upb_msg_field_done(&i);
12097 upb_msg_field_next(&i)) {
12098 const upb_fielddef *f = upb_msg_iter_field(&i);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012099 upb_value v = upb_value_constptr(f);
12100 char *buf;
Paul Yange0e54662016-09-15 11:09:01 -070012101
12102 /* Add an entry for the JSON name. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012103 size_t len = upb_fielddef_getjsonname(f, NULL, 0);
12104 buf = upb_malloc(alloc, len);
12105 upb_fielddef_getjsonname(f, buf, len);
12106 upb_strtable_insert3(&m->name_table, buf, strlen(buf), v, alloc);
Paul Yange0e54662016-09-15 11:09:01 -070012107
12108 if (strcmp(buf, upb_fielddef_name(f)) != 0) {
12109 /* Since the JSON name is different from the regular field name, add an
12110 * entry for the raw name (compliant proto3 JSON parsers must accept
12111 * both). */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012112 const char *name = upb_fielddef_name(f);
12113 upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc);
Paul Yange0e54662016-09-15 11:09:01 -070012114 }
12115 }
12116
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012117 return m;
Paul Yange0e54662016-09-15 11:09:01 -070012118}
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012119
12120/* Public API *****************************************************************/
12121
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012122upb_json_parser *upb_json_parser_create(upb_arena *arena,
Paul Yange0e54662016-09-15 11:09:01 -070012123 const upb_json_parsermethod *method,
Paul Yang8faa7782018-12-26 10:36:09 -080012124 const upb_symtab* symtab,
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012125 upb_sink output,
12126 upb_status *status,
Paul Yang26eeec92018-07-09 14:29:23 -070012127 bool ignore_json_unknown) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012128#ifndef NDEBUG
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012129 const size_t size_before = upb_arena_bytesallocated(arena);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012130#endif
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012131 upb_json_parser *p = upb_arena_malloc(arena, sizeof(upb_json_parser));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012132 if (!p) return false;
12133
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012134 p->arena = arena;
Paul Yange0e54662016-09-15 11:09:01 -070012135 p->method = method;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012136 p->status = status;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012137 p->limit = p->stack + UPB_JSON_MAX_DEPTH;
12138 p->accumulate_buf = NULL;
12139 p->accumulate_buf_size = 0;
Paul Yange0e54662016-09-15 11:09:01 -070012140 upb_bytessink_reset(&p->input_, &method->input_handler_, p);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012141
12142 json_parser_reset(p);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012143 p->top->sink = output;
12144 p->top->m = upb_handlers_msgdef(output.handlers);
Paul Yang8faa7782018-12-26 10:36:09 -080012145 if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) {
12146 p->top->is_any = true;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012147 p->top->any_frame = json_parser_any_frame_new(p);
Paul Yang8faa7782018-12-26 10:36:09 -080012148 } else {
12149 p->top->is_any = false;
12150 p->top->any_frame = NULL;
12151 }
Paul Yange0e54662016-09-15 11:09:01 -070012152 set_name_table(p, p->top);
Paul Yang8faa7782018-12-26 10:36:09 -080012153 p->symtab = symtab;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012154
Paul Yang26eeec92018-07-09 14:29:23 -070012155 p->ignore_json_unknown = ignore_json_unknown;
12156
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012157 /* If this fails, uncomment and increase the value in parser.h. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012158 /* fprintf(stderr, "%zd\n", upb_arena_bytesallocated(arena) - size_before); */
12159 UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(arena) - size_before <=
Paul Yange0e54662016-09-15 11:09:01 -070012160 UPB_JSON_PARSER_SIZE);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012161 return p;
12162}
12163
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012164upb_bytessink upb_json_parser_input(upb_json_parser *p) {
12165 return p->input_;
Paul Yange0e54662016-09-15 11:09:01 -070012166}
12167
12168const upb_byteshandler *upb_json_parsermethod_inputhandler(
12169 const upb_json_parsermethod *m) {
12170 return &m->input_handler_;
12171}
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012172
12173upb_json_codecache *upb_json_codecache_new() {
12174 upb_alloc *alloc;
12175 upb_json_codecache *c;
12176
12177 c = upb_gmalloc(sizeof(*c));
12178
12179 c->arena = upb_arena_new();
12180 alloc = upb_arena_alloc(c->arena);
12181
12182 upb_inttable_init2(&c->methods, UPB_CTYPE_CONSTPTR, alloc);
12183
12184 return c;
12185}
12186
12187void upb_json_codecache_free(upb_json_codecache *c) {
12188 upb_arena_free(c->arena);
12189 upb_gfree(c);
12190}
12191
12192const upb_json_parsermethod *upb_json_codecache_get(upb_json_codecache *c,
12193 const upb_msgdef *md) {
12194 upb_json_parsermethod *m;
12195 upb_value v;
12196 upb_msg_field_iter i;
12197 upb_alloc *alloc = upb_arena_alloc(c->arena);
12198
12199 if (upb_inttable_lookupptr(&c->methods, md, &v)) {
12200 return upb_value_getconstptr(v);
12201 }
12202
12203 m = parsermethod_new(c, md);
12204 v = upb_value_constptr(m);
12205
12206 if (!m) return NULL;
12207 if (!upb_inttable_insertptr2(&c->methods, md, v, alloc)) return NULL;
12208
12209 /* Populate parser methods for all submessages, so the name tables will
12210 * be available during parsing. */
12211 for(upb_msg_field_begin(&i, md);
12212 !upb_msg_field_done(&i);
12213 upb_msg_field_next(&i)) {
12214 upb_fielddef *f = upb_msg_iter_field(&i);
12215
12216 if (upb_fielddef_issubmsg(f)) {
12217 const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
12218 const upb_json_parsermethod *sub_method =
12219 upb_json_codecache_get(c, subdef);
12220
12221 if (!sub_method) return NULL;
12222 }
12223 }
12224
12225 return m;
12226}
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012227/*
12228** This currently uses snprintf() to format primitives, and could be optimized
12229** further.
12230*/
12231
12232
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012233#include <ctype.h>
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012234#include <stdint.h>
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012235#include <string.h>
Paul Yang9bda1f12018-09-22 18:57:43 -070012236#include <time.h>
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012237
12238struct upb_json_printer {
12239 upb_sink input_;
12240 /* BytesSink closure. */
12241 void *subc_;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012242 upb_bytessink output_;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012243
12244 /* We track the depth so that we know when to emit startstr/endstr on the
12245 * output. */
12246 int depth_;
12247
12248 /* Have we emitted the first element? This state is necessary to emit commas
12249 * without leaving a trailing comma in arrays/maps. We keep this state per
12250 * frame depth.
12251 *
12252 * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
12253 * We count frames (contexts in which we separate elements by commas) as both
12254 * repeated fields and messages (maps), and the worst case is a
12255 * message->repeated field->submessage->repeated field->... nesting. */
12256 bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
Paul Yang9bda1f12018-09-22 18:57:43 -070012257
12258 /* To print timestamp, printer needs to cache its seconds and nanos values
12259 * and convert them when ending timestamp message. See comments of
12260 * printer_sethandlers_timestamp for more detail. */
12261 int64_t seconds;
12262 int32_t nanos;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012263};
12264
12265/* StringPiece; a pointer plus a length. */
12266typedef struct {
Paul Yange0e54662016-09-15 11:09:01 -070012267 char *ptr;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012268 size_t len;
12269} strpc;
12270
Paul Yange0e54662016-09-15 11:09:01 -070012271void freestrpc(void *ptr) {
12272 strpc *pc = ptr;
12273 upb_gfree(pc->ptr);
12274 upb_gfree(pc);
12275}
12276
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012277typedef struct {
12278 bool preserve_fieldnames;
12279} upb_json_printercache;
12280
Paul Yange0e54662016-09-15 11:09:01 -070012281/* Convert fielddef name to JSON name and return as a string piece. */
12282strpc *newstrpc(upb_handlers *h, const upb_fielddef *f,
12283 bool preserve_fieldnames) {
12284 /* TODO(haberman): handle malloc failure. */
12285 strpc *ret = upb_gmalloc(sizeof(*ret));
12286 if (preserve_fieldnames) {
12287 ret->ptr = upb_gstrdup(upb_fielddef_name(f));
12288 ret->len = strlen(ret->ptr);
12289 } else {
12290 size_t len;
12291 ret->len = upb_fielddef_getjsonname(f, NULL, 0);
12292 ret->ptr = upb_gmalloc(ret->len);
12293 len = upb_fielddef_getjsonname(f, ret->ptr, ret->len);
12294 UPB_ASSERT(len == ret->len);
12295 ret->len--; /* NULL */
12296 }
12297
12298 upb_handlers_addcleanup(h, ret, freestrpc);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012299 return ret;
12300}
12301
Paul Yang8faa7782018-12-26 10:36:09 -080012302/* Convert a null-terminated const char* to a string piece. */
12303strpc *newstrpc_str(upb_handlers *h, const char * str) {
12304 strpc * ret = upb_gmalloc(sizeof(*ret));
12305 ret->ptr = upb_gstrdup(str);
12306 ret->len = strlen(str);
12307 upb_handlers_addcleanup(h, ret, freestrpc);
12308 return ret;
12309}
12310
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012311/* ------------ JSON string printing: values, maps, arrays ------------------ */
12312
12313static void print_data(
12314 upb_json_printer *p, const char *buf, unsigned int len) {
12315 /* TODO: Will need to change if we support pushback from the sink. */
12316 size_t n = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL);
Paul Yange0e54662016-09-15 11:09:01 -070012317 UPB_ASSERT(n == len);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012318}
12319
12320static void print_comma(upb_json_printer *p) {
12321 if (!p->first_elem_[p->depth_]) {
12322 print_data(p, ",", 1);
12323 }
12324 p->first_elem_[p->depth_] = false;
12325}
12326
12327/* Helpers that print properly formatted elements to the JSON output stream. */
12328
12329/* Used for escaping control chars in strings. */
12330static const char kControlCharLimit = 0x20;
12331
12332UPB_INLINE bool is_json_escaped(char c) {
12333 /* See RFC 4627. */
12334 unsigned char uc = (unsigned char)c;
12335 return uc < kControlCharLimit || uc == '"' || uc == '\\';
12336}
12337
Paul Yange0e54662016-09-15 11:09:01 -070012338UPB_INLINE const char* json_nice_escape(char c) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012339 switch (c) {
12340 case '"': return "\\\"";
12341 case '\\': return "\\\\";
12342 case '\b': return "\\b";
12343 case '\f': return "\\f";
12344 case '\n': return "\\n";
12345 case '\r': return "\\r";
12346 case '\t': return "\\t";
12347 default: return NULL;
12348 }
12349}
12350
12351/* Write a properly escaped string chunk. The surrounding quotes are *not*
12352 * printed; this is so that the caller has the option of emitting the string
12353 * content in chunks. */
12354static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
12355 const char* unescaped_run = NULL;
12356 unsigned int i;
12357 for (i = 0; i < len; i++) {
12358 char c = buf[i];
12359 /* Handle escaping. */
12360 if (is_json_escaped(c)) {
12361 /* Use a "nice" escape, like \n, if one exists for this character. */
12362 const char* escape = json_nice_escape(c);
12363 /* If we don't have a specific 'nice' escape code, use a \uXXXX-style
12364 * escape. */
12365 char escape_buf[8];
12366 if (!escape) {
12367 unsigned char byte = (unsigned char)c;
12368 _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
12369 escape = escape_buf;
12370 }
12371
12372 /* N.B. that we assume that the input encoding is equal to the output
12373 * encoding (both UTF-8 for now), so for chars >= 0x20 and != \, ", we
12374 * can simply pass the bytes through. */
12375
12376 /* If there's a current run of unescaped chars, print that run first. */
12377 if (unescaped_run) {
12378 print_data(p, unescaped_run, &buf[i] - unescaped_run);
12379 unescaped_run = NULL;
12380 }
12381 /* Then print the escape code. */
12382 print_data(p, escape, strlen(escape));
12383 } else {
12384 /* Add to the current unescaped run of characters. */
12385 if (unescaped_run == NULL) {
12386 unescaped_run = &buf[i];
12387 }
12388 }
12389 }
12390
12391 /* If the string ended in a run of unescaped characters, print that last run. */
12392 if (unescaped_run) {
12393 print_data(p, unescaped_run, &buf[len] - unescaped_run);
12394 }
12395}
12396
12397#define CHKLENGTH(x) if (!(x)) return -1;
12398
12399/* Helpers that format floating point values according to our custom formats.
12400 * Right now we use %.8g and %.17g for float/double, respectively, to match
12401 * proto2::util::JsonFormat's defaults. May want to change this later. */
12402
Paul Yang60327462017-10-09 12:39:13 -070012403const char neginf[] = "\"-Infinity\"";
12404const char inf[] = "\"Infinity\"";
12405
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012406static size_t fmt_double(double val, char* buf, size_t length) {
Paul Yang60327462017-10-09 12:39:13 -070012407 if (val == (1.0 / 0.0)) {
12408 CHKLENGTH(length >= strlen(inf));
12409 strcpy(buf, inf);
12410 return strlen(inf);
12411 } else if (val == (-1.0 / 0.0)) {
12412 CHKLENGTH(length >= strlen(neginf));
12413 strcpy(buf, neginf);
12414 return strlen(neginf);
12415 } else {
12416 size_t n = _upb_snprintf(buf, length, "%.17g", val);
12417 CHKLENGTH(n > 0 && n < length);
12418 return n;
12419 }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012420}
12421
12422static size_t fmt_float(float val, char* buf, size_t length) {
12423 size_t n = _upb_snprintf(buf, length, "%.8g", val);
12424 CHKLENGTH(n > 0 && n < length);
12425 return n;
12426}
12427
12428static size_t fmt_bool(bool val, char* buf, size_t length) {
12429 size_t n = _upb_snprintf(buf, length, "%s", (val ? "true" : "false"));
12430 CHKLENGTH(n > 0 && n < length);
12431 return n;
12432}
12433
Paul Yang640423f2019-06-17 13:14:27 -070012434static size_t fmt_int64_as_number(long long val, char* buf, size_t length) {
12435 size_t n = _upb_snprintf(buf, length, "%lld", val);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012436 CHKLENGTH(n > 0 && n < length);
12437 return n;
12438}
12439
Paul Yang640423f2019-06-17 13:14:27 -070012440static size_t fmt_uint64_as_number(
12441 unsigned long long val, char* buf, size_t length) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012442 size_t n = _upb_snprintf(buf, length, "%llu", val);
12443 CHKLENGTH(n > 0 && n < length);
12444 return n;
12445}
12446
Paul Yang640423f2019-06-17 13:14:27 -070012447static size_t fmt_int64_as_string(long long val, char* buf, size_t length) {
12448 size_t n = _upb_snprintf(buf, length, "\"%lld\"", val);
12449 CHKLENGTH(n > 0 && n < length);
12450 return n;
12451}
12452
12453static size_t fmt_uint64_as_string(
12454 unsigned long long val, char* buf, size_t length) {
12455 size_t n = _upb_snprintf(buf, length, "\"%llu\"", val);
12456 CHKLENGTH(n > 0 && n < length);
12457 return n;
12458}
12459
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012460/* Print a map key given a field name. Called by scalar field handlers and by
12461 * startseq for repeated fields. */
12462static bool putkey(void *closure, const void *handler_data) {
12463 upb_json_printer *p = closure;
12464 const strpc *key = handler_data;
12465 print_comma(p);
12466 print_data(p, "\"", 1);
12467 putstring(p, key->ptr, key->len);
12468 print_data(p, "\":", 2);
12469 return true;
12470}
12471
12472#define CHKFMT(val) if ((val) == (size_t)-1) return false;
12473#define CHK(val) if (!(val)) return false;
12474
12475#define TYPE_HANDLERS(type, fmt_func) \
12476 static bool put##type(void *closure, const void *handler_data, type val) { \
12477 upb_json_printer *p = closure; \
12478 char data[64]; \
12479 size_t length = fmt_func(val, data, sizeof(data)); \
12480 UPB_UNUSED(handler_data); \
12481 CHKFMT(length); \
12482 print_data(p, data, length); \
12483 return true; \
12484 } \
12485 static bool scalar_##type(void *closure, const void *handler_data, \
12486 type val) { \
12487 CHK(putkey(closure, handler_data)); \
12488 CHK(put##type(closure, handler_data, val)); \
12489 return true; \
12490 } \
12491 static bool repeated_##type(void *closure, const void *handler_data, \
12492 type val) { \
12493 upb_json_printer *p = closure; \
12494 print_comma(p); \
12495 CHK(put##type(closure, handler_data, val)); \
12496 return true; \
12497 }
12498
12499#define TYPE_HANDLERS_MAPKEY(type, fmt_func) \
12500 static bool putmapkey_##type(void *closure, const void *handler_data, \
12501 type val) { \
12502 upb_json_printer *p = closure; \
Paul Yang640423f2019-06-17 13:14:27 -070012503 char data[64]; \
12504 size_t length = fmt_func(val, data, sizeof(data)); \
12505 UPB_UNUSED(handler_data); \
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012506 print_data(p, "\"", 1); \
Paul Yang640423f2019-06-17 13:14:27 -070012507 print_data(p, data, length); \
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012508 print_data(p, "\":", 2); \
12509 return true; \
12510 }
12511
12512TYPE_HANDLERS(double, fmt_double)
12513TYPE_HANDLERS(float, fmt_float)
12514TYPE_HANDLERS(bool, fmt_bool)
Paul Yang640423f2019-06-17 13:14:27 -070012515TYPE_HANDLERS(int32_t, fmt_int64_as_number)
12516TYPE_HANDLERS(uint32_t, fmt_int64_as_number)
12517TYPE_HANDLERS(int64_t, fmt_int64_as_string)
12518TYPE_HANDLERS(uint64_t, fmt_uint64_as_string)
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012519
12520/* double and float are not allowed to be map keys. */
12521TYPE_HANDLERS_MAPKEY(bool, fmt_bool)
Paul Yang640423f2019-06-17 13:14:27 -070012522TYPE_HANDLERS_MAPKEY(int32_t, fmt_int64_as_number)
12523TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64_as_number)
12524TYPE_HANDLERS_MAPKEY(int64_t, fmt_int64_as_number)
12525TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64_as_number)
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012526
12527#undef TYPE_HANDLERS
12528#undef TYPE_HANDLERS_MAPKEY
12529
12530typedef struct {
12531 void *keyname;
12532 const upb_enumdef *enumdef;
12533} EnumHandlerData;
12534
12535static bool scalar_enum(void *closure, const void *handler_data,
12536 int32_t val) {
12537 const EnumHandlerData *hd = handler_data;
12538 upb_json_printer *p = closure;
12539 const char *symbolic_name;
12540
12541 CHK(putkey(closure, hd->keyname));
12542
12543 symbolic_name = upb_enumdef_iton(hd->enumdef, val);
12544 if (symbolic_name) {
12545 print_data(p, "\"", 1);
12546 putstring(p, symbolic_name, strlen(symbolic_name));
12547 print_data(p, "\"", 1);
12548 } else {
12549 putint32_t(closure, NULL, val);
12550 }
12551
12552 return true;
12553}
12554
12555static void print_enum_symbolic_name(upb_json_printer *p,
12556 const upb_enumdef *def,
12557 int32_t val) {
12558 const char *symbolic_name = upb_enumdef_iton(def, val);
12559 if (symbolic_name) {
12560 print_data(p, "\"", 1);
12561 putstring(p, symbolic_name, strlen(symbolic_name));
12562 print_data(p, "\"", 1);
12563 } else {
12564 putint32_t(p, NULL, val);
12565 }
12566}
12567
12568static bool repeated_enum(void *closure, const void *handler_data,
12569 int32_t val) {
12570 const EnumHandlerData *hd = handler_data;
12571 upb_json_printer *p = closure;
12572 print_comma(p);
12573
12574 print_enum_symbolic_name(p, hd->enumdef, val);
12575
12576 return true;
12577}
12578
12579static bool mapvalue_enum(void *closure, const void *handler_data,
12580 int32_t val) {
12581 const EnumHandlerData *hd = handler_data;
12582 upb_json_printer *p = closure;
12583
12584 print_enum_symbolic_name(p, hd->enumdef, val);
12585
12586 return true;
12587}
12588
12589static void *scalar_startsubmsg(void *closure, const void *handler_data) {
12590 return putkey(closure, handler_data) ? closure : UPB_BREAK;
12591}
12592
12593static void *repeated_startsubmsg(void *closure, const void *handler_data) {
12594 upb_json_printer *p = closure;
12595 UPB_UNUSED(handler_data);
12596 print_comma(p);
12597 return closure;
12598}
12599
12600static void start_frame(upb_json_printer *p) {
12601 p->depth_++;
12602 p->first_elem_[p->depth_] = true;
12603 print_data(p, "{", 1);
12604}
12605
12606static void end_frame(upb_json_printer *p) {
12607 print_data(p, "}", 1);
12608 p->depth_--;
12609}
12610
12611static bool printer_startmsg(void *closure, const void *handler_data) {
12612 upb_json_printer *p = closure;
12613 UPB_UNUSED(handler_data);
12614 if (p->depth_ == 0) {
12615 upb_bytessink_start(p->output_, 0, &p->subc_);
12616 }
12617 start_frame(p);
12618 return true;
12619}
12620
12621static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) {
12622 upb_json_printer *p = closure;
12623 UPB_UNUSED(handler_data);
12624 UPB_UNUSED(s);
12625 end_frame(p);
12626 if (p->depth_ == 0) {
12627 upb_bytessink_end(p->output_);
12628 }
12629 return true;
12630}
12631
12632static void *startseq(void *closure, const void *handler_data) {
12633 upb_json_printer *p = closure;
12634 CHK(putkey(closure, handler_data));
12635 p->depth_++;
12636 p->first_elem_[p->depth_] = true;
12637 print_data(p, "[", 1);
12638 return closure;
12639}
12640
12641static bool endseq(void *closure, const void *handler_data) {
12642 upb_json_printer *p = closure;
12643 UPB_UNUSED(handler_data);
12644 print_data(p, "]", 1);
12645 p->depth_--;
12646 return true;
12647}
12648
12649static void *startmap(void *closure, const void *handler_data) {
12650 upb_json_printer *p = closure;
12651 CHK(putkey(closure, handler_data));
12652 p->depth_++;
12653 p->first_elem_[p->depth_] = true;
12654 print_data(p, "{", 1);
12655 return closure;
12656}
12657
12658static bool endmap(void *closure, const void *handler_data) {
12659 upb_json_printer *p = closure;
12660 UPB_UNUSED(handler_data);
12661 print_data(p, "}", 1);
12662 p->depth_--;
12663 return true;
12664}
12665
12666static size_t putstr(void *closure, const void *handler_data, const char *str,
12667 size_t len, const upb_bufhandle *handle) {
12668 upb_json_printer *p = closure;
12669 UPB_UNUSED(handler_data);
12670 UPB_UNUSED(handle);
12671 putstring(p, str, len);
12672 return len;
12673}
12674
12675/* This has to Base64 encode the bytes, because JSON has no "bytes" type. */
12676static size_t putbytes(void *closure, const void *handler_data, const char *str,
12677 size_t len, const upb_bufhandle *handle) {
12678 upb_json_printer *p = closure;
12679
12680 /* This is the regular base64, not the "web-safe" version. */
12681 static const char base64[] =
12682 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
12683
12684 /* Base64-encode. */
12685 char data[16000];
12686 const char *limit = data + sizeof(data);
12687 const unsigned char *from = (const unsigned char*)str;
12688 char *to = data;
12689 size_t remaining = len;
12690 size_t bytes;
12691
12692 UPB_UNUSED(handler_data);
12693 UPB_UNUSED(handle);
12694
Paul Yangba42cb52019-03-25 10:51:18 -070012695 print_data(p, "\"", 1);
12696
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012697 while (remaining > 2) {
Paul Yangba42cb52019-03-25 10:51:18 -070012698 if (limit - to < 4) {
12699 bytes = to - data;
12700 putstring(p, data, bytes);
12701 to = data;
12702 }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012703
12704 to[0] = base64[from[0] >> 2];
12705 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
12706 to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)];
12707 to[3] = base64[from[2] & 0x3f];
12708
12709 remaining -= 3;
12710 to += 4;
12711 from += 3;
12712 }
12713
12714 switch (remaining) {
12715 case 2:
12716 to[0] = base64[from[0] >> 2];
12717 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
12718 to[2] = base64[(from[1] & 0xf) << 2];
12719 to[3] = '=';
12720 to += 4;
12721 from += 2;
12722 break;
12723 case 1:
12724 to[0] = base64[from[0] >> 2];
12725 to[1] = base64[((from[0] & 0x3) << 4)];
12726 to[2] = '=';
12727 to[3] = '=';
12728 to += 4;
12729 from += 1;
12730 break;
12731 }
12732
12733 bytes = to - data;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012734 putstring(p, data, bytes);
12735 print_data(p, "\"", 1);
12736 return len;
12737}
12738
12739static void *scalar_startstr(void *closure, const void *handler_data,
12740 size_t size_hint) {
12741 upb_json_printer *p = closure;
12742 UPB_UNUSED(handler_data);
12743 UPB_UNUSED(size_hint);
12744 CHK(putkey(closure, handler_data));
12745 print_data(p, "\"", 1);
12746 return p;
12747}
12748
12749static size_t scalar_str(void *closure, const void *handler_data,
12750 const char *str, size_t len,
12751 const upb_bufhandle *handle) {
12752 CHK(putstr(closure, handler_data, str, len, handle));
12753 return len;
12754}
12755
12756static bool scalar_endstr(void *closure, const void *handler_data) {
12757 upb_json_printer *p = closure;
12758 UPB_UNUSED(handler_data);
12759 print_data(p, "\"", 1);
12760 return true;
12761}
12762
12763static void *repeated_startstr(void *closure, const void *handler_data,
12764 size_t size_hint) {
12765 upb_json_printer *p = closure;
12766 UPB_UNUSED(handler_data);
12767 UPB_UNUSED(size_hint);
12768 print_comma(p);
12769 print_data(p, "\"", 1);
12770 return p;
12771}
12772
12773static size_t repeated_str(void *closure, const void *handler_data,
12774 const char *str, size_t len,
12775 const upb_bufhandle *handle) {
12776 CHK(putstr(closure, handler_data, str, len, handle));
12777 return len;
12778}
12779
12780static bool repeated_endstr(void *closure, const void *handler_data) {
12781 upb_json_printer *p = closure;
12782 UPB_UNUSED(handler_data);
12783 print_data(p, "\"", 1);
12784 return true;
12785}
12786
12787static void *mapkeyval_startstr(void *closure, const void *handler_data,
12788 size_t size_hint) {
12789 upb_json_printer *p = closure;
12790 UPB_UNUSED(handler_data);
12791 UPB_UNUSED(size_hint);
12792 print_data(p, "\"", 1);
12793 return p;
12794}
12795
12796static size_t mapkey_str(void *closure, const void *handler_data,
12797 const char *str, size_t len,
12798 const upb_bufhandle *handle) {
12799 CHK(putstr(closure, handler_data, str, len, handle));
12800 return len;
12801}
12802
12803static bool mapkey_endstr(void *closure, const void *handler_data) {
12804 upb_json_printer *p = closure;
12805 UPB_UNUSED(handler_data);
12806 print_data(p, "\":", 2);
12807 return true;
12808}
12809
12810static bool mapvalue_endstr(void *closure, const void *handler_data) {
12811 upb_json_printer *p = closure;
12812 UPB_UNUSED(handler_data);
12813 print_data(p, "\"", 1);
12814 return true;
12815}
12816
12817static size_t scalar_bytes(void *closure, const void *handler_data,
12818 const char *str, size_t len,
12819 const upb_bufhandle *handle) {
12820 CHK(putkey(closure, handler_data));
12821 CHK(putbytes(closure, handler_data, str, len, handle));
12822 return len;
12823}
12824
12825static size_t repeated_bytes(void *closure, const void *handler_data,
12826 const char *str, size_t len,
12827 const upb_bufhandle *handle) {
12828 upb_json_printer *p = closure;
12829 print_comma(p);
12830 CHK(putbytes(closure, handler_data, str, len, handle));
12831 return len;
12832}
12833
12834static size_t mapkey_bytes(void *closure, const void *handler_data,
12835 const char *str, size_t len,
12836 const upb_bufhandle *handle) {
12837 upb_json_printer *p = closure;
12838 CHK(putbytes(closure, handler_data, str, len, handle));
12839 print_data(p, ":", 1);
12840 return len;
12841}
12842
12843static void set_enum_hd(upb_handlers *h,
12844 const upb_fielddef *f,
Paul Yange0e54662016-09-15 11:09:01 -070012845 bool preserve_fieldnames,
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012846 upb_handlerattr *attr) {
Paul Yange0e54662016-09-15 11:09:01 -070012847 EnumHandlerData *hd = upb_gmalloc(sizeof(EnumHandlerData));
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012848 hd->enumdef = upb_fielddef_enumsubdef(f);
Paul Yange0e54662016-09-15 11:09:01 -070012849 hd->keyname = newstrpc(h, f, preserve_fieldnames);
12850 upb_handlers_addcleanup(h, hd, upb_gfree);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012851 attr->handler_data = hd;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012852}
12853
12854/* Set up handlers for a mapentry submessage (i.e., an individual key/value pair
12855 * in a map).
12856 *
12857 * TODO: Handle missing key, missing value, out-of-order key/value, or repeated
12858 * key or value cases properly. The right way to do this is to allocate a
12859 * temporary structure at the start of a mapentry submessage, store key and
12860 * value data in it as key and value handlers are called, and then print the
12861 * key/value pair once at the end of the submessage. If we don't do this, we
12862 * should at least detect the case and throw an error. However, so far all of
12863 * our sources that emit mapentry messages do so canonically (with one key
12864 * field, and then one value field), so this is not a pressing concern at the
12865 * moment. */
Paul Yange0e54662016-09-15 11:09:01 -070012866void printer_sethandlers_mapentry(const void *closure, bool preserve_fieldnames,
12867 upb_handlers *h) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012868 const upb_msgdef *md = upb_handlers_msgdef(h);
12869
12870 /* A mapentry message is printed simply as '"key": value'. Rather than
12871 * special-case key and value for every type below, we just handle both
12872 * fields explicitly here. */
12873 const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
12874 const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
12875
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012876 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012877
12878 UPB_UNUSED(closure);
12879
12880 switch (upb_fielddef_type(key_field)) {
12881 case UPB_TYPE_INT32:
12882 upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
12883 break;
12884 case UPB_TYPE_INT64:
12885 upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
12886 break;
12887 case UPB_TYPE_UINT32:
12888 upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
12889 break;
12890 case UPB_TYPE_UINT64:
12891 upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
12892 break;
12893 case UPB_TYPE_BOOL:
12894 upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
12895 break;
12896 case UPB_TYPE_STRING:
12897 upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
12898 upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
12899 upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
12900 break;
12901 case UPB_TYPE_BYTES:
12902 upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
12903 break;
12904 default:
Paul Yange0e54662016-09-15 11:09:01 -070012905 UPB_ASSERT(false);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012906 break;
12907 }
12908
12909 switch (upb_fielddef_type(value_field)) {
12910 case UPB_TYPE_INT32:
12911 upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
12912 break;
12913 case UPB_TYPE_INT64:
12914 upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
12915 break;
12916 case UPB_TYPE_UINT32:
12917 upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
12918 break;
12919 case UPB_TYPE_UINT64:
12920 upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
12921 break;
12922 case UPB_TYPE_BOOL:
12923 upb_handlers_setbool(h, value_field, putbool, &empty_attr);
12924 break;
12925 case UPB_TYPE_FLOAT:
12926 upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
12927 break;
12928 case UPB_TYPE_DOUBLE:
12929 upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
12930 break;
12931 case UPB_TYPE_STRING:
12932 upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
12933 upb_handlers_setstring(h, value_field, putstr, &empty_attr);
12934 upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
12935 break;
12936 case UPB_TYPE_BYTES:
12937 upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
12938 break;
12939 case UPB_TYPE_ENUM: {
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080012940 upb_handlerattr enum_attr = UPB_HANDLERATTR_INIT;
Paul Yange0e54662016-09-15 11:09:01 -070012941 set_enum_hd(h, value_field, preserve_fieldnames, &enum_attr);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012942 upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012943 break;
12944 }
12945 case UPB_TYPE_MESSAGE:
12946 /* No handler necessary -- the submsg handlers will print the message
12947 * as appropriate. */
12948 break;
12949 }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070012950}
12951
Paul Yang9bda1f12018-09-22 18:57:43 -070012952static bool putseconds(void *closure, const void *handler_data,
12953 int64_t seconds) {
12954 upb_json_printer *p = closure;
12955 p->seconds = seconds;
12956 UPB_UNUSED(handler_data);
12957 return true;
12958}
12959
12960static bool putnanos(void *closure, const void *handler_data,
12961 int32_t nanos) {
12962 upb_json_printer *p = closure;
12963 p->nanos = nanos;
12964 UPB_UNUSED(handler_data);
12965 return true;
12966}
12967
12968static void *scalar_startstr_nokey(void *closure, const void *handler_data,
12969 size_t size_hint) {
12970 upb_json_printer *p = closure;
12971 UPB_UNUSED(handler_data);
12972 UPB_UNUSED(size_hint);
12973 print_data(p, "\"", 1);
12974 return p;
12975}
12976
12977static size_t putstr_nokey(void *closure, const void *handler_data,
12978 const char *str, size_t len,
12979 const upb_bufhandle *handle) {
12980 upb_json_printer *p = closure;
12981 UPB_UNUSED(handler_data);
12982 UPB_UNUSED(handle);
12983 print_data(p, "\"", 1);
12984 putstring(p, str, len);
12985 print_data(p, "\"", 1);
12986 return len + 2;
12987}
12988
12989static void *startseq_nokey(void *closure, const void *handler_data) {
12990 upb_json_printer *p = closure;
12991 UPB_UNUSED(handler_data);
12992 p->depth_++;
12993 p->first_elem_[p->depth_] = true;
12994 print_data(p, "[", 1);
12995 return closure;
12996}
12997
Paul Yangc4f2a922019-01-17 10:18:43 -080012998static void *startseq_fieldmask(void *closure, const void *handler_data) {
12999 upb_json_printer *p = closure;
13000 UPB_UNUSED(handler_data);
13001 p->depth_++;
13002 p->first_elem_[p->depth_] = true;
Paul Yangc4f2a922019-01-17 10:18:43 -080013003 return closure;
13004}
13005
13006static bool endseq_fieldmask(void *closure, const void *handler_data) {
13007 upb_json_printer *p = closure;
13008 UPB_UNUSED(handler_data);
13009 p->depth_--;
Paul Yangc4f2a922019-01-17 10:18:43 -080013010 return true;
13011}
13012
13013static void *repeated_startstr_fieldmask(
13014 void *closure, const void *handler_data,
13015 size_t size_hint) {
13016 upb_json_printer *p = closure;
13017 UPB_UNUSED(handler_data);
13018 UPB_UNUSED(size_hint);
13019 print_comma(p);
13020 return p;
13021}
13022
13023static size_t repeated_str_fieldmask(
13024 void *closure, const void *handler_data,
13025 const char *str, size_t len,
13026 const upb_bufhandle *handle) {
13027 const char* limit = str + len;
13028 bool upper = false;
13029 size_t result_len = 0;
13030 for (; str < limit; str++) {
13031 if (*str == '_') {
13032 upper = true;
13033 continue;
13034 }
13035 if (upper && *str >= 'a' && *str <= 'z') {
13036 char upper_char = toupper(*str);
13037 CHK(putstr(closure, handler_data, &upper_char, 1, handle));
13038 } else {
13039 CHK(putstr(closure, handler_data, str, 1, handle));
13040 }
13041 upper = false;
13042 result_len++;
13043 }
13044 return result_len;
13045}
13046
Paul Yang9bda1f12018-09-22 18:57:43 -070013047static void *startmap_nokey(void *closure, const void *handler_data) {
13048 upb_json_printer *p = closure;
13049 UPB_UNUSED(handler_data);
13050 p->depth_++;
13051 p->first_elem_[p->depth_] = true;
13052 print_data(p, "{", 1);
13053 return closure;
13054}
13055
13056static bool putnull(void *closure, const void *handler_data,
13057 int32_t null) {
13058 upb_json_printer *p = closure;
13059 print_data(p, "null", 4);
13060 UPB_UNUSED(handler_data);
13061 UPB_UNUSED(null);
13062 return true;
13063}
13064
13065static bool printer_startdurationmsg(void *closure, const void *handler_data) {
13066 upb_json_printer *p = closure;
13067 UPB_UNUSED(handler_data);
13068 if (p->depth_ == 0) {
13069 upb_bytessink_start(p->output_, 0, &p->subc_);
13070 }
13071 return true;
13072}
13073
13074#define UPB_DURATION_MAX_JSON_LEN 23
13075#define UPB_DURATION_MAX_NANO_LEN 9
13076
13077static bool printer_enddurationmsg(void *closure, const void *handler_data,
13078 upb_status *s) {
13079 upb_json_printer *p = closure;
13080 char buffer[UPB_DURATION_MAX_JSON_LEN];
13081 size_t base_len;
13082 size_t curr;
13083 size_t i;
13084
13085 memset(buffer, 0, UPB_DURATION_MAX_JSON_LEN);
13086
13087 if (p->seconds < -315576000000) {
13088 upb_status_seterrf(s, "error parsing duration: "
13089 "minimum acceptable value is "
13090 "-315576000000");
13091 return false;
13092 }
13093
13094 if (p->seconds > 315576000000) {
13095 upb_status_seterrf(s, "error serializing duration: "
13096 "maximum acceptable value is "
13097 "315576000000");
13098 return false;
13099 }
13100
13101 _upb_snprintf(buffer, sizeof(buffer), "%ld", (long)p->seconds);
13102 base_len = strlen(buffer);
13103
13104 if (p->nanos != 0) {
13105 char nanos_buffer[UPB_DURATION_MAX_NANO_LEN + 3];
13106 _upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
13107 p->nanos / 1000000000.0);
13108 /* Remove trailing 0. */
13109 for (i = UPB_DURATION_MAX_NANO_LEN + 2;
13110 nanos_buffer[i] == '0'; i--) {
13111 nanos_buffer[i] = 0;
13112 }
13113 strcpy(buffer + base_len, nanos_buffer + 1);
13114 }
13115
13116 curr = strlen(buffer);
13117 strcpy(buffer + curr, "s");
13118
13119 p->seconds = 0;
13120 p->nanos = 0;
13121
13122 print_data(p, "\"", 1);
13123 print_data(p, buffer, strlen(buffer));
13124 print_data(p, "\"", 1);
13125
13126 if (p->depth_ == 0) {
13127 upb_bytessink_end(p->output_);
13128 }
13129
13130 UPB_UNUSED(handler_data);
13131 return true;
13132}
13133
13134static bool printer_starttimestampmsg(void *closure, const void *handler_data) {
13135 upb_json_printer *p = closure;
13136 UPB_UNUSED(handler_data);
13137 if (p->depth_ == 0) {
13138 upb_bytessink_start(p->output_, 0, &p->subc_);
13139 }
13140 return true;
13141}
13142
13143#define UPB_TIMESTAMP_MAX_JSON_LEN 31
13144#define UPB_TIMESTAMP_BEFORE_NANO_LEN 19
13145#define UPB_TIMESTAMP_MAX_NANO_LEN 9
13146
13147static bool printer_endtimestampmsg(void *closure, const void *handler_data,
13148 upb_status *s) {
13149 upb_json_printer *p = closure;
13150 char buffer[UPB_TIMESTAMP_MAX_JSON_LEN];
13151 time_t time = p->seconds;
13152 size_t curr;
13153 size_t i;
13154 size_t year_length =
13155 strftime(buffer, UPB_TIMESTAMP_MAX_JSON_LEN, "%Y", gmtime(&time));
13156
13157 if (p->seconds < -62135596800) {
13158 upb_status_seterrf(s, "error parsing timestamp: "
13159 "minimum acceptable value is "
13160 "0001-01-01T00:00:00Z");
13161 return false;
13162 }
13163
13164 if (p->seconds > 253402300799) {
13165 upb_status_seterrf(s, "error parsing timestamp: "
13166 "maximum acceptable value is "
13167 "9999-12-31T23:59:59Z");
13168 return false;
13169 }
13170
13171 /* strftime doesn't guarantee 4 digits for year. Prepend 0 by ourselves. */
13172 for (i = 0; i < 4 - year_length; i++) {
13173 buffer[i] = '0';
13174 }
13175
13176 strftime(buffer + (4 - year_length), UPB_TIMESTAMP_MAX_JSON_LEN,
13177 "%Y-%m-%dT%H:%M:%S", gmtime(&time));
13178 if (p->nanos != 0) {
13179 char nanos_buffer[UPB_TIMESTAMP_MAX_NANO_LEN + 3];
13180 _upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
13181 p->nanos / 1000000000.0);
13182 /* Remove trailing 0. */
13183 for (i = UPB_TIMESTAMP_MAX_NANO_LEN + 2;
13184 nanos_buffer[i] == '0'; i--) {
13185 nanos_buffer[i] = 0;
13186 }
13187 strcpy(buffer + UPB_TIMESTAMP_BEFORE_NANO_LEN, nanos_buffer + 1);
13188 }
13189
13190 curr = strlen(buffer);
13191 strcpy(buffer + curr, "Z");
13192
13193 p->seconds = 0;
13194 p->nanos = 0;
13195
13196 print_data(p, "\"", 1);
13197 print_data(p, buffer, strlen(buffer));
13198 print_data(p, "\"", 1);
13199
13200 if (p->depth_ == 0) {
13201 upb_bytessink_end(p->output_);
13202 }
13203
13204 UPB_UNUSED(handler_data);
13205 UPB_UNUSED(s);
13206 return true;
13207}
13208
13209static bool printer_startmsg_noframe(void *closure, const void *handler_data) {
13210 upb_json_printer *p = closure;
13211 UPB_UNUSED(handler_data);
13212 if (p->depth_ == 0) {
13213 upb_bytessink_start(p->output_, 0, &p->subc_);
13214 }
13215 return true;
13216}
13217
13218static bool printer_endmsg_noframe(
13219 void *closure, const void *handler_data, upb_status *s) {
13220 upb_json_printer *p = closure;
13221 UPB_UNUSED(handler_data);
13222 UPB_UNUSED(s);
13223 if (p->depth_ == 0) {
13224 upb_bytessink_end(p->output_);
13225 }
13226 return true;
13227}
13228
Paul Yang7f42d6d2019-01-22 15:35:12 -080013229static bool printer_startmsg_fieldmask(
13230 void *closure, const void *handler_data) {
13231 upb_json_printer *p = closure;
13232 UPB_UNUSED(handler_data);
13233 if (p->depth_ == 0) {
13234 upb_bytessink_start(p->output_, 0, &p->subc_);
13235 }
13236 print_data(p, "\"", 1);
13237 return true;
13238}
13239
13240static bool printer_endmsg_fieldmask(
13241 void *closure, const void *handler_data, upb_status *s) {
13242 upb_json_printer *p = closure;
13243 UPB_UNUSED(handler_data);
13244 UPB_UNUSED(s);
13245 print_data(p, "\"", 1);
13246 if (p->depth_ == 0) {
13247 upb_bytessink_end(p->output_);
13248 }
13249 return true;
13250}
13251
Paul Yang8faa7782018-12-26 10:36:09 -080013252static void *scalar_startstr_onlykey(
13253 void *closure, const void *handler_data, size_t size_hint) {
13254 upb_json_printer *p = closure;
13255 UPB_UNUSED(size_hint);
13256 CHK(putkey(closure, handler_data));
13257 return p;
13258}
13259
13260/* Set up handlers for an Any submessage. */
13261void printer_sethandlers_any(const void *closure, upb_handlers *h) {
13262 const upb_msgdef *md = upb_handlers_msgdef(h);
13263
13264 const upb_fielddef* type_field = upb_msgdef_itof(md, UPB_ANY_TYPE);
13265 const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_ANY_VALUE);
13266
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013267 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
Paul Yang8faa7782018-12-26 10:36:09 -080013268
13269 /* type_url's json name is "@type" */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013270 upb_handlerattr type_name_attr = UPB_HANDLERATTR_INIT;
13271 upb_handlerattr value_name_attr = UPB_HANDLERATTR_INIT;
Paul Yang8faa7782018-12-26 10:36:09 -080013272 strpc *type_url_json_name = newstrpc_str(h, "@type");
13273 strpc *value_json_name = newstrpc_str(h, "value");
13274
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013275 type_name_attr.handler_data = type_url_json_name;
13276 value_name_attr.handler_data = value_json_name;
Paul Yang8faa7782018-12-26 10:36:09 -080013277
13278 /* Set up handlers. */
13279 upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
13280 upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
13281
13282 upb_handlers_setstartstr(h, type_field, scalar_startstr, &type_name_attr);
13283 upb_handlers_setstring(h, type_field, scalar_str, &empty_attr);
13284 upb_handlers_setendstr(h, type_field, scalar_endstr, &empty_attr);
13285
13286 /* This is not the full and correct JSON encoding for the Any value field. It
13287 * requires further processing by the wrapper code based on the type URL.
13288 */
13289 upb_handlers_setstartstr(h, value_field, scalar_startstr_onlykey,
13290 &value_name_attr);
13291
13292 UPB_UNUSED(closure);
13293}
13294
Paul Yangc4f2a922019-01-17 10:18:43 -080013295/* Set up handlers for a fieldmask submessage. */
13296void printer_sethandlers_fieldmask(const void *closure, upb_handlers *h) {
13297 const upb_msgdef *md = upb_handlers_msgdef(h);
13298 const upb_fielddef* f = upb_msgdef_itof(md, 1);
13299
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013300 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
Paul Yangc4f2a922019-01-17 10:18:43 -080013301
13302 upb_handlers_setstartseq(h, f, startseq_fieldmask, &empty_attr);
13303 upb_handlers_setendseq(h, f, endseq_fieldmask, &empty_attr);
13304
Paul Yang7f42d6d2019-01-22 15:35:12 -080013305 upb_handlers_setstartmsg(h, printer_startmsg_fieldmask, &empty_attr);
13306 upb_handlers_setendmsg(h, printer_endmsg_fieldmask, &empty_attr);
Paul Yangc4f2a922019-01-17 10:18:43 -080013307
13308 upb_handlers_setstartstr(h, f, repeated_startstr_fieldmask, &empty_attr);
13309 upb_handlers_setstring(h, f, repeated_str_fieldmask, &empty_attr);
13310
13311 UPB_UNUSED(closure);
13312}
13313
Paul Yang9bda1f12018-09-22 18:57:43 -070013314/* Set up handlers for a duration submessage. */
13315void printer_sethandlers_duration(const void *closure, upb_handlers *h) {
13316 const upb_msgdef *md = upb_handlers_msgdef(h);
13317
13318 const upb_fielddef* seconds_field =
13319 upb_msgdef_itof(md, UPB_DURATION_SECONDS);
13320 const upb_fielddef* nanos_field =
13321 upb_msgdef_itof(md, UPB_DURATION_NANOS);
13322
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013323 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
Paul Yang9bda1f12018-09-22 18:57:43 -070013324
13325 upb_handlers_setstartmsg(h, printer_startdurationmsg, &empty_attr);
13326 upb_handlers_setint64(h, seconds_field, putseconds, &empty_attr);
13327 upb_handlers_setint32(h, nanos_field, putnanos, &empty_attr);
13328 upb_handlers_setendmsg(h, printer_enddurationmsg, &empty_attr);
13329
13330 UPB_UNUSED(closure);
13331}
13332
13333/* Set up handlers for a timestamp submessage. Instead of printing fields
13334 * separately, the json representation of timestamp follows RFC 3339 */
13335void printer_sethandlers_timestamp(const void *closure, upb_handlers *h) {
13336 const upb_msgdef *md = upb_handlers_msgdef(h);
13337
13338 const upb_fielddef* seconds_field =
13339 upb_msgdef_itof(md, UPB_TIMESTAMP_SECONDS);
13340 const upb_fielddef* nanos_field =
13341 upb_msgdef_itof(md, UPB_TIMESTAMP_NANOS);
13342
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013343 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
Paul Yang9bda1f12018-09-22 18:57:43 -070013344
13345 upb_handlers_setstartmsg(h, printer_starttimestampmsg, &empty_attr);
13346 upb_handlers_setint64(h, seconds_field, putseconds, &empty_attr);
13347 upb_handlers_setint32(h, nanos_field, putnanos, &empty_attr);
13348 upb_handlers_setendmsg(h, printer_endtimestampmsg, &empty_attr);
13349
13350 UPB_UNUSED(closure);
13351}
13352
13353void printer_sethandlers_value(const void *closure, upb_handlers *h) {
13354 const upb_msgdef *md = upb_handlers_msgdef(h);
13355 upb_msg_field_iter i;
13356
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013357 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
Paul Yang9bda1f12018-09-22 18:57:43 -070013358
13359 upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr);
13360 upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr);
13361
13362 upb_msg_field_begin(&i, md);
13363 for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
13364 const upb_fielddef *f = upb_msg_iter_field(&i);
13365
13366 switch (upb_fielddef_type(f)) {
13367 case UPB_TYPE_ENUM:
13368 upb_handlers_setint32(h, f, putnull, &empty_attr);
13369 break;
13370 case UPB_TYPE_DOUBLE:
13371 upb_handlers_setdouble(h, f, putdouble, &empty_attr);
13372 break;
13373 case UPB_TYPE_STRING:
13374 upb_handlers_setstartstr(h, f, scalar_startstr_nokey, &empty_attr);
13375 upb_handlers_setstring(h, f, scalar_str, &empty_attr);
13376 upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
13377 break;
13378 case UPB_TYPE_BOOL:
13379 upb_handlers_setbool(h, f, putbool, &empty_attr);
13380 break;
13381 case UPB_TYPE_MESSAGE:
13382 break;
13383 default:
13384 UPB_ASSERT(false);
13385 break;
13386 }
13387 }
13388
13389 UPB_UNUSED(closure);
13390}
13391
13392#define WRAPPER_SETHANDLERS(wrapper, type, putmethod) \
13393void printer_sethandlers_##wrapper(const void *closure, upb_handlers *h) { \
13394 const upb_msgdef *md = upb_handlers_msgdef(h); \
13395 const upb_fielddef* f = upb_msgdef_itof(md, 1); \
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013396 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; \
Paul Yang9bda1f12018-09-22 18:57:43 -070013397 upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr); \
13398 upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr); \
13399 upb_handlers_set##type(h, f, putmethod, &empty_attr); \
13400 UPB_UNUSED(closure); \
13401}
13402
13403WRAPPER_SETHANDLERS(doublevalue, double, putdouble)
13404WRAPPER_SETHANDLERS(floatvalue, float, putfloat)
13405WRAPPER_SETHANDLERS(int64value, int64, putint64_t)
13406WRAPPER_SETHANDLERS(uint64value, uint64, putuint64_t)
13407WRAPPER_SETHANDLERS(int32value, int32, putint32_t)
13408WRAPPER_SETHANDLERS(uint32value, uint32, putuint32_t)
13409WRAPPER_SETHANDLERS(boolvalue, bool, putbool)
13410WRAPPER_SETHANDLERS(stringvalue, string, putstr_nokey)
13411WRAPPER_SETHANDLERS(bytesvalue, string, putbytes)
13412
13413#undef WRAPPER_SETHANDLERS
13414
13415void printer_sethandlers_listvalue(const void *closure, upb_handlers *h) {
13416 const upb_msgdef *md = upb_handlers_msgdef(h);
13417 const upb_fielddef* f = upb_msgdef_itof(md, 1);
13418
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013419 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
Paul Yang9bda1f12018-09-22 18:57:43 -070013420
13421 upb_handlers_setstartseq(h, f, startseq_nokey, &empty_attr);
13422 upb_handlers_setendseq(h, f, endseq, &empty_attr);
13423
13424 upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr);
13425 upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr);
13426
13427 upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &empty_attr);
13428
13429 UPB_UNUSED(closure);
13430}
13431
13432void printer_sethandlers_structvalue(const void *closure, upb_handlers *h) {
13433 const upb_msgdef *md = upb_handlers_msgdef(h);
13434 const upb_fielddef* f = upb_msgdef_itof(md, 1);
13435
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013436 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
Paul Yang9bda1f12018-09-22 18:57:43 -070013437
13438 upb_handlers_setstartseq(h, f, startmap_nokey, &empty_attr);
13439 upb_handlers_setendseq(h, f, endmap, &empty_attr);
13440
13441 upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr);
13442 upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr);
13443
13444 upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &empty_attr);
13445
13446 UPB_UNUSED(closure);
13447}
13448
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070013449void printer_sethandlers(const void *closure, upb_handlers *h) {
13450 const upb_msgdef *md = upb_handlers_msgdef(h);
13451 bool is_mapentry = upb_msgdef_mapentry(md);
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013452 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070013453 upb_msg_field_iter i;
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013454 const upb_json_printercache *cache = closure;
13455 const bool preserve_fieldnames = cache->preserve_fieldnames;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070013456
13457 if (is_mapentry) {
13458 /* mapentry messages are sufficiently different that we handle them
13459 * separately. */
Paul Yange0e54662016-09-15 11:09:01 -070013460 printer_sethandlers_mapentry(closure, preserve_fieldnames, h);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070013461 return;
13462 }
13463
Paul Yang9bda1f12018-09-22 18:57:43 -070013464 switch (upb_msgdef_wellknowntype(md)) {
13465 case UPB_WELLKNOWN_UNSPECIFIED:
13466 break;
Paul Yang8faa7782018-12-26 10:36:09 -080013467 case UPB_WELLKNOWN_ANY:
13468 printer_sethandlers_any(closure, h);
13469 return;
Paul Yangc4f2a922019-01-17 10:18:43 -080013470 case UPB_WELLKNOWN_FIELDMASK:
13471 printer_sethandlers_fieldmask(closure, h);
13472 return;
Paul Yang9bda1f12018-09-22 18:57:43 -070013473 case UPB_WELLKNOWN_DURATION:
13474 printer_sethandlers_duration(closure, h);
13475 return;
13476 case UPB_WELLKNOWN_TIMESTAMP:
13477 printer_sethandlers_timestamp(closure, h);
13478 return;
13479 case UPB_WELLKNOWN_VALUE:
13480 printer_sethandlers_value(closure, h);
13481 return;
13482 case UPB_WELLKNOWN_LISTVALUE:
13483 printer_sethandlers_listvalue(closure, h);
13484 return;
13485 case UPB_WELLKNOWN_STRUCT:
13486 printer_sethandlers_structvalue(closure, h);
13487 return;
13488#define WRAPPER(wellknowntype, name) \
13489 case wellknowntype: \
13490 printer_sethandlers_##name(closure, h); \
13491 return; \
13492
13493 WRAPPER(UPB_WELLKNOWN_DOUBLEVALUE, doublevalue);
13494 WRAPPER(UPB_WELLKNOWN_FLOATVALUE, floatvalue);
13495 WRAPPER(UPB_WELLKNOWN_INT64VALUE, int64value);
13496 WRAPPER(UPB_WELLKNOWN_UINT64VALUE, uint64value);
13497 WRAPPER(UPB_WELLKNOWN_INT32VALUE, int32value);
13498 WRAPPER(UPB_WELLKNOWN_UINT32VALUE, uint32value);
13499 WRAPPER(UPB_WELLKNOWN_BOOLVALUE, boolvalue);
13500 WRAPPER(UPB_WELLKNOWN_STRINGVALUE, stringvalue);
13501 WRAPPER(UPB_WELLKNOWN_BYTESVALUE, bytesvalue);
13502
13503#undef WRAPPER
13504 }
13505
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070013506 upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
13507 upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
13508
13509#define TYPE(type, name, ctype) \
13510 case type: \
13511 if (upb_fielddef_isseq(f)) { \
13512 upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \
13513 } else { \
13514 upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \
13515 } \
13516 break;
13517
13518 upb_msg_field_begin(&i, md);
13519 for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
13520 const upb_fielddef *f = upb_msg_iter_field(&i);
13521
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013522 upb_handlerattr name_attr = UPB_HANDLERATTR_INIT;
13523 name_attr.handler_data = newstrpc(h, f, preserve_fieldnames);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070013524
13525 if (upb_fielddef_ismap(f)) {
13526 upb_handlers_setstartseq(h, f, startmap, &name_attr);
13527 upb_handlers_setendseq(h, f, endmap, &name_attr);
13528 } else if (upb_fielddef_isseq(f)) {
13529 upb_handlers_setstartseq(h, f, startseq, &name_attr);
13530 upb_handlers_setendseq(h, f, endseq, &empty_attr);
13531 }
13532
13533 switch (upb_fielddef_type(f)) {
13534 TYPE(UPB_TYPE_FLOAT, float, float);
13535 TYPE(UPB_TYPE_DOUBLE, double, double);
13536 TYPE(UPB_TYPE_BOOL, bool, bool);
13537 TYPE(UPB_TYPE_INT32, int32, int32_t);
13538 TYPE(UPB_TYPE_UINT32, uint32, uint32_t);
13539 TYPE(UPB_TYPE_INT64, int64, int64_t);
13540 TYPE(UPB_TYPE_UINT64, uint64, uint64_t);
13541 case UPB_TYPE_ENUM: {
13542 /* For now, we always emit symbolic names for enums. We may want an
13543 * option later to control this behavior, but we will wait for a real
13544 * need first. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013545 upb_handlerattr enum_attr = UPB_HANDLERATTR_INIT;
Paul Yange0e54662016-09-15 11:09:01 -070013546 set_enum_hd(h, f, preserve_fieldnames, &enum_attr);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070013547
13548 if (upb_fielddef_isseq(f)) {
13549 upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
13550 } else {
13551 upb_handlers_setint32(h, f, scalar_enum, &enum_attr);
13552 }
13553
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070013554 break;
13555 }
13556 case UPB_TYPE_STRING:
13557 if (upb_fielddef_isseq(f)) {
13558 upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr);
13559 upb_handlers_setstring(h, f, repeated_str, &empty_attr);
13560 upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr);
13561 } else {
13562 upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr);
13563 upb_handlers_setstring(h, f, scalar_str, &empty_attr);
13564 upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
13565 }
13566 break;
13567 case UPB_TYPE_BYTES:
13568 /* XXX: this doesn't support strings that span buffers yet. The base64
13569 * encoder will need to be made resumable for this to work properly. */
13570 if (upb_fielddef_isseq(f)) {
13571 upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
13572 } else {
13573 upb_handlers_setstring(h, f, scalar_bytes, &name_attr);
13574 }
13575 break;
13576 case UPB_TYPE_MESSAGE:
13577 if (upb_fielddef_isseq(f)) {
13578 upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr);
13579 } else {
13580 upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr);
13581 }
13582 break;
13583 }
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070013584 }
13585
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070013586#undef TYPE
13587}
13588
13589static void json_printer_reset(upb_json_printer *p) {
13590 p->depth_ = 0;
13591}
13592
13593
13594/* Public API *****************************************************************/
13595
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013596upb_json_printer *upb_json_printer_create(upb_arena *a, const upb_handlers *h,
13597 upb_bytessink output) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070013598#ifndef NDEBUG
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013599 size_t size_before = upb_arena_bytesallocated(a);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070013600#endif
13601
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013602 upb_json_printer *p = upb_arena_malloc(a, sizeof(upb_json_printer));
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070013603 if (!p) return NULL;
13604
13605 p->output_ = output;
13606 json_printer_reset(p);
13607 upb_sink_reset(&p->input_, h, p);
Paul Yang9bda1f12018-09-22 18:57:43 -070013608 p->seconds = 0;
13609 p->nanos = 0;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070013610
13611 /* If this fails, increase the value in printer.h. */
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013612 UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(a) - size_before <=
Paul Yange0e54662016-09-15 11:09:01 -070013613 UPB_JSON_PRINTER_SIZE);
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070013614 return p;
13615}
13616
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013617upb_sink upb_json_printer_input(upb_json_printer *p) {
13618 return p->input_;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070013619}
13620
Joshua Habermanf5e8ee42019-03-06 12:04:17 -080013621upb_handlercache *upb_json_printer_newcache(bool preserve_proto_fieldnames) {
13622 upb_json_printercache *cache = upb_gmalloc(sizeof(*cache));
13623 upb_handlercache *ret = upb_handlercache_new(printer_sethandlers, cache);
13624
13625 cache->preserve_fieldnames = preserve_proto_fieldnames;
13626 upb_handlercache_addcleanup(ret, cache, upb_gfree);
13627
13628 return ret;
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070013629}
Paul Yang9bda1f12018-09-22 18:57:43 -070013630
13631#undef UPB_SIZE
13632#undef UPB_FIELD_AT
13633#undef UPB_READ_ONEOF
13634#undef UPB_WRITE_ONEOF