@@ -138,10 +138,13 @@ struct structural_parser : structural_iterator {
138
138
current_string_buf_loc = dst + 1 ;
139
139
}
140
140
141
- WARN_UNUSED really_inline error_code parse_string (bool key = false ) {
141
+ WARN_UNUSED really_inline error_code parse_key (const uint8_t *key) {
142
+ return parse_string (key, true );
143
+ }
144
+ WARN_UNUSED really_inline error_code parse_string (const uint8_t *value, bool key = false ) {
142
145
log_value (key ? " key" : " string" );
143
146
uint8_t *dst = on_start_string ();
144
- dst = stringparsing::parse_string (current () , dst);
147
+ dst = stringparsing::parse_string (value , dst);
145
148
if (dst == nullptr ) {
146
149
log_error (" Invalid escape in string" );
147
150
return STRING_ERROR;
@@ -150,79 +153,75 @@ struct structural_parser : structural_iterator {
150
153
return SUCCESS;
151
154
}
152
155
153
- WARN_UNUSED really_inline error_code parse_number (const uint8_t *src ) {
156
+ WARN_UNUSED really_inline error_code parse_number (const uint8_t *value ) {
154
157
log_value (" number" );
155
- if (!numberparsing::parse_number (src , tape)) { log_error (" Invalid number" ); return NUMBER_ERROR; }
158
+ if (!numberparsing::parse_number (value , tape)) { log_error (" Invalid number" ); return NUMBER_ERROR; }
156
159
return SUCCESS;
157
160
}
158
- WARN_UNUSED really_inline error_code parse_number () {
159
- return parse_number (current ());
160
- }
161
-
162
- really_inline error_code parse_root_number () {
163
- /* *
164
- * We need to make a copy to make sure that the string is space terminated.
165
- * This is not about padding the input, which should already padded up
166
- * to len + SIMDJSON_PADDING. However, we have no control at this stage
167
- * on how the padding was done. What if the input string was padded with nulls?
168
- * It is quite common for an input string to have an extra null character (C string).
169
- * We do not want to allow 9\0 (where \0 is the null character) inside a JSON
170
- * document, but the string "9\0" by itself is fine. So we make a copy and
171
- * pad the input with spaces when we know that there is just one input element.
172
- * This copy is relatively expensive, but it will almost never be called in
173
- * practice unless you are in the strange scenario where you have many JSON
174
- * documents made of single atoms.
175
- */
176
- uint8_t *copy = static_cast <uint8_t *>(malloc (parser.len + SIMDJSON_PADDING));
161
+
162
+ really_inline error_code parse_root_number (const uint8_t *value) {
163
+ //
164
+ // We need to make a copy to make sure that the string is space terminated.
165
+ // This is not about padding the input, which should already be padded up
166
+ // to len + SIMDJSON_PADDING. However, we have no control at this stage
167
+ // on how the padding was done. What if the input string was padded with nulls?
168
+ // It is quite common for an input string to have an extra null character (C string).
169
+ // We do not want to allow 9\0 (where \0 is the null character) inside a JSON
170
+ // document, but the string "9\0" by itself is fine. So we make a copy and
171
+ // pad the input with spaces when we know that there is just one input element.
172
+ // This copy is relatively expensive, but it will almost never be called in
173
+ // practice unless you are in the strange scenario where you have many JSON
174
+ // documents made of single atoms.
175
+ //
176
+ uint8_t *copy = static_cast <uint8_t *>(malloc (remaining_len () + SIMDJSON_PADDING));
177
177
if (copy == nullptr ) {
178
178
return MEMALLOC;
179
179
}
180
- memcpy (copy, buf, parser.len );
181
- memset (copy + parser.len , ' ' , SIMDJSON_PADDING);
182
- size_t idx = *current_structural;
183
- error_code error = parse_number (&copy[idx]); // parse_number does not throw
180
+ memcpy (copy, value, remaining_len ());
181
+ memset (copy + remaining_len (), ' ' , SIMDJSON_PADDING);
182
+ error_code error = parse_number (copy);
184
183
free (copy);
185
184
return error;
186
185
}
187
186
188
- WARN_UNUSED really_inline error_code parse_true_atom () {
187
+ WARN_UNUSED really_inline error_code parse_true_atom (const uint8_t *value ) {
189
188
log_value (" true" );
190
- if (!atomparsing::is_valid_true_atom (current () )) { return T_ATOM_ERROR; }
189
+ if (!atomparsing::is_valid_true_atom (value )) { return T_ATOM_ERROR; }
191
190
tape.append (0 , internal::tape_type::TRUE_VALUE);
192
191
return SUCCESS;
193
192
}
194
193
195
- WARN_UNUSED really_inline error_code parse_root_true_atom () {
194
+ WARN_UNUSED really_inline error_code parse_root_true_atom (const uint8_t *value ) {
196
195
log_value (" true" );
197
- if (!atomparsing::is_valid_true_atom (current () , remaining_len ())) { return T_ATOM_ERROR; }
196
+ if (!atomparsing::is_valid_true_atom (value , remaining_len ())) { return T_ATOM_ERROR; }
198
197
tape.append (0 , internal::tape_type::TRUE_VALUE);
199
198
return SUCCESS;
200
199
}
201
200
202
- WARN_UNUSED really_inline error_code parse_false_atom () {
201
+ WARN_UNUSED really_inline error_code parse_false_atom (const uint8_t *value ) {
203
202
log_value (" false" );
204
- if (!atomparsing::is_valid_false_atom (current () )) { return F_ATOM_ERROR; }
203
+ if (!atomparsing::is_valid_false_atom (value )) { return F_ATOM_ERROR; }
205
204
tape.append (0 , internal::tape_type::FALSE_VALUE);
206
205
return SUCCESS;
207
206
}
208
207
209
- WARN_UNUSED really_inline error_code parse_root_false_atom () {
208
+ WARN_UNUSED really_inline error_code parse_root_false_atom (const uint8_t *value ) {
210
209
log_value (" false" );
211
- if (!atomparsing::is_valid_false_atom (current () , remaining_len ())) { return F_ATOM_ERROR; }
210
+ if (!atomparsing::is_valid_false_atom (value , remaining_len ())) { return F_ATOM_ERROR; }
212
211
tape.append (0 , internal::tape_type::FALSE_VALUE);
213
212
return SUCCESS;
214
213
}
215
214
216
- WARN_UNUSED really_inline error_code parse_null_atom () {
215
+ WARN_UNUSED really_inline error_code parse_null_atom (const uint8_t *value ) {
217
216
log_value (" null" );
218
- if (!atomparsing::is_valid_null_atom (current () )) { return N_ATOM_ERROR; }
217
+ if (!atomparsing::is_valid_null_atom (value )) { return N_ATOM_ERROR; }
219
218
tape.append (0 , internal::tape_type::NULL_VALUE);
220
219
return SUCCESS;
221
220
}
222
221
223
- WARN_UNUSED really_inline error_code parse_root_null_atom () {
222
+ WARN_UNUSED really_inline error_code parse_root_null_atom (const uint8_t *value ) {
224
223
log_value (" null" );
225
- if (!atomparsing::is_valid_null_atom (current () , remaining_len ())) { return N_ATOM_ERROR; }
224
+ if (!atomparsing::is_valid_null_atom (value , remaining_len ())) { return N_ATOM_ERROR; }
226
225
tape.append (0 , internal::tape_type::NULL_VALUE);
227
226
return SUCCESS;
228
227
}
@@ -279,50 +278,54 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
279
278
//
280
279
// Read first value
281
280
//
282
- switch (parser.current_char ()) {
283
- case ' {' : {
284
- if (parser.empty_object ()) { goto document_end; }
285
- SIMDJSON_TRY ( parser.start_object () );
286
- goto object_begin;
287
- }
288
- case ' [' : {
289
- if (parser.empty_array ()) { goto document_end; }
290
- SIMDJSON_TRY ( parser.start_array () );
291
- // Make sure the outer array is closed before continuing; otherwise, there are ways we could get
292
- // into memory corruption. See https://github.com/simdjson/simdjson/issues/906
293
- if (!STREAMING) {
294
- if (parser.buf [dom_parser.structural_indexes [dom_parser.n_structural_indexes - 1 ]] != ' ]' ) {
295
- return TAPE_ERROR;
281
+ {
282
+ switch (parser.current_char ()) {
283
+ case ' {' : {
284
+ if (parser.empty_object ()) { goto document_end; }
285
+ SIMDJSON_TRY ( parser.start_object () );
286
+ goto object_begin;
287
+ }
288
+ case ' [' : {
289
+ if (parser.empty_array ()) { goto document_end; }
290
+ SIMDJSON_TRY ( parser.start_array () );
291
+ // Make sure the outer array is closed before continuing; otherwise, there are ways we could get
292
+ // into memory corruption. See https://github.com/simdjson/simdjson/issues/906
293
+ if (!STREAMING) {
294
+ if (parser.buf [dom_parser.structural_indexes [dom_parser.n_structural_indexes - 1 ]] != ' ]' ) {
295
+ return TAPE_ERROR;
296
+ }
296
297
}
298
+ goto array_begin;
299
+ }
300
+ case ' "' : SIMDJSON_TRY ( parser.parse_string (parser.current ()) ); goto document_end;
301
+ case ' t' : SIMDJSON_TRY ( parser.parse_root_true_atom (parser.current ()) ); goto document_end;
302
+ case ' f' : SIMDJSON_TRY ( parser.parse_root_false_atom (parser.current ()) ); goto document_end;
303
+ case ' n' : SIMDJSON_TRY ( parser.parse_root_null_atom (parser.current ()) ); goto document_end;
304
+ case ' -' :
305
+ case ' 0' : case ' 1' : case ' 2' : case ' 3' : case ' 4' :
306
+ case ' 5' : case ' 6' : case ' 7' : case ' 8' : case ' 9' :
307
+ SIMDJSON_TRY ( parser.parse_root_number (parser.current ()) ); goto document_end;
308
+ default :
309
+ parser.log_error (" Document starts with a non-value character" );
310
+ return TAPE_ERROR;
297
311
}
298
- goto array_begin;
299
- }
300
- case ' "' : SIMDJSON_TRY ( parser.parse_string () ); goto document_end;
301
- case ' t' : SIMDJSON_TRY ( parser.parse_root_true_atom () ); goto document_end;
302
- case ' f' : SIMDJSON_TRY ( parser.parse_root_false_atom () ); goto document_end;
303
- case ' n' : SIMDJSON_TRY ( parser.parse_root_null_atom () ); goto document_end;
304
- case ' -' :
305
- case ' 0' : case ' 1' : case ' 2' : case ' 3' : case ' 4' :
306
- case ' 5' : case ' 6' : case ' 7' : case ' 8' : case ' 9' :
307
- SIMDJSON_TRY ( parser.parse_root_number () ); goto document_end;
308
- default :
309
- parser.log_error (" Document starts with a non-value character" );
310
- return TAPE_ERROR;
311
312
}
312
313
313
314
//
314
315
// Object parser states
315
316
//
316
- object_begin:
317
- if (parser.advance_char () != ' "' ) {
317
+ object_begin: {
318
+ const uint8_t *key = parser.advance ();
319
+ if (*key != ' "' ) {
318
320
parser.log_error (" Object does not start with a key" );
319
321
return TAPE_ERROR;
320
322
}
321
323
parser.increment_count ();
322
- SIMDJSON_TRY ( parser.parse_string ( true ) );
324
+ SIMDJSON_TRY ( parser.parse_key (key ) );
323
325
goto object_field;
326
+ } // object_begin:
324
327
325
- object_field:
328
+ object_field: {
326
329
if (unlikely ( parser.advance_char () != ' :' )) { parser.log_error (" Missing colon after key in object" ); return TAPE_ERROR; }
327
330
switch (parser.advance_char ()) {
328
331
case ' {' : {
@@ -335,46 +338,52 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
335
338
SIMDJSON_TRY ( parser.start_array () );
336
339
goto array_begin;
337
340
}
338
- case ' "' : SIMDJSON_TRY ( parser.parse_string () ); break ;
339
- case ' t' : SIMDJSON_TRY ( parser.parse_true_atom () ); break ;
340
- case ' f' : SIMDJSON_TRY ( parser.parse_false_atom () ); break ;
341
- case ' n' : SIMDJSON_TRY ( parser.parse_null_atom () ); break ;
341
+ case ' "' : SIMDJSON_TRY ( parser.parse_string (parser. current () ) ); break ;
342
+ case ' t' : SIMDJSON_TRY ( parser.parse_true_atom (parser. current () ) ); break ;
343
+ case ' f' : SIMDJSON_TRY ( parser.parse_false_atom (parser. current () ) ); break ;
344
+ case ' n' : SIMDJSON_TRY ( parser.parse_null_atom (parser. current () ) ); break ;
342
345
case ' -' :
343
346
case ' 0' : case ' 1' : case ' 2' : case ' 3' : case ' 4' :
344
347
case ' 5' : case ' 6' : case ' 7' : case ' 8' : case ' 9' :
345
- SIMDJSON_TRY ( parser.parse_number () ); break ;
348
+ SIMDJSON_TRY ( parser.parse_number (parser. current () ) ); break ;
346
349
default :
347
350
parser.log_error (" Non-value found when value was expected!" );
348
351
return TAPE_ERROR;
349
352
}
353
+ } // object_field:
350
354
351
- object_continue:
355
+ object_continue: {
352
356
switch (parser.advance_char ()) {
353
- case ' ,' :
357
+ case ' ,' : {
354
358
parser.increment_count ();
355
- if (unlikely ( parser.advance_char () != ' "' )) { parser.log_error (" Key string missing at beginning of field in object" ); return TAPE_ERROR; }
356
- SIMDJSON_TRY ( parser.parse_string (true ) );
359
+ const uint8_t *key = parser.advance ();
360
+ if (unlikely ( *key != ' "' )) { parser.log_error (" Key string missing at beginning of field in object" ); return TAPE_ERROR; }
361
+ SIMDJSON_TRY ( parser.parse_key (key) );
357
362
goto object_field;
363
+ }
358
364
case ' }' :
359
365
parser.end_object ();
360
366
goto scope_end;
361
367
default :
362
368
parser.log_error (" No comma between object fields" );
363
369
return TAPE_ERROR;
364
370
}
371
+ } // object_continue:
365
372
366
- scope_end:
373
+ scope_end: {
367
374
if (parser.depth == 0 ) { goto document_end; }
368
375
if (parser.parser .is_array [parser.depth ]) { goto array_continue; }
369
376
goto object_continue;
377
+ } // scope_end:
370
378
371
379
//
372
380
// Array parser states
373
381
//
374
- array_begin:
382
+ array_begin: {
375
383
parser.increment_count ();
384
+ } // array_begin:
376
385
377
- array_value:
386
+ array_value: {
378
387
switch (parser.advance_char ()) {
379
388
case ' {' : {
380
389
if (parser.empty_object ()) { break ; };
@@ -386,20 +395,21 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
386
395
SIMDJSON_TRY ( parser.start_array () );
387
396
goto array_begin;
388
397
}
389
- case ' "' : SIMDJSON_TRY ( parser.parse_string () ); break ;
390
- case ' t' : SIMDJSON_TRY ( parser.parse_true_atom () ); break ;
391
- case ' f' : SIMDJSON_TRY ( parser.parse_false_atom () ); break ;
392
- case ' n' : SIMDJSON_TRY ( parser.parse_null_atom () ); break ;
398
+ case ' "' : SIMDJSON_TRY ( parser.parse_string (parser. current () ) ); break ;
399
+ case ' t' : SIMDJSON_TRY ( parser.parse_true_atom (parser. current () ) ); break ;
400
+ case ' f' : SIMDJSON_TRY ( parser.parse_false_atom (parser. current () ) ); break ;
401
+ case ' n' : SIMDJSON_TRY ( parser.parse_null_atom (parser. current () ) ); break ;
393
402
case ' -' :
394
403
case ' 0' : case ' 1' : case ' 2' : case ' 3' : case ' 4' :
395
404
case ' 5' : case ' 6' : case ' 7' : case ' 8' : case ' 9' :
396
- SIMDJSON_TRY ( parser.parse_number () ); break ;
405
+ SIMDJSON_TRY ( parser.parse_number (parser. current () ) ); break ;
397
406
default :
398
407
parser.log_error (" Non-value found when value was expected!" );
399
408
return TAPE_ERROR;
400
409
}
410
+ } // array_value:
401
411
402
- array_continue:
412
+ array_continue: {
403
413
switch (parser.advance_char ()) {
404
414
case ' ,' :
405
415
parser.increment_count ();
@@ -411,9 +421,11 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
411
421
parser.log_error (" Missing comma between array values" );
412
422
return TAPE_ERROR;
413
423
}
424
+ } // array_continue:
414
425
415
- document_end:
426
+ document_end: {
416
427
return parser.finish ();
428
+ } // document_end:
417
429
418
430
} // parse_structurals()
419
431
0 commit comments