Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 099bb1a

Browse files
committed
Pass buffer to primitive parse functions
1 parent 9c33093 commit 099bb1a

File tree

2 files changed

+105
-89
lines changed

2 files changed

+105
-89
lines changed

src/generic/stage2/structural_iterator.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ class structural_iterator {
2626
really_inline char peek_next_char() {
2727
return buf[*(current_structural+1)];
2828
}
29+
really_inline const uint8_t* advance() {
30+
current_structural++;
31+
return &buf[*current_structural];
32+
}
2933
really_inline char advance_char() {
3034
current_structural++;
3135
return buf[*current_structural];

src/generic/stage2/structural_parser.h

Lines changed: 101 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -138,10 +138,13 @@ struct structural_parser : structural_iterator {
138138
current_string_buf_loc = dst + 1;
139139
}
140140

141-
WARN_UNUSED really_inline error_code parse_string(bool key = false) {
141+
WARN_UNUSED really_inline error_code parse_key(const uint8_t *key) {
142+
return parse_string(key, true);
143+
}
144+
WARN_UNUSED really_inline error_code parse_string(const uint8_t *value, bool key = false) {
142145
log_value(key ? "key" : "string");
143146
uint8_t *dst = on_start_string();
144-
dst = stringparsing::parse_string(current(), dst);
147+
dst = stringparsing::parse_string(value, dst);
145148
if (dst == nullptr) {
146149
log_error("Invalid escape in string");
147150
return STRING_ERROR;
@@ -150,79 +153,75 @@ struct structural_parser : structural_iterator {
150153
return SUCCESS;
151154
}
152155

153-
WARN_UNUSED really_inline error_code parse_number(const uint8_t *src) {
156+
WARN_UNUSED really_inline error_code parse_number(const uint8_t *value) {
154157
log_value("number");
155-
if (!numberparsing::parse_number(src, tape)) { log_error("Invalid number"); return NUMBER_ERROR; }
158+
if (!numberparsing::parse_number(value, tape)) { log_error("Invalid number"); return NUMBER_ERROR; }
156159
return SUCCESS;
157160
}
158-
WARN_UNUSED really_inline error_code parse_number() {
159-
return parse_number(current());
160-
}
161-
162-
really_inline error_code parse_root_number() {
163-
/**
164-
* We need to make a copy to make sure that the string is space terminated.
165-
* This is not about padding the input, which should already be padded up
166-
* to len + SIMDJSON_PADDING. However, we have no control at this stage
167-
* on how the padding was done. What if the input string was padded with nulls?
168-
* It is quite common for an input string to have an extra null character (C string).
169-
* We do not want to allow 9\0 (where \0 is the null character) inside a JSON
170-
* document, but the string "9\0" by itself is fine. So we make a copy and
171-
* pad the input with spaces when we know that there is just one input element.
172-
* This copy is relatively expensive, but it will almost never be called in
173-
* practice unless you are in the strange scenario where you have many JSON
174-
* documents made of single atoms.
175-
*/
176-
uint8_t *copy = static_cast<uint8_t *>(malloc(parser.len + SIMDJSON_PADDING));
161+
162+
really_inline error_code parse_root_number(const uint8_t *value) {
163+
//
164+
// We need to make a copy to make sure that the string is space terminated.
165+
// This is not about padding the input, which should already padded up
166+
// to len + SIMDJSON_PADDING. However, we have no control at this stage
167+
// on how the padding was done. What if the input string was padded with nulls?
168+
// It is quite common for an input string to have an extra null character (C string).
169+
// We do not want to allow 9\0 (where \0 is the null character) inside a JSON
170+
// document, but the string "9\0" by itself is fine. So we make a copy and
171+
// pad the input with spaces when we know that there is just one input element.
172+
// This copy is relatively expensive, but it will almost never be called in
173+
// practice unless you are in the strange scenario where you have many JSON
174+
// documents made of single atoms.
175+
//
176+
uint8_t *copy = static_cast<uint8_t *>(malloc(remaining_len() + SIMDJSON_PADDING));
177177
if (copy == nullptr) {
178178
return MEMALLOC;
179179
}
180-
memcpy(copy, buf, parser.len);
181-
memset(copy + parser.len, ' ', SIMDJSON_PADDING);
182-
size_t idx = *current_structural;
183-
error_code error = parse_number(&copy[idx]); // parse_number does not throw
180+
memcpy(copy, value, remaining_len());
181+
memset(copy + remaining_len(), ' ', SIMDJSON_PADDING);
182+
error_code error = parse_number(copy);
184183
free(copy);
185184
return error;
186185
}
187186

188-
WARN_UNUSED really_inline error_code parse_true_atom() {
187+
WARN_UNUSED really_inline error_code parse_true_atom(const uint8_t *value) {
189188
log_value("true");
190-
if (!atomparsing::is_valid_true_atom(current())) { return T_ATOM_ERROR; }
189+
if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; }
191190
tape.append(0, internal::tape_type::TRUE_VALUE);
192191
return SUCCESS;
193192
}
194193

195-
WARN_UNUSED really_inline error_code parse_root_true_atom() {
194+
WARN_UNUSED really_inline error_code parse_root_true_atom(const uint8_t *value) {
196195
log_value("true");
197-
if (!atomparsing::is_valid_true_atom(current(), remaining_len())) { return T_ATOM_ERROR; }
196+
if (!atomparsing::is_valid_true_atom(value, remaining_len())) { return T_ATOM_ERROR; }
198197
tape.append(0, internal::tape_type::TRUE_VALUE);
199198
return SUCCESS;
200199
}
201200

202-
WARN_UNUSED really_inline error_code parse_false_atom() {
201+
WARN_UNUSED really_inline error_code parse_false_atom(const uint8_t *value) {
203202
log_value("false");
204-
if (!atomparsing::is_valid_false_atom(current())) { return F_ATOM_ERROR; }
203+
if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; }
205204
tape.append(0, internal::tape_type::FALSE_VALUE);
206205
return SUCCESS;
207206
}
208207

209-
WARN_UNUSED really_inline error_code parse_root_false_atom() {
208+
WARN_UNUSED really_inline error_code parse_root_false_atom(const uint8_t *value) {
210209
log_value("false");
211-
if (!atomparsing::is_valid_false_atom(current(), remaining_len())) { return F_ATOM_ERROR; }
210+
if (!atomparsing::is_valid_false_atom(value, remaining_len())) { return F_ATOM_ERROR; }
212211
tape.append(0, internal::tape_type::FALSE_VALUE);
213212
return SUCCESS;
214213
}
215214

216-
WARN_UNUSED really_inline error_code parse_null_atom() {
215+
WARN_UNUSED really_inline error_code parse_null_atom(const uint8_t *value) {
217216
log_value("null");
218-
if (!atomparsing::is_valid_null_atom(current())) { return N_ATOM_ERROR; }
217+
if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; }
219218
tape.append(0, internal::tape_type::NULL_VALUE);
220219
return SUCCESS;
221220
}
222221

223-
WARN_UNUSED really_inline error_code parse_root_null_atom() {
222+
WARN_UNUSED really_inline error_code parse_root_null_atom(const uint8_t *value) {
224223
log_value("null");
225-
if (!atomparsing::is_valid_null_atom(current(), remaining_len())) { return N_ATOM_ERROR; }
224+
if (!atomparsing::is_valid_null_atom(value, remaining_len())) { return N_ATOM_ERROR; }
226225
tape.append(0, internal::tape_type::NULL_VALUE);
227226
return SUCCESS;
228227
}
@@ -279,50 +278,54 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
279278
//
280279
// Read first value
281280
//
282-
switch (parser.current_char()) {
283-
case '{': {
284-
if (parser.empty_object()) { goto document_end; }
285-
SIMDJSON_TRY( parser.start_object() );
286-
goto object_begin;
287-
}
288-
case '[': {
289-
if (parser.empty_array()) { goto document_end; }
290-
SIMDJSON_TRY( parser.start_array() );
291-
// Make sure the outer array is closed before continuing; otherwise, there are ways we could get
292-
// into memory corruption. See https://github.com/simdjson/simdjson/issues/906
293-
if (!STREAMING) {
294-
if (parser.buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]] != ']') {
295-
return TAPE_ERROR;
281+
{
282+
switch (parser.current_char()) {
283+
case '{': {
284+
if (parser.empty_object()) { goto document_end; }
285+
SIMDJSON_TRY( parser.start_object() );
286+
goto object_begin;
287+
}
288+
case '[': {
289+
if (parser.empty_array()) { goto document_end; }
290+
SIMDJSON_TRY( parser.start_array() );
291+
// Make sure the outer array is closed before continuing; otherwise, there are ways we could get
292+
// into memory corruption. See https://github.com/simdjson/simdjson/issues/906
293+
if (!STREAMING) {
294+
if (parser.buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]] != ']') {
295+
return TAPE_ERROR;
296+
}
296297
}
298+
goto array_begin;
299+
}
300+
case '"': SIMDJSON_TRY( parser.parse_string(parser.current()) ); goto document_end;
301+
case 't': SIMDJSON_TRY( parser.parse_root_true_atom(parser.current()) ); goto document_end;
302+
case 'f': SIMDJSON_TRY( parser.parse_root_false_atom(parser.current()) ); goto document_end;
303+
case 'n': SIMDJSON_TRY( parser.parse_root_null_atom(parser.current()) ); goto document_end;
304+
case '-':
305+
case '0': case '1': case '2': case '3': case '4':
306+
case '5': case '6': case '7': case '8': case '9':
307+
SIMDJSON_TRY( parser.parse_root_number(parser.current()) ); goto document_end;
308+
default:
309+
parser.log_error("Document starts with a non-value character");
310+
return TAPE_ERROR;
297311
}
298-
goto array_begin;
299-
}
300-
case '"': SIMDJSON_TRY( parser.parse_string() ); goto document_end;
301-
case 't': SIMDJSON_TRY( parser.parse_root_true_atom() ); goto document_end;
302-
case 'f': SIMDJSON_TRY( parser.parse_root_false_atom() ); goto document_end;
303-
case 'n': SIMDJSON_TRY( parser.parse_root_null_atom() ); goto document_end;
304-
case '-':
305-
case '0': case '1': case '2': case '3': case '4':
306-
case '5': case '6': case '7': case '8': case '9':
307-
SIMDJSON_TRY( parser.parse_root_number() ); goto document_end;
308-
default:
309-
parser.log_error("Document starts with a non-value character");
310-
return TAPE_ERROR;
311312
}
312313

313314
//
314315
// Object parser states
315316
//
316-
object_begin:
317-
if (parser.advance_char() != '"') {
317+
object_begin: {
318+
const uint8_t *key = parser.advance();
319+
if (*key != '"') {
318320
parser.log_error("Object does not start with a key");
319321
return TAPE_ERROR;
320322
}
321323
parser.increment_count();
322-
SIMDJSON_TRY( parser.parse_string(true) );
324+
SIMDJSON_TRY( parser.parse_key(key) );
323325
goto object_field;
326+
} // object_begin:
324327

325-
object_field:
328+
object_field: {
326329
if (unlikely( parser.advance_char() != ':' )) { parser.log_error("Missing colon after key in object"); return TAPE_ERROR; }
327330
switch (parser.advance_char()) {
328331
case '{': {
@@ -335,46 +338,52 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
335338
SIMDJSON_TRY( parser.start_array() );
336339
goto array_begin;
337340
}
338-
case '"': SIMDJSON_TRY( parser.parse_string() ); break;
339-
case 't': SIMDJSON_TRY( parser.parse_true_atom() ); break;
340-
case 'f': SIMDJSON_TRY( parser.parse_false_atom() ); break;
341-
case 'n': SIMDJSON_TRY( parser.parse_null_atom() ); break;
341+
case '"': SIMDJSON_TRY( parser.parse_string(parser.current()) ); break;
342+
case 't': SIMDJSON_TRY( parser.parse_true_atom(parser.current()) ); break;
343+
case 'f': SIMDJSON_TRY( parser.parse_false_atom(parser.current()) ); break;
344+
case 'n': SIMDJSON_TRY( parser.parse_null_atom(parser.current()) ); break;
342345
case '-':
343346
case '0': case '1': case '2': case '3': case '4':
344347
case '5': case '6': case '7': case '8': case '9':
345-
SIMDJSON_TRY( parser.parse_number() ); break;
348+
SIMDJSON_TRY( parser.parse_number(parser.current()) ); break;
346349
default:
347350
parser.log_error("Non-value found when value was expected!");
348351
return TAPE_ERROR;
349352
}
353+
} // object_field:
350354

351-
object_continue:
355+
object_continue: {
352356
switch (parser.advance_char()) {
353-
case ',':
357+
case ',': {
354358
parser.increment_count();
355-
if (unlikely( parser.advance_char() != '"' )) { parser.log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; }
356-
SIMDJSON_TRY( parser.parse_string(true) );
359+
const uint8_t *key = parser.advance();
360+
if (unlikely( *key != '"' )) { parser.log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; }
361+
SIMDJSON_TRY( parser.parse_key(key) );
357362
goto object_field;
363+
}
358364
case '}':
359365
parser.end_object();
360366
goto scope_end;
361367
default:
362368
parser.log_error("No comma between object fields");
363369
return TAPE_ERROR;
364370
}
371+
} // object_continue:
365372

366-
scope_end:
373+
scope_end: {
367374
if (parser.depth == 0) { goto document_end; }
368375
if (parser.parser.is_array[parser.depth]) { goto array_continue; }
369376
goto object_continue;
377+
} // scope_end:
370378

371379
//
372380
// Array parser states
373381
//
374-
array_begin:
382+
array_begin: {
375383
parser.increment_count();
384+
} // array_begin:
376385

377-
array_value:
386+
array_value: {
378387
switch (parser.advance_char()) {
379388
case '{': {
380389
if (parser.empty_object()) { break; };
@@ -386,20 +395,21 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
386395
SIMDJSON_TRY( parser.start_array() );
387396
goto array_begin;
388397
}
389-
case '"': SIMDJSON_TRY( parser.parse_string() ); break;
390-
case 't': SIMDJSON_TRY( parser.parse_true_atom() ); break;
391-
case 'f': SIMDJSON_TRY( parser.parse_false_atom() ); break;
392-
case 'n': SIMDJSON_TRY( parser.parse_null_atom() ); break;
398+
case '"': SIMDJSON_TRY( parser.parse_string(parser.current()) ); break;
399+
case 't': SIMDJSON_TRY( parser.parse_true_atom(parser.current()) ); break;
400+
case 'f': SIMDJSON_TRY( parser.parse_false_atom(parser.current()) ); break;
401+
case 'n': SIMDJSON_TRY( parser.parse_null_atom(parser.current()) ); break;
393402
case '-':
394403
case '0': case '1': case '2': case '3': case '4':
395404
case '5': case '6': case '7': case '8': case '9':
396-
SIMDJSON_TRY( parser.parse_number() ); break;
405+
SIMDJSON_TRY( parser.parse_number(parser.current()) ); break;
397406
default:
398407
parser.log_error("Non-value found when value was expected!");
399408
return TAPE_ERROR;
400409
}
410+
} // array_value:
401411

402-
array_continue:
412+
array_continue: {
403413
switch (parser.advance_char()) {
404414
case ',':
405415
parser.increment_count();
@@ -411,9 +421,11 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
411421
parser.log_error("Missing comma between array values");
412422
return TAPE_ERROR;
413423
}
424+
} // array_continue:
414425

415-
document_end:
426+
document_end: {
416427
return parser.finish();
428+
} // document_end:
417429

418430
} // parse_structurals()
419431

0 commit comments

Comments
 (0)