diff --git a/data/csv/auto/early_out_error.csv b/data/csv/auto/early_out_error.csv new file mode 100644 index 000000000000..b327fbb1f96b --- /dev/null +++ b/data/csv/auto/early_out_error.csv @@ -0,0 +1,23 @@ +a,b,c +1,1,1 +1,1,1 +1,\n,1 +1,\n,1 +1,\n,1 +1,\n,1 +1,\n,1 +1,\n,1 +1,\n,1 +1,\n,1 +1,\n,1 +1,\n,1 +1,\n,1 +1,\n,1 +1,\n,1 +1,\n,1 +1,\n,1 +1,\n,1 +1,\n,1 +1,\n,1 +1,1,1 +1,1,1 \ No newline at end of file diff --git a/src/common/enum_util.cpp b/src/common/enum_util.cpp index c4185af9fbfa..79ff7887b1ba 100644 --- a/src/common/enum_util.cpp +++ b/src/common/enum_util.cpp @@ -833,6 +833,8 @@ const char* EnumUtil::ToChars(CSVState value) { return "EMPTY_SPACE"; case CSVState::COMMENT: return "COMMENT"; + case CSVState::STANDARD_NEWLINE: + return "STANDARD_NEWLINE"; default: throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented in ToChars", value)); } @@ -876,6 +878,9 @@ CSVState EnumUtil::FromString(const char *value) { if (StringUtil::Equals(value, "COMMENT")) { return CSVState::COMMENT; } + if (StringUtil::Equals(value, "STANDARD_NEWLINE")) { + return CSVState::STANDARD_NEWLINE; + } throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented in FromString", value)); } diff --git a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 43636c50b406..f7d9dd5e3324 100644 --- a/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -1404,34 +1404,19 @@ bool StringValueResult::PrintErrorLine() const { (state_machine.options.store_rejects.GetValue() || !state_machine.options.ignore_errors.GetValue()); } -void StringValueScanner::SkipUntilNewLine() { - // Now skip until next newline - if (state_machine->options.dialect_options.state_machine_options.new_line.GetValue() == - NewLineIdentifier::CARRY_ON) { - bool carriage_return = false; - bool not_carriage_return = false; - for (; iterator.pos.buffer_pos < cur_buffer_handle->actual_size; iterator.pos.buffer_pos++) { - if (buffer_handle_ptr[iterator.pos.buffer_pos] == '\r') { - carriage_return = true; - } else if (buffer_handle_ptr[iterator.pos.buffer_pos] != '\n') { - not_carriage_return = true; - } - if (buffer_handle_ptr[iterator.pos.buffer_pos] == '\n') { - if (carriage_return || not_carriage_return) { - iterator.pos.buffer_pos++; - return; - } - } +bool StringValueScanner::SkipUntilState(CSVState initial_state, CSVState until_state) { + CSVStates current_state; + current_state.Initialize(initial_state); + while (iterator.pos.buffer_pos < cur_buffer_handle->actual_size) { + state_machine->Transition(current_state, buffer_handle_ptr[iterator.pos.buffer_pos++]); + if (current_state.IsState(until_state)) { + return true; } - } else { - for (; iterator.pos.buffer_pos < cur_buffer_handle->actual_size; iterator.pos.buffer_pos++) { - if (buffer_handle_ptr[iterator.pos.buffer_pos] == '\n' || - buffer_handle_ptr[iterator.pos.buffer_pos] == '\r') { - iterator.pos.buffer_pos++; - return; - } + if (current_state.IsState(CSVState::INVALID)) { + return false; } } + return true; } bool StringValueScanner::CanDirectlyCast(const LogicalType &type, bool icu_loaded) { @@ -1463,6 +1448,63 @@ bool StringValueScanner::CanDirectlyCast(const LogicalType &type, bool icu_loade } } +bool StringValueScanner::IsRowValid() { + if (iterator.pos.buffer_pos == cur_buffer_handle->actual_size) { + return false; + } + constexpr idx_t result_size = 1; + auto scan_finder = + make_uniq(0U, buffer_manager, state_machine, make_shared_ptr(), + csv_file_scan, false, iterator, result_size); + auto &tuples = scan_finder->ParseChunk(); + return tuples.number_of_rows == 1 && tuples.borked_rows.empty(); +} + +void StringValueScanner::TryRow(CSVState state, idx_t &start_pos, idx_t &end_pos, bool &valid) { + idx_t initial_pos = iterator.pos.buffer_pos; + if (SkipUntilState(state, CSVState::RECORD_SEPARATOR)) { + idx_t current_pos = iterator.pos.buffer_pos; + if (IsRowValid()) { + valid = true; + start_pos = std::min(start_pos, current_pos); + } + end_pos = std::max(end_pos, iterator.pos.buffer_pos); + } + // reset buffer + iterator.pos.buffer_pos = initial_pos; +} + +idx_t StringValueScanner::FindNextNewLine() const { + idx_t cur_pos = iterator.pos.buffer_pos; + // Now skip until next newline + if (state_machine->options.dialect_options.state_machine_options.new_line.GetValue() == + NewLineIdentifier::CARRY_ON) { + bool carriage_return = false; + bool not_carriage_return = false; + for (; cur_pos < cur_buffer_handle->actual_size; cur_pos++) { + if (buffer_handle_ptr[cur_pos] == '\r') { + carriage_return = true; + } else if (buffer_handle_ptr[cur_pos] != '\n') { + not_carriage_return = true; + } + if (buffer_handle_ptr[cur_pos] == '\n') { + if (carriage_return || not_carriage_return) { + cur_pos++; + return cur_pos; + } + } + } + } else { + for (; cur_pos < cur_buffer_handle->actual_size; cur_pos++) { + if (buffer_handle_ptr[cur_pos] == '\n' || buffer_handle_ptr[cur_pos] == '\r') { + cur_pos++; + return cur_pos; + } + } + } + return cur_pos; +} + void StringValueScanner::SetStart() { if (iterator.first_one) { if (result.store_line_size) { @@ -1472,50 +1514,41 @@ void StringValueScanner::SetStart() { } // The result size of the data after skipping the row is one line // We have to look for a new line that fits our schema - // 1. We walk until the next new line - bool line_found; - unique_ptr scan_finder; - do { - constexpr idx_t result_size = 1; - SkipUntilNewLine(); - if (state_machine->options.null_padding) { - // When Null Padding, we assume we start from the correct new-line - return; - } - scan_finder = - make_uniq(0U, buffer_manager, state_machine, make_shared_ptr(true), - csv_file_scan, false, iterator, result_size); - auto &tuples = scan_finder->ParseChunk(); - line_found = true; - if (tuples.number_of_rows != 1 || - (!tuples.borked_rows.empty() && !state_machine->options.ignore_errors.GetValue()) || - tuples.first_line_is_comment) { - line_found = false; - // If no tuples were parsed, this is not the correct start, we need to skip until the next new line - // Or if columns don't match, this is not the correct start, we need to skip until the next new line - if (scan_finder->previous_buffer_handle) { - if (scan_finder->iterator.pos.buffer_pos >= scan_finder->previous_buffer_handle->actual_size && - scan_finder->previous_buffer_handle->is_last_buffer) { - iterator.pos.buffer_idx = scan_finder->iterator.pos.buffer_idx; - iterator.pos.buffer_pos = scan_finder->iterator.pos.buffer_pos; - result.last_position = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, result.buffer_size}; - iterator.done = scan_finder->iterator.done; - return; - } - } - if (iterator.pos.buffer_pos == cur_buffer_handle->actual_size || - scan_finder->iterator.GetBufferIdx() > iterator.GetBufferIdx()) { - // If things go terribly wrong, we never loop indefinitely. - iterator.pos.buffer_idx = scan_finder->iterator.pos.buffer_idx; - iterator.pos.buffer_pos = scan_finder->iterator.pos.buffer_pos; - result.last_position = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, result.buffer_size}; - iterator.done = scan_finder->iterator.done; - return; - } + idx_t next_new_line = FindNextNewLine(); + idx_t potential_start = cur_buffer_handle->actual_size; + idx_t largest_end_pos = 0; + bool any_valid_row = false; + if (state_machine->options.null_padding) { + // When Null Padding, we assume we start from the correct new-line + return; + } + // At this point we have 3 options: + // 1. We are at the start of a valid line + TryRow(CSVState::STANDARD_NEWLINE, potential_start, largest_end_pos, any_valid_row); + // 2. We are in the middle of a quoted value + if (potential_start > next_new_line && + state_machine->dialect_options.state_machine_options.quote.GetValue() != '\0') { + TryRow(CSVState::QUOTED, potential_start, largest_end_pos, any_valid_row); + } + // 3. We are in an escaped value + if (!any_valid_row && potential_start > next_new_line && + state_machine->dialect_options.state_machine_options.escape.GetValue() != '\0') { + TryRow(CSVState::ESCAPE, potential_start, largest_end_pos, any_valid_row); + } + if (!any_valid_row) { + bool is_this_the_end = largest_end_pos == cur_buffer_handle->actual_size && cur_buffer_handle->is_last_buffer; + if (is_this_the_end) { + iterator.pos.buffer_pos = largest_end_pos; + iterator.done = true; + } else { + SkipUntilState(CSVState::STANDARD_NEWLINE, CSVState::RECORD_SEPARATOR); } - } while (!line_found); - iterator.pos.buffer_idx = scan_finder->result.current_line_position.begin.buffer_idx; - iterator.pos.buffer_pos = scan_finder->result.current_line_position.begin.buffer_pos; + } else { + iterator.pos.buffer_pos = potential_start; + iterator.done = iterator.pos.buffer_pos == cur_buffer_handle->actual_size; + } + // 4. We have an error, if we have an error, we let life go on, the scanner will either ignore it + // or throw. result.last_position = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, result.buffer_size}; } diff --git a/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp b/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp index 6c93cc939f3c..b73e222e820d 100644 --- a/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +++ b/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp @@ -50,14 +50,20 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op // Now set values depending on configuration // 1) Standard/Invalid State - vector std_inv {static_cast(CSVState::STANDARD), static_cast(CSVState::INVALID)}; + vector std_inv {static_cast(CSVState::STANDARD), static_cast(CSVState::INVALID), + static_cast(CSVState::STANDARD_NEWLINE)}; for (auto &state : std_inv) { transition_array[delimiter][state] = CSVState::DELIMITER; - transition_array[static_cast('\n')][state] = CSVState::RECORD_SEPARATOR; if (new_line_id == NewLineIdentifier::CARRY_ON) { transition_array[static_cast('\r')][state] = CSVState::CARRIAGE_RETURN; + if (state == static_cast(CSVState::STANDARD_NEWLINE)) { + transition_array[static_cast('\n')][state] = CSVState::STANDARD; + } else { + transition_array[static_cast('\n')][state] = CSVState::INVALID; + } } else { transition_array[static_cast('\r')][state] = CSVState::RECORD_SEPARATOR; + transition_array[static_cast('\n')][state] = CSVState::RECORD_SEPARATOR; } if (comment != '\0') { transition_array[comment][state] = CSVState::COMMENT; diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp index ecb40e440bf0..926c3a9e7e76 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp @@ -14,18 +14,19 @@ namespace duckdb { //! All States of CSV Parsing enum class CSVState : uint8_t { - STANDARD = 0, //! Regular unquoted field state - DELIMITER = 1, //! State after encountering a field separator (e.g., ;) - RECORD_SEPARATOR = 2, //! State after encountering a record separator (i.e., \n) - CARRIAGE_RETURN = 3, //! State after encountering a carriage return(i.e., \r) - QUOTED = 4, //! State when inside a quoted field - UNQUOTED = 5, //! State when leaving a quoted field - ESCAPE = 6, //! State when encountering an escape character (e.g., \) - INVALID = 7, //! Got to an Invalid State, this should error. - NOT_SET = 8, //! If the state is not set, usually the first state before getting the first character - QUOTED_NEW_LINE = 9, //! If we have a quoted newline - EMPTY_SPACE = 10, //! If we have empty spaces in the beginning and end of value - COMMENT = 11 //! If we are in a comment state, and hence have to skip the whole line + STANDARD = 0, //! Regular unquoted field state + DELIMITER = 1, //! State after encountering a field separator (e.g., ;) + RECORD_SEPARATOR = 2, //! State after encountering a record separator (i.e., \n) + CARRIAGE_RETURN = 3, //! State after encountering a carriage return(i.e., \r) + QUOTED = 4, //! State when inside a quoted field + UNQUOTED = 5, //! State when leaving a quoted field + ESCAPE = 6, //! State when encountering an escape character (e.g., \) + INVALID = 7, //! Got to an Invalid State, this should error. + NOT_SET = 8, //! If the state is not set, usually the first state before getting the first character + QUOTED_NEW_LINE = 9, //! If we have a quoted newline + EMPTY_SPACE = 10, //! If we have empty spaces in the beginning and end of value + COMMENT = 11, //! If we are in a comment state, and hence have to skip the whole line + STANDARD_NEWLINE = 12, //! State used for figuring out a new line. }; } // namespace duckdb diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp index 13933a1805e9..48d9a25857ff 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp @@ -17,65 +17,68 @@ namespace duckdb { //! State of necessary CSV States to parse file //! Current, previous, and state before the previous struct CSVStates { - void Initialize() { - states[0] = CSVState::NOT_SET; - states[1] = CSVState::NOT_SET; + void Initialize(CSVState initial_state = CSVState::NOT_SET) { + states[0] = initial_state; + states[1] = initial_state; } - inline bool NewValue() { + inline bool NewValue() const { return states[1] == CSVState::DELIMITER; } - inline bool NewRow() { + inline bool NewRow() const { // It is a new row, if the previous state is not a record separator, and the current one is return states[0] != CSVState::RECORD_SEPARATOR && states[0] != CSVState::CARRIAGE_RETURN && (states[1] == CSVState::RECORD_SEPARATOR || states[1] == CSVState::CARRIAGE_RETURN); } - inline bool WasStandard() { + inline bool WasStandard() const { return states[0] == CSVState::STANDARD; } - inline bool EmptyLastValue() { + inline bool EmptyLastValue() const { // It is a new row, if the previous state is not a record separator, and the current one is return states[0] == CSVState::DELIMITER && (states[1] == CSVState::RECORD_SEPARATOR || states[1] == CSVState::CARRIAGE_RETURN || states[1] == CSVState::DELIMITER); } - inline bool EmptyLine() { + inline bool EmptyLine() const { return (states[1] == CSVState::CARRIAGE_RETURN || states[1] == CSVState::RECORD_SEPARATOR) && (states[0] == CSVState::RECORD_SEPARATOR || states[0] == CSVState::NOT_SET); } - inline bool IsNotSet() { + inline bool IsNotSet() const { return states[1] == CSVState::NOT_SET; } - inline bool IsComment() { + inline bool IsComment() const { return states[1] == CSVState::COMMENT; } - inline bool IsCurrentNewRow() { + inline bool IsCurrentNewRow() const { return states[1] == CSVState::RECORD_SEPARATOR || states[1] == CSVState::CARRIAGE_RETURN; } - inline bool IsCarriageReturn() { + inline bool IsCarriageReturn() const { return states[1] == CSVState::CARRIAGE_RETURN; } - inline bool IsInvalid() { + inline bool IsInvalid() const { return states[1] == CSVState::INVALID; } - inline bool IsQuoted() { + inline bool IsQuoted() const { return states[0] == CSVState::QUOTED; } - inline bool IsEscaped() { + inline bool IsEscaped() const { return states[1] == CSVState::ESCAPE || (states[0] == CSVState::UNQUOTED && states[1] == CSVState::QUOTED); } - inline bool IsQuotedCurrent() { + inline bool IsQuotedCurrent() const { return states[1] == CSVState::QUOTED || states[1] == CSVState::QUOTED_NEW_LINE; } + inline bool IsState(const CSVState state) const { + return states[1] == state; + } CSVState states[2]; }; @@ -98,7 +101,7 @@ class CSVStateMachine { states.states[1] = transition_array[static_cast(current_char)][static_cast(states.states[1])]; } - void Print() { + void Print() const { std::cout << "State Machine Options" << '\n'; std::cout << "Delim: " << state_machine_options.delimiter.GetValue() << '\n'; std::cout << "Quote: " << state_machine_options.quote.GetValue() << '\n'; diff --git a/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp b/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp index decf46d46a78..4ad705282c10 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp @@ -37,11 +37,11 @@ class StateMachine { uint64_t escape = 0; uint64_t comment = 0; - const CSVState *operator[](idx_t i) const { + const CSVState *operator[](const idx_t i) const { return state_machine[i]; } - CSVState *operator[](idx_t i) { + CSVState *operator[](const idx_t i) { return state_machine[i]; } }; @@ -60,7 +60,7 @@ struct HashCSVStateMachineConfig { //! The CSVStateMachineCache caches state machines, although small ~2kb, the actual creation of multiple State Machines //! can become a bottleneck on sniffing, when reading very small csv files. -//! Hence the cache stores State Machines based on their different delimiter|quote|escape options. +//! Hence, the cache stores State Machines based on their different delimiter|quote|escape options. class CSVStateMachineCache : public ObjectCacheEntry { public: CSVStateMachineCache(); @@ -85,7 +85,7 @@ class CSVStateMachineCache : public ObjectCacheEntry { void Insert(const CSVStateMachineOptions &state_machine_options); //! Cache on delimiter|quote|escape|newline unordered_map state_machine_cache; - //! Default value for options used to intialize CSV State Machine Cache + //! Default value for options used to initialize CSV State Machine Cache //! Because the state machine cache can be accessed in Parallel we need a mutex. mutex main_mutex; diff --git a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp index 20182b5eded2..71e85370544d 100644 --- a/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +++ b/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp @@ -314,10 +314,15 @@ class StringValueScanner : public BaseScanner { //! Function used to move from one buffer to the other, if necessary bool MoveToNextBuffer(); - void SkipUntilNewLine(); - + //! -------- Functions used to figure out where lines start ---------!// + //! Main function, sets the correct start void SetStart(); - + //! From a given initial state, it skips until we reach the until_state + bool SkipUntilState(CSVState initial_state, CSVState until_state); + //! If the current row we found is valid + bool IsRowValid(); + void TryRow(CSVState state, idx_t &start_pos, idx_t &end_pos, bool &valid); + idx_t FindNextNewLine() const; StringValueResult result; vector types; diff --git a/test/sql/copy/csv/auto/test_early_out.test b/test/sql/copy/csv/auto/test_early_out.test new file mode 100644 index 000000000000..e447427cf0ce --- /dev/null +++ b/test/sql/copy/csv/auto/test_early_out.test @@ -0,0 +1,11 @@ +# name: test/sql/copy/csv/auto/test_early_out.test +# group: [auto] + +statement ok +PRAGMA enable_verification + +statement error +SELECT * +FROM read_csv('data/csv/auto/early_out_error.csv', buffer_size = 8, maximum_line_size = 8, auto_detect = false, columns = {'a': 'integer','b': 'integer','c': 'integer'}, header = true) +---- +Error when converting column "b". Could not convert string "\n" to 'INTEGER' \ No newline at end of file diff --git a/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test b/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test index 9e8e140f38aa..466758becc6f 100644 --- a/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test +++ b/test/sql/copy/csv/parallel/csv_parallel_buffer_size.test @@ -6,7 +6,6 @@ statement ok PRAGMA verify_parallelism - query III SELECT sum(a), sum(b), sum(c) FROM read_csv('data/csv/test/multi_column_integer.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER'), auto_detect='true', delim = '|', buffer_size=30) ---- diff --git a/test/sql/copy/csv/test_mixed_line_endings.test b/test/sql/copy/csv/test_mixed_line_endings.test index 9f7cad5ef3f1..c899837adce3 100644 --- a/test/sql/copy/csv/test_mixed_line_endings.test +++ b/test/sql/copy/csv/test_mixed_line_endings.test @@ -10,28 +10,7 @@ PRAGMA enable_verification statement ok CREATE TABLE test (a INTEGER, b VARCHAR, c INTEGER); - -query I +statement error insert into test select * from read_csv_auto('data/csv/test/mixed_line_endings.csv'); ---- -3 - -query I -SELECT LENGTH(b) FROM test ORDER BY a; ----- -5 -5 -4 - -query III -select * from test; ----- -10 hello 20 -20 world 30 -30 test 30 - -query RR -SELECT SUM(a), SUM(c) FROM test; ----- -60.000000 80.000000 - +Error when sniffing file diff --git a/test/sql/copy/csv/test_wrong_newline_delimiter.test b/test/sql/copy/csv/test_wrong_newline_delimiter.test index 3066590d0a0c..5539ef248a24 100644 --- a/test/sql/copy/csv/test_wrong_newline_delimiter.test +++ b/test/sql/copy/csv/test_wrong_newline_delimiter.test @@ -16,7 +16,7 @@ FROM read_csv('data/csv/timestamp.csv', columns = {'a': 'BIGINT'}, new_line= '\n new_line = \n (Set By User) statement error -FROM read_csv('data/csv/timestamp.csv', columns = {'a': 'BIGINT'}, new_line= '\r\n') +FROM read_csv('data/csv/timestamp.csv', columns = {'a': 'BIGINT'}, new_line= '\r\n', auto_detect = false) ---- new_line = \r\n (Set By User) diff --git a/tools/pythonpkg/tests/fast/api/test_read_csv.py b/tools/pythonpkg/tests/fast/api/test_read_csv.py index ef2bfd91b03f..f71f03ff2c02 100644 --- a/tools/pythonpkg/tests/fast/api/test_read_csv.py +++ b/tools/pythonpkg/tests/fast/api/test_read_csv.py @@ -584,12 +584,9 @@ def test_read_csv_list_invalid_path(self, tmp_path): @pytest.mark.parametrize( 'options', [ - {'lineterminator': '\\r\\n'}, {'lineterminator': '\\n'}, {'lineterminator': 'LINE_FEED'}, - {'lineterminator': 'CARRIAGE_RETURN_LINE_FEED'}, {'lineterminator': CSVLineTerminator.LINE_FEED}, - {'lineterminator': CSVLineTerminator.CARRIAGE_RETURN_LINE_FEED}, {'columns': {'id': 'INTEGER', 'name': 'INTEGER', 'c': 'integer', 'd': 'INTEGER'}}, {'auto_type_candidates': ['INTEGER', 'INTEGER']}, {'max_line_size': 10000},