Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 9e2d870

Browse files
committed
Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or range, the entire COPY fails. However, in some cases, such data can be ignored and just copying normal data is preferable. This commit adds a new option SAVE_ERROR_TO, which specifies where to save the error information. When this option is specified, COPY skips soft errors and continues copying. Currently, SAVE_ERROR_TO only supports "none". This indicates error information is not saved and COPY just skips the unexpected data and continues running. Later works are expected to add more choices, such as 'log' and 'table'. Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson, Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada Reviewed-by: Vignesh C, Atsushi Torikoshi
1 parent c7e5e99 commit 9e2d870

File tree

10 files changed

+239
-7
lines changed

10 files changed

+239
-7
lines changed

doc/src/sgml/ref/copy.sgml

+22-1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
4343
FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
4444
FORCE_NOT_NULL { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
4545
FORCE_NULL { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
46+
SAVE_ERROR_TO '<replaceable class="parameter">location</replaceable>'
4647
ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
4748
</synopsis>
4849
</refsynopsisdiv>
@@ -373,6 +374,25 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
373374
</listitem>
374375
</varlistentry>
375376

377+
<varlistentry>
378+
<term><literal>SAVE_ERROR_TO</literal></term>
379+
<listitem>
380+
<para>
381+
Specifies to save error information to <replaceable class="parameter">
382+
location</replaceable> when there is malformed data in the input.
383+
Currently, only <literal>error</literal> (default) and <literal>none</literal>
384+
values are supported.
385+
If the <literal>error</literal> value is specified,
386+
<command>COPY</command> stops operation at the first error.
387+
If the <literal>none</literal> value is specified,
388+
<command>COPY</command> skips malformed data and continues copying data.
389+
The option is allowed only in <command>COPY FROM</command>.
390+
The <literal>none</literal> value is allowed only when
391+
not using <literal>binary</literal> format.
392+
</para>
393+
</listitem>
394+
</varlistentry>
395+
376396
<varlistentry>
377397
<term><literal>ENCODING</literal></term>
378398
<listitem>
@@ -556,7 +576,8 @@ COPY <replaceable class="parameter">count</replaceable>
556576
</para>
557577

558578
<para>
559-
<command>COPY</command> stops operation at the first error. This
579+
<command>COPY</command> stops operation at the first error when
580+
<literal>SAVE_ERROR_TO</literal> is not specified. This
560581
should not lead to problems in the event of a <command>COPY
561582
TO</command>, but the target table will already have received
562583
earlier rows in a <command>COPY FROM</command>. These rows will not

src/backend/commands/copy.c

+49
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,42 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
394394
return COPY_HEADER_FALSE; /* keep compiler quiet */
395395
}
396396

397+
/*
398+
* Extract a CopySaveErrorToChoice value from a DefElem.
399+
*/
400+
static CopySaveErrorToChoice
401+
defGetCopySaveErrorToChoice(DefElem *def, ParseState *pstate, bool is_from)
402+
{
403+
char *sval;
404+
405+
if (!is_from)
406+
ereport(ERROR,
407+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
408+
errmsg("COPY SAVE_ERROR_TO cannot be used with COPY TO"),
409+
parser_errposition(pstate, def->location)));
410+
411+
/*
412+
* If no parameter value given, assume the default value.
413+
*/
414+
if (def->arg == NULL)
415+
return COPY_SAVE_ERROR_TO_ERROR;
416+
417+
/*
418+
* Allow "error", or "none" values.
419+
*/
420+
sval = defGetString(def);
421+
if (pg_strcasecmp(sval, "error") == 0)
422+
return COPY_SAVE_ERROR_TO_ERROR;
423+
if (pg_strcasecmp(sval, "none") == 0)
424+
return COPY_SAVE_ERROR_TO_NONE;
425+
426+
ereport(ERROR,
427+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
428+
errmsg("COPY save_error_to \"%s\" not recognized", sval),
429+
parser_errposition(pstate, def->location)));
430+
return COPY_SAVE_ERROR_TO_ERROR; /* keep compiler quiet */
431+
}
432+
397433
/*
398434
* Process the statement option list for COPY.
399435
*
@@ -419,6 +455,7 @@ ProcessCopyOptions(ParseState *pstate,
419455
bool format_specified = false;
420456
bool freeze_specified = false;
421457
bool header_specified = false;
458+
bool save_error_to_specified = false;
422459
ListCell *option;
423460

424461
/* Support external use for option sanity checking */
@@ -571,6 +608,13 @@ ProcessCopyOptions(ParseState *pstate,
571608
defel->defname),
572609
parser_errposition(pstate, defel->location)));
573610
}
611+
else if (strcmp(defel->defname, "save_error_to") == 0)
612+
{
613+
if (save_error_to_specified)
614+
errorConflictingDefElem(defel, pstate);
615+
save_error_to_specified = true;
616+
opts_out->save_error_to = defGetCopySaveErrorToChoice(defel, pstate, is_from);
617+
}
574618
else
575619
ereport(ERROR,
576620
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -598,6 +642,11 @@ ProcessCopyOptions(ParseState *pstate,
598642
(errcode(ERRCODE_SYNTAX_ERROR),
599643
errmsg("cannot specify DEFAULT in BINARY mode")));
600644

645+
if (opts_out->binary && opts_out->save_error_to != COPY_SAVE_ERROR_TO_ERROR)
646+
ereport(ERROR,
647+
(errcode(ERRCODE_SYNTAX_ERROR),
648+
errmsg("cannot specify SAVE_ERROR_TO in BINARY mode")));
649+
601650
/* Set defaults for omitted options */
602651
if (!opts_out->delim)
603652
opts_out->delim = opts_out->csv_mode ? "," : "\t";

src/backend/commands/copyfrom.c

+48
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "libpq/libpq.h"
4343
#include "libpq/pqformat.h"
4444
#include "miscadmin.h"
45+
#include "nodes/miscnodes.h"
4546
#include "optimizer/optimizer.h"
4647
#include "pgstat.h"
4748
#include "rewrite/rewriteHandler.h"
@@ -656,6 +657,9 @@ CopyFrom(CopyFromState cstate)
656657
Assert(cstate->rel);
657658
Assert(list_length(cstate->range_table) == 1);
658659

660+
if (cstate->opts.save_error_to != COPY_SAVE_ERROR_TO_ERROR)
661+
Assert(cstate->escontext);
662+
659663
/*
660664
* The target must be a plain, foreign, or partitioned relation, or have
661665
* an INSTEAD OF INSERT row trigger. (Currently, such triggers are only
@@ -992,6 +996,25 @@ CopyFrom(CopyFromState cstate)
992996
if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull))
993997
break;
994998

999+
if (cstate->opts.save_error_to != COPY_SAVE_ERROR_TO_ERROR &&
1000+
cstate->escontext->error_occurred)
1001+
{
1002+
/*
1003+
* Soft error occured, skip this tuple and save error information
1004+
* according to SAVE_ERROR_TO.
1005+
*/
1006+
if (cstate->opts.save_error_to == COPY_SAVE_ERROR_TO_NONE)
1007+
1008+
/*
1009+
* Just make ErrorSaveContext ready for the next NextCopyFrom.
1010+
* Since we don't set details_wanted and error_data is not to
1011+
* be filled, just resetting error_occurred is enough.
1012+
*/
1013+
cstate->escontext->error_occurred = false;
1014+
1015+
continue;
1016+
}
1017+
9951018
ExecStoreVirtualTuple(myslot);
9961019

9971020
/*
@@ -1284,6 +1307,14 @@ CopyFrom(CopyFromState cstate)
12841307
/* Done, clean up */
12851308
error_context_stack = errcallback.previous;
12861309

1310+
if (cstate->opts.save_error_to != COPY_SAVE_ERROR_TO_ERROR &&
1311+
cstate->num_errors > 0)
1312+
ereport(NOTICE,
1313+
errmsg_plural("%zd row were skipped due to data type incompatibility",
1314+
"%zd rows were skipped due to data type incompatibility",
1315+
cstate->num_errors,
1316+
cstate->num_errors));
1317+
12871318
if (bistate != NULL)
12881319
FreeBulkInsertState(bistate);
12891320

@@ -1419,6 +1450,23 @@ BeginCopyFrom(ParseState *pstate,
14191450
}
14201451
}
14211452

1453+
/* Set up soft error handler for SAVE_ERROR_TO */
1454+
if (cstate->opts.save_error_to != COPY_SAVE_ERROR_TO_ERROR)
1455+
{
1456+
cstate->escontext = makeNode(ErrorSaveContext);
1457+
cstate->escontext->type = T_ErrorSaveContext;
1458+
cstate->escontext->error_occurred = false;
1459+
1460+
/*
1461+
* Currently we only support COPY_SAVE_ERROR_TO_NONE. We'll add other
1462+
* options later
1463+
*/
1464+
if (cstate->opts.save_error_to == COPY_SAVE_ERROR_TO_NONE)
1465+
cstate->escontext->details_wanted = false;
1466+
}
1467+
else
1468+
cstate->escontext = NULL;
1469+
14221470
/* Convert FORCE_NULL name list to per-column flags, check validity */
14231471
cstate->opts.force_null_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
14241472
if (cstate->opts.force_null_all)

src/backend/commands/copyfromparse.c

+12-5
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
#include "libpq/pqformat.h"
7171
#include "mb/pg_wchar.h"
7272
#include "miscadmin.h"
73+
#include "nodes/miscnodes.h"
7374
#include "pgstat.h"
7475
#include "port/pg_bswap.h"
7576
#include "utils/builtins.h"
@@ -955,11 +956,17 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
955956

956957
values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
957958
}
958-
else
959-
values[m] = InputFunctionCall(&in_functions[m],
960-
string,
961-
typioparams[m],
962-
att->atttypmod);
959+
/* If SAVE_ERROR_TO is specified, skip rows with soft errors */
960+
else if (!InputFunctionCallSafe(&in_functions[m],
961+
string,
962+
typioparams[m],
963+
att->atttypmod,
964+
(Node *) cstate->escontext,
965+
&values[m]))
966+
{
967+
cstate->num_errors++;
968+
return true;
969+
}
963970

964971
cstate->cur_attname = NULL;
965972
cstate->cur_attval = NULL;

src/bin/psql/tab-complete.c

+6-1
Original file line numberDiff line numberDiff line change
@@ -2898,12 +2898,17 @@ psql_completion(const char *text, int start, int end)
28982898
else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "("))
28992899
COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL",
29002900
"HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE",
2901-
"FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT");
2901+
"FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT",
2902+
"SAVE_ERROR_TO");
29022903

29032904
/* Complete COPY <sth> FROM|TO filename WITH (FORMAT */
29042905
else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT"))
29052906
COMPLETE_WITH("binary", "csv", "text");
29062907

2908+
/* Complete COPY <sth> FROM filename WITH (SAVE_ERROR_TO */
2909+
else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "SAVE_ERROR_TO"))
2910+
COMPLETE_WITH("error", "none");
2911+
29072912
/* Complete COPY <sth> FROM <sth> WITH (<options>) */
29082913
else if (Matches("COPY|\\copy", MatchAny, "FROM", MatchAny, "WITH", MatchAny))
29092914
COMPLETE_WITH("WHERE");

src/include/commands/copy.h

+11
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,16 @@ typedef enum CopyHeaderChoice
3030
COPY_HEADER_MATCH,
3131
} CopyHeaderChoice;
3232

33+
/*
34+
* Represents where to save input processing errors. More values to be added
35+
* in the future.
36+
*/
37+
typedef enum CopySaveErrorToChoice
38+
{
39+
COPY_SAVE_ERROR_TO_ERROR = 0, /* immediately throw errors */
40+
COPY_SAVE_ERROR_TO_NONE, /* ignore errors */
41+
} CopySaveErrorToChoice;
42+
3343
/*
3444
* A struct to hold COPY options, in a parsed form. All of these are related
3545
* to formatting, except for 'freeze', which doesn't really belong here, but
@@ -62,6 +72,7 @@ typedef struct CopyFormatOptions
6272
bool force_null_all; /* FORCE_NULL *? */
6373
bool *force_null_flags; /* per-column CSV FN flags */
6474
bool convert_selectively; /* do selective binary conversion? */
75+
CopySaveErrorToChoice save_error_to; /* where to save error information */
6576
List *convert_select; /* list of column names (can be NIL) */
6677
} CopyFormatOptions;
6778

src/include/commands/copyfrom_internal.h

+5
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include "commands/copy.h"
1818
#include "commands/trigger.h"
19+
#include "nodes/miscnodes.h"
1920

2021
/*
2122
* Represents the different source cases we need to worry about at
@@ -94,6 +95,10 @@ typedef struct CopyFromStateData
9495
* default value */
9596
FmgrInfo *in_functions; /* array of input functions for each attrs */
9697
Oid *typioparams; /* array of element types for in_functions */
98+
ErrorSaveContext *escontext; /* soft error trapper during in_functions
99+
* execution */
100+
uint64 num_errors; /* total number of rows which contained soft
101+
* errors */
97102
int *defmap; /* array of default att numbers related to
98103
* missing att */
99104
ExprState **defexprs; /* array of default att expressions for all

src/test/regress/expected/copy2.out

+43
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,21 @@ COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii');
7777
ERROR: conflicting or redundant options
7878
LINE 1: COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii...
7979
^
80+
COPY x from stdin (save_error_to none,save_error_to none);
81+
ERROR: conflicting or redundant options
82+
LINE 1: COPY x from stdin (save_error_to none,save_error_to none);
83+
^
8084
-- incorrect options
8185
COPY x to stdin (format BINARY, delimiter ',');
8286
ERROR: cannot specify DELIMITER in BINARY mode
8387
COPY x to stdin (format BINARY, null 'x');
8488
ERROR: cannot specify NULL in BINARY mode
89+
COPY x from stdin (format BINARY, save_error_to none);
90+
ERROR: cannot specify SAVE_ERROR_TO in BINARY mode
91+
COPY x to stdin (save_error_to none);
92+
ERROR: COPY SAVE_ERROR_TO cannot be used with COPY TO
93+
LINE 1: COPY x to stdin (save_error_to none);
94+
^
8595
COPY x to stdin (format TEXT, force_quote(a));
8696
ERROR: COPY FORCE_QUOTE requires CSV mode
8797
COPY x from stdin (format CSV, force_quote(a));
@@ -94,6 +104,10 @@ COPY x to stdout (format TEXT, force_null(a));
94104
ERROR: COPY FORCE_NULL requires CSV mode
95105
COPY x to stdin (format CSV, force_null(a));
96106
ERROR: COPY FORCE_NULL cannot be used with COPY TO
107+
COPY x to stdin (format BINARY, save_error_to unsupported);
108+
ERROR: COPY SAVE_ERROR_TO cannot be used with COPY TO
109+
LINE 1: COPY x to stdin (format BINARY, save_error_to unsupported);
110+
^
97111
-- too many columns in column list: should fail
98112
COPY x (a, b, c, d, e, d, c) from stdin;
99113
ERROR: column "d" specified more than once
@@ -710,6 +724,33 @@ SELECT * FROM instead_of_insert_tbl;
710724
(2 rows)
711725

712726
COMMIT;
727+
-- tests for SAVE_ERROR_TO option
728+
CREATE TABLE check_ign_err (n int, m int[], k int);
729+
COPY check_ign_err FROM STDIN WITH (save_error_to error);
730+
ERROR: invalid input syntax for type integer: "a"
731+
CONTEXT: COPY check_ign_err, line 2, column n: "a"
732+
COPY check_ign_err FROM STDIN WITH (save_error_to none);
733+
NOTICE: 4 rows were skipped due to data type incompatibility
734+
SELECT * FROM check_ign_err;
735+
n | m | k
736+
---+-----+---
737+
1 | {1} | 1
738+
5 | {5} | 5
739+
(2 rows)
740+
741+
-- test datatype error that can't be handled as soft: should fail
742+
CREATE TABLE hard_err(foo widget);
743+
COPY hard_err FROM STDIN WITH (save_error_to none);
744+
ERROR: invalid input syntax for type widget: "1"
745+
CONTEXT: COPY hard_err, line 1, column foo: "1"
746+
-- test missing data: should fail
747+
COPY check_ign_err FROM STDIN WITH (save_error_to none);
748+
ERROR: missing data for column "k"
749+
CONTEXT: COPY check_ign_err, line 1: "1 {1}"
750+
-- test extra data: should fail
751+
COPY check_ign_err FROM STDIN WITH (save_error_to none);
752+
ERROR: extra data after last expected column
753+
CONTEXT: COPY check_ign_err, line 1: "1 {1} 3 abc"
713754
-- clean up
714755
DROP TABLE forcetest;
715756
DROP TABLE vistest;
@@ -724,6 +765,8 @@ DROP TABLE instead_of_insert_tbl;
724765
DROP VIEW instead_of_insert_tbl_view;
725766
DROP VIEW instead_of_insert_tbl_view_2;
726767
DROP FUNCTION fun_instead_of_insert_tbl();
768+
DROP TABLE check_ign_err;
769+
DROP TABLE hard_err;
727770
--
728771
-- COPY FROM ... DEFAULT
729772
--

0 commit comments

Comments
 (0)