Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 072132f

Browse files
committed
Add header matching mode to COPY FROM
COPY FROM supports the HEADER option to silently discard the header line from a CSV or text file. It is possible to load by mistake a file that matches the expected format, for example, if two text columns have been swapped, resulting in garbage in the database. This adds a new option value HEADER MATCH that checks the column names in the header line against the actual column names and errors out if they do not match. Author: Rémi Lapeyre <[email protected]> Reviewed-by: Daniel Verite <[email protected]> Reviewed-by: Peter Eisentraut <[email protected]> Discussion: https://www.postgresql.org/message-id/flat/CAF1-J-0PtCWMeLtswwGV2M70U26n4g33gpe1rcKQqe6wVQDrFA@mail.gmail.com
1 parent edcedcc commit 072132f

File tree

8 files changed

+210
-8
lines changed

8 files changed

+210
-8
lines changed

contrib/file_fdw/expected/file_fdw.out

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,21 @@ CREATE FOREIGN TABLE agg_bad (
113113
) SERVER file_server
114114
OPTIONS (format 'csv', filename :'filename', header 'true', delimiter ';', quote '@', escape '"', null '');
115115
ALTER FOREIGN TABLE agg_bad ADD CHECK (a >= 0);
116+
-- test header matching
117+
\set filename :abs_srcdir '/data/list1.csv'
118+
CREATE FOREIGN TABLE header_match ("1" int, foo text) SERVER file_server
119+
OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match');
120+
SELECT * FROM header_match;
121+
1 | foo
122+
---+-----
123+
1 | bar
124+
(1 row)
125+
126+
CREATE FOREIGN TABLE header_doesnt_match (a int, foo text) SERVER file_server
127+
OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match');
128+
SELECT * FROM header_doesnt_match; -- ERROR
129+
ERROR: column name mismatch in header line field 1: got "1", expected "a"
130+
CONTEXT: COPY header_doesnt_match, line 1: "1,foo"
116131
-- per-column options tests
117132
\set filename :abs_srcdir '/data/text.csv'
118133
CREATE FOREIGN TABLE text_csv (
@@ -464,12 +479,14 @@ SET ROLE regress_file_fdw_superuser;
464479
-- cleanup
465480
RESET ROLE;
466481
DROP EXTENSION file_fdw CASCADE;
467-
NOTICE: drop cascades to 7 other objects
482+
NOTICE: drop cascades to 9 other objects
468483
DETAIL: drop cascades to server file_server
469484
drop cascades to user mapping for regress_file_fdw_superuser on server file_server
470485
drop cascades to user mapping for regress_no_priv_user on server file_server
471486
drop cascades to foreign table agg_text
472487
drop cascades to foreign table agg_csv
473488
drop cascades to foreign table agg_bad
489+
drop cascades to foreign table header_match
490+
drop cascades to foreign table header_doesnt_match
474491
drop cascades to foreign table text_csv
475492
DROP ROLE regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;

contrib/file_fdw/sql/file_fdw.sql

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,15 @@ CREATE FOREIGN TABLE agg_bad (
103103
OPTIONS (format 'csv', filename :'filename', header 'true', delimiter ';', quote '@', escape '"', null '');
104104
ALTER FOREIGN TABLE agg_bad ADD CHECK (a >= 0);
105105
106+
-- test header matching
107+
\set filename :abs_srcdir '/data/list1.csv'
108+
CREATE FOREIGN TABLE header_match ("1" int, foo text) SERVER file_server
109+
OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match');
110+
SELECT * FROM header_match;
111+
CREATE FOREIGN TABLE header_doesnt_match (a int, foo text) SERVER file_server
112+
OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match');
113+
SELECT * FROM header_doesnt_match; -- ERROR
114+
106115
-- per-column options tests
107116
\set filename :abs_srcdir '/data/text.csv'
108117
CREATE FOREIGN TABLE text_csv (

doc/src/sgml/ref/copy.sgml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
3636
FREEZE [ <replaceable class="parameter">boolean</replaceable> ]
3737
DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
3838
NULL '<replaceable class="parameter">null_string</replaceable>'
39-
HEADER [ <replaceable class="parameter">boolean</replaceable> ]
39+
HEADER [ <replaceable class="parameter">boolean</replaceable> | <literal>match</literal> ]
4040
QUOTE '<replaceable class="parameter">quote_character</replaceable>'
4141
ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
4242
FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
@@ -276,7 +276,11 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
276276
<para>
277277
Specifies that the file contains a header line with the names of each
278278
column in the file. On output, the first line contains the column
279-
names from the table, and on input, the first line is ignored.
279+
names from the table. On input, the first line is discarded when this
280+
option is set to <literal>true</literal> (or equivalent Boolean value).
281+
If this option is set to <literal>match</literal>, the number and names
282+
of the columns in the header line must match the actual column names of
283+
the table, otherwise an error is raised.
280284
This option is not allowed when using <literal>binary</literal> format.
281285
</para>
282286
</listitem>

src/backend/commands/copy.c

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,64 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
313313
table_close(rel, NoLock);
314314
}
315315

316+
/*
317+
* Extract a CopyHeaderChoice value from a DefElem. This is like
318+
* defGetBoolean() but also accepts the special value "match".
319+
*/
320+
static CopyHeaderChoice
321+
defGetCopyHeaderChoice(DefElem *def)
322+
{
323+
/*
324+
* If no parameter given, assume "true" is meant.
325+
*/
326+
if (def->arg == NULL)
327+
return COPY_HEADER_TRUE;
328+
329+
/*
330+
* Allow 0, 1, "true", "false", "on", "off", or "match".
331+
*/
332+
switch (nodeTag(def->arg))
333+
{
334+
case T_Integer:
335+
switch (intVal(def->arg))
336+
{
337+
case 0:
338+
return COPY_HEADER_FALSE;
339+
case 1:
340+
return COPY_HEADER_TRUE;
341+
default:
342+
/* otherwise, error out below */
343+
break;
344+
}
345+
break;
346+
default:
347+
{
348+
char *sval = defGetString(def);
349+
350+
/*
351+
* The set of strings accepted here should match up with the
352+
* grammar's opt_boolean_or_string production.
353+
*/
354+
if (pg_strcasecmp(sval, "true") == 0)
355+
return COPY_HEADER_TRUE;
356+
if (pg_strcasecmp(sval, "false") == 0)
357+
return COPY_HEADER_FALSE;
358+
if (pg_strcasecmp(sval, "on") == 0)
359+
return COPY_HEADER_TRUE;
360+
if (pg_strcasecmp(sval, "off") == 0)
361+
return COPY_HEADER_FALSE;
362+
if (pg_strcasecmp(sval, "match") == 0)
363+
return COPY_HEADER_MATCH;
364+
}
365+
break;
366+
}
367+
ereport(ERROR,
368+
(errcode(ERRCODE_SYNTAX_ERROR),
369+
errmsg("%s requires a Boolean value or \"match\"",
370+
def->defname)));
371+
return COPY_HEADER_FALSE; /* keep compiler quiet */
372+
}
373+
316374
/*
317375
* Process the statement option list for COPY.
318376
*
@@ -394,7 +452,7 @@ ProcessCopyOptions(ParseState *pstate,
394452
if (header_specified)
395453
errorConflictingDefElem(defel, pstate);
396454
header_specified = true;
397-
opts_out->header_line = defGetBoolean(defel);
455+
opts_out->header_line = defGetCopyHeaderChoice(defel);
398456
}
399457
else if (strcmp(defel->defname, "quote") == 0)
400458
{

src/backend/commands/copyfromparse.c

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
#include "miscadmin.h"
7373
#include "pgstat.h"
7474
#include "port/pg_bswap.h"
75+
#include "utils/builtins.h"
7576
#include "utils/memutils.h"
7677
#include "utils/rel.h"
7778

@@ -758,12 +759,58 @@ NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
758759
/* only available for text or csv input */
759760
Assert(!cstate->opts.binary);
760761

761-
/* on input just throw the header line away */
762+
/* on input check that the header line is correct if needed */
762763
if (cstate->cur_lineno == 0 && cstate->opts.header_line)
763764
{
765+
ListCell *cur;
766+
TupleDesc tupDesc;
767+
768+
tupDesc = RelationGetDescr(cstate->rel);
769+
764770
cstate->cur_lineno++;
765-
if (CopyReadLine(cstate))
766-
return false; /* done */
771+
done = CopyReadLine(cstate);
772+
773+
if (cstate->opts.header_line == COPY_HEADER_MATCH)
774+
{
775+
int fldnum;
776+
777+
if (cstate->opts.csv_mode)
778+
fldct = CopyReadAttributesCSV(cstate);
779+
else
780+
fldct = CopyReadAttributesText(cstate);
781+
782+
if (fldct != list_length(cstate->attnumlist))
783+
ereport(ERROR,
784+
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
785+
errmsg("wrong number of fields in header line: field count is %d, expected %d",
786+
fldct, list_length(cstate->attnumlist))));
787+
788+
fldnum = 0;
789+
foreach(cur, cstate->attnumlist)
790+
{
791+
int attnum = lfirst_int(cur);
792+
char *colName = cstate->raw_fields[attnum - 1];
793+
Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
794+
795+
fldnum++;
796+
797+
if (colName == NULL)
798+
ereport(ERROR,
799+
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
800+
errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
801+
fldnum, cstate->opts.null_print, NameStr(attr->attname))));
802+
803+
if (namestrcmp(&attr->attname, colName) != 0) {
804+
ereport(ERROR,
805+
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
806+
errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
807+
fldnum, colName, NameStr(attr->attname))));
808+
}
809+
}
810+
}
811+
812+
if (done)
813+
return false;
767814
}
768815

769816
cstate->cur_lineno++;

src/include/commands/copy.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,17 @@
1919
#include "parser/parse_node.h"
2020
#include "tcop/dest.h"
2121

22+
/*
23+
* Represents whether a header line should be present, and whether it must
24+
* match the actual names (which implies "true").
25+
*/
26+
typedef enum CopyHeaderChoice
27+
{
28+
COPY_HEADER_FALSE = 0,
29+
COPY_HEADER_TRUE,
30+
COPY_HEADER_MATCH,
31+
} CopyHeaderChoice;
32+
2233
/*
2334
* A struct to hold COPY options, in a parsed form. All of these are related
2435
* to formatting, except for 'freeze', which doesn't really belong here, but
@@ -32,7 +43,7 @@ typedef struct CopyFormatOptions
3243
bool binary; /* binary format? */
3344
bool freeze; /* freeze rows on loading? */
3445
bool csv_mode; /* Comma Separated Value format? */
35-
bool header_line; /* header line? */
46+
CopyHeaderChoice header_line; /* header line? */
3647
char *null_print; /* NULL marker string (server encoding!) */
3748
int null_print_len; /* length of same */
3849
char *null_print_client; /* same converted to file encoding */

src/test/regress/expected/copy.out

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,3 +176,26 @@ INFO: progress: {"type": "FILE", "command": "COPY FROM", "relname": "tab_progre
176176
drop trigger check_after_tab_progress_reporting on tab_progress_reporting;
177177
drop function notice_after_tab_progress_reporting();
178178
drop table tab_progress_reporting;
179+
-- Test header matching feature
180+
create table header_copytest (
181+
a int,
182+
b int,
183+
c text
184+
);
185+
copy header_copytest from stdin with (header wrong_choice);
186+
ERROR: header requires a Boolean value or "match"
187+
copy header_copytest from stdin with (header match);
188+
copy header_copytest from stdin with (header match);
189+
ERROR: column name mismatch in header line field 3: got null value ("\N"), expected "c"
190+
CONTEXT: COPY header_copytest, line 1: "a b \N"
191+
copy header_copytest from stdin with (header match);
192+
ERROR: wrong number of fields in header line: field count is 2, expected 3
193+
CONTEXT: COPY header_copytest, line 1: "a b"
194+
copy header_copytest from stdin with (header match);
195+
ERROR: wrong number of fields in header line: field count is 4, expected 3
196+
CONTEXT: COPY header_copytest, line 1: "a b c d"
197+
copy header_copytest from stdin with (header match);
198+
ERROR: column name mismatch in header line field 3: got "d", expected "c"
199+
CONTEXT: COPY header_copytest, line 1: "a b d"
200+
copy header_copytest from stdin with (header match, format csv);
201+
drop table header_copytest;

src/test/regress/sql/copy.sql

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,3 +197,36 @@ copy tab_progress_reporting from :'filename'
197197
drop trigger check_after_tab_progress_reporting on tab_progress_reporting;
198198
drop function notice_after_tab_progress_reporting();
199199
drop table tab_progress_reporting;
200+
201+
-- Test header matching feature
202+
create table header_copytest (
203+
a int,
204+
b int,
205+
c text
206+
);
207+
copy header_copytest from stdin with (header wrong_choice);
208+
copy header_copytest from stdin with (header match);
209+
a b c
210+
1 2 foo
211+
\.
212+
copy header_copytest from stdin with (header match);
213+
a b \N
214+
1 2 foo
215+
\.
216+
copy header_copytest from stdin with (header match);
217+
a b
218+
1 2
219+
\.
220+
copy header_copytest from stdin with (header match);
221+
a b c d
222+
1 2 foo bar
223+
\.
224+
copy header_copytest from stdin with (header match);
225+
a b d
226+
1 2 foo
227+
\.
228+
copy header_copytest from stdin with (header match, format csv);
229+
a,b,c
230+
1,2,foo
231+
\.
232+
drop table header_copytest;

0 commit comments

Comments
 (0)