Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit eec5711

Browse files
committed
In psql \copy from, send data to server in larger chunks.
Previously, we would send each line as a separate CopyData message. That's pretty wasteful if the table is narrow, as each CopyData message has 5 bytes of overhead. For efficiency, buffer up and pack 8 kB of input data into each CopyData message.

The server also sends each line as a separate CopyData message in COPY TO STDOUT, and that's similarly wasteful. But that's documented in the FE/BE protocol description, so changing that would be a wire protocol break.

Reviewed-by: Aleksander Alekseev
Discussion: https://www.postgresql.org/message-id/40b2cec0-d0fb-3191-2ae1-9a3fe16a7e48%40iki.fi
1 parent b4deefc commit eec5711

File tree

1 file changed

+56
-43
lines changed

1 file changed

+56
-43
lines changed

src/bin/psql/copy.c

Lines changed: 56 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -581,77 +581,90 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res)
581581
else
582582
{
583583
bool copydone = false;
584+
int buflen;
585+
bool at_line_begin = true;
584586

587+
/*
588+
* In text mode, we have to read the input one line at a time, so that
589+
* we can stop reading at the EOF marker (\.). We mustn't read beyond
590+
* the EOF marker, because if the data was inlined in a SQL script, we
591+
* would eat up the commands after the EOF marker.
592+
*/
593+
buflen = 0;
585594
while (!copydone)
586-
{ /* for each input line ... */
587-
bool firstload;
588-
bool linedone;
595+
{
596+
char *fgresult;
589597

590-
if (showprompt)
598+
if (at_line_begin && showprompt)
591599
{
592600
const char *prompt = get_prompt(PROMPT_COPY, NULL);
593601

594602
fputs(prompt, stdout);
595603
fflush(stdout);
596604
}
597605

598-
firstload = true;
599-
linedone = false;
600-
601-
while (!linedone)
602-
{ /* for each bufferload in line ... */
603-
int linelen;
604-
char *fgresult;
605-
606-
/* enable longjmp while waiting for input */
607-
sigint_interrupt_enabled = true;
606+
/* enable longjmp while waiting for input */
607+
sigint_interrupt_enabled = true;
608608

609-
fgresult = fgets(buf, sizeof(buf), copystream);
609+
fgresult = fgets(&buf[buflen], COPYBUFSIZ - buflen, copystream);
610610

611-
sigint_interrupt_enabled = false;
611+
sigint_interrupt_enabled = false;
612612

613-
if (!fgresult)
614-
{
615-
copydone = true;
616-
break;
617-
}
613+
if (!fgresult)
614+
copydone = true;
615+
else
616+
{
617+
int linelen;
618618

619-
linelen = strlen(buf);
619+
linelen = strlen(fgresult);
620+
buflen += linelen;
620621

621622
/* current line is done? */
622-
if (linelen > 0 && buf[linelen - 1] == '\n')
623-
linedone = true;
624-
625-
/* check for EOF marker, but not on a partial line */
626-
if (firstload)
623+
if (buf[buflen - 1] == '\n')
627624
{
628-
/*
629-
* This code erroneously assumes '\.' on a line alone
630-
* inside a quoted CSV string terminates the \copy.
631-
* https://www.postgresql.org/message-id/E1TdNVQ-0001ju-GO@wrigleys.postgresql.org
632-
*/
633-
if (strcmp(buf, "\\.\n") == 0 ||
634-
strcmp(buf, "\\.\r\n") == 0)
625+
/* check for EOF marker, but not on a partial line */
626+
if (at_line_begin)
635627
{
636-
copydone = true;
637-
break;
628+
/*
629+
* This code erroneously assumes '\.' on a line alone
630+
* inside a quoted CSV string terminates the \copy.
631+
* https://www.postgresql.org/message-id/E1TdNVQ-0001ju-GO@wrigleys.postgresql.org
632+
*/
633+
if ((linelen == 3 && memcmp(fgresult, "\\.\n", 3) == 0) ||
634+
(linelen == 4 && memcmp(fgresult, "\\.\r\n", 4) == 0))
635+
{
636+
copydone = true;
637+
}
638638
}
639639

640-
firstload = false;
640+
if (copystream == pset.cur_cmd_source)
641+
{
642+
pset.lineno++;
643+
pset.stmt_lineno++;
644+
}
645+
at_line_begin = true;
641646
}
647+
else
648+
at_line_begin = false;
649+
}
642650

643-
if (PQputCopyData(conn, buf, linelen) <= 0)
651+
/*
652+
* If the buffer is full, or we've reached the EOF, flush it.
653+
*
654+
* Make sure there's always space for four more bytes in the
655+
* buffer, plus a NUL terminator. That way, an EOF marker is
656+
* never split across two fgets() calls, which simplifies the logic.
657+
*/
658+
if (buflen >= COPYBUFSIZ - 5 || (copydone && buflen > 0))
659+
{
660+
if (PQputCopyData(conn, buf, buflen) <= 0)
644661
{
645662
OK = false;
646663
copydone = true;
647664
break;
648665
}
649-
}
650666

651-
if (copystream == pset.cur_cmd_source)
652-
{
653-
pset.lineno++;
654-
pset.stmt_lineno++;
667+
buflen = 0;
655668
}
656669
}
657670
}

0 commit comments

Comments
 (0)