@@ -4359,34 +4359,36 @@ replace_text(PG_FUNCTION_ARGS)
4359
4359
}
4360
4360
4361
4361
/*
4362
- * check_replace_text_has_escape_char
4362
+ * check_replace_text_has_escape
4363
4363
*
4364
- * check whether replace_text contains escape char.
4364
+ * Returns 0 if text contains no backslashes that need processing.
4365
+ * Returns 1 if text contains backslashes, but not regexp submatch specifiers.
4366
+ * Returns 2 if text contains regexp submatch specifiers (\1 .. \9).
4365
4367
*/
4366
- static bool
4367
- check_replace_text_has_escape_char (const text * replace_text )
4368
+ static int
4369
+ check_replace_text_has_escape (const text * replace_text )
4368
4370
{
4371
+ int result = 0 ;
4369
4372
const char * p = VARDATA_ANY (replace_text );
4370
4373
const char * p_end = p + VARSIZE_ANY_EXHDR (replace_text );
4371
4374
4372
- if (pg_database_encoding_max_length () == 1 )
4373
- {
4374
- for (; p < p_end ; p ++ )
4375
- {
4376
- if (* p == '\\' )
4377
- return true;
4378
- }
4379
- }
4380
- else
4375
+ while (p < p_end )
4381
4376
{
4382
- for (; p < p_end ; p += pg_mblen (p ))
4377
+ /* Find next escape char, if any. */
4378
+ p = memchr (p , '\\' , p_end - p );
4379
+ if (p == NULL )
4380
+ break ;
4381
+ p ++ ;
4382
+ /* Note: a backslash at the end doesn't require extra processing. */
4383
+ if (p < p_end )
4383
4384
{
4384
- if (* p == '\\' )
4385
- return true;
4385
+ if (* p >= '1' && * p <= '9' )
4386
+ return 2 ; /* Found a submatch specifier, so done */
4387
+ result = 1 ; /* Found some other sequence, keep looking */
4388
+ p ++ ;
4386
4389
}
4387
4390
}
4388
-
4389
- return false;
4391
+ return result ;
4390
4392
}
4391
4393
4392
4394
/*
@@ -4403,25 +4405,17 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
4403
4405
{
4404
4406
const char * p = VARDATA_ANY (replace_text );
4405
4407
const char * p_end = p + VARSIZE_ANY_EXHDR (replace_text );
4406
- int eml = pg_database_encoding_max_length ();
4407
4408
4408
- for (;; )
4409
+ while ( p < p_end )
4409
4410
{
4410
4411
const char * chunk_start = p ;
4411
4412
int so ;
4412
4413
int eo ;
4413
4414
4414
- /* Find next escape char. */
4415
- if (eml == 1 )
4416
- {
4417
- for (; p < p_end && * p != '\\' ; p ++ )
4418
- /* nothing */ ;
4419
- }
4420
- else
4421
- {
4422
- for (; p < p_end && * p != '\\' ; p += pg_mblen (p ))
4423
- /* nothing */ ;
4424
- }
4415
+ /* Find next escape char, if any. */
4416
+ p = memchr (p , '\\' , p_end - p );
4417
+ if (p == NULL )
4418
+ p = p_end ;
4425
4419
4426
4420
/* Copy the text we just scanned over, if any. */
4427
4421
if (p > chunk_start )
@@ -4473,7 +4467,7 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
4473
4467
continue ;
4474
4468
}
4475
4469
4476
- if (so != -1 && eo != -1 )
4470
+ if (so >= 0 && eo >= 0 )
4477
4471
{
4478
4472
/*
4479
4473
* Copy the text that is back reference of regexp. Note so and eo
@@ -4491,45 +4485,57 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
4491
4485
}
4492
4486
}
4493
4487
4494
- #define REGEXP_REPLACE_BACKREF_CNT 10
4495
-
4496
4488
/*
4497
4489
* replace_text_regexp
4498
4490
*
4499
- * replace substring(s) in src_text that match regexp with replace_text.
4491
+ * replace substring(s) in src_text that match pattern with replace_text.
4492
+ * The replace_text can contain backslash markers to substitute
4493
+ * (parts of) the matched text.
4500
4494
*
4495
+ * cflags: regexp compile flags.
4496
+ * collation: collation to use.
4501
4497
* search_start: the character (not byte) offset in src_text at which to
4502
4498
* begin searching.
4503
4499
* n: if 0, replace all matches; if > 0, replace only the N'th match.
4504
- *
4505
- * Note: to avoid having to include regex.h in builtins.h, we declare
4506
- * the regexp argument as void *, but really it's regex_t *.
4507
4500
*/
4508
4501
text *
4509
- replace_text_regexp (text * src_text , void * regexp ,
4502
+ replace_text_regexp (text * src_text , text * pattern_text ,
4510
4503
text * replace_text ,
4504
+ int cflags , Oid collation ,
4511
4505
int search_start , int n )
4512
4506
{
4513
4507
text * ret_text ;
4514
- regex_t * re = ( regex_t * ) regexp ;
4508
+ regex_t * re ;
4515
4509
int src_text_len = VARSIZE_ANY_EXHDR (src_text );
4516
4510
int nmatches = 0 ;
4517
4511
StringInfoData buf ;
4518
- regmatch_t pmatch [REGEXP_REPLACE_BACKREF_CNT ];
4512
+ regmatch_t pmatch [10 ]; /* main match, plus \1 to \9 */
4513
+ int nmatch = lengthof (pmatch );
4519
4514
pg_wchar * data ;
4520
4515
size_t data_len ;
4521
4516
int data_pos ;
4522
4517
char * start_ptr ;
4523
- bool have_escape ;
4518
+ int escape_status ;
4524
4519
4525
4520
initStringInfo (& buf );
4526
4521
4527
4522
/* Convert data string to wide characters. */
4528
4523
data = (pg_wchar * ) palloc ((src_text_len + 1 ) * sizeof (pg_wchar ));
4529
4524
data_len = pg_mb2wchar_with_len (VARDATA_ANY (src_text ), data , src_text_len );
4530
4525
4531
- /* Check whether replace_text has escape char. */
4532
- have_escape = check_replace_text_has_escape_char (replace_text );
4526
+ /* Check whether replace_text has escapes, especially regexp submatches. */
4527
+ escape_status = check_replace_text_has_escape (replace_text );
4528
+
4529
+ /* If no regexp submatches, we can use REG_NOSUB. */
4530
+ if (escape_status < 2 )
4531
+ {
4532
+ cflags |= REG_NOSUB ;
4533
+ /* Also tell pg_regexec we only want the whole-match location. */
4534
+ nmatch = 1 ;
4535
+ }
4536
+
4537
+ /* Prepare the regexp. */
4538
+ re = RE_compile_and_cache (pattern_text , cflags , collation );
4533
4539
4534
4540
/* start_ptr points to the data_pos'th character of src_text */
4535
4541
start_ptr = (char * ) VARDATA_ANY (src_text );
@@ -4546,7 +4552,7 @@ replace_text_regexp(text *src_text, void *regexp,
4546
4552
data_len ,
4547
4553
search_start ,
4548
4554
NULL , /* no details */
4549
- REGEXP_REPLACE_BACKREF_CNT ,
4555
+ nmatch ,
4550
4556
pmatch ,
4551
4557
0 );
4552
4558
@@ -4602,10 +4608,9 @@ replace_text_regexp(text *src_text, void *regexp,
4602
4608
}
4603
4609
4604
4610
/*
4605
- * Copy the replace_text. Process back references when the
4606
- * replace_text has escape characters.
4611
+ * Copy the replace_text, processing escapes if any are present.
4607
4612
*/
4608
- if (have_escape )
4613
+ if (escape_status > 0 )
4609
4614
appendStringInfoRegexpSubstr (& buf , replace_text , pmatch ,
4610
4615
start_ptr , data_pos );
4611
4616
else
0 commit comments