From 4cfd4beb2efe3012e2b358b3634b54fcc58cd684 Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Mon, 21 Oct 2019 12:17:40 +0200 Subject: [PATCH 01/20] toke.c: factor out static yyl_fake_eof() --- toke.c | 529 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 274 insertions(+), 255 deletions(-) diff --git a/toke.c b/toke.c index 28f305c62c67..b5bd92a88cf8 100644 --- a/toke.c +++ b/toke.c @@ -6816,6 +6816,273 @@ yyl_eol(pTHX_ char *s, STRLEN len, return RETRY(); } +static int +yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s, STRLEN len, + I32 orig_keyword, GV *gv, GV **gvp, + U8 formbrack, const bool saw_infix_sigil) +{ + char *d; + + goto start; + + do { + fake_eof = 0; + bof = cBOOL(PL_rsfp); + start: + + PL_bufptr = PL_bufend; + COPLINE_INC_WITH_HERELINES; + if (!lex_next_chunk(fake_eof)) { + CopLINE_dec(PL_curcop); + s = PL_bufptr; + TOKEN(';'); /* not infinite loop because rsfp is NULL now */ + } + CopLINE_dec(PL_curcop); + s = PL_bufptr; + /* If it looks like the start of a BOM or raw UTF-16, + * check if it in fact is. */ + if (bof && PL_rsfp + && ( *s == 0 + || *(U8*)s == BOM_UTF8_FIRST_BYTE + || *(U8*)s >= 0xFE + || s[1] == 0)) + { + Off_t offset = (IV)PerlIO_tell(PL_rsfp); + bof = (offset == (Off_t)SvCUR(PL_linestr)); +#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS) + /* offset may include swallowed CR */ + if (!bof) + bof = (offset == (Off_t)SvCUR(PL_linestr)+1); +#endif + if (bof) { + PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + s = swallow_bom((U8*)s); + } + } + if (PL_parser->in_pod) { + /* Incest with pod. */ + if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut") + && !isALPHA(s[4])) + { + SvPVCLEAR(PL_linestr); + PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); + PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = NULL; + PL_parser->in_pod = 0; + } + } + if (PL_rsfp || PL_parser->filtered) + incline(s, PL_bufend); + } while (PL_parser->in_pod); + + PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; + PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = NULL; + if (CopLINE(PL_curcop) == 1) { + while (s < PL_bufend && isSPACE(*s)) + s++; + if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */ + s++; + d = NULL; + if (!PL_in_eval) { + if (*s == '#' && *(s+1) == '!') + d = s + 2; +#ifdef ALTERNATE_SHEBANG + else { + static char const as[] = ALTERNATE_SHEBANG; + if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1)) + d = s + (sizeof(as) - 1); + } +#endif /* ALTERNATE_SHEBANG */ + } + if (d) { + char *ipath; + char *ipathend; + + while (isSPACE(*d)) + d++; + ipath = d; + while (*d && !isSPACE(*d)) + d++; + ipathend = d; + +#ifdef ARG_ZERO_IS_SCRIPT + if (ipathend > ipath) { + /* + * HP-UX (at least) sets argv[0] to the script name, + * which makes $^X incorrect. And Digital UNIX and Linux, + * at least, set argv[0] to the basename of the Perl + * interpreter. So, having found "#!", we'll set it right. + */ + SV* copfilesv = CopFILESV(PL_curcop); + if (copfilesv) { + SV * const x = + GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL, + SVt_PV)); /* $^X */ + assert(SvPOK(x) || SvGMAGICAL(x)); + if (sv_eq(x, copfilesv)) { + sv_setpvn(x, ipath, ipathend - ipath); + SvSETMAGIC(x); + } + else { + STRLEN blen; + STRLEN llen; + const char *bstart = SvPV_const(copfilesv, blen); + const char * const lstart = SvPV_const(x, llen); + if (llen < blen) { + bstart += blen - llen; + if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') { + sv_setpvn(x, ipath, ipathend - ipath); + SvSETMAGIC(x); + } + } + } + } + else { + /* Anything to do if no copfilesv? */ + } + TAINT_NOT; /* $^X is always tainted, but that's OK */ + } +#endif /* ARG_ZERO_IS_SCRIPT */ + + /* + * Look for options. + */ + d = instr(s,"perl -"); + if (!d) { + d = instr(s,"perl"); +#if defined(DOSISH) + /* avoid getting into infinite loops when shebang + * line contains "Perl" rather than "perl" */ + if (!d) { + for (d = ipathend-4; d >= ipath; --d) { + if (isALPHA_FOLD_EQ(*d, 'p') + && !ibcmp(d, "perl", 4)) + { + break; + } + } + if (d < ipath) + d = NULL; + } +#endif + } +#ifdef ALTERNATE_SHEBANG + /* + * If the ALTERNATE_SHEBANG on this system starts with a + * character that can be part of a Perl expression, then if + * we see it but not "perl", we're probably looking at the + * start of Perl code, not a request to hand off to some + * other interpreter. Similarly, if "perl" is there, but + * not in the first 'word' of the line, we assume the line + * contains the start of the Perl program. + */ + if (d && *s != '#') { + const char *c = ipath; + while (*c && !strchr("; \t\r\n\f\v#", *c)) + c++; + if (c < d) + d = NULL; /* "perl" not in first word; ignore */ + else + *s = '#'; /* Don't try to parse shebang line */ + } +#endif /* ALTERNATE_SHEBANG */ + if (!d + && *s == '#' + && ipathend > ipath + && !PL_minus_c + && !instr(s,"indir") + && instr(PL_origargv[0],"perl")) + { + dVAR; + char **newargv; + + *ipathend = '\0'; + s = ipathend + 1; + while (s < PL_bufend && isSPACE(*s)) + s++; + if (s < PL_bufend) { + Newx(newargv,PL_origargc+3,char*); + newargv[1] = s; + while (s < PL_bufend && !isSPACE(*s)) + s++; + *s = '\0'; + Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*); + } + else + newargv = PL_origargv; + newargv[0] = ipath; + PERL_FPU_PRE_EXEC + PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv)); + PERL_FPU_POST_EXEC + Perl_croak(aTHX_ "Can't exec %s", ipath); + } + if (d) { + while (*d && !isSPACE(*d)) + d++; + while (SPACE_OR_TAB(*d)) + d++; + + if (*d++ == '-') { + const bool switches_done = PL_doswitches; + const U32 oldpdb = PL_perldb; + const bool oldn = PL_minus_n; + const bool oldp = PL_minus_p; + const char *d1 = d; + + do { + bool baduni = FALSE; + if (*d1 == 'C') { + const char *d2 = d1 + 1; + if (parse_unicode_opts((const char **)&d2) + != PL_unicode) + baduni = TRUE; + } + if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) { + const char * const m = d1; + while (*d1 && !isSPACE(*d1)) + d1++; + Perl_croak(aTHX_ "Too late for \"-%.*s\" option", + (int)(d1 - m), m); + } + d1 = moreswitches(d1); + } while (d1); + if (PL_doswitches && !switches_done) { + int argc = PL_origargc; + char **argv = PL_origargv; + do { + argc--,argv++; + } while (argc && argv[0][0] == '-' && argv[0][1]); + init_argv_symbols(argc,argv); + } + if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb) + || ((PL_minus_n || PL_minus_p) && !(oldn || oldp))) + /* if we have already added "LINE: while (<>) {", + we must not do it again */ + { + SvPVCLEAR(PL_linestr); + PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); + PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = NULL; + PL_preambled = FALSE; + if (PERLDB_LINE_OR_SAVESRC) + (void)gv_fetchfile(PL_origfilename); + return RETRY(); + } + } + } + } + } + + if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { + PL_lex_state = LEX_FORMLINE; + force_next(FORMRBRACK); + TOKEN(';'); + } + + return RETRY(); +} + static int yyl_try(pTHX_ char initial_state, char *s, STRLEN len, I32 orig_keyword, GV *gv, GV **gvp, @@ -6836,8 +7103,9 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, case 4: case 26: - fake_eof = LEX_FAKE_EOF; - goto fake_eof; /* emulate EOF on ^D or ^Z */ + /* emulate EOF on ^D or ^Z */ + return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s, len, + orig_keyword, gv, gvp, formbrack, saw_infix_sigil); case 0: if ((!PL_rsfp || PL_lex_inwhat) @@ -6938,258 +7206,9 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, update_debugger_info(PL_linestr, NULL, 0); return RETRY(); } - do { - fake_eof = 0; - bof = cBOOL(PL_rsfp); - fake_eof: - PL_bufptr = PL_bufend; - COPLINE_INC_WITH_HERELINES; - if (!lex_next_chunk(fake_eof)) { - CopLINE_dec(PL_curcop); - s = PL_bufptr; - TOKEN(';'); /* not infinite loop because rsfp is NULL now */ - } - CopLINE_dec(PL_curcop); - s = PL_bufptr; - /* If it looks like the start of a BOM or raw UTF-16, - * check if it in fact is. */ - if (bof && PL_rsfp - && ( *s == 0 - || *(U8*)s == BOM_UTF8_FIRST_BYTE - || *(U8*)s >= 0xFE - || s[1] == 0)) - { - Off_t offset = (IV)PerlIO_tell(PL_rsfp); - bof = (offset == (Off_t)SvCUR(PL_linestr)); -#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS) - /* offset may include swallowed CR */ - if (!bof) - bof = (offset == (Off_t)SvCUR(PL_linestr)+1); -#endif - if (bof) { - PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); - s = swallow_bom((U8*)s); - } - } - if (PL_parser->in_pod) { - /* Incest with pod. */ - if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut") - && !isALPHA(s[4])) - { - SvPVCLEAR(PL_linestr); - PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); - PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); - PL_last_lop = PL_last_uni = NULL; - PL_parser->in_pod = 0; - } - } - if (PL_rsfp || PL_parser->filtered) - incline(s, PL_bufend); - } while (PL_parser->in_pod); - PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; - PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); - PL_last_lop = PL_last_uni = NULL; - if (CopLINE(PL_curcop) == 1) { - while (s < PL_bufend && isSPACE(*s)) - s++; - if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */ - s++; - d = NULL; - if (!PL_in_eval) { - if (*s == '#' && *(s+1) == '!') - d = s + 2; -#ifdef ALTERNATE_SHEBANG - else { - static char const as[] = ALTERNATE_SHEBANG; - if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1)) - d = s + (sizeof(as) - 1); - } -#endif /* ALTERNATE_SHEBANG */ - } - if (d) { - char *ipath; - char *ipathend; - - while (isSPACE(*d)) - d++; - ipath = d; - while (*d && !isSPACE(*d)) - d++; - ipathend = d; - -#ifdef ARG_ZERO_IS_SCRIPT - if (ipathend > ipath) { - /* - * HP-UX (at least) sets argv[0] to the script name, - * which makes $^X incorrect. And Digital UNIX and Linux, - * at least, set argv[0] to the basename of the Perl - * interpreter. So, having found "#!", we'll set it right. - */ - SV* copfilesv = CopFILESV(PL_curcop); - if (copfilesv) { - SV * const x = - GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL, - SVt_PV)); /* $^X */ - assert(SvPOK(x) || SvGMAGICAL(x)); - if (sv_eq(x, copfilesv)) { - sv_setpvn(x, ipath, ipathend - ipath); - SvSETMAGIC(x); - } - else { - STRLEN blen; - STRLEN llen; - const char *bstart = SvPV_const(copfilesv, blen); - const char * const lstart = SvPV_const(x, llen); - if (llen < blen) { - bstart += blen - llen; - if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') { - sv_setpvn(x, ipath, ipathend - ipath); - SvSETMAGIC(x); - } - } - } - } - else { - /* Anything to do if no copfilesv? */ - } - TAINT_NOT; /* $^X is always tainted, but that's OK */ - } -#endif /* ARG_ZERO_IS_SCRIPT */ + return yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s, len, + orig_keyword, gv, gvp, formbrack, saw_infix_sigil); - /* - * Look for options. - */ - d = instr(s,"perl -"); - if (!d) { - d = instr(s,"perl"); -#if defined(DOSISH) - /* avoid getting into infinite loops when shebang - * line contains "Perl" rather than "perl" */ - if (!d) { - for (d = ipathend-4; d >= ipath; --d) { - if (isALPHA_FOLD_EQ(*d, 'p') - && !ibcmp(d, "perl", 4)) - { - break; - } - } - if (d < ipath) - d = NULL; - } -#endif - } -#ifdef ALTERNATE_SHEBANG - /* - * If the ALTERNATE_SHEBANG on this system starts with a - * character that can be part of a Perl expression, then if - * we see it but not "perl", we're probably looking at the - * start of Perl code, not a request to hand off to some - * other interpreter. Similarly, if "perl" is there, but - * not in the first 'word' of the line, we assume the line - * contains the start of the Perl program. - */ - if (d && *s != '#') { - const char *c = ipath; - while (*c && !strchr("; \t\r\n\f\v#", *c)) - c++; - if (c < d) - d = NULL; /* "perl" not in first word; ignore */ - else - *s = '#'; /* Don't try to parse shebang line */ - } -#endif /* ALTERNATE_SHEBANG */ - if (!d - && *s == '#' - && ipathend > ipath - && !PL_minus_c - && !instr(s,"indir") - && instr(PL_origargv[0],"perl")) - { - dVAR; - char **newargv; - - *ipathend = '\0'; - s = ipathend + 1; - while (s < PL_bufend && isSPACE(*s)) - s++; - if (s < PL_bufend) { - Newx(newargv,PL_origargc+3,char*); - newargv[1] = s; - while (s < PL_bufend && !isSPACE(*s)) - s++; - *s = '\0'; - Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*); - } - else - newargv = PL_origargv; - newargv[0] = ipath; - PERL_FPU_PRE_EXEC - PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv)); - PERL_FPU_POST_EXEC - Perl_croak(aTHX_ "Can't exec %s", ipath); - } - if (d) { - while (*d && !isSPACE(*d)) - d++; - while (SPACE_OR_TAB(*d)) - d++; - - if (*d++ == '-') { - const bool switches_done = PL_doswitches; - const U32 oldpdb = PL_perldb; - const bool oldn = PL_minus_n; - const bool oldp = PL_minus_p; - const char *d1 = d; - - do { - bool baduni = FALSE; - if (*d1 == 'C') { - const char *d2 = d1 + 1; - if (parse_unicode_opts((const char **)&d2) - != PL_unicode) - baduni = TRUE; - } - if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) { - const char * const m = d1; - while (*d1 && !isSPACE(*d1)) - d1++; - Perl_croak(aTHX_ "Too late for \"-%.*s\" option", - (int)(d1 - m), m); - } - d1 = moreswitches(d1); - } while (d1); - if (PL_doswitches && !switches_done) { - int argc = PL_origargc; - char **argv = PL_origargv; - do { - argc--,argv++; - } while (argc && argv[0][0] == '-' && argv[0][1]); - init_argv_symbols(argc,argv); - } - if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb) - || ((PL_minus_n || PL_minus_p) && !(oldn || oldp))) - /* if we have already added "LINE: while (<>) {", - we must not do it again */ - { - SvPVCLEAR(PL_linestr); - PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); - PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); - PL_last_lop = PL_last_uni = NULL; - PL_preambled = FALSE; - if (PERLDB_LINE_OR_SAVESRC) - (void)gv_fetchfile(PL_origfilename); - return RETRY(); - } - } - } - } - } - if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { - PL_lex_state = LEX_FORMLINE; - force_next(FORMRBRACK); - TOKEN(';'); - } - return RETRY(); case '\r': #ifdef PERL_STRICT_CR Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); @@ -8050,8 +8069,8 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, case KEY___END__: if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) yyl_data_handle(aTHX); - fake_eof = LEX_FAKE_EOF; - goto fake_eof; + return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, bof, s, len, + orig_keyword, gv, gvp, formbrack, saw_infix_sigil); case KEY___SUB__: FUN0OP(CvCLONE(PL_compcv) From 20b318f8fb4f5b6fe8e1f7319fb81cf46c9ae7eb Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Mon, 21 Oct 2019 12:43:17 +0200 Subject: [PATCH 02/20] toke.c: factor out static yyl_fatcomma() This removes a goto label. --- toke.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/toke.c b/toke.c index b5bd92a88cf8..a574a51d0781 100644 --- a/toke.c +++ b/toke.c @@ -7083,6 +7083,17 @@ yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s, STRLEN len, return RETRY(); } +static int +yyl_fatcomma(pTHX_ char *s, STRLEN len) +{ + CLINE; + pl_yylval.opval + = newSVOP(OP_CONST, 0, + S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); + pl_yylval.opval->op_private = OPpCONST_BARE; + TERM(BAREWORD); +} + static int yyl_try(pTHX_ char initial_state, char *s, STRLEN len, I32 orig_keyword, GV *gv, GV **gvp, @@ -7590,13 +7601,7 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, /* Is this a word before a => operator? */ if (*d == '=' && d[1] == '>') { - fat_arrow: - CLINE; - pl_yylval.opval - = newSVOP(OP_CONST, 0, - S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); - pl_yylval.opval->op_private = OPpCONST_BARE; - TERM(BAREWORD); + return yyl_fatcomma(aTHX_ s, len); } /* Check for plugged-in keyword */ @@ -7690,7 +7695,7 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, PL_bufptr = SvPVX(PL_linestr) + bufoff; s = SvPVX(PL_linestr) + soff; if (arrow) - goto fat_arrow; + return yyl_fatcomma(aTHX_ s, len); } reserved_word: From 0ac605481f7ff6dd790220ec8f437604e67fac30 Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Mon, 21 Oct 2019 13:34:33 +0200 Subject: [PATCH 03/20] toke.c: fold some initialisations into the corresponding declarations --- toke.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/toke.c b/toke.c index a574a51d0781..57ca09e02573 100644 --- a/toke.c +++ b/toke.c @@ -7567,21 +7567,16 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, keylookup: { bool anydelim; - bool lex; + bool lex = FALSE; I32 tmp; - SV *sv; - CV *cv; - PADOFFSET off; - OP *rv2cv_op; + SV *sv = NULL; + CV *cv = NULL; + PADOFFSET off = 0; + OP *rv2cv_op = NULL; - lex = FALSE; orig_keyword = 0; - off = 0; - sv = NULL; - cv = NULL; gv = NULL; gvp = NULL; - rv2cv_op = NULL; PL_bufptr = s; s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); From f5417bd96886c039469e82f67f4f10c44e904b84 Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Mon, 21 Oct 2019 13:39:10 +0200 Subject: [PATCH 04/20] toke.c: factor out static yyl_safe_bareword() --- toke.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/toke.c b/toke.c index 57ca09e02573..bd7927d48f3f 100644 --- a/toke.c +++ b/toke.c @@ -7094,6 +7094,24 @@ yyl_fatcomma(pTHX_ char *s, STRLEN len) TERM(BAREWORD); } +static int +yyl_safe_bareword(pTHX_ char *s, const char lastchar, const bool saw_infix_sigil) +{ + if ((lastchar == '*' || lastchar == '%' || lastchar == '&') + && saw_infix_sigil) + { + Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), + "Operator or semicolon missing before %c%" UTF8f, + lastchar, + UTF8fARG(UTF, strlen(PL_tokenbuf), + PL_tokenbuf)); + Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), + "Ambiguous use of %c resolved as operator %c", + lastchar, lastchar); + } + TOKEN(BAREWORD); +} + static int yyl_try(pTHX_ char initial_state, char *s, STRLEN len, I32 orig_keyword, GV *gv, GV **gvp, @@ -7795,7 +7813,7 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, /* And if "Foo::", then that's what it certainly is. */ if (safebw) - goto safe_bareword; + return yyl_safe_bareword(aTHX_ s, lastchar, saw_infix_sigil); if (!off) { @@ -8031,19 +8049,7 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, } op_free(rv2cv_op); - safe_bareword: - if ((lastchar == '*' || lastchar == '%' || lastchar == '&') - && saw_infix_sigil) { - Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), - "Operator or semicolon missing before %c%" UTF8f, - lastchar, - UTF8fARG(UTF, strlen(PL_tokenbuf), - PL_tokenbuf)); - Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous use of %c resolved as operator %c", - lastchar, lastchar); - } - TOKEN(BAREWORD); + return yyl_safe_bareword(aTHX_ s, lastchar, saw_infix_sigil); } case KEY___FILE__: From 305b85ecc46d0a05eaa0832167c160dd5d138520 Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Mon, 21 Oct 2019 13:55:22 +0200 Subject: [PATCH 05/20] toke.c: factor out static yyl_constant_op() With the removal of another goto label! --- toke.c | 91 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 48 insertions(+), 43 deletions(-) diff --git a/toke.c b/toke.c index bd7927d48f3f..2ad2cb395d5c 100644 --- a/toke.c +++ b/toke.c @@ -7112,6 +7112,50 @@ yyl_safe_bareword(pTHX_ char *s, const char lastchar, const bool saw_infix_sigil TOKEN(BAREWORD); } +static int +yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off) +{ + if (sv) { + op_free(rv2cv_op); + SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); + ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); + if (SvTYPE(sv) == SVt_PVAV) + pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS, + pl_yylval.opval); + else { + pl_yylval.opval->op_private = 0; + pl_yylval.opval->op_folded = 1; + pl_yylval.opval->op_flags |= OPf_SPECIAL; + } + TOKEN(BAREWORD); + } + + op_free(pl_yylval.opval); + pl_yylval.opval = + off ? newCVREF(0, rv2cv_op) : rv2cv_op; + pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; + PL_last_lop = PL_oldbufptr; + PL_last_lop_op = OP_ENTERSUB; + + /* Is there a prototype? */ + if (SvPOK(cv)) { + int k = yyl_subproto(aTHX_ s, cv); + if (k != KEY_NULL) + return k; + } + + NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; + PL_expect = XTERM; + force_next(off ? PRIVATEREF : BAREWORD); + if (!PL_lex_allbrackets + && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) + { + PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; + } + + TOKEN(NOAMP); +} + static int yyl_try(pTHX_ char initial_state, char *s, STRLEN len, I32 orig_keyword, GV *gv, GV **gvp, @@ -7912,10 +7956,8 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, d = s + 1; while (SPACE_OR_TAB(*d)) d++; - if (*d == ')' && (sv = cv_const_sv_or_av(cv))) { - s = d + 1; - goto its_constant; - } + if (*d == ')' && (sv = cv_const_sv_or_av(cv))) + return yyl_constant_op(aTHX_ d + 1, sv, cv, rv2cv_op, off); } NEXTVAL_NEXTTOKE.opval = off ? rv2cv_op : pl_yylval.opval; @@ -7972,45 +8014,8 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, if (cv) { /* Check for a constant sub */ - if ((sv = cv_const_sv_or_av(cv))) { - its_constant: - op_free(rv2cv_op); - SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); - ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); - if (SvTYPE(sv) == SVt_PVAV) - pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS, - pl_yylval.opval); - else { - pl_yylval.opval->op_private = 0; - pl_yylval.opval->op_folded = 1; - pl_yylval.opval->op_flags |= OPf_SPECIAL; - } - TOKEN(BAREWORD); - } - - op_free(pl_yylval.opval); - pl_yylval.opval = - off ? newCVREF(0, rv2cv_op) : rv2cv_op; - pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; - PL_last_lop = PL_oldbufptr; - PL_last_lop_op = OP_ENTERSUB; - - /* Is there a prototype? */ - if (SvPOK(cv)) { - int k = yyl_subproto(aTHX_ s, cv); - if (k != KEY_NULL) - return k; - } - - NEXTVAL_NEXTTOKE.opval = pl_yylval.opval; - PL_expect = XTERM; - force_next(off ? PRIVATEREF : BAREWORD); - if (!PL_lex_allbrackets - && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) - { - PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; - } - TOKEN(NOAMP); + sv = cv_const_sv_or_av(cv); + return yyl_constant_op(aTHX_ s, sv, cv, rv2cv_op, off); } /* Call it a bare word */ From 1fd0b21f815e837ba8c7f902b22b4507d5689ce8 Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Mon, 21 Oct 2019 14:12:16 +0200 Subject: [PATCH 06/20] toke.c: remove the really_sub goto label This permits some additional pleasing simplifications. --- toke.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/toke.c b/toke.c index 2ad2cb395d5c..29d2e12dd9ec 100644 --- a/toke.c +++ b/toke.c @@ -6723,9 +6723,8 @@ yyl_do(pTHX_ char *s, I32 orig_keyword) } static int -yyl_my(pTHX_ char **sp, I32 my) +yyl_my(pTHX_ char *s, I32 my) { - char *s = *sp; if (PL_in_my) { PL_bufptr = s; yyerror(Perl_form(aTHX_ @@ -6740,10 +6739,8 @@ yyl_my(pTHX_ char **sp, I32 my) if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { STRLEN len; s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); - if (memEQs(PL_tokenbuf, len, "sub")) { - *sp = s; - return SUB; - } + if (memEQs(PL_tokenbuf, len, "sub")) + return yyl_sub(aTHX_ s, my); PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len); if (!PL_in_my_stash) { char tmpbuf[1024]; @@ -8095,10 +8092,8 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, case KEY_CHECK: case KEY_INIT: case KEY_END: - if (PL_expect == XSTATE) { - s = PL_bufptr; - goto really_sub; - } + if (PL_expect == XSTATE) + return yyl_sub(aTHX_ PL_bufptr, tmp); goto just_a_word; case_KEY_CORE: @@ -8538,13 +8533,8 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, case KEY_our: case KEY_my: - case KEY_state: { - int tok = yyl_my(aTHX_ &s, tmp); - if (tok == SUB) - goto really_sub; - else - return tok; - } + case KEY_state: + return yyl_my(aTHX_ s, tmp); case KEY_next: LOOPX(OP_NEXT); @@ -8851,7 +8841,6 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, case KEY_format: case KEY_sub: - really_sub: return yyl_sub(aTHX_ s, tmp); case KEY_system: From be24a7d746539a2a621a5cb132727c2fa570d078 Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Mon, 21 Oct 2019 17:51:54 +0200 Subject: [PATCH 07/20] toke.c: factor out static yyl_strictwarn_bareword() --- toke.c | 70 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 31 deletions(-) diff --git a/toke.c b/toke.c index 29d2e12dd9ec..5599904682f9 100644 --- a/toke.c +++ b/toke.c @@ -7153,6 +7153,39 @@ yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off) TOKEN(NOAMP); } +/* Honour "reserved word" warnings, and enforce strict subs */ +static void +yyl_strictwarn_bareword(pTHX_ const char lastchar) +{ + /* after "print" and similar functions (corresponding to + * "F? L" in opcode.pl), whatever wasn't already parsed as + * a filehandle should be subject to "strict subs". + * Likewise for the optional indirect-object argument to system + * or exec, which can't be a bareword */ + if ((PL_last_lop_op == OP_PRINT + || PL_last_lop_op == OP_PRTF + || PL_last_lop_op == OP_SAY + || PL_last_lop_op == OP_SYSTEM + || PL_last_lop_op == OP_EXEC) + && (PL_hints & HINT_STRICT_SUBS)) + { + pl_yylval.opval->op_private |= OPpCONST_STRICT; + } + + if (lastchar != '-' && ckWARN(WARN_RESERVED)) { + char *d = PL_tokenbuf; + while (isLOWER(*d)) + d++; + if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) { + /* PL_warn_reserved is constant */ + GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); + Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved, + PL_tokenbuf); + GCC_DIAG_RESTORE_STMT; + } + } +} + static int yyl_try(pTHX_ char initial_state, char *s, STRLEN len, I32 orig_keyword, GV *gv, GV **gvp, @@ -7921,7 +7954,9 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, ) { PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR; - goto bareword; + yyl_strictwarn_bareword(aTHX_ lastchar); + op_free(rv2cv_op); + return yyl_safe_bareword(aTHX_ s, lastchar, saw_infix_sigil); } } @@ -8019,36 +8054,9 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, if (PL_hints & HINT_STRICT_SUBS) pl_yylval.opval->op_private |= OPpCONST_STRICT; - else { - bareword: - /* after "print" and similar functions (corresponding to - * "F? L" in opcode.pl), whatever wasn't already parsed as - * a filehandle should be subject to "strict subs". - * Likewise for the optional indirect-object argument to system - * or exec, which can't be a bareword */ - if ((PL_last_lop_op == OP_PRINT - || PL_last_lop_op == OP_PRTF - || PL_last_lop_op == OP_SAY - || PL_last_lop_op == OP_SYSTEM - || PL_last_lop_op == OP_EXEC) - && (PL_hints & HINT_STRICT_SUBS)) - pl_yylval.opval->op_private |= OPpCONST_STRICT; - if (lastchar != '-') { - if (ckWARN(WARN_RESERVED)) { - d = PL_tokenbuf; - while (isLOWER(*d)) - d++; - if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) - { - /* PL_warn_reserved is constant */ - GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); - Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved, - PL_tokenbuf); - GCC_DIAG_RESTORE_STMT; - } - } - } - } + else + yyl_strictwarn_bareword(aTHX_ lastchar); + op_free(rv2cv_op); return yyl_safe_bareword(aTHX_ s, lastchar, saw_infix_sigil); From eac79f666961bf708020663fb7e9bc1ea956d5c9 Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Tue, 22 Oct 2019 22:49:00 +0100 Subject: [PATCH 08/20] toke.c: stop passing around several needless local variables I introduced these parameters as part of mechanically refactoring goto-heavy logic into subroutines. However, they aren't actually needed through most of the code. Even in the recursive case (in which yyl_try() or one of its callees will call itself), we can reset the variables to zero. --- toke.c | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/toke.c b/toke.c index 5599904682f9..1fea6b69c53d 100644 --- a/toke.c +++ b/toke.c @@ -6762,15 +6762,12 @@ yyl_my(pTHX_ char *s, I32 my) OPERATOR(MY); } -static int yyl_try(pTHX_ char, char*, STRLEN, I32, GV*, GV**, U8, U32, const bool); +static int yyl_try(pTHX_ char, char*, STRLEN, U8, const bool); -#define RETRY() yyl_try(aTHX_ 0, s, len, orig_keyword, gv, gvp, \ - formbrack, fake_eof, saw_infix_sigil) +#define RETRY() yyl_try(aTHX_ 0, s, len, 0, 0) static int -yyl_eol(pTHX_ char *s, STRLEN len, - I32 orig_keyword, GV *gv, GV **gvp, - U8 formbrack, U32 fake_eof, const bool saw_infix_sigil) +yyl_eol(pTHX_ char *s, STRLEN len) { if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) @@ -6814,9 +6811,7 @@ yyl_eol(pTHX_ char *s, STRLEN len, } static int -yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s, STRLEN len, - I32 orig_keyword, GV *gv, GV **gvp, - U8 formbrack, const bool saw_infix_sigil) +yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s, STRLEN len) { char *d; @@ -7188,11 +7183,11 @@ yyl_strictwarn_bareword(pTHX_ const char lastchar) static int yyl_try(pTHX_ char initial_state, char *s, STRLEN len, - I32 orig_keyword, GV *gv, GV **gvp, - U8 formbrack, U32 fake_eof, const bool saw_infix_sigil) + U8 formbrack, const bool saw_infix_sigil) { char *d; bool bof = FALSE; + GV *gv = NULL, **gvp = NULL; switch (initial_state) { case '}': goto rightbracket; @@ -7207,8 +7202,7 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, case 4: case 26: /* emulate EOF on ^D or ^Z */ - return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s, len, - orig_keyword, gv, gvp, formbrack, saw_infix_sigil); + return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s, len); case 0: if ((!PL_rsfp || PL_lex_inwhat) @@ -7309,8 +7303,7 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, update_debugger_info(PL_linestr, NULL, 0); return RETRY(); } - return yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s, len, - orig_keyword, gv, gvp, formbrack, saw_infix_sigil); + return yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s, len); case '\r': #ifdef PERL_STRICT_CR @@ -7324,8 +7317,7 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, case '#': case '\n': - return yyl_eol(aTHX_ s, len, orig_keyword, gv, gvp, - formbrack, fake_eof, saw_infix_sigil); + return yyl_eol(aTHX_ s, len); case '-': return yyl_hyphen(aTHX_ s); @@ -7665,8 +7657,8 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, CV *cv = NULL; PADOFFSET off = 0; OP *rv2cv_op = NULL; + I32 orig_keyword = 0; - orig_keyword = 0; gv = NULL; gvp = NULL; @@ -8085,8 +8077,7 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, case KEY___END__: if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) yyl_data_handle(aTHX); - return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, bof, s, len, - orig_keyword, gv, gvp, formbrack, saw_infix_sigil); + return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, bof, s, len); case KEY___SUB__: FUN0OP(CvCLONE(PL_compcv) @@ -9284,7 +9275,7 @@ Perl_yylex(pTHX) assert(PL_lex_formbrack); s = scan_formline(PL_bufptr); if (!PL_lex_formbrack) { - return yyl_try(aTHX_ '}', s, 0, 0, NULL, NULL, 1, 0, saw_infix_sigil); + return yyl_try(aTHX_ '}', s, 0, 1, saw_infix_sigil); } PL_bufptr = s; return yylex(); @@ -9301,7 +9292,7 @@ Perl_yylex(pTHX) return yyl_sigvar(aTHX_ s); } - return yyl_try(aTHX_ 0, s, 0, 0, NULL, NULL, 0, 0, saw_infix_sigil); + return yyl_try(aTHX_ 0, s, 0, 0, saw_infix_sigil); } From cd77589fe037f9469baae92cb5d6af3ed823cc2d Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Wed, 23 Oct 2019 12:21:53 +0100 Subject: [PATCH 09/20] toke.c: factor out static yyl_just_a_word() --- toke.c | 566 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 275 insertions(+), 291 deletions(-) diff --git a/toke.c b/toke.c index 1fea6b69c53d..e05a0a669b91 100644 --- a/toke.c +++ b/toke.c @@ -7181,6 +7181,259 @@ yyl_strictwarn_bareword(pTHX_ const char lastchar) } } +static int +yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, PADOFFSET off, + I32 orig_keyword, SV *sv, CV *cv, GV *gv, GV **gvp, + OP *rv2cv_op, const bool lex, const bool saw_infix_sigil) +{ + int pkgname = 0; + const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); + bool safebw; + bool no_op_error = FALSE; + + if (PL_expect == XOPERATOR) { + if (PL_bufptr == PL_linestart) { + CopLINE_dec(PL_curcop); + Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); + CopLINE_inc(PL_curcop); + } + else + /* We want to call no_op with s pointing after the + bareword, so defer it. But we want it to come + before the Bad name croak. */ + no_op_error = TRUE; + } + + /* Get the rest if it looks like a package qualifier */ + + if (*s == '\'' || (*s == ':' && s[1] == ':')) { + STRLEN morelen; + s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, + TRUE, &morelen); + if (no_op_error) { + no_op("Bareword",s); + no_op_error = FALSE; + } + if (!morelen) + Perl_croak(aTHX_ "Bad name after %" UTF8f "%s", + UTF8fARG(UTF, len, PL_tokenbuf), + *s == '\'' ? "'" : "::"); + len += morelen; + pkgname = 1; + } + + if (no_op_error) + no_op("Bareword",s); + + /* See if the name is "Foo::", + in which case Foo is a bareword + (and a package name). */ + + if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') { + if (ckWARN(WARN_BAREWORD) + && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV)) + Perl_warner(aTHX_ packWARN(WARN_BAREWORD), + "Bareword \"%" UTF8f + "\" refers to nonexistent package", + UTF8fARG(UTF, len, PL_tokenbuf)); + len -= 2; + PL_tokenbuf[len] = '\0'; + gv = NULL; + gvp = 0; + safebw = TRUE; + } + else { + safebw = FALSE; + } + + /* if we saw a global override before, get the right name */ + + if (!sv) + sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len); + if (gvp) { + SV * const tmp_sv = sv; + sv = newSVpvs("CORE::GLOBAL::"); + sv_catsv(sv, tmp_sv); + SvREFCNT_dec(tmp_sv); + } + + /* Presume this is going to be a bareword of some sort. */ + CLINE; + pl_yylval.opval = newSVOP(OP_CONST, 0, sv); + pl_yylval.opval->op_private = OPpCONST_BARE; + + /* And if "Foo::", then that's what it certainly is. */ + if (safebw) + return yyl_safe_bareword(aTHX_ s, lastchar, saw_infix_sigil); + + if (!off) { + OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv)); + const_op->op_private = OPpCONST_BARE; + rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op); + cv = lex + ? isGV(gv) + ? GvCV(gv) + : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV + ? (CV *)SvRV(gv) + : ((CV *)gv) + : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB); + } + + /* Use this var to track whether intuit_method has been + called. intuit_method returns 0 or > 255. */ + key = 1; + + /* See if it's the indirect object for a list operator. */ + + if (PL_oldoldbufptr + && PL_oldoldbufptr < PL_bufptr + && (PL_oldoldbufptr == PL_last_lop + || PL_oldoldbufptr == PL_last_uni) + && /* NO SKIPSPACE BEFORE HERE! */ + (PL_expect == XREF + || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) + == OA_FILEREF)) + { + bool immediate_paren = *s == '('; + SSize_t s_off; + + /* (Now we can afford to cross potential line boundary.) */ + s = skipspace(s); + + /* intuit_method() can indirectly call lex_next_chunk(), + * invalidating s + */ + s_off = s - SvPVX(PL_linestr); + /* Two barewords in a row may indicate method call. */ + if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) + || *s == '$') + && (key = intuit_method(s, lex ? NULL : sv, cv))) + { + /* the code at method: doesn't use s */ + goto method; + } + s = SvPVX(PL_linestr) + s_off; + + /* If not a declared subroutine, it's an indirect object. */ + /* (But it's an indir obj regardless for sort.) */ + /* Also, if "_" follows a filetest operator, it's a bareword */ + + if ( + ( !immediate_paren && (PL_last_lop_op == OP_SORT + || (!cv + && (PL_last_lop_op != OP_MAPSTART + && PL_last_lop_op != OP_GREPSTART)))) + || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0' + && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) + == OA_FILESTATOP)) + ) + { + PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR; + yyl_strictwarn_bareword(aTHX_ lastchar); + op_free(rv2cv_op); + return yyl_safe_bareword(aTHX_ s, lastchar, saw_infix_sigil); + } + } + + PL_expect = XOPERATOR; + s = skipspace(s); + + /* Is this a word before a => operator? */ + if (*s == '=' && s[1] == '>' && !pkgname) { + op_free(rv2cv_op); + CLINE; + if (gvp || (lex && !off)) { + assert (cSVOPx(pl_yylval.opval)->op_sv == sv); + /* This is our own scalar, created a few lines + above, so this is safe. */ + SvREADONLY_off(sv); + sv_setpv(sv, PL_tokenbuf); + if (UTF && !IN_BYTES + && is_utf8_string((U8*)PL_tokenbuf, len)) + SvUTF8_on(sv); + SvREADONLY_on(sv); + } + TERM(BAREWORD); + } + + /* If followed by a paren, it's certainly a subroutine. */ + if (*s == '(') { + CLINE; + if (cv) { + char *d = s + 1; + while (SPACE_OR_TAB(*d)) + d++; + if (*d == ')' && (sv = cv_const_sv_or_av(cv))) + return yyl_constant_op(aTHX_ d + 1, sv, cv, rv2cv_op, off); + } + NEXTVAL_NEXTTOKE.opval = + off ? rv2cv_op : pl_yylval.opval; + if (off) + op_free(pl_yylval.opval), force_next(PRIVATEREF); + else op_free(rv2cv_op), force_next(BAREWORD); + pl_yylval.ival = 0; + TOKEN('&'); + } + + /* If followed by var or block, call it a method (unless sub) */ + + if ((*s == '$' || *s == '{') && !cv) { + op_free(rv2cv_op); + PL_last_lop = PL_oldbufptr; + PL_last_lop_op = OP_METHOD; + if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) + PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; + PL_expect = XBLOCKTERM; + PL_bufptr = s; + return REPORT(METHOD); + } + + /* If followed by a bareword, see if it looks like indir obj. */ + + if ( key == 1 + && !orig_keyword + && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$') + && (key = intuit_method(s, lex ? NULL : sv, cv))) + { + method: + if (lex && !off) { + assert(cSVOPx(pl_yylval.opval)->op_sv == sv); + SvREADONLY_off(sv); + sv_setpvn(sv, PL_tokenbuf, len); + if (UTF && !IN_BYTES + && is_utf8_string((U8*)PL_tokenbuf, len)) + SvUTF8_on (sv); + else SvUTF8_off(sv); + } + op_free(rv2cv_op); + if (key == METHOD && !PL_lex_allbrackets + && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) + { + PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; + } + return REPORT(key); + } + + /* Not a method, so call it a subroutine (if defined) */ + + if (cv) { + /* Check for a constant sub */ + sv = cv_const_sv_or_av(cv); + return yyl_constant_op(aTHX_ s, sv, cv, rv2cv_op, off); + } + + /* Call it a bare word */ + + if (PL_hints & HINT_STRICT_SUBS) + pl_yylval.opval->op_private |= OPpCONST_STRICT; + else + yyl_strictwarn_bareword(aTHX_ lastchar); + + op_free(rv2cv_op); + + return yyl_safe_bareword(aTHX_ s, lastchar, saw_infix_sigil); +} + static int yyl_try(pTHX_ char initial_state, char *s, STRLEN len, U8 formbrack, const bool saw_infix_sigil) @@ -7346,10 +7599,9 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, s++; OPERATOR(','); case ':': - if (s[1] == ':') { - len = 0; - goto just_a_word_zero_gv; - } + if (s[1] == ':') + return yyl_just_a_word(aTHX_ s, 0, 0, 0, 0, 0, NULL, NULL, NULL, + NULL, NULL, saw_infix_sigil); return yyl_colon(aTHX_ s + 1); case '(': @@ -7651,8 +7903,7 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, keylookup: { bool anydelim; - bool lex = FALSE; - I32 tmp; + I32 tmp = 0; SV *sv = NULL; CV *cv = NULL; PADOFFSET off = 0; @@ -7671,7 +7922,8 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, /* x::* is just a word, unless x is "CORE" */ if (!anydelim && *s == ':' && s[1] == ':') { if (memEQs(PL_tokenbuf, len, "CORE")) goto case_KEY_CORE; - goto just_a_word; + return yyl_just_a_word(aTHX_ s, len, 0, off, orig_keyword, sv, cv, + gv, gvp, rv2cv_op, FALSE, saw_infix_sigil); } d = s; @@ -7744,8 +7996,9 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, off = 0; if (!gv) { sv_free(sv); - sv = NULL; - goto just_a_word; + return yyl_just_a_word(aTHX_ s, len, tmp, off, + orig_keyword, NULL, cv, gv, gvp, + rv2cv_op, FALSE, saw_infix_sigil); } } else { @@ -7753,8 +8006,8 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, rv2cv_op->op_targ = off; cv = find_lexical_cv(off); } - lex = TRUE; - goto just_a_word; + return yyl_just_a_word(aTHX_ s, len, tmp, off, orig_keyword, sv, + cv, gv, gvp, rv2cv_op, TRUE, saw_infix_sigil); } off = 0; } @@ -7779,280 +8032,9 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, reserved_word: switch (tmp) { - - /* Trade off - by using this evil construction we can pull the - variable gv into the block labelled keylookup. If not, then - we have to give it function scope so that the goto from the - earlier ':' case doesn't bypass the initialisation. */ - just_a_word_zero_gv: - sv = NULL; - cv = NULL; - gv = NULL; - gvp = NULL; - rv2cv_op = NULL; - orig_keyword = 0; - lex = 0; - off = 0; - /* FALLTHROUGH */ default: /* not a keyword */ - just_a_word: { - int pkgname = 0; - const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); - bool safebw; - bool no_op_error = FALSE; - - if (PL_expect == XOPERATOR) { - if (PL_bufptr == PL_linestart) { - CopLINE_dec(PL_curcop); - Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); - CopLINE_inc(PL_curcop); - } - else - /* We want to call no_op with s pointing after the - bareword, so defer it. But we want it to come - before the Bad name croak. */ - no_op_error = TRUE; - } - - /* Get the rest if it looks like a package qualifier */ - - if (*s == '\'' || (*s == ':' && s[1] == ':')) { - STRLEN morelen; - s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, - TRUE, &morelen); - if (no_op_error) { - no_op("Bareword",s); - no_op_error = FALSE; - } - if (!morelen) - Perl_croak(aTHX_ "Bad name after %" UTF8f "%s", - UTF8fARG(UTF, len, PL_tokenbuf), - *s == '\'' ? "'" : "::"); - len += morelen; - pkgname = 1; - } - - if (no_op_error) - no_op("Bareword",s); - - /* See if the name is "Foo::", - in which case Foo is a bareword - (and a package name). */ - - if (len > 2 - && PL_tokenbuf[len - 2] == ':' - && PL_tokenbuf[len - 1] == ':') - { - if (ckWARN(WARN_BAREWORD) - && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV)) - Perl_warner(aTHX_ packWARN(WARN_BAREWORD), - "Bareword \"%" UTF8f - "\" refers to nonexistent package", - UTF8fARG(UTF, len, PL_tokenbuf)); - len -= 2; - PL_tokenbuf[len] = '\0'; - gv = NULL; - gvp = 0; - safebw = TRUE; - } - else { - safebw = FALSE; - } - - /* if we saw a global override before, get the right name */ - - if (!sv) - sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, - len); - if (gvp) { - SV * const tmp_sv = sv; - sv = newSVpvs("CORE::GLOBAL::"); - sv_catsv(sv, tmp_sv); - SvREFCNT_dec(tmp_sv); - } - - - /* Presume this is going to be a bareword of some sort. */ - CLINE; - pl_yylval.opval = newSVOP(OP_CONST, 0, sv); - pl_yylval.opval->op_private = OPpCONST_BARE; - - /* And if "Foo::", then that's what it certainly is. */ - if (safebw) - return yyl_safe_bareword(aTHX_ s, lastchar, saw_infix_sigil); - - if (!off) - { - OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv)); - const_op->op_private = OPpCONST_BARE; - rv2cv_op = - newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op); - cv = lex - ? isGV(gv) - ? GvCV(gv) - : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV - ? (CV *)SvRV(gv) - : ((CV *)gv) - : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB); - } - - /* Use this var to track whether intuit_method has been - called. intuit_method returns 0 or > 255. */ - tmp = 1; - - /* See if it's the indirect object for a list operator. */ - - if (PL_oldoldbufptr - && PL_oldoldbufptr < PL_bufptr - && (PL_oldoldbufptr == PL_last_lop - || PL_oldoldbufptr == PL_last_uni) - && /* NO SKIPSPACE BEFORE HERE! */ - (PL_expect == XREF - || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) - == OA_FILEREF)) - { - bool immediate_paren = *s == '('; - SSize_t s_off; - - /* (Now we can afford to cross potential line boundary.) */ - s = skipspace(s); - - /* intuit_method() can indirectly call lex_next_chunk(), - * invalidating s - */ - s_off = s - SvPVX(PL_linestr); - /* Two barewords in a row may indicate method call. */ - if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) - || *s == '$') - && (tmp = intuit_method(s, lex ? NULL : sv, cv))) - { - /* the code at method: doesn't use s */ - goto method; - } - s = SvPVX(PL_linestr) + s_off; - - /* If not a declared subroutine, it's an indirect object. */ - /* (But it's an indir obj regardless for sort.) */ - /* Also, if "_" follows a filetest operator, it's a bareword */ - - if ( - ( !immediate_paren && (PL_last_lop_op == OP_SORT - || (!cv - && (PL_last_lop_op != OP_MAPSTART - && PL_last_lop_op != OP_GREPSTART)))) - || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0' - && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) - == OA_FILESTATOP)) - ) - { - PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR; - yyl_strictwarn_bareword(aTHX_ lastchar); - op_free(rv2cv_op); - return yyl_safe_bareword(aTHX_ s, lastchar, saw_infix_sigil); - } - } - - PL_expect = XOPERATOR; - s = skipspace(s); - - /* Is this a word before a => operator? */ - if (*s == '=' && s[1] == '>' && !pkgname) { - op_free(rv2cv_op); - CLINE; - if (gvp || (lex && !off)) { - assert (cSVOPx(pl_yylval.opval)->op_sv == sv); - /* This is our own scalar, created a few lines - above, so this is safe. */ - SvREADONLY_off(sv); - sv_setpv(sv, PL_tokenbuf); - if (UTF && !IN_BYTES - && is_utf8_string((U8*)PL_tokenbuf, len)) - SvUTF8_on(sv); - SvREADONLY_on(sv); - } - TERM(BAREWORD); - } - - /* If followed by a paren, it's certainly a subroutine. */ - if (*s == '(') { - CLINE; - if (cv) { - d = s + 1; - while (SPACE_OR_TAB(*d)) - d++; - if (*d == ')' && (sv = cv_const_sv_or_av(cv))) - return yyl_constant_op(aTHX_ d + 1, sv, cv, rv2cv_op, off); - } - NEXTVAL_NEXTTOKE.opval = - off ? rv2cv_op : pl_yylval.opval; - if (off) - op_free(pl_yylval.opval), force_next(PRIVATEREF); - else op_free(rv2cv_op), force_next(BAREWORD); - pl_yylval.ival = 0; - TOKEN('&'); - } - - /* If followed by var or block, call it a method (unless sub) */ - - if ((*s == '$' || *s == '{') && !cv) { - op_free(rv2cv_op); - PL_last_lop = PL_oldbufptr; - PL_last_lop_op = OP_METHOD; - if (!PL_lex_allbrackets - && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) - { - PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; - } - PL_expect = XBLOCKTERM; - PL_bufptr = s; - return REPORT(METHOD); - } - - /* If followed by a bareword, see if it looks like indir obj. */ - - if ( tmp == 1 - && !orig_keyword - && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$') - && (tmp = intuit_method(s, lex ? NULL : sv, cv))) - { - method: - if (lex && !off) { - assert(cSVOPx(pl_yylval.opval)->op_sv == sv); - SvREADONLY_off(sv); - sv_setpvn(sv, PL_tokenbuf, len); - if (UTF && !IN_BYTES - && is_utf8_string((U8*)PL_tokenbuf, len)) - SvUTF8_on (sv); - else SvUTF8_off(sv); - } - op_free(rv2cv_op); - if (tmp == METHOD && !PL_lex_allbrackets - && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) - { - PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; - } - return REPORT(tmp); - } - - /* Not a method, so call it a subroutine (if defined) */ - - if (cv) { - /* Check for a constant sub */ - sv = cv_const_sv_or_av(cv); - return yyl_constant_op(aTHX_ s, sv, cv, rv2cv_op, off); - } - - /* Call it a bare word */ - - if (PL_hints & HINT_STRICT_SUBS) - pl_yylval.opval->op_private |= OPpCONST_STRICT; - else - yyl_strictwarn_bareword(aTHX_ lastchar); - - op_free(rv2cv_op); - - return yyl_safe_bareword(aTHX_ s, lastchar, saw_infix_sigil); - } + return yyl_just_a_word(aTHX_ s, len, tmp, off, orig_keyword, sv, cv, + gv, gvp, rv2cv_op, FALSE, saw_infix_sigil); case KEY___FILE__: FUN0OP( @@ -8093,7 +8075,8 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, case KEY_END: if (PL_expect == XSTATE) return yyl_sub(aTHX_ PL_bufptr, tmp); - goto just_a_word; + return yyl_just_a_word(aTHX_ s, len, tmp, off, orig_keyword, sv, cv, + gv, gvp, rv2cv_op, FALSE, saw_infix_sigil); case_KEY_CORE: { @@ -8104,10 +8087,10 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, if ((*s == ':' && s[1] == ':') || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\'')) { - s = d; - len = olen; Copy(PL_bufptr, PL_tokenbuf, olen, char); - goto just_a_word; + return yyl_just_a_word(aTHX_ d, olen, tmp, off, orig_keyword, + sv, cv, gv, gvp, rv2cv_op, FALSE, + saw_infix_sigil); } if (!tmp) Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword", @@ -8982,7 +8965,8 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, Mop(OP_REPEAT); } check_uni(); - goto just_a_word; + return yyl_just_a_word(aTHX_ s, len, tmp, off, orig_keyword, sv, cv, + gv, gvp, rv2cv_op, FALSE, saw_infix_sigil); case KEY_xor: if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) @@ -9028,10 +9012,10 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, scan built-in keyword (but do nothing with it yet) check for statement label check for lexical subs - goto just_a_word if there is one + return yyl_just_a_word if there is one see whether built-in keyword is overridden switch on keyword number: - - default: just_a_word: + - default: return yyl_just_a_word: not a built-in keyword; handle bareword lookup disambiguate between method and sub call fall back to bareword From 539c839f22460a19392aab61176e02194b84c4d4 Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Wed, 23 Oct 2019 13:11:02 +0100 Subject: [PATCH 10/20] toke.c: bundle some yyl_just_a_word() params into a struct This makes calls to it much easier to understand. --- toke.c | 169 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 98 insertions(+), 71 deletions(-) diff --git a/toke.c b/toke.c index e05a0a669b91..e6fb3220068c 100644 --- a/toke.c +++ b/toke.c @@ -290,6 +290,37 @@ static const char* const lex_state_names[] = { } STMT_END +/* A file-local structure for passing around information about subroutines and + * related definable words */ +struct code { + SV *sv; + CV *cv; + GV *gv, **gvp; + OP *rv2cv_op; + PADOFFSET off; + bool lex; +}; + +static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE }; + +PERL_STATIC_INLINE struct code +make_code(SV *sv, CV *cv, GV *gv, GV **gvp, OP *rv2cv_op, PADOFFSET off, bool lex) +{ + struct code c; + c.sv = sv; + c.sv = sv; + c.cv = cv; + c.gv = gv; + c.gvp = gvp; + c.rv2cv_op = rv2cv_op; + c.off = off; + c.lex = lex; + return c; +} + +#define MAKE_CODE(lEx) make_code(sv, cv, gv, gvp, rv2cv_op, off, lEx) + + #ifdef DEBUGGING /* how to interpret the pl_yylval associated with the token */ @@ -7182,9 +7213,8 @@ yyl_strictwarn_bareword(pTHX_ const char lastchar) } static int -yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, PADOFFSET off, - I32 orig_keyword, SV *sv, CV *cv, GV *gv, GV **gvp, - OP *rv2cv_op, const bool lex, const bool saw_infix_sigil) +yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, + struct code c, const bool saw_infix_sigil) { int pkgname = 0; const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); @@ -7238,8 +7268,8 @@ yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, PADOFFSET off, UTF8fARG(UTF, len, PL_tokenbuf)); len -= 2; PL_tokenbuf[len] = '\0'; - gv = NULL; - gvp = 0; + c.gv = NULL; + c.gvp = 0; safebw = TRUE; } else { @@ -7248,35 +7278,35 @@ yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, PADOFFSET off, /* if we saw a global override before, get the right name */ - if (!sv) - sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len); - if (gvp) { - SV * const tmp_sv = sv; - sv = newSVpvs("CORE::GLOBAL::"); - sv_catsv(sv, tmp_sv); - SvREFCNT_dec(tmp_sv); + if (!c.sv) + c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len); + if (c.gvp) { + SV *sv = newSVpvs("CORE::GLOBAL::"); + sv_catsv(sv, c.sv); + SvREFCNT_dec(c.sv); + c.sv = sv; } /* Presume this is going to be a bareword of some sort. */ CLINE; - pl_yylval.opval = newSVOP(OP_CONST, 0, sv); + pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv); pl_yylval.opval->op_private = OPpCONST_BARE; /* And if "Foo::", then that's what it certainly is. */ if (safebw) return yyl_safe_bareword(aTHX_ s, lastchar, saw_infix_sigil); - if (!off) { - OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv)); + if (!c.off) { + OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv)); const_op->op_private = OPpCONST_BARE; - rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op); - cv = lex - ? isGV(gv) - ? GvCV(gv) - : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV - ? (CV *)SvRV(gv) - : ((CV *)gv) - : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB); + c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op); + c.cv = c.lex + ? isGV(c.gv) + ? GvCV(c.gv) + : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV + ? (CV *)SvRV(c.gv) + : ((CV *)c.gv) + : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB); } /* Use this var to track whether intuit_method has been @@ -7307,7 +7337,7 @@ yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, PADOFFSET off, /* Two barewords in a row may indicate method call. */ if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$') - && (key = intuit_method(s, lex ? NULL : sv, cv))) + && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv))) { /* the code at method: doesn't use s */ goto method; @@ -7320,7 +7350,7 @@ yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, PADOFFSET off, if ( ( !immediate_paren && (PL_last_lop_op == OP_SORT - || (!cv + || (!c.cv && (PL_last_lop_op != OP_MAPSTART && PL_last_lop_op != OP_GREPSTART)))) || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0' @@ -7330,7 +7360,7 @@ yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, PADOFFSET off, { PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR; yyl_strictwarn_bareword(aTHX_ lastchar); - op_free(rv2cv_op); + op_free(c.rv2cv_op); return yyl_safe_bareword(aTHX_ s, lastchar, saw_infix_sigil); } } @@ -7340,18 +7370,18 @@ yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, PADOFFSET off, /* Is this a word before a => operator? */ if (*s == '=' && s[1] == '>' && !pkgname) { - op_free(rv2cv_op); + op_free(c.rv2cv_op); CLINE; - if (gvp || (lex && !off)) { - assert (cSVOPx(pl_yylval.opval)->op_sv == sv); + if (c.gvp || (c.lex && !c.off)) { + assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv); /* This is our own scalar, created a few lines above, so this is safe. */ - SvREADONLY_off(sv); - sv_setpv(sv, PL_tokenbuf); + SvREADONLY_off(c.sv); + sv_setpv(c.sv, PL_tokenbuf); if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) - SvUTF8_on(sv); - SvREADONLY_on(sv); + SvUTF8_on(c.sv); + SvREADONLY_on(c.sv); } TERM(BAREWORD); } @@ -7359,26 +7389,26 @@ yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, PADOFFSET off, /* If followed by a paren, it's certainly a subroutine. */ if (*s == '(') { CLINE; - if (cv) { + if (c.cv) { char *d = s + 1; while (SPACE_OR_TAB(*d)) d++; - if (*d == ')' && (sv = cv_const_sv_or_av(cv))) - return yyl_constant_op(aTHX_ d + 1, sv, cv, rv2cv_op, off); + if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv))) + return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off); } NEXTVAL_NEXTTOKE.opval = - off ? rv2cv_op : pl_yylval.opval; - if (off) + c.off ? c.rv2cv_op : pl_yylval.opval; + if (c.off) op_free(pl_yylval.opval), force_next(PRIVATEREF); - else op_free(rv2cv_op), force_next(BAREWORD); + else op_free(c.rv2cv_op), force_next(BAREWORD); pl_yylval.ival = 0; TOKEN('&'); } /* If followed by var or block, call it a method (unless sub) */ - if ((*s == '$' || *s == '{') && !cv) { - op_free(rv2cv_op); + if ((*s == '$' || *s == '{') && !c.cv) { + op_free(c.rv2cv_op); PL_last_lop = PL_oldbufptr; PL_last_lop_op = OP_METHOD; if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) @@ -7393,19 +7423,19 @@ yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, PADOFFSET off, if ( key == 1 && !orig_keyword && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$') - && (key = intuit_method(s, lex ? NULL : sv, cv))) + && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv))) { method: - if (lex && !off) { - assert(cSVOPx(pl_yylval.opval)->op_sv == sv); - SvREADONLY_off(sv); - sv_setpvn(sv, PL_tokenbuf, len); + if (c.lex && !c.off) { + assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv); + SvREADONLY_off(c.sv); + sv_setpvn(c.sv, PL_tokenbuf, len); if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) - SvUTF8_on (sv); - else SvUTF8_off(sv); + SvUTF8_on(c.sv); + else SvUTF8_off(c.sv); } - op_free(rv2cv_op); + op_free(c.rv2cv_op); if (key == METHOD && !PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) { @@ -7416,10 +7446,10 @@ yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, PADOFFSET off, /* Not a method, so call it a subroutine (if defined) */ - if (cv) { + if (c.cv) { /* Check for a constant sub */ - sv = cv_const_sv_or_av(cv); - return yyl_constant_op(aTHX_ s, sv, cv, rv2cv_op, off); + c.sv = cv_const_sv_or_av(c.cv); + return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off); } /* Call it a bare word */ @@ -7429,7 +7459,7 @@ yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, PADOFFSET off, else yyl_strictwarn_bareword(aTHX_ lastchar); - op_free(rv2cv_op); + op_free(c.rv2cv_op); return yyl_safe_bareword(aTHX_ s, lastchar, saw_infix_sigil); } @@ -7600,8 +7630,7 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, OPERATOR(','); case ':': if (s[1] == ':') - return yyl_just_a_word(aTHX_ s, 0, 0, 0, 0, 0, NULL, NULL, NULL, - NULL, NULL, saw_infix_sigil); + return yyl_just_a_word(aTHX_ s, 0, 0, 0, no_code, saw_infix_sigil); return yyl_colon(aTHX_ s + 1); case '(': @@ -7922,8 +7951,7 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, /* x::* is just a word, unless x is "CORE" */ if (!anydelim && *s == ':' && s[1] == ':') { if (memEQs(PL_tokenbuf, len, "CORE")) goto case_KEY_CORE; - return yyl_just_a_word(aTHX_ s, len, 0, off, orig_keyword, sv, cv, - gv, gvp, rv2cv_op, FALSE, saw_infix_sigil); + return yyl_just_a_word(aTHX_ s, len, 0, orig_keyword, MAKE_CODE(FALSE), saw_infix_sigil); } d = s; @@ -7996,9 +8024,9 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, off = 0; if (!gv) { sv_free(sv); - return yyl_just_a_word(aTHX_ s, len, tmp, off, - orig_keyword, NULL, cv, gv, gvp, - rv2cv_op, FALSE, saw_infix_sigil); + sv = NULL; + return yyl_just_a_word(aTHX_ s, len, tmp, orig_keyword, + MAKE_CODE(FALSE), saw_infix_sigil); } } else { @@ -8006,8 +8034,8 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, rv2cv_op->op_targ = off; cv = find_lexical_cv(off); } - return yyl_just_a_word(aTHX_ s, len, tmp, off, orig_keyword, sv, - cv, gv, gvp, rv2cv_op, TRUE, saw_infix_sigil); + return yyl_just_a_word(aTHX_ s, len, tmp, orig_keyword, + MAKE_CODE(TRUE), saw_infix_sigil); } off = 0; } @@ -8033,8 +8061,8 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, reserved_word: switch (tmp) { default: /* not a keyword */ - return yyl_just_a_word(aTHX_ s, len, tmp, off, orig_keyword, sv, cv, - gv, gvp, rv2cv_op, FALSE, saw_infix_sigil); + return yyl_just_a_word(aTHX_ s, len, tmp, orig_keyword, + MAKE_CODE(FALSE), saw_infix_sigil); case KEY___FILE__: FUN0OP( @@ -8075,8 +8103,8 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, case KEY_END: if (PL_expect == XSTATE) return yyl_sub(aTHX_ PL_bufptr, tmp); - return yyl_just_a_word(aTHX_ s, len, tmp, off, orig_keyword, sv, cv, - gv, gvp, rv2cv_op, FALSE, saw_infix_sigil); + return yyl_just_a_word(aTHX_ s, len, tmp, orig_keyword, + MAKE_CODE(FALSE), saw_infix_sigil); case_KEY_CORE: { @@ -8088,9 +8116,8 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\'')) { Copy(PL_bufptr, PL_tokenbuf, olen, char); - return yyl_just_a_word(aTHX_ d, olen, tmp, off, orig_keyword, - sv, cv, gv, gvp, rv2cv_op, FALSE, - saw_infix_sigil); + return yyl_just_a_word(aTHX_ d, olen, tmp, orig_keyword, + MAKE_CODE(FALSE), saw_infix_sigil); } if (!tmp) Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword", @@ -8965,8 +8992,8 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, Mop(OP_REPEAT); } check_uni(); - return yyl_just_a_word(aTHX_ s, len, tmp, off, orig_keyword, sv, cv, - gv, gvp, rv2cv_op, FALSE, saw_infix_sigil); + return yyl_just_a_word(aTHX_ s, len, tmp, orig_keyword, + MAKE_CODE(FALSE), saw_infix_sigil); case KEY_xor: if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) From 941cf490fb01ceab77e2e901fd605c60efb0d1f5 Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Wed, 23 Oct 2019 16:51:58 +0100 Subject: [PATCH 11/20] toke.c: factor out static yyl_key_core() and yyl_word_or_keyword() --- toke.c | 2650 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 1326 insertions(+), 1324 deletions(-) diff --git a/toke.c b/toke.c index e6fb3220068c..7c398e7611d5 100644 --- a/toke.c +++ b/toke.c @@ -7465,1542 +7465,1544 @@ yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, } static int -yyl_try(pTHX_ char initial_state, char *s, STRLEN len, - U8 formbrack, const bool saw_infix_sigil) -{ - char *d; - bool bof = FALSE; - GV *gv = NULL, **gvp = NULL; - - switch (initial_state) { - case '}': goto rightbracket; - } +yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, + struct code c, bool bof, const bool saw_infix_sigil) +{ + switch (key) { + default: /* not a keyword */ + return yyl_just_a_word(aTHX_ s, len, key, orig_keyword, + c, saw_infix_sigil); + + case KEY___FILE__: + FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) ); + + case KEY___LINE__: + FUN0OP( + newSVOP(OP_CONST, 0, + Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop))) + ); + + case KEY___PACKAGE__: + FUN0OP( + newSVOP(OP_CONST, 0, (PL_curstash + ? newSVhek(HvNAME_HEK(PL_curstash)) + : &PL_sv_undef)) + ); + + case KEY___DATA__: + case KEY___END__: + if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) + yyl_data_handle(aTHX); + return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, bof, s, len); + + case KEY___SUB__: + FUN0OP(CvCLONE(PL_compcv) + ? newOP(OP_RUNCV, 0) + : newPVOP(OP_RUNCV,0,NULL)); + + case KEY_AUTOLOAD: + case KEY_DESTROY: + case KEY_BEGIN: + case KEY_UNITCHECK: + case KEY_CHECK: + case KEY_INIT: + case KEY_END: + if (PL_expect == XSTATE) + return yyl_sub(aTHX_ PL_bufptr, key); + return yyl_just_a_word(aTHX_ s, len, key, orig_keyword, + c, saw_infix_sigil); + + case KEY_abs: + UNI(OP_ABS); + + case KEY_alarm: + UNI(OP_ALARM); + + case KEY_accept: + LOP(OP_ACCEPT,XTERM); + + case KEY_and: + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) + return REPORT(0); + OPERATOR(ANDOP); + + case KEY_atan2: + LOP(OP_ATAN2,XTERM); + + case KEY_bind: + LOP(OP_BIND,XTERM); + + case KEY_binmode: + LOP(OP_BINMODE,XTERM); + + case KEY_bless: + LOP(OP_BLESS,XTERM); + + case KEY_break: + FUN0(OP_BREAK); + + case KEY_chop: + UNI(OP_CHOP); + + case KEY_continue: + /* We have to disambiguate the two senses of + "continue". If the next token is a '{' then + treat it as the start of a continue block; + otherwise treat it as a control operator. + */ + s = skipspace(s); + if (*s == '{') + PREBLOCK(CONTINUE); + else + FUN0(OP_CONTINUE); - switch (*s) { - default: - if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) - goto keylookup; - yyl_croak_unrecognised(aTHX_ s); + case KEY_chdir: + /* may use HOME */ + (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV); + UNI(OP_CHDIR); - case 4: - case 26: - /* emulate EOF on ^D or ^Z */ - return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s, len); + case KEY_close: + UNI(OP_CLOSE); - case 0: - if ((!PL_rsfp || PL_lex_inwhat) - && (!PL_parser->filtered || s+1 < PL_bufend)) { - PL_last_uni = 0; - PL_last_lop = 0; - if (PL_lex_brackets - && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) - { - yyerror((const char *) - (PL_lex_formbrack - ? "Format not terminated" - : "Missing right curly or square bracket")); - } - DEBUG_T({ - PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n"); - }); - TOKEN(0); - } - if (s++ < PL_bufend) - return RETRY(); /* ignore stray nulls */ - PL_last_uni = 0; - PL_last_lop = 0; - if (!PL_in_eval && !PL_preambled) { - PL_preambled = TRUE; - if (PL_perldb) { - /* Generate a string of Perl code to load the debugger. - * If PERL5DB is set, it will return the contents of that, - * otherwise a compile-time require of perl5db.pl. */ + case KEY_closedir: + UNI(OP_CLOSEDIR); - const char * const pdb = PerlEnv_getenv("PERL5DB"); + case KEY_cmp: + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) + return REPORT(0); + Eop(OP_SCMP); - if (pdb) { - sv_setpv(PL_linestr, pdb); - sv_catpvs(PL_linestr,";"); - } else { - SETERRNO(0,SS_NORMAL); - sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };"); - } - PL_parser->preambling = CopLINE(PL_curcop); - } else - SvPVCLEAR(PL_linestr); - if (PL_preambleav) { - SV **svp = AvARRAY(PL_preambleav); - SV **const end = svp + AvFILLp(PL_preambleav); - while(svp <= end) { - sv_catsv(PL_linestr, *svp); - ++svp; - sv_catpvs(PL_linestr, ";"); - } - sv_free(MUTABLE_SV(PL_preambleav)); - PL_preambleav = NULL; - } - if (PL_minus_E) - sv_catpvs(PL_linestr, - "use feature ':5." STRINGIFY(PERL_VERSION) "';"); - if (PL_minus_n || PL_minus_p) { - sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/); - if (PL_minus_l) - sv_catpvs(PL_linestr,"chomp;"); - if (PL_minus_a) { - if (PL_minus_F) { - if ( ( *PL_splitstr == '/' - || *PL_splitstr == '\'' - || *PL_splitstr == '"') - && strchr(PL_splitstr + 1, *PL_splitstr)) - { - /* strchr is ok, because -F pattern can't contain - * embeddded NULs */ - Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); - } - else { - /* "q\0${splitstr}\0" is legal perl. Yes, even NUL - bytes can be used as quoting characters. :-) */ - const char *splits = PL_splitstr; - sv_catpvs(PL_linestr, "our @F=split(q\0"); - do { - /* Need to \ \s */ - if (*splits == '\\') - sv_catpvn(PL_linestr, splits, 1); - sv_catpvn(PL_linestr, splits, 1); - } while (*splits++); - /* This loop will embed the trailing NUL of - PL_linestr as the last thing it does before - terminating. */ - sv_catpvs(PL_linestr, ");"); - } - } - else - sv_catpvs(PL_linestr,"our @F=split(' ');"); - } - } - sv_catpvs(PL_linestr, "\n"); - PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); - PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); - PL_last_lop = PL_last_uni = NULL; - if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash) - update_debugger_info(PL_linestr, NULL, 0); - return RETRY(); - } - return yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s, len); + case KEY_caller: + UNI(OP_CALLER); - case '\r': -#ifdef PERL_STRICT_CR - Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); - Perl_croak(aTHX_ - "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); + case KEY_crypt: +#ifdef FCRYPT + if (!PL_cryptseen) { + PL_cryptseen = TRUE; + init_des(); + } #endif - case ' ': case '\t': case '\f': case '\v': - s++; - return RETRY(); + LOP(OP_CRYPT,XTERM); - case '#': - case '\n': - return yyl_eol(aTHX_ s, len); + case KEY_chmod: + LOP(OP_CHMOD,XTERM); - case '-': - return yyl_hyphen(aTHX_ s); + case KEY_chown: + LOP(OP_CHOWN,XTERM); - case '+': - return yyl_plus(aTHX_ s); + case KEY_connect: + LOP(OP_CONNECT,XTERM); - case '*': - return yyl_star(aTHX_ s); + case KEY_chr: + UNI(OP_CHR); - case '%': - return yyl_percent(aTHX_ s); + case KEY_cos: + UNI(OP_COS); - case '^': - return yyl_caret(aTHX_ s); + case KEY_chroot: + UNI(OP_CHROOT); - case '[': - return yyl_leftsquare(aTHX_ s); + case KEY_default: + PREBLOCK(DEFAULT); - case '~': - return yyl_tilde(aTHX_ s); + case KEY_do: + return yyl_do(aTHX_ s, orig_keyword); - case ',': - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) - TOKEN(0); - s++; - OPERATOR(','); - case ':': - if (s[1] == ':') - return yyl_just_a_word(aTHX_ s, 0, 0, 0, no_code, saw_infix_sigil); - return yyl_colon(aTHX_ s + 1); + case KEY_die: + PL_hints |= HINT_BLOCK_SCOPE; + LOP(OP_DIE,XTERM); - case '(': - return yyl_leftparen(aTHX_ s + 1); + case KEY_defined: + UNI(OP_DEFINED); - case ';': - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) - TOKEN(0); - CLINE; - s++; - PL_expect = XSTATE; - TOKEN(';'); + case KEY_delete: + UNI(OP_DELETE); - case ')': - return yyl_rightparen(aTHX_ s); + case KEY_dbmopen: + Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"), + STR_WITH_LEN("NDBM_File::"), + STR_WITH_LEN("DB_File::"), + STR_WITH_LEN("GDBM_File::"), + STR_WITH_LEN("SDBM_File::"), + STR_WITH_LEN("ODBM_File::"), + NULL); + LOP(OP_DBMOPEN,XTERM); - case ']': - return yyl_rightsquare(aTHX_ s); + case KEY_dbmclose: + UNI(OP_DBMCLOSE); - case '{': - s++; - leftbracket: - return yyl_leftcurly(aTHX_ s, formbrack); + case KEY_dump: + LOOPX(OP_DUMP); - case '}': - if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) - TOKEN(0); - rightbracket: - assert(s != PL_bufend); - return yyl_rightcurly(aTHX_ s + 1, formbrack); + case KEY_else: + PREBLOCK(ELSE); - case '&': - return yyl_ampersand(aTHX_ s); + case KEY_elsif: + pl_yylval.ival = CopLINE(PL_curcop); + OPERATOR(ELSIF); - case '|': - return yyl_verticalbar(aTHX_ s); + case KEY_eq: + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) + return REPORT(0); + Eop(OP_SEQ); - case '=': - if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n') - && memBEGINs(s + 2, (STRLEN) (PL_bufend - s + 2), "=====")) - { - s = vcs_conflict_marker(s + 7); - return RETRY(); + case KEY_exists: + UNI(OP_EXISTS); + + case KEY_exit: + UNI(OP_EXIT); + + case KEY_eval: + s = skipspace(s); + if (*s == '{') { /* block eval */ + PL_expect = XTERMBLOCK; + UNIBRACK(OP_ENTERTRY); + } + else { /* string eval */ + PL_expect = XTERM; + UNIBRACK(OP_ENTEREVAL); } - s++; - { - const char tmp = *s++; - if (tmp == '=') { - if (!PL_lex_allbrackets - && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) - { - s -= 2; - TOKEN(0); - } - Eop(OP_EQ); - } - if (tmp == '>') { - if (!PL_lex_allbrackets - && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) - { - s -= 2; - TOKEN(0); - } - OPERATOR(','); - } - if (tmp == '~') - PMop(OP_MATCH); - if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX) - && strchr("+-*/%.^&|<",tmp)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Reversed %c= operator",(int)tmp); - s--; - if (PL_expect == XSTATE - && isALPHA(tmp) - && (s == PL_linestart+1 || s[-2] == '\n') ) - { - if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered) - || PL_lex_state != LEX_NORMAL) - { - d = PL_bufend; - while (s < d) { - if (*s++ == '\n') { - incline(s, PL_bufend); - if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut")) - { - s = (char *) memchr(s,'\n', d - s); - if (s) - s++; - else - s = d; - incline(s, PL_bufend); - return RETRY(); - } - } - } - return RETRY(); - } - s = PL_bufend; - PL_parser->in_pod = 1; - return RETRY(); - } - } - if (PL_expect == XBLOCK) { - const char *t = s; -#ifdef PERL_STRICT_CR - while (SPACE_OR_TAB(*t)) -#else - while (SPACE_OR_TAB(*t) || *t == '\r') -#endif - t++; - if (*t == '\n' || *t == '#') { - formbrack = 1; - ENTER_with_name("lex_format"); - SAVEI8(PL_parser->form_lex_state); - SAVEI32(PL_lex_formbrack); - PL_parser->form_lex_state = PL_lex_state; - PL_lex_formbrack = PL_lex_brackets + 1; - PL_parser->sub_error_count = PL_error_count; - goto leftbracket; - } - } - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { - s--; - TOKEN(0); - } - pl_yylval.ival = 0; - OPERATOR(ASSIGNOP); + case KEY_evalbytes: + PL_expect = XTERM; + UNIBRACK(-OP_ENTEREVAL); - case '!': - return yyl_bang(aTHX_ s + 1); + case KEY_eof: + UNI(OP_EOF); - case '<': - if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n') - && memBEGINs(s+2, (STRLEN) (PL_bufend - (s+2)), "<<<<<")) - { - s = vcs_conflict_marker(s + 7); - return RETRY(); - } - return yyl_leftpointy(aTHX_ s); + case KEY_exp: + UNI(OP_EXP); - case '>': - if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n') - && memBEGINs(s + 2, (STRLEN) (PL_bufend - s + 2), ">>>>>")) - { - s = vcs_conflict_marker(s + 7); - return RETRY(); - } - return yyl_rightpointy(aTHX_ s + 1); + case KEY_each: + UNI(OP_EACH); - case '$': - return yyl_dollar(aTHX_ s); + case KEY_exec: + LOP(OP_EXEC,XREF); - case '@': - return yyl_snail(aTHX_ s); + case KEY_endhostent: + FUN0(OP_EHOSTENT); - case '/': /* may be division, defined-or, or pattern */ - return yyl_slash(aTHX_ s); + case KEY_endnetent: + FUN0(OP_ENETENT); - case '?': /* conditional */ - s++; - if (!PL_lex_allbrackets - && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) - { - s--; - TOKEN(0); - } - PL_lex_allbrackets++; - OPERATOR('?'); + case KEY_endservent: + FUN0(OP_ESERVENT); - case '.': - if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack -#ifdef PERL_STRICT_CR - && s[1] == '\n' -#else - && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n')) -#endif - && (s == PL_linestart || s[-1] == '\n') ) - { - PL_expect = XSTATE; - formbrack = 2; /* dot seen where arguments expected */ - goto rightbracket; - } - if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { - s += 3; - OPERATOR(YADAYADA); - } - if (PL_expect == XOPERATOR || !isDIGIT(s[1])) { - char tmp = *s++; - if (*s == tmp) { - if (!PL_lex_allbrackets - && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) - { - s--; - TOKEN(0); - } - s++; - if (*s == tmp) { - s++; - pl_yylval.ival = OPf_SPECIAL; - } - else - pl_yylval.ival = 0; - OPERATOR(DOTDOT); - } - if (*s == '=' && !PL_lex_allbrackets - && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) - { - s--; - TOKEN(0); - } - Aop(OP_CONCAT); - } - /* FALLTHROUGH */ - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - s = scan_num(s, &pl_yylval); - DEBUG_T( { printbuf("### Saw number in %s\n", s); } ); - if (PL_expect == XOPERATOR) - no_op("Number",s); - TERM(THING); + case KEY_endprotoent: + FUN0(OP_EPROTOENT); - case '\'': - return yyl_sglquote(aTHX_ s); + case KEY_endpwent: + FUN0(OP_EPWENT); - case '"': - return yyl_dblquote(aTHX_ s, len); + case KEY_endgrent: + FUN0(OP_EGRENT); - case '`': - return yyl_backtick(aTHX_ s); + case KEY_for: + case KEY_foreach: + return yyl_foreach(aTHX_ s); - case '\\': - return yyl_backslash(aTHX_ s + 1); + case KEY_formline: + LOP(OP_FORMLINE,XTERM); - case 'v': - if (isDIGIT(s[1]) && PL_expect != XOPERATOR) { - char *start = s + 2; - while (isDIGIT(*start) || *start == '_') - start++; - if (*start == '.' && isDIGIT(start[1])) { - s = scan_num(s, &pl_yylval); - TERM(THING); - } - else if ((*start == ':' && start[1] == ':') - || (PL_expect == XSTATE && *start == ':')) - goto keylookup; - else if (PL_expect == XSTATE) { - d = start; - while (d < PL_bufend && isSPACE(*d)) d++; - if (*d == ':') goto keylookup; - } - /* avoid v123abc() or $h{v1}, allow C */ - if (!isALPHA(*start) && (PL_expect == XTERM - || PL_expect == XREF || PL_expect == XSTATE - || PL_expect == XTERMORDORDOR)) { - GV *const gv = gv_fetchpvn_flags(s, start - s, - UTF ? SVf_UTF8 : 0, SVt_PVCV); - if (!gv) { - s = scan_num(s, &pl_yylval); - TERM(THING); - } - } - } - goto keylookup; - case 'x': - if (isDIGIT(s[1]) && PL_expect == XOPERATOR) { - s++; - Mop(OP_REPEAT); - } - goto keylookup; + case KEY_fork: + FUN0(OP_FORK); - case '_': - case 'a': case 'A': - case 'b': case 'B': - case 'c': case 'C': - case 'd': case 'D': - case 'e': case 'E': - case 'f': case 'F': - case 'g': case 'G': - case 'h': case 'H': - case 'i': case 'I': - case 'j': case 'J': - case 'k': case 'K': - case 'l': case 'L': - case 'm': case 'M': - case 'n': case 'N': - case 'o': case 'O': - case 'p': case 'P': - case 'q': case 'Q': - case 'r': case 'R': - case 's': case 'S': - case 't': case 'T': - case 'u': case 'U': - case 'V': - case 'w': case 'W': - case 'X': - case 'y': case 'Y': - case 'z': case 'Z': + case KEY_fc: + UNI(OP_FC); - keylookup: { - bool anydelim; - I32 tmp = 0; - SV *sv = NULL; - CV *cv = NULL; - PADOFFSET off = 0; - OP *rv2cv_op = NULL; - I32 orig_keyword = 0; + case KEY_fcntl: + LOP(OP_FCNTL,XTERM); - gv = NULL; - gvp = NULL; + case KEY_fileno: + UNI(OP_FILENO); - PL_bufptr = s; - s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); + case KEY_flock: + LOP(OP_FLOCK,XTERM); - /* Some keywords can be followed by any delimiter, including ':' */ - anydelim = word_takes_any_delimiter(PL_tokenbuf, len); + case KEY_gt: + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) + return REPORT(0); + Rop(OP_SGT); - /* x::* is just a word, unless x is "CORE" */ - if (!anydelim && *s == ':' && s[1] == ':') { - if (memEQs(PL_tokenbuf, len, "CORE")) goto case_KEY_CORE; - return yyl_just_a_word(aTHX_ s, len, 0, orig_keyword, MAKE_CODE(FALSE), saw_infix_sigil); - } + case KEY_ge: + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) + return REPORT(0); + Rop(OP_SGE); - d = s; - while (d < PL_bufend && isSPACE(*d)) - d++; /* no comments skipped here, or s### is misparsed */ + case KEY_grep: + LOP(OP_GREPSTART, XREF); - /* Is this a word before a => operator? */ - if (*d == '=' && d[1] == '>') { - return yyl_fatcomma(aTHX_ s, len); - } + case KEY_goto: + LOOPX(OP_GOTO); - /* Check for plugged-in keyword */ - { - OP *o; - int result; - char *saved_bufptr = PL_bufptr; - PL_bufptr = s; - result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o); - s = PL_bufptr; - if (result == KEYWORD_PLUGIN_DECLINE) { - /* not a plugged-in keyword */ - PL_bufptr = saved_bufptr; - } else if (result == KEYWORD_PLUGIN_STMT) { - pl_yylval.opval = o; - CLINE; - if (!PL_nexttoke) PL_expect = XSTATE; - return REPORT(PLUGSTMT); - } else if (result == KEYWORD_PLUGIN_EXPR) { - pl_yylval.opval = o; - CLINE; - if (!PL_nexttoke) PL_expect = XOPERATOR; - return REPORT(PLUGEXPR); - } else { - Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", - PL_tokenbuf); - } - } + case KEY_gmtime: + UNI(OP_GMTIME); + + case KEY_getc: + UNIDOR(OP_GETC); + + case KEY_getppid: + FUN0(OP_GETPPID); + + case KEY_getpgrp: + UNI(OP_GETPGRP); + + case KEY_getpriority: + LOP(OP_GETPRIORITY,XTERM); + + case KEY_getprotobyname: + UNI(OP_GPBYNAME); + + case KEY_getprotobynumber: + LOP(OP_GPBYNUMBER,XTERM); + + case KEY_getprotoent: + FUN0(OP_GPROTOENT); + + case KEY_getpwent: + FUN0(OP_GPWENT); + + case KEY_getpwnam: + UNI(OP_GPWNAM); + + case KEY_getpwuid: + UNI(OP_GPWUID); + + case KEY_getpeername: + UNI(OP_GETPEERNAME); + + case KEY_gethostbyname: + UNI(OP_GHBYNAME); + + case KEY_gethostbyaddr: + LOP(OP_GHBYADDR,XTERM); + + case KEY_gethostent: + FUN0(OP_GHOSTENT); + + case KEY_getnetbyname: + UNI(OP_GNBYNAME); + + case KEY_getnetbyaddr: + LOP(OP_GNBYADDR,XTERM); + + case KEY_getnetent: + FUN0(OP_GNETENT); + + case KEY_getservbyname: + LOP(OP_GSBYNAME,XTERM); + + case KEY_getservbyport: + LOP(OP_GSBYPORT,XTERM); + + case KEY_getservent: + FUN0(OP_GSERVENT); + + case KEY_getsockname: + UNI(OP_GETSOCKNAME); + + case KEY_getsockopt: + LOP(OP_GSOCKOPT,XTERM); + + case KEY_getgrent: + FUN0(OP_GGRENT); + + case KEY_getgrnam: + UNI(OP_GGRNAM); + + case KEY_getgrgid: + UNI(OP_GGRGID); + + case KEY_getlogin: + FUN0(OP_GETLOGIN); + + case KEY_given: + pl_yylval.ival = CopLINE(PL_curcop); + Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH), + "given is experimental"); + OPERATOR(GIVEN); + + case KEY_glob: + LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM ); + + case KEY_hex: + UNI(OP_HEX); + + case KEY_if: + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) + return REPORT(0); + pl_yylval.ival = CopLINE(PL_curcop); + OPERATOR(IF); + + case KEY_index: + LOP(OP_INDEX,XTERM); + + case KEY_int: + UNI(OP_INT); + + case KEY_ioctl: + LOP(OP_IOCTL,XTERM); + + case KEY_join: + LOP(OP_JOIN,XTERM); + + case KEY_keys: + UNI(OP_KEYS); + + case KEY_kill: + LOP(OP_KILL,XTERM); + + case KEY_last: + LOOPX(OP_LAST); + + case KEY_lc: + UNI(OP_LC); + + case KEY_lcfirst: + UNI(OP_LCFIRST); + + case KEY_local: + OPERATOR(LOCAL); + + case KEY_length: + UNI(OP_LENGTH); + + case KEY_lt: + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) + return REPORT(0); + Rop(OP_SLT); + + case KEY_le: + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) + return REPORT(0); + Rop(OP_SLE); + + case KEY_localtime: + UNI(OP_LOCALTIME); + + case KEY_log: + UNI(OP_LOG); + + case KEY_link: + LOP(OP_LINK,XTERM); + + case KEY_listen: + LOP(OP_LISTEN,XTERM); + + case KEY_lock: + UNI(OP_LOCK); + + case KEY_lstat: + UNI(OP_LSTAT); + + case KEY_m: + s = scan_pat(s,OP_MATCH); + TERM(sublex_start()); + + case KEY_map: + LOP(OP_MAPSTART, XREF); + + case KEY_mkdir: + LOP(OP_MKDIR,XTERM); + + case KEY_msgctl: + LOP(OP_MSGCTL,XTERM); + + case KEY_msgget: + LOP(OP_MSGGET,XTERM); + + case KEY_msgrcv: + LOP(OP_MSGRCV,XTERM); + + case KEY_msgsnd: + LOP(OP_MSGSND,XTERM); + + case KEY_our: + case KEY_my: + case KEY_state: + return yyl_my(aTHX_ s, key); + + case KEY_next: + LOOPX(OP_NEXT); + + case KEY_ne: + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) + return REPORT(0); + Eop(OP_SNE); + + case KEY_no: + s = tokenize_use(0, s); + TOKEN(USE); + + case KEY_not: + if (*s == '(' || (s = skipspace(s), *s == '(')) + FUN1(OP_NOT); + else { + if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) + PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; + OPERATOR(NOTOP); + } + + case KEY_open: + s = skipspace(s); + if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { + const char *t; + char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); + for (t=d; isSPACE(*t);) + t++; + if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE) + /* [perl #16184] */ + && !(t[0] == '=' && t[1] == '>') + && !(t[0] == ':' && t[1] == ':') + && !keyword(s, d-s, 0) + ) { + Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), + "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")", + UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s)); + } + } + LOP(OP_OPEN,XTERM); - /* Check for built-in keyword */ - tmp = keyword(PL_tokenbuf, len, 0); + case KEY_or: + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) + return REPORT(0); + pl_yylval.ival = OP_OR; + OPERATOR(OROP); - /* Is this a label? */ - if (!anydelim && PL_expect == XSTATE - && d < PL_bufend && *d == ':' && *(d + 1) != ':') { - s = d + 1; - pl_yylval.opval = - newSVOP(OP_CONST, 0, - newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0)); - CLINE; - TOKEN(LABEL); - } + case KEY_ord: + UNI(OP_ORD); - /* Check for lexical sub */ - if (PL_expect != XOPERATOR) { - char tmpbuf[sizeof PL_tokenbuf + 1]; - *tmpbuf = '&'; - Copy(PL_tokenbuf, tmpbuf+1, len, char); - off = pad_findmy_pvn(tmpbuf, len+1, 0); - if (off != NOT_IN_PAD) { - assert(off); /* we assume this is boolean-true below */ - if (PAD_COMPNAME_FLAGS_isOUR(off)) { - HV * const stash = PAD_COMPNAME_OURSTASH(off); - HEK * const stashname = HvNAME_HEK(stash); - sv = newSVhek(stashname); - sv_catpvs(sv, "::"); - sv_catpvn_flags(sv, PL_tokenbuf, len, - (UTF ? SV_CATUTF8 : SV_CATBYTES)); - gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv), - SVt_PVCV); - off = 0; - if (!gv) { - sv_free(sv); - sv = NULL; - return yyl_just_a_word(aTHX_ s, len, tmp, orig_keyword, - MAKE_CODE(FALSE), saw_infix_sigil); - } - } - else { - rv2cv_op = newOP(OP_PADANY, 0); - rv2cv_op->op_targ = off; - cv = find_lexical_cv(off); - } - return yyl_just_a_word(aTHX_ s, len, tmp, orig_keyword, - MAKE_CODE(TRUE), saw_infix_sigil); - } - off = 0; - } + case KEY_oct: + UNI(OP_OCT); - if (tmp < 0) - tmp = yyl_secondclass_keyword(aTHX_ s, len, tmp, &orig_keyword, &gv, &gvp); + case KEY_opendir: + LOP(OP_OPEN_DIR,XTERM); - if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__ - && (!anydelim || *s != '#')) { - /* no override, and not s### either; skipspace is safe here - * check for => on following line */ - bool arrow; - STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr); - STRLEN soff = s - SvPVX(PL_linestr); - s = peekspace(s); - arrow = *s == '=' && s[1] == '>'; - PL_bufptr = SvPVX(PL_linestr) + bufoff; - s = SvPVX(PL_linestr) + soff; - if (arrow) - return yyl_fatcomma(aTHX_ s, len); - } + case KEY_print: + checkcomma(s,PL_tokenbuf,"filehandle"); + LOP(OP_PRINT,XREF); - reserved_word: - switch (tmp) { - default: /* not a keyword */ - return yyl_just_a_word(aTHX_ s, len, tmp, orig_keyword, - MAKE_CODE(FALSE), saw_infix_sigil); + case KEY_printf: + checkcomma(s,PL_tokenbuf,"filehandle"); + LOP(OP_PRTF,XREF); - case KEY___FILE__: - FUN0OP( - newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) - ); + case KEY_prototype: + UNI(OP_PROTOTYPE); - case KEY___LINE__: - FUN0OP( - newSVOP(OP_CONST, 0, - Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop))) - ); + case KEY_push: + LOP(OP_PUSH,XTERM); - case KEY___PACKAGE__: - FUN0OP( - newSVOP(OP_CONST, 0, - (PL_curstash - ? newSVhek(HvNAME_HEK(PL_curstash)) - : &PL_sv_undef)) - ); - - case KEY___DATA__: - case KEY___END__: - if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) - yyl_data_handle(aTHX); - return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, bof, s, len); - - case KEY___SUB__: - FUN0OP(CvCLONE(PL_compcv) - ? newOP(OP_RUNCV, 0) - : newPVOP(OP_RUNCV,0,NULL)); - - case KEY_AUTOLOAD: - case KEY_DESTROY: - case KEY_BEGIN: - case KEY_UNITCHECK: - case KEY_CHECK: - case KEY_INIT: - case KEY_END: - if (PL_expect == XSTATE) - return yyl_sub(aTHX_ PL_bufptr, tmp); - return yyl_just_a_word(aTHX_ s, len, tmp, orig_keyword, - MAKE_CODE(FALSE), saw_infix_sigil); - - case_KEY_CORE: - { - STRLEN olen = len; - d = s; - s += 2; - s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); - if ((*s == ':' && s[1] == ':') - || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\'')) - { - Copy(PL_bufptr, PL_tokenbuf, olen, char); - return yyl_just_a_word(aTHX_ d, olen, tmp, orig_keyword, - MAKE_CODE(FALSE), saw_infix_sigil); - } - if (!tmp) - Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword", - UTF8fARG(UTF, len, PL_tokenbuf)); - if (tmp < 0) - tmp = -tmp; - else if (tmp == KEY_require || tmp == KEY_do - || tmp == KEY_glob) - /* that's a way to remember we saw "CORE::" */ - orig_keyword = tmp; - goto reserved_word; - } + case KEY_pop: + UNIDOR(OP_POP); - case KEY_abs: - UNI(OP_ABS); + case KEY_pos: + UNIDOR(OP_POS); - case KEY_alarm: - UNI(OP_ALARM); + case KEY_pack: + LOP(OP_PACK,XTERM); - case KEY_accept: - LOP(OP_ACCEPT,XTERM); + case KEY_package: + s = force_word(s,BAREWORD,FALSE,TRUE); + s = skipspace(s); + s = force_strict_version(s); + PREBLOCK(PACKAGE); - case KEY_and: - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) - return REPORT(0); - OPERATOR(ANDOP); + case KEY_pipe: + LOP(OP_PIPE_OP,XTERM); - case KEY_atan2: - LOP(OP_ATAN2,XTERM); + case KEY_q: + s = scan_str(s,FALSE,FALSE,FALSE,NULL); + if (!s) + missingterm(NULL, 0); + COPLINE_SET_FROM_MULTI_END; + pl_yylval.ival = OP_CONST; + TERM(sublex_start()); - case KEY_bind: - LOP(OP_BIND,XTERM); + case KEY_quotemeta: + UNI(OP_QUOTEMETA); - case KEY_binmode: - LOP(OP_BINMODE,XTERM); + case KEY_qw: + return yyl_qw(aTHX_ s, len); - case KEY_bless: - LOP(OP_BLESS,XTERM); + case KEY_qq: + s = scan_str(s,FALSE,FALSE,FALSE,NULL); + if (!s) + missingterm(NULL, 0); + pl_yylval.ival = OP_STRINGIFY; + if (SvIVX(PL_lex_stuff) == '\'') + SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */ + TERM(sublex_start()); - case KEY_break: - FUN0(OP_BREAK); + case KEY_qr: + s = scan_pat(s,OP_QR); + TERM(sublex_start()); - case KEY_chop: - UNI(OP_CHOP); + case KEY_qx: + s = scan_str(s,FALSE,FALSE,FALSE,NULL); + if (!s) + missingterm(NULL, 0); + pl_yylval.ival = OP_BACKTICK; + TERM(sublex_start()); - case KEY_continue: - /* We have to disambiguate the two senses of - "continue". If the next token is a '{' then - treat it as the start of a continue block; - otherwise treat it as a control operator. - */ - s = skipspace(s); - if (*s == '{') - PREBLOCK(CONTINUE); - else - FUN0(OP_CONTINUE); + case KEY_return: + OLDLOP(OP_RETURN); - case KEY_chdir: - /* may use HOME */ - (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV); - UNI(OP_CHDIR); + case KEY_require: + return yyl_require(aTHX_ s, orig_keyword); - case KEY_close: - UNI(OP_CLOSE); + case KEY_reset: + UNI(OP_RESET); - case KEY_closedir: - UNI(OP_CLOSEDIR); + case KEY_redo: + LOOPX(OP_REDO); - case KEY_cmp: - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) - return REPORT(0); - Eop(OP_SCMP); + case KEY_rename: + LOP(OP_RENAME,XTERM); - case KEY_caller: - UNI(OP_CALLER); + case KEY_rand: + UNI(OP_RAND); - case KEY_crypt: -#ifdef FCRYPT - if (!PL_cryptseen) { - PL_cryptseen = TRUE; - init_des(); - } -#endif - LOP(OP_CRYPT,XTERM); + case KEY_rmdir: + UNI(OP_RMDIR); - case KEY_chmod: - LOP(OP_CHMOD,XTERM); + case KEY_rindex: + LOP(OP_RINDEX,XTERM); - case KEY_chown: - LOP(OP_CHOWN,XTERM); + case KEY_read: + LOP(OP_READ,XTERM); - case KEY_connect: - LOP(OP_CONNECT,XTERM); + case KEY_readdir: + UNI(OP_READDIR); - case KEY_chr: - UNI(OP_CHR); + case KEY_readline: + UNIDOR(OP_READLINE); - case KEY_cos: - UNI(OP_COS); + case KEY_readpipe: + UNIDOR(OP_BACKTICK); - case KEY_chroot: - UNI(OP_CHROOT); + case KEY_rewinddir: + UNI(OP_REWINDDIR); - case KEY_default: - PREBLOCK(DEFAULT); + case KEY_recv: + LOP(OP_RECV,XTERM); - case KEY_do: - return yyl_do(aTHX_ s, orig_keyword); + case KEY_reverse: + LOP(OP_REVERSE,XTERM); - case KEY_die: - PL_hints |= HINT_BLOCK_SCOPE; - LOP(OP_DIE,XTERM); + case KEY_readlink: + UNIDOR(OP_READLINK); - case KEY_defined: - UNI(OP_DEFINED); + case KEY_ref: + UNI(OP_REF); - case KEY_delete: - UNI(OP_DELETE); + case KEY_s: + s = scan_subst(s); + if (pl_yylval.opval) + TERM(sublex_start()); + else + TOKEN(1); /* force error */ - case KEY_dbmopen: - Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"), - STR_WITH_LEN("NDBM_File::"), - STR_WITH_LEN("DB_File::"), - STR_WITH_LEN("GDBM_File::"), - STR_WITH_LEN("SDBM_File::"), - STR_WITH_LEN("ODBM_File::"), - NULL); - LOP(OP_DBMOPEN,XTERM); + case KEY_say: + checkcomma(s,PL_tokenbuf,"filehandle"); + LOP(OP_SAY,XREF); - case KEY_dbmclose: - UNI(OP_DBMCLOSE); + case KEY_chomp: + UNI(OP_CHOMP); - case KEY_dump: - LOOPX(OP_DUMP); + case KEY_scalar: + UNI(OP_SCALAR); - case KEY_else: - PREBLOCK(ELSE); + case KEY_select: + LOP(OP_SELECT,XTERM); - case KEY_elsif: - pl_yylval.ival = CopLINE(PL_curcop); - OPERATOR(ELSIF); + case KEY_seek: + LOP(OP_SEEK,XTERM); - case KEY_eq: - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) - return REPORT(0); - Eop(OP_SEQ); + case KEY_semctl: + LOP(OP_SEMCTL,XTERM); - case KEY_exists: - UNI(OP_EXISTS); + case KEY_semget: + LOP(OP_SEMGET,XTERM); - case KEY_exit: - UNI(OP_EXIT); + case KEY_semop: + LOP(OP_SEMOP,XTERM); - case KEY_eval: - s = skipspace(s); - if (*s == '{') { /* block eval */ - PL_expect = XTERMBLOCK; - UNIBRACK(OP_ENTERTRY); - } - else { /* string eval */ - PL_expect = XTERM; - UNIBRACK(OP_ENTEREVAL); - } + case KEY_send: + LOP(OP_SEND,XTERM); - case KEY_evalbytes: - PL_expect = XTERM; - UNIBRACK(-OP_ENTEREVAL); + case KEY_setpgrp: + LOP(OP_SETPGRP,XTERM); + + case KEY_setpriority: + LOP(OP_SETPRIORITY,XTERM); + + case KEY_sethostent: + UNI(OP_SHOSTENT); + + case KEY_setnetent: + UNI(OP_SNETENT); + + case KEY_setservent: + UNI(OP_SSERVENT); + + case KEY_setprotoent: + UNI(OP_SPROTOENT); + + case KEY_setpwent: + FUN0(OP_SPWENT); + + case KEY_setgrent: + FUN0(OP_SGRENT); + + case KEY_seekdir: + LOP(OP_SEEKDIR,XTERM); + + case KEY_setsockopt: + LOP(OP_SSOCKOPT,XTERM); + + case KEY_shift: + UNIDOR(OP_SHIFT); + + case KEY_shmctl: + LOP(OP_SHMCTL,XTERM); + + case KEY_shmget: + LOP(OP_SHMGET,XTERM); - case KEY_eof: - UNI(OP_EOF); + case KEY_shmread: + LOP(OP_SHMREAD,XTERM); - case KEY_exp: - UNI(OP_EXP); + case KEY_shmwrite: + LOP(OP_SHMWRITE,XTERM); - case KEY_each: - UNI(OP_EACH); + case KEY_shutdown: + LOP(OP_SHUTDOWN,XTERM); - case KEY_exec: - LOP(OP_EXEC,XREF); + case KEY_sin: + UNI(OP_SIN); - case KEY_endhostent: - FUN0(OP_EHOSTENT); + case KEY_sleep: + UNI(OP_SLEEP); - case KEY_endnetent: - FUN0(OP_ENETENT); + case KEY_socket: + LOP(OP_SOCKET,XTERM); - case KEY_endservent: - FUN0(OP_ESERVENT); + case KEY_socketpair: + LOP(OP_SOCKPAIR,XTERM); - case KEY_endprotoent: - FUN0(OP_EPROTOENT); + case KEY_sort: + checkcomma(s,PL_tokenbuf,"subroutine name"); + s = skipspace(s); + PL_expect = XTERM; + s = force_word(s,BAREWORD,TRUE,TRUE); + LOP(OP_SORT,XREF); + + case KEY_split: + LOP(OP_SPLIT,XTERM); - case KEY_endpwent: - FUN0(OP_EPWENT); + case KEY_sprintf: + LOP(OP_SPRINTF,XTERM); - case KEY_endgrent: - FUN0(OP_EGRENT); + case KEY_splice: + LOP(OP_SPLICE,XTERM); - case KEY_for: - case KEY_foreach: - return yyl_foreach(aTHX_ s); + case KEY_sqrt: + UNI(OP_SQRT); - case KEY_formline: - LOP(OP_FORMLINE,XTERM); + case KEY_srand: + UNI(OP_SRAND); - case KEY_fork: - FUN0(OP_FORK); + case KEY_stat: + UNI(OP_STAT); - case KEY_fc: - UNI(OP_FC); + case KEY_study: + UNI(OP_STUDY); - case KEY_fcntl: - LOP(OP_FCNTL,XTERM); + case KEY_substr: + LOP(OP_SUBSTR,XTERM); - case KEY_fileno: - UNI(OP_FILENO); + case KEY_format: + case KEY_sub: + return yyl_sub(aTHX_ s, key); - case KEY_flock: - LOP(OP_FLOCK,XTERM); + case KEY_system: + LOP(OP_SYSTEM,XREF); - case KEY_gt: - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) - return REPORT(0); - Rop(OP_SGT); + case KEY_symlink: + LOP(OP_SYMLINK,XTERM); - case KEY_ge: - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) - return REPORT(0); - Rop(OP_SGE); + case KEY_syscall: + LOP(OP_SYSCALL,XTERM); - case KEY_grep: - LOP(OP_GREPSTART, XREF); + case KEY_sysopen: + LOP(OP_SYSOPEN,XTERM); - case KEY_goto: - LOOPX(OP_GOTO); + case KEY_sysseek: + LOP(OP_SYSSEEK,XTERM); - case KEY_gmtime: - UNI(OP_GMTIME); + case KEY_sysread: + LOP(OP_SYSREAD,XTERM); - case KEY_getc: - UNIDOR(OP_GETC); + case KEY_syswrite: + LOP(OP_SYSWRITE,XTERM); - case KEY_getppid: - FUN0(OP_GETPPID); + case KEY_tr: + case KEY_y: + s = scan_trans(s); + TERM(sublex_start()); - case KEY_getpgrp: - UNI(OP_GETPGRP); + case KEY_tell: + UNI(OP_TELL); - case KEY_getpriority: - LOP(OP_GETPRIORITY,XTERM); + case KEY_telldir: + UNI(OP_TELLDIR); - case KEY_getprotobyname: - UNI(OP_GPBYNAME); + case KEY_tie: + LOP(OP_TIE,XTERM); - case KEY_getprotobynumber: - LOP(OP_GPBYNUMBER,XTERM); + case KEY_tied: + UNI(OP_TIED); - case KEY_getprotoent: - FUN0(OP_GPROTOENT); + case KEY_time: + FUN0(OP_TIME); - case KEY_getpwent: - FUN0(OP_GPWENT); + case KEY_times: + FUN0(OP_TMS); - case KEY_getpwnam: - UNI(OP_GPWNAM); + case KEY_truncate: + LOP(OP_TRUNCATE,XTERM); - case KEY_getpwuid: - UNI(OP_GPWUID); + case KEY_uc: + UNI(OP_UC); - case KEY_getpeername: - UNI(OP_GETPEERNAME); + case KEY_ucfirst: + UNI(OP_UCFIRST); - case KEY_gethostbyname: - UNI(OP_GHBYNAME); + case KEY_untie: + UNI(OP_UNTIE); - case KEY_gethostbyaddr: - LOP(OP_GHBYADDR,XTERM); + case KEY_until: + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) + return REPORT(0); + pl_yylval.ival = CopLINE(PL_curcop); + OPERATOR(UNTIL); - case KEY_gethostent: - FUN0(OP_GHOSTENT); + case KEY_unless: + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) + return REPORT(0); + pl_yylval.ival = CopLINE(PL_curcop); + OPERATOR(UNLESS); - case KEY_getnetbyname: - UNI(OP_GNBYNAME); + case KEY_unlink: + LOP(OP_UNLINK,XTERM); - case KEY_getnetbyaddr: - LOP(OP_GNBYADDR,XTERM); + case KEY_undef: + UNIDOR(OP_UNDEF); - case KEY_getnetent: - FUN0(OP_GNETENT); + case KEY_unpack: + LOP(OP_UNPACK,XTERM); - case KEY_getservbyname: - LOP(OP_GSBYNAME,XTERM); + case KEY_utime: + LOP(OP_UTIME,XTERM); - case KEY_getservbyport: - LOP(OP_GSBYPORT,XTERM); + case KEY_umask: + UNIDOR(OP_UMASK); - case KEY_getservent: - FUN0(OP_GSERVENT); + case KEY_unshift: + LOP(OP_UNSHIFT,XTERM); - case KEY_getsockname: - UNI(OP_GETSOCKNAME); + case KEY_use: + s = tokenize_use(1, s); + TOKEN(USE); - case KEY_getsockopt: - LOP(OP_GSOCKOPT,XTERM); + case KEY_values: + UNI(OP_VALUES); - case KEY_getgrent: - FUN0(OP_GGRENT); + case KEY_vec: + LOP(OP_VEC,XTERM); - case KEY_getgrnam: - UNI(OP_GGRNAM); + case KEY_when: + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) + return REPORT(0); + pl_yylval.ival = CopLINE(PL_curcop); + Perl_ck_warner_d(aTHX_ + packWARN(WARN_EXPERIMENTAL__SMARTMATCH), + "when is experimental"); + OPERATOR(WHEN); - case KEY_getgrgid: - UNI(OP_GGRGID); + case KEY_while: + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) + return REPORT(0); + pl_yylval.ival = CopLINE(PL_curcop); + OPERATOR(WHILE); - case KEY_getlogin: - FUN0(OP_GETLOGIN); + case KEY_warn: + PL_hints |= HINT_BLOCK_SCOPE; + LOP(OP_WARN,XTERM); - case KEY_given: - pl_yylval.ival = CopLINE(PL_curcop); - Perl_ck_warner_d(aTHX_ - packWARN(WARN_EXPERIMENTAL__SMARTMATCH), - "given is experimental"); - OPERATOR(GIVEN); + case KEY_wait: + FUN0(OP_WAIT); - case KEY_glob: - LOP( - orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, - XTERM - ); + case KEY_waitpid: + LOP(OP_WAITPID,XTERM); - case KEY_hex: - UNI(OP_HEX); + case KEY_wantarray: + FUN0(OP_WANTARRAY); - case KEY_if: - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) - return REPORT(0); - pl_yylval.ival = CopLINE(PL_curcop); - OPERATOR(IF); + case KEY_write: + /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and + * we use the same number on EBCDIC */ + gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV); + UNI(OP_ENTERWRITE); - case KEY_index: - LOP(OP_INDEX,XTERM); + case KEY_x: + if (PL_expect == XOPERATOR) { + if (*s == '=' && !PL_lex_allbrackets + && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) + { + return REPORT(0); + } + Mop(OP_REPEAT); + } + check_uni(); + return yyl_just_a_word(aTHX_ s, len, key, orig_keyword, + c, saw_infix_sigil); - case KEY_int: - UNI(OP_INT); + case KEY_xor: + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) + return REPORT(0); + pl_yylval.ival = OP_XOR; + OPERATOR(OROP); + } +} - case KEY_ioctl: - LOP(OP_IOCTL,XTERM); +static int +yyl_key_core(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, + struct code c, bool bof, const bool saw_infix_sigil) +{ + STRLEN olen = len; + char *d = s; + s += 2; + s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); + if ((*s == ':' && s[1] == ':') + || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\'')) + { + Copy(PL_bufptr, PL_tokenbuf, olen, char); + return yyl_just_a_word(aTHX_ d, olen, key, orig_keyword, c, saw_infix_sigil); + } + if (!key) + Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword", + UTF8fARG(UTF, len, PL_tokenbuf)); + if (key < 0) + key = -key; + else if (key == KEY_require || key == KEY_do + || key == KEY_glob) + /* that's a way to remember we saw "CORE::" */ + orig_keyword = key; - case KEY_join: - LOP(OP_JOIN,XTERM); + /* Known to be a reserved word at this point */ + return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c, + bof, saw_infix_sigil); +} - case KEY_keys: - UNI(OP_KEYS); +static int +yyl_try(pTHX_ char initial_state, char *s, STRLEN len, + U8 formbrack, const bool saw_infix_sigil) +{ + char *d; + bool bof = FALSE; + GV *gv = NULL, **gvp = NULL; - case KEY_kill: - LOP(OP_KILL,XTERM); + switch (initial_state) { + case '}': goto rightbracket; + } - case KEY_last: - LOOPX(OP_LAST); + switch (*s) { + default: + if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) + goto keylookup; + yyl_croak_unrecognised(aTHX_ s); - case KEY_lc: - UNI(OP_LC); + case 4: + case 26: + /* emulate EOF on ^D or ^Z */ + return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s, len); - case KEY_lcfirst: - UNI(OP_LCFIRST); + case 0: + if ((!PL_rsfp || PL_lex_inwhat) + && (!PL_parser->filtered || s+1 < PL_bufend)) { + PL_last_uni = 0; + PL_last_lop = 0; + if (PL_lex_brackets + && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) + { + yyerror((const char *) + (PL_lex_formbrack + ? "Format not terminated" + : "Missing right curly or square bracket")); + } + DEBUG_T({ + PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n"); + }); + TOKEN(0); + } + if (s++ < PL_bufend) + return RETRY(); /* ignore stray nulls */ + PL_last_uni = 0; + PL_last_lop = 0; + if (!PL_in_eval && !PL_preambled) { + PL_preambled = TRUE; + if (PL_perldb) { + /* Generate a string of Perl code to load the debugger. + * If PERL5DB is set, it will return the contents of that, + * otherwise a compile-time require of perl5db.pl. */ - case KEY_local: - OPERATOR(LOCAL); + const char * const pdb = PerlEnv_getenv("PERL5DB"); - case KEY_length: - UNI(OP_LENGTH); + if (pdb) { + sv_setpv(PL_linestr, pdb); + sv_catpvs(PL_linestr,";"); + } else { + SETERRNO(0,SS_NORMAL); + sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };"); + } + PL_parser->preambling = CopLINE(PL_curcop); + } else + SvPVCLEAR(PL_linestr); + if (PL_preambleav) { + SV **svp = AvARRAY(PL_preambleav); + SV **const end = svp + AvFILLp(PL_preambleav); + while(svp <= end) { + sv_catsv(PL_linestr, *svp); + ++svp; + sv_catpvs(PL_linestr, ";"); + } + sv_free(MUTABLE_SV(PL_preambleav)); + PL_preambleav = NULL; + } + if (PL_minus_E) + sv_catpvs(PL_linestr, + "use feature ':5." STRINGIFY(PERL_VERSION) "';"); + if (PL_minus_n || PL_minus_p) { + sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/); + if (PL_minus_l) + sv_catpvs(PL_linestr,"chomp;"); + if (PL_minus_a) { + if (PL_minus_F) { + if ( ( *PL_splitstr == '/' + || *PL_splitstr == '\'' + || *PL_splitstr == '"') + && strchr(PL_splitstr + 1, *PL_splitstr)) + { + /* strchr is ok, because -F pattern can't contain + * embeddded NULs */ + Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); + } + else { + /* "q\0${splitstr}\0" is legal perl. Yes, even NUL + bytes can be used as quoting characters. :-) */ + const char *splits = PL_splitstr; + sv_catpvs(PL_linestr, "our @F=split(q\0"); + do { + /* Need to \ \s */ + if (*splits == '\\') + sv_catpvn(PL_linestr, splits, 1); + sv_catpvn(PL_linestr, splits, 1); + } while (*splits++); + /* This loop will embed the trailing NUL of + PL_linestr as the last thing it does before + terminating. */ + sv_catpvs(PL_linestr, ");"); + } + } + else + sv_catpvs(PL_linestr,"our @F=split(' ');"); + } + } + sv_catpvs(PL_linestr, "\n"); + PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); + PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = NULL; + if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash) + update_debugger_info(PL_linestr, NULL, 0); + return RETRY(); + } + return yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s, len); - case KEY_lt: - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) - return REPORT(0); - Rop(OP_SLT); + case '\r': +#ifdef PERL_STRICT_CR + Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); + Perl_croak(aTHX_ + "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); +#endif + case ' ': case '\t': case '\f': case '\v': + s++; + return RETRY(); - case KEY_le: - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) - return REPORT(0); - Rop(OP_SLE); + case '#': + case '\n': + return yyl_eol(aTHX_ s, len); - case KEY_localtime: - UNI(OP_LOCALTIME); + case '-': + return yyl_hyphen(aTHX_ s); - case KEY_log: - UNI(OP_LOG); + case '+': + return yyl_plus(aTHX_ s); - case KEY_link: - LOP(OP_LINK,XTERM); + case '*': + return yyl_star(aTHX_ s); - case KEY_listen: - LOP(OP_LISTEN,XTERM); + case '%': + return yyl_percent(aTHX_ s); - case KEY_lock: - UNI(OP_LOCK); + case '^': + return yyl_caret(aTHX_ s); - case KEY_lstat: - UNI(OP_LSTAT); + case '[': + return yyl_leftsquare(aTHX_ s); - case KEY_m: - s = scan_pat(s,OP_MATCH); - TERM(sublex_start()); + case '~': + return yyl_tilde(aTHX_ s); - case KEY_map: - LOP(OP_MAPSTART, XREF); + case ',': + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) + TOKEN(0); + s++; + OPERATOR(','); + case ':': + if (s[1] == ':') + return yyl_just_a_word(aTHX_ s, 0, 0, 0, no_code, saw_infix_sigil); + return yyl_colon(aTHX_ s + 1); - case KEY_mkdir: - LOP(OP_MKDIR,XTERM); + case '(': + return yyl_leftparen(aTHX_ s + 1); - case KEY_msgctl: - LOP(OP_MSGCTL,XTERM); + case ';': + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) + TOKEN(0); + CLINE; + s++; + PL_expect = XSTATE; + TOKEN(';'); - case KEY_msgget: - LOP(OP_MSGGET,XTERM); + case ')': + return yyl_rightparen(aTHX_ s); - case KEY_msgrcv: - LOP(OP_MSGRCV,XTERM); + case ']': + return yyl_rightsquare(aTHX_ s); - case KEY_msgsnd: - LOP(OP_MSGSND,XTERM); + case '{': + s++; + leftbracket: + return yyl_leftcurly(aTHX_ s, formbrack); - case KEY_our: - case KEY_my: - case KEY_state: - return yyl_my(aTHX_ s, tmp); + case '}': + if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) + TOKEN(0); + rightbracket: + assert(s != PL_bufend); + return yyl_rightcurly(aTHX_ s + 1, formbrack); - case KEY_next: - LOOPX(OP_NEXT); + case '&': + return yyl_ampersand(aTHX_ s); - case KEY_ne: - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) - return REPORT(0); - Eop(OP_SNE); + case '|': + return yyl_verticalbar(aTHX_ s); - case KEY_no: - s = tokenize_use(0, s); - TOKEN(USE); + case '=': + if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n') + && memBEGINs(s + 2, (STRLEN) (PL_bufend - s + 2), "=====")) + { + s = vcs_conflict_marker(s + 7); + return RETRY(); + } - case KEY_not: - if (*s == '(' || (s = skipspace(s), *s == '(')) - FUN1(OP_NOT); - else { + s++; + { + const char tmp = *s++; + if (tmp == '=') { if (!PL_lex_allbrackets - && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) + && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { - PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; - } - OPERATOR(NOTOP); + s -= 2; + TOKEN(0); + } + Eop(OP_EQ); } - - case KEY_open: - s = skipspace(s); - if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { - const char *t; - d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, - &len); - for (t=d; isSPACE(*t);) - t++; - if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE) - /* [perl #16184] */ - && !(t[0] == '=' && t[1] == '>') - && !(t[0] == ':' && t[1] == ':') - && !keyword(s, d-s, 0) - ) { - Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), - "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")", - UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s)); + if (tmp == '>') { + if (!PL_lex_allbrackets + && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) + { + s -= 2; + TOKEN(0); } + OPERATOR(','); } - LOP(OP_OPEN,XTERM); - - case KEY_or: - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) - return REPORT(0); - pl_yylval.ival = OP_OR; - OPERATOR(OROP); - - case KEY_ord: - UNI(OP_ORD); - - case KEY_oct: - UNI(OP_OCT); - - case KEY_opendir: - LOP(OP_OPEN_DIR,XTERM); - - case KEY_print: - checkcomma(s,PL_tokenbuf,"filehandle"); - LOP(OP_PRINT,XREF); - - case KEY_printf: - checkcomma(s,PL_tokenbuf,"filehandle"); - LOP(OP_PRTF,XREF); - - case KEY_prototype: - UNI(OP_PROTOTYPE); - - case KEY_push: - LOP(OP_PUSH,XTERM); - - case KEY_pop: - UNIDOR(OP_POP); - - case KEY_pos: - UNIDOR(OP_POS); - - case KEY_pack: - LOP(OP_PACK,XTERM); - - case KEY_package: - s = force_word(s,BAREWORD,FALSE,TRUE); - s = skipspace(s); - s = force_strict_version(s); - PREBLOCK(PACKAGE); - - case KEY_pipe: - LOP(OP_PIPE_OP,XTERM); - - case KEY_q: - s = scan_str(s,FALSE,FALSE,FALSE,NULL); - if (!s) - missingterm(NULL, 0); - COPLINE_SET_FROM_MULTI_END; - pl_yylval.ival = OP_CONST; - TERM(sublex_start()); - - case KEY_quotemeta: - UNI(OP_QUOTEMETA); - - case KEY_qw: - return yyl_qw(aTHX_ s, len); - - case KEY_qq: - s = scan_str(s,FALSE,FALSE,FALSE,NULL); - if (!s) - missingterm(NULL, 0); - pl_yylval.ival = OP_STRINGIFY; - if (SvIVX(PL_lex_stuff) == '\'') - SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */ - TERM(sublex_start()); - - case KEY_qr: - s = scan_pat(s,OP_QR); - TERM(sublex_start()); - - case KEY_qx: - s = scan_str(s,FALSE,FALSE,FALSE,NULL); - if (!s) - missingterm(NULL, 0); - pl_yylval.ival = OP_BACKTICK; - TERM(sublex_start()); - - case KEY_return: - OLDLOP(OP_RETURN); - - case KEY_require: - return yyl_require(aTHX_ s, orig_keyword); - - case KEY_reset: - UNI(OP_RESET); - - case KEY_redo: - LOOPX(OP_REDO); - - case KEY_rename: - LOP(OP_RENAME,XTERM); - - case KEY_rand: - UNI(OP_RAND); - - case KEY_rmdir: - UNI(OP_RMDIR); - - case KEY_rindex: - LOP(OP_RINDEX,XTERM); - - case KEY_read: - LOP(OP_READ,XTERM); - - case KEY_readdir: - UNI(OP_READDIR); - - case KEY_readline: - UNIDOR(OP_READLINE); - - case KEY_readpipe: - UNIDOR(OP_BACKTICK); - - case KEY_rewinddir: - UNI(OP_REWINDDIR); - - case KEY_recv: - LOP(OP_RECV,XTERM); - - case KEY_reverse: - LOP(OP_REVERSE,XTERM); - - case KEY_readlink: - UNIDOR(OP_READLINK); - - case KEY_ref: - UNI(OP_REF); - - case KEY_s: - s = scan_subst(s); - if (pl_yylval.opval) - TERM(sublex_start()); - else - TOKEN(1); /* force error */ - - case KEY_say: - checkcomma(s,PL_tokenbuf,"filehandle"); - LOP(OP_SAY,XREF); - - case KEY_chomp: - UNI(OP_CHOMP); - - case KEY_scalar: - UNI(OP_SCALAR); - - case KEY_select: - LOP(OP_SELECT,XTERM); - - case KEY_seek: - LOP(OP_SEEK,XTERM); - - case KEY_semctl: - LOP(OP_SEMCTL,XTERM); - - case KEY_semget: - LOP(OP_SEMGET,XTERM); - - case KEY_semop: - LOP(OP_SEMOP,XTERM); - - case KEY_send: - LOP(OP_SEND,XTERM); - - case KEY_setpgrp: - LOP(OP_SETPGRP,XTERM); - - case KEY_setpriority: - LOP(OP_SETPRIORITY,XTERM); - - case KEY_sethostent: - UNI(OP_SHOSTENT); - - case KEY_setnetent: - UNI(OP_SNETENT); - - case KEY_setservent: - UNI(OP_SSERVENT); - - case KEY_setprotoent: - UNI(OP_SPROTOENT); - - case KEY_setpwent: - FUN0(OP_SPWENT); - - case KEY_setgrent: - FUN0(OP_SGRENT); - - case KEY_seekdir: - LOP(OP_SEEKDIR,XTERM); - - case KEY_setsockopt: - LOP(OP_SSOCKOPT,XTERM); - - case KEY_shift: - UNIDOR(OP_SHIFT); - - case KEY_shmctl: - LOP(OP_SHMCTL,XTERM); - - case KEY_shmget: - LOP(OP_SHMGET,XTERM); - - case KEY_shmread: - LOP(OP_SHMREAD,XTERM); - - case KEY_shmwrite: - LOP(OP_SHMWRITE,XTERM); - - case KEY_shutdown: - LOP(OP_SHUTDOWN,XTERM); - - case KEY_sin: - UNI(OP_SIN); - - case KEY_sleep: - UNI(OP_SLEEP); - - case KEY_socket: - LOP(OP_SOCKET,XTERM); - - case KEY_socketpair: - LOP(OP_SOCKPAIR,XTERM); - - case KEY_sort: - checkcomma(s,PL_tokenbuf,"subroutine name"); - s = skipspace(s); - PL_expect = XTERM; - s = force_word(s,BAREWORD,TRUE,TRUE); - LOP(OP_SORT,XREF); - - case KEY_split: - LOP(OP_SPLIT,XTERM); - - case KEY_sprintf: - LOP(OP_SPRINTF,XTERM); - - case KEY_splice: - LOP(OP_SPLICE,XTERM); - - case KEY_sqrt: - UNI(OP_SQRT); - - case KEY_srand: - UNI(OP_SRAND); - - case KEY_stat: - UNI(OP_STAT); - - case KEY_study: - UNI(OP_STUDY); - - case KEY_substr: - LOP(OP_SUBSTR,XTERM); - - case KEY_format: - case KEY_sub: - return yyl_sub(aTHX_ s, tmp); - - case KEY_system: - LOP(OP_SYSTEM,XREF); - - case KEY_symlink: - LOP(OP_SYMLINK,XTERM); - - case KEY_syscall: - LOP(OP_SYSCALL,XTERM); - - case KEY_sysopen: - LOP(OP_SYSOPEN,XTERM); - - case KEY_sysseek: - LOP(OP_SYSSEEK,XTERM); - - case KEY_sysread: - LOP(OP_SYSREAD,XTERM); - - case KEY_syswrite: - LOP(OP_SYSWRITE,XTERM); - - case KEY_tr: - case KEY_y: - s = scan_trans(s); - TERM(sublex_start()); - - case KEY_tell: - UNI(OP_TELL); - - case KEY_telldir: - UNI(OP_TELLDIR); - - case KEY_tie: - LOP(OP_TIE,XTERM); - - case KEY_tied: - UNI(OP_TIED); + if (tmp == '~') + PMop(OP_MATCH); + if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX) + && strchr("+-*/%.^&|<",tmp)) + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "Reversed %c= operator",(int)tmp); + s--; + if (PL_expect == XSTATE + && isALPHA(tmp) + && (s == PL_linestart+1 || s[-2] == '\n') ) + { + if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered) + || PL_lex_state != LEX_NORMAL) + { + d = PL_bufend; + while (s < d) { + if (*s++ == '\n') { + incline(s, PL_bufend); + if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut")) + { + s = (char *) memchr(s,'\n', d - s); + if (s) + s++; + else + s = d; + incline(s, PL_bufend); + return RETRY(); + } + } + } + return RETRY(); + } + s = PL_bufend; + PL_parser->in_pod = 1; + return RETRY(); + } + } + if (PL_expect == XBLOCK) { + const char *t = s; +#ifdef PERL_STRICT_CR + while (SPACE_OR_TAB(*t)) +#else + while (SPACE_OR_TAB(*t) || *t == '\r') +#endif + t++; + if (*t == '\n' || *t == '#') { + formbrack = 1; + ENTER_with_name("lex_format"); + SAVEI8(PL_parser->form_lex_state); + SAVEI32(PL_lex_formbrack); + PL_parser->form_lex_state = PL_lex_state; + PL_lex_formbrack = PL_lex_brackets + 1; + PL_parser->sub_error_count = PL_error_count; + goto leftbracket; + } + } + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { + s--; + TOKEN(0); + } + pl_yylval.ival = 0; + OPERATOR(ASSIGNOP); - case KEY_time: - FUN0(OP_TIME); + case '!': + return yyl_bang(aTHX_ s + 1); - case KEY_times: - FUN0(OP_TMS); + case '<': + if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n') + && memBEGINs(s+2, (STRLEN) (PL_bufend - (s+2)), "<<<<<")) + { + s = vcs_conflict_marker(s + 7); + return RETRY(); + } + return yyl_leftpointy(aTHX_ s); - case KEY_truncate: - LOP(OP_TRUNCATE,XTERM); + case '>': + if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n') + && memBEGINs(s + 2, (STRLEN) (PL_bufend - s + 2), ">>>>>")) + { + s = vcs_conflict_marker(s + 7); + return RETRY(); + } + return yyl_rightpointy(aTHX_ s + 1); - case KEY_uc: - UNI(OP_UC); + case '$': + return yyl_dollar(aTHX_ s); - case KEY_ucfirst: - UNI(OP_UCFIRST); + case '@': + return yyl_snail(aTHX_ s); - case KEY_untie: - UNI(OP_UNTIE); + case '/': /* may be division, defined-or, or pattern */ + return yyl_slash(aTHX_ s); - case KEY_until: - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) - return REPORT(0); - pl_yylval.ival = CopLINE(PL_curcop); - OPERATOR(UNTIL); + case '?': /* conditional */ + s++; + if (!PL_lex_allbrackets + && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) + { + s--; + TOKEN(0); + } + PL_lex_allbrackets++; + OPERATOR('?'); - case KEY_unless: - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) - return REPORT(0); - pl_yylval.ival = CopLINE(PL_curcop); - OPERATOR(UNLESS); + case '.': + if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack +#ifdef PERL_STRICT_CR + && s[1] == '\n' +#else + && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n')) +#endif + && (s == PL_linestart || s[-1] == '\n') ) + { + PL_expect = XSTATE; + formbrack = 2; /* dot seen where arguments expected */ + goto rightbracket; + } + if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { + s += 3; + OPERATOR(YADAYADA); + } + if (PL_expect == XOPERATOR || !isDIGIT(s[1])) { + char tmp = *s++; + if (*s == tmp) { + if (!PL_lex_allbrackets + && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) + { + s--; + TOKEN(0); + } + s++; + if (*s == tmp) { + s++; + pl_yylval.ival = OPf_SPECIAL; + } + else + pl_yylval.ival = 0; + OPERATOR(DOTDOT); + } + if (*s == '=' && !PL_lex_allbrackets + && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) + { + s--; + TOKEN(0); + } + Aop(OP_CONCAT); + } + /* FALLTHROUGH */ + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + s = scan_num(s, &pl_yylval); + DEBUG_T( { printbuf("### Saw number in %s\n", s); } ); + if (PL_expect == XOPERATOR) + no_op("Number",s); + TERM(THING); - case KEY_unlink: - LOP(OP_UNLINK,XTERM); + case '\'': + return yyl_sglquote(aTHX_ s); - case KEY_undef: - UNIDOR(OP_UNDEF); + case '"': + return yyl_dblquote(aTHX_ s, len); - case KEY_unpack: - LOP(OP_UNPACK,XTERM); + case '`': + return yyl_backtick(aTHX_ s); - case KEY_utime: - LOP(OP_UTIME,XTERM); + case '\\': + return yyl_backslash(aTHX_ s + 1); - case KEY_umask: - UNIDOR(OP_UMASK); + case 'v': + if (isDIGIT(s[1]) && PL_expect != XOPERATOR) { + char *start = s + 2; + while (isDIGIT(*start) || *start == '_') + start++; + if (*start == '.' && isDIGIT(start[1])) { + s = scan_num(s, &pl_yylval); + TERM(THING); + } + else if ((*start == ':' && start[1] == ':') + || (PL_expect == XSTATE && *start == ':')) + goto keylookup; + else if (PL_expect == XSTATE) { + d = start; + while (d < PL_bufend && isSPACE(*d)) d++; + if (*d == ':') goto keylookup; + } + /* avoid v123abc() or $h{v1}, allow C */ + if (!isALPHA(*start) && (PL_expect == XTERM + || PL_expect == XREF || PL_expect == XSTATE + || PL_expect == XTERMORDORDOR)) { + GV *const gv = gv_fetchpvn_flags(s, start - s, + UTF ? SVf_UTF8 : 0, SVt_PVCV); + if (!gv) { + s = scan_num(s, &pl_yylval); + TERM(THING); + } + } + } + goto keylookup; + case 'x': + if (isDIGIT(s[1]) && PL_expect == XOPERATOR) { + s++; + Mop(OP_REPEAT); + } + goto keylookup; - case KEY_unshift: - LOP(OP_UNSHIFT,XTERM); + case '_': + case 'a': case 'A': + case 'b': case 'B': + case 'c': case 'C': + case 'd': case 'D': + case 'e': case 'E': + case 'f': case 'F': + case 'g': case 'G': + case 'h': case 'H': + case 'i': case 'I': + case 'j': case 'J': + case 'k': case 'K': + case 'l': case 'L': + case 'm': case 'M': + case 'n': case 'N': + case 'o': case 'O': + case 'p': case 'P': + case 'q': case 'Q': + case 'r': case 'R': + case 's': case 'S': + case 't': case 'T': + case 'u': case 'U': + case 'V': + case 'w': case 'W': + case 'X': + case 'y': case 'Y': + case 'z': case 'Z': - case KEY_use: - s = tokenize_use(1, s); - TOKEN(USE); + keylookup: { + bool anydelim; + I32 tmp = 0; + SV *sv = NULL; + CV *cv = NULL; + PADOFFSET off = 0; + OP *rv2cv_op = NULL; + I32 orig_keyword = 0; - case KEY_values: - UNI(OP_VALUES); + gv = NULL; + gvp = NULL; - case KEY_vec: - LOP(OP_VEC,XTERM); + PL_bufptr = s; + s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); - case KEY_when: - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) - return REPORT(0); - pl_yylval.ival = CopLINE(PL_curcop); - Perl_ck_warner_d(aTHX_ - packWARN(WARN_EXPERIMENTAL__SMARTMATCH), - "when is experimental"); - OPERATOR(WHEN); + /* Some keywords can be followed by any delimiter, including ':' */ + anydelim = word_takes_any_delimiter(PL_tokenbuf, len); - case KEY_while: - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) - return REPORT(0); - pl_yylval.ival = CopLINE(PL_curcop); - OPERATOR(WHILE); + /* x::* is just a word, unless x is "CORE" */ + if (!anydelim && *s == ':' && s[1] == ':') { + struct code c = MAKE_CODE(FALSE); + if (memEQs(PL_tokenbuf, len, "CORE")) + return yyl_key_core(aTHX_ s, len, tmp, orig_keyword, c, bof, saw_infix_sigil); + return yyl_just_a_word(aTHX_ s, len, 0, orig_keyword, c, saw_infix_sigil); + } - case KEY_warn: - PL_hints |= HINT_BLOCK_SCOPE; - LOP(OP_WARN,XTERM); + d = s; + while (d < PL_bufend && isSPACE(*d)) + d++; /* no comments skipped here, or s### is misparsed */ - case KEY_wait: - FUN0(OP_WAIT); + /* Is this a word before a => operator? */ + if (*d == '=' && d[1] == '>') { + return yyl_fatcomma(aTHX_ s, len); + } - case KEY_waitpid: - LOP(OP_WAITPID,XTERM); + /* Check for plugged-in keyword */ + { + OP *o; + int result; + char *saved_bufptr = PL_bufptr; + PL_bufptr = s; + result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o); + s = PL_bufptr; + if (result == KEYWORD_PLUGIN_DECLINE) { + /* not a plugged-in keyword */ + PL_bufptr = saved_bufptr; + } else if (result == KEYWORD_PLUGIN_STMT) { + pl_yylval.opval = o; + CLINE; + if (!PL_nexttoke) PL_expect = XSTATE; + return REPORT(PLUGSTMT); + } else if (result == KEYWORD_PLUGIN_EXPR) { + pl_yylval.opval = o; + CLINE; + if (!PL_nexttoke) PL_expect = XOPERATOR; + return REPORT(PLUGEXPR); + } else { + Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", + PL_tokenbuf); + } + } - case KEY_wantarray: - FUN0(OP_WANTARRAY); + /* Check for built-in keyword */ + tmp = keyword(PL_tokenbuf, len, 0); - case KEY_write: - /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and - * we use the same number on EBCDIC */ - gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV); - UNI(OP_ENTERWRITE); + /* Is this a label? */ + if (!anydelim && PL_expect == XSTATE + && d < PL_bufend && *d == ':' && *(d + 1) != ':') { + s = d + 1; + pl_yylval.opval = + newSVOP(OP_CONST, 0, + newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0)); + CLINE; + TOKEN(LABEL); + } - case KEY_x: - if (PL_expect == XOPERATOR) { - if (*s == '=' && !PL_lex_allbrackets - && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) - { - return REPORT(0); - } - Mop(OP_REPEAT); + /* Check for lexical sub */ + if (PL_expect != XOPERATOR) { + char tmpbuf[sizeof PL_tokenbuf + 1]; + *tmpbuf = '&'; + Copy(PL_tokenbuf, tmpbuf+1, len, char); + off = pad_findmy_pvn(tmpbuf, len+1, 0); + if (off != NOT_IN_PAD) { + assert(off); /* we assume this is boolean-true below */ + if (PAD_COMPNAME_FLAGS_isOUR(off)) { + HV * const stash = PAD_COMPNAME_OURSTASH(off); + HEK * const stashname = HvNAME_HEK(stash); + sv = newSVhek(stashname); + sv_catpvs(sv, "::"); + sv_catpvn_flags(sv, PL_tokenbuf, len, + (UTF ? SV_CATUTF8 : SV_CATBYTES)); + gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv), + SVt_PVCV); + off = 0; + if (!gv) { + sv_free(sv); + sv = NULL; + return yyl_just_a_word(aTHX_ s, len, tmp, orig_keyword, + MAKE_CODE(FALSE), saw_infix_sigil); + } + } + else { + rv2cv_op = newOP(OP_PADANY, 0); + rv2cv_op->op_targ = off; + cv = find_lexical_cv(off); + } + return yyl_just_a_word(aTHX_ s, len, tmp, orig_keyword, + MAKE_CODE(TRUE), saw_infix_sigil); } - check_uni(); - return yyl_just_a_word(aTHX_ s, len, tmp, orig_keyword, - MAKE_CODE(FALSE), saw_infix_sigil); + off = 0; + } + + if (tmp < 0) + tmp = yyl_secondclass_keyword(aTHX_ s, len, tmp, &orig_keyword, &gv, &gvp); - case KEY_xor: - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) - return REPORT(0); - pl_yylval.ival = OP_XOR; - OPERATOR(OROP); + if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__ + && (!anydelim || *s != '#')) { + /* no override, and not s### either; skipspace is safe here + * check for => on following line */ + bool arrow; + STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr); + STRLEN soff = s - SvPVX(PL_linestr); + s = peekspace(s); + arrow = *s == '=' && s[1] == '>'; + PL_bufptr = SvPVX(PL_linestr) + bufoff; + s = SvPVX(PL_linestr) + soff; + if (arrow) + return yyl_fatcomma(aTHX_ s, len); } + + return yyl_word_or_keyword(aTHX_ s, len, tmp, orig_keyword, + MAKE_CODE(FALSE), bof, saw_infix_sigil); }} } From a5ed76862204ca7631824464c5a2ec98fc8ec748 Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Wed, 23 Oct 2019 17:14:16 +0100 Subject: [PATCH 12/20] toke.c: factor out static yyl_keylookup() --- toke.c | 300 +++++++++++++++++++++++++++------------------------------ 1 file changed, 143 insertions(+), 157 deletions(-) diff --git a/toke.c b/toke.c index 7c398e7611d5..9ac5334de5c3 100644 --- a/toke.c +++ b/toke.c @@ -303,23 +303,6 @@ struct code { static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE }; -PERL_STATIC_INLINE struct code -make_code(SV *sv, CV *cv, GV *gv, GV **gvp, OP *rv2cv_op, PADOFFSET off, bool lex) -{ - struct code c; - c.sv = sv; - c.sv = sv; - c.cv = cv; - c.gv = gv; - c.gvp = gvp; - c.rv2cv_op = rv2cv_op; - c.off = off; - c.lex = lex; - return c; -} - -#define MAKE_CODE(lEx) make_code(sv, cv, gv, gvp, rv2cv_op, off, lEx) - #ifdef DEBUGGING @@ -8405,13 +8388,146 @@ yyl_key_core(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, bof, saw_infix_sigil); } +static int +yyl_keylookup(pTHX_ char *s, GV *gv, bool bof, bool saw_infix_sigil) +{ + STRLEN len; + bool anydelim; + I32 tmp = 0; + struct code c = no_code; + I32 orig_keyword = 0; + char *d; + + c.gv = gv; + + PL_bufptr = s; + s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); + + /* Some keywords can be followed by any delimiter, including ':' */ + anydelim = word_takes_any_delimiter(PL_tokenbuf, len); + + /* x::* is just a word, unless x is "CORE" */ + if (!anydelim && *s == ':' && s[1] == ':') { + if (memEQs(PL_tokenbuf, len, "CORE")) + return yyl_key_core(aTHX_ s, len, tmp, orig_keyword, c, bof, saw_infix_sigil); + return yyl_just_a_word(aTHX_ s, len, 0, orig_keyword, c, saw_infix_sigil); + } + + d = s; + while (d < PL_bufend && isSPACE(*d)) + d++; /* no comments skipped here, or s### is misparsed */ + + /* Is this a word before a => operator? */ + if (*d == '=' && d[1] == '>') { + return yyl_fatcomma(aTHX_ s, len); + } + + /* Check for plugged-in keyword */ + { + OP *o; + int result; + char *saved_bufptr = PL_bufptr; + PL_bufptr = s; + result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o); + s = PL_bufptr; + if (result == KEYWORD_PLUGIN_DECLINE) { + /* not a plugged-in keyword */ + PL_bufptr = saved_bufptr; + } else if (result == KEYWORD_PLUGIN_STMT) { + pl_yylval.opval = o; + CLINE; + if (!PL_nexttoke) PL_expect = XSTATE; + return REPORT(PLUGSTMT); + } else if (result == KEYWORD_PLUGIN_EXPR) { + pl_yylval.opval = o; + CLINE; + if (!PL_nexttoke) PL_expect = XOPERATOR; + return REPORT(PLUGEXPR); + } else { + Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf); + } + } + + /* Check for built-in keyword */ + tmp = keyword(PL_tokenbuf, len, 0); + + /* Is this a label? */ + if (!anydelim && PL_expect == XSTATE + && d < PL_bufend && *d == ':' && *(d + 1) != ':') { + s = d + 1; + pl_yylval.opval = + newSVOP(OP_CONST, 0, + newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0)); + CLINE; + TOKEN(LABEL); + } + + /* Check for lexical sub */ + if (PL_expect != XOPERATOR) { + char tmpbuf[sizeof PL_tokenbuf + 1]; + *tmpbuf = '&'; + Copy(PL_tokenbuf, tmpbuf+1, len, char); + c.off = pad_findmy_pvn(tmpbuf, len+1, 0); + if (c.off != NOT_IN_PAD) { + assert(c.off); /* we assume this is boolean-true below */ + if (PAD_COMPNAME_FLAGS_isOUR(c.off)) { + HV * const stash = PAD_COMPNAME_OURSTASH(c.off); + HEK * const stashname = HvNAME_HEK(stash); + c.sv = newSVhek(stashname); + sv_catpvs(c.sv, "::"); + sv_catpvn_flags(c.sv, PL_tokenbuf, len, + (UTF ? SV_CATUTF8 : SV_CATBYTES)); + c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv), + SVt_PVCV); + c.off = 0; + if (!c.gv) { + sv_free(c.sv); + c.sv = NULL; + return yyl_just_a_word(aTHX_ s, len, tmp, orig_keyword, + c, saw_infix_sigil); + } + } + else { + c.rv2cv_op = newOP(OP_PADANY, 0); + c.rv2cv_op->op_targ = c.off; + c.cv = find_lexical_cv(c.off); + } + c.lex = TRUE; + return yyl_just_a_word(aTHX_ s, len, tmp, orig_keyword, + c, saw_infix_sigil); + } + c.off = 0; + } + + if (tmp < 0) + tmp = yyl_secondclass_keyword(aTHX_ s, len, tmp, &orig_keyword, &c.gv, &c.gvp); + + if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__ + && (!anydelim || *s != '#')) { + /* no override, and not s### either; skipspace is safe here + * check for => on following line */ + bool arrow; + STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr); + STRLEN soff = s - SvPVX(PL_linestr); + s = peekspace(s); + arrow = *s == '=' && s[1] == '>'; + PL_bufptr = SvPVX(PL_linestr) + bufoff; + s = SvPVX(PL_linestr) + soff; + if (arrow) + return yyl_fatcomma(aTHX_ s, len); + } + + return yyl_word_or_keyword(aTHX_ s, len, tmp, orig_keyword, + c, bof, saw_infix_sigil); +} + static int yyl_try(pTHX_ char initial_state, char *s, STRLEN len, U8 formbrack, const bool saw_infix_sigil) { char *d; bool bof = FALSE; - GV *gv = NULL, **gvp = NULL; + GV *gv = NULL; switch (initial_state) { case '}': goto rightbracket; @@ -8420,7 +8536,7 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, switch (*s) { default: if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) - goto keylookup; + return yyl_keylookup(aTHX_ s, gv, bof, saw_infix_sigil); yyl_croak_unrecognised(aTHX_ s); case 4: @@ -8817,11 +8933,12 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, } else if ((*start == ':' && start[1] == ':') || (PL_expect == XSTATE && *start == ':')) - goto keylookup; + return yyl_keylookup(aTHX_ s, gv, bof, saw_infix_sigil); else if (PL_expect == XSTATE) { d = start; while (d < PL_bufend && isSPACE(*d)) d++; - if (*d == ':') goto keylookup; + if (*d == ':') + return yyl_keylookup(aTHX_ s, gv, bof, saw_infix_sigil); } /* avoid v123abc() or $h{v1}, allow C */ if (!isALPHA(*start) && (PL_expect == XTERM @@ -8835,13 +8952,14 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, } } } - goto keylookup; + return yyl_keylookup(aTHX_ s, gv, bof, saw_infix_sigil); + case 'x': if (isDIGIT(s[1]) && PL_expect == XOPERATOR) { s++; Mop(OP_REPEAT); } - goto keylookup; + return yyl_keylookup(aTHX_ s, gv, bof, saw_infix_sigil); case '_': case 'a': case 'A': @@ -8870,140 +8988,8 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, case 'X': case 'y': case 'Y': case 'z': case 'Z': - - keylookup: { - bool anydelim; - I32 tmp = 0; - SV *sv = NULL; - CV *cv = NULL; - PADOFFSET off = 0; - OP *rv2cv_op = NULL; - I32 orig_keyword = 0; - - gv = NULL; - gvp = NULL; - - PL_bufptr = s; - s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); - - /* Some keywords can be followed by any delimiter, including ':' */ - anydelim = word_takes_any_delimiter(PL_tokenbuf, len); - - /* x::* is just a word, unless x is "CORE" */ - if (!anydelim && *s == ':' && s[1] == ':') { - struct code c = MAKE_CODE(FALSE); - if (memEQs(PL_tokenbuf, len, "CORE")) - return yyl_key_core(aTHX_ s, len, tmp, orig_keyword, c, bof, saw_infix_sigil); - return yyl_just_a_word(aTHX_ s, len, 0, orig_keyword, c, saw_infix_sigil); - } - - d = s; - while (d < PL_bufend && isSPACE(*d)) - d++; /* no comments skipped here, or s### is misparsed */ - - /* Is this a word before a => operator? */ - if (*d == '=' && d[1] == '>') { - return yyl_fatcomma(aTHX_ s, len); - } - - /* Check for plugged-in keyword */ - { - OP *o; - int result; - char *saved_bufptr = PL_bufptr; - PL_bufptr = s; - result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o); - s = PL_bufptr; - if (result == KEYWORD_PLUGIN_DECLINE) { - /* not a plugged-in keyword */ - PL_bufptr = saved_bufptr; - } else if (result == KEYWORD_PLUGIN_STMT) { - pl_yylval.opval = o; - CLINE; - if (!PL_nexttoke) PL_expect = XSTATE; - return REPORT(PLUGSTMT); - } else if (result == KEYWORD_PLUGIN_EXPR) { - pl_yylval.opval = o; - CLINE; - if (!PL_nexttoke) PL_expect = XOPERATOR; - return REPORT(PLUGEXPR); - } else { - Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", - PL_tokenbuf); - } - } - - /* Check for built-in keyword */ - tmp = keyword(PL_tokenbuf, len, 0); - - /* Is this a label? */ - if (!anydelim && PL_expect == XSTATE - && d < PL_bufend && *d == ':' && *(d + 1) != ':') { - s = d + 1; - pl_yylval.opval = - newSVOP(OP_CONST, 0, - newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0)); - CLINE; - TOKEN(LABEL); - } - - /* Check for lexical sub */ - if (PL_expect != XOPERATOR) { - char tmpbuf[sizeof PL_tokenbuf + 1]; - *tmpbuf = '&'; - Copy(PL_tokenbuf, tmpbuf+1, len, char); - off = pad_findmy_pvn(tmpbuf, len+1, 0); - if (off != NOT_IN_PAD) { - assert(off); /* we assume this is boolean-true below */ - if (PAD_COMPNAME_FLAGS_isOUR(off)) { - HV * const stash = PAD_COMPNAME_OURSTASH(off); - HEK * const stashname = HvNAME_HEK(stash); - sv = newSVhek(stashname); - sv_catpvs(sv, "::"); - sv_catpvn_flags(sv, PL_tokenbuf, len, - (UTF ? SV_CATUTF8 : SV_CATBYTES)); - gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv), - SVt_PVCV); - off = 0; - if (!gv) { - sv_free(sv); - sv = NULL; - return yyl_just_a_word(aTHX_ s, len, tmp, orig_keyword, - MAKE_CODE(FALSE), saw_infix_sigil); - } - } - else { - rv2cv_op = newOP(OP_PADANY, 0); - rv2cv_op->op_targ = off; - cv = find_lexical_cv(off); - } - return yyl_just_a_word(aTHX_ s, len, tmp, orig_keyword, - MAKE_CODE(TRUE), saw_infix_sigil); - } - off = 0; - } - - if (tmp < 0) - tmp = yyl_secondclass_keyword(aTHX_ s, len, tmp, &orig_keyword, &gv, &gvp); - - if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__ - && (!anydelim || *s != '#')) { - /* no override, and not s### either; skipspace is safe here - * check for => on following line */ - bool arrow; - STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr); - STRLEN soff = s - SvPVX(PL_linestr); - s = peekspace(s); - arrow = *s == '=' && s[1] == '>'; - PL_bufptr = SvPVX(PL_linestr) + bufoff; - s = SvPVX(PL_linestr) + soff; - if (arrow) - return yyl_fatcomma(aTHX_ s, len); - } - - return yyl_word_or_keyword(aTHX_ s, len, tmp, orig_keyword, - MAKE_CODE(FALSE), bof, saw_infix_sigil); - }} + return yyl_keylookup(aTHX_ s, gv, bof, saw_infix_sigil); + } } From c2b71845d6ac72c5f9c06664240bdfd05b4bf808 Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Wed, 23 Oct 2019 21:32:51 +0100 Subject: [PATCH 13/20] toke.c: delete weird initial_state arg to yyl_try() I thought I was going to end up using this for more stuff, but I've found better approaches. This commit also removes two more goto targets. --- toke.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/toke.c b/toke.c index 9ac5334de5c3..31527618d64e 100644 --- a/toke.c +++ b/toke.c @@ -6063,6 +6063,9 @@ yyl_leftcurly(pTHX_ char *s, U8 formbrack) static int yyl_rightcurly(pTHX_ char *s, U8 formbrack) { + assert(s != PL_bufend); + s++; + if (PL_lex_brackets <= 0) /* diag_listed_as: Unmatched right %s bracket */ yyerror("Unmatched right curly bracket"); @@ -6776,9 +6779,9 @@ yyl_my(pTHX_ char *s, I32 my) OPERATOR(MY); } -static int yyl_try(pTHX_ char, char*, STRLEN, U8, const bool); +static int yyl_try(pTHX_ char*, STRLEN, U8, const bool); -#define RETRY() yyl_try(aTHX_ 0, s, len, 0, 0) +#define RETRY() yyl_try(aTHX_ s, len, 0, 0) static int yyl_eol(pTHX_ char *s, STRLEN len) @@ -8522,17 +8525,12 @@ yyl_keylookup(pTHX_ char *s, GV *gv, bool bof, bool saw_infix_sigil) } static int -yyl_try(pTHX_ char initial_state, char *s, STRLEN len, - U8 formbrack, const bool saw_infix_sigil) +yyl_try(pTHX_ char *s, STRLEN len, U8 formbrack, const bool saw_infix_sigil) { char *d; bool bof = FALSE; GV *gv = NULL; - switch (initial_state) { - case '}': goto rightbracket; - } - switch (*s) { default: if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) @@ -8708,16 +8706,12 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, return yyl_rightsquare(aTHX_ s); case '{': - s++; - leftbracket: - return yyl_leftcurly(aTHX_ s, formbrack); + return yyl_leftcurly(aTHX_ s + 1, formbrack); case '}': if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) TOKEN(0); - rightbracket: - assert(s != PL_bufend); - return yyl_rightcurly(aTHX_ s + 1, formbrack); + return yyl_rightcurly(aTHX_ s, formbrack); case '&': return yyl_ampersand(aTHX_ s); @@ -8807,7 +8801,7 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, PL_parser->form_lex_state = PL_lex_state; PL_lex_formbrack = PL_lex_brackets + 1; PL_parser->sub_error_count = PL_error_count; - goto leftbracket; + return yyl_leftcurly(aTHX_ s, formbrack); } } if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { @@ -8869,7 +8863,7 @@ yyl_try(pTHX_ char initial_state, char *s, STRLEN len, { PL_expect = XSTATE; formbrack = 2; /* dot seen where arguments expected */ - goto rightbracket; + return yyl_rightcurly(aTHX_ s, formbrack); } if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { s += 3; @@ -9273,9 +9267,8 @@ Perl_yylex(pTHX) } assert(PL_lex_formbrack); s = scan_formline(PL_bufptr); - if (!PL_lex_formbrack) { - return yyl_try(aTHX_ '}', s, 0, 1, saw_infix_sigil); - } + if (!PL_lex_formbrack) + return yyl_rightcurly(aTHX_ s, 1); PL_bufptr = s; return yylex(); } @@ -9291,7 +9284,7 @@ Perl_yylex(pTHX) return yyl_sigvar(aTHX_ s); } - return yyl_try(aTHX_ 0, s, 0, 0, saw_infix_sigil); + return yyl_try(aTHX_ s, 0, 0, saw_infix_sigil); } From 6e93a30736704c0d33d0b91c818b0b6281615c73 Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Wed, 23 Oct 2019 21:50:28 +0100 Subject: [PATCH 14/20] toke.c: remove formbrack argument from yyl_try() With this commit, yyl_try() has few enough arguments that the RETRY() macro no longer serves any useful purpose; delete it too. --- toke.c | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/toke.c b/toke.c index 31527618d64e..a56f1228b61a 100644 --- a/toke.c +++ b/toke.c @@ -6779,9 +6779,7 @@ yyl_my(pTHX_ char *s, I32 my) OPERATOR(MY); } -static int yyl_try(pTHX_ char*, STRLEN, U8, const bool); - -#define RETRY() yyl_try(aTHX_ s, len, 0, 0) +static int yyl_try(pTHX_ char*, STRLEN, const bool); static int yyl_eol(pTHX_ char *s, STRLEN len) @@ -6824,7 +6822,7 @@ yyl_eol(pTHX_ char *s, STRLEN len) incline(s, PL_bufend); } } - return RETRY(); + return yyl_try(aTHX_ s, len, 0); } static int @@ -7076,7 +7074,7 @@ yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s, STRLEN len) PL_preambled = FALSE; if (PERLDB_LINE_OR_SAVESRC) (void)gv_fetchfile(PL_origfilename); - return RETRY(); + return yyl_try(aTHX_ s, len, 0); } } } @@ -7089,7 +7087,7 @@ yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s, STRLEN len) TOKEN(';'); } - return RETRY(); + return yyl_try(aTHX_ s, len, 0); } static int @@ -8525,7 +8523,7 @@ yyl_keylookup(pTHX_ char *s, GV *gv, bool bof, bool saw_infix_sigil) } static int -yyl_try(pTHX_ char *s, STRLEN len, U8 formbrack, const bool saw_infix_sigil) +yyl_try(pTHX_ char *s, STRLEN len, const bool saw_infix_sigil) { char *d; bool bof = FALSE; @@ -8561,7 +8559,7 @@ yyl_try(pTHX_ char *s, STRLEN len, U8 formbrack, const bool saw_infix_sigil) TOKEN(0); } if (s++ < PL_bufend) - return RETRY(); /* ignore stray nulls */ + return yyl_try(aTHX_ s, len, 0); /* ignore stray nulls */ PL_last_uni = 0; PL_last_lop = 0; if (!PL_in_eval && !PL_preambled) { @@ -8639,7 +8637,7 @@ yyl_try(pTHX_ char *s, STRLEN len, U8 formbrack, const bool saw_infix_sigil) PL_last_lop = PL_last_uni = NULL; if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash) update_debugger_info(PL_linestr, NULL, 0); - return RETRY(); + return yyl_try(aTHX_ s, len, 0); } return yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s, len); @@ -8651,7 +8649,7 @@ yyl_try(pTHX_ char *s, STRLEN len, U8 formbrack, const bool saw_infix_sigil) #endif case ' ': case '\t': case '\f': case '\v': s++; - return RETRY(); + return yyl_try(aTHX_ s, len, 0); case '#': case '\n': @@ -8706,12 +8704,12 @@ yyl_try(pTHX_ char *s, STRLEN len, U8 formbrack, const bool saw_infix_sigil) return yyl_rightsquare(aTHX_ s); case '{': - return yyl_leftcurly(aTHX_ s + 1, formbrack); + return yyl_leftcurly(aTHX_ s + 1, 0); case '}': if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) TOKEN(0); - return yyl_rightcurly(aTHX_ s, formbrack); + return yyl_rightcurly(aTHX_ s, 0); case '&': return yyl_ampersand(aTHX_ s); @@ -8724,7 +8722,7 @@ yyl_try(pTHX_ char *s, STRLEN len, U8 formbrack, const bool saw_infix_sigil) && memBEGINs(s + 2, (STRLEN) (PL_bufend - s + 2), "=====")) { s = vcs_conflict_marker(s + 7); - return RETRY(); + return yyl_try(aTHX_ s, len, 0); } s++; @@ -8774,15 +8772,15 @@ yyl_try(pTHX_ char *s, STRLEN len, U8 formbrack, const bool saw_infix_sigil) else s = d; incline(s, PL_bufend); - return RETRY(); + return yyl_try(aTHX_ s, len, 0); } } } - return RETRY(); + return yyl_try(aTHX_ s, len, 0); } s = PL_bufend; PL_parser->in_pod = 1; - return RETRY(); + return yyl_try(aTHX_ s, len, 0); } } if (PL_expect == XBLOCK) { @@ -8794,14 +8792,13 @@ yyl_try(pTHX_ char *s, STRLEN len, U8 formbrack, const bool saw_infix_sigil) #endif t++; if (*t == '\n' || *t == '#') { - formbrack = 1; ENTER_with_name("lex_format"); SAVEI8(PL_parser->form_lex_state); SAVEI32(PL_lex_formbrack); PL_parser->form_lex_state = PL_lex_state; PL_lex_formbrack = PL_lex_brackets + 1; PL_parser->sub_error_count = PL_error_count; - return yyl_leftcurly(aTHX_ s, formbrack); + return yyl_leftcurly(aTHX_ s, 1); } } if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { @@ -8819,7 +8816,7 @@ yyl_try(pTHX_ char *s, STRLEN len, U8 formbrack, const bool saw_infix_sigil) && memBEGINs(s+2, (STRLEN) (PL_bufend - (s+2)), "<<<<<")) { s = vcs_conflict_marker(s + 7); - return RETRY(); + return yyl_try(aTHX_ s, len, 0); } return yyl_leftpointy(aTHX_ s); @@ -8828,7 +8825,7 @@ yyl_try(pTHX_ char *s, STRLEN len, U8 formbrack, const bool saw_infix_sigil) && memBEGINs(s + 2, (STRLEN) (PL_bufend - s + 2), ">>>>>")) { s = vcs_conflict_marker(s + 7); - return RETRY(); + return yyl_try(aTHX_ s, len, 0); } return yyl_rightpointy(aTHX_ s + 1); @@ -8862,8 +8859,8 @@ yyl_try(pTHX_ char *s, STRLEN len, U8 formbrack, const bool saw_infix_sigil) && (s == PL_linestart || s[-1] == '\n') ) { PL_expect = XSTATE; - formbrack = 2; /* dot seen where arguments expected */ - return yyl_rightcurly(aTHX_ s, formbrack); + /* formbrack==2 means dot seen where arguments expected */ + return yyl_rightcurly(aTHX_ s, 2); } if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { s += 3; @@ -9284,7 +9281,7 @@ Perl_yylex(pTHX) return yyl_sigvar(aTHX_ s); } - return yyl_try(aTHX_ s, 0, 0, saw_infix_sigil); + return yyl_try(aTHX_ s, 0, saw_infix_sigil); } From d333554320d1a704b62423d391bf751d61ee3e32 Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Wed, 23 Oct 2019 22:00:47 +0100 Subject: [PATCH 15/20] toke.c: remove some spurious orig_keyword uses --- toke.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/toke.c b/toke.c index a56f1228b61a..cf6c98835253 100644 --- a/toke.c +++ b/toke.c @@ -8410,8 +8410,8 @@ yyl_keylookup(pTHX_ char *s, GV *gv, bool bof, bool saw_infix_sigil) /* x::* is just a word, unless x is "CORE" */ if (!anydelim && *s == ':' && s[1] == ':') { if (memEQs(PL_tokenbuf, len, "CORE")) - return yyl_key_core(aTHX_ s, len, tmp, orig_keyword, c, bof, saw_infix_sigil); - return yyl_just_a_word(aTHX_ s, len, 0, orig_keyword, c, saw_infix_sigil); + return yyl_key_core(aTHX_ s, len, tmp, 0, c, bof, saw_infix_sigil); + return yyl_just_a_word(aTHX_ s, len, 0, 0, c, saw_infix_sigil); } d = s; @@ -8484,8 +8484,7 @@ yyl_keylookup(pTHX_ char *s, GV *gv, bool bof, bool saw_infix_sigil) if (!c.gv) { sv_free(c.sv); c.sv = NULL; - return yyl_just_a_word(aTHX_ s, len, tmp, orig_keyword, - c, saw_infix_sigil); + return yyl_just_a_word(aTHX_ s, len, tmp, 0, c, saw_infix_sigil); } } else { @@ -8494,8 +8493,7 @@ yyl_keylookup(pTHX_ char *s, GV *gv, bool bof, bool saw_infix_sigil) c.cv = find_lexical_cv(c.off); } c.lex = TRUE; - return yyl_just_a_word(aTHX_ s, len, tmp, orig_keyword, - c, saw_infix_sigil); + return yyl_just_a_word(aTHX_ s, len, tmp, 0, c, saw_infix_sigil); } c.off = 0; } From b6c215b12289f6159c8f18d4833430ed1c3ac8b2 Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Fri, 25 Oct 2019 11:18:39 +0100 Subject: [PATCH 16/20] toke.c: don't pass around a copy of PL_parser->saw_infix_sigil There's exactly one place where we need to consult it (and that only for producing good error messages in a specific group of term-after-term situations). The reason for passing it around was so that it could be reset to false early on in the process of lexing a token, while then allowing the three separate cases that might need to set it true to do so independently. Instead, centralise the logic of determining when it needs to be true. --- toke.c | 121 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 65 insertions(+), 56 deletions(-) diff --git a/toke.c b/toke.c index cf6c98835253..be0f7903a19e 100644 --- a/toke.c +++ b/toke.c @@ -5577,7 +5577,6 @@ yyl_star(pTHX_ char *s) TOKEN(0); } - PL_parser->saw_infix_sigil = 1; Mop(OP_MULTIPLY); } @@ -5592,7 +5591,6 @@ yyl_percent(pTHX_ char *s) TOKEN(0); } ++s; - PL_parser->saw_infix_sigil = 1; Mop(OP_MODULO); } else if (PL_expect == XPOSTDEREF) @@ -6145,10 +6143,8 @@ yyl_ampersand(pTHX_ char *s) s--; TOKEN(0); } - if (d == s) { - PL_parser->saw_infix_sigil = 1; + if (d == s) BAop(bof ? OP_NBIT_AND : OP_BIT_AND); - } else BAop(OP_SBIT_AND); } @@ -6779,7 +6775,7 @@ yyl_my(pTHX_ char *s, I32 my) OPERATOR(MY); } -static int yyl_try(pTHX_ char*, STRLEN, const bool); +static int yyl_try(pTHX_ char*, STRLEN); static int yyl_eol(pTHX_ char *s, STRLEN len) @@ -6822,7 +6818,7 @@ yyl_eol(pTHX_ char *s, STRLEN len) incline(s, PL_bufend); } } - return yyl_try(aTHX_ s, len, 0); + return yyl_try(aTHX_ s, len); } static int @@ -7074,7 +7070,7 @@ yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s, STRLEN len) PL_preambled = FALSE; if (PERLDB_LINE_OR_SAVESRC) (void)gv_fetchfile(PL_origfilename); - return yyl_try(aTHX_ s, len, 0); + return yyl_try(aTHX_ s, len); } } } @@ -7087,7 +7083,7 @@ yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s, STRLEN len) TOKEN(';'); } - return yyl_try(aTHX_ s, len, 0); + return yyl_try(aTHX_ s, len); } static int @@ -7102,10 +7098,10 @@ yyl_fatcomma(pTHX_ char *s, STRLEN len) } static int -yyl_safe_bareword(pTHX_ char *s, const char lastchar, const bool saw_infix_sigil) +yyl_safe_bareword(pTHX_ char *s, const char lastchar) { if ((lastchar == '*' || lastchar == '%' || lastchar == '&') - && saw_infix_sigil) + && PL_parser->saw_infix_sigil) { Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), "Operator or semicolon missing before %c%" UTF8f, @@ -7197,8 +7193,7 @@ yyl_strictwarn_bareword(pTHX_ const char lastchar) } static int -yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, - struct code c, const bool saw_infix_sigil) +yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c) { int pkgname = 0; const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); @@ -7278,7 +7273,7 @@ yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, /* And if "Foo::", then that's what it certainly is. */ if (safebw) - return yyl_safe_bareword(aTHX_ s, lastchar, saw_infix_sigil); + return yyl_safe_bareword(aTHX_ s, lastchar); if (!c.off) { OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv)); @@ -7345,7 +7340,7 @@ yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR; yyl_strictwarn_bareword(aTHX_ lastchar); op_free(c.rv2cv_op); - return yyl_safe_bareword(aTHX_ s, lastchar, saw_infix_sigil); + return yyl_safe_bareword(aTHX_ s, lastchar); } } @@ -7445,17 +7440,15 @@ yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, op_free(c.rv2cv_op); - return yyl_safe_bareword(aTHX_ s, lastchar, saw_infix_sigil); + return yyl_safe_bareword(aTHX_ s, lastchar); } static int -yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, - struct code c, bool bof, const bool saw_infix_sigil) +yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c, bool bof) { switch (key) { default: /* not a keyword */ - return yyl_just_a_word(aTHX_ s, len, key, orig_keyword, - c, saw_infix_sigil); + return yyl_just_a_word(aTHX_ s, len, key, orig_keyword, c); case KEY___FILE__: FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) ); @@ -7493,8 +7486,7 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, case KEY_END: if (PL_expect == XSTATE) return yyl_sub(aTHX_ PL_bufptr, key); - return yyl_just_a_word(aTHX_ s, len, key, orig_keyword, - c, saw_infix_sigil); + return yyl_just_a_word(aTHX_ s, len, key, orig_keyword, c); case KEY_abs: UNI(OP_ABS); @@ -8349,8 +8341,7 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, Mop(OP_REPEAT); } check_uni(); - return yyl_just_a_word(aTHX_ s, len, key, orig_keyword, - c, saw_infix_sigil); + return yyl_just_a_word(aTHX_ s, len, key, orig_keyword, c); case KEY_xor: if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC) @@ -8361,8 +8352,7 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, } static int -yyl_key_core(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, - struct code c, bool bof, const bool saw_infix_sigil) +yyl_key_core(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c, bool bof) { STRLEN olen = len; char *d = s; @@ -8372,7 +8362,7 @@ yyl_key_core(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\'')) { Copy(PL_bufptr, PL_tokenbuf, olen, char); - return yyl_just_a_word(aTHX_ d, olen, key, orig_keyword, c, saw_infix_sigil); + return yyl_just_a_word(aTHX_ d, olen, key, orig_keyword, c); } if (!key) Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword", @@ -8385,12 +8375,11 @@ yyl_key_core(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, orig_keyword = key; /* Known to be a reserved word at this point */ - return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c, - bof, saw_infix_sigil); + return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c, bof); } static int -yyl_keylookup(pTHX_ char *s, GV *gv, bool bof, bool saw_infix_sigil) +yyl_keylookup(pTHX_ char *s, GV *gv, bool bof) { STRLEN len; bool anydelim; @@ -8410,8 +8399,8 @@ yyl_keylookup(pTHX_ char *s, GV *gv, bool bof, bool saw_infix_sigil) /* x::* is just a word, unless x is "CORE" */ if (!anydelim && *s == ':' && s[1] == ':') { if (memEQs(PL_tokenbuf, len, "CORE")) - return yyl_key_core(aTHX_ s, len, tmp, 0, c, bof, saw_infix_sigil); - return yyl_just_a_word(aTHX_ s, len, 0, 0, c, saw_infix_sigil); + return yyl_key_core(aTHX_ s, len, tmp, 0, c, bof); + return yyl_just_a_word(aTHX_ s, len, 0, 0, c); } d = s; @@ -8484,7 +8473,7 @@ yyl_keylookup(pTHX_ char *s, GV *gv, bool bof, bool saw_infix_sigil) if (!c.gv) { sv_free(c.sv); c.sv = NULL; - return yyl_just_a_word(aTHX_ s, len, tmp, 0, c, saw_infix_sigil); + return yyl_just_a_word(aTHX_ s, len, tmp, 0, c); } } else { @@ -8493,7 +8482,7 @@ yyl_keylookup(pTHX_ char *s, GV *gv, bool bof, bool saw_infix_sigil) c.cv = find_lexical_cv(c.off); } c.lex = TRUE; - return yyl_just_a_word(aTHX_ s, len, tmp, 0, c, saw_infix_sigil); + return yyl_just_a_word(aTHX_ s, len, tmp, 0, c); } c.off = 0; } @@ -8516,12 +8505,11 @@ yyl_keylookup(pTHX_ char *s, GV *gv, bool bof, bool saw_infix_sigil) return yyl_fatcomma(aTHX_ s, len); } - return yyl_word_or_keyword(aTHX_ s, len, tmp, orig_keyword, - c, bof, saw_infix_sigil); + return yyl_word_or_keyword(aTHX_ s, len, tmp, orig_keyword, c, bof); } static int -yyl_try(pTHX_ char *s, STRLEN len, const bool saw_infix_sigil) +yyl_try(pTHX_ char *s, STRLEN len) { char *d; bool bof = FALSE; @@ -8530,7 +8518,7 @@ yyl_try(pTHX_ char *s, STRLEN len, const bool saw_infix_sigil) switch (*s) { default: if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) - return yyl_keylookup(aTHX_ s, gv, bof, saw_infix_sigil); + return yyl_keylookup(aTHX_ s, gv, bof); yyl_croak_unrecognised(aTHX_ s); case 4: @@ -8557,7 +8545,7 @@ yyl_try(pTHX_ char *s, STRLEN len, const bool saw_infix_sigil) TOKEN(0); } if (s++ < PL_bufend) - return yyl_try(aTHX_ s, len, 0); /* ignore stray nulls */ + return yyl_try(aTHX_ s, len); /* ignore stray nulls */ PL_last_uni = 0; PL_last_lop = 0; if (!PL_in_eval && !PL_preambled) { @@ -8635,7 +8623,7 @@ yyl_try(pTHX_ char *s, STRLEN len, const bool saw_infix_sigil) PL_last_lop = PL_last_uni = NULL; if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash) update_debugger_info(PL_linestr, NULL, 0); - return yyl_try(aTHX_ s, len, 0); + return yyl_try(aTHX_ s, len); } return yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s, len); @@ -8647,7 +8635,7 @@ yyl_try(pTHX_ char *s, STRLEN len, const bool saw_infix_sigil) #endif case ' ': case '\t': case '\f': case '\v': s++; - return yyl_try(aTHX_ s, len, 0); + return yyl_try(aTHX_ s, len); case '#': case '\n': @@ -8681,7 +8669,7 @@ yyl_try(pTHX_ char *s, STRLEN len, const bool saw_infix_sigil) OPERATOR(','); case ':': if (s[1] == ':') - return yyl_just_a_word(aTHX_ s, 0, 0, 0, no_code, saw_infix_sigil); + return yyl_just_a_word(aTHX_ s, 0, 0, 0, no_code); return yyl_colon(aTHX_ s + 1); case '(': @@ -8720,7 +8708,7 @@ yyl_try(pTHX_ char *s, STRLEN len, const bool saw_infix_sigil) && memBEGINs(s + 2, (STRLEN) (PL_bufend - s + 2), "=====")) { s = vcs_conflict_marker(s + 7); - return yyl_try(aTHX_ s, len, 0); + return yyl_try(aTHX_ s, len); } s++; @@ -8770,15 +8758,15 @@ yyl_try(pTHX_ char *s, STRLEN len, const bool saw_infix_sigil) else s = d; incline(s, PL_bufend); - return yyl_try(aTHX_ s, len, 0); + return yyl_try(aTHX_ s, len); } } } - return yyl_try(aTHX_ s, len, 0); + return yyl_try(aTHX_ s, len); } s = PL_bufend; PL_parser->in_pod = 1; - return yyl_try(aTHX_ s, len, 0); + return yyl_try(aTHX_ s, len); } } if (PL_expect == XBLOCK) { @@ -8814,7 +8802,7 @@ yyl_try(pTHX_ char *s, STRLEN len, const bool saw_infix_sigil) && memBEGINs(s+2, (STRLEN) (PL_bufend - (s+2)), "<<<<<")) { s = vcs_conflict_marker(s + 7); - return yyl_try(aTHX_ s, len, 0); + return yyl_try(aTHX_ s, len); } return yyl_leftpointy(aTHX_ s); @@ -8823,7 +8811,7 @@ yyl_try(pTHX_ char *s, STRLEN len, const bool saw_infix_sigil) && memBEGINs(s + 2, (STRLEN) (PL_bufend - s + 2), ">>>>>")) { s = vcs_conflict_marker(s + 7); - return yyl_try(aTHX_ s, len, 0); + return yyl_try(aTHX_ s, len); } return yyl_rightpointy(aTHX_ s + 1); @@ -8922,12 +8910,12 @@ yyl_try(pTHX_ char *s, STRLEN len, const bool saw_infix_sigil) } else if ((*start == ':' && start[1] == ':') || (PL_expect == XSTATE && *start == ':')) - return yyl_keylookup(aTHX_ s, gv, bof, saw_infix_sigil); + return yyl_keylookup(aTHX_ s, gv, bof); else if (PL_expect == XSTATE) { d = start; while (d < PL_bufend && isSPACE(*d)) d++; if (*d == ':') - return yyl_keylookup(aTHX_ s, gv, bof, saw_infix_sigil); + return yyl_keylookup(aTHX_ s, gv, bof); } /* avoid v123abc() or $h{v1}, allow C */ if (!isALPHA(*start) && (PL_expect == XTERM @@ -8941,14 +8929,14 @@ yyl_try(pTHX_ char *s, STRLEN len, const bool saw_infix_sigil) } } } - return yyl_keylookup(aTHX_ s, gv, bof, saw_infix_sigil); + return yyl_keylookup(aTHX_ s, gv, bof); case 'x': if (isDIGIT(s[1]) && PL_expect == XOPERATOR) { s++; Mop(OP_REPEAT); } - return yyl_keylookup(aTHX_ s, gv, bof, saw_infix_sigil); + return yyl_keylookup(aTHX_ s, gv, bof); case '_': case 'a': case 'A': @@ -8977,7 +8965,7 @@ yyl_try(pTHX_ char *s, STRLEN len, const bool saw_infix_sigil) case 'X': case 'y': case 'Y': case 'z': case 'Z': - return yyl_keylookup(aTHX_ s, gv, bof, saw_infix_sigil); + return yyl_keylookup(aTHX_ s, gv, bof); } } @@ -9038,7 +9026,6 @@ Perl_yylex(pTHX) { dVAR; char *s = PL_bufptr; - const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil); if (UNLIKELY(PL_parser->recheck_utf8_validity)) { const U8* first_bad_char_loc; @@ -9273,13 +9260,35 @@ Perl_yylex(pTHX) s = PL_bufptr; PL_oldoldbufptr = PL_oldbufptr; PL_oldbufptr = s; - PL_parser->saw_infix_sigil = 0; if (PL_in_my == KEY_sigvar) { + PL_parser->saw_infix_sigil = 0; return yyl_sigvar(aTHX_ s); } - return yyl_try(aTHX_ s, 0, saw_infix_sigil); + { + /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil. + On its return, we then need to set it to indicate whether the token + we just encountered was an infix operator that (if we hadn't been + expecting an operator) have been a sigil. + */ + bool expected_operator = (PL_expect == XOPERATOR); + int ret = yyl_try(aTHX_ s, 0); + switch (pl_yylval.ival) { + case OP_BIT_AND: + case OP_MODULO: + case OP_MULTIPLY: + case OP_NBIT_AND: + if (expected_operator) { + PL_parser->saw_infix_sigil = 1; + break; + } + /* FALLTHROUGH */ + default: + PL_parser->saw_infix_sigil = 0; + } + return ret; + } } From 3fca6a09620a15cb89ce73ca304720e626da8454 Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Fri, 25 Oct 2019 15:43:00 +0100 Subject: [PATCH 17/20] toke.c: delete unused bof parameters --- toke.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/toke.c b/toke.c index be0f7903a19e..fb9f7c639cbd 100644 --- a/toke.c +++ b/toke.c @@ -7444,7 +7444,7 @@ yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct cod } static int -yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c, bool bof) +yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c) { switch (key) { default: /* not a keyword */ @@ -7470,7 +7470,7 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct case KEY___END__: if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) yyl_data_handle(aTHX); - return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, bof, s, len); + return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s, len); case KEY___SUB__: FUN0OP(CvCLONE(PL_compcv) @@ -8352,7 +8352,7 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct } static int -yyl_key_core(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c, bool bof) +yyl_key_core(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c) { STRLEN olen = len; char *d = s; @@ -8375,11 +8375,11 @@ yyl_key_core(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c orig_keyword = key; /* Known to be a reserved word at this point */ - return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c, bof); + return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c); } static int -yyl_keylookup(pTHX_ char *s, GV *gv, bool bof) +yyl_keylookup(pTHX_ char *s, GV *gv) { STRLEN len; bool anydelim; @@ -8399,7 +8399,7 @@ yyl_keylookup(pTHX_ char *s, GV *gv, bool bof) /* x::* is just a word, unless x is "CORE" */ if (!anydelim && *s == ':' && s[1] == ':') { if (memEQs(PL_tokenbuf, len, "CORE")) - return yyl_key_core(aTHX_ s, len, tmp, 0, c, bof); + return yyl_key_core(aTHX_ s, len, tmp, 0, c); return yyl_just_a_word(aTHX_ s, len, 0, 0, c); } @@ -8505,20 +8505,19 @@ yyl_keylookup(pTHX_ char *s, GV *gv, bool bof) return yyl_fatcomma(aTHX_ s, len); } - return yyl_word_or_keyword(aTHX_ s, len, tmp, orig_keyword, c, bof); + return yyl_word_or_keyword(aTHX_ s, len, tmp, orig_keyword, c); } static int yyl_try(pTHX_ char *s, STRLEN len) { char *d; - bool bof = FALSE; GV *gv = NULL; switch (*s) { default: if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) - return yyl_keylookup(aTHX_ s, gv, bof); + return yyl_keylookup(aTHX_ s, gv); yyl_croak_unrecognised(aTHX_ s); case 4: @@ -8910,12 +8909,12 @@ yyl_try(pTHX_ char *s, STRLEN len) } else if ((*start == ':' && start[1] == ':') || (PL_expect == XSTATE && *start == ':')) - return yyl_keylookup(aTHX_ s, gv, bof); + return yyl_keylookup(aTHX_ s, gv); else if (PL_expect == XSTATE) { d = start; while (d < PL_bufend && isSPACE(*d)) d++; if (*d == ':') - return yyl_keylookup(aTHX_ s, gv, bof); + return yyl_keylookup(aTHX_ s, gv); } /* avoid v123abc() or $h{v1}, allow C */ if (!isALPHA(*start) && (PL_expect == XTERM @@ -8929,14 +8928,14 @@ yyl_try(pTHX_ char *s, STRLEN len) } } } - return yyl_keylookup(aTHX_ s, gv, bof); + return yyl_keylookup(aTHX_ s, gv); case 'x': if (isDIGIT(s[1]) && PL_expect == XOPERATOR) { s++; Mop(OP_REPEAT); } - return yyl_keylookup(aTHX_ s, gv, bof); + return yyl_keylookup(aTHX_ s, gv); case '_': case 'a': case 'A': @@ -8965,7 +8964,7 @@ yyl_try(pTHX_ char *s, STRLEN len) case 'X': case 'y': case 'Y': case 'z': case 'Z': - return yyl_keylookup(aTHX_ s, gv, bof); + return yyl_keylookup(aTHX_ s, gv); } } From 6a78cc121bdcba7c12e94c757eef6f3b472fca15 Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Fri, 1 Nov 2019 15:10:14 +0000 Subject: [PATCH 18/20] toke.c: replace recursive calls to yyl_try() with goto MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The downside of writing these calls recursively is that not all compilers will compile the tail-position calls as jumps; that's especially true in earlier versions of this refactoring process (where yyl_try() took a large number of arguments), but it's not in general something we can expect to happen — especially in the presence of `-O0` or similar compiler options. This can lead to call-stack overflow in some circumstances. Most recursive calls to yyl_try() occur within yyl_try() itself, so we can easily replace them with an explicit `goto` (which is what most compilers would use for the recursive calls anyway, now that yyl_try() takes ≤3 parameters). There are only two other recursive-call cases. One is yyl_fake_eof(), which as far as I can tell is never called repeatedly within a single file; this seems safe. The other is yyl_eol(). It has exactly two distinct return paths, so this commit moves the retry logic into its yyl_try() caller. With this change, we no longer seem to trigger call-stack overflow. Closes #17220 --- toke.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/toke.c b/toke.c index fb9f7c639cbd..50b760ce9ca2 100644 --- a/toke.c +++ b/toke.c @@ -6777,9 +6777,10 @@ yyl_my(pTHX_ char *s, I32 my) static int yyl_try(pTHX_ char*, STRLEN); -static int -yyl_eol(pTHX_ char *s, STRLEN len) +static bool +yyl_eol_needs_semicolon(pTHX_ char **ps) { + char *s = *ps; if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) { @@ -6806,7 +6807,8 @@ yyl_eol(pTHX_ char *s, STRLEN len) if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { PL_lex_state = LEX_FORMLINE; force_next(FORMRBRACK); - TOKEN(';'); + *ps = s; + return TRUE; } } else { @@ -6818,7 +6820,8 @@ yyl_eol(pTHX_ char *s, STRLEN len) incline(s, PL_bufend); } } - return yyl_try(aTHX_ s, len); + *ps = s; + return FALSE; } static int @@ -8514,6 +8517,7 @@ yyl_try(pTHX_ char *s, STRLEN len) char *d; GV *gv = NULL; + retry: switch (*s) { default: if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) @@ -8544,7 +8548,7 @@ yyl_try(pTHX_ char *s, STRLEN len) TOKEN(0); } if (s++ < PL_bufend) - return yyl_try(aTHX_ s, len); /* ignore stray nulls */ + goto retry; /* ignore stray nulls */ PL_last_uni = 0; PL_last_lop = 0; if (!PL_in_eval && !PL_preambled) { @@ -8622,7 +8626,7 @@ yyl_try(pTHX_ char *s, STRLEN len) PL_last_lop = PL_last_uni = NULL; if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash) update_debugger_info(PL_linestr, NULL, 0); - return yyl_try(aTHX_ s, len); + goto retry; } return yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s, len); @@ -8634,11 +8638,16 @@ yyl_try(pTHX_ char *s, STRLEN len) #endif case ' ': case '\t': case '\f': case '\v': s++; - return yyl_try(aTHX_ s, len); + goto retry; case '#': - case '\n': - return yyl_eol(aTHX_ s, len); + case '\n': { + const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s); + if (needs_semicolon) + TOKEN(';'); + else + goto retry; + } case '-': return yyl_hyphen(aTHX_ s); @@ -8707,7 +8716,7 @@ yyl_try(pTHX_ char *s, STRLEN len) && memBEGINs(s + 2, (STRLEN) (PL_bufend - s + 2), "=====")) { s = vcs_conflict_marker(s + 7); - return yyl_try(aTHX_ s, len); + goto retry; } s++; @@ -8757,15 +8766,15 @@ yyl_try(pTHX_ char *s, STRLEN len) else s = d; incline(s, PL_bufend); - return yyl_try(aTHX_ s, len); + goto retry; } } } - return yyl_try(aTHX_ s, len); + goto retry; } s = PL_bufend; PL_parser->in_pod = 1; - return yyl_try(aTHX_ s, len); + goto retry; } } if (PL_expect == XBLOCK) { @@ -8801,7 +8810,7 @@ yyl_try(pTHX_ char *s, STRLEN len) && memBEGINs(s+2, (STRLEN) (PL_bufend - (s+2)), "<<<<<")) { s = vcs_conflict_marker(s + 7); - return yyl_try(aTHX_ s, len); + goto retry; } return yyl_leftpointy(aTHX_ s); @@ -8810,7 +8819,7 @@ yyl_try(pTHX_ char *s, STRLEN len) && memBEGINs(s + 2, (STRLEN) (PL_bufend - s + 2), ">>>>>")) { s = vcs_conflict_marker(s + 7); - return yyl_try(aTHX_ s, len); + goto retry; } return yyl_rightpointy(aTHX_ s + 1); From 1732dbbf48a1d3612227f954e22269000cc4cbf7 Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Fri, 1 Nov 2019 15:29:03 +0000 Subject: [PATCH 19/20] toke.c: const-ify formbrack parameters --- toke.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/toke.c b/toke.c index 50b760ce9ca2..2a356e48c39b 100644 --- a/toke.c +++ b/toke.c @@ -5855,7 +5855,7 @@ yyl_subproto(pTHX_ char *s, CV *cv) } static int -yyl_leftcurly(pTHX_ char *s, U8 formbrack) +yyl_leftcurly(pTHX_ char *s, const U8 formbrack) { char *d; if (PL_lex_brackets > 100) { @@ -6059,7 +6059,7 @@ yyl_leftcurly(pTHX_ char *s, U8 formbrack) } static int -yyl_rightcurly(pTHX_ char *s, U8 formbrack) +yyl_rightcurly(pTHX_ char *s, const U8 formbrack) { assert(s != PL_bufend); s++; From 18828ce3c32556d1699380ed1fc7f2096b034ab8 Mon Sep 17 00:00:00 2001 From: Aaron Crane Date: Fri, 25 Oct 2019 11:21:51 +0100 Subject: [PATCH 20/20] perldelta for recent toke.c refactoring --- pod/perldelta.pod | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 97dd4267637f..872bf51839d1 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -357,17 +357,17 @@ XXX =head1 Internal Changes -XXX Changes which affect the interface available to C code go here. Other -significant internal changes for future core maintainers should be noted as -well. - -[ List each change as an =item entry ] - =over 4 =item * -XXX +The lexer (C in F) was previously a single 4100-line +function, relying heavily on C and a lot of widely-scoped local variables +to do its work. It has now been pulled apart into a few dozen smaller static +functions; the largest remaining chunk (C) is a little +over 900 lines, and consists of a single C statement, all of whose +C groups are independent. This should be much easier to understand and +maintain. =back