From 83b4616450f625cd61b35f9c2e5aec5e4d2cfd80 Mon Sep 17 00:00:00 2001 From: Vaibhav Pathak Date: Fri, 9 Feb 2024 00:26:16 +0530 Subject: [PATCH 1/5] Improving prefix-string parsing --- src/lpython/parser/parser.yy | 7 +++++-- src/lpython/parser/semantics.h | 2 +- src/lpython/parser/tokenizer.re | 14 ++++++++++++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/lpython/parser/parser.yy b/src/lpython/parser/parser.yy index 79012d5be8..7d773b962f 100644 --- a/src/lpython/parser/parser.yy +++ b/src/lpython/parser/parser.yy @@ -179,6 +179,9 @@ void yyerror(YYLTYPE *yyloc, LCompilers::LPython::Parser &p, const std::string & %token KW_MATCH %token KW_CASE +%token KW_STR_PREFIX +%type KW_STR_PREFIX + // Nonterminal tokens %type script_unit @@ -1102,9 +1105,9 @@ subscript string : string TK_STRING { $$ = STRING2($1, $2, @$); } // TODO - | string id TK_STRING { $$ = STRING4($1, STRING3($2, $3, @$), @$); } + | string KW_STR_PREFIX TK_STRING { $$ = STRING4($1, STRING3($2, $3, @$), @$); } | TK_STRING { $$ = STRING1($1, @$); } - | id TK_STRING { $$ = STRING3($1, $2, @$); } + | KW_STR_PREFIX TK_STRING { $$ = STRING3($1, $2, @$); } ; lambda_parameter diff --git a/src/lpython/parser/semantics.h b/src/lpython/parser/semantics.h index f4059fa0d9..9a41278783 100644 --- a/src/lpython/parser/semantics.h +++ b/src/lpython/parser/semantics.h @@ -800,7 +800,7 @@ static inline ast_t* concat_string(Allocator &al, Location &l, #define INTEGER(x, l) make_ConstantInt_t(p.m_a, l, x, nullptr) #define STRING1(x, l) make_ConstantStr_t(p.m_a, l, str_unescape_c(p.m_a, x), nullptr) #define STRING2(x, y, l) concat_string(p.m_a, l, EXPR(x), str_unescape_c(p.m_a, y), nullptr) -#define STRING3(id, x, l) PREFIX_STRING(p.m_a, l, name2char(id), x.c_str(p.m_a)) +#define STRING3(prefix, x, l) PREFIX_STRING(p.m_a, l, prefix.c_str(p.m_a), x.c_str(p.m_a)) #define STRING4(x, s, l) concat_string(p.m_a, l, EXPR(x), "", EXPR(s)) #define FLOAT(x, l) make_ConstantFloat_t(p.m_a, l, x, nullptr) #define COMPLEX(x, l) make_ConstantComplex_t(p.m_a, l, 0, x, nullptr) diff --git a/src/lpython/parser/tokenizer.re b/src/lpython/parser/tokenizer.re index a5074f46f5..64b0488b76 100644 --- a/src/lpython/parser/tokenizer.re +++ b/src/lpython/parser/tokenizer.re @@ -435,6 +435,19 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost } } + [rR][bB] | [bB][rR] + | [fF][rR] | [rR][fF] + | [rR] | [bB] | [fF] | [uU] + { + if(cur[0] == '\'' || cur[0] == '"'){ + KW(STR_PREFIX); + } + else { + token(yylval.string); + RET(TK_NAME); + } + } + // Tokens newline { if(parenlevel) { continue; } @@ -763,6 +776,7 @@ std::string token2text(const int token) T(KW_MATCH, "match") T(KW_CASE, "case") + T(KW_STR_PREFIX, "string prefix") default : { std::cout << "TOKEN: " << token << std::endl; From daae9dce104ae43007df11edb8bba9ded3d1ae6c Mon Sep 17 00:00:00 2001 From: Vaibhav Pathak Date: Fri, 9 Feb 2024 00:26:16 +0530 Subject: [PATCH 2/5] Improving prefix-string parsing --- src/lpython/parser/parser.yy | 7 +++++-- src/lpython/parser/semantics.h | 2 +- src/lpython/parser/tokenizer.re | 14 ++++++++++++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/lpython/parser/parser.yy b/src/lpython/parser/parser.yy index 79012d5be8..7d773b962f 100644 --- a/src/lpython/parser/parser.yy +++ b/src/lpython/parser/parser.yy @@ -179,6 +179,9 @@ void yyerror(YYLTYPE *yyloc, LCompilers::LPython::Parser &p, const std::string & %token KW_MATCH %token KW_CASE +%token KW_STR_PREFIX +%type KW_STR_PREFIX + // Nonterminal tokens %type script_unit @@ -1102,9 +1105,9 @@ subscript string : string TK_STRING { $$ = STRING2($1, $2, @$); } // TODO - | string id TK_STRING { $$ = STRING4($1, STRING3($2, $3, @$), @$); } + | string KW_STR_PREFIX TK_STRING { $$ = STRING4($1, STRING3($2, $3, @$), @$); } | TK_STRING { $$ = STRING1($1, @$); } - | id TK_STRING { $$ = STRING3($1, $2, @$); } + | KW_STR_PREFIX TK_STRING { $$ = STRING3($1, $2, @$); } ; lambda_parameter diff --git a/src/lpython/parser/semantics.h b/src/lpython/parser/semantics.h index f4059fa0d9..9a41278783 100644 --- a/src/lpython/parser/semantics.h +++ b/src/lpython/parser/semantics.h @@ -800,7 +800,7 @@ static inline ast_t* concat_string(Allocator &al, Location &l, #define INTEGER(x, l) make_ConstantInt_t(p.m_a, l, x, nullptr) #define STRING1(x, l) make_ConstantStr_t(p.m_a, l, str_unescape_c(p.m_a, x), nullptr) #define STRING2(x, y, l) concat_string(p.m_a, l, EXPR(x), str_unescape_c(p.m_a, y), nullptr) -#define STRING3(id, x, l) PREFIX_STRING(p.m_a, l, name2char(id), x.c_str(p.m_a)) +#define STRING3(prefix, x, l) PREFIX_STRING(p.m_a, l, prefix.c_str(p.m_a), x.c_str(p.m_a)) #define STRING4(x, s, l) concat_string(p.m_a, l, EXPR(x), "", EXPR(s)) #define FLOAT(x, l) make_ConstantFloat_t(p.m_a, l, x, nullptr) #define COMPLEX(x, l) make_ConstantComplex_t(p.m_a, l, 0, x, nullptr) diff --git a/src/lpython/parser/tokenizer.re b/src/lpython/parser/tokenizer.re index a5074f46f5..64b0488b76 100644 --- a/src/lpython/parser/tokenizer.re +++ b/src/lpython/parser/tokenizer.re @@ -435,6 +435,19 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost } } + [rR][bB] | [bB][rR] + | [fF][rR] | [rR][fF] + | [rR] | [bB] | [fF] | [uU] + { + if(cur[0] == '\'' || cur[0] == '"'){ + KW(STR_PREFIX); + } + else { + token(yylval.string); + RET(TK_NAME); + } + } + // Tokens newline { if(parenlevel) { continue; } @@ -763,6 +776,7 @@ std::string token2text(const int token) T(KW_MATCH, "match") T(KW_CASE, "case") + T(KW_STR_PREFIX, "string prefix") default : { std::cout << "TOKEN: " << token << std::endl; From 9a65faacf9d48e8a718373ea723c991d29bcec53 Mon Sep 17 00:00:00 2001 From: Vaibhav Pathak Date: Fri, 9 Feb 2024 19:59:49 +0530 Subject: [PATCH 3/5] Add test for prefix strings --- tests/errors/prefix_string_01.py | 5 +++++ tests/errors/prefix_string_02.py | 9 +++++++++ tests/reference/ast-prefix_string_01-cf221fd.json | 13 +++++++++++++ tests/reference/ast-prefix_string_01-cf221fd.stderr | 5 +++++ tests/reference/ast-prefix_string_02-3d530b2.json | 13 +++++++++++++ tests/reference/ast-prefix_string_02-3d530b2.stderr | 5 +++++ tests/tests.toml | 8 ++++++++ 7 files changed, 58 insertions(+) create mode 100644 tests/errors/prefix_string_01.py create mode 100644 tests/errors/prefix_string_02.py create mode 100644 tests/reference/ast-prefix_string_01-cf221fd.json create mode 100644 tests/reference/ast-prefix_string_01-cf221fd.stderr create mode 100644 tests/reference/ast-prefix_string_02-3d530b2.json create mode 100644 tests/reference/ast-prefix_string_02-3d530b2.stderr diff --git a/tests/errors/prefix_string_01.py b/tests/errors/prefix_string_01.py new file mode 100644 index 0000000000..bd1c2d2dda --- /dev/null +++ b/tests/errors/prefix_string_01.py @@ -0,0 +1,5 @@ +def main(): + # python2 syntax should result in a syntax error + print "Hello", "World" + +main() \ No newline at end of file diff --git a/tests/errors/prefix_string_02.py b/tests/errors/prefix_string_02.py new file mode 100644 index 0000000000..4bac882263 --- /dev/null +++ b/tests/errors/prefix_string_02.py @@ -0,0 +1,9 @@ +from lpython import i32 + +# fix difference between lpython and cpython in prefix string grammar +# Prefix should be attached to the quote with any whitespace. + +def main(): + print(r "Hello World") + +main() \ No newline at end of file diff --git a/tests/reference/ast-prefix_string_01-cf221fd.json b/tests/reference/ast-prefix_string_01-cf221fd.json new file mode 100644 index 0000000000..b3c6e12414 --- /dev/null +++ b/tests/reference/ast-prefix_string_01-cf221fd.json @@ -0,0 +1,13 @@ +{ + "basename": "ast-prefix_string_01-cf221fd", + "cmd": "lpython --show-ast --no-color {infile} -o {outfile}", + "infile": "tests/errors/prefix_string_01.py", + "infile_hash": "cabdebe33479b322c57ccff7c6b27eae2269832872208d2dc2ce3548", + "outfile": null, + "outfile_hash": null, + "stdout": null, + "stdout_hash": null, + "stderr": "ast-prefix_string_01-cf221fd.stderr", + "stderr_hash": "b489b5727a016d8cce4fba576c74f0137d38d2783d38a7c3869df76f", + "returncode": 1 +} \ No newline at end of file diff --git a/tests/reference/ast-prefix_string_01-cf221fd.stderr b/tests/reference/ast-prefix_string_01-cf221fd.stderr new file mode 100644 index 0000000000..489859a9bc --- /dev/null +++ b/tests/reference/ast-prefix_string_01-cf221fd.stderr @@ -0,0 +1,5 @@ +syntax error: Token '"Hello"' (of type 'string') is unexpected here + --> tests/errors/prefix_string_01.py:3:8 + | +3 | print "Hello", "World" + | ^^^^^^^ diff --git a/tests/reference/ast-prefix_string_02-3d530b2.json b/tests/reference/ast-prefix_string_02-3d530b2.json new file mode 100644 index 0000000000..6205332c87 --- /dev/null +++ b/tests/reference/ast-prefix_string_02-3d530b2.json @@ -0,0 +1,13 @@ +{ + "basename": "ast-prefix_string_02-3d530b2", + "cmd": "lpython --show-ast --no-color {infile} -o {outfile}", + "infile": "tests/errors/prefix_string_02.py", + "infile_hash": "6a69d14e7424e65fec4c97212ad5388bc756f0979e3f2c607519b02d", + "outfile": null, + "outfile_hash": null, + "stdout": null, + "stdout_hash": null, + "stderr": "ast-prefix_string_02-3d530b2.stderr", + "stderr_hash": "240c89e2b84cbcff81575453f87813d784fa51bfa7c2725436db5f31", + "returncode": 1 +} \ No newline at end of file diff --git a/tests/reference/ast-prefix_string_02-3d530b2.stderr b/tests/reference/ast-prefix_string_02-3d530b2.stderr new file mode 100644 index 0000000000..ad0e4c495b --- /dev/null +++ b/tests/reference/ast-prefix_string_02-3d530b2.stderr @@ -0,0 +1,5 @@ +syntax error: Token '"Hello World"' (of type 'string') is unexpected here + --> tests/errors/prefix_string_02.py:7:10 + | +7 | print(r "Hello World") + | ^^^^^^^^^^^^^ diff --git a/tests/tests.toml b/tests/tests.toml index 57ed98bac4..b033615665 100644 --- a/tests/tests.toml +++ b/tests/tests.toml @@ -1286,6 +1286,14 @@ asr = true filename = "errors/unsigned_04.py" asr = true +[[test]] +filename = "errors/prefix_string_01.py" +ast = true + +[[test]] +filename = "errors/prefix_string_02.py" +ast = true + # tests/runtime_errors [[test]] filename = "runtime_errors/test_list_01.py" From dc3f777a445422ef7b9263c790f625d9d30880b0 Mon Sep 17 00:00:00 2001 From: Vaibhav Pathak Date: Fri, 9 Feb 2024 21:43:25 +0530 Subject: [PATCH 4/5] Change tabs to spaces and correct sentence in prefix string tests --- tests/errors/prefix_string_01.py | 6 +++--- tests/errors/prefix_string_02.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/errors/prefix_string_01.py b/tests/errors/prefix_string_01.py index bd1c2d2dda..a790d186b5 100644 --- a/tests/errors/prefix_string_01.py +++ b/tests/errors/prefix_string_01.py @@ -1,5 +1,5 @@ def main(): - # python2 syntax should result in a syntax error - print "Hello", "World" - + # python2 syntax should result in a syntax error + print "Hello", "World" + main() \ No newline at end of file diff --git a/tests/errors/prefix_string_02.py b/tests/errors/prefix_string_02.py index 4bac882263..fcaf1376d5 100644 --- a/tests/errors/prefix_string_02.py +++ b/tests/errors/prefix_string_02.py @@ -1,9 +1,9 @@ from lpython import i32 # fix difference between lpython and cpython in prefix string grammar -# Prefix should be attached to the quote with any whitespace. +# Prefix should be attached to the quote without any whitespace. def main(): - print(r "Hello World") + print(r "Hello World") main() \ No newline at end of file From add2988f232ab3d588ec12f658da873a3860badb Mon Sep 17 00:00:00 2001 From: Vaibhav Pathak Date: Fri, 9 Feb 2024 21:52:01 +0530 Subject: [PATCH 5/5] Update test reference for prefix strings --- tests/reference/ast-prefix_string_01-cf221fd.json | 4 ++-- tests/reference/ast-prefix_string_01-cf221fd.stderr | 6 +++--- tests/reference/ast-prefix_string_02-3d530b2.json | 4 ++-- tests/reference/ast-prefix_string_02-3d530b2.stderr | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/reference/ast-prefix_string_01-cf221fd.json b/tests/reference/ast-prefix_string_01-cf221fd.json index b3c6e12414..ed1984236e 100644 --- a/tests/reference/ast-prefix_string_01-cf221fd.json +++ b/tests/reference/ast-prefix_string_01-cf221fd.json @@ -2,12 +2,12 @@ "basename": "ast-prefix_string_01-cf221fd", "cmd": "lpython --show-ast --no-color {infile} -o {outfile}", "infile": "tests/errors/prefix_string_01.py", - "infile_hash": "cabdebe33479b322c57ccff7c6b27eae2269832872208d2dc2ce3548", + "infile_hash": "0d83c0e32a78023fccb343a4d3358071792265c1ae357176fe0912eb", "outfile": null, "outfile_hash": null, "stdout": null, "stdout_hash": null, "stderr": "ast-prefix_string_01-cf221fd.stderr", - "stderr_hash": "b489b5727a016d8cce4fba576c74f0137d38d2783d38a7c3869df76f", + "stderr_hash": "b600057f41f59ba7fdebe3971bfea0eadca972747ccf70d575c1cdcd", "returncode": 1 } \ No newline at end of file diff --git a/tests/reference/ast-prefix_string_01-cf221fd.stderr b/tests/reference/ast-prefix_string_01-cf221fd.stderr index 489859a9bc..e2a05a52c7 100644 --- a/tests/reference/ast-prefix_string_01-cf221fd.stderr +++ b/tests/reference/ast-prefix_string_01-cf221fd.stderr @@ -1,5 +1,5 @@ syntax error: Token '"Hello"' (of type 'string') is unexpected here - --> tests/errors/prefix_string_01.py:3:8 + --> tests/errors/prefix_string_01.py:3:11 | -3 | print "Hello", "World" - | ^^^^^^^ +3 | print "Hello", "World" + | ^^^^^^^ diff --git a/tests/reference/ast-prefix_string_02-3d530b2.json b/tests/reference/ast-prefix_string_02-3d530b2.json index 6205332c87..9c3ce1081b 100644 --- a/tests/reference/ast-prefix_string_02-3d530b2.json +++ b/tests/reference/ast-prefix_string_02-3d530b2.json @@ -2,12 +2,12 @@ "basename": "ast-prefix_string_02-3d530b2", "cmd": "lpython --show-ast --no-color {infile} -o {outfile}", "infile": "tests/errors/prefix_string_02.py", - "infile_hash": "6a69d14e7424e65fec4c97212ad5388bc756f0979e3f2c607519b02d", + "infile_hash": "5d0c279ea735e60d5243a4b33100832dc1564917d6ef83c9b32705f9", "outfile": null, "outfile_hash": null, "stdout": null, "stdout_hash": null, "stderr": "ast-prefix_string_02-3d530b2.stderr", - "stderr_hash": "240c89e2b84cbcff81575453f87813d784fa51bfa7c2725436db5f31", + "stderr_hash": "cd72affed29823c0364d52bfb3ba0674d9d7950390b7cd6b04f7538b", "returncode": 1 } \ No newline at end of file diff --git a/tests/reference/ast-prefix_string_02-3d530b2.stderr b/tests/reference/ast-prefix_string_02-3d530b2.stderr index ad0e4c495b..869d52864a 100644 --- a/tests/reference/ast-prefix_string_02-3d530b2.stderr +++ b/tests/reference/ast-prefix_string_02-3d530b2.stderr @@ -1,5 +1,5 @@ syntax error: Token '"Hello World"' (of type 'string') is unexpected here - --> tests/errors/prefix_string_02.py:7:10 + --> tests/errors/prefix_string_02.py:7:13 | -7 | print(r "Hello World") - | ^^^^^^^^^^^^^ +7 | print(r "Hello World") + | ^^^^^^^^^^^^^