Thanks to visit codestin.com
Credit goes to github.com

Skip to content

parse.y: add heredoc <<~ syntax (Feature #9098) #878

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
19 changes: 19 additions & 0 deletions doc/syntax/literals.rdoc
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,25 @@ Note that while the closing identifier may be indented, the content is
always treated as if it is flush left. If you indent the content those spaces
will appear in the output.

To have indented content as well as an indented closing identifier, you can use
a "squiggly" heredoc, which uses a "~" instead of a "-" after <tt><<</tt>:

expected_result = <<~SQUIGGLY_HEREDOC
This would contain specially formatted text.

That might span many lines
SQUIGGLY_HEREDOC

The indentation of the least-indented line will be removed from each line of
the content. Note that empty lines and lines consisting solely of literal tabs
and spaces will be ignored for the purposes of determining indentation, but
escaped tabs and spaces are considered non-indentation characters.

If both tabs and spaces are used as indentation in the same heredoc,
tabs are considered as equal to 8 spaces. If the indentation of the
least-indented line falls in the middle of a leading tab, only
indentation to the left of that tab will be removed.

A heredoc allows interpolation and escaped characters. You may disable
interpolation and escaping by surrounding the opening identifier with single
quotes:
Expand Down
193 changes: 185 additions & 8 deletions parse.y
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,8 @@ struct parser_params {
int toksiz;
int tokline;
int heredoc_end;
int heredoc_indent;
int heredoc_line_indent;
char *tokenbuf;
NODE *deferred_nodes;
struct local_vars *lvtbl;
Expand Down Expand Up @@ -345,6 +347,8 @@ static int parser_yyerror(struct parser_params*, const char*);
#define lex_p (parser->lex.pcur)
#define lex_pend (parser->lex.pend)
#define heredoc_end (parser->heredoc_end)
#define heredoc_indent (parser->heredoc_indent)
#define heredoc_line_indent (parser->heredoc_line_indent)
#define command_start (parser->command_start)
#define deferred_nodes (parser->deferred_nodes)
#define lex_gets_ptr (parser->lex.gets_ptr)
Expand Down Expand Up @@ -485,6 +489,10 @@ static int reg_fragment_check_gen(struct parser_params*, VALUE, int);
static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match);
#define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,(regexp),(match))

static NODE *parser_heredoc_dedent(struct parser_params*,NODE*);
# define heredoc_dedent(str) parser_heredoc_dedent(parser, (str))


#define get_id(id) (id)
#define get_value(val) (val)
#else
Expand Down Expand Up @@ -668,6 +676,9 @@ new_args_tail_gen(struct parser_params *parser, VALUE k, VALUE kr, VALUE b)

#define new_defined(expr) dispatch1(defined, (expr))

static VALUE parser_heredoc_dedent_ripper(struct parser_params*,VALUE);
# define heredoc_dedent_ripper(str) parser_heredoc_dedent_ripper(parser, (str))

#define FIXME 0

#endif /* RIPPER */
Expand Down Expand Up @@ -3885,6 +3896,7 @@ strings : string
else {
node = evstr2dstr(node);
}
heredoc_indent = 0;
$$ = node;
/*%
$$ = $1;
Expand All @@ -3907,9 +3919,10 @@ string : tCHAR
string1 : tSTRING_BEG string_contents tSTRING_END
{
/*%%%*/
$$ = $2;
$$ = heredoc_dedent($2);
/*%
$$ = dispatch1(string_literal, $2);
$$ = dispatch1(string_literal,
heredoc_dedent_ripper($2));
%*/
}
;
Expand All @@ -3934,9 +3947,10 @@ xstring : tXSTRING_BEG xstring_contents tSTRING_END
break;
}
}
$$ = node;
$$ = heredoc_dedent(node);
/*%
$$ = dispatch1(xstring_literal, $2);
$$ = dispatch1(xstring_literal,
heredoc_dedent_ripper($2));
%*/
}
;
Expand Down Expand Up @@ -4317,18 +4331,23 @@ string_content : tSTRING_CONTENT
$<num>$ = brace_nest;
brace_nest = 0;
}
{
$<num>$ = heredoc_indent;
heredoc_indent = 0;
}
compstmt tSTRING_DEND
{
cond_stack = $<val>1;
cmdarg_stack = $<val>2;
lex_strterm = $<node>3;
lex_state = $<num>4;
brace_nest = $<num>5;
heredoc_indent = $<num>6;
/*%%%*/
if ($6) $6->flags &= ~NODE_FL_NEWLINE;
$$ = new_evstr($6);
if ($7) $7->flags &= ~NODE_FL_NEWLINE;
$$ = new_evstr($7);
/*%
$$ = dispatch1(string_embexpr, $6);
$$ = dispatch1(string_embexpr, $7);
%*/
}
;
Expand Down Expand Up @@ -5693,6 +5712,7 @@ rb_parser_compile_file_path(VALUE vparser, VALUE fname, VALUE file, int start)
#define STR_FUNC_SYMBOL 0x10
#define STR_FUNC_INDENT 0x20
#define STR_FUNC_LABEL 0x40
#define STR_FUNC_DEDENT 0x80

enum string_type {
str_label = STR_FUNC_LABEL,
Expand Down Expand Up @@ -6211,6 +6231,23 @@ parser_tokadd_string(struct parser_params *parser,
} while (0)

while ((c = nextc()) != -1) {
if (heredoc_indent > 0) {
if (heredoc_line_indent == -1) {
if (c == '\n') heredoc_line_indent = 0;
} else {
if (c == ' ') {
heredoc_line_indent++;
} else if (c == '\t') {
heredoc_line_indent += 8;
} else if (c != '\n') {
if (heredoc_indent > heredoc_line_indent) {
heredoc_indent = heredoc_line_indent;
}
heredoc_line_indent = -1;
}
}
}

if (paren && c == paren) {
++*nest;
}
Expand Down Expand Up @@ -6471,6 +6508,11 @@ parser_heredoc_identifier(struct parser_params *parser)
if (c == '-') {
c = nextc();
func = STR_FUNC_INDENT;
} else if (c == '~') {
c = nextc();
func = STR_FUNC_INDENT;
heredoc_indent = INT_MAX;
heredoc_line_indent = 0;
}
switch (c) {
case '\'':
Expand All @@ -6495,7 +6537,9 @@ parser_heredoc_identifier(struct parser_params *parser)
default:
if (!parser_is_identchar()) {
pushback(c);
if (func & STR_FUNC_INDENT) {
if (heredoc_indent > 0) {
pushback('~');
} else if (func & STR_FUNC_INDENT) {
pushback('-');
}
return 0;
Expand Down Expand Up @@ -6541,6 +6585,139 @@ parser_heredoc_restore(struct parser_params *parser, NODE *here)
ripper_flush(parser);
}

/*
 * Returns a new string holding the bytes of +input+ with leading indentation
 * removed from its lines, according to heredoc_indent.
 *
 * A space counts as one column and a tab as eight.  +input+ is walked twice:
 * the first pass measures the result, the second pass copies it.
 * +count_indent+ (first pass) and +copy_indent+ (second pass) hold the number
 * of columns that still have to be removed from the current line.  Both are
 * in/out parameters: they are reset to heredoc_indent after every newline,
 * and whatever remains at the end of +input+ is left for the caller to pass
 * on to the next fragment.
 *
 * If a tab is met while fewer than eight columns remain and heredoc_indent is
 * not a multiple of eight, heredoc_indent is lowered to the previous multiple
 * of eight, both counters are reset to it, and the fragment is processed
 * again from the start.
 *
 * +input+ itself is not modified.
 */
static VALUE
parser_heredoc_dedent_string(struct parser_params *parser, VALUE input,
                             long *count_indent, long *copy_indent)
{
    long len, out_len;
    char *str, *p, *out_p, *end;
    VALUE output;

    len = RSTRING_LEN(input);
    out_len = 0;
    str = RSTRING_PTR(input);
    end = &str[len];

    /* Pass 1: compute the length of the dedented text. */
    p = str;
    while (p < end) {
        while (p < end && *count_indent > 0) {
            if (*p == ' ') {
                p++;
                (*count_indent)--;
            } else if (*p == '\t' && *count_indent >= 8) {
                p++;
                *count_indent -= 8;
            } else if (*p == '\t' && heredoc_indent % 8) {
                /* Inconsistent indentation requires us to back up to the
                   previous tab stop */
                heredoc_indent = heredoc_indent - (heredoc_indent % 8);
                *count_indent = *copy_indent = heredoc_indent;
                return parser_heredoc_dedent_string(parser, input,
                                                    count_indent, copy_indent);
            } else {
                break;
            }
        }

        for (; p < end && *p != '\n'; p++) out_len++;
        if (p < end && *p == '\n') {
            /* A new line starts: the full indentation is due again. */
            *count_indent = heredoc_indent;
            out_len++;
            p++;
        }
    }

    output = rb_str_new(0, out_len);
    /* rb_str_new() does not know about +input+; carry its encoding over so
       that the dedented literal keeps the encoding of the original text. */
    rb_enc_associate(output, rb_enc_get(input));
    out_p = RSTRING_PTR(output);

    /* Pass 2: same walk as above, this time copying the kept bytes. */
    p = str;
    while (p < end) {
        while (p < end && *copy_indent > 0) {
            if (*p == ' ') {
                p++;
                (*copy_indent)--;
            } else if (*p == '\t' && *copy_indent >= 8) {
                p++;
                *copy_indent -= 8;
            } else {
                break;
            }
        }

        while (p < end && *p != '\n') *out_p++ = *p++;
        if (p < end && *p == '\n') {
            *copy_indent = heredoc_indent;
            *out_p++ = *p++;
        }
    }

    return output;
}

#ifndef RIPPER
/*
 * Removes heredoc indentation from the string literals of the node tree
 * +root+ and returns +root+.
 *
 * Nothing is done unless heredoc_indent is positive.  The literal of +root+
 * is processed first; after that the nd_next chain of +root+ is followed as
 * long as it consists of NODE_ARRAY nodes, and the literal of every element
 * whose head is a NODE_STR or NODE_DSTR is processed.  Each processed literal
 * is replaced by a freshly built string and the old one is disposed.
 *
 * The remaining-indent counters are shared by all fragments so that they
 * carry over from one fragment to the next.
 */
static NODE *
parser_heredoc_dedent(struct parser_params *parser, NODE *root)
{
    long count_indent, copy_indent;
    VALUE lit, output;
    NODE *node, *str_node;

    if (heredoc_indent <= 0) return root;

    node = str_node = root;
    count_indent = copy_indent = heredoc_indent;

    while (str_node) {
        lit = str_node->nd_lit;
        /* Only touch real strings.  NOTE(review): a DSTR node may carry a
           non-string literal slot (e.g. when the literal starts with an
           interpolation) -- confirm against literal_concat. */
        if (TYPE(lit) == T_STRING) {
            output = parser_heredoc_dedent_string(parser, lit,
                                                  &count_indent, &copy_indent);

            dispose_string(lit);
            str_node->nd_lit = output;
        }

        /* Advance to the next list element holding a string fragment. */
        str_node = 0;
        while ((node = node->nd_next) != 0) {
            if (nd_type(node) != NODE_ARRAY) break;
            if (nd_type(node->nd_head) == NODE_STR ||
                nd_type(node->nd_head) == NODE_DSTR) {
                str_node = node->nd_head;
                break;
            }
        }
    }

    return root;
}
#else /* RIPPER */
/*
 * Ripper counterpart of the heredoc dedent step: removes heredoc indentation
 * from the string fragments found in +array+ and returns +array+.
 *
 * Nothing is done unless heredoc_indent is positive.  Two kinds of elements
 * are rewritten in place:
 *   - an array whose first entry is the symbol :string_content or
 *     :@tstring_content and whose second entry is a string -- the second
 *     entry is replaced by its dedented copy;
 *   - a plain string -- the element itself is replaced.
 * Every other element is left untouched.
 *
 * NOTE(review): +array+ is presumably the value produced by the Ripper event
 * handlers for the string contents, so it is not guaranteed to be an array;
 * anything else is returned unchanged.
 */
static VALUE
parser_heredoc_dedent_ripper(struct parser_params *parser, VALUE array)
{
    long count_indent, copy_indent, array_len, i;
    VALUE e, sym, str, ret;

    if (heredoc_indent <= 0) return array;
    /* RARRAY_LEN() below is only valid for real arrays. */
    if (TYPE(array) != T_ARRAY) return array;

    count_indent = copy_indent = heredoc_indent;

    array_len = RARRAY_LEN(array);
    for (i = 0; i < array_len; i++) {
        e = rb_ary_entry(array, i);
        if (TYPE(e) == T_ARRAY && TYPE(sym = rb_ary_entry(e, 0)) == T_SYMBOL) {
            if (rb_to_id(sym) != rb_intern("string_content") &&
                rb_to_id(sym) != rb_intern("@tstring_content")) continue;
            str = rb_ary_entry(e, 1);
            /* The payload slot may be missing or hold something else. */
            if (TYPE(str) != T_STRING) continue;
            ret = parser_heredoc_dedent_string(parser, str,
                                               &count_indent, &copy_indent);
            rb_ary_store(e, 1, ret);
        } else if (TYPE(e) == T_STRING) {
            ret = parser_heredoc_dedent_string(parser, e,
                                               &count_indent, &copy_indent);
            rb_ary_store(array, i, ret);
        }
    }

    return array;
}
#endif

static int
parser_whole_match_p(struct parser_params *parser,
const char *eos, long len, int indent)
Expand Down
33 changes: 33 additions & 0 deletions test/ripper/test_ripper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,37 @@ def test_yydebug_equals
assert_predicate @ripper, :yydebug
end

# Asserts that Ripper.sexp returns equal trees for two source snippets: one
# written with a `<<-` heredoc and one written with a `<<~` heredoc.
# NOTE(review): the snippets are themselves heredocs, so their leading
# whitespace is significant -- confirm the indentation against the upstream
# file before relying on this copy.
def test_squiggly_heredoc
assert_equal(Ripper.sexp(<<-eos), Ripper.sexp(<<-eos))
<<-eot
asdf
eot
eos
<<~eot
asdf
eot
eos
end

# Parses a `<<-` heredoc and a `<<~` heredoc that both contain an
# interpolation, and asserts that the two trees differ at the entry selected
# by +pos+ but are equal once that entry has been cleared in both.
# NOTE(review): the snippets are heredocs whose leading whitespace is
# significant -- confirm the indentation against the upstream file.
def test_squiggly_heredoc_with_interpolated_expression
sexp1 = Ripper.sexp(<<-eos)
<<-eot
a\#{1}z
eot
eos

sexp2 = Ripper.sexp(<<-eos)
<<~eot
a\#{1}z
eot
eos

# Digs a fixed path into the sexp; presumably this reaches the position
# pair of a token inside the heredoc body -- verify against Ripper.sexp output.
pos = lambda do |s|
s.fetch(1).fetch(0).fetch(1).fetch(2).fetch(1).fetch(0).fetch(2)
end
assert_not_equal pos[sexp1], pos[sexp2]
# Empty the differing entries in place so the rest of the trees can be compared.
pos[sexp1].clear
pos[sexp2].clear
assert_equal sexp1, sexp2
end
end if ripper_test
Loading