From a739e46498de17af4f519266d96ddc140fb0225b Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Sun, 20 Dec 2015 02:29:03 +0000 Subject: [PATCH 1/4] Optimize CGI.escapeHTML for ASCII-compatible encodings --- ext/Setup | 1 + ext/Setup.atheos | 1 + ext/Setup.nacl | 1 + ext/Setup.nt | 1 + ext/cgi/escape/escape.c | 94 +++++++++++++++++++++++++++++++++++++++ ext/cgi/escape/extconf.rb | 3 ++ lib/cgi/util.rb | 5 +++ 7 files changed, 106 insertions(+) create mode 100644 ext/cgi/escape/escape.c create mode 100644 ext/cgi/escape/extconf.rb diff --git a/ext/Setup b/ext/Setup index f4da5215c99ff5..05998e33631611 100644 --- a/ext/Setup +++ b/ext/Setup @@ -1,6 +1,7 @@ #option nodynamic #bigdecimal +#cgi/escape #continuation #coverage #date diff --git a/ext/Setup.atheos b/ext/Setup.atheos index 41eecd161fd2ea..5e39de8e15fe14 100644 --- a/ext/Setup.atheos +++ b/ext/Setup.atheos @@ -2,6 +2,7 @@ option nodynamic #Win32API bigdecimal +cgi/escape dbm digest digest/md5 diff --git a/ext/Setup.nacl b/ext/Setup.nacl index ce65ebcf0a9509..f205e367c612cf 100644 --- a/ext/Setup.nacl +++ b/ext/Setup.nacl @@ -2,6 +2,7 @@ # # #Win32API # bigdecimal +# cgi/escape # continuation # coverage # date diff --git a/ext/Setup.nt b/ext/Setup.nt index 285b1edcb679aa..4812893eefee01 100644 --- a/ext/Setup.nt +++ b/ext/Setup.nt @@ -3,6 +3,7 @@ Win32API bigdecimal +cgi/escape #dbm digest digest/md5 diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c new file mode 100644 index 00000000000000..31467d933ec6a6 --- /dev/null +++ b/ext/cgi/escape/escape.c @@ -0,0 +1,94 @@ +#include "ruby.h" +#include "ruby/encoding.h" + +static VALUE rb_cCGI, rb_mUtil, rb_mEscape; + +static VALUE +html_escaped_cat(VALUE str, char c) +{ + switch (c) { + case '\'': + rb_str_cat_cstr(str, "&#39;"); + break; + case '&': + rb_str_cat_cstr(str, "&amp;"); + break; + case '"': + rb_str_cat_cstr(str, "&quot;"); + break; + case '<': + rb_str_cat_cstr(str, "&lt;"); + break; + case '>': + rb_str_cat_cstr(str, "&gt;"); + break; + } +} + +static 
VALUE +optimized_escape_html(VALUE str) +{ + long i, len, modified = 0, beg = 0, offset = 0; + VALUE dest; + const char *cstr; + + len = RSTRING_LEN(str); + cstr = RSTRING_PTR(str); + + for (i = 0; i < len; i++) { + switch (cstr[i]) { + case '\'': + case '&': + case '"': + case '<': + case '>': + if (!modified) { + modified = 1; + dest = rb_str_buf_new(len); + } + + rb_str_cat(dest, cstr + beg, i - beg); + beg = i + 1; + + html_escaped_cat(dest, cstr[i]); + break; + } + } + + if (modified) { + rb_str_cat(dest, cstr + beg, len - beg); + return dest; + } else { + return str; + } +} + +/* + * call-seq: + * CGI.escapeHTML(string) -> string + * + * Returns HTML-escaped string. + * + */ +static VALUE +cgiesc_escape_html(VALUE self, VALUE str) +{ + Check_Type(str, T_STRING); + + if (rb_enc_str_asciicompat_p(str)) { + return optimized_escape_html(str); + } else { + return rb_call_super(1, &str); + } +} + +void +Init_escape(void) +{ + rb_cCGI = rb_define_class("CGI", rb_cObject); + rb_mEscape = rb_define_module_under(rb_cCGI, "Escape"); + rb_mUtil = rb_define_module_under(rb_cCGI, "Util"); + rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1); + rb_prepend_module(rb_mUtil, rb_mEscape); + rb_extend_object(rb_cCGI, rb_mEscape); +} diff --git a/ext/cgi/escape/extconf.rb b/ext/cgi/escape/extconf.rb new file mode 100644 index 00000000000000..16e8ff224de54f --- /dev/null +++ b/ext/cgi/escape/extconf.rb @@ -0,0 +1,3 @@ +require 'mkmf' + +create_makefile 'cgi/escape' diff --git a/lib/cgi/util.rb b/lib/cgi/util.rb index c2327072398432..83c310b3cb53b6 100644 --- a/lib/cgi/util.rb +++ b/lib/cgi/util.rb @@ -38,6 +38,11 @@ def escapeHTML(string) string.gsub(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__) end + begin + require 'cgi/escape' + rescue LoadError + end + # Unescape a string that has been HTML-escaped # CGI::unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;") # # => "Usage: foo \"bar\" <baz>" From 39cdd833d20b6e644c941486abb79a061c9330da Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: 
Sun, 20 Dec 2015 11:12:17 +0000 Subject: [PATCH 2/4] Fix return value's type of html_escaped_cat --- ext/cgi/escape/escape.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c index 31467d933ec6a6..839dc640e7bcba 100644 --- a/ext/cgi/escape/escape.c +++ b/ext/cgi/escape/escape.c @@ -3,7 +3,7 @@ static VALUE rb_cCGI, rb_mUtil, rb_mEscape; -static VALUE +static void html_escaped_cat(VALUE str, char c) { switch (c) { From 87a0be2a03977c72c6a96a4897b9477d79e27e33 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Sun, 20 Dec 2015 11:21:49 +0000 Subject: [PATCH 3/4] Remove an unused variable --- ext/cgi/escape/escape.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c index 839dc640e7bcba..f4c507b8261ae7 100644 --- a/ext/cgi/escape/escape.c +++ b/ext/cgi/escape/escape.c @@ -28,7 +28,7 @@ html_escaped_cat(VALUE str, char c) static VALUE optimized_escape_html(VALUE str) { - long i, len, modified = 0, beg = 0, offset = 0; + long i, len, modified = 0, beg = 0; VALUE dest; const char *cstr; From 21628354686e95d4ed7f327c967a9347468e3819 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Sun, 20 Dec 2015 11:23:44 +0000 Subject: [PATCH 4/4] Fix encoding of modified return value --- ext/cgi/escape/escape.c | 1 + test/cgi/test_cgi_util.rb | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c index f4c507b8261ae7..0fbdf4729ef2ec 100644 --- a/ext/cgi/escape/escape.c +++ b/ext/cgi/escape/escape.c @@ -57,6 +57,7 @@ optimized_escape_html(VALUE str) if (modified) { rb_str_cat(dest, cstr + beg, len - beg); + rb_enc_associate(dest, rb_enc_get(str)); return dest; } else { return str; diff --git a/test/cgi/test_cgi_util.rb b/test/cgi/test_cgi_util.rb index 5662a30eff9d6c..dd04b56928b678 100644 --- a/test/cgi/test_cgi_util.rb +++ b/test/cgi/test_cgi_util.rb @@ -62,6 +62,12 @@ def test_cgi_escapeHTML 
assert_equal(CGI::escapeHTML("'&\"><"),"&#39;&amp;&quot;&gt;&lt;") end + def test_cgi_escape_html_preserve_encoding + assert_equal(Encoding::US_ASCII, CGI::escapeHTML("'&\"><".force_encoding("US-ASCII")).encoding) + assert_equal(Encoding::ASCII_8BIT, CGI::escapeHTML("'&\"><".force_encoding("ASCII-8BIT")).encoding) + assert_equal(Encoding::UTF_8, CGI::escapeHTML("'&\"><".force_encoding("UTF-8")).encoding) + end + def test_cgi_unescapeHTML assert_equal(CGI::unescapeHTML("&#39;&amp;&quot;&gt;&lt;"),"'&\"><") end