Codestin Search App

blob: 8e4f7ecfae15d3b02355732c9f1472342939395a [file] [log] [blame]

Avi Drissman	e4622aa	2022-09-08 20:36:06	[diff] [blame]	1	// Copyright 2020 The Chromium Authors
Weilun Shi	40194033	2020-07-14 22:22:33	[diff] [blame]	2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
Peter Kasting	134ef9af	2024-12-28 02:30:09	[diff] [blame]	5	#include "base/strings/escape.h"
				6
Weilun Shi	40194033	2020-07-14 22:22:33	[diff] [blame]	7	#include <algorithm>
				8	#include <string>
				9
Weilun Shi	40194033	2020-07-14 22:22:33	[diff] [blame]	10	#include "base/strings/string_util.h"
Victor Vasiliev	dc7e817	2022-04-20 23:45:20	[diff] [blame]	11	#include "base/strings/stringprintf.h"
Weilun Shi	40194033	2020-07-14 22:22:33	[diff] [blame]	12	#include "base/strings/utf_string_conversions.h"
				13	#include "testing/gtest/include/gtest/gtest.h"
				14
				15	namespace base {
Victor Vasiliev	dc7e817	2022-04-20 23:45:20	[diff] [blame]	16	namespace {
				17
				18	struct EscapeCase {
				19	const char* input;
				20	const char* output;
				21	};
				22
				23	struct EscapeForHTMLCase {
				24	const char* input;
				25	const char* expected_output;
				26	};
Weilun Shi	40194033	2020-07-14 22:22:33	[diff] [blame]	27
				28	struct UnescapeURLCase {
				29	const char* input;
				30	UnescapeRule::Type rules;
				31	const char* output;
				32	};
				33
				34	struct UnescapeAndDecodeCase {
				35	const char* input;
				36
				37	// The expected output when run through UnescapeURL.
				38	const char* url_unescaped;
				39
				40	// The expected output when run through UnescapeQuery.
				41	const char* query_unescaped;
				42
				43	// The expected output when run through UnescapeAndDecodeURLComponent.
				44	const wchar_t* decoded;
				45	};
				46
				47	struct AdjustOffsetCase {
				48	const char* input;
				49	size_t input_offset;
				50	size_t output_offset;
				51	};
				52
Victor Vasiliev	dc7e817	2022-04-20 23:45:20	[diff] [blame]	53	TEST(EscapeTest, EscapeTextForFormSubmission) {
				54	const EscapeCase escape_cases[] = {
				55	{"foo", "foo"}, {"foo bar", "foo+bar"}, {"foo++", "foo%2B%2B"}};
				56	for (const auto& escape_case : escape_cases) {
				57	EXPECT_EQ(escape_case.output,
				58	EscapeQueryParamValue(escape_case.input, true));
				59	}
				60
				61	const EscapeCase escape_cases_no_plus[] = {
				62	{"foo", "foo"}, {"foo bar", "foo%20bar"}, {"foo++", "foo%2B%2B"}};
				63	for (const auto& escape_case : escape_cases_no_plus) {
				64	EXPECT_EQ(escape_case.output,
				65	EscapeQueryParamValue(escape_case.input, false));
				66	}
				67
				68	// Test all the values in we're supposed to be escaping.
				69	const std::string no_escape(
				70	"abcdefghijklmnopqrstuvwxyz"
				71	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
				72	"0123456789"
				73	"!'()*-._~");
				74	for (int i = 0; i < 256; ++i) {
				75	std::string in;
				76	in.push_back(i);
				77	std::string out = EscapeQueryParamValue(in, true);
				78	if (0 == i) {
				79	EXPECT_EQ(out, std::string("%00"));
				80	} else if (32 == i) {
				81	// Spaces are plus escaped like web forms.
				82	EXPECT_EQ(out, std::string("+"));
				83	} else if (no_escape.find(in) == std::string::npos) {
				84	// Check %hex escaping
				85	std::string expected = StringPrintf("%%%02X", i);
				86	EXPECT_EQ(expected, out);
				87	} else {
				88	// No change for things in the no_escape list.
				89	EXPECT_EQ(out, in);
				90	}
				91	}
				92	}
				93
				94	TEST(EscapeTest, EscapePath) {
				95	ASSERT_EQ(
				96	// Most of the character space we care about, un-escaped
				97	EscapePath("\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
				98	"<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
				99	"[\\]^_`abcdefghijklmnopqrstuvwxyz"
				100	"{\|}~\x7f\x80\xff"),
				101	// Escaped
				102	"%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;"
				103	"%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
				104	"%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
				105	"%7B%7C%7D~%7F%80%FF");
				106	}
				107
				108	TEST(EscapeTest, EscapeUrlEncodedData) {
				109	ASSERT_EQ(
				110	// Most of the character space we care about, un-escaped
				111	EscapeUrlEncodedData("\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
				112	"<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
				113	"[\\]^_`abcdefghijklmnopqrstuvwxyz"
				114	"{\|}~\x7f\x80\xff",
				115	true),
				116	// Escaped
				117	"%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B"
				118	"%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ"
				119	"%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
				120	"%7B%7C%7D~%7F%80%FF");
				121	}
				122
				123	TEST(EscapeTest, EscapeUrlEncodedDataSpace) {
				124	ASSERT_EQ(EscapeUrlEncodedData("a b", true), "a+b");
				125	ASSERT_EQ(EscapeUrlEncodedData("a b", false), "a%20b");
				126	}
				127
				128	TEST(EscapeTest, EscapeForHTML) {
				129	const EscapeForHTMLCase tests[] = {
				130	{"hello", "hello"},
				131	{"<hello>", "<hello>"},
				132	{"don\'t mess with me", "don't mess with me"},
				133	};
				134	for (const auto& test : tests) {
				135	std::string result = EscapeForHTML(std::string(test.input));
				136	EXPECT_EQ(std::string(test.expected_output), result);
				137	}
				138	}
				139
				140	TEST(EscapeTest, UnescapeForHTML) {
				141	const EscapeForHTMLCase tests[] = {
				142	{"", ""},
				143	{"<hello>", "<hello>"},
				144	{"don't mess with me", "don\'t mess with me"},
				145	{"<>&"'", "<>&\"'"},
				146	{"& lt; &amp ; &; '", "& lt; &amp ; &; '"},
				147	{"&", "&"},
				148	{""", "\""},
				149	{"'", "'"},
				150	{"<", "<"},
				151	{">", ">"},
				152	{"& &", "& &"},
				153	};
				154	for (const auto& test : tests) {
				155	std::u16string result = UnescapeForHTML(ASCIIToUTF16(test.input));
				156	EXPECT_EQ(ASCIIToUTF16(test.expected_output), result);
				157	}
				158	}
				159
				160	TEST(EscapeTest, EscapeExternalHandlerValue) {
				161	ASSERT_EQ(
				162	// Escaped
				163	"%02%0A%1D%20!%22#$%25&'()*+,-./0123456789:;"
				164	"%3C=%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
				165	"[%5C]%5E_%60abcdefghijklmnopqrstuvwxyz"
				166	"%7B%7C%7D~%7F%80%FF",
				167	// Most of the character space we care about, un-escaped
				168	EscapeExternalHandlerValue("\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
				169	"<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
				170	"[\\]^_`abcdefghijklmnopqrstuvwxyz"
				171	"{\|}~\x7f\x80\xff"));
				172
				173	ASSERT_EQ(
				174	"!#$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_"
				175	"abcdefghijklmnopqrstuvwxyz~",
				176	EscapeExternalHandlerValue(
				177	"!#$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_"
				178	"abcdefghijklmnopqrstuvwxyz~"));
				179
				180	ASSERT_EQ("%258k", EscapeExternalHandlerValue("%8k"));
				181	ASSERT_EQ("a%25", EscapeExternalHandlerValue("a%"));
				182	ASSERT_EQ("%25a", EscapeExternalHandlerValue("%a"));
				183	ASSERT_EQ("a%258", EscapeExternalHandlerValue("a%8"));
				184	ASSERT_EQ("%ab", EscapeExternalHandlerValue("%ab"));
				185	ASSERT_EQ("%AB", EscapeExternalHandlerValue("%AB"));
				186
				187	ASSERT_EQ("http://example.com/path/sub?q=a%7Cb%7Cc&q=1%7C2%7C3#ref%7C",
				188	EscapeExternalHandlerValue(
				189	"http://example.com/path/sub?q=a\|b\|c&q=1\|2\|3#ref\|"));
				190	ASSERT_EQ("http://example.com/path/sub?q=a%7Cb%7Cc&q=1%7C2%7C3#ref%7C",
				191	EscapeExternalHandlerValue(
				192	"http://example.com/path/sub?q=a%7Cb%7Cc&q=1%7C2%7C3#ref%7C"));
				193	ASSERT_EQ("http://[2001:db8:0:1]:80",
				194	EscapeExternalHandlerValue("http://[2001:db8:0:1]:80"));
				195	}
				196
				197	TEST(EscapeTest, EscapeNonASCII) {
				198	EXPECT_EQ("abc\n%2580%80", EscapeNonASCIIAndPercent("abc\n%80\x80"));
				199	EXPECT_EQ("abc\n%80%80", EscapeNonASCII("abc\n%80\x80"));
				200	}
				201
Weilun Shi	40194033	2020-07-14 22:22:33	[diff] [blame]	202	TEST(EscapeTest, DataURLWithAccentedCharacters) {
				203	const std::string url =
				204	"text/html;charset=utf-8,%3Chtml%3E%3Cbody%3ETonton,%20ton%20th%C3"
				205	"%A9%20t'a-t-il%20%C3%B4t%C3%A9%20ta%20toux%20";
				206
				207	OffsetAdjuster::Adjustments adjustments;
				208	UnescapeAndDecodeUTF8URLComponentWithAdjustments(url, UnescapeRule::SPACES,
				209	&adjustments);
				210	}
				211
				212	TEST(EscapeTest, UnescapeURLComponent) {
				213	const UnescapeURLCase kUnescapeCases[] = {
				214	{"", UnescapeRule::NORMAL, ""},
				215	{"%2", UnescapeRule::NORMAL, "%2"},
				216	{"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"},
				217	{"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"},
				218	{"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},
				219	{"Some%20random text %25%2dOK", UnescapeRule::NONE,
				220	"Some%20random text %25%2dOK"},
				221	{"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
				222	"Some%20random text %25-OK"},
				223	{"Some%20random text %25%E1%A6", UnescapeRule::NORMAL,
				224	"Some%20random text %25\xE1\xA6"},
				225	{"Some%20random text %25%E1%A6OK", UnescapeRule::NORMAL,
				226	"Some%20random text %25\xE1\xA6OK"},
				227	{"Some%20random text %25%E1%A6%99OK", UnescapeRule::NORMAL,
				228	"Some%20random text %25\xE1\xA6\x99OK"},
				229
				230	// BiDi Control characters should not be unescaped.
				231	{"Some%20random text %25%D8%9COK", UnescapeRule::NORMAL,
				232	"Some%20random text %25%D8%9COK"},
				233	{"Some%20random text %25%E2%80%8EOK", UnescapeRule::NORMAL,
				234	"Some%20random text %25%E2%80%8EOK"},
				235	{"Some%20random text %25%E2%80%8FOK", UnescapeRule::NORMAL,
				236	"Some%20random text %25%E2%80%8FOK"},
				237	{"Some%20random text %25%E2%80%AAOK", UnescapeRule::NORMAL,
				238	"Some%20random text %25%E2%80%AAOK"},
				239	{"Some%20random text %25%E2%80%ABOK", UnescapeRule::NORMAL,
				240	"Some%20random text %25%E2%80%ABOK"},
				241	{"Some%20random text %25%E2%80%AEOK", UnescapeRule::NORMAL,
				242	"Some%20random text %25%E2%80%AEOK"},
				243	{"Some%20random text %25%E2%81%A6OK", UnescapeRule::NORMAL,
				244	"Some%20random text %25%E2%81%A6OK"},
				245	{"Some%20random text %25%E2%81%A9OK", UnescapeRule::NORMAL,
				246	"Some%20random text %25%E2%81%A9OK"},
				247
				248	// Certain banned characters should not be unescaped.
				249	// U+1F50F LOCK WITH INK PEN
				250	{"Some%20random text %25%F0%9F%94%8FOK", UnescapeRule::NORMAL,
				251	"Some%20random text %25%F0%9F%94%8FOK"},
				252	// U+1F510 CLOSED LOCK WITH KEY
				253	{"Some%20random text %25%F0%9F%94%90OK", UnescapeRule::NORMAL,
				254	"Some%20random text %25%F0%9F%94%90OK"},
				255	// U+1F512 LOCK
				256	{"Some%20random text %25%F0%9F%94%92OK", UnescapeRule::NORMAL,
				257	"Some%20random text %25%F0%9F%94%92OK"},
				258	// U+1F513 OPEN LOCK
				259	{"Some%20random text %25%F0%9F%94%93OK", UnescapeRule::NORMAL,
				260	"Some%20random text %25%F0%9F%94%93OK"},
				261
				262	// Spaces
				263	{"(%C2%85)(%C2%A0)(%E1%9A%80)(%E2%80%80)", UnescapeRule::NORMAL,
				264	"(%C2%85)(%C2%A0)(%E1%9A%80)(%E2%80%80)"},
				265	{"(%E2%80%81)(%E2%80%82)(%E2%80%83)(%E2%80%84)", UnescapeRule::NORMAL,
				266	"(%E2%80%81)(%E2%80%82)(%E2%80%83)(%E2%80%84)"},
				267	{"(%E2%80%85)(%E2%80%86)(%E2%80%87)(%E2%80%88)", UnescapeRule::NORMAL,
				268	"(%E2%80%85)(%E2%80%86)(%E2%80%87)(%E2%80%88)"},
				269	{"(%E2%80%89)(%E2%80%8A)(%E2%80%A8)(%E2%80%A9)", UnescapeRule::NORMAL,
				270	"(%E2%80%89)(%E2%80%8A)(%E2%80%A8)(%E2%80%A9)"},
				271	{"(%E2%80%AF)(%E2%81%9F)(%E3%80%80)", UnescapeRule::NORMAL,
				272	"(%E2%80%AF)(%E2%81%9F)(%E3%80%80)"},
				273	{"(%E2%A0%80)", UnescapeRule::NORMAL, "(%E2%A0%80)"},
				274
				275	// Default Ignorable and Formatting characters should not be unescaped.
				276	{"(%E2%81%A5)(%EF%BF%B0)(%EF%BF%B8)", UnescapeRule::NORMAL,
				277	"(%E2%81%A5)(%EF%BF%B0)(%EF%BF%B8)"},
				278	{"(%F3%A0%82%80)(%F3%A0%83%BF)(%F3%A0%87%B0)", UnescapeRule::NORMAL,
				279	"(%F3%A0%82%80)(%F3%A0%83%BF)(%F3%A0%87%B0)"},
				280	{"(%F3%A0%BF%BF)(%C2%AD)(%CD%8F)", UnescapeRule::NORMAL,
				281	"(%F3%A0%BF%BF)(%C2%AD)(%CD%8F)"},
				282	{"(%D8%80%20)(%D8%85)(%DB%9D)(%DC%8F)(%E0%A3%A2)", UnescapeRule::NORMAL,
				283	"(%D8%80%20)(%D8%85)(%DB%9D)(%DC%8F)(%E0%A3%A2)"},
				284	{"(%E1%85%9F)(%E1%85%A0)(%E1%9E%B4)(%E1%9E%B5)", UnescapeRule::NORMAL,
				285	"(%E1%85%9F)(%E1%85%A0)(%E1%9E%B4)(%E1%9E%B5)"},
				286	{"(%E1%A0%8B)(%E1%A0%8C)(%E1%A0%8D)(%E1%A0%8E)", UnescapeRule::NORMAL,
				287	"(%E1%A0%8B)(%E1%A0%8C)(%E1%A0%8D)(%E1%A0%8E)"},
				288	{"(%E2%80%8B)(%E2%80%8C)(%E2%80%8D)(%E2%81%A0)", UnescapeRule::NORMAL,
				289	"(%E2%80%8B)(%E2%80%8C)(%E2%80%8D)(%E2%81%A0)"},
				290	{"(%E2%81%A1)(%E2%81%A2)(%E2%81%A3)(%E2%81%A4)", UnescapeRule::NORMAL,
				291	"(%E2%81%A1)(%E2%81%A2)(%E2%81%A3)(%E2%81%A4)"},
				292	{"(%E3%85%A4)(%EF%BB%BF)(%EF%BE%A0)(%EF%BF%B9)", UnescapeRule::NORMAL,
				293	"(%E3%85%A4)(%EF%BB%BF)(%EF%BE%A0)(%EF%BF%B9)"},
				294	{"(%EF%BF%BB)(%F0%91%82%BD)(%F0%91%83%8D)", UnescapeRule::NORMAL,
				295	"(%EF%BF%BB)(%F0%91%82%BD)(%F0%91%83%8D)"},
				296	{"(%F0%93%90%B0)(%F0%93%90%B8)", UnescapeRule::NORMAL,
				297	"(%F0%93%90%B0)(%F0%93%90%B8)"},
				298	// General Punctuation - Deprecated (U+206A--206F)
				299	{"(%E2%81%AA)(%E2%81%AD)(%E2%81%AF)", UnescapeRule::NORMAL,
				300	"(%E2%81%AA)(%E2%81%AD)(%E2%81%AF)"},
				301	// Variation selectors (U+FE00--FE0F)
				302	{"(%EF%B8%80)(%EF%B8%8C)(%EF%B8%8D)", UnescapeRule::NORMAL,
				303	"(%EF%B8%80)(%EF%B8%8C)(%EF%B8%8D)"},
				304	// Shorthand format controls (U+1BCA0--1BCA3)
				305	{"(%F0%9B%B2%A0)(%F0%9B%B2%A1)(%F0%9B%B2%A3)", UnescapeRule::NORMAL,
				306	"(%F0%9B%B2%A0)(%F0%9B%B2%A1)(%F0%9B%B2%A3)"},
				307	// Musical symbols beams and slurs (U+1D173--1D17A)
				308	{"(%F0%9D%85%B3)(%F0%9D%85%B9)(%F0%9D%85%BA)", UnescapeRule::NORMAL,
				309	"(%F0%9D%85%B3)(%F0%9D%85%B9)(%F0%9D%85%BA)"},
				310	// Tags block (U+E0000--E007F), includes unassigned points
				311	{"(%F3%A0%80%80)(%F3%A0%80%81)(%F3%A0%81%8F)", UnescapeRule::NORMAL,
				312	"(%F3%A0%80%80)(%F3%A0%80%81)(%F3%A0%81%8F)"},
				313	// Ideographic-specific variation selectors (U+E0100--E01EF)
				314	{"(%F3%A0%84%80)(%F3%A0%84%90)(%F3%A0%87%AF)", UnescapeRule::NORMAL,
				315	"(%F3%A0%84%80)(%F3%A0%84%90)(%F3%A0%87%AF)"},
				316
				317	// Two spoofing characters in a row should not be unescaped.
				318	{"%D8%9C%D8%9C", UnescapeRule::NORMAL, "%D8%9C%D8%9C"},
				319	// Non-spoofing characters surrounded by spoofing characters should be
				320	// unescaped.
				321	{"%D8%9C%C2%A1%D8%9C%C2%A1", UnescapeRule::NORMAL,
				322	"%D8%9C\xC2\xA1%D8%9C\xC2\xA1"},
				323	// Invalid UTF-8 characters surrounded by spoofing characters should be
				324	// unescaped.
				325	{"%D8%9C%85%D8%9C%85", UnescapeRule::NORMAL, "%D8%9C\x85%D8%9C\x85"},
				326	// Test with enough trail bytes to overflow the CBU8_MAX_LENGTH-byte
				327	// buffer. The first two bytes are a spoofing character as well.
				328	{"%D8%9C%9C%9C%9C%9C%9C%9C%9C%9C%9C", UnescapeRule::NORMAL,
				329	"%D8%9C\x9C\x9C\x9C\x9C\x9C\x9C\x9C\x9C\x9C"},
				330
				331	{"Some%20random text %25%2dOK", UnescapeRule::SPACES,
				332	"Some random text %25-OK"},
				333	{"Some%20random text %25%2dOK", UnescapeRule::PATH_SEPARATORS,
				334	"Some%20random text %25-OK"},
				335	{"Some%20random text %25%2dOK",
				336	UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS,
				337	"Some%20random text %-OK"},
				338	{"Some%20random text %25%2dOK",
				339	UnescapeRule::SPACES \|
				340	UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS,
				341	"Some random text %-OK"},
				342	{"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"},
				343	{"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"},
				344	// Certain URL-sensitive characters should not be unescaped unless asked.
				345	{"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
				346	UnescapeRule::SPACES, "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
				347	{"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
				348	UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS,
				349	"Hello%20%13%10world ## ?? == && %% ++"},
				350	// We can neither escape nor unescape '@' since some websites expect it to
				351	// be preserved as either '@' or "%40".
				352	// See http://b/996720 and http://crbug.com/23933 .
				353	{"me@my%40example", UnescapeRule::NORMAL, "me@my%40example"},
				354	// Control characters.
				355	{"%01%02%03%04%05%06%07%08%09 %25",
				356	UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS,
				357	"%01%02%03%04%05%06%07%08%09 %"},
				358	{"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"},
				359
				360	// '/' and '\\' should only be unescaped by PATH_SEPARATORS.
				361	{"%2F%5C", UnescapeRule::PATH_SEPARATORS, "/\\"},
				362	};
				363
				364	for (const auto unescape_case : kUnescapeCases) {
				365	EXPECT_EQ(unescape_case.output,
				366	UnescapeURLComponent(unescape_case.input, unescape_case.rules));
				367	}
				368
				369	// Test NULL character unescaping, which can't be tested above since those are
				370	// just char pointers.
				371	std::string input("Null");
				372	input.push_back(0); // Also have a NULL in the input.
				373	input.append("%00%39Test");
				374
				375	std::string expected = "Null";
				376	expected.push_back(0);
				377	expected.append("%009Test");
				378	EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
				379	}
				380
				381	TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponentWithAdjustments) {
				382	const UnescapeAndDecodeCase unescape_cases[] = {
				383	{"%", "%", "%", L"%"},
				384	{"+", "+", " ", L"+"},
				385	{"%2+", "%2+", "%2 ", L"%2+"},
				386	{"+%%%+%%%", "+%%%+%%%", " %%% %%%", L"+%%%+%%%"},
				387	{"Don't escape anything", "Don't escape anything",
				388	"Don't escape anything", L"Don't escape anything"},
				389	{"+Invalid %escape %2+", "+Invalid %escape %2+", " Invalid %escape %2 ",
				390	L"+Invalid %escape %2+"},
				391	{"Some random text %25%2dOK", "Some random text %25-OK",
				392	"Some random text %25-OK", L"Some random text %25-OK"},
				393	{"%01%02%03%04%05%06%07%08%09", "%01%02%03%04%05%06%07%08%09",
				394	"%01%02%03%04%05%06%07%08%09", L"%01%02%03%04%05%06%07%08%09"},
				395	{"%E4%BD%A0+%E5%A5%BD", "\xE4\xBD\xA0+\xE5\xA5\xBD",
				396	"\xE4\xBD\xA0 \xE5\xA5\xBD", L"\x4f60+\x597d"},
				397	{"%ED%ED", // Invalid UTF-8.
				398	"\xED\xED", "\xED\xED", L"%ED%ED"}, // Invalid UTF-8 -> kept unescaped.
				399	};
				400
				401	for (const auto& unescape_case : unescape_cases) {
				402	std::string unescaped =
				403	UnescapeURLComponent(unescape_case.input, UnescapeRule::NORMAL);
				404	EXPECT_EQ(std::string(unescape_case.url_unescaped), unescaped);
				405
				406	unescaped = UnescapeURLComponent(unescape_case.input,
				407	UnescapeRule::REPLACE_PLUS_WITH_SPACE);
				408	EXPECT_EQ(std::string(unescape_case.query_unescaped), unescaped);
				409
				410	// The adjustments argument is covered by the next test.
				411	//
				412	// TODO: Need to test unescape_spaces and unescape_percent.
Jan Wilken Dörrie	085b2aa	2021-03-12 16:26:57	[diff] [blame]	413	std::u16string decoded = UnescapeAndDecodeUTF8URLComponentWithAdjustments(
Weilun Shi	40194033	2020-07-14 22:22:33	[diff] [blame]	414	unescape_case.input, UnescapeRule::NORMAL, nullptr);
				415	EXPECT_EQ(WideToUTF16(unescape_case.decoded), decoded);
				416	}
				417	}
				418
				419	TEST(EscapeTest, AdjustOffset) {
				420	const AdjustOffsetCase adjust_cases[] = {
				421	{"", 0, 0},
				422	{"test", 0, 0},
				423	{"test", 2, 2},
				424	{"test", 4, 4},
				425	{"test", std::string::npos, std::string::npos},
				426	{"%2dtest", 6, 4},
				427	{"%2dtest", 3, 1},
				428	{"%2dtest", 2, std::string::npos},
				429	{"%2dtest", 1, std::string::npos},
				430	{"%2dtest", 0, 0},
				431	{"test%2d", 2, 2},
				432	{"test%2e", 2, 2},
				433	{"%E4%BD%A0+%E5%A5%BD", 9, 1},
				434	{"%E4%BD%A0+%E5%A5%BD", 6, std::string::npos},
				435	{"%E4%BD%A0+%E5%A5%BD", 0, 0},
				436	{"%E4%BD%A0+%E5%A5%BD", 10, 2},
				437	{"%E4%BD%A0+%E5%A5%BD", 19, 3},
				438
				439	{"hi%41test%E4%BD%A0+%E5%A5%BD", 18, 8},
				440	{"hi%41test%E4%BD%A0+%E5%A5%BD", 15, std::string::npos},
				441	{"hi%41test%E4%BD%A0+%E5%A5%BD", 9, 7},
				442	{"hi%41test%E4%BD%A0+%E5%A5%BD", 19, 9},
				443	{"hi%41test%E4%BD%A0+%E5%A5%BD", 28, 10},
				444	{"hi%41test%E4%BD%A0+%E5%A5%BD", 0, 0},
				445	{"hi%41test%E4%BD%A0+%E5%A5%BD", 2, 2},
				446	{"hi%41test%E4%BD%A0+%E5%A5%BD", 3, std::string::npos},
				447	{"hi%41test%E4%BD%A0+%E5%A5%BD", 5, 3},
				448
				449	{"%E4%BD%A0+%E5%A5%BDhi%41test", 9, 1},
				450	{"%E4%BD%A0+%E5%A5%BDhi%41test", 6, std::string::npos},
				451	{"%E4%BD%A0+%E5%A5%BDhi%41test", 0, 0},
				452	{"%E4%BD%A0+%E5%A5%BDhi%41test", 10, 2},
				453	{"%E4%BD%A0+%E5%A5%BDhi%41test", 19, 3},
				454	{"%E4%BD%A0+%E5%A5%BDhi%41test", 21, 5},
				455	{"%E4%BD%A0+%E5%A5%BDhi%41test", 22, std::string::npos},
				456	{"%E4%BD%A0+%E5%A5%BDhi%41test", 24, 6},
				457	{"%E4%BD%A0+%E5%A5%BDhi%41test", 28, 10},
				458
				459	{"%ED%B0%80+%E5%A5%BD", 6, 6}, // not convertible to UTF-8
				460	};
				461
				462	for (const auto& adjust_case : adjust_cases) {
				463	size_t offset = adjust_case.input_offset;
				464	OffsetAdjuster::Adjustments adjustments;
				465	UnescapeAndDecodeUTF8URLComponentWithAdjustments(
				466	adjust_case.input, UnescapeRule::NORMAL, &adjustments);
				467	OffsetAdjuster::AdjustOffset(adjustments, &offset);
				468	EXPECT_EQ(adjust_case.output_offset, offset)
				469	<< "input=" << adjust_case.input
				470	<< " offset=" << adjust_case.input_offset;
				471	}
				472	}
				473
				474	TEST(EscapeTest, UnescapeBinaryURLComponent) {
				475	const UnescapeURLCase kTestCases[] = {
				476	// Check that ASCII characters with special handling in
				477	// UnescapeURLComponent() are still unescaped.
				478	{"%09%20%25foo%2F", UnescapeRule::NORMAL, "\x09 %foo/"},
				479
				480	// UTF-8 Characters banned by UnescapeURLComponent() should also be
				481	// unescaped.
				482	{"Some random text %D8%9COK", UnescapeRule::NORMAL,
				483	"Some random text \xD8\x9COK"},
				484	{"Some random text %F0%9F%94%8FOK", UnescapeRule::NORMAL,
				485	"Some random text \xF0\x9F\x94\x8FOK"},
				486
				487	// As should invalid UTF-8 characters.
				488	{"%A0%A0%E9%E9%A0%A0%A0%A0", UnescapeRule::NORMAL,
				489	"\xA0\xA0\xE9\xE9\xA0\xA0\xA0\xA0"},
				490
				491	// And valid UTF-8 characters that are not banned by
				492	// UnescapeURLComponent() should be unescaped, too!
				493	{"%C2%A1%C2%A1", UnescapeRule::NORMAL, "\xC2\xA1\xC2\xA1"},
				494
				495	// '+' should be left alone by default
				496	{"++%2B++", UnescapeRule::NORMAL, "+++++"},
				497	// But should magically be turned into a space if requested.
				498	{"++%2B++", UnescapeRule::REPLACE_PLUS_WITH_SPACE, " + "},
				499	};
				500
				501	for (const auto& test_case : kTestCases) {
				502	EXPECT_EQ(test_case.output,
				503	UnescapeBinaryURLComponent(test_case.input, test_case.rules));
				504	}
				505
				506	// Test NULL character unescaping, which can't be tested above since those are
				507	// just char pointers.
				508	std::string input("Null");
				509	input.push_back(0); // Also have a NULL in the input.
				510	input.append("%00%39Test");
				511
				512	std::string expected("Null");
				513	expected.push_back(0);
				514	expected.push_back(0);
				515	expected.append("9Test");
				516	EXPECT_EQ(expected, UnescapeBinaryURLComponent(input));
				517	}
				518
				519	TEST(EscapeTest, UnescapeBinaryURLComponentSafe) {
				520	const struct TestCase {
				521	const char* input;
				522	// Expected output. Null if call is expected to fail when
				523	// \|fail_on_path_separators\| is false.
				524	const char* expected_output;
				525	// Whether \|input\| has any escaped path separators.
				526	bool has_path_separators;
				527	} kTestCases[] = {
				528	// Spaces, percents, and invalid UTF-8 characters are all successfully
				529	// unescaped.
				530	{"%20%25foo%81", " %foo\x81", false},
				531
				532	// Characters disallowed unconditionally.
				533	{"foo%00", nullptr, false},
				534	{"foo%01", nullptr, false},
				535	{"foo%0A", nullptr, false},
				536	{"foo%0D", nullptr, false},
				537
				538	// Path separators.
				539	{"foo%2F", "foo/", true},
				540	{"foo%5C", "foo\\", true},
				541
				542	// Characters that are considered invalid to escape are ignored if passed
				543	// in unescaped.
				544	{"foo\x01\r/\\", "foo\x01\r/\\", false},
				545	};
				546
				547	for (const auto& test_case : kTestCases) {
				548	SCOPED_TRACE(test_case.input);
				549
				550	std::string output = "foo";
				551	if (!test_case.expected_output) {
				552	EXPECT_FALSE(UnescapeBinaryURLComponentSafe(
				553	test_case.input, false /* fail_on_path_separators */, &output));
				554	EXPECT_TRUE(output.empty());
				555	EXPECT_FALSE(UnescapeBinaryURLComponentSafe(
				556	test_case.input, true /* fail_on_path_separators */, &output));
				557	EXPECT_TRUE(output.empty());
				558	continue;
				559	}
				560	EXPECT_TRUE(UnescapeBinaryURLComponentSafe(
				561	test_case.input, false /* fail_on_path_separators */, &output));
				562	EXPECT_EQ(test_case.expected_output, output);
				563	if (test_case.has_path_separators) {
				564	EXPECT_FALSE(UnescapeBinaryURLComponentSafe(
				565	test_case.input, true /* fail_on_path_separators */, &output));
				566	EXPECT_TRUE(output.empty());
				567	} else {
				568	output = "foo";
				569	EXPECT_TRUE(UnescapeBinaryURLComponentSafe(
				570	test_case.input, true /* fail_on_path_separators */, &output));
				571	EXPECT_EQ(test_case.expected_output, output);
				572	}
				573	}
				574	}
				575
				576	TEST(EscapeTest, ContainsEncodedBytes) {
				577	EXPECT_FALSE(ContainsEncodedBytes("abc/def", {'/', '\\'}));
				578	EXPECT_FALSE(ContainsEncodedBytes("abc%2Fdef", {'%'}));
				579	EXPECT_TRUE(ContainsEncodedBytes("abc%252Fdef", {'%'}));
				580	EXPECT_TRUE(ContainsEncodedBytes("abc%2Fdef", {'/', '\\'}));
				581	EXPECT_TRUE(ContainsEncodedBytes("abc%5Cdef", {'/', '\\'}));
				582	EXPECT_TRUE(ContainsEncodedBytes("abc%2fdef", {'/', '\\'}));
				583
				584	// Should be looking for byte values, not UTF-8 character values.
Peter Kasting	c1eb091	2021-06-09 17:22:05	[diff] [blame]	585	EXPECT_TRUE(
				586	ContainsEncodedBytes("caf%C3%A9", {static_cast<uint8_t>('\xc3')}));
				587	EXPECT_FALSE(
				588	ContainsEncodedBytes("caf%C3%A9", {static_cast<uint8_t>('\xe9')}));
Weilun Shi	40194033	2020-07-14 22:22:33	[diff] [blame]	589	}
				590
Victor Vasiliev	dc7e817	2022-04-20 23:45:20	[diff] [blame]	591	} // namespace
Weilun Shi	40194033	2020-07-14 22:22:33	[diff] [blame]	592	} // namespace base