diff --git a/core/src/main/java/org/owasp/encoder/Encode.java b/core/src/main/java/org/owasp/encoder/Encode.java
index 89d7ed9..2fb6c3d 100644
--- a/core/src/main/java/org/owasp/encoder/Encode.java
+++ b/core/src/main/java/org/owasp/encoder/Encode.java
@@ -861,6 +861,30 @@ public static void forXmlComment(Writer out, String input)
encode(Encoders.XML_COMMENT_ENCODER, out, input);
}
+ /**
+ * Encodes for KML by delegating to {@code Encoders.KML_ENCODER}, whose encoder writes {@code &}, {@code <} and {@code >} as numeric character references.
+ *
+ * @param input the input to encode
+ * @return the encoded result
+ */
+ public static String forKml(String input) {
+ return encode(Encoders.KML_ENCODER, input);
+ }
+
+ /**
+ * See {@link #forKml(String)} for description of encoding. This
+ * version writes the encoded output directly to {@code out} without building an intervening string.
+ *
+ * @param out where to write encoded output
+ * @param input the input string to encode
+ * @throws IOException if thrown by writer
+ */
+ public static void forKml(Writer out, String input)
+ throws IOException
+ {
+ encode(Encoders.KML_ENCODER, out, input);
+ }
+
/**
* Encodes data for an XML CDATA section. On the chance that the input
* contains a terminating {@code "]]>"}, it will be replaced by
diff --git a/core/src/main/java/org/owasp/encoder/Encoders.java b/core/src/main/java/org/owasp/encoder/Encoders.java
index 3879fd0..0694de4 100644
--- a/core/src/main/java/org/owasp/encoder/Encoders.java
+++ b/core/src/main/java/org/owasp/encoder/Encoders.java
@@ -88,6 +88,10 @@ public final class Encoders {
* Name of {@linkplain Encode#forXmlComment(String) XML comment} context.
*/
public static final String XML_COMMENT = "xml-comment";
+ /**
+ * Name of {@linkplain Encode#forKml(String) KML} context; the name passed to {@code map} together with the KML encoder instance.
+ */
+ public static final String KML = "kml";
/**
* Name of {@linkplain Encode#forCDATA(String) CDATA} context.
*/
@@ -160,6 +164,11 @@ public final class Encoders {
*/
static final XMLCommentEncoder XML_COMMENT_ENCODER
= map(XML_COMMENT, new XMLCommentEncoder());
+ /**
+ * Encoder for KML contexts: a {@link KMLEncoder} instance passed through {@code map} with the {@link #KML} name.
+ */
+ static final KMLEncoder KML_ENCODER
+ = map(KML, new KMLEncoder());
/**
* Encoder for CDATA contexts.
*/
diff --git a/core/src/main/java/org/owasp/encoder/KMLEncoder.java b/core/src/main/java/org/owasp/encoder/KMLEncoder.java
new file mode 100644
index 0000000..4526c32
--- /dev/null
+++ b/core/src/main/java/org/owasp/encoder/KMLEncoder.java
@@ -0,0 +1,55 @@
+// Copyright (c) 2012 Jeff Ichnowski
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// * Redistributions of source code must retain the above
+// copyright notice, this list of conditions and the following
+// disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials
+// provided with the distribution.
+//
+// * Neither the name of the OWASP nor the names of its
+// contributors may be used to endorse or promote products
+// derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+// OF THE POSSIBILITY OF SUCH DAMAGE.
+package org.owasp.encoder;
+
+import java.nio.CharBuffer;
+import java.nio.charset.CoderResult;
+
+/**
+ * KMLEncoder -- Special case of XML encoding using numeric character references (e.g. {@code &#60;}) instead of entity references (e.g. {@code &lt;}).
+ * This encoder should be used instead of {@link XMLEncoder} to address some shortcomings in the KML specification and the way Google Earth (at least the desktop version) interprets HTML.
+ *
+ * @see "KML Reference Errata"
+ * @see "OWASP Issue"
+ *
+ * @author cnsgithub
+ */
+class KMLEncoder extends XMLEncoder {
+
+ @Override
+ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean endOfInput) {
+ return super.encodeArrays(input, output, endOfInput, true); // true = avoidEntityReferences: &, < and > are written as &#38;, &#60;, &#62;
+ }
+
+}
diff --git a/core/src/main/java/org/owasp/encoder/XMLEncoder.java b/core/src/main/java/org/owasp/encoder/XMLEncoder.java
index cec6205..20078b3 100644
--- a/core/src/main/java/org/owasp/encoder/XMLEncoder.java
+++ b/core/src/main/java/org/owasp/encoder/XMLEncoder.java
@@ -75,15 +75,15 @@ class XMLEncoder extends Encoder {
/**
* The encoded length of an ampersand.
*/
- static final int AMP_LENGTH = 5;
+ static final int AMP_LENGTH = 5, AMP_NUMERIC_LENGTH = 5;
/**
* The encoded length of a less-than sign.
*/
- static final int LT_LENGTH = 4;
+ static final int LT_LENGTH = 4, LT_NUMERIC_LENGTH = 5;
/**
* The encoded length of a greater-than sign.
*/
- static final int GT_LENGTH = 4;
+ static final int GT_LENGTH = 4, GT_NUMERIC_LENGTH = 5;
/**
* The encoded length of an apostrophe.
*/
@@ -245,6 +245,10 @@ public int firstEncodedOffset(String input, int off, int len) {
* {@inheritDoc}
*/
protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean endOfInput) {
+ return encodeArrays(input, output, endOfInput, false); // false = avoidEntityReferences off; subclasses such as KMLEncoder pass true
+ }
+
+ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean endOfInput, boolean avoidEntityReferences) {
final char[] in = input.array();
final char[] out = output.array();
int i = input.arrayOffset() + input.position();
@@ -264,6 +268,17 @@ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean
} else {
switch (ch) {
case '&':
+ if (avoidEntityReferences) {
+ if (j + AMP_NUMERIC_LENGTH > m) {
+ return overflow(input, i, output, j);
+ }
+ out[j++] = '&';
+ out[j++] = '#';
+ out[j++] = '3';
+ out[j++] = '8';
+ out[j++] = ';';
+ break;
+ }
if (j + AMP_LENGTH > m) {
return overflow(input, i, output, j);
}
@@ -274,6 +289,17 @@ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean
out[j++] = ';';
break;
case '<':
+ if (avoidEntityReferences) {
+ if (j + LT_NUMERIC_LENGTH > m) {
+ return overflow(input, i, output, j);
+ }
+ out[j++] = '&';
+ out[j++] = '#';
+ out[j++] = '6';
+ out[j++] = '0';
+ out[j++] = ';';
+ break;
+ }
if (j + LT_LENGTH > m) {
return overflow(input, i, output, j);
}
@@ -283,6 +309,18 @@ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean
out[j++] = ';';
break;
case '>':
+ if (avoidEntityReferences) {
+ if (j + GT_NUMERIC_LENGTH > m) {
+ return overflow(input, i, output, j);
+ }
+ out[j++] = '&';
+ out[j++] = '#';
+ out[j++] = '6';
+ out[j++] = '2';
+ out[j++] = ';';
+ break;
+
+ }
if (j + GT_LENGTH > m) {
return overflow(input, i, output, j);
}
diff --git a/core/src/test/java/org/owasp/encoder/KMLEncoderTest.java b/core/src/test/java/org/owasp/encoder/KMLEncoderTest.java
new file mode 100644
index 0000000..b20cb07
--- /dev/null
+++ b/core/src/test/java/org/owasp/encoder/KMLEncoderTest.java
@@ -0,0 +1,93 @@
+// Copyright (c) 2012 Jeff Ichnowski
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// * Redistributions of source code must retain the above
+// copyright notice, this list of conditions and the following
+// disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials
+// provided with the distribution.
+//
+// * Neither the name of the OWASP nor the names of its
+// contributors may be used to endorse or promote products
+// derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+// OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package org.owasp.encoder;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ * KMLEncoderTest -- test suite for the KMLEncoder; checks that markup characters come out as numeric character references.
+ *
+ * @author cnsgithub
+ */
+public class KMLEncoderTest extends TestCase {
+
+ public static Test suite() {
+ TestSuite suite = new TestSuite();
+ EncoderTestSuiteBuilder builder = new EncoderTestSuiteBuilder(new KMLEncoder(), "-safe-", "-&-")
+ .encode("&#60;strike&#62;foo &#38; bar&#60;/strike&#62;", "<strike>foo & bar</strike>") // (expected, input): <, > and & become numeric references
+ .encode("invalid-control-characters", " b ", "\0b\26")
+ .encode("valid-surrogate-pair", "\ud800\udc00", "\ud800\udc00")
+ .encode("missing-low-surrogate", " ", "\ud800")
+ .encode("missing-high-surrogate", " ", "\udc00")
+ .encode("valid-upper-char", "\ufffd", "\ufffd")
+ .encode("invalid-upper-char", " ", "\uffff")
+ .invalid(0, 0x1f)
+ .valid("\t\r\n")
+ .valid(' ', Character.MAX_CODE_POINT)
+ .invalid(0x7f, 0x9f)
+ .valid("\u0085")
+ .invalid(Character.MIN_SURROGATE, Character.MAX_SURROGATE)
+ .invalid(0xfdd0, 0xfdef)
+ .invalid(0xfffe, 0xffff)
+ .invalid(0x1fffe, 0x1ffff)
+ .invalid(0x2fffe, 0x2ffff)
+ .invalid(0x3fffe, 0x3ffff)
+ .invalid(0x4fffe, 0x4ffff)
+ .invalid(0x5fffe, 0x5ffff)
+ .invalid(0x6fffe, 0x6ffff)
+ .invalid(0x7fffe, 0x7ffff)
+ .invalid(0x8fffe, 0x8ffff)
+ .invalid(0x9fffe, 0x9ffff)
+ .invalid(0xafffe, 0xaffff)
+ .invalid(0xbfffe, 0xbffff)
+ .invalid(0xcfffe, 0xcffff)
+ .invalid(0xdfffe, 0xdffff)
+ .invalid(0xefffe, 0xeffff)
+ .invalid(0xffffe, 0xfffff)
+ .invalid(0x10fffe, 0x10ffff);
+
+ builder.encoded("&><\'\"") // the five characters that must never pass through unencoded
+ .encode("&#39;", "\'")
+ .encode("&#34;", "\"")
+ .encode("safe", "safe");
+
+ suite.addTest(builder.validSuite().invalidSuite(XMLEncoder.INVALID_CHARACTER_REPLACEMENT).encodedSuite().build());
+ return suite;
+ }
+
+}
+