From a59f4237c1b542a56a844717a50d2417dba1f132 Mon Sep 17 00:00:00 2001
From: Misko Hevery
Date: Fri, 1 May 2015 16:12:53 -0700
Subject: [PATCH 01/10] refactor($sanitize): new implementation of the html
sanitized parser
This implementation is based on using an inert document parsed by the browser.
Closes #11442
Closes #11443
---
docs/content/error/$sanitize/badparse.ngdoc | 11 -
docs/content/error/$sanitize/ddns.ngdoc | 10 +
lib/htmlparser/htmlparser.js | 309 --------------------
src/ngSanitize/sanitize.js | 286 ++++++------------
test/ngSanitize/sanitizeSpec.js | 78 ++---
5 files changed, 134 insertions(+), 560 deletions(-)
delete mode 100644 docs/content/error/$sanitize/badparse.ngdoc
create mode 100644 docs/content/error/$sanitize/ddns.ngdoc
delete mode 100644 lib/htmlparser/htmlparser.js
diff --git a/docs/content/error/$sanitize/badparse.ngdoc b/docs/content/error/$sanitize/badparse.ngdoc
deleted file mode 100644
index d07c6d62a403..000000000000
--- a/docs/content/error/$sanitize/badparse.ngdoc
+++ /dev/null
@@ -1,11 +0,0 @@
-@ngdoc error
-@name $sanitize:badparse
-@fullName Parsing Error while Sanitizing
-@description
-
-This error occurs when the HTML string passed to '$sanitize' can't be parsed by the sanitizer.
-The error contains part of the html string that can't be parsed.
-
-The parser is more strict than a typical browser parser, so it's possible that some obscure input would produce this error despite the string being recognized as valid HTML by a browser.
-
-If a valid html code results in this error, please file a bug.
diff --git a/docs/content/error/$sanitize/ddns.ngdoc b/docs/content/error/$sanitize/ddns.ngdoc
new file mode 100644
index 000000000000..a1f6f77390c8
--- /dev/null
+++ b/docs/content/error/$sanitize/ddns.ngdoc
@@ -0,0 +1,10 @@
+@ngdoc error
+@name $sanitize:ddns
+@fullName DOMDocument not supported
+@description
+
+This error occurs when `$sanitize` sanitizer determines that `DOMDocument` api is not supported by the current browser.
+
+This api is necessary for safe parsing of HTML strings into DOM trees and without it the sanitizer can't sanitize the input.
+
+The API is present in all supported browsers, including IE 9.0, so the presence of this error usually indicates that Angular's `$sanitize` is being used on an unsupported platform.
diff --git a/lib/htmlparser/htmlparser.js b/lib/htmlparser/htmlparser.js
deleted file mode 100644
index 46a3da08313e..000000000000
--- a/lib/htmlparser/htmlparser.js
+++ /dev/null
@@ -1,309 +0,0 @@
-/*
- * HTML Parser By John Resig (ejohn.org)
- * Original code by Erik Arvidsson, Mozilla Public License
- * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
- *
- * // Use like so:
- * htmlParser(htmlString, {
- * start: function(tag, attrs, unary) {},
- * end: function(tag) {},
- * chars: function(text) {},
- * comment: function(text) {}
- * });
- *
- * // or to get an XML string:
- * HTMLtoXML(htmlString);
- *
- * // or to get an XML DOM Document
- * HTMLtoDOM(htmlString);
- *
- * // or to inject into an existing document/DOM node
- * HTMLtoDOM(htmlString, document);
- * HTMLtoDOM(htmlString, document.body);
- *
- */
-
-(function(){
-
- // Regular Expressions for parsing tags and attributes
- var startTag = /^<(\w+)((?:\s+\w+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>/,
- endTag = /^<\/(\w+)[^>]*>/,
- attr = /(\w+)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/g;
-
- // Empty Elements - HTML 4.01
- var empty = makeMap("area,base,basefont,br,col,frame,hr,img,input,isindex,link,meta,param,embed");
-
- // Block Elements - HTML 4.01
- var block = makeMap("address,applet,blockquote,button,center,dd,del,dir,div,dl,dt,fieldset,form,frameset,hr,iframe,ins,isindex,li,map,menu,noframes,noscript,object,ol,p,pre,script,table,tbody,td,tfoot,th,thead,tr,ul");
-
- // Inline Elements - HTML 4.01
- var inline = makeMap("a,abbr,acronym,applet,b,basefont,bdo,big,br,button,cite,code,del,dfn,em,font,i,iframe,img,input,ins,kbd,label,map,object,q,s,samp,script,select,small,span,strike,strong,sub,sup,textarea,tt,u,var");
-
- // Elements that you can, intentionally, leave open
- // (and which close themselves)
- var closeSelf = makeMap("colgroup,dd,dt,li,options,p,td,tfoot,th,thead,tr");
-
- // Attributes that have their values filled in disabled="disabled"
- var fillAttrs = makeMap("checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected");
-
- // Special Elements (can contain anything)
- var special = makeMap("script,style");
-
- var htmlParser = this.htmlParser = function( html, handler ) {
- var index, chars, match, stack = [], last = html;
- stack.last = function(){
- return this[ this.length - 1 ];
- };
-
- while ( html ) {
- chars = true;
-
- // Make sure we're not in a script or style element
- if ( !stack.last() || !special[ stack.last() ] ) {
-
- // Comment
- if ( html.indexOf("");
-
- if ( index >= 0 ) {
- if ( handler.comment )
- handler.comment( html.substring( 4, index ) );
- html = html.substring( index + 3 );
- chars = false;
- }
-
- // end tag
- } else if ( html.indexOf("") == 0 ) {
- match = html.match( endTag );
-
- if ( match ) {
- html = html.substring( match[0].length );
- match[0].replace( endTag, parseEndTag );
- chars = false;
- }
-
- // start tag
- } else if ( html.indexOf("<") == 0 ) {
- match = html.match( startTag );
-
- if ( match ) {
- html = html.substring( match[0].length );
- match[0].replace( startTag, parseStartTag );
- chars = false;
- }
- }
-
- if ( chars ) {
- index = html.indexOf("<");
-
- var text = index < 0 ? html : html.substring( 0, index );
- html = index < 0 ? "" : html.substring( index );
-
- if ( handler.chars )
- handler.chars( text );
- }
-
- } else {
- html = html.replace(new RegExp("(.*)<\/" + stack.last() + "[^>]*>"), function(all, text){
- text = text.replace(//g, "$1")
- .replace(//g, "$1");
-
- if ( handler.chars )
- handler.chars( text );
-
- return "";
- });
-
- parseEndTag( "", stack.last() );
- }
-
- if ( html == last )
- throw "Parse Error: " + html;
- last = html;
- }
-
- // Clean up any remaining tags
- parseEndTag();
-
- function parseStartTag( tag, tagName, rest, unary ) {
- if ( block[ tagName ] ) {
- while ( stack.last() && inline[ stack.last() ] ) {
- parseEndTag( "", stack.last() );
- }
- }
-
- if ( closeSelf[ tagName ] && stack.last() == tagName ) {
- parseEndTag( "", tagName );
- }
-
- unary = empty[ tagName ] || !!unary;
-
- if ( !unary )
- stack.push( tagName );
-
- if ( handler.start ) {
- var attrs = [];
-
- rest.replace(attr, function(match, name) {
- var value = arguments[2] ? arguments[2] :
- arguments[3] ? arguments[3] :
- arguments[4] ? arguments[4] :
- fillAttrs[name] ? name : "";
-
- attrs.push({
- name: name,
- value: value,
- escaped: value.replace(/(^|[^\\])"/g, '$1\\\"') //"
- });
- });
-
- if ( handler.start )
- handler.start( tagName, attrs, unary );
- }
- }
-
- function parseEndTag( tag, tagName ) {
- // If no tag name is provided, clean shop
- if ( !tagName )
- var pos = 0;
-
- // Find the closest opened tag of the same type
- else
- for ( var pos = stack.length - 1; pos >= 0; pos-- )
- if ( stack[ pos ] == tagName )
- break;
-
- if ( pos >= 0 ) {
- // Close all the open elements, up the stack
- for ( var i = stack.length - 1; i >= pos; i-- )
- if ( handler.end )
- handler.end( stack[ i ] );
-
- // Remove the open elements from the stack
- stack.length = pos;
- }
- }
- };
-
- this.HTMLtoXML = function( html ) {
- var results = "";
-
- htmlParser(html, {
- start: function( tag, attrs, unary ) {
- results += "<" + tag;
-
- for ( var i = 0; i < attrs.length; i++ )
- results += " " + attrs[i].name + '="' + attrs[i].escaped + '"';
-
- results += (unary ? "/" : "") + ">";
- },
- end: function( tag ) {
- results += "" + tag + ">";
- },
- chars: function( text ) {
- results += text;
- },
- comment: function( text ) {
- results += "";
- }
- });
-
- return results;
- };
-
- this.HTMLtoDOM = function( html, doc ) {
- // There can be only one of these elements
- var one = makeMap("html,head,body,title");
-
- // Enforce a structure for the document
- var structure = {
- link: "head",
- base: "head"
- };
-
- if ( !doc ) {
- if ( typeof DOMDocument != "undefined" )
- doc = new DOMDocument();
- else if ( typeof document != "undefined" && document.implementation && document.implementation.createDocument )
- doc = document.implementation.createDocument("", "", null);
- else if ( typeof ActiveX != "undefined" )
- doc = new ActiveXObject("Msxml.DOMDocument");
-
- } else
- doc = doc.ownerDocument ||
- doc.getOwnerDocument && doc.getOwnerDocument() ||
- doc;
-
- var elems = [],
- documentElement = doc.documentElement ||
- doc.getDocumentElement && doc.getDocumentElement();
-
- // If we're dealing with an empty document then we
- // need to pre-populate it with the HTML document structure
- if ( !documentElement && doc.createElement ) (function(){
- var html = doc.createElement("html");
- var head = doc.createElement("head");
- head.appendChild( doc.createElement("title") );
- html.appendChild( head );
- html.appendChild( doc.createElement("body") );
- doc.appendChild( html );
- })();
-
- // Find all the unique elements
- if ( doc.getElementsByTagName )
- for ( var i in one )
- one[ i ] = doc.getElementsByTagName( i )[0];
-
- // If we're working with a document, inject contents into
- // the body element
- var curParentNode = one.body;
-
- htmlParser( html, {
- start: function( tagName, attrs, unary ) {
- // If it's a pre-built element, then we can ignore
- // its construction
- if ( one[ tagName ] ) {
- curParentNode = one[ tagName ];
- return;
- }
-
- var elem = doc.createElement( tagName );
-
- for ( var attr in attrs )
- elem.setAttribute( attrs[ attr ].name, attrs[ attr ].value );
-
- if ( structure[ tagName ] && typeof one[ structure[ tagName ] ] != "boolean" )
- one[ structure[ tagName ] ].appendChild( elem );
-
- else if ( curParentNode && curParentNode.appendChild )
- curParentNode.appendChild( elem );
-
- if ( !unary ) {
- elems.push( elem );
- curParentNode = elem;
- }
- },
- end: function( tag ) {
- elems.length -= 1;
-
- // Init the new parentNode
- curParentNode = elems[ elems.length - 1 ];
- },
- chars: function( text ) {
- curParentNode.appendChild( doc.createTextNode( text ) );
- },
- comment: function( text ) {
- // create comment node
- }
- });
-
- return doc;
- };
-
- function makeMap(str){
- var obj = {}, items = str.split(",");
- for ( var i = 0; i < items.length; i++ )
- obj[ items[i] ] = true;
- return obj;
- }
-})();
\ No newline at end of file
diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js
index 4c7f615dc7d1..d423fef72180 100644
--- a/src/ngSanitize/sanitize.js
+++ b/src/ngSanitize/sanitize.js
@@ -28,23 +28,6 @@ var $sanitizeMinErr = angular.$$minErr('$sanitize');
* See {@link ngSanitize.$sanitize `$sanitize`} for usage.
*/
-/*
- * HTML Parser By Misko Hevery (misko@hevery.com)
- * based on: HTML Parser By John Resig (ejohn.org)
- * Original code by Erik Arvidsson, Mozilla Public License
- * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
- *
- * // Use like so:
- * htmlParser(htmlString, {
- * start: function(tag, attrs, unary) {},
- * end: function(tag) {},
- * chars: function(text) {},
- * comment: function(text) {}
- * });
- *
- */
-
-
/**
* @ngdoc service
* @name $sanitize
@@ -164,16 +147,7 @@ function sanitizeText(chars) {
// Regular Expressions for parsing tags and attributes
-var START_TAG_REGEXP =
- /^<((?:[a-zA-Z])[\w:-]*)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*(>?)/,
- END_TAG_REGEXP = /^<\/\s*([\w:-]+)[^>]*>/,
- ATTR_REGEXP = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:[^"])*)")|(?:'((?:[^'])*)')|([^>\s]+)))?/g,
- BEGIN_TAG_REGEXP = /^,
- BEGING_END_TAGE_REGEXP = /^<\//,
- COMMENT_REGEXP = //g,
- DOCTYPE_REGEXP = /]*?)>/i,
- CDATA_REGEXP = //g,
- SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
+var SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
// Match everything outside of normal chars and " (quote character)
NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g;
@@ -184,23 +158,23 @@ var START_TAG_REGEXP =
// Safe Void Elements - HTML5
// http://dev.w3.org/html5/spec/Overview.html#void-elements
-var voidElements = makeMap("area,br,col,hr,img,wbr");
+var voidElements = toMap("area,br,col,hr,img,wbr");
// Elements that you can, intentionally, leave open (and which close themselves)
// http://dev.w3.org/html5/spec/Overview.html#optional-tags
-var optionalEndTagBlockElements = makeMap("colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr"),
- optionalEndTagInlineElements = makeMap("rp,rt"),
+var optionalEndTagBlockElements = toMap("colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr"),
+ optionalEndTagInlineElements = toMap("rp,rt"),
optionalEndTagElements = angular.extend({},
optionalEndTagInlineElements,
optionalEndTagBlockElements);
// Safe Block Elements - HTML5
-var blockElements = angular.extend({}, optionalEndTagBlockElements, makeMap("address,article," +
+var blockElements = angular.extend({}, optionalEndTagBlockElements, toMap("address,article," +
"aside,blockquote,caption,center,del,dir,div,dl,figure,figcaption,footer,h1,h2,h3,h4,h5," +
"h6,header,hgroup,hr,ins,map,menu,nav,ol,pre,script,section,table,ul"));
// Inline Elements - HTML5
-var inlineElements = angular.extend({}, optionalEndTagInlineElements, makeMap("a,abbr,acronym,b," +
+var inlineElements = angular.extend({}, optionalEndTagInlineElements, toMap("a,abbr,acronym,b," +
"bdi,bdo,big,br,cite,code,del,dfn,em,font,i,img,ins,kbd,label,map,mark,q,ruby,rp,rt,s," +
"samp,small,span,strike,strong,sub,sup,time,tt,u,var"));
@@ -208,12 +182,12 @@ var inlineElements = angular.extend({}, optionalEndTagInlineElements, makeMap("a
// https://wiki.whatwg.org/wiki/Sanitization_rules#svg_Elements
// Note: the elements animate,animateColor,animateMotion,animateTransform,set are intentionally omitted.
// They can potentially allow for arbitrary javascript to be executed. See #11290
-var svgElements = makeMap("circle,defs,desc,ellipse,font-face,font-face-name,font-face-src,g,glyph," +
+var svgElements = toMap("circle,defs,desc,ellipse,font-face,font-face-name,font-face-src,g,glyph," +
"hkern,image,linearGradient,line,marker,metadata,missing-glyph,mpath,path,polygon,polyline," +
"radialGradient,rect,stop,svg,switch,text,title,tspan,use");
// Special Elements (can contain anything)
-var specialElements = makeMap("script,style");
+var specialElements = toMap("script,style");
var validElements = angular.extend({},
voidElements,
@@ -223,9 +197,9 @@ var validElements = angular.extend({},
svgElements);
//Attributes that have href and hence need to be sanitized
-var uriAttrs = makeMap("background,cite,href,longdesc,src,usemap,xlink:href");
+var uriAttrs = toMap("background,cite,href,longdesc,src,usemap,xlink:href");
-var htmlAttrs = makeMap('abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,' +
+var htmlAttrs = toMap('abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,' +
'color,cols,colspan,compact,coords,dir,face,headers,height,hreflang,hspace,' +
'ismap,lang,language,nohref,nowrap,rel,rev,rows,rowspan,rules,' +
'scope,scrolling,shape,size,span,start,summary,tabindex,target,title,type,' +
@@ -233,7 +207,7 @@ var htmlAttrs = makeMap('abbr,align,alt,axis,bgcolor,border,cellpadding,cellspac
// SVG attributes (without "id" and "name" attributes)
// https://wiki.whatwg.org/wiki/Sanitization_rules#svg_Attributes
-var svgAttrs = makeMap('accent-height,accumulate,additive,alphabetic,arabic-form,ascent,' +
+var svgAttrs = toMap('accent-height,accumulate,additive,alphabetic,arabic-form,ascent,' +
'baseProfile,bbox,begin,by,calcMode,cap-height,class,color,color-rendering,content,' +
'cx,cy,d,dx,dy,descent,display,dur,end,fill,fill-rule,font-family,font-size,font-stretch,' +
'font-style,font-variant,font-weight,from,fx,fy,g1,g2,glyph-name,gradientUnits,hanging,' +
@@ -254,7 +228,7 @@ var validAttrs = angular.extend({},
svgAttrs,
htmlAttrs);
-function makeMap(str, lowercaseKeys) {
+function toMap(str, lowercaseKeys) {
var obj = {}, items = str.split(','), i;
for (i = 0; i < items.length; i++) {
obj[lowercaseKeys ? angular.lowercase(items[i]) : items[i]] = true;
@@ -262,11 +236,36 @@ function makeMap(str, lowercaseKeys) {
return obj;
}
+var baseNode;
+(function(window) {
+ var doc;
+ if (window.DOMDocument) {
+ doc = new window.DOMDocument();
+ } else if (window.document && window.document.implementation) {
+ doc = window.document.implementation.createHTMLDocument("inert");
+ } else if (window.ActiveXObject) {
+ doc = new window.ActiveXObject("Msxml.DOMDocument");
+ } else {
+ throw $sanitizeMinErr('ddns', "DOMDocument not supported");
+ }
+ var docElement = doc.documentElement || doc.getDocumentElement();
+ var bodyElements = docElement.getElementsByTagName('body');
+
+ // usually there should be only one body element in the document, but IE doesn't have any, so we need to create one
+ if (bodyElements.length === 1) {
+ baseNode = bodyElements[0];
+ } else {
+ var html = doc.createElement('html');
+ baseNode = doc.createElement('body');
+ html.appendChild(baseNode);
+ doc.appendChild(html);
+ }
+})(window);
/**
* @example
* htmlParser(htmlString, {
- * start: function(tag, attrs, unary) {},
+ * start: function(tag, attrs) {},
* end: function(tag) {},
* chars: function(text) {},
* comment: function(text) {}
@@ -276,153 +275,58 @@ function makeMap(str, lowercaseKeys) {
* @param {object} handler
*/
function htmlParser(html, handler) {
- if (typeof html !== 'string') {
- if (html === null || typeof html === 'undefined') {
- html = '';
- } else {
- html = '' + html;
- }
+ if (html === null || html === undefined) {
+ html = '';
+ } else if (typeof html !== 'string') {
+ html = '' + html;
}
- var index, chars, match, stack = [], last = html, text;
- stack.last = function() { return stack[stack.length - 1]; };
-
- while (html) {
- text = '';
- chars = true;
-
- // Make sure we're not in a script or style element
- if (!stack.last() || !specialElements[stack.last()]) {
-
- // Comment
- if (html.indexOf("", index) === index) {
- if (handler.comment) handler.comment(html.substring(4, index));
- html = html.substring(index + 3);
- chars = false;
- }
- // DOCTYPE
- } else if (DOCTYPE_REGEXP.test(html)) {
- match = html.match(DOCTYPE_REGEXP);
-
- if (match) {
- html = html.replace(match[0], '');
- chars = false;
- }
- // end tag
- } else if (BEGING_END_TAGE_REGEXP.test(html)) {
- match = html.match(END_TAG_REGEXP);
-
- if (match) {
- html = html.substring(match[0].length);
- match[0].replace(END_TAG_REGEXP, parseEndTag);
- chars = false;
- }
-
- // start tag
- } else if (BEGIN_TAG_REGEXP.test(html)) {
- match = html.match(START_TAG_REGEXP);
+ baseNode.innerHTML = html;
+ var node = baseNode.firstChild;
+ while (node) {
+ switch (node.nodeType) {
+ case 1: // ELEMENT_NODE
+ handler.start(node.nodeName.toLowerCase(), attrToMap(node.attributes));
+ break;
+ case 3: // TEXT NODE
+ handler.chars(node.textContent);
+ break;
+ case 8: // COMMENT NODE
+ handler.comment(node.textContent);
+ break;
+ }
- if (match) {
- // We only have a valid start-tag if there is a '>'.
- if (match[4]) {
- html = html.substring(match[0].length);
- match[0].replace(START_TAG_REGEXP, parseStartTag);
+ var nextNode;
+ if (!(nextNode = node.firstChild)) {
+ if (node.nodeType == 1) {
+ handler.end(node.nodeName.toLowerCase());
+ }
+ nextNode = node.nextSibling;
+ if (!nextNode) {
+ while (nextNode == null) {
+ node = node.parentNode;
+ if (node === baseNode) break;
+ nextNode = node.nextSibling;
+ if (node.nodeType == 1) {
+ handler.end(node.nodeName.toLowerCase());
}
- chars = false;
- } else {
- // no ending tag found --- this piece should be encoded as an entity.
- text += '<';
- html = html.substring(1);
}
}
-
- if (chars) {
- index = html.indexOf("<");
-
- text += index < 0 ? html : html.substring(0, index);
- html = index < 0 ? "" : html.substring(index);
-
- if (handler.chars) handler.chars(decodeEntities(text));
- }
-
- } else {
- // IE versions 9 and 10 do not understand the regex '[^]', so using a workaround with [\W\w].
- html = html.replace(new RegExp("([\\W\\w]*)<\\s*\\/\\s*" + stack.last() + "[^>]*>", 'i'),
- function(all, text) {
- text = text.replace(COMMENT_REGEXP, "$1").replace(CDATA_REGEXP, "$1");
-
- if (handler.chars) handler.chars(decodeEntities(text));
-
- return "";
- });
-
- parseEndTag("", stack.last());
- }
-
- if (html == last) {
- throw $sanitizeMinErr('badparse', "The sanitizer was unable to parse the following block " +
- "of html: {0}", html);
}
- last = html;
+ node = nextNode;
}
- // Clean up any remaining tags
- parseEndTag();
-
- function parseStartTag(tag, tagName, rest, unary) {
- tagName = angular.lowercase(tagName);
- if (blockElements[tagName]) {
- while (stack.last() && inlineElements[stack.last()]) {
- parseEndTag("", stack.last());
- }
- }
-
- if (optionalEndTagElements[tagName] && stack.last() == tagName) {
- parseEndTag("", tagName);
- }
-
- unary = voidElements[tagName] || !!unary;
-
- if (!unary) {
- stack.push(tagName);
- }
-
- var attrs = {};
-
- rest.replace(ATTR_REGEXP,
- function(match, name, doubleQuotedValue, singleQuotedValue, unquotedValue) {
- var value = doubleQuotedValue
- || singleQuotedValue
- || unquotedValue
- || '';
-
- attrs[name] = decodeEntities(value);
- });
- if (handler.start) handler.start(tagName, attrs, unary);
+ while (node = baseNode.firstChild) {
+ baseNode.removeChild(node);
}
+}
- function parseEndTag(tag, tagName) {
- var pos = 0, i;
- tagName = angular.lowercase(tagName);
- if (tagName) {
- // Find the closest opened tag of the same type
- for (pos = stack.length - 1; pos >= 0; pos--) {
- if (stack[pos] == tagName) break;
- }
- }
-
- if (pos >= 0) {
- // Close all the open elements, up the stack
- for (i = stack.length - 1; i >= pos; i--)
- if (handler.end) handler.end(stack[i]);
-
- // Remove the open elements from the stack
- stack.length = pos;
- }
+function attrToMap(attrs) {
+ var map = {};
+ for (var i = 0, ii = attrs.length; i < ii; i++) {
+ var attr = attrs[i];
+ map[attr.name] = attr.value;
}
+ return map;
}
var hiddenPre=document.createElement("pre");
@@ -466,7 +370,7 @@ function encodeEntities(value) {
* create an HTML/XML writer which writes to buffer
* @param {Array} buf use buf.jain('') to get out sanitized html string
* @returns {object} in the form of {
- * start: function(tag, attrs, unary) {},
+ * start: function(tag, attrs) {},
* end: function(tag) {},
* chars: function(text) {},
* comment: function(text) {}
@@ -476,7 +380,7 @@ function htmlSanitizeWriter(buf, uriValidator) {
var ignore = false;
var out = angular.bind(buf, buf.push);
return {
- start: function(tag, attrs, unary) {
+ start: function(tag, attrs) {
tag = angular.lowercase(tag);
if (!ignore && specialElements[tag]) {
ignore = tag;
@@ -496,25 +400,25 @@ function htmlSanitizeWriter(buf, uriValidator) {
out('"');
}
});
- out(unary ? '/>' : '>');
+ out('>');
}
},
end: function(tag) {
- tag = angular.lowercase(tag);
- if (!ignore && validElements[tag] === true) {
- out('');
- out(tag);
- out('>');
- }
- if (tag == ignore) {
- ignore = false;
- }
- },
+ tag = angular.lowercase(tag);
+ if (!ignore && validElements[tag] === true) {
+ out('');
+ out(tag);
+ out('>');
+ }
+ if (tag == ignore) {
+ ignore = false;
+ }
+ },
chars: function(chars) {
- if (!ignore) {
- out(encodeEntities(chars));
- }
+ if (!ignore) {
+ out(encodeEntities(chars));
}
+ }
};
}
diff --git a/test/ngSanitize/sanitizeSpec.js b/test/ngSanitize/sanitizeSpec.js
index 33d036c97efc..068991e72282 100644
--- a/test/ngSanitize/sanitizeSpec.js
+++ b/test/ngSanitize/sanitizeSpec.js
@@ -22,18 +22,21 @@ describe('HTML', function() {
var handler, start, text, comment;
beforeEach(function() {
text = "";
+ start = null;
handler = {
- start: function(tag, attrs, unary) {
+ start: function(tag, attrs) {
start = {
tag: tag,
- attrs: attrs,
- unary: unary
+ attrs: attrs
};
// Since different browsers handle newlines differently we trim
// so that it is easier to write tests.
- angular.forEach(attrs, function(value, key) {
+ for (var i = 0, ii = attrs.length; i < ii; i++) {
+ var keyValue = attrs[i];
+ var key = keyValue.key;
+ var value = keyValue.value;
attrs[key] = value.replace(/^\s*/, '').replace(/\s*$/, '');
- });
+ }
},
chars: function(text_) {
text += text_;
@@ -52,33 +55,9 @@ describe('HTML', function() {
expect(comment).toEqual('FOOBAR');
});
- it('should throw an exception for invalid comments', function() {
- var caught=false;
- try {
- htmlParser('', handler);
- }
- catch (ex) {
- caught = true;
- // expected an exception due to a bad parse
- }
- expect(caught).toBe(true);
- });
-
- it('double-dashes are not allowed in a comment', function() {
- var caught=false;
- try {
- htmlParser('', handler);
- }
- catch (ex) {
- caught = true;
- // expected an exception due to a bad parse
- }
- expect(caught).toBe(true);
- });
-
it('should parse basic format', function() {
htmlParser('text', handler);
- expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false});
+ expect(start).toEqual({tag:'tag', attrs:{attr:'value'}});
expect(text).toEqual('text');
});
@@ -88,15 +67,15 @@ describe('HTML', function() {
});
it('should throw badparse if text content contains "<" followed by "/" without matching ">"', function() {
- expect(function() {
- htmlParser('foo bar', handler);
- }).toThrowMinErr('$sanitize', 'badparse', 'The sanitizer was unable to parse the following block of html: bar');
+ htmlParser('foo bar', handler);
+ expect(start).toEqual(undefined);
+ expect(text).toEqual('foo ');
});
it('should throw badparse if text content contains "<" followed by an ASCII letter without matching ">"', function() {
- expect(function() {
- htmlParser('foo text\ntag\n>', handler);
- expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false});
+ expect(start).toEqual({tag:'tag', attrs:{attr:'value'}});
expect(text).toEqual('text');
});
it('should parse newlines in attributes', function() {
htmlParser('text', handler);
- expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false});
+ expect(start).toEqual({tag:'tag', attrs:{attr:'\nvalue\n'}});
expect(text).toEqual('text');
});
it('should parse namespace', function() {
htmlParser('text', handler);
- expect(start).toEqual({tag:'ns:t-a-g', attrs:{'ns:a-t-t-r':'value'}, unary:false});
+ expect(start).toEqual({tag:'ns:t-a-g', attrs:{'ns:a-t-t-r':'\nvalue\n'}});
expect(text).toEqual('text');
});
it('should parse empty value attribute of node', function() {
htmlParser('', handler);
- expect(start).toEqual({tag:'option', attrs:{selected:'', value:''}, unary:false});
+ expect(start).toEqual({tag:'option', attrs:{selected:'', value:''}});
expect(text).toEqual('abc');
});
});
@@ -137,11 +116,12 @@ describe('HTML', function() {
});
it('should remove script', function() {
- expectHTML('ac.').toEqual('ac.');
});
it('should remove script that has newline characters', function() {
- expectHTML('ac.').toEqual('ac.');
});
it('should remove DOCTYPE header', function() {
@@ -173,7 +153,7 @@ describe('HTML', function() {
});
it('should remove double nested script', function() {
- expectHTML('ailc.').toEqual('ac.');
+ expectHTML('ailc.').toEqual('ailc.');
});
it('should remove unknown names', function() {
@@ -185,7 +165,7 @@ describe('HTML', function() {
});
it('should handle self closed elements', function() {
- expectHTML('ac').toEqual('ac');
+ expectHTML('ac').toEqual('ac');
});
it('should handle namespace', function() {
@@ -212,7 +192,7 @@ describe('HTML', function() {
it('should ignore back slash as escape', function() {
expectHTML('').
- toEqual('');
+ toEqual('');
});
it('should ignore object attributes', function() {
@@ -247,8 +227,8 @@ describe('HTML', function() {
});
it('should accept SVG tags', function() {
- expectHTML('')
- .toEqual('');
+ expectHTML('')
+ .toEqual('');
});
it('should not ignore white-listed svg camelCased attributes', function() {
@@ -435,11 +415,11 @@ describe('HTML', function() {
inject(function() {
$$sanitizeUri.andReturn('someUri');
- expectHTML('').toEqual('');
+ expectHTML('').toEqual('');
expect($$sanitizeUri).toHaveBeenCalledWith('someUri', true);
$$sanitizeUri.andReturn('unsafe:someUri');
- expectHTML('').toEqual('');
+ expectHTML('').toEqual('');
});
});
From 45bc1fac57b58350f70762f856814277fe134e5b Mon Sep 17 00:00:00 2001
From: Igor Minar
Date: Fri, 7 Aug 2015 09:30:26 -0700
Subject: [PATCH 02/10] fix($sanitize): support void elements, fixups, remove
dead code, typos
---
.../$sanitize/{ddns.ngdoc => noinert.ngdoc} | 6 +--
src/ngSanitize/sanitize.js | 49 ++++++-------------
test/ngSanitize/sanitizeSpec.js | 28 +++--------
3 files changed, 25 insertions(+), 58 deletions(-)
rename docs/content/error/$sanitize/{ddns.ngdoc => noinert.ngdoc} (60%)
diff --git a/docs/content/error/$sanitize/ddns.ngdoc b/docs/content/error/$sanitize/noinert.ngdoc
similarity index 60%
rename from docs/content/error/$sanitize/ddns.ngdoc
rename to docs/content/error/$sanitize/noinert.ngdoc
index a1f6f77390c8..0562016bede1 100644
--- a/docs/content/error/$sanitize/ddns.ngdoc
+++ b/docs/content/error/$sanitize/noinert.ngdoc
@@ -1,9 +1,9 @@
@ngdoc error
-@name $sanitize:ddns
-@fullName DOMDocument not supported
+@name $sanitize:noinert
+@fullName Can't create an inert html document
@description
-This error occurs when `$sanitize` sanitizer determines that `DOMDocument` api is not supported by the current browser.
+This error occurs when the `$sanitize` sanitizer determines that the `document.implementation.createHTMLDocument` API is not supported by the current browser.
This api is necessary for safe parsing of HTML strings into DOM trees and without it the sanitizer can't sanitize the input.
diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js
index d423fef72180..70797a1c9267 100644
--- a/src/ngSanitize/sanitize.js
+++ b/src/ngSanitize/sanitize.js
@@ -149,7 +149,7 @@ function sanitizeText(chars) {
// Regular Expressions for parsing tags and attributes
var SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
// Match everything outside of normal chars and " (quote character)
- NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g;
+ NON_ALPHANUMERIC_REGEXP = /([^\#-~ |!])/g;
// Good source of info about elements and attributes
@@ -236,28 +236,24 @@ function toMap(str, lowercaseKeys) {
return obj;
}
-var baseNode;
+var inertBodyElement;
(function(window) {
var doc;
- if (window.DOMDocument) {
- doc = new window.DOMDocument();
- } else if (window.document && window.document.implementation) {
+ if (window.document && window.document.implementation) {
doc = window.document.implementation.createHTMLDocument("inert");
- } else if (window.ActiveXObject) {
- doc = new window.ActiveXObject("Msxml.DOMDocument");
} else {
- throw $sanitizeMinErr('ddns', "DOMDocument not supported");
+ throw $sanitizeMinErr('noinert', "Can't create an inert html document");
}
var docElement = doc.documentElement || doc.getDocumentElement();
var bodyElements = docElement.getElementsByTagName('body');
// usually there should be only one body element in the document, but IE doesn't have any, so we need to create one
if (bodyElements.length === 1) {
- baseNode = bodyElements[0];
+ inertBodyElement = bodyElements[0];
} else {
var html = doc.createElement('html');
- baseNode = doc.createElement('body');
- html.appendChild(baseNode);
+ inertBodyElement = doc.createElement('body');
+ html.appendChild(inertBodyElement);
doc.appendChild(html);
}
})(window);
@@ -280,8 +276,8 @@ function htmlParser(html, handler) {
} else if (typeof html !== 'string') {
html = '' + html;
}
- baseNode.innerHTML = html;
- var node = baseNode.firstChild;
+ inertBodyElement.innerHTML = html;
+ var node = inertBodyElement.firstChild;
while (node) {
switch (node.nodeType) {
case 1: // ELEMENT_NODE
@@ -290,9 +286,6 @@ function htmlParser(html, handler) {
case 3: // TEXT NODE
handler.chars(node.textContent);
break;
- case 8: // COMMENT NODE
- handler.comment(node.textContent);
- break;
}
var nextNode;
@@ -304,7 +297,7 @@ function htmlParser(html, handler) {
if (!nextNode) {
while (nextNode == null) {
node = node.parentNode;
- if (node === baseNode) break;
+ if (node === inertBodyElement) break;
nextNode = node.nextSibling;
if (node.nodeType == 1) {
handler.end(node.nodeName.toLowerCase());
@@ -315,8 +308,8 @@ function htmlParser(html, handler) {
node = nextNode;
}
- while (node = baseNode.firstChild) {
- baseNode.removeChild(node);
+ while (node = inertBodyElement.firstChild) {
+ inertBodyElement.removeChild(node);
}
}
@@ -329,20 +322,6 @@ function attrToMap(attrs) {
return map;
}
-var hiddenPre=document.createElement("pre");
-/**
- * decodes all entities into regular string
- * @param value
- * @returns {string} A string with decoded entities.
- */
-function decodeEntities(value) {
- if (!value) { return ''; }
-
- hiddenPre.innerHTML = value.replace(/');
diff --git a/test/ngSanitize/sanitizeSpec.js b/test/ngSanitize/sanitizeSpec.js
index 068991e72282..0f25caa53229 100644
--- a/test/ngSanitize/sanitizeSpec.js
+++ b/test/ngSanitize/sanitizeSpec.js
@@ -50,9 +50,9 @@ describe('HTML', function() {
};
});
- it('should parse comments', function() {
+ it('should not parse comments', function() {
htmlParser('', handler);
- expect(comment).toEqual('FOOBAR');
+ expect(comment).not.toBeDefined();
});
it('should parse basic format', function() {
@@ -66,18 +66,6 @@ describe('HTML', function() {
toBe('<- text1 text2 <1 text1 text2 <{');
});
- it('should throw badparse if text content contains "<" followed by "/" without matching ">"', function() {
- htmlParser('foo bar', handler);
- expect(start).toEqual(undefined);
- expect(text).toEqual('foo ');
- });
-
- it('should throw badparse if text content contains "<" followed by an ASCII letter without matching ">"', function() {
- htmlParser('foo 10 < 100
', handler);
@@ -103,8 +91,8 @@ describe('HTML', function() {
});
it('should parse empty value attribute of node', function() {
- htmlParser('', handler);
- expect(start).toEqual({tag:'option', attrs:{selected:'', value:''}});
+ htmlParser('abc', handler);
+ expect(start).toEqual({tag:'test-foo', attrs:{selected:'', value:''}});
expect(text).toEqual('abc');
});
});
@@ -165,7 +153,7 @@ describe('HTML', function() {
});
it('should handle self closed elements', function() {
- expectHTML('ac').toEqual('ac');
+ expectHTML('ac').toEqual('ac');
});
it('should handle namespace', function() {
@@ -192,7 +180,7 @@ describe('HTML', function() {
it('should ignore back slash as escape', function() {
expectHTML('').
- toEqual('');
+ toEqual('');
});
it('should ignore object attributes', function() {
@@ -415,11 +403,11 @@ describe('HTML', function() {
inject(function() {
$$sanitizeUri.andReturn('someUri');
- expectHTML('').toEqual('');
+ expectHTML('').toEqual('');
expect($$sanitizeUri).toHaveBeenCalledWith('someUri', true);
$$sanitizeUri.andReturn('unsafe:someUri');
- expectHTML('').toEqual('');
+ expectHTML('').toEqual('');
});
});
From 8cff6eb7bdee00c98b1f1c93b56b27f68cb561b9 Mon Sep 17 00:00:00 2001
From: Igor Minar
Date: Fri, 7 Aug 2015 12:39:03 -0700
Subject: [PATCH 03/10] feat($sanitize): make svg support an opt-in
BREAKING CHANGE: The svg support in `$sanitize` is now an opt-in option
Applications that depend on this option can use `$sanitizeProvider.enableSvg(true)` to turn the option back on,
but while doing so, please read the warning provided in the `$sanitizeProvider.enableSvg` documentation for
information on preventing click-hijacking attacks when this option is turned on.
---
src/ngSanitize/sanitize.js | 71 ++++++++++++++++++++++++---
test/ngSanitize/filter/linkySpec.js | 6 ++-
test/ngSanitize/sanitizeSpec.js | 76 +++++++++++++++++++----------
3 files changed, 118 insertions(+), 35 deletions(-)
diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js
index 70797a1c9267..8cf7b9e2b474 100644
--- a/src/ngSanitize/sanitize.js
+++ b/src/ngSanitize/sanitize.js
@@ -34,13 +34,17 @@ var $sanitizeMinErr = angular.$$minErr('$sanitize');
* @kind function
*
* @description
+ * Sanitizes an html string by stripping all potentially dangerous tokens.
+ *
* The input is sanitized by parsing the HTML into tokens. All safe tokens (from a whitelist) are
* then serialized back to properly escaped html string. This means that no unsafe input can make
- * it into the returned string, however, since our parser is more strict than a typical browser
- * parser, it's possible that some obscure input, which would be recognized as valid HTML by a
- * browser, won't make it through the sanitizer. The input may also contain SVG markup.
- * The whitelist is configured using the functions `aHrefSanitizationWhitelist` and
- * `imgSrcSanitizationWhitelist` of {@link ng.$compileProvider `$compileProvider`}.
+ * it into the returned string.
+ *
+ * The whitelist for URL sanitization of attribute values is configured using the functions
+ * `aHrefSanitizationWhitelist` and `imgSrcSanitizationWhitelist` of {@link ng.$compileProvider
+ * `$compileProvider`}.
+ *
+ * The input may also contain SVG markup if this is enabled via {@link $sanitizeProvider}.
*
* @param {string} html HTML input.
* @returns {string} Sanitized HTML.
@@ -126,8 +130,22 @@ var $sanitizeMinErr = angular.$$minErr('$sanitize');
*/
+
+
+/**
+ * @ngdoc provider
+ * @name $sanitizeProvider
+ *
+ * @description
+ * Creates and configures {@link $sanitize} instance.
+ */
function $SanitizeProvider() {
+ var svgEnabled = false;
+
this.$get = ['$$sanitizeUri', function($$sanitizeUri) {
+ if (svgEnabled) {
+ angular.extend(validElements, svgElements);
+ }
return function(html) {
var buf = [];
htmlParser(html, htmlSanitizeWriter(buf, function(uri, isImage) {
@@ -136,6 +154,46 @@ function $SanitizeProvider() {
return buf.join('');
};
}];
+
+
+ /**
+ * @ngdoc method
+ * @name $sanitizeProvider#enableSvg
+ * @kind function
+ *
+ * @description
+ * Enables a subset of svg to be supported by the sanitizer.
+ *
+ *
+ *
+ * By enabling this setting without taking other precautions, you might expose your
+ * application to click-hijacking attacks. In these attacks, sanitized svg elements could be positioned
+ * outside of the containing element and be rendered over other elements on the page (e.g. a login
+ * link). Such behavior can then result in phishing incidents.
+ *
+ *
+ * To protect against these, explicitly setup `overflow: hidden` css rule for all potential svg
+ * tags within the sanitized content: