Sanitizing html string with javascript using browser to interpret html

I want to use a whitelist of tags, attributes and values to sanitize an HTML string before putting it in the DOM. Is it safe to build a DOM element and traverse it to implement a whitelist filter, assuming that no malicious JavaScript can execute until I add the DOM element to the document? Are there pitfalls to this approach?

+3
source share
3 answers

No script embedded in HTML can be executed until it is placed in the document. Try running this code on any page:

// Demonstration: parse markup containing a <script> into a detached <div>.
// Per the surrounding explanation, the embedded script does not run.
// The closing tag is written as "<\/script>" so that this snippet does not
// terminate an inline <script> element if it is pasted into an HTML page;
// the runtime value of the string is unchanged.
// `var` is kept so the snippet can be re-run in the same console session.
var html = "<script>document.body.innerHTML = '';<\/script>";
var div = document.createElement('div');
div.innerHTML = html;

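You will notice that nothing changes. If the "malicious" script in the HTML had run, the document would have disappeared. So you can use the DOM to sanitize HTML without worrying about harmful JS in the HTML. As long as you strip out the script in your sanitizer, of course.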


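By the way, your approach is fairly safe, and smarter than what most people try (parsing the markup with regular expressions). Still, it is best to rely on a good, trusted HTML-sanitizing library, such as HTML Purifier. Or, if you want to do it on the client side, you could use ESAPI-JS (recommended by @Brett Zamir)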

+1

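It appears, as @rvighne says, that scripts do not run until the markup is inserted into the document, but there are at least the following (contrived) exceptions (tested in FF 27.0):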

// Untrusted markup with an inline onclick handler, parsed into a detached <div>.
var userInput = '<a href="http://example.com" onclick="alert(\'boo!\')">Link<\/a>';
var el = document.createElement('div');
el.innerHTML = userInput;

// Delegated listener on the container: it only reacts to clicks whose target is an <a>.
el.addEventListener("click", function onContainerClick(e) {
    var clickedTag = e.target.nodeName.toLowerCase();
    if (clickedTag !== 'a') {
        return;
    }
    alert("I will also cause side effects; I shouldn't run on the wrong link!");
});

// Synthetic click on the first link of the never-attached container.
// Alerts "boo!" and "I will also cause side effects; I shouldn't run on the wrong link!"
el.querySelector('a').click();

...or...

// Untrusted markup with an inline onclick handler, parsed into a detached <div>.
var userInput = '<a href="http://example.com" onclick="alert(\'boo!\')">Link<\/a>';
var el = document.createElement('div');
el.innerHTML = userInput;

// Listener for a custom "cat" event; `this` is the element the listener is
// registered on, so the click goes to the first <a> inside the container.
el.addEventListener("cat", function onCat() {
    this.querySelector('a').click();
});

// Dispatching the custom event on the detached container runs the listener.
var event = new CustomEvent("cat", { detail: {} });
el.dispatchEvent(event); // Alerts "boo!"

...or... (although setUserData is deprecated, it still works):

// Demonstration: a user-data handler attached to a detached node is invoked
// when the node is cloned, imported, or adopted, and clicks the untrusted link.
// NOTE(review): setUserData is a legacy DOM Level 3 API; confirm that the
// target browser still exposes it before relying on this snippet.
var userInput = '<a href="http://example.com" onclick="alert(\'boo!\')">Link<\/a>';
var span = document.createElement('span');
span.innerHTML = userInput;
span.setUserData('key', 10, {handle: function (n1, n2, n3, src) {
    // The fourth argument is named `src`; presumably it is the node the
    // clone/import/adopt operation is acting on.
    src.getElementsByTagName('a')[0].click();
}});
var div = document.createElement('div');
div.appendChild(span);
// The deep flag is passed explicitly because its default has differed between
// browser versions; the handler fires either way.
span.cloneNode(true); // Alerts "boo!"
var imprt = document.importNode(span, true); // Alerts "boo!"
// adoptNode accepts only the node; the former second argument was ignored.
var adopt = document.adoptNode(span); // Alerts "boo!"

...or...

// Demonstration: a TreeWalker filter that clicks every element it visits
// triggers the inline onclick of untrusted markup in a detached <span>.
var userInput = '<a href="http://example.com" onclick="alert(\'Boo!\');">Link</a>';
var span = document.createElement('span');
span.innerHTML = userInput;
var treeWalker = document.createTreeWalker(
  span,
  NodeFilter.SHOW_ELEMENT,
  {
    acceptNode: function (node) {
      node.click();
      // A filter must return a NodeFilter constant. Without this the callback
      // returned undefined, no node was ever accepted, and nodeList stayed empty.
      return NodeFilter.FILTER_ACCEPT;
    }
  },
  false // legacy entity-reference flag, kept for older browsers that required four arguments
);
var nodeList = [];
while (treeWalker.nextNode()) { // Alerts 'Boo!'
  nodeList.push(treeWalker.currentNode);
}

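These are (contrived) DOM operations, admittedly, but they show that handlers in unsanitized markup can run before the element is ever added to the document (so, as always, be careful!).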

+2

Use a "sandboxed" iframe, which will not execute scripts.

// Create an iframe and set its sandbox property to the single token
// 'allow-same-origin'.
var iframe = document.createElement('iframe');
iframe.sandbox = 'allow-same-origin';

From w3schools:

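The "sandbox" attribute enables an extra set of restrictions for the content in the iframe. When the attribute is present, it will: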

  • disable API
  • ...

PS By the way, this is exactly what we are doing in our Html Sanitizer https://github.com/jitbit/HtmlSanitizer (disclaimer: I am a member of this OSS project) - we use a browser to interpret HTML and convert this to the DOM.

0
source

All Articles