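Believe it or not, you can do this using the browser's built-in HTML parser. Just create a new div with document.createElement, toss the contents of the text box into the div using innerHTML, and presto, you have a fully functional DOM to work with. <script> elements inserted this way are not evaluated; be aware, however, that inline event handlers (for example <img src=x onerror=...>) can still fire when the div belongs to the live document, so parse untrusted input in a separate document (document.implementation.createHTMLDocument) when that matters.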
Here is a simple example that removes from the element all tags that do not appear in the list ALLOWED_TAGS.
// Element names that sanitize() leaves in place. They are compared against
// `nodeName`, hence the upper-case spelling.
var ALLOWED_TAGS = [
  "STRONG",
  "EM",
  "BLOCKQUOTE",
  "Q",
  "DEL",
  "INS",
  "A"
];
/**
 * Sanitize the subtree under `el` in place.
 *
 * Every descendant element whose `nodeName` is not listed in ALLOWED_TAGS is
 * replaced by its own children (via usurp), so text and allowed markup nested
 * inside a disallowed tag survive. Elements that are kept have their
 * attributes scrubbed: an allowed tag can still carry script through an
 * event-handler attribute (onclick=...) or a javascript: URL, so filtering
 * by tag name alone is not enough for untrusted input.
 *
 * @param {Element} el - Container whose descendants are sanitized. Only the
 *     descendants are visited; the attributes of `el` itself are left alone.
 * @returns {undefined}
 */
function sanitize(el) {
  // Copy the collection into a plain array first so that removing elements
  // during the loop cannot change what is being iterated.
  var tags = Array.prototype.slice.call(el.getElementsByTagName("*"));
  for (var i = 0; i < tags.length; i++) {
    if (ALLOWED_TAGS.indexOf(tags[i].nodeName) === -1) {
      usurp(tags[i]);
    } else {
      scrubAttributes(tags[i]);
    }
  }
}

/**
 * Remove every attribute of `node` that is not explicitly allowed, and every
 * URL-valued attribute whose scheme is not considered safe.
 *
 * @param {Element} node - Element that is being kept by sanitize().
 * @returns {undefined}
 */
function scrubAttributes(node) {
  // Attributes that are meaningful for the default ALLOWED_TAGS. Anything
  // else (on* handlers, style, id, class, target, ...) is dropped.
  var allowedAttributes = ["href", "title", "cite", "datetime"];
  var urlAttributes = ["href", "cite"];

  // Snapshot the attribute list: attributes are removed while looping.
  var attrs = Array.prototype.slice.call(node.attributes);
  for (var i = 0; i < attrs.length; i++) {
    var name = attrs[i].name;
    var lowerName = name.toLowerCase();
    var isAllowed = allowedAttributes.indexOf(lowerName) !== -1;
    var isUrl = urlAttributes.indexOf(lowerName) !== -1;
    if (!isAllowed || (isUrl && !hasSafeUrlScheme(attrs[i].value))) {
      node.removeAttribute(name);
    }
  }
}

/**
 * Tell whether `url` is relative or uses one of the schemes http, https or
 * mailto.
 *
 * @param {string} url - Attribute value to inspect.
 * @returns {boolean} false when an explicit scheme other than the allowed
 *     ones is present (javascript:, data:, vbscript:, ...), true otherwise.
 */
function hasSafeUrlScheme(url) {
  // Whitespace and control characters are dropped before looking for the
  // scheme so that spellings such as "jav\tascript:" are still recognised.
  var compact = String(url).replace(/[\u0000-\u0020]+/g, "");
  var match = /^([a-z][a-z0-9+.\-]*):/i.exec(compact);
  if (match === null) {
    return true; // no scheme at all: a relative URL or a fragment
  }
  return ["http", "https", "mailto"].indexOf(match[1].toLowerCase()) !== -1;
}
/**
 * Unwrap `p`: move each of its child nodes into p's parent, at p's position
 * and in their original order, then remove the now-empty `p` itself.
 *
 * @param {Node} p - Node to dissolve; it must currently have a parent.
 * @returns {undefined}
 */
function usurp(p) {
  // Walk the children back to front; each one is detached from `p` and
  // re-inserted directly in front of the node that was moved just before it.
  var anchor = p;
  var index = p.childNodes.length;
  while (index-- > 0) {
    var moved = p.removeChild(p.childNodes[index]);
    p.parentNode.insertBefore(moved, anchor);
    anchor = moved;
  }
  p.parentNode.removeChild(p);
}
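If you need to sanitize a string rather than an element, create a temporary div, load the string into it via innerHTML, sanitize the div, and read the resulting HTML back out: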
/**
 * Sanitize an HTML string and return the cleaned-up markup.
 *
 * The string is parsed into a <div> that belongs to a separate document
 * created with document.implementation.createHTMLDocument, not to the page's
 * own document. Assigning untrusted markup to innerHTML of an element owned
 * by the live document lets the browser start fetching resources and fire
 * inline handlers (e.g. <img src=x onerror=...>) before sanitize() has had a
 * chance to run.
 *
 * @param {string} string - Untrusted HTML markup.
 * @returns {string} The markup serialized back from the div after sanitize()
 *     has processed it.
 */
function sanitizeString(string) {
  // The empty title argument is passed explicitly because some older
  // browsers treat it as required.
  var inertDocument = document.implementation.createHTMLDocument("");
  var div = inertDocument.createElement("div");
  div.innerHTML = string;
  sanitize(div);
  return div.innerHTML;
}