[ ]
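If you extract the text naively, you will also pick up text you do not want - for example, the contents of STYLE and SCRIPT tags.
It is easier to work with the DOM, which the browser has already parsed, structured, and made available to scripts.
For example, here is a simple DOM-tree walker: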
/**
 * Walks a DOM subtree depth-first in pre-order, calling `extractorCallback`
 * on `domObject` and then on each of its descendants.
 *
 * @param {Node|null|undefined} domObject - Root of the subtree; a null or
 *     undefined value is ignored.
 * @param {function(Node): void} extractorCallback - Called once per visited node.
 */
function walker(domObject, extractorCallback) {
if (domObject == null) return;
extractorCallback(domObject);
// Descend into anything that exposes children, not only elements, so that
// a walk started at a Document or DocumentFragment reaches its content.
var childs = domObject.childNodes;
if (childs == null) return;
for (var i = 0; i < childs.length; i++)
// The callback must be forwarded, otherwise the nested call has nothing to invoke.
walker(childs[i], extractorCallback);
}
// Accumulates the value of every text node the walk reports.
var textvalue = "";
walker(document, function(node) {
// Only text nodes contribute; every other node type is ignored here.
if (node.nodeType === Node.TEXT_NODE) {
textvalue += node.nodeValue;
}
});
With this approach, when the walker encounters a tag whose contents you do not want, it only needs to skip that part of the tree. To do that, walker() has to be adapted as follows:
// Tag names whose subtrees are not traversed. Only the keys are used;
// the values are placeholders.
var ignore = { "STYLE":0, "SCRIPT":0, "NOSCRIPT":0, "IFRAME":0, "OBJECT":0 };

/**
 * Walks a DOM subtree depth-first in pre-order, calling `extractorCallback`
 * on each visited node, but does not descend into elements whose tag name
 * is listed in `ignore`.
 *
 * An ignored element is itself still passed to the callback; only its
 * children are skipped.
 *
 * @param {Node|null|undefined} domObject - Root of the subtree; a null or
 *     undefined value is ignored.
 * @param {function(Node): void} extractorCallback - Called once per visited node.
 */
function walker(domObject, extractorCallback) {
if (domObject == null) return;
extractorCallback(domObject);
if (domObject.nodeType === Node.ELEMENT_NODE) {
// Upper-case the name so lower-case tag names match the `ignore` keys too,
// and test own keys only so inherited names such as "constructor" never match.
var tag = String(domObject.tagName).toUpperCase();
if (Object.prototype.hasOwnProperty.call(ignore, tag)) return;
}
// Descend into anything that exposes children, not only elements, so that
// a walk started at a Document or DocumentFragment reaches its content.
var childs = domObject.childNodes;
if (childs == null) return;
for (var i = 0; i < childs.length; i++)
// The callback must be forwarded, otherwise the nested call has nothing to invoke.
walker(childs[i], extractorCallback);
}
Thus, when the walker sees a tag you do not want, it skips all of that tag's children (the tag itself is still passed to your extractor), so the extractor is never exposed to text nodes inside such tags.
source
share