Small reproducible code:
<div id="box">
abc
<b>def</b>
<p>ghi</p>
jk
lm
<div style="color: green;">
nop
</div>
</div>
<script>
// Root container (id "box") whose descendant text nodes are walked by processChildNode below.
let boxElement = document.getElementById("box");
/**
 * Walks the children of `htmlElement` in document order, descending into
 * every element node, and logs the raw `nodeValue` of each text node.
 *
 * The text is logged exactly as stored in the node: no whitespace is
 * collapsed or trimmed. Nodes that are neither elements nor text nodes
 * are ignored.
 *
 * @param {Node} htmlElement - Node whose subtree should be traversed.
 */
let processChildNode = function(htmlElement)
{
    for (let current = htmlElement.firstChild; current; current = current.nextSibling)
    {
        if (current.nodeType === Node.ELEMENT_NODE)
        {
            // Elements hold no text themselves; recurse into their children.
            processChildNode(current);
        }
        else if (current.nodeType === Node.TEXT_NODE)
        {
            console.log(current.nodeValue);
        }
    }
};
// Start the traversal at the "box" container looked up above.
processChildNode(boxElement);
</script>
Result on the rendered page in the browser:
Result in the debug console:
innerText.html:27
abc
innerText.html:27 def
innerText.html:27
innerText.html:27 ghi
innerText.html:27
jk
lm
innerText.html:27
nop
innerText.html:27
The issue is most apparent in the case of "jk" and "lm".
There is a newline between them in the debug console, but not on the rendered page…
I noticed that "innerText" can do this for me, but that property is not available on text nodes.
How can I fix that?
P.S.
It is important for me to process each node separately.
So I can't simply do this:
console.log(boxElement.innerText)
>Solution :
As far as I can see, there is no native method to solve this problem. If your concern is just about getting rid of the white space you could do this:
switch(childNode.nodeType){
case Node.ELEMENT_NODE:
    // Elements hold no text themselves; recurse into their children.
    processChildNode(childNode);
    break;
case Node.TEXT_NODE: {
    // Collapse every run of whitespace (including the newlines that come
    // from the HTML source formatting) into one space, then strip the ends.
    // The "s" (dotAll) flag is not needed: it only affects ".", which this
    // pattern does not use.
    const plainText = childNode.nodeValue.replace(/\s+/g, " ").trim();
    // Whitespace-only text nodes between tags collapse to "", so skip them
    // instead of logging empty lines.
    if (plainText !== "") {
        console.log(plainText);
    }
    break;
}
}
