我觉得,这个还是使用遍历所有节点的方法来做吧:可以分别遍历子节点,然后对子节点继续查找子节点,这样更方便一些。用正则的话,如果有些内容不在标签内部,就会出问题。
var reg = /\<\s*([^\>]+\s*\>)([^\<]*?)\<\/\s*\1/g, html = "<html><head>this is head</head><body><a>this is a</a><span>this is span</span><p>this is p </p><i>this is i</i>等等。。</html>", arr = [];
html.replace(reg,function(p1,p2,p3){ console.log(p1); console.log(p2); console.log(p3); arr.push(p3); }); console.log(arr);
上面的正则只能匹配出标签内部的文本;其他的,比如你代码中的“等等”这样没有被标签包围的文本,就无法匹配到。
var reg = /(?:\s*\<\s*[^\>]+\s*\>\s*)+/g, html = "<html><head>this is head</head><body><a>this is a</a><span>this is span</span><p>this is p </p><i>this is i</i>等等。。</html>", arr = [];
// Split the markup on simple tags and collect every piece of text that
// contains something other than whitespace.
var str = "<html> <head>this is head</head> <body> <a>this is a</a> <span>this is span</span> <p>this is p <p> <i>this is i</i> 等等。。 </body></html>";
// The separator matches "<", then one or more lowercase letters or "/", then
// ">". Tags that carry attributes, digits or uppercase letters are not matched.
var arr1 = str.split(/<[/a-z]+>/);
var arr2 = [];
// Indexed loop instead of for...in: for...in enumerates every enumerable
// property key (inherited ones included), not just the array indices.
for (var i = 0; i < arr1.length; i++) {
    // Store the trimmed text so layout whitespace around the tags does not
    // leak into the result; whitespace-only pieces trim to "" and are skipped.
    var text = arr1[i].trim();
    if (text) {
        arr2.push(text);
    }
}
// Collect the text found directly inside simple <tag>text</tag> pairs.
//
// reg, piece by piece:
//   group 1  - the opening tag's content through its closing ">"
//   group 2  - the text after the opening tag, up to the next "<" (lazy)
//   "</" \1  - the closing tag must repeat what group 1 captured
// Because group 2 cannot contain "<", elements that wrap other elements are
// not matched, and text that is not enclosed in a tag pair (for example the
// trailing 等等。。) is never collected.
var reg = /\<\s*([^\>]+\s*\>)([^\<]*?)\<\/\s*\1/g,
    html = "<html><head>this is head</head><body><a>this is a</a><span>this is span</span><p>this is p </p><i>this is i</i>等等。。</html>",
    arr = [];
var found;
// exec() on a global regex resumes at reg.lastIndex, so every call returns the
// next match and finally null, which also resets lastIndex to 0.
while ((found = reg.exec(html)) !== null) {
    console.log(found[0]); // the whole matched element
    console.log(found[1]); // group 1: opening tag content plus ">"
    console.log(found[2]); // group 2: the enclosed text
    arr.push(found[2]);
}
console.log(arr);
上面的正则只能匹配出标签内部的文本;其他的,比如你代码中的“等等”这样没有被标签包围的文本,就无法匹配到。
var reg = /(?:\s*\<\s*[^\>]+\s*\>\s*)+/g,
html = "<html><head>this is head</head><body><a>this is a</a><span>this is span</span><p>this is p </p><i>this is i</i>等等。。</html>",
arr = [];
// reg matches a run of one or more adjacent tags together with the whitespace
// around them, so splitting on it leaves only the text between tag runs.
// Splitting on the regex directly avoids the intermediate "<>" marker, which
// would also split on a literal "<>" inside the text.
// split() yields an empty string where html begins or ends with a tag. Those
// are filtered out rather than sliced off unconditionally, so leading or
// trailing text survives when the input does not start or end with a tag.
arr = html.split(reg).filter(function (piece) {
    return piece !== "";
});
console.log(arr);
这种正则也会有些问题:如果结构混乱的话,可能会出问题。
当然,可以把所有的匹配出来。
// Split the markup on simple tags and collect every piece of text that
// contains something other than whitespace.
var str = "<html> <head>this is head</head> <body> <a>this is a</a> <span>this is span</span> <p>this is p <p> <i>this is i</i> 等等。。 </body></html>";
// The separator matches "<", then one or more lowercase letters or "/", then
// ">". Tags that carry attributes, digits or uppercase letters are not matched.
var arr1 = str.split(/<[/a-z]+>/);
var arr2 = [];
// Indexed loop instead of for...in: for...in enumerates every enumerable
// property key (inherited ones included), not just the array indices.
for (var i = 0; i < arr1.length; i++) {
    // Store the trimmed text so layout whitespace around the tags does not
    // leak into the result; whitespace-only pieces trim to "" and are skipped.
    var text = arr1[i].trim();
    if (text) {
        arr2.push(text);
    }
}