某高手的代码: 存成.asp文件,执行,你用ASPHTTP抓内容的时候用这个很爽,当然自己要改进一下了 <% Option ExplicitFunction stripHTML(strHTML) 'Strips the HTML tags from strHTMLDim objRegExp, strOutput Set objRegExp = New RegexpobjRegExp.IgnoreCase = True objRegExp.Global = True objRegExp.Pattern = "<.+?>"'Replace all HTML tag matches with the empty string strOutput = objRegExp.Replace(strHTML, "")'Replace all < and > with < and > strOutput = Replace(strOutput, "<", "<") strOutput = Replace(strOutput, ">", ">")stripHTML = strOutput 'Return the value of strOutputSet objRegExp = Nothing End Function %><form method="post" id=form1 name=form1> <b>Enter an HTML String:</b><br> <textarea name="txtHTML" cols="50" rows="8" wrap="virtual"><%=Request("txtHTML")%></textarea> <p> <input type="submit" value="Strip HTML Tags!" id=submit1 name=submit1> </form><% if Len(Request("txtHTML")) > 0 then %> <p><hr><p> <b><u>View of string <i>with no</i> HTML stripping:</u></b><br> <xmp> <%=Request("txtHTML")%> </xmp><p> <b><u>View of string <i>with</i> HTML stripping:</u></b><br> <pre> <%=StripHTML(Request("txtHTML"))%> </pre> <% End If %>
将下面的代码保存为 .asp 文件并执行即可。用 ASPHTTP 抓取网页内容时,用它来去除 HTML 标签很方便;当然,还需要根据自己的需要加以改进。
<%
Option Explicit

' stripHTML
'   Returns strHTML with every HTML tag removed and any remaining
'   angle brackets converted to HTML entities.
'
'   strHTML - the text to clean.
'   Returns - the cleaned string.
Function stripHTML(strHTML)
    ' Declared explicitly: Option Explicit is on, so undeclared names are errors.
    Dim objRegExp, strOutput

    Set objRegExp = New RegExp
    objRegExp.IgnoreCase = True
    objRegExp.Global = True
    ' "." does not match a line feed in VBScript, so "(.|\n)" is used to
    ' let a single tag span several lines.
    objRegExp.Pattern = "<(.|\n)+?>"

    ' Replace all HTML tag matches with the empty string
    strOutput = objRegExp.Replace(strHTML, "")

    ' Replace all leftover < and > with &lt; and &gt; so they display as text
    strOutput = Replace(strOutput, "<", "&lt;")
    strOutput = Replace(strOutput, ">", "&gt;")

    stripHTML = strOutput    ' Return the value of strOutput
    Set objRegExp = Nothing
End Function
%>
<%
' Demo page for stripHTML: posts a textarea back to this page and shows the
' submitted text twice, once as typed and once after stripHTML().
' The submitted value is user input, so it is passed through
' Server.HTMLEncode wherever it is echoed as-is; otherwise a value containing
' "</textarea>" or script markup would be interpreted by the browser.
' (& "" coerces the Request item to a plain string before encoding.)
%>
<form method="post" id="form1" name="form1">
<b>Enter an HTML String:</b><br>
<textarea name="txtHTML" cols="50" rows="8" wrap="virtual"><%=Server.HTMLEncode(Request("txtHTML") & "")%></textarea>
<p>
<input type="submit" value="Strip HTML Tags!" id="submit1" name="submit1">
</form>
<%
' The result section is rendered only when something was submitted.
If Len(Request("txtHTML")) > 0 Then
%>
<p><hr><p>
<b><u>View of string <i>with no</i> HTML stripping:</u></b><br>
<pre>
<%=Server.HTMLEncode(Request("txtHTML") & "")%>
</pre><p>
<b><u>View of string <i>with</i> HTML stripping:</u></b><br>
<pre>
<%=StripHTML(Request("txtHTML"))%>
</pre>
<% End If %>
------------------------
<?php
/**
 * Collects every <a ...>...</a> element found in $str.
 *
 * The tag name must be exactly "a" (followed by whitespace or ">"), so
 * elements such as <abbr> or <address> are no longer mistaken for links.
 * The match is case-sensitive and the link text may not contain a line
 * break, as before.
 *
 * @param string $str HTML to scan
 * @return array the complete matched elements, in document order
 */
function get_link($str)
{
    preg_match_all('~<a(?:\s[^>]*)?>.*?</a>~', $str, $result);
    return $result[0];
}

/**
 * Cuts the news section out of a page: the text from the first
 * '<table width=560' up to (not including) the next '<form'.
 *
 * @param string $str page HTML
 * @return string the section; "" when the start marker is missing; the rest
 *                of the page from the start marker when no '<form' follows
 */
function get_News($str)
{
    $start = strpos($str, "<table width=560");
    if ($start === false) {
        return "";
    }

    $section = substr($str, $start);
    $end = strpos($section, "<form");
    if ($end === false) {
        return $section;
    }

    return substr($section, 0, $end);
}

/**
 * Removes the "related topics" block: the text from the start marker up to
 * (not including) the end marker. Every occurrence of that exact text is
 * removed from $str.
 *
 * @param string $str article HTML
 * @return string $str without the block; $str unchanged when the start
 *                marker is missing
 */
function remove_Subject($str)
{
    $startMarker = '<font style="font-size:12px" color=#000000><b>相关专题';
    $endMarker = '</td></tr><tr><td style="font-size: 14px;padding-top:4px;line-height:180%" valign=top align=left>';

    $start = strpos($str, $startMarker);
    if ($start === false) {
        return $str;
    }

    $tail = substr($str, $start);
    $end = strpos($tail, $endMarker);
    // As in the original code, a missing end marker means everything from
    // the start marker to the end of the string is treated as the block.
    $block = ($end === false) ? $tail : substr($tail, 0, $end);

    return str_replace($block, "", $str);
}

/**
 * Extracts the URL beginning at the first "http" in $str.
 *
 * The URL ends at the quote character that opened it when "http" is
 * directly preceded by " or ', and otherwise at the first whitespace or ">".
 * This handles href="..." followed by further attributes, href='...', and
 * unquoted href=... values.
 *
 * @param string $str an <a> element (or any text containing a URL)
 * @return string the URL, or "" when $str contains no "http"
 */
function get_url($str)
{
    $start = strpos($str, "http");
    if ($start === false) {
        return "";
    }

    $rest = substr($str, $start);
    $opener = ($start > 0) ? $str[$start - 1] : "";

    if ($opener === '"' || $opener === "'") {
        // Quoted attribute value: read up to the matching quote.
        $length = strcspn($rest, $opener);
    } else {
        // Unquoted value: read up to whitespace or the end of the tag.
        $length = strcspn($rest, " \t\r\n>");
    }

    return substr($rest, 0, $length);
}
/**
 * Extracts the link text of an <a> element.
 *
 * Works on the part of $str that starts at the first '">' and returns the
 * bytes from offset 6 of that part up to the following '</a>'. Offset 6
 * skips the two bytes of '">' plus the first four bytes of the link text.
 * NOTE(review): the four extra bytes are presumably a fixed prefix on the
 * scraped site's link texts - confirm against the target pages.
 *
 * @param string $str an <a> element as returned by get_link()
 * @return string the link text without its first four bytes
 */
function get_title($str)
{
    $skip = 6;

    // Everything from the end of the opening tag onwards.
    $fromTagEnd = strstr($str, '">');
    // Everything from the closing tag onwards.
    $fromClose = strstr($fromTagEnd, "</a>");

    $textLength = strlen($fromTagEnd) - strlen($fromClose) - $skip;

    return substr($fromTagEnd, $skip, $textLength);
}
?>
---------------
<?php
/*
 * Headline list page: downloads the remote "topnews1.html" page, cuts out
 * the fragment between two <table> markers, and renders each link found in
 * it as a row pointing at viewNews.php.
 *
 * include/regex.php is expected to define get_link(), get_url() and
 * get_title(), which are called below.
 */
include('include/regex.php');
?>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=big5">
<title>无标题文档</title>
</head><body leftmargin="0">
<table width="98%" border="0" cellspacing="1" cellpadding="2">
<?php
$sourceUrl = "http://202.123.206.170/mslot/topnews/topnews1.html";

// file_get_contents() returns false when the download fails; fall back to
// an empty page so the script renders an empty list instead of aborting.
$data = file_get_contents($sourceUrl);
if ($data === false) {
    $data = '';
}

// Keep only the text from the start marker up to (not including) the end
// marker; a missing end marker keeps everything after the start marker.
$startMarker = '<table width="333" border="0" cellspacing="3" align="left">';
$endMarker = '<table width="400" border="0" cellspacing="0" align="center" height="17">';
$fragment = '';
$startPos = strpos($data, $startMarker);
if ($startPos !== false) {
    $fragment = substr($data, $startPos);
    $endPos = strpos($fragment, $endMarker);
    if ($endPos !== false) {
        $fragment = substr($fragment, 0, $endPos);
    }
}

// nl2br() inserts <br /> before each newline; the newlines themselves are
// then dropped, putting the fragment on one line so that get_link() can
// match links whose text spanned several lines.
$fragment = str_replace("\n", "", nl2br($fragment));

// NOTE(review): the next two echo statements print the raw fragment and the
// link count inside the <table>; they look like debugging output - confirm
// before removing.
echo $fragment;
$links = get_link($fragment);
echo count($links);

foreach ($links as $link) {
    $linkUrl = get_url($link);
    $title = get_title($link);
    // Drop the first 24 bytes of the URL. "http://202.123.206.170/" is 23
    // bytes long, so this also drops the first character of the path.
    // NOTE(review): confirm that viewNews.php expects the value in this form.
    $par = (string) substr($linkUrl, 24);
?>
<tr>
<td width="170" align="right"><font color="#66CCCC" size="1">■</font></td>
<td>
<?php
    // NOTE(review): $title comes from the remote page and is printed
    // unescaped as the link text - confirm this is intended.
    echo "<a href='viewNews.php?uid=".rawurlencode($par)."&amp;ut=".rawurlencode($title)."' target='_blank'>$title</a>";
?>
</td>
</tr>
<?php } ?>
</table>
</body>
</html>
----------------------
<?php
/*
 * Headline list page: downloads the sportscn.com front page, cuts out the
 * fragment between two HTML comment markers, and renders each link found
 * in it as a row pointing at viewNews.php.
 *
 * include/regex.php is expected to define get_link(), get_url() and
 * get_title(), which are called below.
 */
include('include/regex.php');
?>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
<title>无标题文档</title>
</head><body leftmargin="0">
<table width="98%" border="0" cellspacing="1" cellpadding="2">
<?php
$sourceUrl = "http://www.sportscn.com/";

// file_get_contents() returns false when the download fails; fall back to
// an empty page so the script renders an empty list instead of aborting.
$data = file_get_contents($sourceUrl);
if ($data === false) {
    $data = '';
}

// Keep only the text from the start marker up to (not including) the end
// marker; a missing end marker keeps everything after the start marker.
$startMarker = "<!--中国足球-->";
$endMarker = "<!-----------------------------------------大表格2结束-------------------------------------------->";
$fragment = '';
$startPos = strpos($data, $startMarker);
if ($startPos !== false) {
    $fragment = substr($data, $startPos);
    $endPos = strpos($fragment, $endMarker);
    if ($endPos !== false) {
        $fragment = substr($fragment, 0, $endPos);
    }
}

$links = get_link($fragment);

foreach ($links as $link) {
    $linkUrl = get_url($link);
    $title = get_title($link);
    // Drop the first 24 bytes of the URL, which is the length of
    // "http://www.sportscn.com/"; the remainder is passed on as "uid".
    $par = (string) substr($linkUrl, 24);
?>
<tr>
<td width="170" align="right"><font color="#66CCCC" size="1">■</font></td>
<td>
<?php
    // NOTE(review): $title comes from the remote page and is printed
    // unescaped as the link text - confirm this is intended.
    echo "<a href='viewNews.php?uid=".rawurlencode($par)."&amp;ut=".rawurlencode($title)."' target='_blank'>$title</a>";
?>
</td>
</tr>
<?php } ?>
</table>
</body>
</html>
// Sketch: split a file's contents on "<" and look at the text after each tag.
// $fp is assumed to be a handle already opened on "filename" - the opening
// code is not part of this fragment.
$content = fread($fp, filesize("filename"));
$conarr = explode("<", $content);
for ($i = 0; $i < count($conarr); $i++)
{
    // Each piece has the form "tag>text"; strstr() keeps the part from ">"
    // onwards, i.e. the text between this tag and the next one
    // (false when the piece contains no ">").
    $xxx = strstr($conarr[$i], ">");
    // ... (processing of $xxx was omitted in the original post)
}
// ... (the rest was omitted in the original post)
http://www.php.net/manual/zh/function.strip-tags.php
怎样才能正确读出下面这些标签中的 URL?我参考了楼上各位的办法,都不能完整地读出这些链接。不知道这个正则该怎么写?
<a href="http://www.igo5.com/ushop/uspredirect.asp?mid=1&pid=486" target=_blank>
<a href=http://xxxxxxxxx >
</a>
<a href=http://xxxxxxxxx >
<script>
// Collect the href of every <a> element in the document, one per line,
// and show the result in an alert box.
var s = "";
var a = document.getElementsByTagName("A");
// "i" is declared here so the loop does not create an implicit global.
for (var i = 0; i < a.length; i++) {
    s += a[i].href + '\n';
}
alert(s);
</script>