我想要实现这样的功能:
读取一个txt文件,将里面所有形如 href="newsInfo.aspx?id=301&cid=272&mid=01-01&page=1" 的地址全部替换成 href="01-01/272/301/newsInfo-1.html"。newsInfo不固定,也有可能是index、news_info等。page、mid、cid、id中可能有某个参数不存在,但只要存在,替换后路径中的顺序必须是mid、cid、id。
读取一个txt文件,将里面所有形如 href="newsInfo.aspx?id=301&cid=272&mid=01-01&page=1" 的地址全部替换成 href="01-01/272/301/newsInfo-1.html"。newsInfo不固定,也有可能是index、news_info等。page、mid、cid、id中可能有某个参数不存在,但只要存在,替换后路径中的顺序必须是mid、cid、id。
你贴的例子里cid已经在mid前面了,顺序正好反过来,变成id、cid、mid了。
/// <summary>
/// Demo: rewrites a dynamic link such as
/// <c>href="newsInfo.aspx?id=301&amp;cid=272&amp;mid=01-01&amp;page=1"</c> into the static form
/// <c>href="01-01/272/301/newsInfo-1.html"</c> and prints the result to the console.
/// </summary>
/// <remarks>
/// The query parameters may appear in any order in the input; the output path is always
/// emitted as mid/cid/id. Each of mid, cid, id and page is optional: a missing id segment
/// is skipped, and the "-page" suffix is only written when a page parameter was matched.
/// </remarks>
private static void TestRegex06()
{
    string yourStr = @"href=""newsInfo.aspx?id=301&cid=272&mid=01-01&page=1""";

    // pageText = file name up to the first dot. Every character class excludes the quote
    // (and '?' / '>' for the path part) so a match can never run past the end of the
    // attribute value into the following markup.
    string result = Regex.Replace(
        yourStr,
        @"(?<p1>href="")(?<pageText>[^.""?>]+)\.[^""?>]*\?(?:&?(?:id=(?<id>[^&=""]+)|cid=(?<cid>[^&=""]+)|mid=(?<mid>[^&=""]+)|page=(?<pageNo>[^&=""]+)))+(?<p2>"")",
        delegate(Match m)
        {
            string res = m.Groups["p1"].Value;

            // Fixed output order: mid, cid, id (only the ones that were present).
            if (m.Groups["mid"].Success)
            {
                res += m.Groups["mid"].Value + "/";
            }
            if (m.Groups["cid"].Success)
            {
                res += m.Groups["cid"].Value + "/";
            }
            if (m.Groups["id"].Success)
            {
                res += m.Groups["id"].Value + "/";
            }

            res += m.Groups["pageText"].Value;

            // Without this check a link that has no page parameter would become "name-.html".
            if (m.Groups["pageNo"].Success)
            {
                res += "-" + m.Groups["pageNo"].Value;
            }

            res += ".html";
            res += m.Groups["p2"].Value;
            return res;
        });

    Console.WriteLine(result);
}
// (Thread reply) The output is exactly the result you posted.
如果没基础,先看看这些:
过客的博客:http://blog.csdn.net/lxcnn
DEELX 正则表达式语法:http://www.regexlab.com/zh/deelx/syntax.htm
<a href="index.aspx"></a>N多字符后<a href="company.aspx?cid=247"></a>
替换后的结果会是这样<a href="247/index.html"
// Rewrites every <a href="Page.ext?query"> in `html` into a static path of the form
// MainId/cid/classid/id/Page[-page].html. Only the parameters actually present in the
// link contribute a path segment, and "-page" is appended only when a page parameter
// was matched.
//
// The character classes exclude the quote (and '?' / '>' for the path part) so a match
// stays inside a single attribute value. Previously a link without a query string, such
// as href="index.html", could be paired with the query string of a later anchor.
string result = Regex.Replace(
    html,
    @"(?<p1><a href="")(?<pageText>[^.""?>]+)\.[^""?>]*\?(?:&?(?:id=(?<id>[^&=""]+)|cid=(?<cid>[^&=""]+)|classid=(?<classid>[^&=""]+)|MainId=(?<MainId>[^&=""]+)|page=(?<pageNo>[^&=""]+)))+(?<p2>"")",
    delegate(Match m)
    {
        string res = m.Groups["p1"].Value;

        // Fixed output order: MainId, cid, classid, id.
        if (m.Groups["MainId"].Success)
        {
            res += m.Groups["MainId"].Value + "/";
        }
        if (m.Groups["cid"].Success)
        {
            res += m.Groups["cid"].Value + "/";
        }
        if (m.Groups["classid"].Success)
        {
            res += m.Groups["classid"].Value + "/";
        }
        if (m.Groups["id"].Success)
        {
            res += m.Groups["id"].Value + "/";
        }

        res += m.Groups["pageText"].Value;

        // Append the page number here; the page name was already written just above.
        if (m.Groups["pageNo"].Success)
        {
            res += "-" + m.Groups["pageNo"].Value;
        }

        res += ".html";
        res += m.Groups["p2"].Value;
        return res;
    });
要替换的字符串为:
<html xmlns="http://www.w3.org/1999/xhtml">
<head id="ctl00_Head1"><title>
</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><meta name="Description" content="" /><meta name="Copyright" content="" /><meta name="Author" content="" /><meta name="Keywords" content="" /><link href="1.css" rel="stylesheet" type="text/css" />
<script type="text/javascript" language="javascript" src="/js/topcd.js" charset="gb2312"></script>
<script type="text/javascript" language="javascript" src="/js/bj35com.js"></script>
</head>
<body onkeydown="KeyUp()" onload="MM_preloadImages('images/uer/28.jpg','images/uer/29.jpg','images/uer/30.jpg','images/uer/31.jpg','images/uer/32.jpg','images/uer/34.jpg','images/uer/33.jpg','images/uer/36.jpg','images/uer/35.jpg')">
<form name="aspnetForm" method="post" action="index.aspx" id="aspnetForm">
<div>
<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUKMTIxOTQyNDc5OGRkQb1H1asgBv2zro2G2/Ns+G+fNeo=" />
</div>
<div class="top">
<table width="350" border="0" cellspacing="0" cellpadding="0">
<tr>
<td width="104" align="left" valign="middle">
<label>
<select name="select" id="class">
<option value="债券信息">债券信息</option>
<option value="发行人">发行人</option>
<option value="研究分析">研究分析</option>
<option value="政策法规">政策法规</option>
<option value="相关新闻">相关新闻</option>
</select>
</label>
<label>
</label>
</td>
<td width="146" align="left" valign="middle">
<input name="textfield3" type="text" id="keyword" value="关键词" size="18" onclick="this.value=''" />
</td>
<td width="100" height="25" align="left" valign="middle">
<img src="/images/iamges/19.jpg" onclick="return CheckForm()" style="cursor:hand;" />
</td>
</tr>
</table>
</div>
<div class="top_1">
<img src="/images/pic/2.jpg" /></div>
<div class="top_2">
<table width="929" border="0" align="center" cellpadding="0" cellspacing="0">
<tr>
<td width="77" align="right">
<a href="index.html" onmouseout="MM_swapImgRestore()" onmouseover="MM_swapImage('Image11','','/images/uer/28.jpg',1)"><img src="/images/uer/37.jpg" name="Image11" width="77" height="31" border="0" id="Image11" /></a>
</td>
<td width="99">
<a href="Company.aspx?cid=247" onmousemove="Display('1')" onmouseout="Hidden('1')"><img src="/images/uer/38.jpg" name="Image12" width="99" height="31" border="0" id="Image12"
onmouseover="MM_swapImage('Image12','','/images/uer/29.jpg',1)" onmouseout="MM_swapImgRestore()" /></a>
</td>
<td width="100">
<a href="NoteList.aspx?classid=01-00" onmousemove="Display('2')" onmouseout="Hidden('2')"><img src="/images/uer/39.jpg" name="Image13" width="99" height="31" border="0" id="Image13"
onmouseover="MM_swapImage('Image13','','/images/uer/30.jpg',1)" onmouseout="MM_swapImgRestore()" /></a><a
href="#" onmouseout="MM_swapImgRestore()" onmouseover="MM_swapImage('Image13','','images/uer/31.jpg',1)"></a>
</td>
<td width="122"><a href="NoteList.aspx?classid=02-00" onmousemove="Display('3')" onmouseout="Hidden('3')"><img src="/images/uer/40.jpg" name="Image14" width="122" height="31" border="0" id="Image14"
onmouseover="MM_swapImage('Image14','','/images/uer/31.jpg',1)" onmouseout="MM_swapImgRestore()" /></a>
</td>
<td width="132"><a href="NoteList.aspx?classid=03-00" onmousemove="Display('4')" onmouseout="Hidden('4')"><img src="/images/uer/41.jpg" name="Image15" width="132" height="31" border="0" id="Image15"
onmouseover="MM_swapImage('Image15','','/images/uer/32.jpg',1)" onmouseout="MM_swapImgRestore()" /></a>
</td>
<td width="106"><a onmousemove="Display('5')" onmouseout="Hidden('5')"><img src="/images/uer/43.jpg" name="Image20" width="105" height="31" border="0" id="Image20"
onmouseover="MM_swapImage('Image20','','/images/uer/34.jpg',1)" onmouseout="MM_swapImgRestore()" /></a><a
href="#" onmouseout="MM_swapImgRestore()" onmouseover="MM_swapImage('Image16','','/images/uer/33.jpg',1)"></a>
</td>
<td width="102"><a href="YjfxList.aspx?MainId=245" onmousemove="Display('6')" onmouseout="Hidden('6')"><img src="/images/uer/42.jpg" name="Image21" width="101" height="31" border="0" id="Image21"
onmouseover="MM_swapImage('Image21','','/images/uer/33.jpg',1)" onmouseout="MM_swapImgRestore()" /></a><a
href="#" onmouseout="MM_swapImgRestore()" onmouseover="MM_swapImage('Image17','','/images/uer/34.jpg',1)"></a>
</td>
<td width="93"><a href="NewsList.html" onmousemove="Display('7')" onmouseout="Hidden('7')"><img src="/images/uer/45.jpg" name="Image22" width="92" height="31" border="0" id="Image22"
onmouseover="MM_swapImage('Image22','','/images/uer/36.jpg',1)" onmouseout="MM_swapImgRestore()" /></a><a
href="#" onmouseout="MM_swapImgRestore()" onmouseover="MM_swapImage('Image18','','/images/uer/35.jpg',1)"></a>
</td>
<td width="98"><a href="Job.aspx?cid=276" onmouseout="MM_swapImgRestore()" onmouseover="MM_swapImage('Image19','','/images/uer/35.jpg',1)"><img src="/images/uer/44.jpg" name="Image19" width="97" height="31" border="0" id="Image19" /></a><a
href="#" onmouseout="MM_swapImgRestore()" onmouseover="MM_swapImage('Image19','','/images/uer/36.jpg',1)"></a>
</td>
</tr>
</table>
</div>
<div class="top_3">
<img src="/images/pic/4.jpg" /></div>
</div>
<div style="height: 0px; position: relative; margin-top: -1px;" class="marowidth">
<div id="1" style="position: absolute; width: 102px; border: #CCCCCC solid 0px; display: none;
left: 110px; top: 0px; background: #ffffff; filter: alpha(opacity=85); opacity: 0.8;
heihgt: 1200px;" onmousemove="Display('1')" onmouseout="Hidden('1')" class="dhcd2">
<table width="102" border="0" align="center" cellpadding="0" cellspacing="1">
<tr>
<td align="center" class="print">
<a href="Messager.aspx?cid=246">董事长献辞</a>
</td>
</tr>
<tr>
<td align="center" class="print">
<a href="Company.aspx?cid=247">公司简介</a>
</td>
</tr>
<tr>
<td align="center" class="print">
<a href="Shareholders.aspx?cid=249">股东介绍</a>
</td>
</tr>
<tr>
<td align="center" class="print">
<a href="Directors.aspx?cid=250">董事会成员介绍</a>
</td>
</tr>
<tr>
<td align="center" class="print">
<a href="ManagementTeam.aspx?cid=252">管理团队</a>
</td>
</tr>
<tr>
<td align="center" class="print">
<a href="Organizational.aspx?cid=253">组织结构图</a>
</td>
</tr>
<tr>
<td align="center" class="print">
<a href="CompanyInfo.aspx?cid=254">公司业务资质</a>
</td>
</tr>
<tr>
<td align="center" class="print">
<a href="Range.aspx?cid=255">业务范围</a>
</td>
</tr>
<tr>
<td align="center" class="print">
<a href="Systems.aspx?cid=256">评级体系</a>
</td>
</tr>
用这个字符串试试
<a href="index.html" onmouseout="MM_swapImgRestore()" onmouseover="MM_swapImage('Image11','','/images/uer/28.jpg',1)"><img src="/images/uer/37.jpg" name="Image11" width="77" height="31" border="0" id="Image11" /></a>
</td>
<td width="99">
<a href="Company.aspx?cid=247" onmousemove="Display('1')" onmouseout="Hidden('1')"><img src="/images/uer/38.jpg" name="Image12" width="99" height="31" border="0" id="Image12"
onmouseover="MM_swapImage('Image12','','/images/uer/29.jpg',1)" onmouseout="MM_swapImgRestore()" /></a>
<a href="247/index.html" onmousemove="Display('1')" onmouseout="Hidden('1')"><img src="/images/uer/38.jpg" name="Image12" width="99" height="31" border="0" id="Image12"
onmouseover="MM_swapImage('Image12','','/images/uer/29.jpg',1)" onmouseout="MM_swapImgRestore()" /></a>
{
    // Load the page to rewrite from a.htm (or assign your own HTML string instead).
    string yourStr = File.ReadAllText("a.htm");

    // Rewrites href="Page.ext?query" into href="mid/cid/id/Page[-page].html".
    // Only the parameters present in the link contribute a path segment. The character
    // classes exclude the quote (and '?' / '>' for the path part) so a match stays inside
    // one attribute value instead of running into the following markup.
    string result = Regex.Replace(
        yourStr,
        @"(?<p1>href="")(?<pageText>[^.""?>]+)\.[^""?>]*\?(?:&?(?:id=(?<id>[^&=""]+)|cid=(?<cid>[^&=""]+)|mid=(?<mid>[^&=""]+)|page=(?<pageNo>[^&=""]+)))+(?<p2>"")",
        delegate(Match m)
        {
            string res = m.Groups["p1"].Value;

            // Fixed output order: mid, cid, id.
            if (m.Groups["mid"].Success)
            {
                res += m.Groups["mid"].Value + "/";
            }
            if (m.Groups["cid"].Success)
            {
                res += m.Groups["cid"].Value + "/";
            }
            if (m.Groups["id"].Success)
            {
                res += m.Groups["id"].Value + "/";
            }

            res += m.Groups["pageText"].Value;

            // Links such as Company.aspx?cid=247 have no page parameter; without this
            // check they would be rewritten to "247/Company-.html".
            if (m.Groups["pageNo"].Success)
            {
                res += "-" + m.Groups["pageNo"].Value;
            }

            res += ".html";
            res += m.Groups["p2"].Value;
            return res;
        });

    Console.WriteLine(result);
}