我要从以下代码中(当前页面的地址是http://www.cnyu.net/list/list1.html)
<td height="200" valign="top"><table width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td width="33%">·<a href="../Html/2007118222512-1.html" target=_blank>宋石塔</a> <font color="#999999">4</font></td><td width="33%">·<a href="../Html/200711822912-1.html" target=_blank>崇州文庙</a> <font color="#999999">4</font></td><td width="33%">·<a href="../Html/20071112222545-1.html" target=_blank>石刻造像</a> <font color="#999999">3</font></td></tr><tr><td width="33%">·<a href="../Html/2007118222141-1.html" target=_blank>长秋山麓赏花区</a> <font color="#999999">3</font></td><td width="33%">·<a href="../Html/20071112222353-1.html" target=_blank>金华庵</a> <font color="#999999">3</font></td><td width="33%">·<a href="../Html/2007118222217-1.html" target=_blank>光明樱桃观赏区</a> <font color="#999999">3</font></td></tr><tr><td width="33%">·<a href="../Html/200711822932-1.html" target=_blank>龙藏寺</a> <font color="#999999">3</font></td><td width="33%">·<a href="../Html/200711822147-1.html" target=_blank>船棺遗址</a> <font color="#999999">2</font></td><td width="33%"> </td></tr></table><br></td>
获取每个链接地址,并将它转化为指定的绝对地址,比如将 ../Html/2007118222512-1.html 转化为 http://www.cnyu.net/Html/2007118222512-1.html
请问在C#中如何实现,请高手帮忙写下正则那一段的源码
搞几天了,正则这个东西真不懂。
<td height="200" valign="top"><table width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td width="33%">·<a href="../Html/2007118222512-1.html" target=_blank>宋石塔</a> <font color="#999999">4</font></td><td width="33%">·<a href="../Html/200711822912-1.html" target=_blank>崇州文庙</a> <font color="#999999">4</font></td><td width="33%">·<a href="../Html/20071112222545-1.html" target=_blank>石刻造像</a> <font color="#999999">3</font></td></tr><tr><td width="33%">·<a href="../Html/2007118222141-1.html" target=_blank>长秋山麓赏花区</a> <font color="#999999">3</font></td><td width="33%">·<a href="../Html/20071112222353-1.html" target=_blank>金华庵</a> <font color="#999999">3</font></td><td width="33%">·<a href="../Html/2007118222217-1.html" target=_blank>光明樱桃观赏区</a> <font color="#999999">3</font></td></tr><tr><td width="33%">·<a href="../Html/200711822932-1.html" target=_blank>龙藏寺</a> <font color="#999999">3</font></td><td width="33%">·<a href="../Html/200711822147-1.html" target=_blank>船棺遗址</a> <font color="#999999">2</font></td><td width="33%"> </td></tr></table><br></td>
获取每个链接地址,并将它转化为指定的绝对地址,比如将 ../Html/2007118222512-1.html 转化为 http://www.cnyu.net/Html/2007118222512-1.html
请问在C#中如何实现,请高手帮忙写下正则那一段的源码
搞几天了,正则这个东西真不懂。
解决方案 »
- 请问string.是什么意思
- 在数据库中更新图片 求帮助
- 如何把栈上 byte[] 弄出到托管 byte[] ?
- 为什么SendMessage(hWndControl, BM_SETCHECK, 1, 0)和SendMessage(hWndControl, BM_GETCHECK, 0, 0)无效
- VS2005水晶报表中IFieldObject的问题
- 面试问到的问题,希望大家帮忙
- 简单的问题,请问我想用C#程序创建Oracle数据库,对Oracle不熟悉,高手请解答
- c# 读取excel合并单元格出错!!!
- 找開文件,並在頁面上顯示文件內容
- 找不到运行此应用程序的运行库的任何版本
- 一个排序问题?
- 怎么使用C#来转化文件格式,要求不破坏文件内容
// Extracts every double-quoted href from the HTML held in `a` and shows it as an
// absolute URL. The href is read from capture group 1 and resolved against the
// address of the page the HTML came from, so "../", "../../", "./" and
// root-relative ("/x") links are all handled, instead of textually replacing "../".
string pattern =@"<a href=""([^""]*)";
System.Text.RegularExpressions.Regex reg = new System.Text.RegularExpressions.Regex(pattern);
// Address of the page being parsed (given in the question); relative links resolve against it.
System.Uri pageUri = new System.Uri("http://www.cnyu.net/list/list1.html");
foreach (System.Text.RegularExpressions.Match i in reg.Matches(a))
{
    System.Uri absolute;
    // TryCreate returns false for an href that cannot be resolved; such links are skipped.
    if (System.Uri.TryCreate(pageUri, i.Groups[1].Value, out absolute))
    {
        MessageBox.Show(absolute.AbsoluteUri);
    }
}
// Takes the first match of `reg` in `inputstring` and shows it with ".." replaced
// by the site root. C# is case-sensitive, so the type and member names must be
// spelled Match / Regex.Match / MessageBox.Show / Value / Replace.
System.Text.RegularExpressions.Match mat = reg.Match(inputstring);
// Without a successful match mat.Value is the empty string; show nothing in that case.
if (mat.Success)
{
    // mat.Value is the whole matched text, not just a capture group.
    MessageBox.Show(mat.Value.Replace("..", "http://www.cnyu.net"));
}
你就不会动脑想想啊??
我要从以下代码中(当前页面的地址是http://www.cnyu.net/list/list1.html)HTML code <td height="200" valign="top"><table width="100%" border="0" cellspacing="0" cellpadding="0"><tr><td width="33%">·<a href="../Html/2007118222512-1.html" target=_blank>宋石塔</a> <font color="#999999">4</font></td><td width="33%">·<a href="../Html/200711822912-1.html" target=_blank>崇州文庙</a> <font color="#999999">4</font></td><td width="33%">·<a href="../Html/20071112222545-1.html" target=_blank>石刻造像</a> <font color="#999999">3</font></td></tr><tr><td width="33%">·<a href="../Html/2007118222141-1.html" target=_blank>长秋山麓赏花区</a> <font color="#999999">3</font></td><td width="33%">·<a href="../Html/20071112222353-1.html" target=_blank>金华庵</a> <font color="#999999">3</font></td><td width="33%">·<a href="../Html/2007118222217-1.html" target=_blank>光明樱桃观赏区</a> <font color="#999999">3</font></td></tr><tr><td width="33%">·<a href="../Html/200711822932-1.html" target=_blank>龙藏寺</a> <font color="#999999">3</font></td><td width="33%">·<a href="../Html/200711822147-1.html" target=_blank>船棺遗址</a> <font color="#999999">2</font></td><td width="33%"> </td></tr></table><br></td>
你定义一个变量 string strBaseLink="http://www.cnyu.net/"; 这样根本就不存在 ../ 的问题。为什么呢?因为它是根目录。如果你匹配到的是相对路径的链接,直接
strBaseLink+=你匹配的地址,这样就可以解决了!!
// Rewrites the leading ".." of every single-quoted href in `str` to the site root.
string str=@" <a href='../Html/2007118222512-1.html' target=_blank>宋石塔</a><a href='../Html/2002.html' target=_blank>22</a>";
// The lookbehind (?<=href=') and lookahead (?=/) only assert their text, so the
// replacement swaps just the ".." and leaves "href='" and the following "/" intact.
// (Matching "href='../" itself would delete the attribute name and the slash.)
Regex rg = new Regex("(?<=href=')\\.\\.(?=/)", RegexOptions.IgnoreCase);
string result = rg.Replace(str, "http://mySite");
没什么问题阿
在vb里面双引号和单引号可以交替使用,但是在cs里面单引号是用来定义char的
// Collects every http/https URL found in `_str` into `_myList`, in order of appearance.
ArrayList _myList = new ArrayList();
string _patrn = @"http(s)?://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";
Regex reg = new Regex(_patrn, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
// Walk the matches one at a time; Value is the full matched text (group 0).
for (Match hit = reg.Match(_str); hit.Success; hit = hit.NextMatch())
{
    _myList.Add(hit.Value);
}
using System;
using System.Text.RegularExpressions;
/// <summary>
/// Demo program: turns a relative link found on a page into an absolute URL.
/// </summary>
public class Test
{
    /// <summary>Resolves the sample link from the question and prints the result.</summary>
    private static void Main(string[] args)
    {
        string str = GetPath("http://www.cnyu.net/", "http://www.cnyu.net/list/list1.html", "../Html/2007118222512-1.html");
        Console.WriteLine(str);
    }

    /// <summary>
    /// Resolves <paramref name="strLink"/> against the page it was found on.
    /// </summary>
    /// <param name="strBaseLink">Site root, e.g. "http://www.cnyu.net/". Used as the
    /// resolution base only when <paramref name="strCurrentPage"/> is not an absolute URL.</param>
    /// <param name="strCurrentPage">Absolute URL of the page that contains the link.</param>
    /// <param name="strLink">The href value; may be relative ("../x", "x", "/x") or absolute.</param>
    /// <returns>The absolute URL, or <paramref name="strLink"/> unchanged when it cannot be resolved.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="strLink"/> is null.</exception>
    /// <exception cref="ArgumentException">Neither the current page nor the base link is an absolute URL.</exception>
    public static string GetPath(string strBaseLink, string strCurrentPage, string strLink)
    {
        if (strLink == null)
        {
            throw new ArgumentNullException("strLink");
        }

        Uri context;
        if (!Uri.TryCreate(strCurrentPage, UriKind.Absolute, out context)
            && !Uri.TryCreate(strBaseLink, UriKind.Absolute, out context))
        {
            throw new ArgumentException("Either strCurrentPage or strBaseLink must be an absolute URL.", "strCurrentPage");
        }

        // Uri applies the standard relative-reference rules ("..", ".", root-relative,
        // already-absolute). Hand-rolled string replacement got these wrong: collapsing
        // "//" also broke the "http://" scheme separator.
        Uri resolved;
        if (Uri.TryCreate(context, strLink, out resolved))
        {
            return resolved.AbsoluteUri;
        }

        return strLink;
    }
}
using System;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
using System.Text;
/// <summary>
/// Demo crawler step: downloads a page, extracts the href of every anchor tag and
/// converts each one to an absolute URL.
/// </summary>
public class Test
{
    // Author's notes (translated):
    // - The site root is easy to match; the pattern is http[s]?://.*?/
    // - The previous version was written in a hurry and contained a logic error.
    // - Below is a more complete version.

    // Matches an <a ...> tag and captures its href value (quoted or unquoted) in group 1.
    // The text between "<a" and "href" cannot cross a '>' and is matched lazily, so the
    // match never jumps ahead into a neighbouring anchor.
    private static readonly Regex LinkPattern = new Regex(
        @"<a\s+(?:[^>]*?\s)?href\s*=\s*['""]?([^'""\s>]*)",
        RegexOptions.IgnoreCase);

    /// <summary>Downloads the sample page and prints every link on it as an absolute URL.</summary>
    private static void Main(string[] args)
    {
        string currentPage = "http://topic.csdn.net/u/20080413/03/4210919d-9131-48f1-b1db-7087026a8f95.html";
        string html = GetHtml(currentPage, "utf-8"); // an enum could be defined for the encoding
        string[] strLinks = GetLink(html);
        foreach (string strLink in strLinks)
        {
            string strResults = GetPath(currentPage, strLink);
            Console.WriteLine(strResults);
        }
    }

    /// <summary>Extracts the href value of every anchor tag in <paramref name="strHtml"/>.</summary>
    /// <param name="strHtml">HTML text to scan; null or empty yields no links.</param>
    /// <returns>The href values in document order; an empty array when there are none.</returns>
    public static string[] GetLink(string strHtml)
    {
        if (string.IsNullOrEmpty(strHtml))
        {
            return new string[0];
        }

        MatchCollection found = LinkPattern.Matches(strHtml);
        string[] links = new string[found.Count];
        for (int i = 0; i < links.Length; i++)
        {
            links[i] = found[i].Groups[1].Value;
        }
        return links;
    }

    /// <summary>Downloads a page and returns its text with all CR/LF characters removed.</summary>
    /// <param name="strLink">URL to download.</param>
    /// <param name="encoding">Name of the text encoding of the page; "gb2312" is used when null.</param>
    /// <returns>The page text, or an empty string when the download or decoding fails.</returns>
    private static string GetHtml(string strLink, string encoding)
    {
        if (encoding == null)
        {
            encoding = "gb2312";
        }

        try
        {
            WebRequest request = WebRequest.Create(strLink);
            // The using blocks release the response, stream and reader on every path.
            using (WebResponse response = request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding(encoding)))
            {
                return Regex.Replace(reader.ReadToEnd(), @"\r|\n", "");
            }
        }
        catch (Exception ex)
        {
            // Best effort by design: callers get "" on any failure, but the reason is
            // reported on stderr instead of being silently discarded.
            Console.Error.WriteLine("GetHtml failed for '" + strLink + "': " + ex.Message);
            return "";
        }
    }

    /// <summary>Resolves a link against the page it was found on.</summary>
    /// <param name="strCurrentPage">Absolute URL of the page that contains the link.</param>
    /// <param name="strFindLink">The href value; may be relative ("../x", "x", "/x") or absolute.</param>
    /// <returns>The absolute URL, or <paramref name="strFindLink"/> unchanged when it cannot be resolved.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="strFindLink"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="strCurrentPage"/> is not an absolute URL.</exception>
    public static string GetPath(string strCurrentPage, string strFindLink)
    {
        if (strFindLink == null)
        {
            throw new ArgumentNullException("strFindLink");
        }

        Uri pageUri;
        if (!Uri.TryCreate(strCurrentPage, UriKind.Absolute, out pageUri))
        {
            throw new ArgumentException("strCurrentPage must be an absolute URL.", "strCurrentPage");
        }

        // Uri applies the standard relative-reference rules, so no manual handling of
        // "../" segments or of the "http://" prefix is needed.
        Uri resolved;
        if (Uri.TryCreate(pageUri, strFindLink, out resolved))
        {
            return resolved.AbsoluteUri;
        }

        return strFindLink;
    }
}