vb.net和c#.net 都是最近两个月才开始接触,
所以,只是覆盖了知识面,但是代码的实现,
还很欠缺,
能看懂,
但是目前还不可以肆无忌惮的写,和修改!
不是我懒,呵呵!现在水平还真的没到这个火候!
这里卧虎藏龙,请大家给点意见,能改多少就改多少!我自己也在拼命地查资料、看文档。
我谢过大家!
using System;
using System.Data;
using System.Data.SqlClient;
using System.Collections;
using System.ComponentModel;
using System.Drawing;
using System.Text;
using System.Text.RegularExpressions;namespace MyTomySearch
{
/// <summary>
/// Downloads listing pages and extracts rows from them using regular
/// expressions that are loaded from the TContext table through <c>DM</c>.
/// </summary>
public class RegexHtml
{
    // Running total of rows appended by getResult(); never reset by this class.
    private int num = 0;

    // One row per site: [i,0] is concatenated into the bbs_id filter of the
    // TContext query, [i,1] is the first page URL to download.
    private string[,] contain_url;

    // Number of rows of contain_url that getResult() iterates over.
    private int url_length;

    /// <summary>
    /// Number of rows produced so far (exposed for external callers).
    /// </summary>
    public int Count
    {
        get
        {
            return this.num;
        }
    }

    /// <summary>
    /// Creates a scraper for the given site table.
    /// </summary>
    /// <param name="url">Site table; column 0 is the bbs_id, column 1 the start URL.</param>
    /// <param name="length">How many rows of <paramref name="url"/> to process.</param>
    public RegexHtml(string[,] url, int length)
    {
        contain_url = url;
        url_length = length;
    }

    /// <summary>
    /// Fetches the HTML source of the given page.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>
    /// The response body decoded with <c>Encoding.Default</c>, or an empty
    /// string when anything goes wrong.
    /// </returns>
    public string OpenUrl(string url)
    {
        try
        {
            System.Net.WebRequest request = System.Net.WebRequest.Create(url);

            // The using blocks release the response, stream and reader even
            // when reading fails part-way through.
            using (System.Net.WebResponse response = request.GetResponse())
            using (System.IO.Stream resStream = response.GetResponseStream())
            using (System.IO.StreamReader sr =
                new System.IO.StreamReader(resStream, System.Text.Encoding.Default))
            {
                return sr.ReadToEnd();
            }
        }
        catch
        {
            // Deliberate best-effort: callers treat "" as "page unavailable".
            return "";
        }
    }

    /// <summary>
    /// Same value as <see cref="Count"/>; kept for existing callers.
    /// </summary>
    public int getResultNum()
    {
        return num;
    }

    /// <summary>
    /// Crawls the configured site and returns every extracted item.
    /// </summary>
    /// <returns>
    /// A table with the string columns time, title, hit_rate and href.
    /// </returns>
    public DataTable getResult()
    {
        DM dm = new DM();

        DataTable dt = new DataTable();
        dt.Columns.Add("time", typeof(string));
        dt.Columns.Add("title", typeof(string));
        dt.Columns.Add("hit_rate", typeof(string));
        dt.Columns.Add("href", typeof(string));

        for (int i = 0; i < url_length; i++)
        {
            // SECURITY NOTE(review): the id is concatenated straight into the
            // SQL text. If contain_url can hold untrusted data this is an
            // injection risk; DM's API is not visible here, so the query is
            // left as-is rather than guessing at a parameterized overload.
            DataSet ds = dm.SelectDataSet(
                "select * from TContext where bbs_id=" + contain_url[i, 0]);

            // No configuration row for this site: skip it instead of
            // failing on Rows[0].
            if (ds == null || ds.Tables.Count == 0 || ds.Tables[0].Rows.Count == 0)
            {
                continue;
            }

            // NOTE(review): the original loop condition was "while (i == 0)",
            // so only the first site is ever crawled. That behaviour is kept;
            // confirm whether it was intended or a debugging leftover.
            if (i != 0)
            {
                continue;
            }

            CrawlSite(ds.Tables[0].Rows[0], contain_url[i, 1], dt);
        }

        return dt;
    }

    // Follows "next page" links from startUrl, appending one row per item match.
    private void CrawlSite(DataRow config, string startUrl, DataTable results)
    {
        string hrefPrefix = config["href"].ToString();

        // Both patterns are fixed for the whole site, so they are built once
        // here instead of once per downloaded page.
        Regex itemPattern = new Regex(config["context"].ToString(), RegexOptions.Compiled);
        Regex nextPattern = new Regex(config["next"].ToString(), RegexOptions.Compiled);

        string url = startUrl;
        while (true)
        {
            string content = OpenUrl(url);
            if (content == "")
            {
                break;
            }

            foreach (Match item in itemPattern.Matches(content))
            {
                DataRow dr = results.NewRow();
                dr["time"] = item.Result("${time}");
                dr["title"] = item.Result("${title}");
                dr["hit_rate"] = item.Result("${hitrate}");
                dr["href"] = hrefPrefix + item.Result("${href}");
                results.Rows.Add(dr);
                num++;
            }

            // Next page: stop when the page carries no "next" link.
            Match next = nextPattern.Match(content);
            if (!next.Success)
            {
                break;
            }

            url = hrefPrefix + next.Result("${next}");
        }
    }
}
}
解决方案 »
- List<T> 内存中大数据量查询优化
- 谁使用过VS2010的插入代码片断,一个小问题
- DataGridView 绑定后 如何增加修改删除数据吗?
- 在线等...求教高手补全下面的程序
- GridView的数据源来自三个表的联合查询,显示的数据需要编辑的问题
- C#中画九宫格
- 求救:关于sql语句中时间条件的问题?
- ★★★★求:oracle插入blob信息出错(内含代码)★★★★
- 如何将生成的.exe文件的图表改一下?
- 如何将录音带转存到计算机中???????????
- 怎么取得运行中的程序的安装绝对路径?
- 如何取得WebClient.DownloadFile(...)时的文件的真正类型,比如是text文本或图片?
Imports System.Data
Imports System.IO
Imports System.Net
Imports System.Text
Imports System.Text.RegularExpressions

Namespace MyTomySearch
    ''' <summary>
    ''' Downloads listing pages and extracts rows from them using regular
    ''' expressions that are loaded from the TContext table through DM.
    ''' </summary>
    Public Class RegexHtml
        ' One row per site: (i, 0) is concatenated into the bbs_id filter of
        ' the TContext query, (i, 1) is the first page URL to download.
        Private contain_url As String(,)

        ' Running total of rows appended by getResult(); never reset here.
        Private num As Integer

        ' Number of rows of contain_url that getResult() iterates over.
        Private url_length As Integer

        ''' <summary>Creates a scraper for the given site table.</summary>
        ''' <param name="url">Site table; column 0 is the bbs_id, column 1 the start URL.</param>
        ''' <param name="length">How many rows of the table to process.</param>
        Public Sub New(ByVal url As String(,), ByVal length As Integer)
            Me.num = 0
            Me.contain_url = url
            Me.url_length = length
        End Sub

        ''' <summary>Crawls the configured site and returns every extracted item.</summary>
        ''' <returns>A table with the string columns time, title, hit_rate and href.</returns>
        Public Function getResult() As DataTable
            Dim dm As DM = New DM()

            Dim results As New DataTable
            results.Columns.Add("time", GetType(String))
            results.Columns.Add("title", GetType(String))
            results.Columns.Add("hit_rate", GetType(String))
            results.Columns.Add("href", GetType(String))

            Dim siteIndex As Integer
            For siteIndex = 0 To Me.url_length - 1
                ' SECURITY NOTE(review): the id is concatenated straight into
                ' the SQL text. If contain_url can hold untrusted data this is
                ' an injection risk; DM's API is not visible here, so the
                ' query text is kept as the C# version builds it.
                Dim settings As DataSet = dm.SelectDataSet( _
                    "select * from TContext where bbs_id=" & Me.contain_url(siteIndex, 0))

                ' Skip sites without a configuration row instead of failing
                ' on Rows(0).
                Dim hasConfig As Boolean = Not settings Is Nothing _
                    AndAlso settings.Tables.Count > 0 _
                    AndAlso settings.Tables(0).Rows.Count > 0

                ' NOTE(review): the original loop condition was
                ' "Do While (num1 = 0)", so only the first site is ever
                ' crawled. That behaviour is kept; confirm whether it was
                ' intended or a debugging leftover.
                If hasConfig AndAlso siteIndex = 0 Then
                    Me.CrawlSite(settings.Tables(0).Rows(0), Me.contain_url(siteIndex, 1), results)
                End If
            Next siteIndex

            Return results
        End Function

        ' Follows "next page" links from startUrl, appending one row per item match.
        Private Sub CrawlSite(ByVal config As DataRow, ByVal startUrl As String, ByVal results As DataTable)
            Dim hrefPrefix As String = config.Item("href").ToString

            ' Both patterns are fixed for the whole site, so they are built
            ' once here instead of once per downloaded page.
            Dim itemPattern As New Regex(config.Item("context").ToString, RegexOptions.Compiled)
            Dim nextPattern As New Regex(config.Item("next").ToString, RegexOptions.Compiled)

            Dim pageUrl As String = startUrl
            Do While True
                Dim content As String = Me.OpenUrl(pageUrl)

                ' Value check; the original used "Is", which compares
                ' object references rather than string contents.
                If content.Length = 0 Then
                    Exit Do
                End If

                Dim item As Match
                For Each item In itemPattern.Matches(content)
                    Dim row As DataRow = results.NewRow
                    row.Item("time") = item.Result("${time}")
                    row.Item("title") = item.Result("${title}")
                    row.Item("hit_rate") = item.Result("${hitrate}")
                    row.Item("href") = hrefPrefix & item.Result("${href}")
                    results.Rows.Add(row)
                    Me.num += 1
                Next

                ' Next page: stop when the page carries no "next" link.
                Dim nextLink As Match = nextPattern.Match(content)
                If Not nextLink.Success Then
                    Exit Do
                End If

                pageUrl = hrefPrefix & nextLink.Result("${next}")
            Loop
        End Sub

        ''' <summary>Same value as Count; kept for existing callers.</summary>
        Public Function getResultNum() As Integer
            Return Me.num
        End Function

        ''' <summary>Fetches the HTML source of the given page.</summary>
        ''' <param name="url">Address of the page to download.</param>
        ''' <returns>
        ''' The response body decoded with Encoding.Default, or an empty
        ''' string when anything goes wrong.
        ''' </returns>
        Public Function OpenUrl(ByVal url As String) As String
            Dim response As WebResponse = Nothing
            Dim reader As StreamReader = Nothing
            Try
                response = WebRequest.Create(url).GetResponse()
                reader = New StreamReader(response.GetResponseStream(), Encoding.Default)
                Return reader.ReadToEnd()
            Catch
                ' Deliberate best-effort: callers treat "" as "page unavailable".
                Return ""
            Finally
                ' Release the reader (and its stream) and the response even
                ' when the download fails part-way through.
                If Not reader Is Nothing Then
                    reader.Close()
                End If
                If Not response Is Nothing Then
                    response.Close()
                End If
            End Try
        End Function

        ''' <summary>Number of rows produced so far.</summary>
        Public ReadOnly Property Count() As Integer
            Get
                Return Me.num
            End Get
        End Property
    End Class
End Namespace
DM dm = new DM(); 这句忘写了,因为你没有给出 DM 的定义。
你改成:
Dim dm As DM = New DM()
另外,Dim set1 As New DataSet 这句也要改,我写错了。
把这句去掉,改成:
Dim set1 As DataSet = dm.SelectDataSet("select * from TContext where bbs_id=" & Me.contain_url(num1, 0))
我做的也和大家的基本是一样的,DM 的确是没给的, 而且大家琢磨着用的也很对!DM就是干那个事的,现在我们所做的就是把DM 用别人的一套ADO.NET替换掉的,
这个我也做了几天,现在基本算是完工,
对ADO.NET也上升到了理性的认识,现在可以运用自如!
谢谢大家!
过年了!给大家拜个早年!
祝!
热心人,心想事成!