using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.IO;
using System.Net;
using System.Web;
using System.Text.RegularExpressions;namespace WindowsFormsApplication1
{
/// <summary>
/// Main window: hosts a <c>WebBrowser</c> pointed at the Fanfou login page, dumps the
/// loaded page source into <c>textBox1</c>, and extracts the "latest message" line from
/// that source into <c>textBox2</c>.
/// </summary>
public partial class Form1 : Form
{
    /// <summary>Address that the timer treats as "the user is logged in".</summary>
    private const string HomeUrl = "http://fanfou.com/home";

    /// <summary>
    /// Matches the paragraph whose id starts with "lastmsg-" (the suffix is a per-message
    /// id and therefore must not be hard-coded), capturing the bold label and the
    /// message text that sits between <c>&lt;/strong&gt;</c> and the following
    /// <c>&lt;span&gt;</c>.
    /// </summary>
    private static readonly Regex LatestMessagePattern = new Regex(
        @"<p\b[^>]*\bid=""lastmsg-[^""]*""[^>]*>\s*<strong>(?<label>.*?)</strong>(?<message>.*?)<span\b",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    /// <summary>Creates the form and its designer-generated controls.</summary>
    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Copies the source of the currently loaded page into <c>textBox1</c>,
    /// decoding it as UTF-8.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        Stream documentStream = webBrowser1.DocumentStream;
        if (documentStream == null)
        {
            // Nothing has been loaded yet; show an empty source instead of crashing.
            textBox1.Text = string.Empty;
            return;
        }

        // Dispose the reader (and the stream it wraps) once the text has been read.
        using (StreamReader reader = new StreamReader(documentStream, Encoding.UTF8))
        {
            textBox1.Text = reader.ReadToEnd();
        }
    }

    /// <summary>Starts navigation to the Fanfou login page when the form loads.</summary>
    private void Form1_Load(object sender, EventArgs e)
    {
        webBrowser1.Navigate("http://fanfou.com/login");
    }

    /// <summary>Mirrors the loaded document's URL into the address box.</summary>
    private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
    {
        HtmlDocument document = webBrowser1.Document;
        if (document != null && document.Url != null)
        {
            dizhilan.Text = document.Url.ToString();
        }
    }

    /// <summary>
    /// Extracts the latest-message line (label plus message text) from the page
    /// source held in <c>textBox1</c> and shows it in <c>textBox2</c>.
    /// </summary>
    private void button2_Click(object sender, EventArgs e)
    {
        // Search the page source, not the address bar text: the markup lives in textBox1.
        string html = textBox1.Text;

        Match match = LatestMessagePattern.Match(html);
        if (!match.Success)
        {
            // Clear stale output so a failed extraction is visible to the user.
            textBox2.Text = string.Empty;
            return;
        }

        string label = match.Groups["label"].Value.Trim();
        string message = match.Groups["message"].Value.Trim();
        textBox2.Text = label + message;
    }

    /// <summary>
    /// Refreshes the browser while the address box shows the home page;
    /// otherwise stops the timer and tells the user nothing is running.
    /// </summary>
    private void timer1_Tick(object sender, EventArgs e)
    {
        if (string.Equals(dizhilan.Text, HomeUrl, StringComparison.Ordinal))
        {
            webBrowser1.Refresh();
            return;
        }

        // Stop the timer rather than assigning Interval = 0, which is rejected as out
        // of range. NOTE(review): assumes timer1 is a System.Windows.Forms.Timer - confirm.
        // Stopping before the dialog also prevents further ticks while it is open.
        timer1.Stop();
        MessageBox.Show("没有运行");
    }
}
}原理就是窗口载入后webbrowser载入饭否的登录页面,用户登录后点按钮1在textBox1中显示网页源代码,然后点按钮2在textBox2中显示提取出来的文字,以下是网页源代码:
<form method="post" action="/home" id="message" class="limit">
<h2>你在做什么?</h2>
<p>
<textarea name="content" rows="3" cols="70" class="qs" ></textarea> </p> <div class="act">
<div class="actpost">
<input type="hidden" name="action" value="msg.post" />
<input type="hidden" name="in_reply_to_status_id" value="" />
<input type="hidden" name="repost_status_id" value="" />
<input type="hidden" name="token" value="3457ab08" />
<img class="loading" src="http://static.fanfou.com/img/ajax-indicator.gif" /><input type="submit" class="formbutton" title="按Ctrl+Enter键发送消息" value="发送" /> </div>
<div class="lastmsg">
<p id="lastmsg-s1JXAUbpTOw"><strong>最新:</strong>李云大笨蛋! <span class="time" title="2011-07-01 17:57" ffid="s1JXAUbpTOw" stime="Fri Jul 01 09:57:51 +0000 2011">约 4 小时前</span></p> </div>
</div>
<p class="tip">可以输入 <span class="counter">140</span> 字</p>
</form>
</div>
<div id="content"> 我只要中间的“最新:李云大笨蛋”,其中最新是永久不变的,李云大笨蛋是可以更改的,有位大哥帮我写的正则,只能输出李云大笨蛋,而李云大笨蛋更改成其它文字时就什么反应也没有了,
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.IO;
using System.Net;
using System.Web;
using System.Text.RegularExpressions;namespace WindowsFormsApplication1
{
/// <summary>
/// Main window: hosts a <c>WebBrowser</c> pointed at the Fanfou login page, dumps the
/// loaded page source into <c>textBox1</c>, and extracts the "latest message" line from
/// that source into <c>textBox2</c>.
/// </summary>
public partial class Form1 : Form
{
    /// <summary>Address that the timer treats as "the user is logged in".</summary>
    private const string HomeUrl = "http://fanfou.com/home";

    /// <summary>
    /// Matches the paragraph whose id starts with "lastmsg-" (the suffix is a per-message
    /// id and therefore must not be hard-coded), capturing the bold label and the
    /// message text that sits between <c>&lt;/strong&gt;</c> and the following
    /// <c>&lt;span&gt;</c>.
    /// </summary>
    private static readonly Regex LatestMessagePattern = new Regex(
        @"<p\b[^>]*\bid=""lastmsg-[^""]*""[^>]*>\s*<strong>(?<label>.*?)</strong>(?<message>.*?)<span\b",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    /// <summary>Creates the form and its designer-generated controls.</summary>
    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Copies the source of the currently loaded page into <c>textBox1</c>,
    /// decoding it as UTF-8.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        Stream documentStream = webBrowser1.DocumentStream;
        if (documentStream == null)
        {
            // Nothing has been loaded yet; show an empty source instead of crashing.
            textBox1.Text = string.Empty;
            return;
        }

        // Dispose the reader (and the stream it wraps) once the text has been read.
        using (StreamReader reader = new StreamReader(documentStream, Encoding.UTF8))
        {
            textBox1.Text = reader.ReadToEnd();
        }
    }

    /// <summary>Starts navigation to the Fanfou login page when the form loads.</summary>
    private void Form1_Load(object sender, EventArgs e)
    {
        webBrowser1.Navigate("http://fanfou.com/login");
    }

    /// <summary>Mirrors the loaded document's URL into the address box.</summary>
    private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
    {
        HtmlDocument document = webBrowser1.Document;
        if (document != null && document.Url != null)
        {
            dizhilan.Text = document.Url.ToString();
        }
    }

    /// <summary>
    /// Extracts the latest-message line (label plus message text) from the page
    /// source held in <c>textBox1</c> and shows it in <c>textBox2</c>.
    /// </summary>
    private void button2_Click(object sender, EventArgs e)
    {
        // Search the page source, not the address bar text: the markup lives in textBox1.
        string html = textBox1.Text;

        Match match = LatestMessagePattern.Match(html);
        if (!match.Success)
        {
            // Clear stale output so a failed extraction is visible to the user.
            textBox2.Text = string.Empty;
            return;
        }

        string label = match.Groups["label"].Value.Trim();
        string message = match.Groups["message"].Value.Trim();
        textBox2.Text = label + message;
    }

    /// <summary>
    /// Refreshes the browser while the address box shows the home page;
    /// otherwise stops the timer and tells the user nothing is running.
    /// </summary>
    private void timer1_Tick(object sender, EventArgs e)
    {
        if (string.Equals(dizhilan.Text, HomeUrl, StringComparison.Ordinal))
        {
            webBrowser1.Refresh();
            return;
        }

        // Stop the timer rather than assigning Interval = 0, which is rejected as out
        // of range. NOTE(review): assumes timer1 is a System.Windows.Forms.Timer - confirm.
        // Stopping before the dialog also prevents further ticks while it is open.
        timer1.Stop();
        MessageBox.Show("没有运行");
    }
}
}原理就是窗口载入后webbrowser载入饭否的登录页面,用户登录后点按钮1在textBox1中显示网页源代码,然后点按钮2在textBox2中显示提取出来的文字,以下是网页源代码:
<form method="post" action="/home" id="message" class="limit">
<h2>你在做什么?</h2>
<p>
<textarea name="content" rows="3" cols="70" class="qs" ></textarea> </p> <div class="act">
<div class="actpost">
<input type="hidden" name="action" value="msg.post" />
<input type="hidden" name="in_reply_to_status_id" value="" />
<input type="hidden" name="repost_status_id" value="" />
<input type="hidden" name="token" value="3457ab08" />
<img class="loading" src="http://static.fanfou.com/img/ajax-indicator.gif" /><input type="submit" class="formbutton" title="按Ctrl+Enter键发送消息" value="发送" /> </div>
<div class="lastmsg">
<p id="lastmsg-s1JXAUbpTOw"><strong>最新:</strong>李云大笨蛋! <span class="time" title="2011-07-01 17:57" ffid="s1JXAUbpTOw" stime="Fri Jul 01 09:57:51 +0000 2011">约 4 小时前</span></p> </div>
</div>
<p class="tip">可以输入 <span class="counter">140</span> 字</p>
</form>
</div>
<div id="content"> 我只要中间的“最新:李云大笨蛋”,其中最新是永久不变的,李云大笨蛋是可以更改的,有位大哥帮我写的正则,只能输出李云大笨蛋,而李云大笨蛋更改成其它文字时就什么反应也没有了,
<strong>最新:</strong>(.+?)!获取 确定一个也不用foreach了
string name = m.Groups[1].Value;http://book.douban.com/subject/2269648/
自己找这本书的pdf 才300多页 啃了 很简单的
改成
Regex reg = new Regex(@"<strong>最新:</strong>(.+?)<span");textBox2.Text = m.Value; 改成 m.Groups[1].Value;