....
<em>PRA number: </em>
1999/4020<br><em>Date created: </em>
26<sup><small>th</small></sup> July 1999<div class=\"recallImagesBox\"></div>\n<h3>Product description</h3>\n
Campaign Ref No. YZF-R1/b. <h3>What are the hazards?</h3>\n
Component Failure<h3>What are the defects?</h3>\n
The Coolant Water Hose At The Thermostat Housing May Not Have Been <h3>Where the product was sold</h3>\n
<ul><li><span class=\"contentrecall\">National</span></li></ul><h3>Supplier</h3>\n
<span class=\"contentrecall\">Yamaha Motor Australia Pty Ltd</span><h2>What should consumers do?</h2>\n
Contact Your Nearest Yamaha Motorcycle .\n<h2>Advertisements and supporting documentation</h2>
<span id="synopFileid1170376405224"><img src="/ui/images"/> <a href="/content/item.phtml?20numbers.JPG">Liberty, Outback and Forester Vehicle VIN numbers.JPG</a> (95.5 KB)</span>
.....
上面是一段 HTML 代码的截取。现在要解析这段代码:每段第一行是数据库的字段名,第二行是该字段的内容,需要取出各段信息并插入数据库。
例如:第一段:PRA number:是表的字段名;1999/4020:是内容
第三段:Product description:是字段名;Campaign Ref No. YZF-R1/b.:是内容
哎,主要是无法从这段代码中把各个所需的文字提取出来放到数组里。愁煞我也,请问各位高手有没有好的实现思路,教教小女子。
我之前做过网页抓取,是用字符串处理来取值的。简单示例如下:
// Pulls one labelled piece of text out of the page source using plain
// string searching: find the label, then cut up to the closing marker.
string html = 获取网页代码; // placeholder from the original post for the downloaded page source
string tempHtml;

// --- Locate the field label ---
// NOTE(review): the original referenced an undeclared `praIndex` here; it is
// assumed to be the label text being searched for - confirm.
string colPraName = "PRA number:";
string colPraMark = "</em>"; // marker string that ends the label
int colPraStart = html.IndexOf(colPraName); // position where the label starts
tempHtml = html.Substring(colPraStart + colPraName.Length); // continue from just past the label
int colPraEnd = tempHtml.IndexOf(colPraMark); // position of the closing marker
string strPRANum = tempHtml.Substring(0, colPraEnd); // text between the label and the marker

// --- Next step: extract the value ---
string
tempHtml = tempHtml.SubString(praEnd + colPraMark.Length);
int valPraStart =
// Loads the XML file selected in listBox1, shows three of its values in the
// labels, reads the raw file bytes, and starts a lookup in xml_tab keyed by
// the value shown in label5.
string manifestPath = listBox1.Items[i].ToString();
xmlDoc.Load(manifestPath);
XmlTextReader xtr = new XmlTextReader(manifestPath);
XmlNamespaceManager nsmgr = new XmlNamespaceManager(xmlDoc.NameTable);
XmlNode node = xmlDoc.SelectSingleNode("Manifest");

// Read each value once and reuse it for the matching label.
string ss = node.ChildNodes[1].ChildNodes[0].ChildNodes[0].InnerText;
label1.Text = ss;
string st = node.ChildNodes[1].ChildNodes[0].ChildNodes[1].InnerText;
label2.Text = st;
string sv = node.ChildNodes[0].ChildNodes[0].InnerText;
label5.Text = sv;

SqlConnection con = new SqlConnection(tt);
con.Open();
SqlDataAdapter comm = new SqlDataAdapter("select * from Response_tab", con);
SqlCommandBuilder scb = new SqlCommandBuilder(comm);
DataSet dss = new DataSet("Response_tab");
comm.Fill(dss, "Response_tab");
// NOTE(review): this setting is applied after the first Fill, so it only
// affects the second Fill below - confirm that two fills are intended.
comm.MissingSchemaAction = MissingSchemaAction.AddWithKey;
DataRow newRow;
newRow = dss.Tables[0].NewRow();

// Read the whole file into memory. File.ReadAllBytes keeps reading until the
// full length is obtained and always releases the file handle.
byte[] data = File.ReadAllBytes(manifestPath);

comm.Fill(dss, "Response_tab");

// The lookup value comes from the XML file, so it is passed as a parameter
// instead of being concatenated into the SQL text.
sql = "select jc,zyq from xml_tab where wjm1=@wjm1";
SqlCommand cmd = con.CreateCommand();
cmd.CommandText = sql;
cmd.Parameters.AddWithValue("@wjm1", label5.Text);
SqlDataReader myReader = cmd.ExecuteReader();
if (myReader.Read())
{