我靠,昨天忘记保存,今天还得重新写好多代码!!!!
using System;
using System.Data;
using System.Text;
using System.Text.RegularExpressions;
using Boolue.Common;
using Boolue.Crawler.Config;
using Boolue.Crawler.Interface;
using Boolue.Real;
using Boolue.Real.Support;
using PK3W.BI.APP;
using PK3W.BI.Log;
using PK3W.BI.Support;using PK3W.BI.Module.Stock;
namespace PK3W.Crawler.Module.Stock
{
/// <summary>
/// Crawler registered under tab id 901019006. For every stock row returned by
/// <c>dj_Stock.GetByLastID</c> it downloads the page built from the "LinkAmount"
/// template, splits it with the "Amount_List", "Time_Change_Item" and
/// "Time_Change_Params" regexes from the crawler regex configuration, and hands
/// each extracted item to <see cref="GetItem"/> (currently an empty stub).
/// Progress and failure counters are persisted through <c>dl_Crawler.Save</c>.
/// </summary>
public class StockAlmount : ICrawler
{
    // Value written to ol.ID_Last when a brand-new run record is created; GetObject
    // uses it to recognise a fresh run and record the total row count.
    private const int InitialLastId = 99999999;

    // iThrow value set by GetError before it throws: the error has already been
    // written to the system error log, so Perform must not log it a second time.
    private const int ThrowAlreadyLogged = -2;

    // iThrow value that makes Perform report success from its catch block.
    // Nothing in this class assigns it; the branch is kept for compatibility.
    private const int ThrowTreatAsSuccess = -1;

    private int ID_ts_Tab = 901019006;
    private DataSet RegexData;
    private ol_Crawler ol;
    private int closeTime;
    private int spaceTime;
    private int iThrow = 0;

    // NOTE(review): these two are static, so they are shared by every instance of
    // this crawler. Left static to avoid changing behaviour; confirm whether
    // several instances can run concurrently.
    static int CurrentPKID;
    static string CurrentURL;

    // Only ever cleared by this class (at the end of Perform); nothing here adds entries.
    private static System.Collections.Hashtable cacheUpdatedSupport = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());

    /// <summary>
    /// Loads the regex configuration for this crawler's tab.
    /// </summary>
    public StockAlmount()
    {
        RegexData = ds_Crawler_Regex.GetByTab(ID_ts_Tab);
    }

    /// <summary>
    /// Runs (or resumes) one crawl pass over all stocks.
    /// </summary>
    /// <param name="robot">Robot id stored on a newly created run record.</param>
    /// <param name="closeTime">Stored on the instance; not otherwise used by this class.</param>
    /// <param name="spaceTime">Stored on the instance; not otherwise used by this class.</param>
    /// <returns>
    /// <c>true</c> when the pass completed (or the failure was flagged as
    /// "treat as success"); <c>false</c> when any error aborted the pass.
    /// </returns>
    public bool Perform(int robot, int closeTime, int spaceTime)
    {
        // Reset the classification flag for this run. Without this, a -2 left over
        // from an earlier failed run would make an unrelated exception in this run
        // skip the system-error log below.
        iThrow = 0;

        try
        {
            this.closeTime = closeTime;
            this.spaceTime = spaceTime;

            ol = dl_Crawler.GetByTabAndNotCompleted(ID_ts_Tab);
            if (ol.PKID == 0)
            {
                // No unfinished run exists for this tab: create a fresh record.
                ol.ID_ts_Tab = ID_ts_Tab;
                ol.ID_ts_Crawler_Robot = robot;
                ol.Time_Create = DateTime.Now;
                ol.Time_Completed = DateTime.MaxValue;
                ol.Count_Total = 0;
                ol.Count_Succeed = 0;
                ol.Count_Fail = 0;
                ol.Count_New = 0;
                ol.IsCompleted = 0;
                ol.ID_Last = InitialLastId;
                dl_Crawler.Save(ol);
            }

            GetObject();

            ol.IsCompleted = 1;
            ol.Time_Completed = DateTime.Now;
            dl_Crawler.Save(ol);
            cacheUpdatedSupport.Clear();
            return true;
        }
        catch (Exception ex)
        {
            cacheUpdatedSupport.Clear();

            if (iThrow == ThrowTreatAsSuccess)
            {
                return true;
            }

            if (iThrow != ThrowAlreadyLogged)
            {
                // Unexpected failure: GetError did not log it, so log it here.
                dl_Error_System.Save(new ol_Error_System(0, ID_ts_Tab, DateTime.Now, ol_Error_System.sT_CrawlerError + ol_Error_System.sF_Location.Replace(ol_Error_System.sF, this.GetType().ToString())
                    + ol_Error_System.sF_Message.Replace(ol_Error_System.sF, ex.Message), 0, "", "", DateTime.MaxValue));
            }

            RecordFailure();
            return false;
        }
    }

    /// <summary>
    /// Increments and persists the failure counter of the current run record.
    /// Does nothing when no run record was obtained (the lookup itself failed),
    /// so the catch block in Perform cannot throw a NullReferenceException.
    /// </summary>
    private void RecordFailure()
    {
        if (ol != null)
        {
            ol.Count_Fail++;
            dl_Crawler.Save(ol);
        }
    }

    /// <summary>
    /// Iterates the stock rows returned for the current <c>ol.ID_Last</c>, saving
    /// the row's PKID as the new <c>ID_Last</c> before each page is processed and
    /// bumping <c>Count_Succeed</c> after it.
    /// </summary>
    private void GetObject()
    {
        DataTable dtObject = dj_Stock.GetByLastID(ol.ID_Last).Tables[0];

        // Only a fresh run records the total; a resumed run keeps its stored total.
        if (ol.ID_Last == InitialLastId)
        {
            ol.Count_Total = dtObject.Rows.Count;
            dl_Crawler.Save(ol);
        }

        foreach (DataRow drObject in dtObject.Rows)
        {
            CurrentPKID = int.Parse(drObject["PKID"].ToString());
            ol.ID_Last = CurrentPKID;
            dl_Crawler.Save(ol);

            GetPage(drObject["Code"].ToString());

            ol.Count_Succeed++;
            dl_Crawler.Save(ol);
        }
    }

    /// <summary>
    /// Builds the page URL for one stock code, downloads it and parses the result.
    /// </summary>
    /// <param name="Scode">Stock code substituted for the "|ID|" placeholder in the "LinkAmount" template.</param>
    private void GetPage(string Scode)
    {
        CurrentURL = ds_Crawler_Regex.GetRegex(RegexData, "LinkAmount", ID_ts_Tab).Replace("|ID|", Scode);
        string html = RealPage.Get(CurrentURL);

        // A null result is treated like an empty page (link error) instead of
        // surfacing as a NullReferenceException.
        if (string.IsNullOrEmpty(html))
        {
            GetError(CurrentURL); // logs and throws
        }

        GetBistList(html);
    }

    /// <summary>
    /// Applies the "Amount_List" regex to the page and processes capture group 1
    /// of every match. Zero matches is reported as a regex error.
    /// </summary>
    /// <param name="Html">Downloaded page content.</param>
    private void GetBistList(string Html)
    {
        string Amount_List = ds_Crawler_Regex.GetRegex(RegexData, "Amount_List", ID_ts_Tab);
        MatchCollection Mc = new Regex(Amount_List).Matches(Html);
        if (Mc.Count == 0)
        {
            GetError(CurrentURL, Amount_List); // logs and throws
        }

        foreach (Match M in Mc)
        {
            GetList(M.Groups[1].Value);
        }
    }

    /// <summary>
    /// Extracts group 1 of the "Time_Change_Item" regex from one list fragment,
    /// then runs the "Time_Change_Params" regex over it and passes every
    /// non-empty group-1 capture to <see cref="GetItem"/>.
    /// </summary>
    /// <param name="List">One fragment captured by the "Amount_List" regex.</param>
    private void GetList(string List)
    {
        string Time_Change_Item = ds_Crawler_Regex.GetRegex(RegexData, "Time_Change_Item", ID_ts_Tab);
        string Time_Change_Params = ds_Crawler_Regex.GetRegex(RegexData, "Time_Change_Params", ID_ts_Tab);

        // A failed match yields an empty group value, which is reported below.
        string StrList = new Regex(Time_Change_Item).Match(List).Groups[1].Value;
        if (StrList.Length == 0)
        {
            GetError(CurrentURL, Time_Change_Item); // logs and throws
        }

        MatchCollection Mc2 = new Regex(Time_Change_Params).Matches(StrList);
        if (Mc2.Count == 0)
        {
            GetError(CurrentURL, Time_Change_Params); // logs and throws
        }

        foreach (Match M2 in Mc2)
        {
            string StrItem = M2.Groups[1].Value;
            if (StrItem.Length != 0)
            {
                GetItem(List, StrItem);
            }
        }
    }

    /// <summary>
    /// Placeholder for per-item processing; intentionally empty for now.
    /// </summary>
    /// <param name="list">The list fragment the item came from.</param>
    /// <param name="time_change">Group-1 capture of the "Time_Change_Params" regex.</param>
    private void GetItem(string list, string time_change)
    {
    }

    /// <summary>
    /// Logs a link error for <paramref name="URL"/>, marks the failure as already
    /// logged and aborts the run by throwing.
    /// </summary>
    /// <param name="URL">The URL that returned no content.</param>
    /// <exception cref="InvalidOperationException">Always thrown; caught by Perform.</exception>
    private void GetError(string URL)
    {
        dl_Error_System.Save(new ol_Error_System(0, ID_ts_Tab, DateTime.Now, ol_Error_System.sT_LinkError + ol_Error_System.sF_Location.Replace(ol_Error_System.sF, this.GetType().ToString())
            + ol_Error_System.sF_LinkError.Replace(ol_Error_System.sF, URL), 0, "", "", DateTime.MaxValue));
        iThrow = ThrowAlreadyLogged;
        throw new InvalidOperationException("Link error: " + URL);
    }

    /// <summary>
    /// Logs a regex error (pattern produced no usable match on the page at
    /// <paramref name="URL"/>), marks the failure as already logged and aborts
    /// the run by throwing.
    /// </summary>
    /// <param name="URL">The page being parsed.</param>
    /// <param name="WrongRegex">The pattern that failed to match.</param>
    /// <exception cref="InvalidOperationException">Always thrown; caught by Perform.</exception>
    private void GetError(string URL, string WrongRegex)
    {
        dl_Error_System.Save(new ol_Error_System(0, ID_ts_Tab, DateTime.Now, ol_Error_System.sT_RegexError + ol_Error_System.sF_Location.Replace(ol_Error_System.sF, this.GetType().ToString() + "," + URL)
            + ol_Error_System.sF_Regex.Replace(ol_Error_System.sF, WrongRegex), 0, "", "", DateTime.MaxValue));
        iThrow = ThrowAlreadyLogged;
        throw new InvalidOperationException("Regex error at " + URL + ": " + WrongRegex);
    }
}
}
还差Item,马上就要写完了!!!
using System.Data;
using System.Text;
using System.Text.RegularExpressions;
using Boolue.Common;
using Boolue.Crawler.Config;
using Boolue.Crawler.Interface;
using Boolue.Real;
using Boolue.Real.Support;
using PK3W.BI.APP;
using PK3W.BI.Log;
using PK3W.BI.Support;using PK3W.BI.Module.Stock;
namespace PK3W.Crawler.Module.Stock
{
/// <summary>
/// Crawler registered under tab id 901019006. For every stock row returned by
/// <c>dj_Stock.GetByLastID</c> it downloads the page built from the "LinkAmount"
/// template, splits it with the "Amount_List", "Time_Change_Item" and
/// "Time_Change_Params" regexes from the crawler regex configuration, and hands
/// each extracted item to <see cref="GetItem"/> (currently an empty stub).
/// Progress and failure counters are persisted through <c>dl_Crawler.Save</c>.
/// </summary>
public class StockAlmount : ICrawler
{
    // Value written to ol.ID_Last when a brand-new run record is created; GetObject
    // uses it to recognise a fresh run and record the total row count.
    private const int InitialLastId = 99999999;

    // iThrow value set by GetError before it throws: the error has already been
    // written to the system error log, so Perform must not log it a second time.
    private const int ThrowAlreadyLogged = -2;

    // iThrow value that makes Perform report success from its catch block.
    // Nothing in this class assigns it; the branch is kept for compatibility.
    private const int ThrowTreatAsSuccess = -1;

    private int ID_ts_Tab = 901019006;
    private DataSet RegexData;
    private ol_Crawler ol;
    private int closeTime;
    private int spaceTime;
    private int iThrow = 0;

    // NOTE(review): these two are static, so they are shared by every instance of
    // this crawler. Left static to avoid changing behaviour; confirm whether
    // several instances can run concurrently.
    static int CurrentPKID;
    static string CurrentURL;

    // Only ever cleared by this class (at the end of Perform); nothing here adds entries.
    private static System.Collections.Hashtable cacheUpdatedSupport = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());

    /// <summary>
    /// Loads the regex configuration for this crawler's tab.
    /// </summary>
    public StockAlmount()
    {
        RegexData = ds_Crawler_Regex.GetByTab(ID_ts_Tab);
    }

    /// <summary>
    /// Runs (or resumes) one crawl pass over all stocks.
    /// </summary>
    /// <param name="robot">Robot id stored on a newly created run record.</param>
    /// <param name="closeTime">Stored on the instance; not otherwise used by this class.</param>
    /// <param name="spaceTime">Stored on the instance; not otherwise used by this class.</param>
    /// <returns>
    /// <c>true</c> when the pass completed (or the failure was flagged as
    /// "treat as success"); <c>false</c> when any error aborted the pass.
    /// </returns>
    public bool Perform(int robot, int closeTime, int spaceTime)
    {
        // Reset the classification flag for this run. Without this, a -2 left over
        // from an earlier failed run would make an unrelated exception in this run
        // skip the system-error log below.
        iThrow = 0;

        try
        {
            this.closeTime = closeTime;
            this.spaceTime = spaceTime;

            ol = dl_Crawler.GetByTabAndNotCompleted(ID_ts_Tab);
            if (ol.PKID == 0)
            {
                // No unfinished run exists for this tab: create a fresh record.
                ol.ID_ts_Tab = ID_ts_Tab;
                ol.ID_ts_Crawler_Robot = robot;
                ol.Time_Create = DateTime.Now;
                ol.Time_Completed = DateTime.MaxValue;
                ol.Count_Total = 0;
                ol.Count_Succeed = 0;
                ol.Count_Fail = 0;
                ol.Count_New = 0;
                ol.IsCompleted = 0;
                ol.ID_Last = InitialLastId;
                dl_Crawler.Save(ol);
            }

            GetObject();

            ol.IsCompleted = 1;
            ol.Time_Completed = DateTime.Now;
            dl_Crawler.Save(ol);
            cacheUpdatedSupport.Clear();
            return true;
        }
        catch (Exception ex)
        {
            cacheUpdatedSupport.Clear();

            if (iThrow == ThrowTreatAsSuccess)
            {
                return true;
            }

            if (iThrow != ThrowAlreadyLogged)
            {
                // Unexpected failure: GetError did not log it, so log it here.
                dl_Error_System.Save(new ol_Error_System(0, ID_ts_Tab, DateTime.Now, ol_Error_System.sT_CrawlerError + ol_Error_System.sF_Location.Replace(ol_Error_System.sF, this.GetType().ToString())
                    + ol_Error_System.sF_Message.Replace(ol_Error_System.sF, ex.Message), 0, "", "", DateTime.MaxValue));
            }

            RecordFailure();
            return false;
        }
    }

    /// <summary>
    /// Increments and persists the failure counter of the current run record.
    /// Does nothing when no run record was obtained (the lookup itself failed),
    /// so the catch block in Perform cannot throw a NullReferenceException.
    /// </summary>
    private void RecordFailure()
    {
        if (ol != null)
        {
            ol.Count_Fail++;
            dl_Crawler.Save(ol);
        }
    }

    /// <summary>
    /// Iterates the stock rows returned for the current <c>ol.ID_Last</c>, saving
    /// the row's PKID as the new <c>ID_Last</c> before each page is processed and
    /// bumping <c>Count_Succeed</c> after it.
    /// </summary>
    private void GetObject()
    {
        DataTable dtObject = dj_Stock.GetByLastID(ol.ID_Last).Tables[0];

        // Only a fresh run records the total; a resumed run keeps its stored total.
        if (ol.ID_Last == InitialLastId)
        {
            ol.Count_Total = dtObject.Rows.Count;
            dl_Crawler.Save(ol);
        }

        foreach (DataRow drObject in dtObject.Rows)
        {
            CurrentPKID = int.Parse(drObject["PKID"].ToString());
            ol.ID_Last = CurrentPKID;
            dl_Crawler.Save(ol);

            GetPage(drObject["Code"].ToString());

            ol.Count_Succeed++;
            dl_Crawler.Save(ol);
        }
    }

    /// <summary>
    /// Builds the page URL for one stock code, downloads it and parses the result.
    /// </summary>
    /// <param name="Scode">Stock code substituted for the "|ID|" placeholder in the "LinkAmount" template.</param>
    private void GetPage(string Scode)
    {
        CurrentURL = ds_Crawler_Regex.GetRegex(RegexData, "LinkAmount", ID_ts_Tab).Replace("|ID|", Scode);
        string html = RealPage.Get(CurrentURL);

        // A null result is treated like an empty page (link error) instead of
        // surfacing as a NullReferenceException.
        if (string.IsNullOrEmpty(html))
        {
            GetError(CurrentURL); // logs and throws
        }

        GetBistList(html);
    }

    /// <summary>
    /// Applies the "Amount_List" regex to the page and processes capture group 1
    /// of every match. Zero matches is reported as a regex error.
    /// </summary>
    /// <param name="Html">Downloaded page content.</param>
    private void GetBistList(string Html)
    {
        string Amount_List = ds_Crawler_Regex.GetRegex(RegexData, "Amount_List", ID_ts_Tab);
        MatchCollection Mc = new Regex(Amount_List).Matches(Html);
        if (Mc.Count == 0)
        {
            GetError(CurrentURL, Amount_List); // logs and throws
        }

        foreach (Match M in Mc)
        {
            GetList(M.Groups[1].Value);
        }
    }

    /// <summary>
    /// Extracts group 1 of the "Time_Change_Item" regex from one list fragment,
    /// then runs the "Time_Change_Params" regex over it and passes every
    /// non-empty group-1 capture to <see cref="GetItem"/>.
    /// </summary>
    /// <param name="List">One fragment captured by the "Amount_List" regex.</param>
    private void GetList(string List)
    {
        string Time_Change_Item = ds_Crawler_Regex.GetRegex(RegexData, "Time_Change_Item", ID_ts_Tab);
        string Time_Change_Params = ds_Crawler_Regex.GetRegex(RegexData, "Time_Change_Params", ID_ts_Tab);

        // A failed match yields an empty group value, which is reported below.
        string StrList = new Regex(Time_Change_Item).Match(List).Groups[1].Value;
        if (StrList.Length == 0)
        {
            GetError(CurrentURL, Time_Change_Item); // logs and throws
        }

        MatchCollection Mc2 = new Regex(Time_Change_Params).Matches(StrList);
        if (Mc2.Count == 0)
        {
            GetError(CurrentURL, Time_Change_Params); // logs and throws
        }

        foreach (Match M2 in Mc2)
        {
            string StrItem = M2.Groups[1].Value;
            if (StrItem.Length != 0)
            {
                GetItem(List, StrItem);
            }
        }
    }

    /// <summary>
    /// Placeholder for per-item processing; intentionally empty for now.
    /// </summary>
    /// <param name="list">The list fragment the item came from.</param>
    /// <param name="time_change">Group-1 capture of the "Time_Change_Params" regex.</param>
    private void GetItem(string list, string time_change)
    {
    }

    /// <summary>
    /// Logs a link error for <paramref name="URL"/>, marks the failure as already
    /// logged and aborts the run by throwing.
    /// </summary>
    /// <param name="URL">The URL that returned no content.</param>
    /// <exception cref="InvalidOperationException">Always thrown; caught by Perform.</exception>
    private void GetError(string URL)
    {
        dl_Error_System.Save(new ol_Error_System(0, ID_ts_Tab, DateTime.Now, ol_Error_System.sT_LinkError + ol_Error_System.sF_Location.Replace(ol_Error_System.sF, this.GetType().ToString())
            + ol_Error_System.sF_LinkError.Replace(ol_Error_System.sF, URL), 0, "", "", DateTime.MaxValue));
        iThrow = ThrowAlreadyLogged;
        throw new InvalidOperationException("Link error: " + URL);
    }

    /// <summary>
    /// Logs a regex error (pattern produced no usable match on the page at
    /// <paramref name="URL"/>), marks the failure as already logged and aborts
    /// the run by throwing.
    /// </summary>
    /// <param name="URL">The page being parsed.</param>
    /// <param name="WrongRegex">The pattern that failed to match.</param>
    /// <exception cref="InvalidOperationException">Always thrown; caught by Perform.</exception>
    private void GetError(string URL, string WrongRegex)
    {
        dl_Error_System.Save(new ol_Error_System(0, ID_ts_Tab, DateTime.Now, ol_Error_System.sT_RegexError + ol_Error_System.sF_Location.Replace(ol_Error_System.sF, this.GetType().ToString() + "," + URL)
            + ol_Error_System.sF_Regex.Replace(ol_Error_System.sF, WrongRegex), 0, "", "", DateTime.MaxValue));
        iThrow = ThrowAlreadyLogged;
        throw new InvalidOperationException("Regex error at " + URL + ": " + WrongRegex);
    }
}
}
还差Item,马上就要写完了!!!
有时VS也会出错,代码变成乱码。被搞过一次后,竹子就怕了,从那以后,正式项目都每天备份一次。
改也改不了了。
我以前也是深受其害,现在都习惯性地按 Ctrl+S 了。