//声明一个事件
public event EventHandler<BeforeDownloadEventArgs> BeforeDownload;
// 我想这个应该是触发这个函数
// Raises the BeforeDownload event; returns true when the crawl of this step should continue.
private bool OnBeforeDownload(CrawlStep crawlStep)
{
// Copy the delegate to a local so a concurrent unsubscribe cannot null it between the check and the invocation.
EventHandler<BeforeDownloadEventArgs> beforeDownloadTmp = BeforeDownload;
if (beforeDownloadTmp == null)
{
// No subscribers: apply the default policy (continue only if allowed and not external).
return crawlStep.IsAllowed && !crawlStep.IsExternalUrl;
}
// Cancel starts out as the default policy; subscribers may override it in their handler.
BeforeDownloadEventArgs e =
new BeforeDownloadEventArgs(!crawlStep.IsAllowed || crawlStep.IsExternalUrl, crawlStep);
beforeDownloadTmp(this, e);
return !e.Cancel;
}然后我在这同一个类里 就直接看到了 作者使用了这个OnBeforeDownload函数 try
{
if (OnBeforeDownload(crawlerQueueEntry.CrawlStep))
{
///省略
我搜索了整个解决方案，都没有找到订阅这个事件的代码！
疑惑疑惑！
public event EventHandler<BeforeDownloadEventArgs> BeforeDownload;
// 我想这个应该是触发这个函数
// Raises the BeforeDownload event; returns true when the crawl of this step should continue.
private bool OnBeforeDownload(CrawlStep crawlStep)
{
// Local copy guards against a handler being removed between the null check and the call.
EventHandler<BeforeDownloadEventArgs> beforeDownloadTmp = BeforeDownload;
if (beforeDownloadTmp == null)
{
// Nobody subscribed: fall back to the built-in allowed/not-external rule.
return crawlStep.IsAllowed && !crawlStep.IsExternalUrl;
}
// Event args carry the default Cancel decision; handlers can change e.Cancel.
BeforeDownloadEventArgs e =
new BeforeDownloadEventArgs(!crawlStep.IsAllowed || crawlStep.IsExternalUrl, crawlStep);
beforeDownloadTmp(this, e);
return !e.Cancel;
}然后我在这同一个类里 就直接看到了 作者使用了这个OnBeforeDownload函数 try
{
if (OnBeforeDownload(crawlerQueueEntry.CrawlStep))
{
///省略
我搜索了整个解决方案，都没有找到订阅这个事件的代码！
疑惑疑惑！
解决方案 »
- socket接收数据时 第一次可以完整的接收到数据可是
- 鼠标屏蔽右键的问题,请高手指点下,来者有分
- C 中的Tchar 字体类型转化为C# 中string 类型的问题.
- 页面超时,怎样解决。
- XmlReader 查找指定节点的问题
- 在C#里怎么把一个文本框里输入的内容保存成一个文件,同时再上传到FTP服务器啊?
- C#如何直接打开excle文件
- 两张网卡,怎么实现一张网卡接收数据一张网卡发送数据
- visual C#2005 和visual c#.net有什么区别?
- ★★★在C#安全代码中,怎样实践值类型的引用??as 关键字是不能用在值类型上的。
- C# int 怎么赋值为2进制 或者8进制
- GridView绑定数据,将相同的单元格合并问题
这只是调用类的内部方法而已,跟普通方法没有区别
EventHandler<BeforeDownloadEventArgs> beforeDownloadTmp = BeforeDownload;搜不到类似 BeforeDownload += ..的代码?
你看这个
// Raises the BeforeDownload event; returns true when the crawl of this step should continue.
private bool OnBeforeDownload(CrawlStep crawlStep)
{
// Snapshot the delegate so a concurrent unsubscribe cannot cause a NullReferenceException.
EventHandler<BeforeDownloadEventArgs> beforeDownloadTmp = BeforeDownload;
if (beforeDownloadTmp == null)
{
// No subscribers: default decision is "continue if allowed and not an external url".
return crawlStep.IsAllowed && !crawlStep.IsExternalUrl;
}
// Default Cancel value mirrors the policy above; subscribers may flip it.
BeforeDownloadEventArgs e =
new BeforeDownloadEventArgs(!crawlStep.IsAllowed || crawlStep.IsExternalUrl, crawlStep);
beforeDownloadTmp(this, e);
return !e.Cancel;
}
函数的定义,就能猜的出这个是个触发BeforeDownload这个事件的函数了
这里面做了判断“if (beforeDownloadTmp == null)”,也就是如果没有订阅者如何处理,因而即便没有订阅者也不会出错。
public event EventHandler<BeforeDownloadEventArgs> BeforeDownload;
// 我想这个应该是触发这个函数 —— 这个你也理解错了。你看一下 MSDN：按惯例，以 On 开头的方法是用来触发（引发）事件的，而不是事件的回调函数。可以搜一下 BeforeDownload 这个事件在哪里被订阅。
OnBeforeDownload 这个函数执行完一般是执行 BeforeDownload 这个。
一般的方法是 if (BeforeDownload !=null)
{
BeforeDownload (....)
}
// Raises the BeforeDownload event; returns true when the crawl of this step should continue.
private bool OnBeforeDownload(CrawlStep crawlStep)
{
// Copy to a local: standard thread-safe event-raising pattern.
EventHandler<BeforeDownloadEventArgs> beforeDownloadTmp = BeforeDownload;
if (beforeDownloadTmp == null)
{
// Without subscribers the method decides by itself: allowed and not external.
return crawlStep.IsAllowed && !crawlStep.IsExternalUrl;
}
// e.Cancel is pre-set to the default decision; handlers can override it.
BeforeDownloadEventArgs e =
new BeforeDownloadEventArgs(!crawlStep.IsAllowed || crawlStep.IsExternalUrl, crawlStep);
beforeDownloadTmp(this, e);
return !e.Cancel;
}
这个函数是作者用来触发BeforeDownload这个事件的。
请问您还有什么方法用来订阅事件?您不会说 new()吧?to ChargeForward:我想这里理解成指针应该是没有问题的吧?事件其实不就是个私有类型的代理吗?当然我这样讲不全对
我贴源码吧!
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading;using Amib.Threading;using NCrawler.Utils;using RobotRules;namespace NCrawler
{
/// <summary>
/// Multi-threaded web crawler: starts from a base uri, downloads pages on a
/// thread pool, and runs every successfully downloaded page through a pipeline.
/// Subscribers can veto work via the BeforeDownload/AfterDownload events.
/// </summary>
public class Crawler
{
    #region Readonly & Static Fields

    // Root of the crawl; its host decides whether a link is external.
    private readonly Uri m_BaseUri;

    // Processing steps applied, in order, to every downloaded page.
    private readonly IPipelineStep[] m_Pipeline;

    // Lower-cased urls already queued/visited; access guarded by lock (m_VisitedUrls).
    private readonly HashSet<string> m_VisitedUrls = new HashSet<string>();

    #endregion

    #region Fields

    private CrawlerQueue m_CrawlerQueue;
    private RobotsFileParser m_RobotsFileParser;
    private SmartThreadPool m_SmartThreadPool;

    // Counters maintained with Interlocked; exposed via ThreadsInUse/WaitingQueueLength.
    private long m_ThreadInUse;
    private long m_WaitingQueueLength;

    #endregion

    #region Constructors

    /// <summary>
    /// Constructor for Crawler
    /// </summary>
    /// <param name="crawlStart">The url from where the crawler should start</param>
    /// <param name="pipeline">Pipeline steps</param>
    public Crawler(string crawlStart, params IPipelineStep[] pipeline)
    {
        m_BaseUri = new Uri(crawlStart);
        MaxCrawlDepth = 3;
        AdhereToRobotRules = true;
        ThreadCount = 1;
        m_Pipeline = pipeline;
        UserAgent = "Mozilla";
        DownloadDelay = TimeSpan.Zero;
    }

    #endregion

    #region Instance Properties

    /// <summary>When true, robots.txt of the base site is honored.</summary>
    public bool AdhereToRobotRules { get; set; }

    /// <summary>Urls matching any of these patterns are always crawled (wins over DisAllowedUrls).</summary>
    public Regex[] AllowedUrls { get; set; }

    /// <summary>Urls matching any of these patterns are skipped (unless allowed above).</summary>
    public Regex[] DisAllowedUrls { get; set; }

    /// <summary>Pause inserted after each successful download; TimeSpan.Zero disables it.</summary>
    public TimeSpan DownloadDelay { get; set; }

    /// <summary>Maximum link depth to follow; values &lt;= 0 mean unlimited.</summary>
    public int MaxCrawlDepth { get; set; }

    /// <summary>Maximum number of worker threads in the pool.</summary>
    public int ThreadCount { get; set; }

    /// <summary>Number of worker threads currently processing a queue entry.</summary>
    public long ThreadsInUse
    {
        get { return Interlocked.Read(ref m_ThreadInUse); }
    }

    /// <summary>User agent sent with every download and used for robots.txt checks.</summary>
    public string UserAgent { get; set; }

    /// <summary>Number of steps pushed to the queue but not yet popped by a worker.</summary>
    public long WaitingQueueLength
    {
        get { return Interlocked.Read(ref m_WaitingQueueLength); }
    }

    public WorkItemPriority WorkItemPriority { get; set; }

    #endregion

    #region Instance Methods

    /// <summary>
    /// Runs the crawl starting from the base uri and blocks until the thread pool is idle
    /// (i.e. the queue has been exhausted).
    /// </summary>
    public virtual void Crawl()
    {
        if (AdhereToRobotRules)
        {
            try
            {
                m_RobotsFileParser = new RobotsFileParser(m_BaseUri);
            }
            catch (DownloadFailedException)
            {
                // Ignore, timeout or robots.txt may not exist.
                // m_RobotsFileParser stays null; IsAllowedUrl treats that as "allowed".
            }
        }

        using (m_SmartThreadPool = new SmartThreadPool(new STPStartInfo
            {
                DisposeOfStateObjects = false,
                IdleTimeout = 1000,
                MaxWorkerThreads = ThreadCount,
                StartSuspended = true,
                WorkItemPriority = WorkItemPriority,
                UseCallerCallContext = false,
                ThreadPriority = ThreadPriority.Lowest,
                UseCallerHttpContext = false,
                MinWorkerThreads = 0,
            }))
        using (m_CrawlerQueue = new CrawlerQueue(m_BaseUri.ToString()))
        {
            AddStep(m_BaseUri.ToString(), 0);
            m_SmartThreadPool.Start();
            m_SmartThreadPool.WaitForIdle();
        }
    }

    /// <summary>Queues a url for crawling with no referrer and no extra properties.</summary>
    public void AddStep(string url, int depth)
    {
        AddStep(url, depth, null, null);
    }

    /// <summary>
    /// Queues a url for crawling unless it is too deep, already seen, external,
    /// or disallowed. May only be called while Crawl() is running.
    /// </summary>
    /// <param name="url">Absolute url to crawl</param>
    /// <param name="depth">Link depth of this url relative to the start url</param>
    /// <param name="referrer">Step the url was found on, or null</param>
    /// <param name="properties">Initial values copied into the step's property bag, or null</param>
    public void AddStep(string url, int depth, CrawlStep referrer, NameValueCollection properties)
    {
        if (m_SmartThreadPool == null)
        {
            // NOTE(review): InvalidOperationException would be more idiomatic, but the
            // exception type is kept so existing catch clauses keep working.
            throw new ApplicationException("Unable to add new steps before crawler has started");
        }

        Uri uri = new Uri(url);

        // Exit if not a properly handled scheme
        if ((uri.Scheme != Uri.UriSchemeHttps) &&
            (uri.Scheme != Uri.UriSchemeHttp))
        {
            return;
        }

        // Canonicalize before duplicate/filter checks
        url = uri.ToString();

        // Various conditions must be fulfilled before commencing
        if ((depth >= MaxCrawlDepth && MaxCrawlDepth > 0) || IsCrawled(url))
        {
            return;
        }

        bool isExternalUrl = IsExternalUrl(url);
        bool isAllowedUrl = IsAllowedUrl(url);
        if (isExternalUrl || !isAllowedUrl)
        {
            return;
        }

        // Construct crawl step
        CrawlStep crawlStep = new CrawlStep(url, depth)
            {
                IsExternalUrl = isExternalUrl,
                IsAllowed = isAllowedUrl,
            };

        // NOTE(review): the IsCrawled check above and this Add are not atomic,
        // so two threads could race and enqueue the same url twice.
        lock (m_VisitedUrls)
        {
            m_VisitedUrls.Add(crawlStep.Url.ToString().ToLowerInvariant());
        }

        Interlocked.Increment(ref m_WaitingQueueLength);
        m_CrawlerQueue.Push(crawlStep, referrer, properties);

        // Spin up another worker if the pool has spare capacity.
        if (m_SmartThreadPool.InUseThreads < m_SmartThreadPool.MaxThreads)
        {
            m_SmartThreadPool.QueueWorkItem(WorkerProc);
        }
    }

    /// <summary>
    /// Returns true when the link's host differs from the base uri's host
    /// (case-insensitive comparison). Null/empty links count as internal.
    /// </summary>
    public bool IsExternalUrl(string link)
    {
        if (string.IsNullOrEmpty(link))
        {
            return false;
        }
        // NOTE(review): new Uri(link) throws UriFormatException on relative/invalid
        // links; callers appear to pass absolute urls only — confirm.
        return !m_BaseUri.Host.Equals(new Uri(link).Host, StringComparison.InvariantCultureIgnoreCase);
    }

    /// <summary>
    /// Downloads a step via GET; raises DownloadException and returns null on failure.
    /// </summary>
    private PropertyBag Download(CrawlStep step)
    {
        try
        {
            WebDownloader webDownloader = WebDownloader.GetDownloader();
            if (!string.IsNullOrEmpty(UserAgent))
            {
                webDownloader.UserAgent = UserAgent;
            }
            return webDownloader.Download(step, DownloadMethod.GET);
        }
        catch (Exception ex)
        {
            // Best-effort: surface the failure through the event instead of crashing the worker.
            OnDownloadException(ex, step);
        }
        return null;
    }

    /// <summary>
    /// Runs every pipeline step over the downloaded page. The response stream is
    /// rewound before each step; steps with a timeout run in a bounded time slice.
    /// Per-step exceptions are reported via PipelineException and do not stop the pipeline.
    /// </summary>
    private void ExecutePipeLine(PropertyBag propertyBag)
    {
        Array.ForEach(m_Pipeline,
            doc =>
            {
                try
                {
                    propertyBag.ResponseStream.Seek(0, SeekOrigin.Begin);
                    if (doc is IPipelineStepWithTimeout)
                    {
                        IPipelineStepWithTimeout stepWithTimeout = (IPipelineStepWithTimeout) doc;
                        ExecuteAlloctedTimeSlice.Execute(stepWithTimeout.ProcessorTimeout,
                            () => doc.Process(this, propertyBag));
                    }
                    else
                    {
                        doc.Process(this, propertyBag);
                    }
                }
                catch (Exception ex)
                {
                    OnProcessorException(propertyBag, ex);
                }
            });
    }

    /// <summary>
    /// Decides whether a url may be crawled. An AllowedUrls match wins outright,
    /// then a DisAllowedUrls match rejects, and finally robots.txt is consulted
    /// when AdhereToRobotRules is set.
    /// </summary>
    private bool IsAllowedUrl(string url)
    {
        if (AllowedUrls != null && AllowedUrls.Where(a => a.Match(url).Success).Any())
        {
            return true;
        }
        if (DisAllowedUrls != null &&
            DisAllowedUrls.
                Select(a => a.Match(url)).
                Where(a => a != null && a.Success).
                Any())
        {
            return false;
        }
        try
        {
            // BUGFIX: a null parser (robots.txt missing or failed to download — see the
            // swallowed DownloadFailedException in Crawl()) used to make this return
            // false for EVERY url, silently crawling nothing. Treat it as "allowed".
            return !AdhereToRobotRules || m_RobotsFileParser == null ||
                m_RobotsFileParser.IsAllowed(UserAgent, new Uri(url));
        }
        catch (SiteMismatchException)
        {
            return false;
        }
    }

    /// <summary>
    /// Returns true when the url (compared lower-cased) has already been queued.
    /// NOTE(review): lower-casing the whole url also folds the path, which is
    /// case-sensitive on many servers — two distinct pages may be deduplicated.
    /// </summary>
    private bool IsCrawled(string url)
    {
        lock (m_VisitedUrls)
        {
            return m_VisitedUrls.Contains(url.ToLowerInvariant());
        }
    }

    /// <summary>
    /// Returns true to continue crawl of this url, else false
    /// </summary>
    /// <param name="crawlStep"></param>
    /// <param name="response"></param>
    /// <returns></returns>
    private bool OnAfterDownload(CrawlStep crawlStep, PropertyBag response)
    {
        // Local copy: thread-safe event-raising pattern.
        EventHandler<AfterDownloadEventArgs> afterDownloadTmp = AfterDownload;
        if (afterDownloadTmp == null)
        {
            // No subscribers: default policy is allowed-and-not-external.
            return crawlStep.IsAllowed && !crawlStep.IsExternalUrl;
        }
        AfterDownloadEventArgs e =
            new AfterDownloadEventArgs(!crawlStep.IsAllowed || crawlStep.IsExternalUrl, response);
        afterDownloadTmp(this, e);
        return !e.Cancel;
    }

    /// <summary>
    /// Returns true to continue crawl of this url, else false
    /// </summary>
    /// <param name="crawlStep"></param>
    /// <returns></returns>
    private bool OnBeforeDownload(CrawlStep crawlStep)
    {
        // Local copy: thread-safe event-raising pattern.
        EventHandler<BeforeDownloadEventArgs> beforeDownloadTmp = BeforeDownload;
        if (beforeDownloadTmp == null)
        {
            // No subscribers: default policy is allowed-and-not-external.
            return crawlStep.IsAllowed && !crawlStep.IsExternalUrl;
        }
        BeforeDownloadEventArgs e =
            new BeforeDownloadEventArgs(!crawlStep.IsAllowed || crawlStep.IsExternalUrl, crawlStep);
        beforeDownloadTmp(this, e);
        return !e.Cancel;
    }

    /// <summary>Raises DownloadException for a failed download; no-op without subscribers.</summary>
    private void OnDownloadException(Exception ex, CrawlStep crawlStep)
    {
        EventHandler<DownloadExceptionEventArgs> downloadExceptionTmp = DownloadException;
        if (downloadExceptionTmp == null)
        {
            return;
        }
        DownloadExceptionEventArgs e =
            new DownloadExceptionEventArgs(crawlStep, ex);
        downloadExceptionTmp(this, e);
    }

    /// <summary>Raises PipelineException for a failed pipeline step; no-op without subscribers.</summary>
    private void OnProcessorException(PropertyBag propertyBag, Exception exception)
    {
        EventHandler<PipelineExceptionEventArgs> tmpEvent = PipelineException;
        if (tmpEvent == null)
        {
            return;
        }
        tmpEvent(this, new PipelineExceptionEventArgs(propertyBag, exception));
    }

    /// <summary>
    /// The actual worker code for the crawler: pops steps off the queue until it is
    /// empty, downloading and processing each one. Runs on a thread-pool thread.
    /// </summary>
    private object WorkerProc(object o)
    {
        CrawlerQueueEntry crawlerQueueEntry = m_CrawlerQueue.Pop();
        while (crawlerQueueEntry != null)
        {
            // Update counters
            Interlocked.Decrement(ref m_WaitingQueueLength);
            Interlocked.Increment(ref m_ThreadInUse);
            try
            {
                // BeforeDownload subscribers may veto the step entirely.
                if (OnBeforeDownload(crawlerQueueEntry.CrawlStep))
                {
                    PropertyBag propertyBag = Download(crawlerQueueEntry.CrawlStep);
                    if (propertyBag != null)
                    {
                        try
                        {
                            // Assign initial properties to propertybag
                            if (crawlerQueueEntry.Properties != null)
                            {
                                // Capture a local to avoid closing over the loop variable.
                                CrawlerQueueEntry entry = crawlerQueueEntry;
                                crawlerQueueEntry.Properties.AllKeys.
                                    ForEach(key => propertyBag[key].Value = entry.Properties[key]);
                            }
                            propertyBag.Referrer = crawlerQueueEntry.Referrer;

                            // AfterDownload subscribers may veto pipeline processing.
                            if (OnAfterDownload(crawlerQueueEntry.CrawlStep, propertyBag))
                            {
                                ExecutePipeLine(propertyBag);
                            }

                            // Sleep before next download
                            if (DownloadDelay != TimeSpan.Zero)
                            {
                                Thread.Sleep(DownloadDelay);
                            }
                        }
                        finally
                        {
                            propertyBag.Cleanup();
                        }
                    }
                }
            }
            finally
            {
                Interlocked.Decrement(ref m_ThreadInUse);
            }
            crawlerQueueEntry = m_CrawlerQueue.Pop();
        }
        return null;
    }

    #endregion

    #region Event Declarations

    /// <summary>Raised after a page is downloaded; set Cancel to skip pipeline processing.</summary>
    public event EventHandler<AfterDownloadEventArgs> AfterDownload;

    /// <summary>Raised before a step is downloaded; set Cancel to skip it.</summary>
    public event EventHandler<BeforeDownloadEventArgs> BeforeDownload;

    /// <summary>Raised when a download attempt throws.</summary>
    public event EventHandler<DownloadExceptionEventArgs> DownloadException;

    /// <summary>Raised when a pipeline step throws while processing a page.</summary>
    public event EventHandler<PipelineExceptionEventArgs> PipelineException;

    #endregion
}
}
用过服务器控件吗,他形形色色的事件就是外放给外部使用者,监听他执行某个操作时候的状态,并做相应处理的,虽然基于.NET的事件和自定义事件有点区别,不过,用意一样。
try
{
if (OnBeforeDownload(crawlerQueueEntry.CrawlStep))
是用事件通知外面的观察者，让观察者先做一些自己的处理吗？