哪位高人能帮我把下面这段解析网页资源的C#代码转化为java代码。java解析网页可以用htmlparser包。
/// <summary>
/// Picks the "block" element that most likely holds the page's main text and
/// stores its inner text in <c>text</c>.
/// </summary>
/// <remarks>
/// Relies on element names ("区块" for blocks, "超链接" for links) such as those
/// assigned by <c>HtmlElementRecognize</c>. Works in two passes:
/// first, blocks whose text contains no sentence marks are blanked out;
/// second, every remaining block is scored as
/// <c>SentenceCount² × width × height / (linkCount + 1)</c> and the block with
/// the highest score wins.
/// </remarks>
void OntologyElementRecognize()
{
    double max = 0;

    // Pass 1: blank out blocks that contain no sentences at all.
    foreach (HtmlElement i in pageBroswer.Document.All)
    {
        if (i.Name != "区块")
        {
            continue;
        }

        CountSentences(i.InnerText);
        if (SentenceCount != 0)
        {
            continue;
        }

        try
        {
            i.InnerHtml = null;
            i.OuterHtml = null;
        }
        catch
        {
            // Deliberately best-effort: presumably some elements reject these
            // assignments; such a block is simply left in place.
        }
    }

    // Pass 2: score each block and remember the text of the best one.
    foreach (HtmlElement i in pageBroswer.Document.All)
    {
        if (i.Name != "区块")
        {
            continue;
        }

        // NOTE(review): the score variable's identifier was missing in the
        // original listing (e.g. "double = 0;"); it is restored here as "weight".
        double weight = 0;
        if (i.InnerText != null)
        {
            CountSentences(i.InnerText);

            // Convert to double before squaring so a large count cannot overflow.
            weight = (double)SentenceCount * SentenceCount;

            // ClientRectangle presumably reflects the element's on-screen area
            // (per the original author's note).
            Rectangle r = i.ClientRectangle;
            weight = weight * r.Width * r.Height;
        }

        // Count the links inside the block; link-heavy blocks are penalised.
        int naviCount = 0;
        foreach (HtmlElement j in i.All)
        {
            if (j.Name == "超链接")
            {
                naviCount++;
            }
        }

        // +1 keeps the divisor non-zero for blocks without links.
        weight = weight / (naviCount + 1);
        if (weight > max)
        {
            max = weight;
            text = i.InnerText;
        }
    }
}

/// <summary>
/// Labels every element of the loaded document by assigning its <c>Name</c>
/// according to its tag: A → "超链接", DIV/TABLE/TD → "区块", H1 → "标题",
/// and P without child elements → "段落". Other elements are left unchanged.
/// </summary>
void HtmlElementRecognize()
{
    foreach (HtmlElement i in pageBroswer.Document.All)
    {
        // Invariant upper-casing keeps the comparison independent of the
        // current culture (e.g. "div" must not become "DİV" under tr-TR).
        switch (i.TagName.ToUpperInvariant())
        {
            case "A":
                i.Name = "超链接";
                break;
            case "DIV":
            case "TABLE":
            case "TD":
                i.Name = "区块";
                break;
            case "H1":
                i.Name = "标题";
                break;
            case "P":
                // Only leaf paragraphs are labelled.
                if (i.Children.Count == 0)
                {
                    i.Name = "段落";
                }
                break;
        }
    }
}
/// <summary>
/// Counts the sentence marks and the non-empty paragraphs in
/// <paramref name="txt"/>.
/// </summary>
/// <remarks>
/// Resets <c>ParagraphCount</c>, <c>SentenceCount</c> and
/// <c>CurrentSentenceCount</c> to zero and then writes the results into them.
/// A sentence mark is any of '。', '!', '…', '?' or ','. A paragraph is a
/// stretch of text containing at least one sentence mark, ended by '\n' or by
/// the end of the text.
/// </remarks>
/// <param name="txt">Text to scan; when null all counters stay at zero.</param>
void CountSentences(string txt)
{
    ParagraphCount = 0;
    SentenceCount = 0;
    CurrentSentenceCount = 0;
    if (txt == null)
    {
        return;
    }

    foreach (char c in txt)
    {
        switch (c)
        {
            case '。':
            case '!':
            case '…':
            case '?':
            case ',':
                SentenceCount++;
                CurrentSentenceCount++;
                break;

            case '\n':
                // A line break closes the current paragraph; it only counts
                // if it contained at least one sentence mark.
                if (CurrentSentenceCount > 0)
                {
                    ParagraphCount++;
                }
                CurrentSentenceCount = 0;
                break;
        }
    }

    // Text frequently ends without a trailing '\n'; count that last paragraph
    // as well. CurrentSentenceCount is intentionally left as-is here so it
    // still reports the sentence marks of the final paragraph.
    if (CurrentSentenceCount > 0)
    {
        ParagraphCount++;
    }
}
/// <summary>
/// Picks the "block" element that most likely holds the page's main text and
/// stores its inner text in <c>text</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the method header was missing from this copy of the listing;
/// it is restored here on the assumption that this is the body of
/// <c>OntologyElementRecognize</c> — confirm.
/// Relies on element names ("区块" for blocks, "超链接" for links) such as those
/// assigned by <c>HtmlElementRecognize</c>. Works in two passes:
/// first, blocks whose text contains no sentence marks are blanked out;
/// second, every remaining block is scored as
/// <c>SentenceCount² × width × height / (linkCount + 1)</c> and the block with
/// the highest score wins.
/// </remarks>
void OntologyElementRecognize()
{
    double max = 0;

    // Pass 1: blank out blocks that contain no sentences at all.
    foreach (HtmlElement i in pageBroswer.Document.All)
    {
        if (i.Name != "区块")
        {
            continue;
        }

        CountSentences(i.InnerText);
        if (SentenceCount != 0)
        {
            continue;
        }

        try
        {
            i.InnerHtml = null;
            i.OuterHtml = null;
        }
        catch
        {
            // Deliberately best-effort: presumably some elements reject these
            // assignments; such a block is simply left in place.
        }
    }

    // Pass 2: score each block and remember the text of the best one.
    foreach (HtmlElement i in pageBroswer.Document.All)
    {
        if (i.Name != "区块")
        {
            continue;
        }

        // NOTE(review): the score variable's identifier was missing in the
        // original listing (e.g. "double = 0;"); it is restored here as "weight".
        double weight = 0;
        if (i.InnerText != null)
        {
            CountSentences(i.InnerText);

            // Convert to double before squaring so a large count cannot overflow.
            weight = (double)SentenceCount * SentenceCount;

            // ClientRectangle presumably reflects the element's on-screen area
            // (per the original author's note).
            Rectangle r = i.ClientRectangle;
            weight = weight * r.Width * r.Height;
        }

        // Count the links inside the block; link-heavy blocks are penalised.
        int naviCount = 0;
        foreach (HtmlElement j in i.All)
        {
            if (j.Name == "超链接")
            {
                naviCount++;
            }
        }

        // +1 keeps the divisor non-zero for blocks without links.
        weight = weight / (naviCount + 1);
        if (weight > max)
        {
            max = weight;
            text = i.InnerText;
        }
    }
}

/// <summary>
/// Labels every element of the loaded document by assigning its <c>Name</c>
/// according to its tag: A → "超链接", DIV/TABLE/TD → "区块", H1 → "标题",
/// and P without child elements → "段落". Other elements are left unchanged.
/// </summary>
void HtmlElementRecognize()
{
    foreach (HtmlElement i in pageBroswer.Document.All)
    {
        // Invariant upper-casing keeps the comparison independent of the
        // current culture (e.g. "div" must not become "DİV" under tr-TR).
        switch (i.TagName.ToUpperInvariant())
        {
            case "A":
                i.Name = "超链接";
                break;
            case "DIV":
            case "TABLE":
            case "TD":
                i.Name = "区块";
                break;
            case "H1":
                i.Name = "标题";
                break;
            case "P":
                // Only leaf paragraphs are labelled.
                if (i.Children.Count == 0)
                {
                    i.Name = "段落";
                }
                break;
        }
    }
}
/// <summary>
/// Counts the sentence marks and the non-empty paragraphs in
/// <paramref name="txt"/>.
/// </summary>
/// <remarks>
/// Resets <c>ParagraphCount</c>, <c>SentenceCount</c> and
/// <c>CurrentSentenceCount</c> to zero and then writes the results into them.
/// A sentence mark is any of '。', '!', '…', '?' or ','. A paragraph is a
/// stretch of text containing at least one sentence mark, ended by '\n' or by
/// the end of the text.
/// </remarks>
/// <param name="txt">Text to scan; when null all counters stay at zero.</param>
void CountSentences(string txt)
{
    ParagraphCount = 0;
    SentenceCount = 0;
    CurrentSentenceCount = 0;
    if (txt == null)
    {
        return;
    }

    foreach (char c in txt)
    {
        switch (c)
        {
            case '。':
            case '!':
            case '…':
            case '?':
            case ',':
                SentenceCount++;
                CurrentSentenceCount++;
                break;

            case '\n':
                // A line break closes the current paragraph; it only counts
                // if it contained at least one sentence mark.
                if (CurrentSentenceCount > 0)
                {
                    ParagraphCount++;
                }
                CurrentSentenceCount = 0;
                break;
        }
    }

    // Text frequently ends without a trailing '\n'; count that last paragraph
    // as well. CurrentSentenceCount is intentionally left as-is here so it
    // still reports the sentence marks of the final paragraph.
    if (CurrentSentenceCount > 0)
    {
        ParagraphCount++;
    }
}
解决方案 »
免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货