我在解析HTML页面时,使用了下面这段代码:
// Fragment: walks every TABLE element of the parsed page, and for tables with class "t_row"
// extracts cell text and image URLs. pageSource, tempElement, flag, t and picList are
// declared outside this fragment.
// NOTE(review): the API names (findAllElements, HTMLElementName, extractText) look like the
// Jericho HTML Parser — confirm against the file's imports.
List tableList = pageSource.findAllElements(HTMLElementName.TABLE);
for (Object aTableList : tableList) {
tempElement = (Element) aTableList;
// Only handle tables whose class attribute is "t_row" (compared case-insensitively).
if (tempElement.getAttributeValue("class") != null && tempElement.getAttributeValue("class").equalsIgnoreCase("t_row"))
{
// Mark that a matching table was found.
flag = true;
// Per-table scratch values; floor and name are not assigned again in the visible part of the loop.
String floor = "";
String text = "";
String name = "";
//System.out.println("nnnnnnnn="+tempElement);
// Gather the TD elements found from this table element and dump the list for debugging.
List tdList = tempElement.findAllElements(HTMLElementName.TD);
System.out.println(tdList);
for (Object aTd : tdList) {
Element textElement = (Element) aTd;
//System.out.println("aaaaaaa = "+t);
// System.out.println(textElement);
//System.out.println(textElement.getAttributeValue("class")!=null);
// NOTE(review): on the next line the `if (t % 2 != 0 && ...)` condition comes after `//`,
// so it is part of the comment and is never evaluated; the braces that follow form a plain
// block that runs for every TD reached. If the odd-counter / class "line" filter was
// intended, the `if` has to be moved onto its own line.
//System.out.println(textElement.getAttributeValue("class").equalsIgnoreCase("line")); if (t % 2 != 0 && textElement.getAttributeValue("class") != null && textElement.getAttributeValue("class").equalsIgnoreCase("line"))
{
//System.out.println();
// Keep the trimmed text content of this cell and advance the external counter t.
text = textElement.extractText().trim();
t++;
// System.out.println("wwwwww="+t);
// Collect the src of every IMG inside this cell, keeping only values that start with "http".
List imgList = textElement.findAllElements(HTMLElementName.IMG);
for (Object aImg : imgList) {
Element imgElement = (Element) aImg;
if (imgElement.getAttributeValue("src") != null && imgElement.getAttributeValue("src").startsWith("http"))
{
picList.add(imgElement.getAttributeValue("src"));
}
}
}
// NOTE(review): this break is unconditional, so the TD loop ends after its first iteration
// and only the first TD of each matching table is examined — confirm this is intended.
break;
}
居然在循环外面就把里面的一个<td><td/>标签元素过滤掉了,这是怎么回事啊?
// Fragment: walks every TABLE element of the parsed page, and for tables with class "t_row"
// extracts cell text and image URLs. pageSource, tempElement, flag, t and picList are
// declared outside this fragment.
// NOTE(review): the API names (findAllElements, HTMLElementName, extractText) look like the
// Jericho HTML Parser — confirm against the file's imports.
List tableList = pageSource.findAllElements(HTMLElementName.TABLE);
for (Object aTableList : tableList) {
tempElement = (Element) aTableList;
// Only handle tables whose class attribute is "t_row" (compared case-insensitively).
if (tempElement.getAttributeValue("class") != null && tempElement.getAttributeValue("class").equalsIgnoreCase("t_row"))
{
// Mark that a matching table was found.
flag = true;
// Per-table scratch values; floor and name are not assigned again in the visible part of the loop.
String floor = "";
String text = "";
String name = "";
//System.out.println("nnnnnnnn="+tempElement);
// Gather the TD elements found from this table element and dump the list for debugging.
List tdList = tempElement.findAllElements(HTMLElementName.TD);
System.out.println(tdList);
for (Object aTd : tdList) {
Element textElement = (Element) aTd;
//System.out.println("aaaaaaa = "+t);
// System.out.println(textElement);
//System.out.println(textElement.getAttributeValue("class")!=null);
// NOTE(review): on the next line the `if (t % 2 != 0 && ...)` condition comes after `//`,
// so it is part of the comment and is never evaluated; the braces that follow form a plain
// block that runs for every TD reached. If the odd-counter / class "line" filter was
// intended, the `if` has to be moved onto its own line.
//System.out.println(textElement.getAttributeValue("class").equalsIgnoreCase("line")); if (t % 2 != 0 && textElement.getAttributeValue("class") != null && textElement.getAttributeValue("class").equalsIgnoreCase("line"))
{
//System.out.println();
// Keep the trimmed text content of this cell and advance the external counter t.
text = textElement.extractText().trim();
t++;
// System.out.println("wwwwww="+t);
// Collect the src of every IMG inside this cell, keeping only values that start with "http".
List imgList = textElement.findAllElements(HTMLElementName.IMG);
for (Object aImg : imgList) {
Element imgElement = (Element) aImg;
if (imgElement.getAttributeValue("src") != null && imgElement.getAttributeValue("src").startsWith("http"))
{
picList.add(imgElement.getAttributeValue("src"));
}
}
}
// NOTE(review): this break is unconditional, so the TD loop ends after its first iteration
// and only the first TD of each matching table is examined — confirm this is intended.
break;
}
居然在循环外面就把里面的一个<td><td/>标签元素过滤掉了,这是怎么回事啊?
解决方案 »
免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货