项目中碰到一个问题:已经获取了一个字符串(不是xml文件),类似于
<ENROUTE_INFOS>
<ENROUTE_INFO Origin="ZGSZ" Dest="ZBAA" Enroute_Code="01" Origin_LOC="深圳" Dest_LOC="北京" />
<ENROUTE_INFO Origin="ZGSA" Dest="ZBAA" Enroute_Code="001" Origin_LOC="广州" Dest_LOC="北京" />
<ENROUTE_INFO Origin="ZGSB" Dest="ZBAA" Enroute_Code="002" Origin_LOC="上海" Dest_LOC="北京" />
<ENROUTE_INFO Origin="ZGSC" Dest="ZBAA" Enroute_Code="003" Origin_LOC="南京" Dest_LOC="北京" />
<ENROUTE_INFO Origin="ZGSD" Dest="ZBAA" Enroute_Code="004" Origin_LOC="重庆" Dest_LOC="北京/" />
……(有多个这样的内容)
</ENROUTE_INFOS>我现在想获得每一个ENROUTE_INFO中的Enroute_Code、Origin_LOC、Dest_LOC对应的值,我自己想用字符串子串的方式来获取,但是做了以后发现很繁琐,效率不高,如果把它写到文件中作为xml文件来解析,觉得也是影响效率,请大家指点该如何获取这些值呢?
<ENROUTE_INFOS>
<ENROUTE_INFO Origin="ZGSZ" Dest="ZBAA" Enroute_Code="01" Origin_LOC="深圳" Dest_LOC="北京" />
<ENROUTE_INFO Origin="ZGSA" Dest="ZBAA" Enroute_Code="001" Origin_LOC="广州" Dest_LOC="北京" />
<ENROUTE_INFO Origin="ZGSB" Dest="ZBAA" Enroute_Code="002" Origin_LOC="上海" Dest_LOC="北京" />
<ENROUTE_INFO Origin="ZGSC" Dest="ZBAA" Enroute_Code="003" Origin_LOC="南京" Dest_LOC="北京" />
<ENROUTE_INFO Origin="ZGSD" Dest="ZBAA" Enroute_Code="004" Origin_LOC="重庆" Dest_LOC="北京/" />
……(有多个这样的内容)
</ENROUTE_INFOS>我现在想获得每一个ENROUTE_INFO中的Enroute_Code、Origin_LOC、Dest_LOC对应的值,我自己想用字符串子串的方式来获取,但是做了以后发现很繁琐,效率不高,如果把它写到文件中作为xml文件来解析,觉得也是影响效率,请大家指点该如何获取这些值呢?
另外,你的XML有多大?3000多条记录的,用dom4j也只要40毫秒左右,不存在效率问题。
然后从数据库中去查询。
// Parses XML that is already held in memory as a String; no temporary file is needed.
String s = "..."; // XML format string
StringReader sr = new StringReader(s);
// Create a factory object for creating DOM parsers and configure it
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
// Now use the factory to create a DOM parser
DocumentBuilder parser = factory.newDocumentBuilder();
// Parse the character stream and build a Document tree to represent its content.
// DocumentBuilder.parse has no overload taking a Reader, so the reader is wrapped
// in an InputSource.
Document document = parser.parse(new org.xml.sax.InputSource(sr));
// process with DOM Document object
/**
 * Parses every classpath resource named {@code path} and prints the
 * {@code Enroute_Code}, {@code Origin_LOC} and {@code Dest_LOC} attributes of each
 * {@code ENROUTE_INFO} element to standard output, one element per line.
 *
 * <p>{@code path} is looked up through the class loader, so it must be a resource
 * name, not the XML text itself. Any failure is reported with
 * {@code printStackTrace()} and stops processing.
 *
 * @param path resource name passed to {@code ClassLoader.getResources}
 */
public void parserXML(String path)
{
    ClassLoader classloader=DomXmlParser.class.getClassLoader();
    try
    {
        Enumeration<URL> L_urls=classloader.getResources(path);
        DocumentBuilderFactory builderFactory=DocumentBuilderFactory.newInstance();
        DocumentBuilder builder=builderFactory.newDocumentBuilder();
        while(L_urls.hasMoreElements())
        {
            URL url=L_urls.nextElement();
            builder.reset();
            InputStream is=url.openStream();
            Document document;
            try
            {
                document=builder.parse(is);
            }
            finally
            {
                // Release the stream even when parse() throws.
                is.close();
            }
            NodeList L_nodes=document.getElementsByTagName("ENROUTE_INFO");
            for(int i=0;i<L_nodes.getLength();i++)
            {
                NamedNodeMap L_attributes=L_nodes.item(i).getAttributes();
                System.out.println("Enroute_Code:"+attributeValue(L_attributes,"Enroute_Code")
                        +",Origin_LOC:"+attributeValue(L_attributes,"Origin_LOC")
                        +",Dest_LOC:"+attributeValue(L_attributes,"Dest_LOC"));
            }
        }
    }
    catch(Exception ex)
    {
        // Single handler: I/O, parser-configuration and parse errors are all reported the same way.
        ex.printStackTrace();
    }
}

/**
 * Returns the value of the named attribute, or an empty string when the element
 * does not carry it, so one incomplete element cannot abort the whole listing.
 */
private static String attributeValue(NamedNodeMap attributes, String name)
{
    org.w3c.dom.Node attribute=attributes.getNamedItem(name);
    return attribute==null ? "" : attribute.getNodeValue();
}
Enroute_Code:01,Origin_LOC:深圳,Dest_LOC:北京
Enroute_Code:001,Origin_LOC:广州,Dest_LOC:北京
Enroute_Code:002,Origin_LOC:上海,Dest_LOC:北京
Enroute_Code:003,Origin_LOC:南京,Dest_LOC:北京
Enroute_Code:004,Origin_LOC:重庆,Dest_LOC:北京/
// Each attribute value is captured as "everything up to the closing quote", so
// non-numeric codes such as "t1" and place names containing digits also match,
// and a capture can never run across a quote into the next attribute.
Pattern p = Pattern.compile("Enroute_Code=\"([^\"]+)\" Origin_LOC=\"([^\"]+)\" Dest_LOC=\"([^\"]+)\" />");
Matcher m = p.matcher(s);
// find() advances through the whole string, yielding one match per ENROUTE_INFO element
while (m.find()) {
    System.out.print ("Enroute_Code = " + m.group(1));
    System.out.print ("\t Origin_LOC = " + m.group(2));
    System.out.println("\t Dest_LOC = " + m.group(3));
}
输出结果:
Enroute_Code = 01 Origin_LOC = 深圳 Dest_LOC = 北京
Enroute_Code = 001 Origin_LOC = 广州 Dest_LOC = 北京
Enroute_Code = 002 Origin_LOC = 上海 Dest_LOC = 北京
Enroute_Code = 003 Origin_LOC = 南京 Dest_LOC = 北京
Enroute_Code = 004 Origin_LOC = 重庆 Dest_LOC = 北京
方法的参数path我直接给一个字符串进去也可以是吗?lmcj001,谢谢解答,因为我对正则表达式不熟,我想问问对于
Enroute_Code=\"([0-9]+)\",如果对于Enroute_Code="t1"这种形式,正则表达式如何写呢?
方法的参数path我直接给一个字符串进去可以吗?
"\\sOrigin_LOC=(['\"])(.*?)\\1"
"\\sDest_LOC=(['\"])(.*?)\\1"
如果频繁解析最好不要用dom4j,效率太低了
40毫秒不是一个小数字
方法的参数path我直接给一个字符串进去可以吗?
import java.io.InputStream;
import java.net.URL;
import java.util.Enumeration;import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.NodeList;

/**
 * Prints selected attributes of every {@code ENROUTE_INFO} element found in XML
 * resources on the classpath, using the DOM parser that ships with the JDK.
 */
public class DomXmlParser
{
    /**
     * Parses every classpath resource named {@code path} and prints the
     * {@code Enroute_Code}, {@code Origin_LOC} and {@code Dest_LOC} attributes of each
     * {@code ENROUTE_INFO} element to standard output, one element per line.
     *
     * <p>{@code path} is looked up through the class loader, so it must be a resource
     * name, not the XML text itself. Any failure is reported with
     * {@code printStackTrace()} and stops processing.
     *
     * @param path resource name passed to {@link ClassLoader#getResources(String)}
     */
    public void parserXML(String path)
    {
        ClassLoader classloader=DomXmlParser.class.getClassLoader();
        try
        {
            Enumeration<URL> L_urls=classloader.getResources(path);
            DocumentBuilderFactory builderFactory=DocumentBuilderFactory.newInstance();
            DocumentBuilder builder=builderFactory.newDocumentBuilder();
            while(L_urls.hasMoreElements())
            {
                URL url=L_urls.nextElement();
                builder.reset();
                InputStream is=url.openStream();
                Document document;
                try
                {
                    document=builder.parse(is);
                }
                finally
                {
                    // Release the stream even when parse() throws.
                    is.close();
                }
                NodeList L_nodes=document.getElementsByTagName("ENROUTE_INFO");
                for(int i=0;i<L_nodes.getLength();i++)
                {
                    NamedNodeMap L_attributes=L_nodes.item(i).getAttributes();
                    System.out.println("Enroute_Code:"+attributeValue(L_attributes,"Enroute_Code")
                            +",Origin_LOC:"+attributeValue(L_attributes,"Origin_LOC")
                            +",Dest_LOC:"+attributeValue(L_attributes,"Dest_LOC"));
                }
            }
        }
        catch(Exception ex)
        {
            // Single handler: I/O, parser-configuration and parse errors are all
            // reported the same way, and no java.io.IOException import is required.
            ex.printStackTrace();
        }
    }

    /**
     * Returns the value of the named attribute, or an empty string when the element
     * does not carry it, so one incomplete element cannot abort the whole listing.
     */
    private static String attributeValue(NamedNodeMap attributes, String name)
    {
        org.w3c.dom.Node attribute=attributes.getNamedItem(name);
        return attribute==null ? "" : attribute.getNodeValue();
    }

    /** Runs the parser against the sample resource {@code test1.xml}. */
    public static void main(String[] args)
    {
        DomXmlParser parser=new DomXmlParser();
        parser.parserXML("test1.xml"); // test1.xml was placed under /src
    }
}
/*
项目中碰到一个问题:已经获取了一个字符串(不是xml文件),类似于
<ENROUTE_INFOS>
<ENROUTE_INFO Origin="ZGSZ" Dest="ZBAA" Enroute_Code="01" Origin_LOC="深圳" Dest_LOC="北京" />
<ENROUTE_INFO Origin="ZGSA" Dest="ZBAA" Enroute_Code="001" Origin_LOC="广州" Dest_LOC="北京" />
<ENROUTE_INFO Origin="ZGSB" Dest="ZBAA" Enroute_Code="002" Origin_LOC="上海" Dest_LOC="北京" />
<ENROUTE_INFO Origin="ZGSC" Dest="ZBAA" Enroute_Code="003" Origin_LOC="南京" Dest_LOC="北京" />
<ENROUTE_INFO Origin="ZGSD" Dest="ZBAA" Enroute_Code="004" Origin_LOC="重庆" Dest_LOC="北京/" />
……(有多个这样的内容)
</ENROUTE_INFOS>
*/
package com.ricky.www;import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.util.Scanner;

/**
 * Extracts the Enroute_Code, Origin_LOC and Dest_LOC attributes from an XML-like
 * string with a regular expression, processing the text one line at a time.
 */
public class Test{
    /**
     * Matches one ENROUTE_INFO element on a single line and captures, in order,
     * the Enroute_Code, Origin_LOC and Dest_LOC attribute values. Compiled once
     * because it is reused for every line.
     */
    private static final Pattern ENROUTE_INFO = Pattern.compile(
            "<ENROUTE_INFO.+?Enroute_Code=\"([^\"]+)\".+?Origin_LOC=\"([^\"]+)\".+?Dest_LOC=\"([^\"]+)\".+");

    /** Runs the extraction on a two-element sample and prints the result. */
    public static void main(String[] args){
        String message = "<ENROUTE_INFOS>" +"\n"+
            "<ENROUTE_INFO Origin=\"ZGSZ\" Dest=\"ZBAA\" Enroute_Code=\"01\" Origin_LOC=\"深圳\" Dest_LOC=\"北京\" />" + "\n" +
            "<ENROUTE_INFO Origin=\"ZGSA\" Dest=\"ZBAA\" Enroute_Code=\"001\" Origin_LOC=\"广州\" Dest_LOC=\"北京\" /> " + "\n"+
            "</ENROUTE_INFOS>";
        String result = getLoc(message);
        System.out.println(result);
    }

    /**
     * Builds one summary line per ENROUTE_INFO element found in {@code message}.
     *
     * @param message XML-like text with at most one ENROUTE_INFO element per line;
     *                must not be null
     * @return the concatenated summary lines, each terminated by {@code '\n'}, or an
     *         empty string when no element matches
     */
    public static String getLoc(String message){
        // Start empty: the result must contain only the extracted lines.
        StringBuilder result = new StringBuilder();
        Scanner lines = new Scanner(message);
        try {
            while(lines.hasNextLine()){
                Matcher matcher = ENROUTE_INFO.matcher(lines.nextLine());
                if(matcher.find()){
                    result.append("Enroute_Code: ").append(matcher.group(1))
                          .append(", Origin_LOC: ").append(matcher.group(2))
                          .append(", Dest_LOC: ").append(matcher.group(3))
                          .append("\n");
                }
            }
        } finally {
            lines.close();
        }
        return result.toString();
    }
}