java提取字符串字段如下:[IMG height=50 src="/SYSTEM/bos/padownload.jsp?ftpPath=/upImg/9911938308140319576.gif" width=50 border=0]文字文字111111
[IMG height=55 src="/SYSTEM/bos/padownload.jsp?ftpPath=/upImg/9911938308140319577.gif" width=70 border=0]文字文字22222222222
字段如上,有多个。要求提取类似 /SYSTEM/bos/padownload.jsp?ftpPath=/upImg/9911938308140319577.gif 的字段存为 String 数组 1,提取文字存为 String 数组 2。用方法或者正则都可以,先谢谢大家了!
[IMG height=55 src="/SYSTEM/bos/padownload.jsp?ftpPath=/upImg/9911938308140319577.gif" width=70 border=0]文字文字22222222222
字段如上,有多个。要求提取类似 /SYSTEM/bos/padownload.jsp?ftpPath=/upImg/9911938308140319577.gif 的字段存为 String 数组 1,提取文字存为 String 数组 2。用方法或者正则都可以,先谢谢大家了!
// Take everything between the first and the last double quote of the tag text.
int firstQuote = string.indexOf("\"");
int lastQuote = string.lastIndexOf("\"");
String str = string.substring(firstQuote + 1, lastQuote);
提供的方法进行字符串拼接并存入数组。但是我现在使用的是 HttpClient 这个第三方开源项目,它提供了很方便、完整的 HTML 页面解析方法。这样楼主的
要求可以很简单地实现,但是需要导入第三方 JAR 文件,而且代码就依赖于这个 JAR 了。所以怎么做就取决于楼主的实际情况了。
/**
* project_name: Test
* package_name: CSDN_Test_20071126
* package_declaration: package CSDN_Test_20071126;
* filename: StringDisposalTest.java
* author: yuhaiming
* date: 2007-11-26
*/
package CSDN_Test_20071126;
/**
* [IMG height=50 src="/SYSTEM/bos/padownload.jsp?ftpPath=/upImg/9911938308140319576.gif" width=50 border=0]文字文字111111
[IMG height=55 src="/SYSTEM/bos/padownload.jsp?ftpPath=/upImg/9911938308140319577.gif" width=70 border=0]文字文字222222
要求提取类似/SYSTEM/bos/padownload.jsp?ftpPath=/upImg/9911938308140319577.gif 的字段为String数组1! 提取文字为String数组2
* @author yuhaiming
*
*/
public class StringDisposalTest {

    /** Marker that introduces the quoted image path inside an IMG tag. */
    private static final String SRC_PREFIX = "src=\"";

    /**
     * Main processing: splits every sample entry into its image path and its
     * trailing text, then prints both result arrays.
     */
    public static void disposal() {
        // Two hard-coded samples; real data could be loaded into a list and
        // processed entry by entry in the same way.
        String[] sourceString = {"[IMG height=50 src=\"/SYSTEM/bos/padownload.jsp?ftpPath=/upImg/9911938308140319576.gif\" width=50 border=0]文字文字111111",
                "[IMG height=55 src=\"/SYSTEM/bos/padownload.jsp?ftpPath=/upImg/9911938308140319577.gif\" width=70 border=0]文字文字222222"};
        // Size the result arrays from the input so adding samples cannot overflow them.
        String[] srcString = new String[sourceString.length];
        String[] charString = new String[sourceString.length];
        for (int i = 0; i < sourceString.length; i++) {
            srcString[i] = extractSrc(sourceString[i]);
            charString[i] = extractText(sourceString[i]);
        }
        display(srcString, "路径");
        display(charString, "文字");
    }

    /**
     * Returns the value of the src attribute of one entry.
     *
     * @param entry one "[IMG ...]text" entry
     * @return the text between {@code src="} and the next double quote, or an
     *         empty string when the entry has no complete src attribute
     */
    private static String extractSrc(String entry) {
        int marker = entry.indexOf(SRC_PREFIX);
        if (marker < 0) {
            return "";
        }
        int start = marker + SRC_PREFIX.length();
        // Stop at the closing quote rather than at the next space, so the path
        // is found even when src is the last attribute or contains a space.
        int end = entry.indexOf('"', start);
        if (end < 0) {
            return "";
        }
        return entry.substring(start, end);
    }

    /**
     * Returns the text that follows the closing bracket of the tag.
     *
     * @param entry one "[IMG ...]text" entry
     * @return everything after the first {@code ]}, or the whole entry when it
     *         contains no {@code ]}
     */
    private static String extractText(String entry) {
        int close = entry.indexOf(']');
        return close < 0 ? entry : entry.substring(close + 1);
    }

    /**
     * Prints a heading followed by every element of the array, one per line.
     *
     * @param str the values to print
     * @param att the heading; a colon is appended to it
     */
    public static void display(String[] str, String att) {
        System.out.println(att + ":");
        for (String value : str) {
            System.out.println(value);
        }
    }

    /**
     * Runs the demo.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        disposal();
    }
}
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* @author ZhaoJiqing
*
*/
public class Regex {

    /**
     * Matches a {@code src="..."} attribute; capture group 1 is the attribute
     * value. Any character except a double quote is accepted inside the value,
     * so paths containing characters such as '_', '-', '&amp;' or '%' are matched too.
     */
    static final String regexStr = "src=\"([^\"]+)\"";

    /** Wrapper text stripped from group-less matches of the form src="...". */
    private static final String SRC_OPEN = "src=\"";

    /**
     * Runs the extraction on two sample IMG tags.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        Regex r = new Regex();
        String string = "[IMG height=50 src=\"/SYSTEM/bos/padownload.jsp?ftpPath"
                + "=/upImg/9911938308140319576.gif\" width=50 border=0]文字文字111111 "
                + "[IMG height=55 src=\"/SYSTEM/bos/padownload.jsp?ftpPath"
                + "=/upImg/9911938308140319577.gif\" width=70 border=0]文字文字22222222222";
        r.doRegex(Regex.regexStr, string);
    }

    /**
     * Finds every match of {@code regexString} in {@code string} and prints the
     * extracted value of each match on its own line.
     *
     * @param regexString the regular expression to search for
     * @param string      the text to search
     * @throws java.util.regex.PatternSyntaxException if the expression is invalid
     */
    public void doRegex(String regexString, String string) {
        Pattern pattern = Pattern.compile(regexString);
        Matcher matcher = pattern.matcher(string);
        List<String> strs = new ArrayList<String>();
        while (matcher.find()) {
            strs.add(extractValue(matcher));
        }
        // Convert the list into the String array that was asked for.
        String[] ss = strs.toArray(new String[strs.size()]);
        for (String s : ss) {
            System.out.println(s);
        }
    }

    /**
     * Picks the useful part of the current match: capture group 1 when the
     * pattern defines one and it took part in the match; otherwise the whole
     * match, with a surrounding {@code src="} ... {@code "} wrapper removed
     * when present.
     */
    private static String extractValue(Matcher matcher) {
        if (matcher.groupCount() >= 1 && matcher.group(1) != null) {
            return matcher.group(1);
        }
        String whole = matcher.group();
        // The length check keeps a bare src=" match from producing an invalid range.
        boolean wrapped = whole.length() > SRC_OPEN.length()
                && whole.startsWith(SRC_OPEN)
                && whole.endsWith("\"");
        return wrapped ? whole.substring(SRC_OPEN.length(), whole.length() - 1) : whole;
    }
}
Pattern p=Pattern.compile(regEx);
Matcher m=p.matcher(str);
// In a regular expression, (...) defines a capture group and m.group(n) returns the
// text captured by group n; [...] is a character class. Special characters have to be
// escaped. (The regEx above may not be correct - it was not checked carefully.)
// group(n) is only valid after a successful match attempt, so find() must be called
// first; calling group() on a fresh Matcher throws IllegalStateException.
if(m.find()){
    m.group(1);
}
可以使用工具RegexBuddy生成
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;/**
* @author ZhaoJiqing
*
*/
public class Regex {

    /**
     * Matches a {@code src="..."} attribute; capture group 1 is the attribute
     * value. Any character except a double quote is accepted inside the value,
     * so paths containing characters such as '_', '-', '&amp;' or '%' are matched too.
     */
    static final String regexStr = "src=\"([^\"]+)\"";

    /** Wrapper text stripped from group-less matches of the form src="...". */
    private static final String SRC_OPEN = "src=\"";

    /**
     * Runs the extraction on two sample IMG tags.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        Regex r = new Regex();
        String string = "[IMG height=50 src=\"/SYSTEM/bos/padownload.jsp?ftpPath"
                + "=/upImg/9911938308140319576.gif\" width=50 border=0]文字文字111111 "
                + "[IMG height=55 src=\"/SYSTEM/bos/padownload.jsp?ftpPath"
                + "=/upImg/9911938308140319577.gif\" width=70 border=0]文字文字22222222222";
        r.doRegex(Regex.regexStr, string);
    }

    /**
     * Finds every match of {@code regexString} in {@code string} and prints the
     * extracted value of each match on its own line.
     *
     * @param regexString the regular expression to search for
     * @param string      the text to search
     * @throws java.util.regex.PatternSyntaxException if the expression is invalid
     */
    public void doRegex(String regexString, String string) {
        Pattern pattern = Pattern.compile(regexString);
        Matcher matcher = pattern.matcher(string);
        // Each match contributes one extracted value to the list.
        List<String> strs = new ArrayList<String>();
        while (matcher.find()) {
            strs.add(extractValue(matcher));
        }
        // Convert the list into the String array that was asked for.
        String[] ss = strs.toArray(new String[strs.size()]);
        for (String s : ss) {
            System.out.println(s);
        }
    }

    /**
     * Picks the useful part of the current match: capture group 1 when the
     * pattern defines one and it took part in the match; otherwise the whole
     * match, with a surrounding {@code src="} ... {@code "} wrapper removed
     * when present.
     */
    private static String extractValue(Matcher matcher) {
        if (matcher.groupCount() >= 1 && matcher.group(1) != null) {
            return matcher.group(1);
        }
        String whole = matcher.group();
        // The length check keeps a bare src=" match from producing an invalid range.
        boolean wrapped = whole.length() > SRC_OPEN.length()
                && whole.startsWith(SRC_OPEN)
                && whole.endsWith("\"");
        return wrapped ? whole.substring(SRC_OPEN.length(), whole.length() - 1) : whole;
    }
}
/**
 * One "[IMG ... src="path" ...]text" entry. Group 1 is the src value; group 2
 * is the text that follows the tag, up to the next '[' or the end of input.
 * The text therefore needs no trailing space and may itself contain spaces.
 */
private static final Pattern IMG_ENTRY =
        Pattern.compile("\\[IMG\\b[^\\]]*?\\bsrc=\"([^\"]*)\"[^\\]]*\\]([^\\[]*)");

/**
 * Runs the extraction on two sample entries and prints the list of image
 * paths followed by the list of texts.
 */
public static void getRegex(){
    String src = "[IMG height=50 src=\"/SYSTEM/bos/padownload.jsp?ftpPath=/upImg/9911938308140319576.gif\" width=50 border=0]文字文字111111 "+
            "[IMG height=55 src=\"/SYSTEM/bos/padownload.jsp?ftpPath=/upImg/9911938308140319577.gif\" width=70 border=0]文字文字222222 ";
    getRegex(src);
}

/**
 * Extracts the image path and the trailing text of every IMG entry in
 * {@code src} and prints the two resulting lists, paths first.
 *
 * @param src text consisting of "[IMG ... src="path" ...]text" entries
 */
public static void getRegex(String src){
    Matcher m = IMG_ENTRY.matcher(src);
    List<String> paths = new ArrayList<String>();
    List<String> texts = new ArrayList<String>();
    while (m.find()){
        paths.add(m.group(1));
        // Whitespace separating one entry from the next is not part of the text.
        texts.add(m.group(2).trim());
    }
    System.out.println(paths.toString());
    System.out.println(texts.toString());
}
结果:
[/SYSTEM/bos/padownload.jsp?ftpPath=/upImg/9911938308140319576.gif, /SYSTEM/bos/padownload.jsp?ftpPath=/upImg/9911938308140319577.gif]
[文字文字111111, 文字文字222222]