刚试验过,StreamTokenizer 把 '.' 和普通字母同样对待(算作单词的一部分),所以无法单独识别句号,但感叹号可以。我想这是因为英语里有 "Mr."、"Dr." 等用法,其中的点不是句号,而是单词的一部分,所以不予区分。可以在每次 nextToken() 之后判断单词的最后一个字符。
/**
 * Reads a text file and prints it to standard output one sentence per line.
 *
 * <p>The file is tokenized with a {@link StreamTokenizer} left at its default
 * settings. Tokens are appended to the current sentence, separated by spaces.
 * A sentence ends when a word token ends in {@code '.'} (with the default
 * settings the dot stays attached to the word) or when a {@code '?'} or
 * {@code '!'} token is read. Text after the last terminator is not printed.
 *
 * <p>NOTE: with the default settings {@code '} and {@code "} are string-quote
 * characters, so a quoted run arrives as a single token whose text is in
 * {@code sval}; only the quote character itself is appended here.
 *
 * @param arg optional; {@code arg[0]} is the path of the file to read,
 *            defaulting to {@code "test.txt"} when absent
 */
public static void main(String[] arg) {
    String path = (arg != null && arg.length > 0) ? arg[0] : "test.txt";
    // A single running buffer: no fixed limit on the number of sentences.
    String sentence = "";
    BufferedReader in = null;
    try {
        in = new BufferedReader(new FileReader(path));
        StreamTokenizer st = new StreamTokenizer(in);
        while (st.nextToken() != StreamTokenizer.TT_EOF) {
            boolean endOfSentence = false;
            switch (st.ttype) {
                case StreamTokenizer.TT_NUMBER:
                    // nval is a double, so "3" is rendered as "3.0".
                    sentence += st.nval + " ";
                    break;
                case StreamTokenizer.TT_WORD:
                    sentence += st.sval + " ";
                    // A trailing dot on the word marks the end of the sentence.
                    endOfSentence = st.sval.charAt(st.sval.length() - 1) == '.';
                    break;
                case StreamTokenizer.TT_EOL:
                    sentence += '\n';
                    break;
                default:
                    if (st.ttype == '?' || st.ttype == '!') {
                        sentence += (char) st.ttype;
                        endOfSentence = true;
                    } else {
                        sentence += (char) st.ttype + " ";
                    }
                    break;
            }
            if (endOfSentence) {
                System.out.println(sentence);
                sentence = "";
            }
        }
    } catch (IOException e) {
        // Report the failure instead of silently producing no output.
        System.err.println("Cannot read " + path + ": " + e);
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails after reading.
            }
        }
    }
}
/**
 * Reads a text file and prints it to standard output one sentence per line.
 *
 * <p>The file is tokenized with a {@link StreamTokenizer} left at its default
 * settings. Tokens are appended to the current sentence, separated by spaces.
 * A sentence ends when a word token ends in {@code '.'} (with the default
 * settings the dot stays attached to the word) or when a {@code '?'} or
 * {@code '!'} token is read. Text after the last terminator is not printed.
 *
 * <p>NOTE: with the default settings {@code '} and {@code "} are string-quote
 * characters, so a quoted run arrives as a single token whose text is in
 * {@code sval}; only the quote character itself is appended here.
 *
 * @param arg optional; {@code arg[0]} is the path of the file to read,
 *            defaulting to {@code "test.txt"} when absent
 */
public static void main(String[] arg) {
    String path = (arg != null && arg.length > 0) ? arg[0] : "test.txt";
    // A single running buffer: no fixed limit on the number of sentences.
    String sentence = "";
    BufferedReader in = null;
    try {
        in = new BufferedReader(new FileReader(path));
        StreamTokenizer st = new StreamTokenizer(in);
        while (st.nextToken() != StreamTokenizer.TT_EOF) {
            boolean endOfSentence = false;
            switch (st.ttype) {
                case StreamTokenizer.TT_NUMBER:
                    // nval is a double, so "3" is rendered as "3.0".
                    sentence += st.nval + " ";
                    break;
                case StreamTokenizer.TT_WORD:
                    sentence += st.sval + " ";
                    // A trailing dot on the word marks the end of the sentence.
                    endOfSentence = st.sval.charAt(st.sval.length() - 1) == '.';
                    break;
                case StreamTokenizer.TT_EOL:
                    sentence += '\n';
                    break;
                default:
                    if (st.ttype == '?' || st.ttype == '!') {
                        sentence += (char) st.ttype;
                        endOfSentence = true;
                    } else {
                        sentence += (char) st.ttype + " ";
                    }
                    break;
            }
            if (endOfSentence) {
                System.out.println(sentence);
                sentence = "";
            }
        }
    } catch (IOException e) {
        // Report the failure instead of silently producing no output.
        System.err.println("Cannot read " + path + ": " + e);
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails after reading.
            }
        }
    }
}
这个 ' 号后面的东西,就分析不出来了,怎么改进一下程序呢?
谢了,小弟再加50分,谢谢大大的支持!
在StreamTokenizer st=new StreamTokenizer(in);后面加上这句:
// Make the apostrophe an ordinary one-character token, removing its special
// role as a string-quote delimiter in the tokenizer.
st.ordinaryChar('\'');
// Cut the text at every '.', then for each piece that contains a '?',
// print the piece from its start up to and including the first '?'.
StringTokenizer stk = new StringTokenizer(text, ".");
while (stk.hasMoreTokens()) {
    String piece = stk.nextToken();
    int questionMark = piece.indexOf("?");
    if (questionMark < 0) {
        continue; // this piece holds no question
    }
    System.out.println(piece.substring(0, questionMark + 1));
}
用正则表达式咯!
import java.util.regex.*;
/**
 * Prints every question found in a piece of text, one per line.
 *
 * <p>A question is a run of characters that contains no sentence terminator
 * ({@code . ! ?}) and is immediately followed by {@code '?'}. Excluding
 * {@code !} and {@code ?} from the run keeps consecutive questions separate
 * and stops an exclamation from being glued onto the question after it.
 */
class RegexTest {
    /** Text analysed when no argument is given. */
    private static final String SAMPLE =
            "java is a good program language. what is java use for? thank any way.";

    /** Compiled once; group 1 is the question including its '?'. */
    private static final Pattern QUESTION = Pattern.compile("([^.!?]+\\?)");

    /**
     * @param args optional; {@code args[0]} is the text to scan, defaulting
     *             to the built-in sample sentence when absent
     */
    public static void main(String[] args) {
        String str = (args != null && args.length > 0) ? args[0] : SAMPLE;
        Matcher m = QUESTION.matcher(str);
        while (m.find()) {
            // Strip the whitespace left over from the previous sentence.
            System.out.println(m.group(1).trim());
        }
    }
}
分析 mr. 后面是否紧跟字符是错误的做法,
因为句子中也可以出现 dr. java,这才是正式的英文写法;
dr.java 当然也可以。
import java.util.regex.*;
/**
 * Prints every question found in a piece of text, one per line, without
 * treating the dot of a title abbreviation (mr., ms., dr., jr.) as the end of
 * a sentence.
 *
 * <p>To recognise more abbreviations, add them to the alternation in
 * {@link #QUESTION}.
 */
class RegexTest {
    /** Text analysed when no argument is given. */
    private static final String SAMPLE =
            "java is a good program language. what is mr. java use for?" +
            " thank anyway. Mr. Saddam, do you love Bush? Yes, I do. Why not?";

    /**
     * A question is one or more of: any character that is not a sentence
     * terminator, or a '.' that directly follows a whole-word abbreviation;
     * followed by '?'. The word boundary keeps words such as "items." from
     * being mistaken for "ms.", and any number of abbreviations may appear in
     * one question. Matching is case-insensitive.
     */
    private static final Pattern QUESTION = Pattern.compile(
            "(?i)((?:[^.!?]|(?<=\\b(?:mr|ms|dr|jr))\\.)+\\?)");

    /**
     * @param args optional; {@code args[0]} is the text to scan, defaulting
     *             to the built-in sample text when absent
     */
    public static void main(String[] args) {
        String str = (args != null && args.length > 0) ? args[0] : SAMPLE;
        Matcher m = QUESTION.matcher(str);
        while (m.find()) {
            // Strip the whitespace left over from the previous sentence.
            System.out.println(m.group(1).trim());
        }
    }
}
// Output:
what is mr. java use for?
Mr. Saddam, do you love Bush?
Why not?
--> 把你想的全部加到下面代码的(mr|...)中去
duracell() 兄,请问Pattern在哪个包里?我没见过,很想学习学习。
在StreamTokenizer st=new StreamTokenizer(in);后面加上这句:
// Make the apostrophe an ordinary one-character token, removing its special
// role as a string-quote delimiter in the tokenizer.
st.ordinaryChar('\'');
up
/**
 * Splits a text file into sentences and prints one sentence per line.
 *
 * <p>All ASCII punctuation is registered as "ordinary" with the
 * {@link StreamTokenizer}, so {@code '.'}, {@code '?'} and {@code '!'} arrive
 * as separate one-character tokens instead of being absorbed into words,
 * numbers or quoted strings. Each of those three characters ends the current
 * sentence. Text after the last terminator is not printed.
 */
public class Code {
    /**
     * @param arg optional; {@code arg[0]} is the path of the file to read,
     *            defaulting to {@code "test.txt"} when absent
     */
    public static void main(String[] arg) {
        String path = (arg != null && arg.length > 0) ? arg[0] : "test.txt";
        // A single running buffer: no fixed limit on the number of sentences.
        String sentence = "";
        BufferedReader in = null;
        try {
            in = new BufferedReader(new FileReader(path));
            StreamTokenizer st = new StreamTokenizer(in);
            // Configure the syntax table once, before the first token is
            // read, so that the first word is tokenized by the same rules.
            st.ordinaryChars(33, 47);   // ! through /
            st.ordinaryChars(58, 64);   // : through @
            st.ordinaryChars(91, 96);   // [ through `
            st.ordinaryChars(123, 126); // { through ~
            while (st.nextToken() != StreamTokenizer.TT_EOF) {
                switch (st.ttype) {
                    case StreamTokenizer.TT_NUMBER:
                        // nval is a double, so "3" is rendered as "3.0".
                        sentence += st.nval + " ";
                        break;
                    case StreamTokenizer.TT_WORD:
                        sentence += st.sval + " ";
                        break;
                    case StreamTokenizer.TT_EOL:
                        sentence += '\n';
                        break;
                    default: {
                        char c = (char) st.ttype;
                        if (c == '?' || c == '!' || c == '.') {
                            // Sentence terminator: emit and start afresh.
                            sentence += c;
                            System.out.println(sentence);
                            sentence = "";
                        } else {
                            sentence += c + " ";
                        }
                        break;
                    }
                }
            }
        } catch (IOException e) {
            // Report the failure instead of silently producing no output.
            System.err.println("Cannot read " + path + ": " + e);
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails after reading.
                }
            }
        }
    }
}
java.util.regex — available in JDK 1.4 and above (1.4 included).
不过写程序时加一些注释比较好
看起来也比较舒服!