有关字符串中汉字提取的问题

import javax.swing.JOptionPane;
/**
 * Prints the characters of a user-supplied string in groups of {@code num}.
 *
 * <p>Within a group the characters are separated by tabs. When the last slot of a
 * full group holds a Chinese character (and the group has more than one slot), that
 * character is moved onto its own line instead.
 */
public class PrintTheWord {

    /**
     * Asks for the string and the group size through dialogs, then prints the groups.
     *
     * @param args unused
     * @throws NumberFormatException if the group size is not an integer
     * @throws IllegalArgumentException if the group size is not positive
     */
    public static void main(String[] args) {
        String s1 = JOptionPane.showInputDialog("Please enter string:");
        String s2 = JOptionPane.showInputDialog("Please input the num:");
        if (s1 == null || s2 == null) {
            return; // A dialog was cancelled; there is nothing to print.
        }
        int num = Integer.parseInt(s2.trim());
        if (num <= 0) {
            // A non-positive step would never advance the outer loop.
            throw new IllegalArgumentException("num must be positive: " + num);
        }

        // Walk the characters directly: split("") yields a leading empty string only
        // on Java 7 and earlier, so indexing its result from 1 is not portable.
        char[] chars = s1.toCharArray();
        for (int start = 0; start < chars.length; start += num) {
            // The final group may be shorter than num; never read past the array.
            int end = Math.min(start + num, chars.length);
            for (int j = start; j < end; j++) {
                boolean lastSlotOfGroup = (j == start + num - 1);
                if (isChinese(chars[j]) && lastSlotOfGroup && j != start) {
                    System.out.print("\n" + chars[j]);
                } else {
                    System.out.print(chars[j] + "\t");
                }
            }
            System.out.println("");
        }
    }

    /** Returns whether {@code c} lies in the range U+4E00..U+9FA5 used by the original check. */
    private static boolean isChinese(char c) {
        return c >= '\u4e00' && c <= '\u9fa5';
    }
}
编写一个截取字符串的函数，输入为一个字符串和字节数，输出为按字节截取的字符串。但是要保证汉字不被截半个，如输入"我ABC"和4，应该截为"我AB"；输入"我ABC汉DEF"和6，应该输出"我ABC"，而不是"我ABC"加上"汉"的半个。这道题应该有大虾很早前就做过了。我按照自己的思路写了上面的代码，取的num值为3，如果没有两个或多个相连的汉字，输出是没什么问题的。但是当有三个汉字时，有两个汉字就必然会连在一起。
求指教，谢谢！string汉字提取

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

Java 内部用 Unicode。一旦从JOptionPane.showInputDialog 读入，放到 String 变量里，就已经是Unicode,每个汉字或英文字母都对应一个 char，所以你的汉字被半分的情况，是不会发生的。如果汉字表示出错，一定是发生在 byte 与 String 变换时，你的这种方式是很难重现汉字半分问题的。
以前写的：
package com.love.yan.xing;
import java.io.*;
import java.util.*;

/**
 * Reads a string and a byte count from standard input and prints the longest prefix
 * of the string that fits in that many bytes without cutting a character in half.
 */
public class JieQu {

    /**
     * Prompts for the string and the byte count, then prints the truncated string.
     *
     * @param args unused
     * @throws IOException if standard input cannot be read
     * @throws NumberFormatException if the byte count is not an integer
     */
    public static void main(String[] args) throws IOException {
        // Use a single reader for both lines. A second reader on System.in could find
        // the stream already drained by this one's buffer when input is piped.
        BufferedReader input = new BufferedReader(
                new InputStreamReader(System.in));

        System.out.println("请你输入要截取的字符串：");
        String text = input.readLine();
        System.out.println("请输入字节数：");
        String limitLine = input.readLine();
        if (text == null || limitLine == null) {
            return; // Input ended before both values were supplied.
        }

        int limit = Integer.parseInt(limitLine.trim());
        System.out.print(truncateByBytes(text, limit));
    }

    /**
     * Returns the longest prefix of {@code text} whose encoded size does not exceed
     * {@code maxBytes}, keeping every character whole.
     */
    private static String truncateByBytes(String text, int maxBytes) {
        StringBuilder kept = new StringBuilder();
        int usedBytes = 0;
        int index = 0;
        while (index < text.length()) {
            // Step by code point so a surrogate pair is never separated.
            String ch = new String(Character.toChars(text.codePointAt(index)));
            // Widths are measured in the platform default charset, the same charset
            // the original code used when it called getBytes().
            int width = ch.getBytes().length;
            if (usedBytes + width > maxBytes) {
                break; // The next character would not fit completely.
            }
            kept.append(ch);
            usedBytes += width;
            index += ch.length();
        }
        return kept.toString();
    }
}
了解了，只是当我用 "" （应该是叫做空吧？）去划分出String[]时，多出了一个长度，是字符串最前面有这么个符号吗？谢谢~
我想问的是，你如何在 java 中截到半个汉字？
原来在 MS DOS 6.22 + UCDOS 中使用 FoxBase、FoxPro 什么的时候，经常会出现一个汉字只有一半的情况，但是在 Java 中我还没有发现过有半个汉字的情况出现。
一般不用""空字符串来作 split 函数的第一个参数。因为用 "" 来分隔某个字符串，在直观逻辑上是很不容易解释的。
如果非这么用，结果则是在最前面有这么个"".
比如
"abc".split("")的结果是{"","a","b","c"}
第一个总是 "".（注：这是 Java 7 及更早版本的行为；从 Java 8 开始，字符串开头的零宽匹配不再产生空串，"abc".split("") 的结果是 {"a","b","c"}。）
很不容易解释，这样记住就好。
以后自己慢慢琢磨 API 文件中对split 函数的叙述。我读的是英文的 API 文件，还真不会简单地拿中文来解释。
谈谈我的理解,如果不是楼主的意思见谅.
1 Java 内部处理字符统一使用 Unicode（UTF-16）编码，常用汉字和英文字母都各占一个 char（2 个字节），所以在 String 里不会出现半个汉字的情况。
2 我猜测楼主是针对GBK编码来说的，因为在GBK编码中，ASCII字符占1个字节，汉字占2个字节，比如字符串
  "我ABC汉DEF"就占10个字节。如果从这10个字节中截取前6个，就会把"汉"这个汉字的两个字节拆开。为了不
  拆开，就得取5个。
3 改写了一下代码，楼主看看。
import javax.swing.JOptionPane;
import java.util.Arrays;                                 //用于数组复制。

/**
 * Truncates a user-supplied string to a given number of GBK bytes without ever
 * splitting a double-byte character, and prints the result.
 */
public class PrintTheWord {

    /**
     * Asks for the string and the byte count through dialogs, then prints the
     * truncated string.
     *
     * @param args unused
     * @throws Exception if the GBK charset is unavailable
     * @throws NumberFormatException if the byte count is not an integer
     */
    public static void main(String[] args) throws Exception{
        String s1 = JOptionPane.showInputDialog("Please enter string:");
        String s2 = JOptionPane.showInputDialog("Please input the num:");
        if (s1 == null || s2 == null) {
            return; // A dialog was cancelled.
        }
        int num = Integer.parseInt(s2.trim());
        String result = truncateGbk(s1.trim(), num);
        System.out.println(s1+" 截取 "+num+" 个字节结果是 "+result);
    }

    /**
     * Returns the longest prefix of {@code text} that encodes to at most
     * {@code maxBytes} GBK bytes.
     *
     * <p>The prefix is built character by character from the start. Looking only at
     * the two bytes around the cut position cannot tell "middle of one character"
     * apart from "boundary between two adjacent characters", and a GBK trail byte is
     * not always negative.
     */
    private static String truncateGbk(String text, int maxBytes)
            throws java.io.UnsupportedEncodingException {
        StringBuilder kept = new StringBuilder();
        int usedBytes = 0;
        int index = 0;
        while (index < text.length()) {
            // Step by code point so a surrogate pair is never separated.
            String ch = new String(Character.toChars(text.codePointAt(index)));
            int width = ch.getBytes("GBK").length;
            if (usedBytes + width > maxBytes) {
                break; // The next character would not fit completely.
            }
            kept.append(ch);
            usedBytes += width;
            index += ch.length();
        }
        return kept.toString();
    }
}
<java 范例大全>第6章有个例子,楼主参考一下:
package com.zf.s6; // Package that holds this example.

/**
 * Cuts a string down to a byte budget, measured in GBK bytes, without splitting a
 * double-byte character.
 */
class CopyStrByByte {
  /** Charset that defines the widths: 1 byte for ASCII, 2 bytes for a Chinese character. */
  private static final java.nio.charset.Charset GBK =
      java.nio.charset.Charset.forName("GBK");

  private final String str;   // The string to cut.
  private final int copyNum;  // The maximum number of bytes to keep.

  /**
   * Stores the string and the byte budget.
   *
   * @param str the string to cut
   * @param copyNum the maximum number of GBK bytes to keep
   */
  public CopyStrByByte(String str,int copyNum){
      this.str = str;
      this.copyNum = copyNum;
  }

  /**
   * Returns the longest prefix of the string that fits in the byte budget.
   *
   * <p>The method keeps no running state, so repeated calls give the same answer.
   *
   * @return the truncated string; empty when the string is {@code null} or the
   *     budget is not positive
   */
  public String CopyStr(){
      if (str == null || copyNum <= 0) {
          return "";
      }
      StringBuilder kept = new StringBuilder();
      int usedBytes = 0;
      int index = 0;
      while (index < str.length()) {
          // Step by code point so a surrogate pair is never separated.
          String ch = new String(Character.toChars(str.codePointAt(index)));
          // Measure with an explicit charset; the platform default may be UTF-8,
          // where a Chinese character is 3 bytes rather than 2.
          int width = ch.getBytes(GBK).length;
          if (usedBytes + width > copyNum) {
              break; // The next character would not fit completely.
          }
          kept.append(ch);
          usedBytes += width;
          index += ch.length();
      }
      return kept.toString();
  }
}
public class TextTruncate{//描述字符串长度的类
public static void main(String args[]){//java程序的主入口方法
      CopyStrByByte cp = new CopyStrByByte("我ABC汉DEF",6);//调用类并初始化
      System.out.println(cp.CopyStr()); //调用方法获取指定子串
}
}
理论上我也不知道，只是在使用byte[]的时候，如果第一个字符是汉字，我只输出byte[0]这一个字节，出来的东西就看不懂了。
这个代码是有错误的.
下面根据<java 范例大全>的例子,把所有的都输出来,未经大量测试,楼主如发现有问题告诉我一下.
import javax.swing.JOptionPane;
import java.util.Arrays;
import java.util.List;
import java.util.ArrayList;

/**
 * Splits a string into consecutive pieces of at most a given number of GBK bytes,
 * never cutting a double-byte character in half.
 */
public class PrintTheWordRotate{
    /** Charset that defines the widths: 1 byte for ASCII, 2 bytes for a Chinese character. */
    private static final java.nio.charset.Charset GBK =
            java.nio.charset.Charset.forName("GBK");

    /**
     * Asks for the string and the piece size through dialogs and prints the pieces.
     *
     * @param args unused
     * @throws Exception declared by the original signature
     */
    public static void main(String[] args) throws Exception{
        String s1 = JOptionPane.showInputDialog("Please enter string:");
        String s2 = JOptionPane.showInputDialog("Please input the num:");
        if (s1 == null || s2 == null) {
            return; // A dialog was cancelled.
        }
        int num = Integer.parseInt(s2.trim());
        System.out.println("输入的字符串是: "+s1+" 分割的长度是 "+num);
        List<String> list = getTotal(s1,num);
        System.out.println(list);
    }

    /**
     * Splits {@code s} into pieces that each encode to at most {@code copyNum} GBK
     * bytes. Each piece is filled greedily, and the last piece holds whatever remains
     * (so an empty input yields a single empty piece).
     *
     * @param s the string to split
     * @param copyNum the maximum number of GBK bytes per piece
     * @return the pieces, in order
     * @throws NullPointerException if {@code s} is {@code null}
     * @throws IllegalArgumentException if {@code copyNum} is less than 1, or if a
     *     single character of {@code s} is wider than {@code copyNum} bytes
     */
    public static List<String> getTotal(String s, int copyNum) {
        if (s == null) {
            throw new NullPointerException("s");
        }
        if (copyNum < 1) {
            throw new IllegalArgumentException("copyNum must be at least 1: " + copyNum);
        }

        List<String> pieces = new ArrayList<String>();
        StringBuilder current = new StringBuilder();
        int usedBytes = 0;
        int index = 0;
        while (index < s.length()) {
            // Step by code point so a surrogate pair is never separated.
            String ch = new String(Character.toChars(s.codePointAt(index)));
            // Measure with an explicit charset; the platform default may be UTF-8,
            // where a Chinese character is 3 bytes rather than 2.
            int width = ch.getBytes(GBK).length;
            if (width > copyNum) {
                // No piece could ever hold this character; fail instead of looping forever.
                throw new IllegalArgumentException(
                        "character at index " + index + " needs " + width
                        + " bytes but copyNum is " + copyNum);
            }
            if (usedBytes + width > copyNum) {
                // The current piece is full; close it and start the next one.
                pieces.add(current.toString());
                current.setLength(0);
                usedBytes = 0;
            }
            current.append(ch);
            usedBytes += width;
            index += ch.length();
        }
        pieces.add(current.toString()); // The remainder becomes the last piece.
        return pieces;
    }
}