一个关于截取字符串的面试题！

用 byte 数组方式截取的时候，判断截取的位置是否落在
GBK 编码格式的中文区域内，这样？好像有点笨，效率也有问题。
我去写写看！

我也遇到过同样的面试题，真是麻烦，只想到了stringStream的方法，按字符数截取，按字节的自己搞不懂，关注

我在别的地方找了一段代码
/**
 * Returns the leading characters of {@code str} that cover {@code toCount}
 * bytes in the platform default charset, followed by {@code more} when
 * something was cut off.
 *
 * <p>Characters are never split. The character that crosses the byte limit is
 * kept whole, so the encoded result may exceed {@code toCount} by up to one
 * character's width minus one byte.
 *
 * @param str     the text to shorten; {@code null} yields {@code ""}
 * @param toCount the byte limit; values {@code <= 0} keep no characters
 * @param more    marker appended only when characters were dropped;
 *                {@code null} is treated as "no marker"
 * @return the shortened text, or {@code str} unchanged when it fits
 */
public static String substring(String str, int toCount, String more)
{
    if (str == null)
    {
        return "";
    }
    StringBuilder kept = new StringBuilder();
    int usedBytes = 0;
    int pos = 0;
    while (pos < str.length() && usedBytes < toCount)
    {
        // Work on whole code points so a surrogate pair is measured and copied as one unit.
        String unit = new String(Character.toChars(str.codePointAt(pos)));
        usedBytes += unit.getBytes().length;
        kept.append(unit);
        pos += unit.length();
    }
    // Only mark the result when part of the input was really left out.
    if (pos < str.length() && more != null)
    {
        kept.append(more);
    }
    return kept.toString();
}

一个一个地截，截好后判断是否是数字或英文：如果是就 append，如果不是就截 2 个字节。我是这么个思路！

/// <summary>
/// Returns the longest prefix of <paramref name="str"/> whose UTF-8 encoding
/// occupies at most <paramref name="byteLength"/> bytes, without ever
/// splitting a character (or a surrogate pair).
/// </summary>
/// <param name="str">Text to shorten; null or empty yields an empty string.</param>
/// <param name="byteLength">Byte budget; values &lt;= 0 yield an empty string.</param>
/// <returns>The prefix that fits in the budget; never null.</returns>
public static string SubstringByByte(string str, int byteLength)
{
    if (string.IsNullOrEmpty(str) || byteLength <= 0)
        return string.Empty;

    StringBuilder kept = new StringBuilder();
    int remaining = byteLength;
    int pos = 0;
    while (pos < str.Length)
    {
        // A surrogate pair is one character and must be measured and copied as a unit.
        int units = char.IsSurrogatePair(str, pos) ? 2 : 1;
        int cost = Encoding.UTF8.GetByteCount(str.Substring(pos, units));

        // Check the budget BEFORE appending so the result never exceeds it.
        if (cost > remaining)
            break;

        kept.Append(str, pos, units);
        remaining -= cost;
        pos += units;
    }
    return kept.ToString();
}
虽然有点麻烦，不过好像能行

//用C#实现一个：
        /// <summary>
        /// Collects characters of <paramref name="str"/> from the start for as long as
        /// their cumulative size in the system default encoding stays within
        /// <paramref name="byteCount"/> bytes.
        /// </summary>
        static string GetSubString(string str, int byteCount)
        {
            string kept = string.Empty;
            int usedBytes = 0;
            for (int pos = 0; pos < str.Length; pos++)
            {
                string piece = str[pos].ToString();
                usedBytes += System.Text.Encoding.Default.GetByteCount(piece);
                if (usedBytes > byteCount)
                {
                    // This character would not fit; stop before adding it.
                    return kept;
                }
                kept += piece;
            }
            return kept;
        }

        // Demo: print the result for byte budgets 1 through 9.
        static void Main(string[] args)
        {
            string str = "我ABC汉DEF";
            int i = 1;
            while (i < 10)
            {
                Console.WriteLine("截出" + i + "个字节：");
                Console.WriteLine(GetSubString(str, i));
                i++;
            }
        }
        /* Output as reported by the original poster (consistent with a default
           encoding that uses two bytes per Chinese character):
截出1个字节：截出2个字节：
我
截出3个字节：
我A
截出4个字节：
我AB
截出5个字节：
我ABC
截出6个字节：
我ABC
截出7个字节：
我ABC汉
截出8个字节：
我ABC汉D
截出9个字节：
我ABC汉DE*/

public class Test {

    /**
     * Prints the leading characters of {@code str}, one per line, within a
     * budget of {@code count} bytes. A character with code 0..255 costs one
     * byte; any other character costs two and is printed only if its full
     * cost still fits in the budget.
     *
     * @param count the byte budget
     * @param str   the text to print from
     */
    public void output(int count, String str) {
        int used = 0;
        int pos = 0;
        while (pos < str.length() && used < count) {
            char current = str.charAt(pos++);
            boolean singleByte = current <= 255;
            used += singleByte ? 1 : 2;
            // A single-byte character always fits here because used < count held on entry.
            if (singleByte || used <= count) {
                System.out.println(current);
            }
        }
    }

    /**
     * Runs the demo with a fixed sample string and a budget of six bytes.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        new Test().output(6, "ab@毕AKDJSD");
    }
}

JDK1.5以上直接substring就可以了，不会半个汉字的。

/**
 * Returns the longest prefix of {@code input} whose encoding in the platform
 * default charset occupies at most {@code num} bytes, never splitting a
 * character.
 *
 * <p>Special cases kept from the original: a non-positive {@code num}, or a
 * {@code num} at least as large as the encoded length, returns {@code input}
 * unchanged.
 *
 * @param input the text to shorten; {@code null} is returned as-is
 * @param num   the byte budget
 * @return the prefix that fits, or {@code input} itself in the special cases
 */
public String getResult(String input, int num)
{
    if (input == null || num <= 0) {
        return input;
    }
    int usedBytes = 0;
    int pos = 0;
    while (pos < input.length()) {
        // Measure whole code points so surrogate pairs are never separated.
        int units = Character.charCount(input.codePointAt(pos));
        int cost = input.substring(pos, pos + units).getBytes().length;
        if (usedBytes + cost > num) {
            // The next character would be cut in half (or not fit at all): stop here.
            return input.substring(0, pos);
        }
        usedBytes += cost;
        pos += units;
    }
    // Everything fits within the budget.
    return input;
}

解释的不是很清楚，我自己写的，验证通过，没问题 /**
* 逐一的验证子串，得到获得临界的那个的位置 index
*
* @param s
* @param b
* @return
*/
public static String sss(String s, int b) {
    // Returns the longest prefix of s whose encoding in the platform default
    // charset is at most b bytes, without splitting a character.
    // - null input or a non-positive budget gives "".
    // - A string that already fits is returned whole (the earlier version
    //   dropped its last character in that case and threw on empty input).
    if (s == null || b <= 0) {
        return "";
    }
    int usedBytes = 0;
    int pos = 0;
    while (pos < s.length()) {
        // Encode one code point at a time instead of re-encoding the growing prefix.
        int units = Character.charCount(s.codePointAt(pos));
        int cost = s.substring(pos, pos + units).getBytes().length;
        if (usedBytes + cost > b) {
            break;
        }
        usedBytes += cost;
        pos += units;
    }
    return s.substring(0, pos);
}

// Truncating by bytes or by characters is not hard in itself; the only real
// difficulty is deciding what kind of thing has just been cut. Here is my
// approach, which I find the most convenient: write a method containing the
// code below, which decides whether the value is a letter or not, and call it
// wherever that decision is needed.
// The three branches form ONE chain: with two separate ifs, the trailing else
// reset bReturn to false for every lowercase letter.
// NOTE(review): CompareTo on strings is presumably culture-sensitive here — confirm
// the ranges behave as intended or switch to an ordinal char comparison.
if (sChar.CompareTo("a") >= 0 && sChar.CompareTo("z") <= 0)
    bReturn = true;
else if (sChar.CompareTo("A") >= 0 && sChar.CompareTo("Z") <= 0)
    bReturn = true;
else
    bReturn = false;

这个不错！
MARKED BY CNDO

/**
 * Copies the leading part of a string that fits into a given number of bytes
 * (platform default charset) without splitting any character.
 *
 * <p>Instances are immutable, so {@link #CopyStr()} can be called repeatedly
 * and always returns the same result.
 */
class CopyStrByByte {
  private final String str;     // the source string
  private final int copyNum;    // the number of bytes to copy

  /**
   * @param str     the source string; {@code null} is treated as empty
   * @param copyNum the byte budget; values {@code <= 0} produce an empty result
   */
  public CopyStrByByte(String str, int copyNum) {
    this.str = str;
    this.copyNum = copyNum;
  }

  /**
   * Returns the longest prefix of the source string whose encoded size does
   * not exceed the byte budget.
   *
   * @return the prefix that fits; never {@code null}
   */
  public String CopyStr() {
    if (str == null || copyNum <= 0) {
      return "";
    }
    int cutNum = 0;   // bytes taken so far
    int pos = 0;      // index of the next character to consider
    while (pos < str.length()) {
      // Measure each code point with its real encoded width; characters are
      // not assumed to be exactly one or two bytes.
      int units = Character.charCount(str.codePointAt(pos));
      int cost = str.substring(pos, pos + units).getBytes().length;
      if (cutNum + cost > copyNum) {
        break;
      }
      cutNum += cost;
      pos += units;
    }
    return str.substring(0, pos);
  }
}
public class TestCopyStr{
public static void main(String args[]){
CopyStrByByte cp = new CopyStrByByte("as论者afs为什么",12);
System.out.println(cp.CopyStr());
}
}

/// <summary>
/// Returns the longest prefix of <paramref name="str"/> whose UTF-8 encoding
/// occupies at most <paramref name="byteLength"/> bytes, without ever
/// splitting a character (or a surrogate pair).
/// </summary>
/// <param name="str">Text to shorten; null or empty yields an empty string.</param>
/// <param name="byteLength">Byte budget; values &lt;= 0 yield an empty string.</param>
/// <returns>The prefix that fits in the budget; never null.</returns>
public static string SubstringByByte(string str, int byteLength)
{
    if (string.IsNullOrEmpty(str) || byteLength <= 0)
        return string.Empty;

    StringBuilder kept = new StringBuilder();
    int remaining = byteLength;
    int pos = 0;
    while (pos < str.Length)
    {
        // A surrogate pair is one character and must be measured and copied as a unit.
        int units = char.IsSurrogatePair(str, pos) ? 2 : 1;
        int cost = Encoding.UTF8.GetByteCount(str.Substring(pos, units));

        // Check the budget BEFORE appending so the result never exceeds it.
        if (cost > remaining)
            break;

        kept.Append(str, pos, units);
        remaining -= cost;
        pos += units;
    }
    return kept.ToString();
}

package lihan;

/**
 * A solution for truncating, by byte count, a Java string that contains
 * Chinese characters.
 *
 * @author 李晗
 */
public class test {

    /**
     * Prints the encoded length of {@code splitStr} (platform default
     * charset), then the longest prefix of it that fits in {@code bytes}
     * bytes without splitting a character.
     *
     * <p>As in the original version, a budget of exactly one byte still
     * prints the first character in full, even when that character is wider
     * than one byte.
     *
     * @param splitStr the text to truncate
     * @param bytes    the byte budget; values {@code <= 0} print an empty line
     */
    public void splitIt(String splitStr, int bytes) {
        byte[] bt = splitStr.getBytes();
        System.out.println("Length of this String ===>" + bt.length);
        System.out.println(prefixWithinBytes(splitStr, bytes));
    }

    /** Returns the prefix of {@code text} selected by the rules documented on {@code splitIt}. */
    private static String prefixWithinBytes(String text, int budget) {
        if (text.length() == 0 || budget <= 0) {
            return "";
        }
        if (budget == 1) {
            // Historical special case: a one-byte budget keeps the first character whole.
            return text.substring(0, Character.charCount(text.codePointAt(0)));
        }
        int usedBytes = 0;
        int pos = 0;
        while (pos < text.length()) {
            // Measure each code point by its real encoded width instead of
            // guessing from the sign of individual bytes.
            int units = Character.charCount(text.codePointAt(pos));
            int cost = text.substring(pos, pos + units).getBytes().length;
            if (usedBytes + cost > budget) {
                break;
            }
            usedBytes += cost;
            pos += units;
        }
        return text.substring(0, pos);
    }

    /**
     * Runs the demo on a fixed sample string with a three-byte budget.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        String str = "我abc的DEFe呀fgsdfg大撒旦";
        int num = 3;
        System.out.println("num:" + num);
        test sptstr = new test();
        sptstr.splitIt(str, num);
    }
}

http://blog.csdn.net/lihan6415151528/archive/2008/12/09/3484930.aspx

// C# code
            string str = "我a汉cdefghijklmn";
            int index = 5;   // byte budget

            // Walk the string one character at a time and stop before the first
            // character whose bytes (system default encoding) would not fit.
            // Looking only at the single byte at the cut position is not enough:
            // it mistakes a following Chinese character or digit for a split one,
            // and it reads past the array when the budget exceeds the byte length.
            int usedBytes = 0;
            int keptChars = 0;
            while (keptChars < str.Length)
            {
                int cost = Encoding.Default.GetByteCount(str.Substring(keptChars, 1));
                if (usedBytes + cost > index)
                {
                    break;
                }
                usedBytes += cost;
                keptChars++;
            }
            Console.WriteLine(str.Substring(0, keptChars));
思路很简单，转换成字节数组后，只需比较最后一位，对最后一位进行取舍就ok了

// A version I wrote myself; it does what you described.
// Returns the longest prefix of splitStr whose length, as reported by Length()
// on the String result, does not exceed Pos. The text is walked through a
// WideString copy one wide character at a time, so a character is never split.
// NOTE(review): presumably String is a byte-based AnsiString here, so Length()
// counts bytes — confirm for the compiler version in use.
function GetSubString(splitStr:String;Pos: Integer): String;
var
  CurStr: WideString;
  Str: String;
  i: Integer;
begin
  CurStr := splitStr;
  Result := '';
  // Iterate over the characters that actually exist, not up to Pos.
  for i := 1 to Length(CurStr) do
  begin
    Str := CurStr[i];
    // Stop before the character that would push the result past the limit.
    if Length(Result + Str) > Pos then
      Break;
    Result := Result + Str;
  end;
end;

Str:=SCurStr[I];
    if Length(Result+Str)>Pos then
    begin
      Result:=Result;
      Break;
    end
    else
      Result:=Result+Str;
  end;
end;

1个汉字等于2个char。肯定是个乱码subString返回忽略了乱码的，所以不会出现半个汉字

先顶一下,回头自己做一下,不错.http://www.92dotnet.com/index.aspx到这里面去找一下！！！

告诉你一个简单的方法:
如果你的字符是UNICODE的，那么，直接截断就OK，因为UNICODE字节，每个字，不管你是什么字，那么都只占用两个字节，不多也不少，所以直接截断不会出现半个汉字的情况
如果你的字符串是多字节的，那么，先用 MultiByteToWideChar 转换为 UNICODE，按上面的方法截断，然后再用 WideCharToMultiByte 转换回去，就没有任何问题了。
还有一个复杂点的办法，还是得区分 UNICODE 与多字节：
UNICODE:
就不说了，直接截断OK，
多字节:
如果是 ANSI 编码，那么，汉字的每个字节的最高位为 1，以此来决定从哪截断。也就是说，如果是字母或标点符号之类的，它只占一个字节，且这个字节的值小于 0x80，而汉字的每个字节都 >= 0x80。
如果是 UTF-8 编码，那更麻烦；还有其它的编码，慢慢看吧。其它，最优方案是上面最简单的方案，我想这道题是考你对 UNICODE 和多字节的理解吧。

定义一个BYTE数组，用SUBSTRING截取

  /**
   * Returns the left part of {@code source} limited to {@code maxByteLen}
   * bytes in the platform default charset, never returning half a character.
   *
   * <p>When the byte limit falls inside a character, {@code flag} selects the
   * rounding direction: a positive value keeps that character whole (the
   * result is then longer than {@code maxByteLen} bytes), any other value
   * drops it.
   *
   * @param source     the text to shorten; {@code null} yields {@code ""}
   * @param maxByteLen the byte limit; values {@code <= 0} yield {@code ""}
   * @param flag       rounding direction for a split character, e.g.
   *                   {@code 1} to include it or {@code -1} to exclude it
   * @return the shortened text, or {@code source} unchanged when it fits
   */
  public static String leftStr(String source, int maxByteLen, int flag){
        if(source == null || maxByteLen <= 0){
            return "";
        }
        int usedBytes = 0;
        int pos = 0;
        while (pos < source.length()) {
            // Measure whole code points with their real encoded width.
            int units = Character.charCount(source.codePointAt(pos));
            int cost = source.substring(pos, pos + units).getBytes().length;
            if (usedBytes + cost > maxByteLen) {
                break;
            }
            usedBytes += cost;
            pos += units;
        }
        // usedBytes < maxByteLen with text remaining means the limit lands
        // inside the next character; a limit exactly on a boundary needs no rounding.
        boolean splitsNext = pos < source.length() && usedBytes < maxByteLen;
        if (splitsNext && flag > 0) {
            pos += Character.charCount(source.codePointAt(pos));
        }
        return source.substring(0, pos);
    }

System.out.println("皮蛋的java博客http://www.lifeispig.cn".replaceAll("[!^\\u4E00-\u9FA5]", "")); 我用这个吧..最简单.. 一句话就可以了

学习37楼的方法,加点改进输入超过字节长度的情况!
/**
 * Returns the longest prefix of {@code str} whose encoding in the platform
 * default charset occupies at most {@code index} bytes, never splitting a
 * character. A string that already fits is returned whole.
 *
 * @param str   the text to shorten; {@code null} yields {@code ""}
 * @param index the byte budget; values {@code <= 0} yield {@code ""}
 * @return the prefix that fits; never {@code null}
 */
public static String getString(String str,int index){
    if (str == null || index <= 0) {
        return "";
    }
    int usedBytes = 0;
    int pos = 0;
    while (pos < str.length()) {
        // Encode one code point per step rather than re-encoding the whole
        // prefix on every iteration.
        int units = Character.charCount(str.codePointAt(pos));
        int cost = str.substring(pos, pos + units).getBytes().length;
        if (usedBytes + cost > index) {
            break;
        }
        usedBytes += cost;
        pos += units;
    }
    return str.substring(0, pos);
}

    /**
     * Shortens a string to a display width and appends a suffix to the
     * shortened text. A string that fits within the width is returned
     * unchanged and without the suffix.
     *
     * @param str      the string to shorten; {@code null}, empty, or a
     *                 {@code length} below 1 returns {@code str} as-is
     * @param length   the width to keep; a character with code up to 0xFF
     *                 counts as 1, every other character counts as 2
     * @param suffix   text appended (after trimming) when the string was
     *                 shortened; {@code null} or blank appends nothing
     * @return the shortened string, or {@code str} itself when nothing was cut
     */
    public static String truncate(String str, int length, String suffix) {
        if (str == null || str.length() == 0 || length < 1) {
            return str;
        }

        final int total = str.length();
        int width = 0;   // accumulated display width
        int keep = 0;    // number of leading chars that fit
        while (keep < total) {
            width += str.charAt(keep) > 0xff ? 2 : 1;
            if (width > length) {
                break;
            }
            keep++;
        }

        if (keep == total) {
            return str;
        }

        String head = str.substring(0, keep);
        String tail = (suffix == null) ? "" : suffix.trim();
        return tail.length() == 0 ? head : head + tail;
    }

// This problem is simple; here is one solution.
//
// Returns the first `number` bytes of orgStr, shortened by one byte when the
// cut would otherwise fall between the two bytes of a double-byte character.
// The string is scanned from the start so that lead bytes and trail bytes are
// told apart; a byte value above 128 is treated as the lead byte of a
// two-byte character.
// NOTE(review): assumes a double-byte encoding such as GBK — confirm for the data in use.
string getsubstr(string const& orgStr, int number)
{
    if (number <= 0) return string();

    string::size_type limit = static_cast<string::size_type>(number);
    if (limit > orgStr.size()) limit = orgStr.size();

    string::size_type pos = 0;
    while (pos < limit)
    {
        // Compare as unsigned: a plain char may be signed and never exceed 128.
        unsigned char lead = static_cast<unsigned char>(orgStr[pos]);
        string::size_type width = (lead > 128) ? 2 : 1;
        if (pos + width > limit) break;   // only half of this character would fit
        pos += width;
    }
    return string(orgStr, 0, pos);
}
// On Chinese character encodings: in double-byte encoding systems the lead
// byte of a Chinese character is > 128, and most encodings we meet in practice
// are double-byte. Supporting further encodings is a separate topic.

/// <summary>
/// Returns the longest prefix of <paramref name="str"/> whose UTF-8 encoding
/// occupies at most <paramref name="byteLength"/> bytes, without ever
/// splitting a character (or a surrogate pair).
/// </summary>
/// <param name="str">Text to shorten; null or empty yields an empty string.</param>
/// <param name="byteLength">Byte budget; values &lt;= 0 yield an empty string.</param>
/// <returns>The prefix that fits in the budget; never null.</returns>
public static string SubstringByByte(string str, int byteLength)
{
    if (string.IsNullOrEmpty(str) || byteLength <= 0)
        return string.Empty;

    StringBuilder kept = new StringBuilder();
    int remaining = byteLength;
    int pos = 0;
    while (pos < str.Length)
    {
        // A surrogate pair is one character and must be measured and copied as a unit.
        int units = char.IsSurrogatePair(str, pos) ? 2 : 1;
        int cost = Encoding.UTF8.GetByteCount(str.Substring(pos, units));

        // Check the budget BEFORE appending so the result never exceeds it.
        if (cost > remaining)
            break;

        kept.Append(str, pos, units);
        remaining -= cost;
        pos += units;
    }
    return kept.ToString();
}

调试易

一个关于截取字符串的面试题！

解决方案 »