近来写了个截取中英文混合字符串的方法,但写得很不简洁。贴出来请大家看看怎么改进:
/// <summary>
/// Punctuation and whitespace characters that <see cref="interception"/> treats as word boundaries.
/// </summary>
private static readonly char[] InterceptionBreakChars =
{
    ',', ',', '.', '。', ':', ':', '!', '\'', '-', '<', '>', '(', ')', '{', '}',
    '〃', '〖', '〗', ' ', '"', '“', '”', '!', '?', '?'
};

/// <summary>
/// Returns true when <paramref name="c"/> is one of the word-boundary characters.
/// </summary>
private static bool IsInterceptionBreakChar(char c)
{
    foreach (char breakChar in InterceptionBreakChars)
    {
        if (breakChar == c)
        {
            return true;
        }
    }
    return false;
}

/// <summary>
/// Truncates mixed Chinese/English text to roughly <paramref name="length"/> display columns,
/// counting an ASCII character as one column and any other character as two.
/// </summary>
/// <remarks>
/// If the cut lands inside an English word, the rest of that word is kept as long as fewer than
/// ten extra characters are needed; otherwise the word is left cut. At least one character of a
/// non-empty input is always returned, even when <paramref name="length"/> is zero or negative.
/// </remarks>
/// <param name="sourceString">Text to truncate. Null is treated like an empty string.</param>
/// <param name="length">Target width in display columns.</param>
/// <returns>
/// The truncated text, followed by "..." when anything was dropped; the unchanged text when it
/// already fits; an empty string for null or empty input.
/// </returns>
public string interception(string sourceString, int length)
{
    // Longest run of trailing word characters that may be appended to finish a cut word.
    const int maxWordCompletion = 10;

    if (string.IsNullOrEmpty(sourceString))
    {
        return string.Empty;
    }

    StringBuilder sb = new StringBuilder();
    int width = 0;
    for (int i = 0; i < sourceString.Length; i++)
    {
        char current = sourceString[i];

        // A UTF-16 code unit encodes to a single UTF-8 byte exactly when it is ASCII.
        bool isAscii = current <= '\u007F';
        width += isAscii ? 1 : 2;
        sb.Append(current);

        // Keep surrogate pairs together so the result never ends in half a character.
        if (char.IsHighSurrogate(current)
            && i + 1 < sourceString.Length
            && char.IsLowSurrogate(sourceString[i + 1]))
        {
            i++;
            sb.Append(sourceString[i]);
        }

        if (width < length)
        {
            continue;
        }

        // The cut is clean when it lands on a wide character or on punctuation/whitespace.
        // Otherwise we are inside an English word and try to finish it.
        if (isAscii && !IsInterceptionBreakChar(current))
        {
            int wordEnd = i + 1;
            while (wordEnd < sourceString.Length
                   && sourceString[wordEnd] <= '\u007F'
                   && !IsInterceptionBreakChar(sourceString[wordEnd]))
            {
                wordEnd++;
            }

            int tailLength = wordEnd - (i + 1);
            if (tailLength < maxWordCompletion)
            {
                sb.Append(sourceString, i + 1, tailLength);
            }
        }
        break;
    }

    if (sb.Length < sourceString.Length)
    {
        return sb.ToString() + "...";
    }
    return sb.ToString();
}
/// <summary>
/// Determines whether the text is an integer written with ASCII digits, optionally preceded by
/// a single minus sign.
/// </summary>
/// <param name="text">The text to check.</param>
/// <returns>
/// true when the text is one or more digits 0-9 with an optional leading '-'; false for null,
/// an empty string, a lone "-", or text containing any other character (including '.').
/// </returns>
public static bool IsAllNumber(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return false;
    }

    // A minus sign is only allowed in the first position.
    int start = text[0] == '-' ? 1 : 0;
    if (start == text.Length)
    {
        // A lone "-" has no digits and is not a number.
        return false;
    }

    for (int i = start; i < text.Length; i++)
    {
        char c = text[i];
        if (c < '0' || c > '9')
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Determines whether every character of the text is a Chinese character in the range
/// U+4E00 through U+9FA5.
/// </summary>
/// <param name="text">The text to check.</param>
/// <returns>
/// true when every character lies in U+4E00..U+9FA5 (an empty string also yields true);
/// false for null or when any character falls outside that range.
/// </returns>
public static bool IsAllChina(string text)
{
    if (text == null)
    {
        return false;
    }

    foreach (char c in text)
    {
        // Direct range comparison; same range the regex class [\u4e00-\u9fa5] covered.
        if (c < '\u4e00' || c > '\u9fa5')
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Determines whether every character of the text is a letter according to
/// <see cref="char.IsLetter(char)"/>.
/// </summary>
/// <remarks>
/// <see cref="char.IsLetter(char)"/> accepts any Unicode letter, not only A-Z and a-z, so
/// Chinese characters also count as letters here.
/// </remarks>
/// <param name="text">The text to check.</param>
/// <returns>
/// true when every character is a letter (an empty string also yields true);
/// false for null or when any character is not a letter.
/// </returns>
public static bool IsAllChar(string text)
{
    if (text == null)
    {
        return false;
    }

    for (int i = 0; i < text.Length; i++)
    {
        if (!char.IsLetter(text[i]))
        {
            return false;
        }
    }
    return true;
}