蛙蛙请教:把一段c算法代码转换成c#代码。
这是一段剪贴板的数据转换算法代码,请帮忙把UTF8ToHtml转换成c#代码,我转了一下,根本不能用。
再帮忙写下注释,简单说一下原理,谢谢。
原文如下:
I was working on a project where I had to paste HTML, copied from the browser, into a text box. A quick search for "HTML Clipboard Format" in MSDN gives you an article that thoroughly explains how HTML is kept in the Clipboard. Unfortunately, this article only tells you that it is kept in UTF-8 format, without explaining how to convert from UTF-8 back to HTML. So I had to do some research on my own. Unicode characters can be used in ASCII-only HTML text by embedding a special token, &#code;, into the text, where code is the Unicode code (in decimal format) of the symbol. For some symbols there are special names. For example, "&nbsp;" is the same as "&#160;"... You can jump to the specification if you need more examples. Here is a UTF8ToHtml function, which converts from UTF-8 to HTML. The algorithm is not explained, but you can read more about it here.
//utf8 - pointer to UTF8 formatted text. dwSize - size of UTF8 text; ptr is the pointer to Output buffer.
//The OnClickedPastehtml is the handler for BN_CLICKED event of the button in Dialog box. IDC_TEXT is the multiline text box.
// Converts UTF-8 encoded HTML text into ASCII-only HTML.
//
// Bytes that do not start a complete multi-byte UTF-8 sequence are copied
// through unchanged, so existing markup is preserved. Every decoded
// multi-byte character is written as an HTML character reference: "&nbsp;"
// for code 160 and "&#<decimal>;" for everything else.
//
//   utf8   - UTF-8 input; it does not have to be NUL-terminated.
//   dwSize - number of input bytes.
//   ptr    - output buffer; receives a NUL-terminated string. The worst
//            expansion is a 2-byte sequence becoming 7 characters, so a
//            buffer of 4 * dwSize + 1 bytes is always large enough.
void UTF8ToHtml(BYTE *utf8, DWORD dwSize, CHAR *ptr )
{
    BYTE *end = utf8 + dwSize;
    while( utf8 < end )
    {
        const BYTE lead = *utf8;
        int trail = 0;   // number of continuation bytes announced by the lead byte
        int code = 0;    // decoded code point; 0 means "copy the byte as is"

        if( (lead & 0xF0) == 0xF0 )
        {
            trail = 3;
            code = lead & 0x07;
        }
        else if( (lead & 0xE0) == 0xE0 )
        {
            trail = 2;
            code = lead & 0x0F;
        }
        else if( (lead & 0xC0) == 0xC0 )
        {
            trail = 1;
            code = lead & 0x1F;
        }

        if( trail != 0 && (end - utf8) > trail )
        {
            // Each continuation byte contributes its low six bits.
            for( int k = 1; k <= trail; ++k )
                code = (code << 6) | (utf8[k] & 0x3F);
            utf8 += trail;
        }
        else
        {
            // Plain byte, or a sequence cut off by the end of the input:
            // never read past 'end', just pass the byte through.
            code = 0;
        }

        if( code == 0 )
        {
            *ptr++ = static_cast<CHAR>(*utf8);
        }
        else
        {
            // "&#2097151;" (largest 21-bit value) needs 11 bytes including
            // the terminator; the former 10-byte buffer overflowed.
            char s[16];
            switch(code)
            {
            case 160:
                strcpy( s, "&nbsp;" );
                break;
            // The cases below are ASCII characters, which normally take the
            // pass-through path above; they are only reached for overlong
            // encodings. 38 is '&' (the old label 36 is '$').
            case 34:
                strcpy( s, "&quot;" );
                break;
            case 38:
                strcpy( s, "&amp;" );
                break;
            case 60:
                strcpy( s, "&lt;" );
                break;
            case 62:
                strcpy( s, "&gt;" );
                break;
            default:
                sprintf( s, "&#%d;", code );
                break;
            }
            strcpy( ptr, s );
            ptr += strlen(s);
        }
        utf8++;
    }
    *ptr = 0;
}
// Handler for the paste button: copies the clipboard content into IDC_TEXT.
//
// The registered "HTML Format" is preferred and CF_TEXT is the fallback.
// When the data contains both fragment markers, only the text between them
// is shown, after conversion by UTF8ToHtml; otherwise the whole clipboard
// text is shown unchanged. Always returns 0.
LRESULT CDialog::OnClickedPastehtml( WORD wNotifyCode,
WORD wID,
HWND hWndCtl,
BOOL& bHandled)
{
    if (!OpenClipboard() )
        return 0;

    UINT uHtmlFormat = RegisterClipboardFormat("HTML Format");
    UINT uFormat = uHtmlFormat;
    if( IsClipboardFormatAvailable( uHtmlFormat ) == FALSE )
    {
        if( IsClipboardFormatAvailable( CF_TEXT ) == FALSE )
        {
            // The clipboard was opened above and must be released on every path.
            CloseClipboard();
            return 0;
        }
        uFormat = CF_TEXT;
    }

    HGLOBAL hglb = GetClipboardData(uFormat);
    if (hglb != NULL)
    {
        // NOTE(review): the strstr calls below treat the data as char text,
        // which presumes a non-UNICODE build - confirm.
        LPTSTR lptstr = static_cast<LPTSTR>(GlobalLock(hglb));
        if (lptstr != NULL)
        {
            static const char kStartTag[] = "<!--StartFragment-->";
            static const char kEndTag[] = "<!--EndFragment-->";

            char *tmp = NULL;
            char *ptr1 = strstr( lptstr, kStartTag );
            if( ptr1 != NULL )
            {
                ptr1 += sizeof(kStartTag) - 1;
                // Look for the end marker only after the start marker so the
                // fragment length can never be negative.
                char *ptr2 = strstr( ptr1, kEndTag );
                if( ptr2 != NULL )
                {
                    const size_t iSize = static_cast<size_t>(ptr2 - ptr1);
                    // A 2-byte UTF-8 sequence can grow to 7 characters, so
                    // 4 bytes per input byte plus the terminator is a safe
                    // bound. Heap storage avoids exhausting the stack on
                    // large clipboard contents.
                    tmp = static_cast<char*>(malloc( iSize * 4 + 1 ));
                    if( tmp != NULL )
                        UTF8ToHtml(reinterpret_cast<BYTE*>(ptr1), static_cast<DWORD>(iSize), tmp );
                }
            }

            // Fall back to the raw text when no complete fragment was found
            // or the buffer could not be allocated.
            SetDlgItemText(IDC_TEXT, tmp != NULL ? tmp : lptstr );
            free(tmp);
            GlobalUnlock(hglb);
        }
    }
    CloseClipboard();
    return 0;
}
这是我转换的代码
// Drop handler: takes the dropped data in HTML clipboard format, cuts out the
// part between the StartFragment/EndFragment comments, converts it with
// UTF8ToHtml and shows it in a new FNewPost window.
private void FTopMost_DragDrop(object sender, DragEventArgs e)
{
    const string startMarker = "<!--StartFragment-->";
    const string endMarker = "<!--EndFragment-->";

    object data = e.Data.GetData(DataFormats.Html, true);
    if (data == null)
    {
        // Nothing was dropped in HTML format; there is nothing to show.
        return;
    }
    strCont = data.ToString();

    // Default to the whole text so a missing marker no longer makes
    // Substring throw.
    string s = strCont;
    int start = strCont.IndexOf(startMarker, System.StringComparison.Ordinal);
    if (start >= 0)
    {
        start += startMarker.Length;
        int end = strCont.IndexOf(endMarker, start, System.StringComparison.Ordinal);
        if (end >= 0)
        {
            s = strCont.Substring(start, end - start);
        }
    }

    FNewPost f = new FNewPost();
    f.HTML = UTF8ToHtml(s);
    f.Show();
    f.Activate();
}
// Converts text to ASCII-only HTML: characters below 0x80 are copied
// unchanged (so existing markup survives) and every other character is
// replaced by a character reference - "&nbsp;" for code 160 and
// "&#<decimal>;" for everything else. Returns an empty string for null or
// empty input.
//
// NOTE(review): this works on the characters of the string, so it presumes
// the text was decoded correctly. If the caller received UTF-8 bytes that
// were decoded with another code page, the raw bytes have to be re-decoded
// as UTF-8 before calling - confirm against the caller.
string UTF8ToHtml(string utf8)
{
    if (string.IsNullOrEmpty(utf8))
    {
        return string.Empty;
    }

    System.Text.StringBuilder html = new System.Text.StringBuilder(utf8.Length);
    for (int i = 0; i < utf8.Length; i++)
    {
        int code = utf8[i];

        // A surrogate pair encodes one character above U+FFFF; emit it as a
        // single reference instead of two meaningless halves.
        if (char.IsHighSurrogate(utf8[i]) && i + 1 < utf8.Length && char.IsLowSurrogate(utf8[i + 1]))
        {
            code = char.ConvertToUtf32(utf8[i], utf8[i + 1]);
            i++;
        }

        if (code < 0x80)
        {
            html.Append((char)code);
        }
        else if (code == 160)
        {
            html.Append("&nbsp;");
        }
        else
        {
            html.Append("&#");
            html.Append(code.ToString(System.Globalization.CultureInfo.InvariantCulture));
            html.Append(';');
        }
    }
    return html.ToString();
}
这是一段剪贴板数据,当然这里没有中文
Version:0.9
StartHTML:71
EndHTML:170
StartFragment:140
EndFragment:160
StartSelection:140
EndSelection:160
<!DOCTYPE>
<HTML>
<HEAD>
<TITLE>The HTML Clipboard</TITLE>
<BASE HREF="http://sample/specs">
</HEAD>
<BODY>
<!--StartFragment -->
<P>The Fragment</P>
<!--EndFragment -->
</BODY>
</HTML>
这是一段剪贴板的数据转换算法代码,请帮忙把UTF8ToHtml转换成c#代码,我转了一下,根本不能用。
再帮忙写下注释,简单说一下原理,谢谢。
原文如下:
I was working on a project where I had to paste HTML, copied from the browser, into a text box. A quick search for "HTML Clipboard Format" in MSDN gives you an article that thoroughly explains how HTML is kept in the Clipboard. Unfortunately, this article only tells you that it is kept in UTF-8 format, without explaining how to convert from UTF-8 back to HTML. So I had to do some research on my own. Unicode characters can be used in ASCII-only HTML text by embedding a special token, &#code;, into the text, where code is the Unicode code (in decimal format) of the symbol. For some symbols there are special names. For example, "&nbsp;" is the same as "&#160;"... You can jump to the specification if you need more examples. Here is a UTF8ToHtml function, which converts from UTF-8 to HTML. The algorithm is not explained, but you can read more about it here.
//utf8 - pointer to UTF8 formatted text. dwSize - size of UTF8 text; ptr is the pointer to Output buffer.
//The OnClickedPastehtml is the handler for BN_CLICKED event of the button in Dialog box. IDC_TEXT is the multiline text box.
// Converts UTF-8 encoded HTML text into ASCII-only HTML.
//
// Bytes that do not start a complete multi-byte UTF-8 sequence are copied
// through unchanged, so existing markup is preserved. Every decoded
// multi-byte character is written as an HTML character reference: "&nbsp;"
// for code 160 and "&#<decimal>;" for everything else.
//
//   utf8   - UTF-8 input; it does not have to be NUL-terminated.
//   dwSize - number of input bytes.
//   ptr    - output buffer; receives a NUL-terminated string. The worst
//            expansion is a 2-byte sequence becoming 7 characters, so a
//            buffer of 4 * dwSize + 1 bytes is always large enough.
void UTF8ToHtml(BYTE *utf8, DWORD dwSize, CHAR *ptr )
{
    BYTE *end = utf8 + dwSize;
    while( utf8 < end )
    {
        const BYTE lead = *utf8;
        int trail = 0;   // number of continuation bytes announced by the lead byte
        int code = 0;    // decoded code point; 0 means "copy the byte as is"

        if( (lead & 0xF0) == 0xF0 )
        {
            trail = 3;
            code = lead & 0x07;
        }
        else if( (lead & 0xE0) == 0xE0 )
        {
            trail = 2;
            code = lead & 0x0F;
        }
        else if( (lead & 0xC0) == 0xC0 )
        {
            trail = 1;
            code = lead & 0x1F;
        }

        if( trail != 0 && (end - utf8) > trail )
        {
            // Each continuation byte contributes its low six bits.
            for( int k = 1; k <= trail; ++k )
                code = (code << 6) | (utf8[k] & 0x3F);
            utf8 += trail;
        }
        else
        {
            // Plain byte, or a sequence cut off by the end of the input:
            // never read past 'end', just pass the byte through.
            code = 0;
        }

        if( code == 0 )
        {
            *ptr++ = static_cast<CHAR>(*utf8);
        }
        else
        {
            // "&#2097151;" (largest 21-bit value) needs 11 bytes including
            // the terminator; the former 10-byte buffer overflowed.
            char s[16];
            switch(code)
            {
            case 160:
                strcpy( s, "&nbsp;" );
                break;
            // The cases below are ASCII characters, which normally take the
            // pass-through path above; they are only reached for overlong
            // encodings. 38 is '&' (the old label 36 is '$').
            case 34:
                strcpy( s, "&quot;" );
                break;
            case 38:
                strcpy( s, "&amp;" );
                break;
            case 60:
                strcpy( s, "&lt;" );
                break;
            case 62:
                strcpy( s, "&gt;" );
                break;
            default:
                sprintf( s, "&#%d;", code );
                break;
            }
            strcpy( ptr, s );
            ptr += strlen(s);
        }
        utf8++;
    }
    *ptr = 0;
}
// Handler for the paste button: copies the clipboard content into IDC_TEXT.
//
// The registered "HTML Format" is preferred and CF_TEXT is the fallback.
// When the data contains both fragment markers, only the text between them
// is shown, after conversion by UTF8ToHtml; otherwise the whole clipboard
// text is shown unchanged. Always returns 0.
LRESULT CDialog::OnClickedPastehtml( WORD wNotifyCode,
WORD wID,
HWND hWndCtl,
BOOL& bHandled)
{
    if (!OpenClipboard() )
        return 0;

    UINT uHtmlFormat = RegisterClipboardFormat("HTML Format");
    UINT uFormat = uHtmlFormat;
    if( IsClipboardFormatAvailable( uHtmlFormat ) == FALSE )
    {
        if( IsClipboardFormatAvailable( CF_TEXT ) == FALSE )
        {
            // The clipboard was opened above and must be released on every path.
            CloseClipboard();
            return 0;
        }
        uFormat = CF_TEXT;
    }

    HGLOBAL hglb = GetClipboardData(uFormat);
    if (hglb != NULL)
    {
        // NOTE(review): the strstr calls below treat the data as char text,
        // which presumes a non-UNICODE build - confirm.
        LPTSTR lptstr = static_cast<LPTSTR>(GlobalLock(hglb));
        if (lptstr != NULL)
        {
            static const char kStartTag[] = "<!--StartFragment-->";
            static const char kEndTag[] = "<!--EndFragment-->";

            char *tmp = NULL;
            char *ptr1 = strstr( lptstr, kStartTag );
            if( ptr1 != NULL )
            {
                ptr1 += sizeof(kStartTag) - 1;
                // Look for the end marker only after the start marker so the
                // fragment length can never be negative.
                char *ptr2 = strstr( ptr1, kEndTag );
                if( ptr2 != NULL )
                {
                    const size_t iSize = static_cast<size_t>(ptr2 - ptr1);
                    // A 2-byte UTF-8 sequence can grow to 7 characters, so
                    // 4 bytes per input byte plus the terminator is a safe
                    // bound. Heap storage avoids exhausting the stack on
                    // large clipboard contents.
                    tmp = static_cast<char*>(malloc( iSize * 4 + 1 ));
                    if( tmp != NULL )
                        UTF8ToHtml(reinterpret_cast<BYTE*>(ptr1), static_cast<DWORD>(iSize), tmp );
                }
            }

            // Fall back to the raw text when no complete fragment was found
            // or the buffer could not be allocated.
            SetDlgItemText(IDC_TEXT, tmp != NULL ? tmp : lptstr );
            free(tmp);
            GlobalUnlock(hglb);
        }
    }
    CloseClipboard();
    return 0;
}
这是我转换的代码
// Drop handler: takes the dropped data in HTML clipboard format, cuts out the
// part between the StartFragment/EndFragment comments, converts it with
// UTF8ToHtml and shows it in a new FNewPost window.
private void FTopMost_DragDrop(object sender, DragEventArgs e)
{
    const string startMarker = "<!--StartFragment-->";
    const string endMarker = "<!--EndFragment-->";

    object data = e.Data.GetData(DataFormats.Html, true);
    if (data == null)
    {
        // Nothing was dropped in HTML format; there is nothing to show.
        return;
    }
    strCont = data.ToString();

    // Default to the whole text so a missing marker no longer makes
    // Substring throw.
    string s = strCont;
    int start = strCont.IndexOf(startMarker, System.StringComparison.Ordinal);
    if (start >= 0)
    {
        start += startMarker.Length;
        int end = strCont.IndexOf(endMarker, start, System.StringComparison.Ordinal);
        if (end >= 0)
        {
            s = strCont.Substring(start, end - start);
        }
    }

    FNewPost f = new FNewPost();
    f.HTML = UTF8ToHtml(s);
    f.Show();
    f.Activate();
}
// Converts text to ASCII-only HTML: characters below 0x80 are copied
// unchanged (so existing markup survives) and every other character is
// replaced by a character reference - "&nbsp;" for code 160 and
// "&#<decimal>;" for everything else. Returns an empty string for null or
// empty input.
//
// NOTE(review): this works on the characters of the string, so it presumes
// the text was decoded correctly. If the caller received UTF-8 bytes that
// were decoded with another code page, the raw bytes have to be re-decoded
// as UTF-8 before calling - confirm against the caller.
string UTF8ToHtml(string utf8)
{
    if (string.IsNullOrEmpty(utf8))
    {
        return string.Empty;
    }

    System.Text.StringBuilder html = new System.Text.StringBuilder(utf8.Length);
    for (int i = 0; i < utf8.Length; i++)
    {
        int code = utf8[i];

        // A surrogate pair encodes one character above U+FFFF; emit it as a
        // single reference instead of two meaningless halves.
        if (char.IsHighSurrogate(utf8[i]) && i + 1 < utf8.Length && char.IsLowSurrogate(utf8[i + 1]))
        {
            code = char.ConvertToUtf32(utf8[i], utf8[i + 1]);
            i++;
        }

        if (code < 0x80)
        {
            html.Append((char)code);
        }
        else if (code == 160)
        {
            html.Append("&nbsp;");
        }
        else
        {
            html.Append("&#");
            html.Append(code.ToString(System.Globalization.CultureInfo.InvariantCulture));
            html.Append(';');
        }
    }
    return html.ToString();
}
这是一段剪贴板数据,当然这里没有中文
Version:0.9
StartHTML:71
EndHTML:170
StartFragment:140
EndFragment:160
StartSelection:140
EndSelection:160
<!DOCTYPE>
<HTML>
<HEAD>
<TITLE>The HTML Clipboard</TITLE>
<BASE HREF="http://sample/specs">
</HEAD>
<BODY>
<!--StartFragment -->
<P>The Fragment</P>
<!--EndFragment -->
</BODY>
</HTML>
解决方案 »
- C#套接字编程关于不能循环接受客户端的问题
- 无操作系统的客户机能不能连接服务器?请大家帮忙给个思路
- 在用VS2008编程时,如何能够对修改的程序实时调试
- 如何让鼠标暂时不可点击
- C# 鼠标控制问题?
- 申请了一个虚拟主机,放了一个access数据库,C#里边的链接字符串怎么写?
- 如何实现两个窗口间的通讯?我想在MDI子窗口设置主窗口某菜单项不可用,怎么办?50分相送!
- 请教大家一个对称加密解密的问题,根据给出的解密方法,写出相对应的加密的方法
- 新手使用C#多线程遇到的问题
- 在datagrid中checkbox取值的问题!
- 想网络中的一台机器上写文件的时候,老是不成功,怀疑是不是权限的问题?
- delphi写的一个加密DLL,在C#中运行出错,
在C#里就得先用Encoding.UTF8.GetBytes()得到Byte数组
用Encoding.UTF8.GetString();返回
{
    // Body of UTF8ToHtml(string utf8): encodes the text as UTF-8, then walks
    // the bytes. Single-byte characters are copied unchanged; every
    // multi-byte sequence is decoded and written as a character reference
    // ("&nbsp;" for code 160, "&#<decimal>;" otherwise).
    byte[] utf8bytes = Encoding.UTF8.GetBytes(utf8);
    System.Text.StringBuilder ptr = new System.Text.StringBuilder(utf8bytes.Length);

    for (int i = 0; i < utf8bytes.Length; i++)
    {
        int code = 0;
        if ((utf8bytes[i] & 0xF0) == 0xF0)
        {
            // 4-byte sequence: 3 + 6 + 6 + 6 payload bits.
            code = ((utf8bytes[i] & 0x07) << 18) |
                   ((utf8bytes[i + 1] & 0x3F) << 12) |
                   ((utf8bytes[i + 2] & 0x3F) << 6) |
                   (utf8bytes[i + 3] & 0x3F);
            i += 3;
        }
        else if ((utf8bytes[i] & 0xE0) == 0xE0)
        {
            // 3-byte sequence: 4 + 6 + 6 payload bits.
            code = ((utf8bytes[i] & 0x0F) << 12) |
                   ((utf8bytes[i + 1] & 0x3F) << 6) |
                   (utf8bytes[i + 2] & 0x3F);
            i += 2;
        }
        else if ((utf8bytes[i] & 0xC0) == 0xC0)
        {
            // 2-byte sequence: 5 + 6 payload bits. The lead byte has to be
            // tested with a mask; comparing it to 0xC0 for equality never
            // matched real lead bytes (0xC2..0xDF).
            code = ((utf8bytes[i] & 0x1F) << 6) |
                   (utf8bytes[i + 1] & 0x3F);
            i += 1;
        }

        if (code == 0)
        {
            ptr.Append((char)utf8bytes[i]);
        }
        else if (code == 160)
        {
            ptr.Append("&nbsp;");
        }
        else
        {
            // Codes 34, 36, 60 and 62 are single-byte characters and are
            // always handled by the copy branch above, so the former switch
            // cases for them could never run.
            ptr.Append("&#");
            ptr.Append(code.ToString(System.Globalization.CultureInfo.InvariantCulture));
            ptr.Append(';');
        }
    }
    return ptr.ToString();
}
charset=utf-8. Alternatively put a META tag in the header that
declares it as such. Long ago HTML was restricted to Latin1 but that
is history. (Maybe there is more to this than you are telling us?)
Anyway, if you really want to convert Unicode to latin1 + html
character entities, I believe that GNU recode can do what you want:
你的英文我不懂,有中文吗?我只能看个大概,不能准确理解。
我问题解决了,详见以下网址
http://www.cnblogs.com/onlytiancai/archive/2007/04/10/706543.html
{
    // Body of UTF8ToHtml(string utf8): encodes the text as UTF-8, then walks
    // the bytes. Single-byte characters are copied unchanged; every
    // multi-byte sequence is decoded and written as a character reference
    // ("&nbsp;" for code 160, "&#<decimal>;" otherwise).
    byte[] utf8bytes = Encoding.UTF8.GetBytes(utf8);
    System.Text.StringBuilder ptr = new System.Text.StringBuilder(utf8bytes.Length);

    for (int i = 0; i < utf8bytes.Length; i++)
    {
        int code = 0;
        if ((utf8bytes[i] & 0xF0) == 0xF0)
        {
            // 4-byte sequence: 3 + 6 + 6 + 6 payload bits.
            code = ((utf8bytes[i] & 0x07) << 18) |
                   ((utf8bytes[i + 1] & 0x3F) << 12) |
                   ((utf8bytes[i + 2] & 0x3F) << 6) |
                   (utf8bytes[i + 3] & 0x3F);
            i += 3;
        }
        else if ((utf8bytes[i] & 0xE0) == 0xE0)
        {
            // 3-byte sequence: 4 + 6 + 6 payload bits.
            code = ((utf8bytes[i] & 0x0F) << 12) |
                   ((utf8bytes[i + 1] & 0x3F) << 6) |
                   (utf8bytes[i + 2] & 0x3F);
            i += 2;
        }
        else if ((utf8bytes[i] & 0xC0) == 0xC0)
        {
            // 2-byte sequence: 5 + 6 payload bits. The lead byte has to be
            // tested with a mask; comparing it to 0xC0 for equality never
            // matched real lead bytes (0xC2..0xDF).
            code = ((utf8bytes[i] & 0x1F) << 6) |
                   (utf8bytes[i + 1] & 0x3F);
            i += 1;
        }

        if (code == 0)
        {
            ptr.Append((char)utf8bytes[i]);
        }
        else if (code == 160)
        {
            ptr.Append("&nbsp;");
        }
        else
        {
            // Codes 34, 36, 60 and 62 are single-byte characters and are
            // always handled by the copy branch above, so the former switch
            // cases for them could never run.
            ptr.Append("&#");
            ptr.Append(code.ToString(System.Globalization.CultureInfo.InvariantCulture));
            ptr.Append(';');
        }
    }
    return ptr.ToString();
}
我早上看了会.net的DLL。发现你猜的是对的 2007-04-11 10:42:55 谁染枫林醉
怎么看的呀
2007-04-11 10:46:32 翔子
reflector 2007-04-11 10:47:07 翔子
text = new string((sbyte*) ptr);
2007-04-11 10:47:18 翔子
它就是这么转的 2007-04-11 10:47:27 翔子
if (unicode)
{
return new string((char*) ptr);
}
text = new string((sbyte*) ptr); 2007-04-11 10:47:49 翔子
Html的时候unicode定成了false
2007-04-11 10:44:51 谁染枫林醉
是吗?
2007-04-11 10:45:01 谁染枫林醉
你反射的哪条东西呀?
2007-04-11 10:48:33 翔子
你的WebEdit的webBrowser1.Navigate(Url); 2007-04-11 10:45:05 谁染枫林醉
哪个DLL呀
2007-04-11 10:45:13 谁染枫林醉
什么?
2007-04-11 10:49:08 翔子
public class DataObject : IDataObject, UnsafeNativeMethods.IOleDataObject
2007-04-11 10:45:45 谁染枫林醉
// Load handler: assigns webEdit.DocumentText an HTML skeleton whose body
// element carries contentEditable='true'.
private void FNewPost_Load(object sender, EventArgs e)
{
    const string editableShell = "<html><body contentEditable='true'></body></html>";
    webEdit.DocumentText = editableShell;
}
2007-04-11 10:46:00 谁染枫林醉
你反射的这个?
2007-04-11 10:49:33 翔子
嗯 2007-04-11 10:46:23 谁染枫林醉
在哪个DLL里呀
2007-04-11 10:49:59 翔子
从那个事件的E里一路找下去的 2007-04-11 10:46:45 谁染枫林醉
if (unicode)
{
return new string((char*) ptr); 这些代码是哪个方法?
2007-04-11 10:47:30 谁染枫林醉
是不是说unicode就用char转,而不是unicode就直接用sbyte转了呀,这块儿原理不太懂呀
2007-04-11 10:51:20 翔子
我也不太懂,不过,看起来,它就是写死了 2007-04-11 10:48:04 谁染枫林醉
那怎么重写这个过程呢
2007-04-11 10:51:38 翔子
不可能 2007-04-11 10:48:15 谁染枫林醉
为什么
2007-04-11 10:48:25 谁染枫林醉
这算bug了,一定能解决。
2007-04-11 10:51:54 翔子
我也想重写来着,他用的方法全是internal 2007-04-11 10:48:33 谁染枫林醉
我日
2007-04-11 10:52:03 翔子
外部不让有 2007-04-11 10:48:54 谁染枫林醉
那有没有相关事件可以挂接处理
2007-04-11 10:52:55 翔子
DataFormats.UnicodeText 可以得到正确文本,但,没有HTML 2007-04-11 10:49:36 谁染枫林醉
我知道的
2007-04-11 10:53:14 翔子
传说中只能调API来干活了 2007-04-11 10:53:42 翔子
用API来模拟他GetData() 2007-04-11 10:50:22 谁染枫林醉
那个我有相关资料的
2007-04-11 10:53:52 翔子
得到系统中的数据 2007-04-11 10:50:29 谁染枫林醉
恩
2007-04-11 10:54:32 翔子
关键是得到未处理的系统剪切数据 2007-04-11 10:53:22 谁染枫林醉
恩,等下,我找找
2007-04-11 11:02:05 翔子
你的方法确实绝啊!呵呵 2007-04-11 10:58:53 谁染枫林醉
呵呵,没办法,弄了三天了,急死了快
2007-04-11 11:04:13 翔子
呵呵,是啊,可以说成是.net的BUG了 2007-04-11 11:01:29 谁染枫林醉
找不到相关的文档了我,你查下相关win32api吧,应该有的