« 前一篇:Linux下推荐应用程序列表
后一篇:优化Apache »

VC中GBK与UTF8字符串的转换 @ 10/27/2004

技术类
想在VC中把GB2312/GBK字符串转换成UTF8/Unicode字符串,昨天折腾了三个多小时,在google上跑了好几圈,还是没能实现,结果今天早上一过来就搞定了。共享之:
// Converts strGBK in place from the system ANSI code page to UTF-8.
//
// The source code page is CP_ACP, which is GBK (code page 936) only on a
// Simplified Chinese Windows installation; on other locales the input is
// interpreted in whatever the system ANSI code page is.
// The (LPCTSTR) cast is passed where the API expects a narrow string, so this
// function is meant for non-UNICODE (MBCS) builds where CString holds chars.
//
// If any conversion step fails, strGBK is left unchanged.
void ConvertGBKToUtf8(CString& strGBK) {
    // Size query: with cbMultiByte == -1 the returned count of wide
    // characters already includes the terminating null.
    int wideLen = MultiByteToWideChar(CP_ACP, 0, (LPCTSTR)strGBK, -1, NULL, 0);
    if (wideLen <= 0)
        return;

    // wchar_t (not unsigned short) so the buffer matches LPWSTR even when
    // wchar_t is a distinct built-in type.
    wchar_t *wide = new wchar_t[wideLen];
    if (MultiByteToWideChar(CP_ACP, 0, (LPCTSTR)strGBK, -1, wide, wideLen) > 0) {
        // Size query for the UTF-8 output, again including the terminator.
        int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide, -1, NULL, 0, NULL, NULL);
        if (utf8Len > 0) {
            char *utf8 = new char[utf8Len];
            if (WideCharToMultiByte(CP_UTF8, 0, wide, -1, utf8, utf8Len, NULL, NULL) > 0)
                strGBK = utf8;
            delete[] utf8;
        }
    }
    delete[] wide;
}

// Converts strUtf8 in place from UTF-8 to the system ANSI code page.
//
// The target code page is CP_ACP, which is GBK (code page 936) only on a
// Simplified Chinese Windows installation; on other locales the output is
// encoded in whatever the system ANSI code page is.
// The (LPCTSTR) cast is passed where the API expects a narrow string, so this
// function is meant for non-UNICODE (MBCS) builds where CString holds chars.
//
// If any conversion step fails, strUtf8 is left unchanged.
void ConvertUtf8ToGBK(CString& strUtf8) {
    // Size query: with cbMultiByte == -1 the returned count of wide
    // characters already includes the terminating null.
    int wideLen = MultiByteToWideChar(CP_UTF8, 0, (LPCTSTR)strUtf8, -1, NULL, 0);
    if (wideLen <= 0)
        return;

    // wchar_t (not unsigned short) so the buffer matches LPWSTR even when
    // wchar_t is a distinct built-in type.
    wchar_t *wide = new wchar_t[wideLen];
    if (MultiByteToWideChar(CP_UTF8, 0, (LPCTSTR)strUtf8, -1, wide, wideLen) > 0) {
        // Size query for the ANSI output, again including the terminator.
        int ansiLen = WideCharToMultiByte(CP_ACP, 0, wide, -1, NULL, 0, NULL, NULL);
        if (ansiLen > 0) {
            char *ansi = new char[ansiLen];
            if (WideCharToMultiByte(CP_ACP, 0, wide, -1, ansi, ansiLen, NULL, NULL) > 0)
                strUtf8 = ansi;
            delete[] ansi;
        }
    }
    delete[] wide;
}



==========
2006-09-14
补充:UTF8的编解码函数
==========
<script language="javascript">
/**
 * Encode a JavaScript (UTF-16) string as UTF-8.
 *
 * The result is a "byte string": every character has a code in 0..255 and
 * stands for one UTF-8 byte. CRLF pairs are normalized to LF before encoding.
 * A valid surrogate pair is combined into one code point and emitted as a
 * single 4-byte sequence; an unpaired surrogate is emitted as a 3-byte
 * sequence.
 *
 * @param {string} rawtext text to encode
 * @returns {string} UTF-8 bytes, one per character
 */
function encode_utf8(rawtext) {
  rawtext = rawtext.replace(/\r\n/g, "\n");
  var utftext = "";
  for (var n = 0; n < rawtext.length; n++) {
    var c = rawtext.charCodeAt(n);

    // Merge a high surrogate followed by a low surrogate into one code point.
    if (c >= 0xD800 && c <= 0xDBFF && n + 1 < rawtext.length) {
      var low = rawtext.charCodeAt(n + 1);
      if (low >= 0xDC00 && low <= 0xDFFF) {
        c = 0x10000 + ((c - 0xD800) << 10) + (low - 0xDC00);
        n++; // the low surrogate has been consumed
      }
    }

    if (c < 128) {
      // 0 - 127 => 1 byte
      utftext += String.fromCharCode(c);
    } else if (c < 2048) {
      // 128 - 2047 => 2 bytes
      utftext += String.fromCharCode((c >> 6) | 192);
      utftext += String.fromCharCode((c & 63) | 128);
    } else if (c < 65536) {
      // 2048 - 65535 => 3 bytes
      utftext += String.fromCharCode((c >> 12) | 224);
      utftext += String.fromCharCode(((c >> 6) & 63) | 128);
      utftext += String.fromCharCode((c & 63) | 128);
    } else {
      // 65536 - 1114111 => 4 bytes
      utftext += String.fromCharCode((c >> 18) | 240);
      utftext += String.fromCharCode(((c >> 12) & 63) | 128);
      utftext += String.fromCharCode(((c >> 6) & 63) | 128);
      utftext += String.fromCharCode((c & 63) | 128);
    }
  }
  return utftext;
}

/**
 * Decode a UTF-8 "byte string" (one character per byte, as produced by
 * encode_utf8) back into a JavaScript string.
 *
 * Lead bytes 192..223 start a 2-byte sequence and 240..247 a 4-byte sequence;
 * every other value >= 128 is treated as the start of a 3-byte sequence.
 * Continuation bytes are not validated. A byte missing at the end of the
 * input contributes zero bits.
 *
 * @param {string} utftext UTF-8 bytes, one per character
 * @returns {string} decoded text
 */
function decode_utf8(utftext) {
  var plaintext = "";
  var i = 0;
  var c = 0, c2 = 0, c3 = 0, c4 = 0;
  while (i < utftext.length) {
    c = utftext.charCodeAt(i);
    if (c < 128) {
      plaintext += String.fromCharCode(c);
      i++;
    } else if ((c > 191) && (c < 224)) {
      c2 = utftext.charCodeAt(i + 1);
      plaintext += String.fromCharCode(((c & 31) << 6) | (c2 & 63));
      i += 2;
    } else if ((c > 239) && (c < 248)) {
      c2 = utftext.charCodeAt(i + 1);
      c3 = utftext.charCodeAt(i + 2);
      c4 = utftext.charCodeAt(i + 3);
      var cp = ((c & 7) << 18) | ((c2 & 63) << 12) | ((c3 & 63) << 6) | (c4 & 63);
      if (cp > 0x10FFFF) {
        // Outside the Unicode range: substitute the replacement character.
        plaintext += String.fromCharCode(0xFFFD);
      } else if (cp < 0x10000) {
        // Overlong form of a BMP character; emit it as a single unit.
        plaintext += String.fromCharCode(cp);
      } else {
        // Split the code point into a UTF-16 surrogate pair.
        cp -= 0x10000;
        plaintext += String.fromCharCode(0xD800 + (cp >> 10), 0xDC00 + (cp & 0x3FF));
      }
      i += 4;
    } else {
      c2 = utftext.charCodeAt(i + 1);
      c3 = utftext.charCodeAt(i + 2);
      plaintext += String.fromCharCode(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
      i += 3;
    }
  }
  return plaintext;
}
</script>
发布于 10/27/2004 10:25:33 | 评论:9
old_eye @ 11/20/2004 15:10:32
谢谢 你的代码
xiexie @ 6/9/2005 15:05:25
感谢感谢。
yzligp @ 7/1/2005 0:33:27
非常感谢
蓝心炬 @ 11/8/2005 17:42:55
非常感谢!
windguy @ 3/2/2006 20:47:36
谢谢,写得非常精简!
waaaa @ 4/5/2006 15:09:09
谈笑风生
<匿名人士> @ 8/13/2006 13:15:36
谢谢
kenlistian @ 9/10/2006 22:55:49
在cbuilder中直接调用封装好的
utf8decode
utf8encode,
把它导出dll,更好.
a @ 1/10/2007 23:56:55
垃圾

看帖要回帖...

categories
archives
links
statistics
  • 网志数:1168
  • 评论数:2011