使用 Windows API 实现中文字符串在 GBK、Unicode、UTF-8 三种编码之间的互转
#include <iostream> #include <string> #include <Windows.h> using namespace std; //gbk转UTF-8 string GbkToUtf8(const std::string& strGbk)//传入的strGbk是GBK编码 { //gbk转unicode int len = MultiByteToWideChar(CP_ACP, 0, strGbk.c_str(), -1, NULL, 0); wchar_t *strUnicode = new wchar_t[len]; wmemset(strUnicode, 0, len); MultiByteToWideChar(CP_ACP, 0, strGbk.c_str(), -1, strUnicode, len); //unicode转UTF-8 len = WideCharToMultiByte(CP_UTF8, 0, strUnicode, -1, NULL, 0, NULL, NULL); char * strUtf8 = new char[len]; WideCharToMultiByte(CP_UTF8, 0, strUnicode, -1, strUtf8, len, NULL, NULL); std::string strTemp(strUtf8);//此时的strTemp是UTF-8编码 delete[]strUnicode; delete[]strUtf8; strUnicode = NULL; strUtf8 = NULL; return strTemp; } //UTF-8转gbk string Utf8ToGbk(const std::string& strUtf8)//传入的strUtf8是UTF-8编码 { //UTF-8转unicode int len = MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, NULL, 0); wchar_t * strUnicode = new wchar_t[len];//len = 2 wmemset(strUnicode, 0, len); MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, strUnicode, len); //unicode转gbk len = WideCharToMultiByte(CP_ACP, 0, strUnicode, -1, NULL, 0, NULL, NULL); char *strGbk = new char[len];//len=3 本来为2,但是char*后面自动加上了 memset(strGbk, 0, len); WideCharToMultiByte(CP_ACP,0, strUnicode, -1, strGbk, len, NULL, NULL); std::string strTemp(strGbk);//此时的strTemp是GBK编码 delete[]strUnicode; delete[]strGbk; strUnicode = NULL; strGbk = NULL; return strTemp; } //gbk转unicode (下面的例子没用到) wstring GbkToUnicode(const std::string& strGbk)//返回值是wstring { int len = MultiByteToWideChar(CP_ACP, 0, strGbk.c_str(), -1, NULL, 0); wchar_t *strUnicode = new wchar_t[len]; wmemset(strUnicode, 0, len); MultiByteToWideChar(CP_ACP, 0, strGbk.c_str(), -1, strUnicode, len); std::wstring strTemp(strUnicode);//此时的strTemp是Unicode编码 delete[]strUnicode; strUnicode = NULL; return strTemp; } //Unicode转gbk string UnicodeToGbk (const std::wstring& strUnicode)//参数是wstring { int len = WideCharToMultiByte(CP_ACP, 0, strUnicode.c_str(), -1, NULL, 0, NULL, NULL); char 
*strGbk = new char[len];//len=3 本来为2,但是char*后面自动加上了 memset(strGbk, 0, len); WideCharToMultiByte(CP_ACP,0,strUnicode.c_str(), -1, strGbk, len, NULL, NULL); std::string strTemp(strGbk);//此时的strTemp是GBK编码 delete[]strGbk; strGbk = NULL; return strTemp; } int main() { //1、ANSI/GBK编码 string strGbk = "我"; int num = strGbk.size();//获取两个字符数,也是我字所占的字节数 unsigned char* p = (unsigned char*)strGbk.c_str(); for (int i = 0; i < num; i++) { printf("%0x", *p); p++; } //输出ced2 所以我的GBK编码是0xced2 printf(" "); char gbk[] = {0xce, 0xd2, 0x00}; //加上0x00字符串结束符,不会输出乱码 cout<<gbk<<endl;//输出汉字我 //2、unicodde编码 //方法一 //wchar_t str = 0x6211; //wcout.imbue(locale("chs")); //wcout << str << endl;//输出汉字我 //wchar_t c=L"我"; //cout << hex << (short)c << endl<<endl;//输出unicodde编码 6211 //方法二: wstring strUnicode = L"我";//转成unicode编码 num = strUnicode.size()*2;//乘以2,才是我所占的字节数 p = (unsigned char*)strUnicode.c_str(); for (int i = 0; i < num; i++) { printf("%0x", *p); p++; } //输出1162 因为默认是小端模式,所以我的unicode编码是0x6211 printf(" "); wchar_t s[2] = {0x6211, 0x00}; //加上0x00字符串结束符,不会输出乱码 wstring str =(wchar_t*)s; cout<<UnicodeToGbk(str)<<endl;//需要先将unicode字符串转成gbk之后才能用cout输出 //3、UTF-8编码 string strUtf8 = GbkToUtf8("我");//转成utf8编码 num = strUtf8.size();//num=3 p = (unsigned char*)strUtf8.c_str(); for (int i = 0; i < num; i++) { printf("%0x", *p); p++; } //输出e68891 printf(" "); char utf8[] = {0xe6, 0x88, 0x91,0x00}; //加上0x00字符串结束符,不会输出乱码 cout<<Utf8ToGbk(utf8)<<endl;//需要先将utf8字符串转成gbk之后才能用cout输出 return 0; }
声明:该文观点仅代表作者本人,牛骨文系教育信息发布平台,牛骨文仅提供信息存储空间服务。