How do you properly use WideCharToMultiByte
Solution 1:
Here's a couple of functions (based on Brian Bondy's example) that use WideCharToMultiByte and MultiByteToWideChar to convert between std::wstring and std::string using utf8 to not lose any data.
// Convert a wide Unicode string to an UTF8 string
std::string utf8_encode(const std::wstring &wstr)
{
if( wstr.empty() ) return std::string();
int size_needed = WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), NULL, 0, NULL, NULL);
std::string strTo( size_needed, 0 );
WideCharToMultiByte (CP_UTF8, 0, &wstr[0], (int)wstr.size(), &strTo[0], size_needed, NULL, NULL);
return strTo;
}
// Convert an UTF8 string to a wide Unicode String
std::wstring utf8_decode(const std::string &str)
{
if( str.empty() ) return std::wstring();
int size_needed = MultiByteToWideChar(CP_UTF8, 0, &str[0], (int)str.size(), NULL, 0);
std::wstring wstrTo( size_needed, 0 );
MultiByteToWideChar (CP_UTF8, 0, &str[0], (int)str.size(), &wstrTo[0], size_needed);
return wstrTo;
}
Solution 2:
Elaborating on the answer provided by Brian R. Bondy: Here's an example that shows why you can't simply size the output buffer to the number of wide characters in the source string:
#include <windows.h>
#include <stdio.h>
#include <wchar.h>
#include <string.h>
/* string consisting of several Asian characters */
wchar_t wcsString[] = L"\u9580\u961c\u9640\u963f\u963b\u9644";
int main()
{
size_t wcsChars = wcslen( wcsString);
size_t sizeRequired = WideCharToMultiByte( 950, 0, wcsString, -1,
NULL, 0, NULL, NULL);
printf( "Wide chars in wcsString: %u\n", wcsChars);
printf( "Bytes required for CP950 encoding (excluding NUL terminator): %u\n",
sizeRequired-1);
sizeRequired = WideCharToMultiByte( CP_UTF8, 0, wcsString, -1,
NULL, 0, NULL, NULL);
printf( "Bytes required for UTF8 encoding (excluding NUL terminator): %u\n",
sizeRequired-1);
}
And the output:
Wide chars in wcsString: 6
Bytes required for CP950 encoding (excluding NUL terminator): 12
Bytes required for UTF8 encoding (excluding NUL terminator): 18
Solution 3:
You use the lpMultiByteStr [out] parameter by creating a new char array. You then pass this char array in to get it filled. You only need to initialize the length of the string + 1 so that you can have a null terminated string after the conversion.
Here are a couple of useful helper functions for you, they show the usage of all parameters.
#include <string>
std::string wstrtostr(const std::wstring &wstr)
{
// Convert a Unicode string to an ASCII string
std::string strTo;
char *szTo = new char[wstr.length() + 1];
szTo[wstr.size()] = '\0';
WideCharToMultiByte(CP_ACP, 0, wstr.c_str(), -1, szTo, (int)wstr.length(), NULL, NULL);
strTo = szTo;
delete[] szTo;
return strTo;
}
std::wstring strtowstr(const std::string &str)
{
// Convert an ASCII string to a Unicode String
std::wstring wstrTo;
wchar_t *wszTo = new wchar_t[str.length() + 1];
wszTo[str.size()] = L'\0';
MultiByteToWideChar(CP_ACP, 0, str.c_str(), -1, wszTo, (int)str.length());
wstrTo = wszTo;
delete[] wszTo;
return wstrTo;
}
--
Anytime in documentation when you see that it has a parameter which is a pointer to a type, and they tell you it is an out variable, you will want to create that type, and then pass in a pointer to it. The function will use that pointer to fill your variable.
So you can understand this better:
//pX is an out parameter, it fills your variable with 10.
void fillXWith10(int *pX)
{
*pX = 10;
}
int main(int argc, char ** argv)
{
int X;
fillXWith10(&X);
return 0;
}