How to (un)escape strings in C/C++?
This function converts a single character into its escaped C string-literal form:
/*
** Write the C string-literal spelling of the single character u into buffer
** as a null-terminated string.
**
**   u      - the character to convert
**   buffer - destination for the spelling
**   buflen - capacity of buffer in bytes, including the terminating null
**
** The spelling is one of:
**   - the character itself, if it is printable and not one of ' " \ ?
**   - a two-character simple escape: \a \b \f \n \r \t \v \\ \' \" \?
**   - a four-character, three-digit octal escape: \ooo
**
** If the spelling plus its terminator does not fit in buflen bytes, buffer
** is set to the empty string so the caller can detect the failure.
** If buffer is null or buflen is zero, nothing is written at all.
**
** Does not generate hex character constants.
** Always generates triple-digit octal constants.
** Always generates escapes in preference to octal.
** Escape question mark to ensure no trigraphs are generated by repetitive use.
** Handling of 0x80..0xFF is locale-dependent (might be octal, might be literal).
*/
void chr_cstrlit(unsigned char u, char *buffer, size_t buflen)
{
    const char *escape = NULL;

    /* No room even for the terminator: any write would overflow. */
    if (!buffer || buflen == 0)
        return;

    /* Start from the "did not fit" result; overwritten below on success. */
    *buffer = '\0';
    if (buflen < 2)
        return;

    if (isprint(u) && u != '\'' && u != '\"' && u != '\\' && u != '\?')
    {
        buffer[0] = (char)u;
        buffer[1] = '\0';
        return;
    }

    /* Every remaining spelling needs at least two characters plus the null. */
    if (buflen < 3)
        return;

    switch (u)
    {
    case '\a': escape = "\\a";  break;
    case '\b': escape = "\\b";  break;
    case '\f': escape = "\\f";  break;
    case '\n': escape = "\\n";  break;
    case '\r': escape = "\\r";  break;
    case '\t': escape = "\\t";  break;
    case '\v': escape = "\\v";  break;
    case '\\': escape = "\\\\"; break;
    case '\'': escape = "\\'";  break;
    case '\"': escape = "\\\""; break;
    case '\?': escape = "\\\?"; break;
    default:   break;
    }

    if (escape)
        strcpy(buffer, escape);
    else if (buflen >= 5)
        /* Backslash + three octal digits + null = five bytes. */
        snprintf(buffer, buflen, "\\%03o", (unsigned)u);
}
And this is the code to handle a null-terminated string (using the function above):
/*
** Write the C string-literal spelling of the null-terminated string str
** into buffer, converting one character at a time with chr_cstrlit().
**
**   str    - the string to escape; a null pointer is treated as empty
**   buffer - destination for the escaped, null-terminated result
**   buflen - capacity of buffer in bytes, including the terminating null
**
** If the result does not fit, conversion stops before the first character
** whose spelling would overflow; buffer then holds the escaped prefix and
** is still null-terminated. If buffer is null or buflen is zero, nothing
** is written at all.
*/
void str_cstrlit(const char *str, char *buffer, size_t buflen)
{
    /* No room even for the terminator: any write would overflow. */
    if (!buffer || buflen == 0)
        return;

    *buffer = '\0';
    if (!str)
        return;

    for (; *str != '\0'; ++str)
    {
        size_t len;

        chr_cstrlit((unsigned char)*str, buffer, buflen);
        len = strlen(buffer);
        /* An empty result means the spelling did not fit; stop here. */
        if (len == 0)
            return;
        buffer += len;
        buflen -= len;
    }
    *buffer = '\0';
}
Rather than allocating a new buffer to contain the escaped string, I like to escape my string while I write it to a stream.
The following wrapper struct, with its stream insertion operator, makes for readable and concise code.
/// Wraps a null-terminated string so that inserting it into a std::ostream
/// writes the escaped, C string-literal form of its contents.
///
/// Usage: `os << Escaped{ text };`
struct Escaped
{
    /// String to escape. Not owned; must stay valid while being streamed.
    /// A null pointer is streamed as nothing.
    const char* str;

    /// Writes e.str to os one character at a time:
    ///  - characters with a simple escape (\a \b \f \n \r \t \v \\ \' \" \?)
    ///    are written as that two-character escape;
    ///  - other control characters (below 0x20, and 0x7F) are written as a
    ///    three-digit octal escape, so the output never contains raw
    ///    control bytes;
    ///  - every other byte, including 0x80..0xFF, is written unchanged.
    ///
    /// @return os, to allow chaining.
    friend std::ostream& operator<<(std::ostream& os, const Escaped& e)
    {
        if (e.str == nullptr)
            return os;

        for (const char* p = e.str; *p != '\0'; ++p)
        {
            switch (*p)
            {
            case '\a': os << "\\a";  break;
            case '\b': os << "\\b";  break;
            case '\f': os << "\\f";  break;
            case '\n': os << "\\n";  break;
            case '\r': os << "\\r";  break;
            case '\t': os << "\\t";  break;
            case '\v': os << "\\v";  break;
            case '\\': os << "\\\\"; break;
            case '\'': os << "\\'";  break;
            case '\"': os << "\\\""; break;
            case '\?': os << "\\\?"; break;
            default:
            {
                const auto byte = static_cast<unsigned char>(*p);
                if (byte < 0x20 || byte == 0x7F)
                {
                    // Build the digits by hand so the stream's numeric
                    // formatting flags are neither used nor modified.
                    const char octal[] = {
                        '\\',
                        static_cast<char>('0' + ((byte >> 6) & 7)),
                        static_cast<char>('0' + ((byte >> 3) & 7)),
                        static_cast<char>('0' + (byte & 7)),
                        '\0'
                    };
                    os << octal;
                }
                else
                {
                    os << *p;
                }
                break;
            }
            }
        }
        return os;
    }
};
int main()
{
std::cout << Escaped{ "foo\n\tbar" } << std::endl;
}
Produces
foo\n\tbar