Convert UTF-8 encoded NSData to NSString

Solution 1:

If the data is not null-terminated, you should use -initWithData:encoding:

NSString* newStr = [[NSString alloc] initWithData:theData encoding:NSUTF8StringEncoding];

If the data is null-terminated, you should instead use -stringWithUTF8String: to avoid the extra \0 at the end.

NSString* newStr = [NSString stringWithUTF8String:[theData bytes]];

(Note that if the input is not properly UTF-8-encoded, you will get nil.)


Swift variant:

let newStr = String(data: data, encoding: .utf8)
// note that `newStr` is a `String?`, not a `String`.

If the data is null-terminated, you could go though the safe way which is remove the that null character, or the unsafe way similar to the Objective-C version above.

// safe way, provided data is \0-terminated
let newStr1 = String(data: data.subdata(in: 0 ..< data.count - 1), encoding: .utf8)
// unsafe way, provided data is \0-terminated
let newStr2 = data.withUnsafeBytes(String.init(utf8String:))

Solution 2:

You could call this method

+(id)stringWithUTF8String:(const char *)bytes.

Solution 3:

I humbly submit a category to make this less annoying:

@interface NSData (EasyUTF8)

// Safely decode the bytes into a UTF8 string
- (NSString *)asUTF8String;

@end

and

@implementation NSData (EasyUTF8)

- (NSString *)asUTF8String {
    return [[NSString alloc] initWithData:self encoding:NSUTF8StringEncoding];    
}

@end

(Note that if you're not using ARC you'll need an autorelease there.)

Now instead of the appallingly verbose:

NSData *data = ...
[[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding];

You can do:

NSData *data = ...
[data asUTF8String];

Solution 4:

The Swift version from String to Data and back to String:

Xcode 10.1 • Swift 4.2.1

extension Data {
    var string: String? {
        return String(data: self, encoding: .utf8)
    }
}

extension StringProtocol {
    var data: Data {
        return Data(utf8)
    }
}

extension String {
    var base64Decoded: Data? {
        return Data(base64Encoded: self)
    }
}

Playground

let string = "Hello World"                                  // "Hello World"
let stringData = string.data                                // 11 bytes
let base64EncodedString = stringData.base64EncodedString()  // "SGVsbG8gV29ybGQ="
let stringFromData = stringData.string                      // "Hello World"

let base64String = "SGVsbG8gV29ybGQ="
if let data = base64String.base64Decoded {
    print(data)                                    //  11 bytes
    print(data.base64EncodedString())              // "SGVsbG8gV29ybGQ="
    print(data.string ?? "nil")                    // "Hello World"
}

let stringWithAccent = "Olá Mundo"                          // "Olá Mundo"
print(stringWithAccent.count)                               // "9"
let stringWithAccentData = stringWithAccent.data            // "10 bytes" note: an extra byte for the acute accent
let stringWithAccentFromData = stringWithAccentData.string  // "Olá Mundo\n"