Storing UUID as base64 String
I have been experimenting with using UUIDs as database keys. I want to take up the least amount of bytes as possible, while still keeping the UUID representation human readable.
I think that I have gotten it down to 22 bytes using base64 and removing some trailing "==" that seem to be unnecessary to store for my purposes. Are there any flaws with this approach?
Basically my test code does a bunch of conversions to get the UUID down to a 22 byte String, then converts it back into a UUID.
import java.io.IOException;
import java.util.UUID;
public class UUIDTest {
public static void main(String[] args){
UUID uuid = UUID.randomUUID();
System.out.println("UUID String: " + uuid.toString());
System.out.println("Number of Bytes: " + uuid.toString().getBytes().length);
System.out.println();
byte[] uuidArr = asByteArray(uuid);
System.out.print("UUID Byte Array: ");
for(byte b: uuidArr){
System.out.print(b +" ");
}
System.out.println();
System.out.println("Number of Bytes: " + uuidArr.length);
System.out.println();
try {
// Convert a byte array to base64 string
String s = new sun.misc.BASE64Encoder().encode(uuidArr);
System.out.println("UUID Base64 String: " +s);
System.out.println("Number of Bytes: " + s.getBytes().length);
System.out.println();
String trimmed = s.split("=")[0];
System.out.println("UUID Base64 String Trimmed: " +trimmed);
System.out.println("Number of Bytes: " + trimmed.getBytes().length);
System.out.println();
// Convert base64 string to a byte array
byte[] backArr = new sun.misc.BASE64Decoder().decodeBuffer(trimmed);
System.out.print("Back to UUID Byte Array: ");
for(byte b: backArr){
System.out.print(b +" ");
}
System.out.println();
System.out.println("Number of Bytes: " + backArr.length);
byte[] fixedArr = new byte[16];
for(int i= 0; i<16; i++){
fixedArr[i] = backArr[i];
}
System.out.println();
System.out.print("Fixed UUID Byte Array: ");
for(byte b: fixedArr){
System.out.print(b +" ");
}
System.out.println();
System.out.println("Number of Bytes: " + fixedArr.length);
System.out.println();
UUID newUUID = toUUID(fixedArr);
System.out.println("UUID String: " + newUUID.toString());
System.out.println("Number of Bytes: " + newUUID.toString().getBytes().length);
System.out.println();
System.out.println("Equal to Start UUID? "+newUUID.equals(uuid));
if(!newUUID.equals(uuid)){
System.exit(0);
}
} catch (IOException e) {
}
}
public static byte[] asByteArray(UUID uuid) {
long msb = uuid.getMostSignificantBits();
long lsb = uuid.getLeastSignificantBits();
byte[] buffer = new byte[16];
for (int i = 0; i < 8; i++) {
buffer[i] = (byte) (msb >>> 8 * (7 - i));
}
for (int i = 8; i < 16; i++) {
buffer[i] = (byte) (lsb >>> 8 * (7 - i));
}
return buffer;
}
public static UUID toUUID(byte[] byteArray) {
long msb = 0;
long lsb = 0;
for (int i = 0; i < 8; i++)
msb = (msb << 8) | (byteArray[i] & 0xff);
for (int i = 8; i < 16; i++)
lsb = (lsb << 8) | (byteArray[i] & 0xff);
UUID result = new UUID(msb, lsb);
return result;
}
}
output:
UUID String: cdaed56d-8712-414d-b346-01905d0026fe
Number of Bytes: 36
UUID Byte Array: -51 -82 -43 109 -121 18 65 77 -77 70 1 -112 93 0 38 -2
Number of Bytes: 16
UUID Base64 String: za7VbYcSQU2zRgGQXQAm/g==
Number of Bytes: 24
UUID Base64 String Trimmed: za7VbYcSQU2zRgGQXQAm/g
Number of Bytes: 22
Back to UUID Byte Array: -51 -82 -43 109 -121 18 65 77 -77 70 1 -112 93 0 38 -2 0 38
Number of Bytes: 18
Fixed UUID Byte Array: -51 -82 -43 109 -121 18 65 77 -77 70 1 -112 93 0 38 -2
Number of Bytes: 16
UUID String: cdaed56d-8712-414d-b346-01905d0026fe
Number of Bytes: 36
Equal to Start UUID? true
I was also trying to do something similar. I am working with a Java application which uses UUIDs of the form 6fcb514b-b878-4c9d-95b7-8dc3a7ce6fd8
(which are generated with the standard UUID lib in Java). In my case I needed to be able to get this UUID down to 30 characters or less. I used Base64 and these are my convenience functions. Hopefully they will be helpful for someone as the solution was not obvious to me right away.
Usage:
String uuid_str = "6fcb514b-b878-4c9d-95b7-8dc3a7ce6fd8";
String uuid_as_64 = uuidToBase64(uuid_str);
System.out.println("as base64: "+uuid_as_64);
System.out.println("as uuid: "+uuidFromBase64(uuid_as_64));
Output:
as base64: b8tRS7h4TJ2Vt43Dp85v2A
as uuid : 6fcb514b-b878-4c9d-95b7-8dc3a7ce6fd8
Functions:
import org.apache.commons.codec.binary.Base64;
private static String uuidToBase64(String str) {
Base64 base64 = new Base64();
UUID uuid = UUID.fromString(str);
ByteBuffer bb = ByteBuffer.wrap(new byte[16]);
bb.putLong(uuid.getMostSignificantBits());
bb.putLong(uuid.getLeastSignificantBits());
return base64.encodeBase64URLSafeString(bb.array());
}
private static String uuidFromBase64(String str) {
Base64 base64 = new Base64();
byte[] bytes = base64.decodeBase64(str);
ByteBuffer bb = ByteBuffer.wrap(bytes);
UUID uuid = new UUID(bb.getLong(), bb.getLong());
return uuid.toString();
}
You can safely drop the padding "==" in this application. If you were to decode the base-64 text back to bytes, some libraries would expect it to be there, but since you are just using the resulting string as a key, it's not a problem.
I'd use Base-64 because its encoding characters can be URL-safe, and it looks less like gibberish. But there's also Base-85. It uses more symbols and codes 4 bytes as 5 characters, so you could get your text down to 20 characters.
Here's my code, it uses org.apache.commons.codec.binary.Base64 to produce url-safe unique strings that are 22 characters in length (and that have the same uniqueness as UUID).
private static Base64 BASE64 = new Base64(true);
public static String generateKey(){
UUID uuid = UUID.randomUUID();
byte[] uuidArray = KeyGenerator.toByteArray(uuid);
byte[] encodedArray = BASE64.encode(uuidArray);
String returnValue = new String(encodedArray);
returnValue = StringUtils.removeEnd(returnValue, "\r\n");
return returnValue;
}
public static UUID convertKey(String key){
UUID returnValue = null;
if(StringUtils.isNotBlank(key)){
// Convert base64 string to a byte array
byte[] decodedArray = BASE64.decode(key);
returnValue = KeyGenerator.fromByteArray(decodedArray);
}
return returnValue;
}
private static byte[] toByteArray(UUID uuid) {
byte[] byteArray = new byte[(Long.SIZE / Byte.SIZE) * 2];
ByteBuffer buffer = ByteBuffer.wrap(byteArray);
LongBuffer longBuffer = buffer.asLongBuffer();
longBuffer.put(new long[] { uuid.getMostSignificantBits(), uuid.getLeastSignificantBits() });
return byteArray;
}
private static UUID fromByteArray(byte[] bytes) {
ByteBuffer buffer = ByteBuffer.wrap(bytes);
LongBuffer longBuffer = buffer.asLongBuffer();
return new UUID(longBuffer.get(0), longBuffer.get(1));
}
I have an application where I'm doing almost exactly this. 22 char encoded UUID. It works fine. However, the main reason I'm doing it this way is that the IDs are exposed in the web app's URIs, and 36 characters is really quite big for something that appears in a URI. 22 characters is still kinda long, but we make do.
Here's the Ruby code for this:
# Make an array of 64 URL-safe characters
CHARS64 = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + ["-", "_"]
# Return a 22 byte URL-safe string, encoded six bits at a time using 64 characters
def to_s22
integer = self.to_i # UUID as a raw integer
rval = ""
22.times do
c = (integer & 0x3F)
rval += CHARS64[c]
integer = integer >> 6
end
return rval.reverse
end
It's not exactly the same as base64 encoding because base64 uses characters that would have to be escaped if they appeared in a URI path component. The Java implementation is likely to be quite different since you're more likely to have an array of raw bytes instead of a really big integer.
Here is an example with java.util.Base64
introduced in JDK8:
import java.nio.ByteBuffer;
import java.util.Base64;
import java.util.Base64.Encoder;
import java.util.UUID;
public class Uuid64 {
private static final Encoder BASE64_URL_ENCODER = Base64.getUrlEncoder().withoutPadding();
public static void main(String[] args) {
// String uuidStr = UUID.randomUUID().toString();
String uuidStr = "eb55c9cc-1fc1-43da-9adb-d9c66bb259ad";
String uuid64 = uuidHexToUuid64(uuidStr);
System.out.println(uuid64); //=> 61XJzB_BQ9qa29nGa7JZrQ
System.out.println(uuid64.length()); //=> 22
String uuidHex = uuid64ToUuidHex(uuid64);
System.out.println(uuidHex); //=> eb55c9cc-1fc1-43da-9adb-d9c66bb259ad
}
public static String uuidHexToUuid64(String uuidStr) {
UUID uuid = UUID.fromString(uuidStr);
byte[] bytes = uuidToBytes(uuid);
return BASE64_URL_ENCODER.encodeToString(bytes);
}
public static String uuid64ToUuidHex(String uuid64) {
byte[] decoded = Base64.getUrlDecoder().decode(uuid64);
UUID uuid = uuidFromBytes(decoded);
return uuid.toString();
}
public static byte[] uuidToBytes(UUID uuid) {
ByteBuffer bb = ByteBuffer.wrap(new byte[16]);
bb.putLong(uuid.getMostSignificantBits());
bb.putLong(uuid.getLeastSignificantBits());
return bb.array();
}
public static UUID uuidFromBytes(byte[] decoded) {
ByteBuffer bb = ByteBuffer.wrap(decoded);
long mostSigBits = bb.getLong();
long leastSigBits = bb.getLong();
return new UUID(mostSigBits, leastSigBits);
}
}
The UUID encoded in Base64 is URL safe and without padding.