How do you query for a non-existent (null) attribute in DynamoDB

I'm trying to query a DynamoDB table to find all items where the email attribute is not set. A global secondary index called EmailPasswordIndex exists on the table which includes the email field.

var params = {
    "TableName": "Accounts",
    "IndexName": "EmailPasswordIndex",
    "KeyConditionExpression": "email = NULL",
};

dynamodb.query(params, function(err, data) {
    if (err)
        console.log(JSON.stringify(err, null, 2));
    else
        console.log(JSON.stringify(data, null, 2));
});

Result:

{
  "message": "Invalid KeyConditionExpression: Attribute name is a reserved keyword; reserved keyword: NULL",
  "code": "ValidationException",
  "time": "2015-12-18T05:33:00.356Z",
  "statusCode": 400,
  "retryable": false
}

Table definition:

var params = {
    "TableName": "Accounts",
    "KeySchema": [
        { "AttributeName": "id", KeyType: "HASH" }, // Randomly generated UUID
    ],
    "AttributeDefinitions": [
        { "AttributeName": "id", AttributeType: "S" },
        { "AttributeName": "email", AttributeType: "S" }, // User e-mail.
        { "AttributeName": "password", AttributeType: "S" }, // Hashed password.
    ],
    "GlobalSecondaryIndexes": [
        {
            "IndexName": "EmailPasswordIndex",
            "ProvisionedThroughput": {
                "ReadCapacityUnits": 1,
                "WriteCapacityUnits": 1
            },
            "KeySchema": [
                { "AttributeName": "email", KeyType: "HASH" },
                { "AttributeName": "password", KeyType: "RANGE" },
            ],
            "Projection": { "ProjectionType": "ALL" }
        },
    ],
    ProvisionedThroughput: {       
        ReadCapacityUnits: 1, 
        WriteCapacityUnits: 1
    }
};

dynamodb.createTable(params, function(err, data) {
    if (err)
        console.log(JSON.stringify(err, null, 2));
    else
        console.log(JSON.stringify(data, null, 2));
});

Solution 1:

DynamoDB's Global Secondary Indexes allow for the indexes to be sparse. That means that if you have a GSI whose hash or range key for an item is not defined then that item will simply not be included in the GSI. This is useful in a number of use cases as it allows you to directly identify records that contain certain fields. However, this approach will not work if you are looking for the lack of a field.

To get all of the items that have a field not set your best bet may be resorting to a scan with a filter. This operation will be very expensive but it would be straightforward code looking something like the following:

var params = {
    TableName: "Accounts",
    FilterExpression: "attribute_not_exists(email)"
};

dynamodb.scan(params, {
    if (err)
        console.log(JSON.stringify(err, null, 2));
    else
        console.log(JSON.stringify(data, null, 2));
});

Solution 2:

@jaredHatfield is correct if the field does not exist but that will not work if the filed is null. NULL is a keyword and can't used directly. But you can use it with ExpressionAttributeValues.

const params = {
    TableName: "Accounts",
    FilterExpression: "attribute_not_exists(email) or email = :null",
    ExpressionAttributeValues: {
        ':null': null
    }
}

dynamodb.scan(params, (err, data) => {
    if (err)
        console.log(JSON.stringify(err, null, 2));
    else
        console.log(JSON.stringify(data, null, 2));
})