Delete a Document with all Subcollections and Nested Subcollections in Firestore
How can you delete a Document with all it's collections and nested subcollections? (inside the functions environment)
In the RTDB you can ref.child('../someNode).setValue(null)
and that completes the desired behavior.
I can think of two ways you could achieve the desired delete behavior, both with tremendously ghastly drawbacks.
Create a 'Super' function that will spider every document and delete them in a batch. This function would be complicated, brittle to changes, and might take a lengthy execution time.
Add 'onDelete' triggers for each Document type, and make it delete any direct subcollections. You'll call delete on the root document, and the deletion calls will propagate down the 'tree'. This is sluggish, scales atrociously and is costly due to the colossal load of function executions.
Imagine you would have to delete a 'GROUP' and all it's children. It would be deeply chaotic with #1 and pricey with #2 (1 function call per doc)
groups > GROUP > projects > PROJECT > files > FILE > assets > ASSET
> urls > URL
> members > MEMBER
> questions > QUESTION > answers > ANSWER > replies > REPLY
> comments > COMMENT
> resources > RESOURCE > submissions > SUBMISSION
> requests > REQUEST
Is there a superior/favored/cleaner way to delete a document and all it's nested subcollections?
It ought to be possible considering you can do it from the console.
Solution 1:
according to firebase documentation:
https://firebase.google.com/docs/firestore/solutions/delete-collections
Deleting collection with nested subcollections might be done easy and neat with node-JS on the server side.
const client = require('firebase-tools');
await client.firestore
.delete(collectionPath, {
project: process.env.GCLOUD_PROJECT,
recursive: true,
yes: true
});
Solution 2:
Unfortunately, your analysis is spot on and indeed this use case does require a lot of ceremony. According to official documentation, there is no support for deep deletes in a single shot in firestore
neither via client libraries nor rest-api nor the cli
tool.
The cli
is open sourced and its implementation lives here: https://github.com/firebase/firebase-tools/blob/master/src/firestore/delete.js. They basically implemented option 1. you described in your question, so you can take some inspiration from there.
Both options 1. and 2. are far from ideal situation and to make your solution 100% reliable you will need to keep a persistent queue with deletion tasks, as any error in the long running procedure will leave your system in some ill-defined state.
I would discourage to go with raw option 2. as recursive cloud function calls may very easily went wrong - for example, hitting max. limits.
In case the link changed, below the full source of https://github.com/firebase/firebase-tools/blob/master/src/firestore/delete.js:
"use strict";
var clc = require("cli-color");
var ProgressBar = require("progress");
var api = require("../api");
var firestore = require("../gcp/firestore");
var FirebaseError = require("../error");
var logger = require("../logger");
var utils = require("../utils");
/**
* Construct a new Firestore delete operation.
*
* @constructor
* @param {string} project the Firestore project ID.
* @param {string} path path to a document or collection.
* @param {boolean} options.recursive true if the delete should be recursive.
* @param {boolean} options.shallow true if the delete should be shallow (non-recursive).
* @param {boolean} options.allCollections true if the delete should universally remove all collections and docs.
*/
function FirestoreDelete(project, path, options) {
this.project = project;
this.path = path;
this.recursive = Boolean(options.recursive);
this.shallow = Boolean(options.shallow);
this.allCollections = Boolean(options.allCollections);
// Remove any leading or trailing slashes from the path
if (this.path) {
this.path = this.path.replace(/(^\/+|\/+$)/g, "");
}
this.isDocumentPath = this._isDocumentPath(this.path);
this.isCollectionPath = this._isCollectionPath(this.path);
this.allDescendants = this.recursive;
this.parent = "projects/" + project + "/databases/(default)/documents";
// When --all-collections is passed any other flags or arguments are ignored
if (!options.allCollections) {
this._validateOptions();
}
}
/**
* Validate all options, throwing an exception for any fatal errors.
*/
FirestoreDelete.prototype._validateOptions = function() {
if (this.recursive && this.shallow) {
throw new FirebaseError("Cannot pass recursive and shallow options together.");
}
if (this.isCollectionPath && !this.recursive && !this.shallow) {
throw new FirebaseError("Must pass recursive or shallow option when deleting a collection.");
}
var pieces = this.path.split("/");
if (pieces.length === 0) {
throw new FirebaseError("Path length must be greater than zero.");
}
var hasEmptySegment = pieces.some(function(piece) {
return piece.length === 0;
});
if (hasEmptySegment) {
throw new FirebaseError("Path must not have any empty segments.");
}
};
/**
* Determine if a path points to a document.
*
* @param {string} path a path to a Firestore document or collection.
* @return {boolean} true if the path points to a document, false
* if it points to a collection.
*/
FirestoreDelete.prototype._isDocumentPath = function(path) {
if (!path) {
return false;
}
var pieces = path.split("/");
return pieces.length % 2 === 0;
};
/**
* Determine if a path points to a collection.
*
* @param {string} path a path to a Firestore document or collection.
* @return {boolean} true if the path points to a collection, false
* if it points to a document.
*/
FirestoreDelete.prototype._isCollectionPath = function(path) {
if (!path) {
return false;
}
return !this._isDocumentPath(path);
};
/**
* Construct a StructuredQuery to find descendant documents of a collection.
*
* See:
* https://firebase.google.com/docs/firestore/reference/rest/v1beta1/StructuredQuery
*
* @param {boolean} allDescendants true if subcollections should be included.
* @param {number} batchSize maximum number of documents to target (limit).
* @param {string=} startAfter document name to start after (optional).
* @return {object} a StructuredQuery.
*/
FirestoreDelete.prototype._collectionDescendantsQuery = function(
allDescendants,
batchSize,
startAfter
) {
var nullChar = String.fromCharCode(0);
var startAt = this.parent + "/" + this.path + "/" + nullChar;
var endAt = this.parent + "/" + this.path + nullChar + "/" + nullChar;
var where = {
compositeFilter: {
op: "AND",
filters: [
{
fieldFilter: {
field: {
fieldPath: "__name__",
},
op: "GREATER_THAN_OR_EQUAL",
value: {
referenceValue: startAt,
},
},
},
{
fieldFilter: {
field: {
fieldPath: "__name__",
},
op: "LESS_THAN",
value: {
referenceValue: endAt,
},
},
},
],
},
};
var query = {
structuredQuery: {
where: where,
limit: batchSize,
from: [
{
allDescendants: allDescendants,
},
],
select: {
fields: [{ fieldPath: "__name__" }],
},
orderBy: [{ field: { fieldPath: "__name__" } }],
},
};
if (startAfter) {
query.structuredQuery.startAt = {
values: [{ referenceValue: startAfter }],
before: false,
};
}
return query;
};
/**
* Construct a StructuredQuery to find descendant documents of a document.
* The document itself will not be included
* among the results.
*
* See:
* https://firebase.google.com/docs/firestore/reference/rest/v1beta1/StructuredQuery
*
* @param {boolean} allDescendants true if subcollections should be included.
* @param {number} batchSize maximum number of documents to target (limit).
* @param {string=} startAfter document name to start after (optional).
* @return {object} a StructuredQuery.
*/
FirestoreDelete.prototype._docDescendantsQuery = function(allDescendants, batchSize, startAfter) {
var query = {
structuredQuery: {
limit: batchSize,
from: [
{
allDescendants: allDescendants,
},
],
select: {
fields: [{ fieldPath: "__name__" }],
},
orderBy: [{ field: { fieldPath: "__name__" } }],
},
};
if (startAfter) {
query.structuredQuery.startAt = {
values: [{ referenceValue: startAfter }],
before: false,
};
}
return query;
};
/**
* Query for a batch of 'descendants' of a given path.
*
* For document format see:
* https://firebase.google.com/docs/firestore/reference/rest/v1beta1/Document
*
* @param {boolean} allDescendants true if subcollections should be included,
* @param {number} batchSize the maximum size of the batch.
* @param {string=} startAfter the name of the document to start after (optional).
* @return {Promise<object[]>} a promise for an array of documents.
*/
FirestoreDelete.prototype._getDescendantBatch = function(allDescendants, batchSize, startAfter) {
var url;
var body;
if (this.isDocumentPath) {
url = this.parent + "/" + this.path + ":runQuery";
body = this._docDescendantsQuery(allDescendants, batchSize, startAfter);
} else {
url = this.parent + ":runQuery";
body = this._collectionDescendantsQuery(allDescendants, batchSize, startAfter);
}
return api
.request("POST", "/v1beta1/" + url, {
auth: true,
data: body,
origin: api.firestoreOrigin,
})
.then(function(res) {
// Return the 'document' property for each element in the response,
// where it exists.
return res.body
.filter(function(x) {
return x.document;
})
.map(function(x) {
return x.document;
});
});
};
/**
* Progress bar shared by the class.
*/
FirestoreDelete.progressBar = new ProgressBar("Deleted :current docs (:rate docs/s)", {
total: Number.MAX_SAFE_INTEGER,
});
/**
* Repeatedly query for descendants of a path and delete them in batches
* until no documents remain.
*
* @return {Promise} a promise for the entire operation.
*/
FirestoreDelete.prototype._recursiveBatchDelete = function() {
var self = this;
// Tunable deletion parameters
var readBatchSize = 7500;
var deleteBatchSize = 250;
var maxPendingDeletes = 15;
var maxQueueSize = deleteBatchSize * maxPendingDeletes * 2;
// All temporary variables for the deletion queue.
var queue = [];
var numPendingDeletes = 0;
var pagesRemaining = true;
var pageIncoming = false;
var lastDocName;
var failures = [];
var retried = {};
var queueLoop = function() {
if (queue.length == 0 && numPendingDeletes == 0 && !pagesRemaining) {
return true;
}
if (failures.length > 0) {
logger.debug("Found " + failures.length + " failed deletes, failing.");
return true;
}
if (queue.length <= maxQueueSize && pagesRemaining && !pageIncoming) {
pageIncoming = true;
self
._getDescendantBatch(self.allDescendants, readBatchSize, lastDocName)
.then(function(docs) {
pageIncoming = false;
if (docs.length == 0) {
pagesRemaining = false;
return;
}
queue = queue.concat(docs);
lastDocName = docs[docs.length - 1].name;
})
.catch(function(e) {
logger.debug("Failed to fetch page after " + lastDocName, e);
pageIncoming = false;
});
}
if (numPendingDeletes > maxPendingDeletes) {
return false;
}
if (queue.length == 0) {
return false;
}
var toDelete = [];
var numToDelete = Math.min(deleteBatchSize, queue.length);
for (var i = 0; i < numToDelete; i++) {
toDelete.push(queue.shift());
}
numPendingDeletes++;
firestore
.deleteDocuments(self.project, toDelete)
.then(function(numDeleted) {
FirestoreDelete.progressBar.tick(numDeleted);
numPendingDeletes--;
})
.catch(function(e) {
// For server errors, retry if the document has not yet been retried.
if (e.status >= 500 && e.status < 600) {
logger.debug("Server error deleting doc batch", e);
// Retry each doc up to one time
toDelete.forEach(function(doc) {
if (retried[doc.name]) {
logger.debug("Failed to delete doc " + doc.name + " multiple times.");
failures.push(doc.name);
} else {
retried[doc.name] = true;
queue.push(doc);
}
});
} else {
logger.debug("Fatal error deleting docs ", e);
failures = failures.concat(toDelete);
}
numPendingDeletes--;
});
return false;
};
return new Promise(function(resolve, reject) {
var intervalId = setInterval(function() {
if (queueLoop()) {
clearInterval(intervalId);
if (failures.length == 0) {
resolve();
} else {
reject("Failed to delete documents " + failures);
}
}
}, 0);
});
};
/**
* Delete everything under a given path. If the path represents
* a document the document is deleted and then all descendants
* are deleted.
*
* @return {Promise} a promise for the entire operation.
*/
FirestoreDelete.prototype._deletePath = function() {
var self = this;
var initialDelete;
if (this.isDocumentPath) {
var doc = { name: this.parent + "/" + this.path };
initialDelete = firestore.deleteDocument(doc).catch(function(err) {
logger.debug("deletePath:initialDelete:error", err);
if (self.allDescendants) {
// On a recursive delete, we are insensitive to
// failures of the initial delete
return Promise.resolve();
}
// For a shallow delete, this error is fatal.
return utils.reject("Unable to delete " + clc.cyan(this.path));
});
} else {
initialDelete = Promise.resolve();
}
return initialDelete.then(function() {
return self._recursiveBatchDelete();
});
};
/**
* Delete an entire database by finding and deleting each collection.
*
* @return {Promise} a promise for all of the operations combined.
*/
FirestoreDelete.prototype.deleteDatabase = function() {
var self = this;
return firestore
.listCollectionIds(this.project)
.catch(function(err) {
logger.debug("deleteDatabase:listCollectionIds:error", err);
return utils.reject("Unable to list collection IDs");
})
.then(function(collectionIds) {
var promises = [];
logger.info("Deleting the following collections: " + clc.cyan(collectionIds.join(", ")));
for (var i = 0; i < collectionIds.length; i++) {
var collectionId = collectionIds[i];
var deleteOp = new FirestoreDelete(self.project, collectionId, {
recursive: true,
});
promises.push(deleteOp.execute());
}
return Promise.all(promises);
});
};
/**
* Check if a path has any children. Useful for determining
* if deleting a path will affect more than one document.
*
* @return {Promise<boolean>} a promise that retruns true if the path has
* children and false otherwise.
*/
FirestoreDelete.prototype.checkHasChildren = function() {
return this._getDescendantBatch(true, 1).then(function(docs) {
return docs.length > 0;
});
};
/**
* Run the delete operation.
*/
FirestoreDelete.prototype.execute = function() {
var verifyRecurseSafe;
if (this.isDocumentPath && !this.recursive && !this.shallow) {
verifyRecurseSafe = this.checkHasChildren().then(function(multiple) {
if (multiple) {
return utils.reject("Document has children, must specify -r or --shallow.", { exit: 1 });
}
});
} else {
verifyRecurseSafe = Promise.resolve();
}
var self = this;
return verifyRecurseSafe.then(function() {
return self._deletePath();
});
};
module.exports = FirestoreDelete;
Solution 3:
i don't know how much helpful for you but test it and compare the execution time which i get use it from fire store doc
/** Delete a collection in batches to avoid out-of-memory errors. * Batch size may be tuned based on document size (atmost 1MB) and application requirements.
*/
void deleteCollection(CollectionReference collection, int batchSize) {
try {
// retrieve a small batch of documents to avoid out-of-memory errors
ApiFuture<QuerySnapshot> future = collection.limit(batchSize).get();
int deleted = 0;
// future.get() blocks on document retrieval
List<QueryDocumentSnapshot> documents = future.get().getDocuments();
for (QueryDocumentSnapshot document : documents) {
document.getReference().delete();
++deleted;
}
if (deleted >= batchSize) {
// retrieve and delete another batch
deleteCollection(collection, batchSize);
}
} catch (Exception e) {
System.err.println("Error deleting collection : " + e.getMessage());
}
}
Solution 4:
As mentioned above, you need to write good bit of code for this. For each document that is to be deleted you need to check if it has one or more collections. If it does, then you need to queue those up for deletion too. I wrote the code below to do this. It's not tested to be scalable to large data sets, which is fine for me as I'm using it to clean up after small scale integration tests. If you need something more scalable, feel free to take this as a starting point and play around with batching more.
class FirebaseDeleter {
constructor(database, collections) {
this._database = database;
this._pendingCollections = [];
}
run() {
return new Promise((resolve, reject) => {
this._callback = resolve;
this._database.getCollections().then(collections => {
this._pendingCollections = collections;
this._processNext();
});
});
}
_processNext() {
const collections = this._pendingCollections;
this._pendingCollections = [];
const promises = collections.map(collection => {
return this.deleteCollection(collection, 10000);
});
Promise.all(promises).then(() => {
if (this._pendingCollections.length == 0) {
this._callback();
} else {
process.nextTick(() => {
this._processNext();
});
}
});
}
deleteCollection(collectionRef, batchSize) {
var query = collectionRef;
return new Promise((resolve, reject) => {
this.deleteQueryBatch(query, batchSize, resolve, reject);
});
}
deleteQueryBatch(query, batchSize, resolve, reject) {
query
.get()
.then(snapshot => {
// When there are no documents left, we are done
if (snapshot.size == 0) {
return 0;
}
// Delete documents in a batch
var batch = this._database.batch();
const collectionPromises = [];
snapshot.docs.forEach(doc => {
collectionPromises.push(
doc.ref.getCollections().then(collections => {
collections.forEach(collection => {
this._pendingCollections.push(collection);
});
})
);
batch.delete(doc.ref);
});
// Wait until we know if all the documents have collections before deleting them.
return Promise.all(collectionPromises).then(() => {
return batch.commit().then(() => {
return snapshot.size;
});
});
})
.then(numDeleted => {
if (numDeleted === 0) {
resolve();
return;
}
// Recurse on the next process tick, to avoid
// exploding the stack.
process.nextTick(() => {
this.deleteQueryBatch(query, batchSize, resolve, reject);
});
})
.catch(reject);
}
}