Cloud Functions for Firebase - Converting PDF to image
Node modules can install native code that is in the same directory as the Cloud Function's source code. I found that some node libraries on github that do this for ghostscript which is a very useful library for PDF processing:
- Node library that wraps Ghostscript command line: https://github.com/sina-masnadi/node-gs
- Compiled Ghostscript which is used via git submodule: https://github.com/sina-masnadi/lambda-ghostscript
I put lambda-ghostscript into a sub-directory of my functions
directory, then add the node-gs
as a dependency in my package file like this:
{
"name": "functions",
"dependencies": {
"@google-cloud/storage": "^1.3.1",
"child-process-promise": "^2.2.1",
"firebase-admin": "~5.4.0",
"firebase-functions": "^0.7.2",
"gs": "https://github.com/sina-masnadi/node-gs/tarball/master"
}
}
Then in my index.js file I can just require the node library to easily use ghostscript from JavaScript. Here's the complete code for the Cloud Function that uses a Google Cloud Storage trigger:
const functions = require('firebase-functions');
const gcs = require('@google-cloud/storage')();
const spawn = require('child-process-promise').spawn;
const path = require('path');
const os = require('os');
const fs = require('fs');
var gs = require('gs');
exports.makePNG = functions.storage.object().onChange(event => {
// ignore delete events
if (event.data.resourceState == 'not_exists') return false;
const filePath = event.data.name;
const fileDir = path.dirname(filePath);
const fileName = path.basename(filePath);
const tempFilePath = path.join(os.tmpdir(), fileName);
if (fileName.endsWith('.png')) return false;
if (!fileName.endsWith('.pdf')) return false;
const newName = path.basename(filePath, '.pdf') + '.png';
const tempNewPath = path.join(os.tmpdir(), newName);
// // Download file from bucket.
const bucket = gcs.bucket(event.data.bucket);
return bucket.file(filePath).download({
destination: tempFilePath
}).then(() => {
console.log('Image downloaded locally to', tempFilePath);
return new Promise(function (resolve, reject) {
gs()
.batch()
.nopause()
.option('-r' + 50 * 2)
.option('-dDownScaleFactor=2')
.executablePath('lambda-ghostscript/bin/./gs')
.device('png16m')
.output(tempNewPath)
.input(tempFilePath)
.exec(function (err, stdout, stderr) {
if (!err) {
console.log('gs executed w/o error');
console.log('stdout',stdout);
console.log('stderr',stderr);
resolve();
} else {
console.log('gs error:', err);
reject(err);
}
});
});
}).then(() => {
console.log('PNG created at', tempNewPath);
// Uploading the thumbnail.
return bucket.upload(tempNewPath, {destination: newName});
// Once the thumbnail has been uploaded delete the local file to free up disk space.
}).then(() => {
fs.unlinkSync(tempNewPath);
fs.unlinkSync(tempFilePath);
}).catch((err) => {
console.log('exception:', err);
return err;
});
});
Here's the project on github: https://github.com/ultrasaurus/ghostscript-cloud-function
Disclaimer: This is using compiled native code and I verified experimentally that works for this case, so it is probably fine. I didn't look into the specific compile options and validate if they exactly correct for the Cloud Functions environment.
WORKING SOLUTION
Thank you @Ultrasaurus for pointing out this approach! However, for me it did not work and in your Github repo your also stated I haven't tested them
. I modified your solution a little bit and got the following code, which is 100% working for me:
{
"dependencies": {
"@google-cloud/firestore": "^4.4.0",
"@google-cloud/storage": "^5.3.0",
"ghostscript": "https://github.com/musubu/node-ghostscript/tarball/master",
"pdf-image": "^2.0.0",
"rimraf": "^3.0.2",
"uuid": "^8.3.1"
}
}
The function is triggered by a Firestore event:
const Storage = require('@google-cloud/storage')
const fs = require('fs')
const rimraf = require('rimraf')
const os = require('os')
const gs = require('ghostscript')
const GOOGLE_PROJECT_ID = 'MY_GOOGLE_PROJECT_ID'
const GOOGLE_STORAGE_BUCKET_NAME = 'MY_GOOGLE_STORAGE_BUCKET_NAME'
const storage = new Storage.Storage({
projectId: GOOGLE_PROJECT_ID
})
exports.createImage = async (event) => {
let {
appointment,
name
} = event.value.fields
name = getFileName(name.stringValue)
appointment = appointment.stringValue
console.log(`Processing document ${name} in appointment ${appointment}`)
const tempDir = createTempDir(appointment)
const tmpDocumentPath = await downloadPdf(tempDir, name, appointment)
const imagePath = await convertPdfToImage(tmpDocumentPath)
await uploadImage(imagePath, appointment)
deleteDir(tempDir)
}
function getFileName (name) {
const nameParts = name.split('/')
return nameParts[nameParts.length - 1]
}
function createTempDir (appointment) {
const tempDir = `${os.tmpdir()}/${appointment}_${Math.random()}`
fs.mkdirSync(tempDir)
console.log(`Created dir ${tempDir}`)
return tempDir
}
async function downloadPdf (tempDir, name, appointment) {
const destination = `${tempDir}/${name}`
await storage.bucket(GOOGLE_STORAGE_BUCKET_NAME).file(`${appointment}/${name}`).download({ destination })
console.log(`Successfully downloaded document ${name}`)
return destination
}
async function convertPdfToImage (pdfPath) {
const imagePath = pdfPath.replace('pdf', 'png')
return new Promise(function (resolve, reject) {
try {
gs()
.batch()
.nopause()
.device('png16m')
.output(imagePath)
.input(pdfPath)
.exec(function (err, stdout, stderr) {
if (!err) {
console.log('gs executed w/o error')
console.log('stdout', stdout)
console.log('stderr', stderr)
resolve(imagePath)
} else {
console.log('gs error:', err)
reject(err)
}
})
} catch (error) {
console.log(error)
}
})
}
async function uploadImage (imagePath, appointment) {
const imagePathParts = imagePath.split('/')
const imageName = imagePathParts[imagePathParts.length - 1]
console.log(`Starting upload for ${imageName} at ${imagePath} to storage ${appointment}/${imageName}`)
await storage.bucket(GOOGLE_STORAGE_BUCKET_NAME).upload(imagePath, {
destination: `${appointment}/${imageName}`,
metadata: {
metadata: { appointment }
}
})
console.log(`Successfully uploaded image for appointment ${appointment}`)
}
function deleteDir (dir) {
rimraf.sync(dir)
}