Save and render a webpage with PhantomJS and node.js
From your comments, I'd guess you have two options:
- Try to find a phantomjs node module - https://github.com/amir20/phantomjs-node
- Run phantomjs as a child process inside node - http://nodejs.org/api/child_process.html
Edit:
It seems that running PhantomJS as a child process is the approach the PhantomJS project itself suggests for interacting with Node.js; see the FAQ - http://code.google.com/p/phantomjs/wiki/FAQ
Edit:
Example PhantomJS script for getting the page's HTML markup:
// PhantomJS script (run with the `phantomjs` binary, not with node): loads a
// page and prints the markup found inside its <html> element.
// Kept to ES5 syntax on purpose, since the PhantomJS engine predates ES2015.
var webpage = require('webpage');
var page = webpage.create();

page.open('http://www.google.com', function (status) {
  // Passed to page.evaluate, so it may only rely on what the loaded page
  // provides (here: `document`), not on variables from this script.
  function readMarkup() {
    return document.getElementsByTagName('html')[0].innerHTML;
  }

  var loaded = status === 'success';
  console.log(loaded ? page.evaluate(readMarkup) : 'Unable to access network');

  // Always terminate, otherwise the PhantomJS process keeps running.
  phantom.exit();
});
With v2 of phantomjs-node,
it's pretty easy to print the HTML after it has been processed:
// Node.js script using phantomjs-node (v2+): opens a page, prints the load
// status and the rendered HTML, then shuts the PhantomJS process down.
const phantom = require('phantom');

// Log the failure and make the script finish with a non-zero exit status.
const reportFailure = (err) => {
  console.error(err);
  process.exitCode = 1;
};

phantom
  .create()
  .then((ph) =>
    ph
      .createPage()
      .then((page) =>
        page
          .open('https://stackoverflow.com/')
          .then((status) => {
            console.log(status);
            return page.property('content');
          })
          .then((content) => {
            console.log(content);
            return page.close();
          })
      )
      .catch(reportFailure)
      // Reached on success and after a reported failure alike, so the
      // spawned PhantomJS child process is never left running.
      .then(() => ph.exit())
  )
  // Covers phantom.create() itself failing, and a failing ph.exit().
  .catch(reportFailure);
This will show the output as it would have been rendered with the browser.
Edit 2019:
You can use async/await:
// Node.js script using phantomjs-node with async/await: logs every resource
// the page requests, then prints the rendered HTML.
const phantom = require('phantom');

(async function () {
  const instance = await phantom.create();
  try {
    const page = await instance.createPage();
    await page.on('onResourceRequested', function (requestData) {
      console.info('Requesting', requestData.url);
    });
    await page.open('https://stackoverflow.com/');
    const content = await page.property('content');
    console.log(content);
  } finally {
    // Shut PhantomJS down even when one of the steps above throws;
    // otherwise the child process would be left running.
    await instance.exit();
  }
})().catch(function (err) {
  // The IIFE returns a promise; without this handler a failure would
  // surface only as an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
Or, if you just want to test, you can use npx:
npx phantom@latest https://stackoverflow.com/