Nodejs: Async request with a list of URL
I am working on a crawler. I have a list of URL need to be requested. There are several hundreds of request at the same time if I don't set it to be async. I am afraid that it would explode my bandwidth or produce to much network access to the target website. What should I do?
Here is what I am doing:
urlList.forEach((url, index) => {
console.log('Fetching ' + url);
request(url, function(error, response, body) {
//do sth for body
});
});
I want one request is called after one request is completed.
Solution 1:
You can use something like Promise library e.g. snippet
const Promise = require("bluebird");
const axios = require("axios");
//Axios wrapper for error handling
const axios_wrapper = (options) => {
return axios(...options)
.then((r) => {
return Promise.resolve({
data: r.data,
error: null,
});
})
.catch((e) => {
return Promise.resolve({
data: null,
error: e.response ? e.response.data : e,
});
});
};
Promise.map(
urls,
(k) => {
return axios_wrapper({
method: "GET",
url: k,
});
},
{ concurrency: 1 } // Here 1 represents how many requests you want to run in parallel
)
.then((r) => {
console.log(r);
//Here r will be an array of objects like {data: [{}], error: null}, where if the request was successfull it will have data value present otherwise error value will be non-null
})
.catch((e) => {
console.error(e);
});