Node.js: Async requests with a list of URLs

I am working on a crawler. I have a list of URLs that need to be requested. Several hundred requests go out at the same time if I don't throttle them. I am afraid that would exhaust my bandwidth or generate too much traffic to the target website. What should I do?

Here is what I am doing:

const request = require('request');

urlList.forEach((url, index) => {
    console.log('Fetching ' + url);
    request(url, function (error, response, body) {
        // do something with the body
    });
});

Because request() is asynchronous, this loop fires all the requests immediately. I want each request to start only after the previous one has completed.


Solution 1:

You can use a promise library such as Bluebird, e.g. with the snippet below:

const Promise = require("bluebird");
const axios = require("axios");

// Axios wrapper that never rejects: it resolves with either
// { data, error: null } on success or { data: null, error } on failure
const axios_wrapper = (options) => {
    return axios(options)
        .then((r) => {
            return Promise.resolve({
                data: r.data,
                error: null,
            });
        })
        .catch((e) => {
            return Promise.resolve({
                data: null,
                error: e.response ? e.response.data : e,
            });
        });
};

Promise.map(
    urls,
    (url) => {
        return axios_wrapper({
            method: "GET",
            url: url,
        });
    },
    { concurrency: 1 } // 1 runs the requests strictly one at a time; raise it for limited parallelism
)
    .then((r) => {
        console.log(r);
        // r is an array of objects like { data: ..., error: null }:
        // data is set if the request succeeded, otherwise error is non-null
    })
    .catch((e) => {
        console.error(e);
    });
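
If you would rather not pull in Bluebird, a plain async/await loop gives the same one-at-a-time behavior. A minimal sketch, assuming Node 8+ and reusing the urls array and axios_wrapper from above:

(async () => {
    const results = [];
    for (const url of urls) {
        // Each request starts only after the previous one has resolved
        results.push(await axios_wrapper({ method: "GET", url: url }));
    }
    console.log(results);
})();

The Bluebird version stays useful if you later want limited parallelism, since you only have to change the concurrency value.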