How to identify a directory in getBlobList request in Azure Data Lake
I am making a List Blobs call against a container in Azure Data Lake Storage that contains both blobs and directories.
The request looks like this:
{
url: 'https://{account}.blob.core.windows.net/container-1?comp=list&restype=container&prefix=directory-1/&maxresults=100',
method: 'GET',
headers: {
'x-ms-date': 'Tue, 18 Jan 2022 05:58:28 GMT',
'x-ms-version': '2019-02-02',
Authorization: 'Bearer {Token}'
}
}
and the response looks something like this:
{
"EnumerationResults": {
"Prefix": "directory-1/",
"MaxResults": "100",
"Blobs": {
"Blob": [
{
"Name": "directory-1/directory1-1",
"Properties": {
"Creation-Time": "Wed, 12 Jan 2022 10:56:50 GMT",
"Last-Modified": "Wed, 12 Jan 2022 10:56:50 GMT",
"Etag": "0x8D9D5BA3C8CC53E",
"Content-Length": "0",
"Content-Type": "application/octet-stream",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "AAAAAAAAAAA=",
"Content-MD5": "",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/directory1-1/file1-1-1.csv",
"Properties": {
"Creation-Time": "Thu, 13 Jan 2022 13:08:34 GMT",
"Last-Modified": "Thu, 13 Jan 2022 13:08:34 GMT",
"Etag": "0x8D9D695CDD159F1",
"Content-Length": "80205",
"Content-Type": "text/csv",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "D/UezNpgI+t6xFpVw3tUGA==",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/file1-1.csv",
"Properties": {
"Creation-Time": "Wed, 12 Jan 2022 05:45:28 GMT",
"Last-Modified": "Thu, 13 Jan 2022 14:04:43 GMT",
"Etag": "0x8D9D69DA5DA0F28",
"Content-Length": "65",
"Content-Type": "text/csv",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "Xqyu+Y7Jhxu2n7INUROqNg==",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/file1-2.json",
"Properties": {
"Creation-Time": "Wed, 12 Jan 2022 05:45:28 GMT",
"Last-Modified": "Thu, 13 Jan 2022 14:07:17 GMT",
"Etag": "0x8D9D69E01C01B66",
"Content-Length": "414",
"Content-Type": "application/json",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "xxdWz9XwRegDoYI+OrG6tg==",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/file1-3.jpeg",
"Properties": {
"Creation-Time": "Wed, 12 Jan 2022 05:45:28 GMT",
"Last-Modified": "Wed, 12 Jan 2022 05:45:28 GMT",
"Etag": "0x8D9D58EBD29AFA4",
"Content-Length": "172946",
"Content-Type": "image/jpeg",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "JVLMabvgKvlALNE4V/7eaA==",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/test",
"Properties": {
"Creation-Time": "Thu, 13 Jan 2022 08:56:13 GMT",
"Last-Modified": "Thu, 13 Jan 2022 08:56:13 GMT",
"Etag": "0x8D9D6728D3B1933",
"Content-Length": "0",
"Content-Type": "",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/test/:file.csv",
"Properties": {
"Creation-Time": "Thu, 13 Jan 2022 08:56:13 GMT",
"Last-Modified": "Thu, 13 Jan 2022 08:56:13 GMT",
"Etag": "0x8D9D6728D3FD74D",
"Content-Length": "14",
"Content-Type": "text/csv",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "0X493GkdoXENg7klv3zR8g==",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
},
{
"Name": "directory-1/test/file.csv",
"Properties": {
"Creation-Time": "Thu, 13 Jan 2022 08:59:10 GMT",
"Last-Modified": "Thu, 13 Jan 2022 08:59:10 GMT",
"Etag": "0x8D9D672F701A8DA",
"Content-Length": "14",
"Content-Type": "text/csv",
"Content-Encoding": "",
"Content-Language": "",
"Content-CRC64": "",
"Content-MD5": "0X493GkdoXENg7klv3zR8g==",
"Cache-Control": "",
"Content-Disposition": "",
"BlobType": "BlockBlob",
"AccessTier": "Hot",
"AccessTierInferred": "true",
"LeaseStatus": "unlocked",
"LeaseState": "available",
"ServerEncrypted": "true"
}
}
]
},
"NextMarker": "",
"_ServiceEndpoint": "https://{account}.blob.core.windows.net/",
"_ContainerName": "container-1"
}
}
Here, some of the results are actual blobs/files while others are directories. How can I differentiate between a directory and a file by looking at the response?
PS: Initially I thought a content-type of application/octet-stream would indicate a directory and any other content-type a blob/file, but that doesn't work either, because all xlsx files also have application/octet-stream in the Azure Data Lake Storage response.
Complete NodeJS code is:
const request = require('request')

// Placeholder values: substitute a real storage account name and bearer token.
const account = 'add your account here'
const containerName = 'container-1'
const BearerToken = 'Add your token here'

// Timestamp sent in the x-ms-date header, formatted as a UTC string.
const strTime = new Date().toUTCString()

// List request for the container, restricted to the "directory-1/" prefix.
// Note that the `delimiter` query parameter is present but left empty.
const listUrl = `https://${account}.blob.core.windows.net/${containerName}?comp=list&restype=container&prefix=directory-1/&delimiter=`

const headers = {
  Authorization: `Bearer ${BearerToken}`,
  'x-ms-date': strTime,
  'x-ms-version': '2019-02-02' // pinned storage service API version
}

const options = { url: listUrl, headers }
/**
 * Completion handler for the list request.
 *
 * When the request failed, logs the error and stops; otherwise logs the
 * response body as received.
 *
 * @param {Error|null} error - Error reported by the HTTP client, if any.
 * @param {object} response - Response object supplied by the HTTP client (unused).
 * @param {*} body - Response body to print.
 */
function callback (error, response, body) {
  if (error) {
    console.log(error)
    // Stop here: on failure there is no body, and falling through would
    // print a misleading `undefined` after the error.
    return
  }
  console.log(body)
}
// Send the request described by `options`; the outcome is handed to `callback`.
request(options, callback)
As mentioned in the comments, you will need to add the delimiter=/
parameter to your request. Your request would then look something like this:
https://{account}.blob.core.windows.net/container-1?comp=list&restype=container&prefix=directory-1/&maxresults=100&delimiter=/
When your request includes the delimiter
parameter, you will see the virtual folders/directories returned under the BlobPrefix
element in the response.
Please see this link to learn more about the delimiter parameter: https://docs.microsoft.com/en-us/rest/api/storageservices/list-blobs#remarks ("Using a Delimiter to Traverse the Blob Namespace" section).