Downloading a Large Folder From Google-Drive Using Python
As kite mentioned in the comments, call it with the remaining_ok
flag:
# Pass remaining_ok=True, the flag named in gdown's own warning message.
gdown.download_folder(url, remaining_ok=True)
This flag isn't mentioned on https://pypi.org/project/gdown/, so it may cause some confusion.
No reference documentation for remaining_ok
is available apart from the warning message and this GitHub code.
EDIT:
It seems that gdown
is strictly limited to 50 files, and I haven't found a way of circumventing it.
If using something other than gdown
is an option, then see the code below.
Script:
import io
import os
import os.path
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from google.oauth2 import service_account
# Service-account key, inlined as a plain dict.
# Create a service account and paste the contents of its JSON key file here:
# https://cloud.google.com/docs/authentication/getting-started#creating_a_service_account
# A credentials.json file has the following shape:
credential_json = {
    "type": "service_account",
    "project_id": "*********",
    "private_key_id": "*********",
    "private_key": "-----BEGIN PRIVATE KEY-----\n*********\n-----END PRIVATE KEY-----\n",
    "client_email": "service-account@*********.iam.gserviceaccount.com",
    "client_id": "*********",
    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
    "token_uri": "https://oauth2.googleapis.com/token",
    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
    "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/service-account%40*********.iam.gserviceaccount.com",
}
# Turn the inlined key dict into service-account credentials.
credentials = service_account.Credentials.from_service_account_info(
    credential_json
)
# Drive v3 API client that sends its requests with those credentials.
drive_service = build(
    'drive',
    'v3',
    credentials=credentials,
)
# ID of the Google Drive folder whose files will be downloaded.
folderId = '### Google Drive Folder ID ###'
# Local directory that receives the downloaded files.
outputFolder = 'output'

# Create the output folder if it does not exist yet. makedirs(exist_ok=True)
# replaces the isdir()/mkdir() pair: there is no check-then-create race, and
# it keeps working if outputFolder is later changed to a nested path.
os.makedirs(outputFolder, exist_ok=True)
# Collect the id and name of every file whose parent is folderId, following
# the API's pagination until the response carries no further page token.
items = []
# Start with None rather than "": the first request should carry no page
# token at all (the client library drops parameters whose value is None).
pageToken = None
while True:
    response = drive_service.files().list(
        q=f"'{folderId}' in parents",
        pageSize=1000,
        pageToken=pageToken,
        fields="nextPageToken, files(id, name)",
    ).execute()
    items.extend(response.get('files', []))
    pageToken = response.get('nextPageToken')
    if pageToken is None:
        # No nextPageToken in the response: this was the last page.
        break
# Download every listed file into outputFolder, one file at a time.
for file in items:
    file_id = file['id']
    file_name = file['name']
    request = drive_service.files().get_media(fileId=file_id)
    # Saves all files under outputFolder.
    # NOTE(review): file_name comes straight from the API response; a name
    # containing a path separator would break or escape this target path.
    target_path = outputFolder + '/' + file_name
    # The context manager closes the handle even if a chunk download raises;
    # previously every file left an open descriptor behind.
    with io.FileIO(target_path, 'wb') as fh:
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            # next_chunk() returns (status, done); only the flag is needed.
            _, done = downloader.next_chunk()
    print(f'{file_name} downloaded completely.')
References:
- https://cloud.google.com/docs/authentication/getting-started#creating_a_service_account
# Notebook cell: lines starting with `!` are shell escapes run by the notebook,
# not Python statements. They remove gdown and reinstall it without the pip cache.
!pip uninstall --yes gdown # After running this line, restart Colab runtime.
!pip install gdown -U --no-cache-dir
import gdown
# Shared Google Drive folder to fetch.
url = r'https://drive.google.com/drive/folders/1sWD6urkwyZo8ZyZBJoJw40eKK0jDNEni'
# Download the folder's contents with gdown; no remaining_ok flag is passed here.
gdown.download_folder(url)