Python - Download Images from google Image search?

I want to download all images from a Google image search using Python. The code I am using sometimes fails. My code is:

import os
import sys
import time
from urllib import FancyURLopener
import urllib2
import simplejson

# Define search term
searchTerm = "parrot"

# Replace spaces ' ' in search term for '%20' in order to comply with request
searchTerm = searchTerm.replace(' ','%20')

# Start FancyURLopener with defined version
# NOTE(review): as indented here, every statement below sits inside the class
# body and the `for` loop has no indented body; the layout looks damaged --
# confirm the intended structure.
class MyOpener(FancyURLopener): 
    version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv: Gecko/20071127     Firefox/'
    myopener = MyOpener()

    # Set count to 0
    count= 0

    for i in range(0,10):
    # Notice that the start changes for each iteration in order to request a
    # new set of images for each loop (start = 0, 10, 20, ... 90).
    # NOTE(review): the base of the URL is an empty string, so only the query
    # string is sent; the endpoint appears to be missing -- confirm.
    url = ('' + 'v=1.0& q='+searchTerm+'&start='+str(i*10)+'&userip=MyIP')
    print url
    request = urllib2.Request(url, None, {'Referer': 'testing'})
    response = urllib2.urlopen(request)

# Get results using JSON
    results = simplejson.load(response)
    # The reported TypeError is raised on the `data['results']` line because
    # `data` is None there, i.e. the decoded reply carried a null
    # 'responseData'. Nothing checks for that before indexing.
    data = results['responseData']
    dataInfo = data['results']

# Iterate for each result and get unescaped url
    for myUrl in dataInfo:
        count = count + 1
        # The URL is only read here; nothing in this snippet downloads it.
        my_url = myUrl['unescapedUrl']

After downloading a few pages, I get the following error:

Traceback (most recent call last):

  File "C:\Python27\", line 37, in <module>
    dataInfo = data['results']
TypeError: 'NoneType' object has no attribute '__getitem__'

What should I do?

Solution 1:

I have modified my code. It can now download 100 images for a given query, and it downloads the original, full-resolution images.

I am downloading the images using urllib2 and Beautiful Soup:

from bs4 import BeautifulSoup
import requests
import re
import urllib2
import os
import cookielib
import json

def get_soup(url,header):
    """Download *url* and return its HTML parsed into a BeautifulSoup tree.

    url    -- address of the page to fetch.
    header -- dict of HTTP request headers sent with the request.

    Errors raised by urllib2 while opening the URL propagate to the caller.
    """
    # Local import keeps this block self-contained. closing() is used because
    # the urllib2 response is not a context manager in Python 2, and without
    # it the connection is never explicitly released.
    from contextlib import closing

    request = urllib2.Request(url, headers=header)
    with closing(urllib2.urlopen(request)) as response:
        # The parser reads the whole stream while building the tree, so the
        # response can be closed as soon as the constructor returns.
        return BeautifulSoup(response, 'html.parser')

query = raw_input("query image")# you can change the query for the image  here
query= query.split()
print url
#add the directory for your image here
header={'User-Agent':"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"
soup = get_soup(url,header)

ActualImages=[]# contains the link for Large original images, type of  image
for a in soup.find_all("div",{"class":"rg_meta"}):
    link , Type =json.loads(a.text)["ou"]  ,json.loads(a.text)["ity"]

print  "there are total" , len(ActualImages),"images"

if not os.path.exists(DIR):
DIR = os.path.join(DIR, query.split()[0])

if not os.path.exists(DIR):
###print images
for i , (img , Type) in enumerate( ActualImages):
        req = urllib2.Request(img, headers={'User-Agent' : header})
        raw_img = urllib2.urlopen(req).read()

        cntr = len([i for i in os.listdir(DIR) if image_type in i]) + 1
        print cntr
        if len(Type)==0:
            f = open(os.path.join(DIR , image_type + "_"+ str(cntr)+".jpg"), 'wb')
        else :
            f = open(os.path.join(DIR , image_type + "_"+ str(cntr)+"."+Type), 'wb')

    except Exception as e:
        print "could not load : "+img
        print e

I hope this helps you.

Solution 2:

The Google Image Search API is deprecated; you need to use Google Custom Search for what you want to achieve. To fetch the images, you need to do this:

import urllib2
import simplejson
import cStringIO

# Opener used to issue the search request.
fetcher = urllib2.build_opener()
searchTerm = 'parrot'
startIndex = 0
# NOTE(review): the endpoint was an empty string in the original snippet; the
# Google AJAX image-search endpoint is assumed here -- confirm.
# startIndex is an int, so it must be converted before concatenation.
searchUrl = ("https://ajax.googleapis.com/ajax/services/search/images?v=1.0&q="
             + searchTerm + "&start=" + str(startIndex))
# Open the URL with the opener built above and decode the JSON reply.
f = fetcher.open(searchUrl)
deserialized_output = simplejson.load(f)

This will give you 4 results as JSON. To get more, request the results iteratively, incrementing startIndex in each API request.

To get the images you need to use a library like cStringIO.

For example, to access the first image, you need to do this:

# Pull the first result's image URL out of the decoded JSON reply.
imageUrl = deserialized_output['responseData']['results'][0]['unescapedUrl']
# Download the image bytes and wrap them in an in-memory file-like object.
# NOTE(review): `urllib` is not among this snippet's imports (only urllib2,
# simplejson and cStringIO are) -- confirm which module was intended.
file = cStringIO.StringIO(urllib.urlopen(imageUrl).read())
# NOTE(review): the right-hand side of this assignment is missing from the
# snippet; presumably an image-library call that opens `file` -- confirm.
img =

Solution 3:

Google deprecated their API and scraping Google is complicated, so I would suggest using the Bing API instead to download images automatically. The pip package bing-image-downloader allows you to easily download an arbitrary number of images to a directory with a single line of code.

from bing_image_downloader import downloader

# Example search phrase; replace it with your own query.
# NOTE(review): the query argument was missing from the original line -- confirm.
query_string = 'parrot'

# Download up to `limit` images for the query into `output_dir`.
downloader.download(query_string, limit=100, output_dir='dataset',
                    adult_filter_off=True, force_replace=False, timeout=60,
                    verbose=True)

Google is not so good, and Microsoft is not so evil

Solution 4:

Here's my latest google image snarfer, written in Python, using Selenium and headless Chrome.

It requires python-selenium, the chromium-driver, and a module called retry from pip.


Example Usage: tiger 10 --opts isz:lt,islt:svga,itp:photo > urls.txt
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
(i=0; while read url; do wget -e robots=off -T10 --tries 10 -U"$user_agent" "$url" -O`printf %04d $i`.jpg & i=$(($i+1)) ; [ $(($i % $parallel)) = 0 ] && wait; done < urls.txt; wait)

Help Usage:

$ --help
usage: [-h] [--safe SAFE] [--opts OPTS] query n

Fetch image URLs from Google Image Search.

positional arguments:
  query        image search query
  n            number of images (approx)

optional arguments:
  -h, --help   show this help message and exit
  --safe SAFE  safe search [off|active|images]
  --opts OPTS  search options, e.g.


#!/usr/bin/env python3

# requires: selenium, chromium-driver, retry

import argparse
import logging
import sys
import time
import urllib.parse

from retry import retry
from selenium import webdriver
import selenium.common.exceptions as sel_ex
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Log to stderr at INFO level; `logger` is the root logger.
logging.basicConfig(stream=sys.stderr, level=logging.INFO)
logger = logging.getLogger()
# Passed as `logger=` to every @retry decorator below.
retry_logger = None

# CSS selectors for the thumbnail images, the enlarged image, and
# (judging by the name) the "load more" control of the results page.
css_thumbnail = "img.Q4LuWd"
css_large = "img.n3VNCb"
css_load_more = ".mye4qd"

# Selenium errors that the click retry is configured to catch.
selenium_exceptions = (
    sel_ex.ElementClickInterceptedException,
    sel_ex.ElementNotInteractableException,
    sel_ex.StaleElementReferenceException,
)

def scroll_to_end(wd):
    """Scroll the page loaded in driver *wd* to the bottom of the document."""
    script = "window.scrollTo(0, document.body.scrollHeight);"
    wd.execute_script(script)

@retry(exceptions=KeyError, tries=6, delay=0.1, backoff=2, logger=retry_logger)
def get_thumbnails(wd, want_more_than=0):
    """Return the thumbnail elements currently present on the page.

    wd             -- Selenium WebDriver showing the results page.
    want_more_than -- number of thumbnails already known; the call only
                      succeeds once the page holds more than this many.

    Raises KeyError("no new thumbnails") when the page does not yet contain
    more than *want_more_than* thumbnails. The @retry decorator re-invokes
    the function on KeyError, so the error only reaches the caller after the
    configured attempts are used up.
    """
    # find_elements(By..., ...) is available in both Selenium 3 and 4; the
    # find_elements_by_* helpers were removed in Selenium 4.3.
    thumbnails = wd.find_elements(By.CSS_SELECTOR, css_thumbnail)
    if len(thumbnails) <= want_more_than:
        raise KeyError("no new thumbnails")
    return thumbnails

@retry(exceptions=KeyError, tries=6, delay=0.1, backoff=2, logger=retry_logger)
def get_image_src(wd):
    """Return the `src` URLs of the enlarged images currently on the page.

    wd -- Selenium WebDriver showing the results page.

    Only sources that start with "http" and do not point at the thumbnail
    host are kept. Raises KeyError("no large image") when none qualifies;
    the @retry decorator re-invokes the function on KeyError, so the error
    only reaches the caller after the configured attempts are used up.
    """
    # NOTE(review): the excluded prefix was an empty string in the original,
    # which matches every URL and therefore rejected every source. Google's
    # encrypted thumbnail host is assumed to be the intended prefix -- confirm.
    thumbnail_prefix = "https://encrypted-tbn0.gstatic.com/"

    # find_elements(By..., ...) is available in both Selenium 3 and 4; the
    # find_elements_by_* helpers were removed in Selenium 4.3.
    actual_images = wd.find_elements(By.CSS_SELECTOR, css_large)
    sources = []
    for img in actual_images:
        src = img.get_attribute("src")
        # get_attribute returns None when the element has no such attribute.
        if not src:
            continue
        if src.startswith("http") and not src.startswith(thumbnail_prefix):
            sources.append(src)
    if not sources:
        raise KeyError("no large image")
    return sources

@retry(exceptions=selenium_exceptions, tries=6, delay=0.1, backoff=2, logger=retry_logger)
def retry_click(el):
    """Click element *el*.

    The @retry decorator re-invokes the function when one of
    `selenium_exceptions` is raised, so such an error only reaches the caller
    after the configured attempts are used up.
    """
    # The original function had no body; clicking the element is what the
    # name and the retried exception types call for.
    el.click()

def get_images(wd, start=0, n=20, out=None):
    """Collect up to about *n* image URLs from the loaded results page.

    wd    -- Selenium WebDriver already showing the results page.
    start -- accepted for interface compatibility; not used.
    n     -- approximate number of image URLs wanted.
    out   -- optional text stream; each new URL is printed to it as found.

    Returns the list of distinct source URLs in the order they were found.

    NOTE(review): the original text had `except` clauses without `try`,
    `if`/`for` lines without bodies, and never called scroll_to_end or
    retry_click. The scroll, click, append, `else`, `break` and `continue`
    statements below are reconstructed from that structure -- confirm.
    """
    # Phase 1: scroll until enough thumbnails are loaded or no more appear.
    thumbnails = []
    count = len(thumbnails)
    while count < n:
        scroll_to_end(wd)
        try:
            thumbnails = get_thumbnails(wd, want_more_than=count)
        except KeyError:
            logger.warning("cannot load enough thumbnails")
            # Without the break the loop would spin forever on a short page.
            break
        count = len(thumbnails)

    # Phase 2: click each thumbnail and read the enlarged image's source.
    sources = []
    for tn in thumbnails:
        try:
            retry_click(tn)
        except selenium_exceptions:
            logger.warning("main image click failed")
            continue

        sources1 = []
        try:
            sources1 = get_image_src(wd)
        except KeyError:
            # No enlarged image found; fall back to the thumbnail below.
            pass

        if not sources1:
            tn_src = tn.get_attribute("src")
            # get_attribute may return None; treat that like a data URL.
            if tn_src and not tn_src.startswith("data"):
                logger.warning("no src found for main image, using thumbnail")
                sources1 = [tn_src]
            else:
                logger.warning("no src found for main image, thumbnail is a data URL")

        for src in sources1:
            if src not in sources:
                sources.append(src)
                if out:
                    print(src, file=out)

        if len(sources) >= n:
            break
    return sources

def google_image_search(wd, query, safe="off", n=20, opts='', out=None):
    """Run an image search for *query* and return the image URLs found.

    wd    -- Selenium WebDriver used to load the results page.
    query -- search phrase; URL-quoted before use.
    safe  -- safe-search setting inserted into the URL.
    n     -- approximate number of image URLs wanted.
    opts  -- search options string; URL-quoted and sent as `tbs`.
    out   -- optional text stream forwarded to get_images.

    Returns the list of source URLs produced by get_images.
    """
    # NOTE(review): the original template began at "{safe}&site=..." with no
    # scheme, host or parameter name; the Google search endpoint and the
    # `safe=` parameter name are assumed here -- confirm.
    search_url_t = "https://www.google.com/search?safe={safe}&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img&tbs={opts}"
    search_url = search_url_t.format(q=urllib.parse.quote(query), opts=urllib.parse.quote(opts), safe=safe)
    # The original built the URL but never loaded it, so get_images would
    # have run against whatever page the driver happened to show.
    wd.get(search_url)
    sources = get_images(wd, n=n, out=out)
    return sources

def main():
    """Parse the command line, run the search and print image URLs to stdout."""
    parser = argparse.ArgumentParser(description='Fetch image URLs from Google Image Search.')
    parser.add_argument('--safe', type=str, default="off", help='safe search [off|active|images]')
    parser.add_argument('--opts', type=str, default="", help='search options, e.g. isz:lt,islt:svga,itp:photo,ic:color,ift:jpg')
    parser.add_argument('query', type=str, help='image search query')
    parser.add_argument('n', type=int, default=20, help='number of images (approx)')
    args = parser.parse_args()

    opts = Options()
    # NOTE(review): the accompanying text describes this tool as using
    # headless Chrome, but no headless flag was present -- confirm.
    opts.add_argument("--headless")
    # Optional: uncomment to stop Chrome from loading image data.
    # opts.add_argument("--blink-settings=imagesEnabled=false")
    with webdriver.Chrome(options=opts) as wd:
        # The original call had an empty argument ("args.query,,"), a syntax
        # error that also dropped the parsed --safe value.
        google_image_search(wd, args.query, safe=args.safe, n=args.n, opts=args.opts, out=sys.stdout)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()


Solution 5:

I haven't looked into your code, but here is an example solution using Selenium that tries to get 400 pictures for the search term:

# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import json
import os
import urllib2

searchterm = 'vannmelon' # will also be the name of the folder
url = ""+searchterm+"&source=lnms&tbm=isch"
browser = webdriver.Firefox()
header={'User-Agent':"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"}
counter = 0
succounter = 0

if not os.path.exists(searchterm):

for _ in range(500):

for x in browser.find_elements_by_xpath("//div[@class='rg_meta']"):
    counter = counter + 1
    print "Total Count:", counter
    print "Succsessful Count:", succounter
    print "URL:",json.loads(x.get_attribute('innerHTML'))["ou"]

    img = json.loads(x.get_attribute('innerHTML'))["ou"]
    imgtype = json.loads(x.get_attribute('innerHTML'))["ity"]
        req = urllib2.Request(img, headers={'User-Agent': header})
        raw_img = urllib2.urlopen(req).read()
        File = open(os.path.join(searchterm , searchterm + "_" + str(counter) + "." + imgtype), "wb")
        succounter = succounter + 1
            print "can't get img"

print succounter, "pictures succesfully downloaded"