Skip to content Skip to sidebar Skip to footer

Scraping Google Images Using Selenium In Python

Now, I have been trying to scrape Google Images using the following code: from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.common…

Solution 1:

Replacing:

driver.find_elements_by_class_name('rg_meta') with driver.find_element_by_xpath('//div[@class="rg_meta"]/text()')

and a.text with a

will resolve your issue.

The resulting code:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys 
import os
import time
import requests
import re
import urllib2
import re
from threading import Thread
import json
# Assuming a folder named Pictures1 exists; the images are downloaded into it.
def threaded_func(url,i):
    """Download the image at *url* and save it inside the ``Pictures1`` folder.

    The file is written as ``Pictures1/<image_type>_<i>``, where
    ``image_type`` is the module-level label defined by the script.

    Args:
        url: Address of the image to fetch.
        i: Sequence number used as the suffix of the saved file's name.

    Note:
        Errors from the download or from opening the target file are not
        caught here; the ``Pictures1`` folder must already exist.
    """
    response = urllib2.urlopen(url)
    try:
        raw_img = response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()
    # Build the name from the index passed to this call so the result does
    # not depend on the value of a global counter when the thread runs.
    target = "Pictures1/" + image_type + "_" + str(i)
    with open(target, 'wb') as f:
        f.write(raw_img)
# Open Google Images in Firefox and submit the search query.
driver = webdriver.Firefox()
driver.get("https://images.google.com/")
# NOTE(review): absolute XPath to the search box; it is tied to the page
# layout and will break if the markup changes.
elem = driver.find_element(By.XPATH, '/html/body/div/div[3]/div[3]/form/div[2]/div[2]/div[1]/div[1]/div[3]/div/div/div[2]/div/input[1]')
elem.clear()
elem.send_keys("parrot")
elem.send_keys(Keys.RETURN)
# Label used by threaded_func as the prefix of every saved file name.
image_type = "parrot_defG"
images=[]
total=0
# Fixed pause to give the result page time to load before it is queried.
time.sleep(10)
# A Selenium locator has to resolve to elements, so an XPath ending in
# text() cannot be used, and find_element (singular) returns one element
# that cannot be looped over. Collect the matching <div> elements and read
# each one's textContent instead: unlike .text it does not depend on the
# element being displayed, and unlike innerHTML it does not HTML-escape
# characters such as "&" inside the URLs.
for a in driver.find_elements(By.XPATH, '//div[contains(@class, "rg_meta")]'):
    # Each metadata div holds a JSON document; "ou" is the image URL.
    link = json.loads(a.get_attribute('textContent'))["ou"]
    thread = Thread(target = threaded_func, args = (link,total))
    thread.start()
    # Joining right away keeps the downloads sequential, so the counter
    # below is only advanced after the current file has been written.
    thread.join()
    total+=1

Printing link yields:

http://media.web.britannica.com/eb-media/89/89689-004-4C85E0F0.jpg

Post a Comment for "Scraping Google Images Using Selenium In Python"