Created
October 20, 2024 09:42
-
-
Save supershadoe/8122ce3346b20e9238ddda446b039f9d to your computer and use it in GitHub Desktop.
Scraping links from images using Selenium
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def search_by_image(image_path):
    """Run a Google reverse image search for a local file and print the links.

    Opens https://images.google.com/ in Chrome, clicks the search-by-image
    button, feeds the image to the page's file input, waits for the results
    container and prints ``<aria-label>: <href>`` for every matching link.

    Args:
        image_path: Path to the image to upload. Relative paths are resolved
            against the current working directory before upload.

    Returns:
        None. Results and any error are printed to stdout; exceptions raised
        while driving the browser are caught and reported, not propagated.
    """
    # Selenium fills a file <input> by typing a path into it, and its upload
    # guidance is to pass a full path, so resolve relative ones up front.
    absolute_path = os.path.abspath(image_path)
    if not os.path.isfile(absolute_path):
        # Fail before launching a browser that could not upload anything.
        print(f"An error occurred: image file not found: {absolute_path}")
        return

    driver = webdriver.Chrome()
    try:
        driver.get('https://images.google.com/')

        # NOTE(review): 'Gdd5U', 'aah4tc' and the result selector below look
        # like generated class names from Google's markup; expect them to
        # change without notice.
        # Wait until the button can actually be clicked, not merely until it
        # exists in the DOM, to avoid clicking an element that is not ready.
        search_by_image_button = WebDriverWait(driver, 3).until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'Gdd5U'))
        )
        search_by_image_button.click()

        # Only presence is required here: the input is written to with
        # send_keys and never clicked.
        upload_input = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, 'input[type="file"]'),
            )
        )
        upload_input.send_keys(absolute_path)

        results_div = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'aah4tc'))
        )
        all_links = results_div.find_elements(
            By.CSS_SELECTOR, '.G19kAf.ENn9pd > .Vd9M6 > a'
        )
        if not all_links:
            print("Cannot get the search results.")
            return

        for link in all_links:
            print(
                f"{link.get_attribute('aria-label')}: {link.get_attribute('href')}"
            )
    except Exception as e:
        # Script-level boundary: report any Selenium failure (timeouts,
        # missing elements, driver errors) instead of crashing.
        print(f"An error occurred: {e}")
    finally:
        # Always shut the browser down, including on the early return above.
        driver.quit()
# Example invocation: reverse-search a sample image whose path is given
# relative to the current working directory.
image_path = "city of tears.jpg"
search_by_image(image_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment