Created
July 2, 2025 06:58
-
-
Save AdelMmdi/226bf778c03813c0ba79782dbdc8c8c2 to your computer and use it in GitHub Desktop.
Download JPG images from wallpaper sites for offline storage and long-term backup.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from bs4 import BeautifulSoup | |
| from selenium import webdriver | |
| from selenium.webdriver.edge.service import Service | |
| from selenium.webdriver.common.by import By | |
| from selenium.webdriver.common.keys import Keys | |
| from selenium.webdriver.support.wait import WebDriverWait | |
| from selenium.webdriver.support import expected_conditions as EC | |
| import time | |
| import os, requests | |
def downloadIMagage(path, link, timeout=30):
    """Download ``link`` and write the response body to ``path``.

    Based on
    https://stackoverflow.com/questions/60702271/download-images-from-url-python

    The request is sent with the module-level ``headers`` dict and with TLS
    certificate verification enabled.

    Args:
        path: Destination file path; an existing file is overwritten.
        link: URL of the image to fetch.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without a timeout a stalled server would block
            the whole run.

    Returns:
        None. A response with a non-success HTTP status is reported on
        stdout and nothing is written, so error pages are not saved as
        images.
    """
    print(link, 'Downloading...')
    response = requests.get(link, headers=headers, verify=True, timeout=timeout)
    if not response.ok:
        # Do not store an HTML error page (403, 404, ...) under an image name.
        print('Skipped', link, 'HTTP status', response.status_code)
        return
    print('debug', path)
    with open(path, 'wb') as f:
        f.write(response.content)
# Path to the Microsoft Edge WebDriver executable. Download it from
# https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/?form=MA13LH#downloads
WD = r'C:\Users\AAAdel\AppData\Local\Programs\Python\Python312\msedgedriver.exe'

# Browser-like request headers sent with every plain HTTP request.
headers = {
    'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
}

# links.txt holds the pages to scrape, one URL per line; edit that file to
# choose what gets downloaded.
with open('links.txt', 'r') as links_file:
    urls = links_file.readlines()

for url in urls:
    url = url.strip()
    if not url:
        # Blank line in links.txt: nothing to fetch.
        continue

    # The last path segment of the URL (with '?' removed) names the output
    # directory. Trailing slashes are dropped first so the segment is not empty.
    trimmed = url.rstrip('/')
    ti = trimmed[trimmed.rfind('/')+1:]
    tit = ti.replace('?', '')
    if not tit:
        print('Skipped, no directory name can be derived from', url)
        continue

    # Title shown on stdout: the text before the last '#'.
    # NOTE(review): the original also dropped the character just before '#';
    # that is kept here. Without a '#' the whole name is printed instead of
    # losing its last two characters.
    hash_pos = tit.rfind('#')
    titl = tit[:hash_pos-1] if hash_pos > 0 else tit
    print(titl)

    path = tit
    os.makedirs(path, exist_ok=True)

    try:
        # Method 1 (wallpapercave, wallpapers.com): load the page in headless
        # Edge and read the src attribute of every <img> element.
        options = webdriver.EdgeOptions()
        # The options.headless attribute is no longer honoured by current
        # Selenium 4 releases; headless mode is requested as a browser argument.
        options.add_argument('--headless=new')
        driverService = Service(WD)
        driver = webdriver.Edge(service=driverService, options=options)
        try:
            driver.get(url)
            for element in driver.find_elements(By.TAG_NAME, 'img'):
                link = element.get_attribute('src')
                # Skip <img> elements without a src and anything that is not
                # a .jpg file.
                if link and link.endswith('.jpg'):
                    # link[link.rfind('/'):] keeps the leading '/', so this
                    # yields "<path>/<file name>".
                    downloadIMagage(path+link[link.rfind('/'):], link)
        finally:
            # Always close the browser; otherwise one Edge process per URL
            # is left running.
            driver.quit()
    except Exception as exc:
        # Method 2 (wallpapercat.com): fall back to a plain HTTP request plus
        # HTML parsing when the Selenium path fails. Based on
        # https://stackoverflow.com/questions/72955624/how-to-find-image-current-source-in-html-using-python
        print('Method 1 failed:', exc)
        print('Method 2', url)
        response = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(response.content, "html.parser")
        for img in soup.find_all('img'):
            # Use src when present, otherwise fall back to data-src.
            src = img.get("src") or img.get("data-src")
            if not src:
                continue
            # Resolve any relative URL to an absolute one using the page URL.
            src = requests.compat.urljoin(url, src)
            if 'jpg' not in src:
                continue
            picName = src[src.rfind('/')+1:]
            jpg = path+'/'+picName
            print('verbose', picName)
            print(">>", src)
            # Do not download a file that is already on disk.
            if os.path.exists(jpg):
                print(f"The path '{jpg}' exists.")
            else:
                downloadIMagage(jpg, src)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment