Last active
April 16, 2021 10:17
-
-
Save RahulDas-dev/d741a328d12fcf36350d34cc1912c852 to your computer and use it in GitHub Desktop.
Image Download using Python3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import shutil

import pandas as pd
import requests
# Path of the CSV file that lists the item ids and image URLs.
# Change this variable to point to a suitable CSV file.
datadir = '/your/path/to/url_csv/file/ulfile.csv'
# Directory the downloaded images are written to.
# Change this variable to point to the image download directory; the
# directory must already exist, it is not created by this script.
downloadDir = '/your/path/to/download/directory'

# Fail fast, before anything is read or downloaded, when a location is
# missing. FileNotFoundError is still caught by `except Exception`.
if not os.path.exists(datadir):
    raise FileNotFoundError(' data dir {} not found'.format(datadir))

if not os.path.exists(downloadDir):
    # The message names the download directory so the user knows which of
    # the two settings has to be corrected.
    raise FileNotFoundError(' download dir {} not found'.format(downloadDir))
# Only the item identifier and the image URL columns are read from the CSV.
target_col = ['Item #', 'Image url']
target_df = pd.read_csv(
    datadir,
    sep='^',
    encoding='ISO-8859-1',
    usecols=target_col,
    index_col=None,
)
# Shorter column names for the rest of the script.
target_df.rename(columns={'Item #': 'id', 'Image url': 'url'}, inplace=True)
# drop_duplicates() returns a new frame; the result has to be assigned back,
# otherwise repeated (id, url) rows are kept. The original index labels are
# preserved, so they still identify the row of the CSV.
target_df = target_df.drop_duplicates()

print('Dtatframe Shape : {}'.format(target_df.shape))
print('Unique Image Id Count {}'.format(target_df['id'].nunique()))
print('Unique Image URL Count {}'.format(target_df['url'].nunique()))
# Notebook-style preview: displays the first rows only when this is the last
# expression of an interactive cell; it prints nothing when run as a script.
target_df.head()
# Seconds to wait for a server to connect / send data before giving up on a
# URL. requests applies no timeout by default, so a single stalled server
# would otherwise block the whole run.
REQUEST_TIMEOUT = 30

# Number of images successfully downloaded by this run.
imageCounter = 0
image_count = len(os.listdir(downloadDir))
print('Total Number Image present in Directory before downLoad {}'.format(image_count))

for index, row in target_df.iterrows():
    # Every image is stored as "<id>.PNG" inside the download directory.
    newFilefileName = str(row['id']) + '.PNG'
    download_path = os.path.join(downloadDir, newFilefileName)

    # Files left by an earlier run are not downloaded again.
    if os.path.exists(download_path):
        print('Image {} is present in Download directory'.format(newFilefileName))
        continue

    try:
        # The context manager releases the streamed connection even when the
        # transfer fails part-way.
        with requests.get(str(row['url']), stream=True, timeout=REQUEST_TIMEOUT) as r:
            # Reject 4xx/5xx answers so an HTML error page is never stored
            # (and counted) as an image.
            r.raise_for_status()
            # Let urllib3 undo gzip/deflate transfer encoding while copying.
            r.raw.decode_content = True
            with open(download_path, 'wb') as f:
                shutil.copyfileobj(r.raw, f)
    except Exception as e:
        # Deliberately broad: the run is best-effort and one bad row (invalid
        # URL, network error, unwritable file name) must not stop the rest.
        print(e)
        # Remove a partially written file; otherwise the next run would find
        # it and treat the image as already downloaded.
        try:
            os.remove(download_path)
        except OSError:
            # Nothing was written, or the file cannot be removed.
            pass
    else:
        imageCounter = imageCounter + 1
    finally:
        # Progress line for every attempted row, successful or not.
        print('{} : {}'.format(index, row['id']))

print('Total Number Image DownLoaded {}'.format(imageCounter))
image_count = len(os.listdir(downloadDir))
print('Total Number Image present in Directory after downLoad {}'.format(image_count))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment