Skip to content

Instantly share code, notes, and snippets.

@RahulDas-dev
Last active April 16, 2021 10:17
Show Gist options
  • Save RahulDas-dev/d741a328d12fcf36350d34cc1912c852 to your computer and use it in GitHub Desktop.
Save RahulDas-dev/d741a328d12fcf36350d34cc1912c852 to your computer and use it in GitHub Desktop.
Image Download using Python3
import os
import shutil
import pandas as pd
import requests
# Path to the CSV file listing the image ids and URLs.
# Change this to point at a suitable CSV file.
datadir = '/your/path/to/url_csv/file/ulfile.csv'
# Directory the images are saved into.
# Change this to point at the image download directory.
downloadDir = '/your/path/to/download/directory'

# Fail early with a clear message. isfile/isdir (rather than exists) also
# rejects a directory given as the CSV path or a regular file given as the
# download directory, which would otherwise only fail later on.
if not os.path.isfile(datadir):
    raise FileNotFoundError(' data file {} not found'.format(datadir))
if not os.path.isdir(downloadDir):
    raise FileNotFoundError(' download dir {} not found'.format(downloadDir))
# Read only the two columns needed from the '^'-separated CSV and give them
# short names used by the rest of the script.
target_col = ['Item #', 'Image url']
target_df = pd.read_csv(
    datadir,
    sep='^',
    encoding='ISO-8859-1',
    usecols=target_col,
    index_col=None,
)
target_df = target_df.rename(columns={'Item #': 'id', 'Image url': 'url'})
# drop_duplicates() returns a new DataFrame; the result has to be assigned,
# otherwise the duplicate rows are still present afterwards.
target_df = target_df.drop_duplicates()

# Summary of what was loaded.
print('Dtatframe Shape : {}'.format(target_df.shape))
print('Unique Image Id Count {}'.format(target_df['id'].nunique()))
print('Unique Image URL Count {}'.format(target_df['url'].nunique()))
# Download every image listed in target_df into downloadDir as '<id>.PNG',
# skipping files that already exist, and report counts before and after.
imageCounter = 0
image_count = len(os.listdir(downloadDir))
print('Total Number Image present in Directory before downLoad {}'.format(image_count))

for index, row in target_df.iterrows():
    newFilefileName = str(row['id']) + '.PNG'
    download_path = os.path.join(downloadDir, newFilefileName)
    if os.path.exists(download_path):
        print('Image {} is present in Download directory'.format(newFilefileName))
        continue

    # Write to a temporary name first so an interrupted or failed transfer
    # never leaves a truncated file that a later run would treat as present.
    partial_path = download_path + '.part'
    try:
        # The timeout keeps one unresponsive server from stalling the whole
        # run; the context manager releases the connection when done.
        with requests.get(str(row['url']), stream=True, timeout=30) as r:
            # Do not save HTTP error pages (404, 500, ...) as images.
            r.raise_for_status()
            r.raw.decode_content = True
            with open(partial_path, 'wb') as f:
                shutil.copyfileobj(r.raw, f)
        os.replace(partial_path, download_path)
        imageCounter = imageCounter + 1
    except Exception as e:
        # Best effort per row: report the failure and continue with the next
        # image. Kept broad on purpose, since reading r.raw can raise errors
        # that are not requests exceptions.
        print(e)
        if os.path.exists(partial_path):
            os.remove(partial_path)
    finally:
        print('{} : {}'.format(index, row['id']))

print('Total Number Image DownLoaded {}'.format(imageCounter))
image_count = len(os.listdir(downloadDir))
print('Total Number Image present in Directory after downLoad {}'.format(image_count))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment