Forked from krisrak/picodash_export_url_download.py
Last active
September 7, 2021 17:47
-
-
Save mbarkhau/7f323c8afa0d177179d3cae0c5fcd0a9 to your computer and use it in GitHub Desktop.
Python script that downloads every URL listed in a chosen column of a CSV file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
"""Download every URL found in one column of a CSV file.

Usage:
    $ picodash_export_url_download.py data.csv image_url

- First param is the csv file path.
- Second param is the name of the column (as spelled in the header row)
  whose cells hold the URLs to download. A single cell may hold several
  URLs separated by newlines.

Files are saved into a directory named ``<csv path minus ".csv">-<column>``
and each file is named ``<date>_<column>_<last URL path segment>``.
"""
import os
import sys
import csv
import urllib

try:
    # Python 3 moved urlretrieve into urllib.request. The plain
    # ``urllib.urlretrieve`` only exists on Python 2, so on Python 3 every
    # download used to fail and was reported as "Could not download url".
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve

# Index of the column whose first space-separated token prefixes every saved
# file name. NOTE(review): presumably the export's timestamp column -- confirm
# against the CSV schema.
DATE_COL_INDEX = 3

# A failed download is retried once before the URL is reported as failed.
DOWNLOAD_ATTEMPTS = 2


def _find_column(header, name):
    """Return the index of the last cell in *header* equal to *name*, or None."""
    found = None
    for index, cell in enumerate(header):
        if cell == name:
            found = index
    return found


def _output_directory(csv_path, column):
    """Return the download directory for *csv_path* and *column*.

    Only a trailing ".csv" is removed, so a ".csv" occurring earlier in the
    path no longer truncates the directory name.
    """
    suffix = '.csv'
    base = csv_path[:-len(suffix)] if csv_path.endswith(suffix) else csv_path
    return base + "-" + column


def _download(url, target):
    """Fetch *url* into the file *target*, retrying on failure.

    Returns None on success, otherwise the exception raised by the final
    attempt so the caller can report why the download failed.
    """
    error = None
    for _ in range(DOWNLOAD_ATTEMPTS):
        try:
            urlretrieve(url, target)
            return None
        except Exception as err:  # best effort: one bad URL must not stop the run
            error = err
    return error


def main(argv=None):
    """Run the downloader and return a process exit status.

    argv: list of command-line arguments without the program name
          (defaults to ``sys.argv[1:]``): the csv path, then the column name.

    Returns 0 when the CSV was processed (individual download failures are
    only reported, not fatal) and 1 when the arguments are missing or the
    requested column is not in the header row.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        print("\nERROR: Please specify filename and url column name to download\n")
        print("Usage:")
        print(" $ picodash_export_url_download.py data.csv image_url\n")
        print("- First param should be the csv file path")
        print("- Second param should be the column name that has image urls to download\n")
        return 1

    filename, url_name = args[0], args[1]
    directory = _output_directory(filename, url_name)

    # open csv file to read
    with open(filename, 'r') as csvfile:
        csv_reader = csv.reader(csvfile)

        # the first row names the columns; an empty file has nothing to do
        header = next(csv_reader, None)
        if header is None:
            return 0

        url_col = _find_column(header, url_name)
        if url_col is None:
            print("\nERROR: url column name '" + url_name + "' not found, available options:")
            for col in header:
                print(" " + col)
            print("\nUsage:")
            print(" $ picodash_export_url_download.py data.csv image_url\n")
            return 1

        # data rows are numbered from 1, the header being row 0
        for row_index, row in enumerate(csv_reader, start=1):
            tag = "[" + str(row_index) + "] "

            # blank or short rows are treated as having no URL / no date
            # instead of aborting the whole run with an IndexError
            cell = row[url_col] if len(row) > url_col else ''
            if len(row) > DATE_COL_INDEX:
                date = row[DATE_COL_INDEX].split(' ')[0]
            else:
                date = ''

            for image_url in cell.split('\n'):
                # drop a stray "\r" or padding left over from the cell text
                image_url = image_url.strip()
                if not image_url:
                    print(tag + "No " + url_name)
                    continue

                # last path segment of the URL, without any query string
                basename = image_url.split('/')[-1].split('?')[0]
                image_filename = date + '_' + url_name + '_' + basename

                # created lazily so no directory appears when nothing is downloaded
                if not os.path.isdir(directory):
                    os.makedirs(directory)

                error = _download(image_url, os.path.join(directory, image_filename))
                if error is None:
                    print(tag + "Image saved: " + image_filename)
                else:
                    print(tag + "Could not download url: " + image_url
                          + " (" + str(error) + ")")
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment