Last active
September 14, 2023 20:44
-
-
Save krisrak/2cd4230682997d399c33a1b24c266521 to your computer and use it in GitHub Desktop.
Python script to download urls in a csv file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import os | |
import sys | |
import urllib | |
import csv | |
try: | |
filename = sys.argv[1] | |
url_name = sys.argv[2] | |
except: | |
print "\nERROR: Please specify filename and url column name to download\n" | |
print "Usage:" | |
print " $ picodash_export_url_download.py data.csv image_url\n" | |
print "- First param should be the csv file path" | |
print "- Second param should be the column name that has image urls to download\n" | |
sys.exit(0) | |
# open csv file to read | |
with open(filename, 'r') as csvfile: | |
csv_reader = csv.reader(csvfile) | |
# iterate on all rows in csv | |
for row_index,row in enumerate(csv_reader): | |
# find the url column name to download in first row | |
if row_index == 0: | |
IMAGE_URL_COL_NUM = None | |
for col_index,col in enumerate(row): | |
if col == url_name: | |
IMAGE_URL_COL_NUM = col_index | |
if IMAGE_URL_COL_NUM is None: | |
print "\nERROR: url column name '"+url_name+"' not found, available options:" | |
for col_index,col in enumerate(row): | |
print " " + col | |
print "\nUsage:" | |
print " $ picodash_export_url_download.py data.csv image_url\n" | |
sys.exit(0) | |
continue | |
# check if we have an image URL and download in rows > 1 | |
image_urls = row[IMAGE_URL_COL_NUM] | |
image_urls = image_urls.split('\n') | |
#print image_urls | |
for image_url in image_urls: | |
if image_url != '' and image_url != "\n": | |
date = row[3].split(' ')[0] | |
image_filename = date + '_' + url_name + '_' +image_url.split('/')[-1].split('?')[0] | |
#image_filename = row[0] + "-" + row[1] + ".jpg" | |
directory = filename.split('.csv')[0] + "-" + url_name | |
if not os.path.exists(directory): | |
os.makedirs(directory) | |
try: | |
urllib.urlretrieve(image_url, directory+'/'+image_filename) | |
print "["+str(row_index)+"] Image saved: " + image_filename | |
except: | |
# second attempt to download if failed | |
try: | |
urllib.urlretrieve(image_url, directory+'/'+image_filename) | |
print "["+str(row_index)+"] Image saved: " + image_filename | |
except: | |
print "["+str(row_index)+"] Could not download url: " + image_url | |
else: | |
print "["+str(row_index)+"] No " + url_name |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is a Python script that downloads the image/video URLs in a CSV file exported from picodash.com.
You have to specify the csv_filename and the column_header_name of the column that contains the URLs to download. The URLs can point to image or video files; the script creates a folder next to the CSV file and downloads the files into it.
Usage:
/usr/bin/python picodash_export_url_download.py <csv_filename> <column_header_name>
Example:
/usr/bin/python picodash_export_url_download.py data.csv display_src