Last active
August 25, 2019 19:06
-
-
Save pwneddesal/e8d30102afbaefec531d6708f761e104 to your computer and use it in GitHub Desktop.
For python-webkit2png. Creates a screenshot of a website (https://stackoverflow.com/questions/2744191/how-do-i-use-python-webkit2png-to-take-many-screenshots-at-the-same-time)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import subprocess | |
import re | |
import os | |
import sys | |
def scrape_url(value, outpath):
    """
    Invoke ``webkit2png`` on the URL ``value``, passing ``outpath`` via ``-o``.

    The ``webkit2png`` executable has to be reachable through the PATH.
    The exit status of the command is not inspected.
    """
    command = ["webkit2png"]
    # NOTE(review): -x looks like the Xvfb display size and -g the window
    # geometry -- confirm against `webkit2png --help`.
    command += ["-x", "1200", "900"]
    command += ["-o", outpath]
    command += ["-g", "1000", "1260"]
    # NOTE(review): -t is presumably a timeout in seconds -- confirm.
    command += ["-t", "30"]
    command.append(value)
    subprocess.call(command)
def get_valid_filename(s):
    """
    Reduce ``s`` to a string made only of word characters, ``-`` and ``.``.

    ``s`` is converted with ``str``, leading/trailing whitespace is trimmed,
    every space becomes an underscore, and any remaining character that is
    not a word character, a dash or a dot is removed.
    """
    text = str(s)
    text = text.strip()
    text = text.replace(' ', '_')
    disallowed = re.compile(r'(?u)[^-\w.]')
    return disallowed.sub('', text)
def readfile(path):
    """
    Read one host name or URL per line from ``path`` and return a list of URLs.

    Each non-blank line is stripped of surrounding whitespace. Lines that do
    not already start with ``http://`` or ``https://`` get ``https://``
    prepended; lines that already carry one of those schemes are kept as-is.
    Blank lines are skipped so they do not turn into a bare ``https://`` entry.

    Raises whatever ``open`` raises when ``path`` cannot be read.
    """
    urls = []
    # The context manager closes the file even if reading fails.
    with open(path) as f:
        for line in f.read().splitlines():
            entry = line.strip()
            if not entry:
                continue
            # Avoid producing "https://https://..." for lines that are
            # already full URLs.
            if not entry.startswith(('http://', 'https://')):
                entry = 'https://' + entry
            urls.append(entry)
    return urls
def scrape_list_urls(list_url_out_name, outdir):
    """
    Take a screenshot of every URL in ``list_url_out_name``.

    list_url_out_name is an iterable of URL strings. For each URL the image
    is written inside ``outdir`` as ``<name>.png``, where ``<name>`` is the
    URL passed through get_valid_filename. Each URL is printed before it is
    processed.
    """
    for value in list_url_out_name:
        # Parenthesised single-argument form prints the same on Python 2 and 3.
        print(value)
        # os.path.join inserts the separator when outdir has no trailing
        # slash; plain concatenation would glue the directory name onto the
        # file name instead.
        outpath = os.path.join(outdir, get_valid_filename(value) + '.png')
        scrape_url(value, outpath)
if __name__ == "__main__":
    # Expected invocation: <script> URL_LIST_FILE OUTPUT_DIR
    if len(sys.argv) < 3:
        # Fail with a usage hint instead of a bare IndexError.
        sys.stderr.write("usage: %s URL_LIST_FILE OUTPUT_DIR\n" % sys.argv[0])
        sys.exit(1)
    urls = readfile(sys.argv[1])  # the input file
    scrape_list_urls(urls, sys.argv[2])  # the output directory
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi, I couldn't use your code. Could you explain it better?
I spent the last two hours trying to make this code run :p