-
-
Save felipe-negri/d2046effeab5880cc29ecd924504b478 to your computer and use it in GitHub Desktop.
Get photos from one or more DeviantArt galleries: download them to a local folder, or export the list of image URLs to a local file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * author: crazy4groovy
 * description: given a list of one or more DeviantArt gallery URLs (semicolon-separated),
 * parses out a list of all full-size images found.
 * license: MIT, no warranties or guarantees!
 */
import static groovyx.gpars.GParsPool.withPool as parallel | |
///////////////////////////////// | |
// Positional command-line arguments; every one is optional and falls back
// to the default assigned below when it is not supplied.
int suppliedArgs = args.size()

// args[0]: semicolon-separated list of gallery URLs.
List urls
if (suppliedArgs > 0) {
    urls = args[0].split(';')*.trim()
} else {
    urls = ['http://name_of_account.deviantart.com/gallery/?set=gallery_id_number']
}

// args[1]: output target (an existing directory to download into, or a file to list URLs in).
String fileOutPath = /C:\out.txt/
if (suppliedArgs > 1) {
    fileOutPath = args[1]
}

// args[2]: upper bound on the number of images collected per URL.
int maxCount = 2000
if (suppliedArgs > 2) {
    maxCount = args[2].toInteger()
}

// args[3]: number of scraping threads, capped at 4.
int threads = 2
if (suppliedArgs > 3) {
    threads = Math.min(args[3].toInteger(), 4)
}

// args[4]: text placed between the group name and the image file name.
String groupSeparator = File.separator
if (suppliedArgs > 4) {
    groupSeparator = args[4]
}
/////////////////////////////////
println "$fileOutPath $maxCount $threads $groupSeparator"
// Step added to the 'offset' query parameter for each successive page
// (presumably the gallery page size -- confirm against the site).
int offsetInterval = 24
// Group name -> insertion-ordered set of image URLs collected for that group.
Map imgMap = ([:].withDefault{[] as LinkedHashSet})
// YQL request URL -> number of failed attempts so far.
Map retriesMap = [:].withDefault{0}
// Maximum number of retries for a single request before it is given up on.
int timeoutTries = 5
// Both maps are touched from several worker threads below, so both are wrapped.
imgMap = Collections.synchronizedMap(imgMap)
retriesMap = Collections.synchronizedMap(retriesMap)

// Scrape every gallery URL in parallel; each worker pages through one gallery.
parallel(threads) {
    urls.eachParallel { url ->
        println "START *** ${url} *****>>"
        String picGroup = getPicGroup(url)
        if (!picGroup) {
            println "Invalid URL, please check it"
            return
        }
        // The gallery URL is embedded inside the YQL query, so it must be encoded.
        // The charset is given explicitly: the one-argument overload is deprecated
        // and depends on the platform default encoding.
        url = java.net.URLEncoder.encode(url + (!url.contains('?') ? '?' : '&'), 'UTF-8')
        int count = 0
        int page = 0
        while (count < maxCount) {
            String u = url + 'offset=' + (page*offsetInterval)
            String yql = """https://query.yahooapis.com/v1/public/yql?diagnostics=true&q=select%20*%20from%20html%20where%20xpath%3D%27%2F%2Fa%5B%40data-super-img%5D%27and%20url%3D'${u}'"""
            println "{{{ $yql }}}"
            // Back off one extra second per previous failure of this exact request.
            Thread.sleep(1000 * retriesMap[yql] + 100)
            def root
            try {
                root = new XmlSlurper().parse(yql)
            }
            catch (java.io.IOException ex) {
                println ("caught IOException! $picGroup -- $ex")
                break
            }
            catch (Exception ex) {
                println ("caught Exception! $picGroup -- $ex")
                break
            }
            // A non-empty 'error' attribute in the diagnostics marks a failed fetch.
            String err = root.diagnostics.url?.@error
            if (err != "") {
                if (retriesMap[yql] < timeoutTries) {
                    println "! timed out :: $err -- retrying $picGroup pg#$page"
                    retriesMap[yql] = retriesMap[yql] + 1
                }
                else {
                    println "!! timed out :: $err -- SKIPPING $picGroup pg#$page"
                    // Only a read timeout is worth skipping past; anything else ends this gallery.
                    if (err != "Read timed out") break
                    page++
                }
                continue
            }
            int found = root.results.a.size()
            println "found: ${found}"
            List res = root.results.a.collect { it.@'data-super-img'.text() }
            if (!res) break

            boolean grew
            int grandTotal
            // Hold the map's lock while mutating a set and while walking all sets,
            // so another worker cannot modify them mid-iteration.
            synchronized (imgMap) {
                grew = imgMap[picGroup].addAll(res)
                grandTotal = imgMap.values().flatten().size()
            }
            count += res.size()
            // Stop when the page contributed no new image: the gallery is repeating itself.
            if (!grew) break
            println "?? $picGroup pg#$page (${(page*offsetInterval)}) :: found: ${found}, total size: $count >> set grand total size: ${grandTotal}"
            page++
        }
        println "END : ${url}; #$count"
    }
}
println "*SET GRAND TOTAL img list: (${imgMap.values().flatten().size()})*"
File o = new File(fileOutPath)
if (o.isDirectory()) {
    // Output target is an existing directory: download every image into it.
    def ant = new AntBuilder()
    imgMap.each { groupName, imgSet ->
        // Sanitize only the generated group part; the user-supplied output
        // directory must be used exactly as given.
        String groupPart = (groupName + groupSeparator).replaceAll(/[\*\?"<>|=]/, '_')
        String rootPath = o.absolutePath + File.separator + groupPart
        // With the default separator each group gets its own sub-directory;
        // with any other separator the group name becomes a file-name prefix.
        if (groupSeparator == File.separator
                && (!(new File(rootPath).directory))) {
            (new File(rootPath)).mkdir()
        }
        parallel(4) {
            imgSet.eachParallel { imgUrl ->
                // The file name is the last path segment of the image URL.
                String imgName = imgUrl.split('/')[-1]
                try {
                    ant.get(src: imgUrl, dest: rootPath + imgName, skipexisting: 'true')
                }
                catch (Exception ex) {
                    // Best effort: one failed download must not stop the others,
                    // but it should not go unnoticed either.
                    println "download failed, skipping: $imgUrl -- $ex"
                }
            }
        }
    }
}
else {
    // Output target is not a directory: append the collected URLs to it as a text file.
    print "Directory $fileOutPath does not exist, writing to file..."
    // Appending an empty string creates the file if it is missing.
    o << ''
    o << imgMap.values().flatten().join('\n') + '\n'
    println "DONE"
}
/**
 * Derives a group name from a DeviantArt gallery URL.
 *
 * The name is built from the account sub-domain, the path after
 * "deviantart.com" and, when present, the value of a trailing "q=" query
 * parameter. Slashes are turned into underscores.
 *
 * @param url gallery URL, with or without a leading "http://" or "https://"
 * @return the group name, or null when the URL is null, empty, or not a
 *         DeviantArt URL with something after the host name
 */
String getPicGroup( String url ) {
    if (!url) {
        return null
    }
    String regex = /^(.+)\.deviantart\.com(.+?)(\?.*?q=(\w+))?$/
    // Drop the scheme so it cannot leak into the account part of the name.
    String withoutScheme = url.replaceFirst(/^https?:\/\//, '')
    def m = (withoutScheme =~ regex)
    if (!m.find()) {
        return null
    }
    String name = m.group(1) + m.group(2) + (m.group(4) ?: '')
    return name.replaceAll('/', ' ').trim().replaceAll(' ', '_')
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment