Last active
December 16, 2015 08:59
-
-
Save hzqtc/5409775 to your computer and use it in GitHub Desktop.
Download Jiandan OOXX gallery. http://jandan.net/ooxx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Example:
# curl -s http://jandan.net/ooxx | ./jdooxx.py -o 5 -r 2.0 -u | wget -nv -i -
# This will download all pictures with oo >= 5 and an oo/xx ratio >= 2.0 in the last page of Jiandan OOXX.
import HTMLParser | |
import sys | |
import getopt | |
class OOXXImage(object):
    """A single gallery picture together with its two vote counters.

    The fields are plain public attributes that the caller fills in:
    ``oo`` and ``xx`` are integer vote counts (both start at 0) and ``url``
    is the picture address (starts as an empty string).
    """

    def __init__(self):
        self.oo = 0
        self.xx = 0
        self.url = ""

    def test(self, ooMin, xxMax, ratio):
        """Return True when the picture passes all three vote filters.

        ooMin -- smallest accepted ``oo`` count (inclusive).
        xxMax -- largest accepted ``xx`` count (inclusive).
        ratio -- smallest accepted ``oo / xx`` quotient (inclusive).

        A picture with ``xx == 0`` is rated as if it had a single XX vote so
        the division stays defined.  The stored ``xx`` is left untouched:
        the ``xxMax`` check sees the real count and callers that print the
        picture afterwards still report the real value.
        """
        divisor = self.xx if self.xx != 0 else 1
        return (self.oo >= ooMin
                and self.xx <= xxMax
                and float(self.oo) / float(divisor) >= ratio)
class JDOOXXParser(HTMLParser.HTMLParser):
    """HTML parser that builds one OOXXImage per ``<li id="comment-...">``.

    Feed the page with ``feed()``; afterwards ``imageList`` holds the
    collected images in document order.

    State kept while parsing:
      withinPostLi -- True between a matching ``<li>`` and the next ``</li>``.
      fetchData    -- "oo" / "xx" while the next text chunk is expected to be
                      that vote count, otherwise None.
      currentImage -- the OOXXImage being filled for the current ``<li>``.
    """

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.withinPostLi = False
        self.fetchData = None
        self.currentImage = OOXXImage()
        self.imageList = []

    def handle_starttag(self, tag, attrs):
        """Track entry into a post and spot its image / vote-count tags."""
        attrMap = dict(attrs)
        # A valueless attribute (``<li id>``) is reported with value None;
        # normalise to "" so the prefix checks below cannot raise.
        elemId = attrMap.get("id") or ""
        if self.withinPostLi:
            if tag == "img" and "class" not in attrMap:
                # Skip <img> tags that carry no src instead of raising
                # KeyError and aborting the whole parse.
                src = attrMap.get("src")
                if src is not None:
                    self.currentImage.url = src
            elif tag == "span" and elemId.startswith("cos_support-"):
                self.fetchData = "oo"
            elif tag == "span" and elemId.startswith("cos_unsupport-"):
                self.fetchData = "xx"
        # Checked after the block above so the opening <li> itself is not
        # treated as content of the post it opens.
        if tag == "li" and elemId.startswith("comment-"):
            self.withinPostLi = True

    def handle_endtag(self, tag):
        """Close the current post on ``</li>`` and start a fresh image."""
        if tag == "li" and self.withinPostLi:
            self.withinPostLi = False
            self.imageList.append(self.currentImage)
            self.currentImage = OOXXImage()

    def handle_data(self, data):
        """Store the text chunk following a vote-count span as an integer."""
        if self.fetchData not in ("oo", "xx"):
            return
        field = self.fetchData
        # Only the first chunk after the span is considered, whether or not
        # it turns out to be a number.
        self.fetchData = None
        try:
            count = int(data)
        except ValueError:
            # Non-numeric text: keep the counter at its current value rather
            # than crashing and losing every image parsed so far.
            return
        setattr(self.currentImage, field, count)
def usage():
    """Print the command-line help text, one line per option, to stdout."""
    helpLines = (
        "A parser for Jiandan (http://jandan.net/ooxx) MM Gallery.",
        "Read from standard input and print image informations.",
        " -h, --help Print this infomation.",
        " -o, --oomin=INT Set the minimum OO value; images with a lower OO value will be excluded.",
        " -x, --xxmax=INT Set the maximum XX value; images with a greater XX value will be excluded.",
        " -r, --ratio=FLOAT Set the OO/XX ratio; images with lower ratio will be excluded",
        " -u, --urlonly Only print the URLs; both OO and XX value will be omitted.",
    )
    for line in helpLines:
        # Single parenthesised argument: same output whether ``print`` is
        # the statement or the function.
        print(line)
if __name__ == "__main__":
    # Script entry point: parse the options, read the gallery page from
    # standard input, and print every image that passes the filters.
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            "ho:x:r:u",
            ["help", "oomin=", "xxmax=", "ratio=", "urlonly"])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    # Defaults: no minimum OO, at most 1000 XX, ratio filter at -1.
    ooMin = 0
    xxMax = 1000
    ratio = -1
    urlOnly = False

    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        try:
            if o in ("-o", "--oomin"):
                ooMin = int(a)
            elif o in ("-x", "--xxmax"):
                xxMax = int(a)
            elif o in ("-r", "--ratio"):
                ratio = float(a)
            elif o in ("-u", "--urlonly"):
                urlOnly = True
        except ValueError:
            # Treat a malformed number like any other bad option: report it,
            # show the help and exit with status 2 instead of a traceback.
            print("invalid value for option %s: %s" % (o, a))
            usage()
            sys.exit(2)

    parser = JDOOXXParser()
    # "replace" keeps a single malformed byte in the page from aborting the
    # run with UnicodeDecodeError.
    parser.feed(sys.stdin.read().decode("utf-8", "replace"))

    for img in parser.imageList:
        if not img.test(ooMin, xxMax, ratio):
            continue
        if urlOnly:
            print(img.url)
        else:
            print("oo = %3d, xx = %3d, %s" % (img.oo, img.xx, img.url))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment