Last active
April 18, 2020 11:53
-
-
Save py7hon/b6ebe4d30a68704bf5ffc727cd96dcbf to your computer and use it in GitHub Desktop.
nhentai dl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
J=len | |
I=int | |
B=print | |
import requests as K,re | |
from tqdm import trange as L | |
import urllib.request,os as C,sys as E | |
from glob import glob | |
def M(link):
    """Return the page number encoded in a gallery image link.

    Works for thumbnail links (``.../<num>t.jpg``) as well as full-size
    links (``.../<num>.jpg``); the previous implementation split on ``'t'``
    and therefore failed on any file name without a ``t`` suffix.

    Args:
        link: Image URL (or bare file name) whose last path component starts
            with the page number.

    Returns:
        The page number as an int.

    Raises:
        ValueError: if the file name does not start with digits.
    """
    # [-1] instead of [1] so a bare file name without any '/' also works.
    file_name = link.rsplit('/', 1)[-1]
    leading_digits = re.match(r'\d+', file_name)
    if leading_digits is None:
        raise ValueError('no page number in link: {!r}'.format(link))
    return int(leading_digits.group())
def N(cm_id):
    """Return the full-size image links of gallery *cm_id*, ordered by page.

    The gallery page is fetched through ``K`` (the ``requests`` alias), every
    page-thumbnail URL is collected, de-duplicated, sorted by page number
    with ``M`` and finally rewritten from the thumbnail host/name
    (``t.`` host, ``<num>t.jpg``) to the image host/name (``i.`` host,
    ``<num>.jpg``).

    Args:
        cm_id: Gallery id inserted into the gallery URL.

    Returns:
        A list of image URLs; empty when the page lists no thumbnails.

    Raises:
        requests.RequestException: on connection problems, on timeout, or
            when the server answers with an HTTP error status.
    """
    gallery_url = 'https://nhent.ai/g/{}/'.format(cm_id)
    # A timeout keeps the script from hanging forever on a dead connection,
    # and raise_for_status stops an error page being read as an empty gallery.
    response = K.get(gallery_url, timeout=30)
    response.raise_for_status()
    thumb_pattern = r'https\:\/\/t\.nhent\.ai/galleries/\d+/\d+t\.jpg'
    thumbs = sorted(set(re.findall(thumb_pattern, response.text)), key=M)
    return [
        thumb.replace('//t.nhen', '//i.nhen').replace('t.jpg', '.jpg')
        for thumb in thumbs
    ]
def O(links, location, skip):
    """Download every link into *location*, skipping the first *skip* ones.

    Each file is first written to ``<name>.part`` and renamed to its final
    name only once the transfer finished, so an interrupted download never
    leaves a truncated image that looks complete.

    Args:
        links: Sequence of image URLs; the last path component is used as
            the local file name.
        location: Existing directory that receives the files.
        skip: Number of leading links to skip (used to resume a download).

    Returns:
        ``(True, 9999)`` when all links were handled, ``(False, -1)`` when
        *location* is not a directory, or ``(False, index)`` with the index
        of the link whose download failed.
    """
    if not C.path.isdir(location):
        print('no such dir:', location)
        return False, -1
    # L is tqdm's trange, so the loop doubles as the progress bar.
    for index in L(len(links)):
        link = links[index]
        file_name = C.path.split(link)[1]
        if index < skip:
            print('skip:', file_name)
            continue
        target = C.path.join(location, file_name)
        partial = target + '.part'
        try:
            urllib.request.urlretrieve(link, partial)
            C.replace(partial, target)
        except Exception as error:
            # Best effort by design: report the problem and hand the index
            # back so the caller can tell the user where to resume.
            print(error)
            return False, index
        finally:
            # Nothing to do after a successful rename; otherwise remove the
            # incomplete file (also runs when the user hits Ctrl-C).
            if C.path.exists(partial):
                try:
                    C.remove(partial)
                except OSError:
                    pass  # cleanup is optional, never mask the real error
    return True, 9999
def P(fd):
    """Return the downloaded files in directory *fd*, ordered by page number.

    Differences from the previous one-liner:

    * the generated ``00.html`` index is left out, so re-running the script
      on an existing directory no longer lists the index as an image;
    * the sort key looks only at the digits of the file's own base name
      (directory digits and the extension are ignored);
    * files without any digit in their name no longer crash ``int('')``;
      they are placed after the numbered pages.

    The raw and the sorted listing are still printed for inspection.

    Args:
        fd: Directory to scan with the ``*.*`` glob pattern.

    Returns:
        A new list of matching paths in page order.
    """
    found = glob(C.path.join(fd, '*.*'))
    print(found)

    def page_order(path):
        stem = C.path.splitext(C.path.basename(path))[0]
        digits = re.sub(r'\D', '', stem)
        if digits:
            return (0, int(digits), path)
        return (1, 0, path)

    pages = sorted(
        (path for path in found if C.path.basename(path) != '00.html'),
        key=page_order,
    )
    print('v' * 20)
    print(pages)
    print('#' * 20)
    return pages
def Q(fd, f_ls):
    """Write ``00.html`` into *fd*: a one-column table showing every image.

    The page has a heading row containing *fd* followed by one table row per
    entry of *f_ls*. Images are referenced by base name only, i.e. relative
    to the HTML file. Directory and file names are HTML-escaped so characters
    such as ``&``, ``<`` or ``"`` cannot break the markup.

    Args:
        fd: Directory that receives ``00.html``; also used as page heading.
        f_ls: Iterable of image paths, in the order they should appear.

    Returns:
        None.
    """
    # Local import: the file's import block does not provide ``html``.
    import html

    row_end = '</td></tr>\n'
    img_tag = '<img src="{}" width="100%"/>\n'
    index_path = C.path.join(fd, '00.html')
    with open(index_path, 'w') as page:
        page.write('<html><head></head>\n')
        page.write('<body bgcolor="#555555">\n')
        page.write('<table width="1000" align="center">\n')
        page.write('<tr><td width="100%">\n')
        page.write('<h1 align="center">')
        page.write(html.escape(fd))
        page.write('</h1>')
        page.write(row_end)
        for image_path in f_ls:
            tag = img_tag.format(html.escape(C.path.split(image_path)[1]))
            page.write('<tr><td>\n')
            print(tag)
            page.write(tag)
            page.write('<br/>\n')
            page.write(row_end)
        page.write('</table></body></html>\n')
if __name__ == '__main__':
    # Command-line entry point: <script> <gallery id>
    # Downloads the gallery into ./n-<gallery id>/ and, when every page was
    # fetched, writes the 00.html viewer next to the images.
    if len(E.argv) < 2:
        # Without this guard a missing argument ends in an IndexError traceback.
        print('usage: {} <gallery id>'.format(E.argv[0]))
        E.exit(1)
    cm_id = E.argv[1]
    fd = 'n-{}'.format(cm_id)
    skip = 0
    if not C.path.isdir(fd):
        C.mkdir(fd)
    else:
        # Existing directory: restart from page 0 (Y), abort (N), or resume
        # after skipping the given number of pages (integer).
        opt = input('dir {} already exist, continue?[Y|N|int:skip]'.format(fd))
        if opt in ('Y', 'y'):
            skip = 0
        elif opt in ('N', 'n'):
            print('Goodbye')
            E.exit()
        elif opt.isdigit():
            skip = int(opt)
        else:
            print('Wrong input <{}>, exit.'.format(opt))
            E.exit()
    links = N(cm_id)
    ret, num = O(links, fd, skip)
    if ret:
        print('Completely download {} for {}'.format(len(links), cm_id))
        Q(fd, P(fd))
    else:
        # num is the index of the failed link; pass it as skip to resume.
        print('Download progress suck at {}'.format(num))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
import requests | |
import re | |
from tqdm import trange | |
import urllib.request | |
import os | |
import sys | |
from glob import glob | |
""" | |
https://i.nhent.ai/galleries/<num>/<num>.jpg | |
https://t.nhent.ai/galleries/<num>/<num>t.jpg | |
""" | |
def num_from_link(link):
    """Return the page number encoded in a gallery image link.

    Works for thumbnail links (``.../<num>t.jpg``) as well as full-size
    links (``.../<num>.jpg``); the previous implementation split on ``'t'``
    and therefore failed on any file name without a ``t`` suffix.

    Args:
        link: Image URL (or bare file name) whose last path component starts
            with the page number.

    Returns:
        The page number as an int.

    Raises:
        ValueError: if the file name does not start with digits.
    """
    # [-1] instead of [1] so a bare file name without any '/' also works.
    fn = link.rsplit('/', 1)[-1]
    leading_digits = re.match(r'\d+', fn)
    if leading_digits is None:
        raise ValueError('no page number in link: {!r}'.format(link))
    return int(leading_digits.group())
def get_links(cm_id):
    """Return the full-size image links of gallery *cm_id*, ordered by page.

    The gallery page is fetched, every page-thumbnail URL is collected,
    de-duplicated, sorted by page number with ``num_from_link`` and finally
    rewritten from the thumbnail host/name (``t.`` host, ``<num>t.jpg``) to
    the image host/name (``i.`` host, ``<num>.jpg``).

    Args:
        cm_id: Gallery id inserted into the gallery URL.

    Returns:
        A list of image URLs; empty when the page lists no thumbnails.

    Raises:
        requests.RequestException: on connection problems, on timeout, or
            when the server answers with an HTTP error status.
    """
    src = 'https://nhent.ai/g/{}/'.format(cm_id)
    # A timeout keeps the script from hanging forever on a dead connection,
    # and raise_for_status stops an error page being read as an empty gallery.
    response = requests.get(src, timeout=30)
    response.raise_for_status()
    pat = r'https\:\/\/t\.nhent\.ai/galleries/\d+/\d+t\.jpg'
    thumbs = sorted(set(re.findall(pat, response.text)), key=num_from_link)
    return [
        thumb.replace('//t.nhen', '//i.nhen').replace('t.jpg', '.jpg')
        for thumb in thumbs
    ]
def downloadlinks(links, location, skip):
    """Download every link into *location*, skipping the first *skip* ones.

    Each file is first written to ``<name>.part`` and renamed to its final
    name only once the transfer finished, so an interrupted download never
    leaves a truncated image that looks complete.

    Args:
        links: Sequence of image URLs; the last path component is used as
            the local file name.
        location: Existing directory that receives the files.
        skip: Number of leading links to skip (used to resume a download).

    Returns:
        ``(True, 9999)`` when all links were handled, ``(False, -1)`` when
        *location* is not a directory, or ``(False, index)`` with the index
        of the link whose download failed.
    """
    if not os.path.isdir(location):
        print('no such dir:', location)
        return False, -1
    # trange drives the loop so it doubles as the progress bar.
    for i in trange(len(links)):
        link = links[i]
        b = os.path.split(link)[1]
        if i < skip:
            print('skip:', b)
            continue
        tar = os.path.join(location, b)
        partial = tar + '.part'
        try:
            urllib.request.urlretrieve(link, partial)
            os.replace(partial, tar)
        except Exception as e:
            # Best effort by design: report the problem and hand the index
            # back so the caller can tell the user where to resume.
            print(e)
            return False, i
        finally:
            # Nothing to do after a successful rename; otherwise remove the
            # incomplete file (also runs when the user hits Ctrl-C).
            if os.path.exists(partial):
                try:
                    os.remove(partial)
                except OSError:
                    pass  # cleanup is optional, never mask the real error
    return True, 9999
def fd_list(fd):
    """Return the downloaded files in directory *fd*, ordered by page number.

    Differences from the previous implementation:

    * the generated ``00.html`` index is left out, so re-running the script
      on an existing directory no longer lists the index as an image;
    * the sort key looks only at the digits of the file's own base name
      (directory digits and the extension are ignored);
    * files without any digit in their name no longer crash ``int('')``;
      they are placed after the numbered pages;
    * the regex is a raw string (``'\\D'`` written without ``r`` is an
      invalid escape sequence).

    The raw and the sorted listing are still printed for inspection.

    Args:
        fd: Directory to scan with the ``*.*`` glob pattern.

    Returns:
        A new list of matching paths in page order.
    """
    found = glob(os.path.join(fd, '*.*'))
    print(found)

    def page_order(path):
        stem = os.path.splitext(os.path.basename(path))[0]
        digits = re.sub(r'\D', '', stem)
        if digits:
            return (0, int(digits), path)
        return (1, 0, path)

    file_list = sorted(
        (path for path in found if os.path.basename(path) != '00.html'),
        key=page_order,
    )
    print('v' * 20)
    print(file_list)
    print('#' * 20)
    return file_list
def writefile(fd, f_ls):
    """Write ``00.html`` into *fd*: a one-column table showing every image.

    The page has a heading row containing *fd* followed by one table row per
    entry of *f_ls*. Images are referenced by base name only, i.e. relative
    to the HTML file. Directory and file names are HTML-escaped so characters
    such as ``&``, ``<`` or ``"`` cannot break the markup.

    Args:
        fd: Directory that receives ``00.html``; also used as page heading.
        f_ls: Iterable of image paths, in the order they should appear.

    Returns:
        None.
    """
    # Local import: the file's import block does not provide ``html``.
    import html

    row_end = '</td></tr>\n'
    imgtag = '<img src="{}" width="100%"/>\n'
    html_fname = os.path.join(fd, '00.html')
    with open(html_fname, 'w') as n_htm:
        n_htm.write('<html><head></head>\n')
        n_htm.write('<body bgcolor="#555555">\n')
        n_htm.write('<table width="1000" align="center">\n')
        n_htm.write('<tr><td width="100%">\n')
        n_htm.write('<h1 align="center">')
        n_htm.write(html.escape(fd))
        n_htm.write('</h1>')
        n_htm.write(row_end)
        for nm in f_ls:
            tag = imgtag.format(html.escape(os.path.split(nm)[1]))
            n_htm.write('<tr><td>\n')
            print(tag)
            n_htm.write(tag)
            n_htm.write('<br/>\n')
            n_htm.write(row_end)
        n_htm.write('</table></body></html>\n')
if __name__ == '__main__':
    # Command-line entry point: <script> <gallery id>
    # Downloads the gallery into ./n-<gallery id>/ and, when every page was
    # fetched, writes the 00.html viewer next to the images.
    if len(sys.argv) < 2:
        # Without this guard a missing argument ends in an IndexError traceback.
        print('usage: {} <gallery id>'.format(sys.argv[0]))
        sys.exit(1)
    cm_id = sys.argv[1]
    fd = 'n-{}'.format(cm_id)
    skip = 0
    if not os.path.isdir(fd):
        os.mkdir(fd)
    else:
        # Existing directory: restart from page 0 (Y), abort (N), or resume
        # after skipping the given number of pages (integer).
        opt = input('dir {} already exist, continue?[Y|N|int:skip]'.format(fd))
        if opt in ('Y', 'y'):
            skip = 0
        elif opt in ('N', 'n'):
            print('Goodbye')
            sys.exit()
        elif opt.isdigit():
            skip = int(opt)
        else:
            print('Wrong input <{}>, exit.'.format(opt))
            sys.exit()
    links = get_links(cm_id)
    ret, num = downloadlinks(links, fd, skip)
    if ret:
        print('Completely download {} for {}'.format(len(links), cm_id))
        writefile(fd, fd_list(fd))
    else:
        # num is the index of the failed link; pass it as skip to resume.
        print('Download progress suck at {}'.format(num))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment