-
-
Save Nominom/634f27db499c04ae08f3962ce6cd9a4d to your computer and use it in GitHub Desktop.
# Original script by drdaxxy
# https://gist.github.com/drdaxxy/1e43b3aee3e08a5898f61a45b96e4cb4
# Thanks to ToshiroScan for fixing after a shonen update broke this.
# And QPDEH for helping fix the black bars issue with some manga.
# This script has been tested to work with python 3.10
# To install required libraries:
#
# pip install requests
# pip install Pillow
# pip install beautifulsoup4
import sys | |
import os | |
import requests | |
import errno | |
import json | |
from PIL import Image | |
from bs4 import BeautifulSoup | |
# --- Account settings -------------------------------------------------------
# Leave `login` as False for chapters that can be read without an account.
login = False # Set this to True if you want to login
username = "your email here"
password = "your password"

# Headers sent with the login POST only; page and image requests use their own.
loginheaders = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:130.0) Gecko/20100101 Firefox/130.0',
    'origin': 'https://pocket.shonenmagazine.com',
    'x-requested-with': 'XMLHttpRequest'
}

loginurl = "https://pocket.shonenmagazine.com/user_account/login"

# One shared session so that cookies obtained at login are sent with every
# later request.
sess = requests.Session()

if login:
    logindata = {
        "email_address": username,
        "password": password,
        "return_location_path": "/",
    }
    r = sess.post(loginurl, headers=loginheaders, data=logindata)

    if not r.ok:
        # Dump everything the server returned to help diagnose the failure.
        # The script still continues afterwards, exactly as before.
        print('LOGIN FAILED')
        for detail in (r.headers, r.status_code, r.reason, r.text):
            print(detail)
    else:
        print('LOGIN SUCCESS')
        print(sess.cookies)
# Exactly two command-line arguments are required: the episode URL and the
# folder the pages are written to.
if len(sys.argv) != 3:
    print("usage: shonenripperjson.py <url> <destination folder>")
    sys.exit(1)

destination = sys.argv[2]
# exist_ok=True makes this a no-op when the folder already exists and avoids
# the race between checking for the folder and creating it.
os.makedirs(destination, exist_ok=True)

url = sys.argv[1]

# Headers used for the episode page request and for every image request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:130.0) Gecko/20100101 Firefox/130.0'}

# If soup method below doesn't work, try uncommenting these 2 lines
# if not url.endswith('.json'):
#     url = url + ".json"

print("Getting from url: "+url)
r = sess.get(url=url, headers=headers)

# And this
# data = r.json()

# and comment from here ========
# The episode metadata is embedded in the HTML page as the `data-value`
# attribute of <script id="episode-json">.
soup = BeautifulSoup(r.content, 'html.parser')
script_tag = soup.find('script', id='episode-json')
if script_tag:
    json_data = script_tag['data-value']
    data = json.loads(json_data)
else:
    print("No <script> with ID 'episode-json' found.")
    # Show the HTTP status as well, so a failed or blocked request can be
    # told apart from a change in the page layout.
    print("HTTP status: ", r.status_code, r.reason)
    sys.exit(1)
# to here ======================
def dlImage(url, outFilename, drm):
    """Download one page image and, when requested, unscramble its tiles.

    The image is fetched through the shared session `sess` using the
    module-level `headers`. The file extension is chosen from the response's
    content type and appended to `outFilename`.

    Args:
        url: Address of the page image.
        outFilename: Destination path WITHOUT extension; ".jpg" or ".png" is
            appended depending on the content type.
        drm: When true, the saved image is treated as tile-scrambled: its
            4x4 grid of cells is transposed and the file is overwritten with
            the rearranged image.

    Returns:
        None. On an HTTP error or an unrecognized content type the response
        is printed and nothing is written.
    """
    # Recognized content types and the extension each one maps to.
    extensions = {"image/jpeg": ".jpg", "image/png": ".png"}

    # The context manager closes the streamed response on every exit path,
    # including the early returns below.
    with sess.get(url, stream=True, headers=headers) as response:
        if not response.ok:
            print(response)
            return

        # Compare only the media type itself: drop any parameters such as
        # "; charset=..." and ignore letter case.
        raw_type = response.headers.get('content-type') or ''
        content_type = raw_type.split(';')[0].strip().lower()
        extension = extensions.get(content_type)
        if extension is None:
            print("content type not recognized!")
            print(response)
            return

        outFilename = outFilename + extension
        with open(outFilename, 'wb') as file:
            for block in response.iter_content(1024):
                if not block:
                    break
                file.write(block)

    if not drm:
        return

    DIVIDE_NUM = 4  # the scrambled area is a DIVIDE_NUM x DIVIDE_NUM grid
    MULTIPLE = 8    # cell sizes are rounded down to a multiple of this

    with Image.open(outFilename) as source:
        # Start from a full copy: pixels on the right/bottom that do not fit
        # into the cell grid are not scrambled and must be kept as they are
        # (starting from a blank image would leave them as black bars).
        dest = source.copy()

        cell_width = (source.width // (DIVIDE_NUM * MULTIPLE)) * MULTIPLE
        cell_height = (source.height // (DIVIDE_NUM * MULTIPLE)) * MULTIPLE

        # Transpose the grid: the cell at (column, row) in the source goes to
        # (row, column) in the destination.
        for row in range(DIVIDE_NUM):
            for col in range(DIVIDE_NUM):
                left = col * cell_width
                top = row * cell_height
                cell = source.crop(
                    (left, top, left + cell_width, top + cell_height))
                dest.paste(cell, (row * cell_width, col * cell_height))

    dest.save(outFilename)
# Walk the episode JSON and download every page that has an image source.
# Every failure path exits with status 1 so callers can detect it.
readableProduct = data.get('readableProduct')
if readableProduct is None:
    print('could not find readableProduct from json response')
    sys.exit(1)

nextReadableProductUri = readableProduct.get('nextReadableProductUri')

if 'pageStructure' not in readableProduct:
    print('could not find pageStructure from json response')
    sys.exit(1)

pageStructure = readableProduct['pageStructure']
if pageStructure is None:
    print('Could not download pages. Most likely this volume is not public.')
    sys.exit(1)

choJuGiga = pageStructure.get('choJuGiga', '')
print('choJuGiga: ', choJuGiga)

# Any choJuGiga value other than "usagi" is treated as tile-scrambled and is
# unscrambled after download.
drm = choJuGiga != "usagi"

# `or []` also covers a JSON null for "pages".
pages = pageStructure.get('pages') or []
if not pages:
    print("No pages found")
    sys.exit(1)

# Only entries with a 'src' are numbered, so output files are consecutive
# (0001, 0002, ...) even when some entries carry no image.
pageIndex = 0
for page in pages:
    if 'src' not in page:
        continue
    src = page['src']
    print(src)
    pageIndex += 1
    outFile = os.path.join(destination, f"{pageIndex:04d}")
    dlImage(src, outFile, drm)

if nextReadableProductUri is not None:
    print("Next URI: ", nextReadableProductUri)
Hi, I noticed when I used the script today that it suddenly put a black bar of the same size on the right side of every page, covering art and making spreads look bad. Any idea what could cause this? I haven't had any problems using the script in the past.
Hi, @jonaselz
Not sure why this would happen. Does this happen with only some manga or some chapters of a manga, or does it happen to you with everything now?
Can you link to some chapter you're having problems with (preferably available for free), so I can take a look?
Hi @Nominom, sorry for the late reply. It seems it only happens with some titles, for some reason.
I found a few free ones that give me the black bar.
https://pocket.shonenmagazine.com/episode/10834108156634258465
https://pocket.shonenmagazine.com/episode/3270375685326280731
https://pocket.shonenmagazine.com/episode/4856001361109256286
https://pocket.shonenmagazine.com/episode/3269754496643321245
hi, is this script still supported? The script writes to me "could not find readableProduct from json response"
steps were performed correctly, the script passes, but it cannot download images
hi, is this script still supported? The script writes to me "could not find readableProduct from json response" steps were performed correctly, the script passes, but it cannot download images
Here is a fix that works for me @weynscanlate :
Add these to the imports :
from bs4 import BeautifulSoup
import json
Don't forget to install bs4 :
pip install beautifulsoup4
Remove these 2 lines (57 and 58)
if not url.endswith('.json'):
url = url + ".json"
And then replace line 66 :
data = r.json()
By this :
soup = BeautifulSoup(r.content, 'html.parser')
script_tag = soup.find('script', id='episode-json')
if script_tag:
json_data = script_tag['data-value']
data = json.loads(json_data)
else:
print("No <script> with ID 'episode-json' found.")
sys.exit(1)
@ToshiroScan, you probably forgot to mention that lines 57-58 also need to be removed
if not url.endswith('.json'):
url = url + ".json"
@ToshiroScan, you probably forgot to mention that lines 57-58 also need to be removed
if not url.endswith('.json'): url = url + ".json"
You're right, thanks for mentioning it, I edited my original post
Thanks for fixing this. @ToshiroScan is it ok to edit the original script with your edits?
Hi @Nominom, yes, of course! Feel free to edit the original script with my changes, it will make things easier for everyone.
Thanks, @ToshiroScan. I've edited the original script.
Also sorry @jonaselz for taking so long to reply (kinda forgot about this), but I couldn't find a cause for the black bars. If anyone else finds the reason (or a fix), please share!
@Nominom, in some DRM-protected pages there are unused pixels on the right side, so we need to create a new image without the additional width.
dest = Image.new(source.mode, (source.width-source.width%32, source.height-source.height%32))
Thanks, @QPDEH. It looks like it was indeed any pixels that don't align with 32. I noticed when trying your fix that some of the art would get covered, so those pixels had to be used somehow. It turned out that those pixels were just not scrambled at all, and the fix was to copy the source image as the destination image before writing to it so that those extra pixels would get included as well.
dest = source.copy()
I've updated the script with the fix.
I went to download a paid one It said
Did you login in the script?
@Nominom I wanted to thank you a lot, for this script!
I adapted it, rewrote it in Rust and added an automated EPUB conversion on top - it's still a work in progress, but in principle it works
https://github.com/ChrisB9/manga_epub_creator
I will totally credit you on the readme once I wrote one and also mention you about the drm-part that I 1:1 copied
I went to download a paid one It said
Did you login in the script?
Yes, I did. It also worked on an already released free chapter, but it didn't work on the previewed next chapter that was going to be released for free the following week.
@LA20000, @MandyHiman, I'll see if I can figure out what's wrong. I haven't bought any chapters myself, so it's a bit hard to test 😄
@ChrisB9 Thanks for the thanks! Be sure to credit the original creator of the script as well; this version is just an updated version from here: https://gist.github.com/drdaxxy/1e43b3aee3e08a5898f61a45b96e4cb4
The DRM code is basically straight from there.
@LA20000 @MandyHiman I bought a few chapters to test, and the script seems to work just fine (even for the more expensive, latest episodes). Please double-check that your script is logging in correctly. It should say something like:
...
LOGIN SUCCESS
<RequestsCookieJar[<Cookie glsc=<SECRETCOOKIEHERE> for pocket.shonenmagazine.com/>]>
Getting from URL: <urlhere>
...
If you've bought the chapter, the script logs in correctly, and it still fails to download, you can see what it says if you add print('data:' + json.dumps(data))
to line 163. Here:
if pageStructure == None:
print('Could not download pages. Most likely this volume is not public.')
+ print('data:' + json.dumps(data))
sys.exit(1)
For me, it has "hasPurchased": false
and "isPublic": false
for chapters that I have not bought.
Hi, I noticed when I used the script today that it suddenly put a black bar of the same size on the right side of every page, covering art and making spreads look bad. Any idea what could cause this? I haven't had any problems using the script in the past.