Last active
March 13, 2025 19:19
-
-
Save Nominom/634f27db499c04ae08f3962ce6cd9a4d to your computer and use it in GitHub Desktop.
New version of pocket.shonenmagazine.com ripper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Original script by drdaxxy | |
# https://gist.github.com/drdaxxy/1e43b3aee3e08a5898f61a45b96e4cb4 | |
# Thanks to ToshiroScan for fixing after a shonen update broke this. | |
# And QPDEH for helping fix the black bars issue with some manga. | |
# This script has been tested to work with python 3.10 | |
# To install required libraries: | |
# | |
# pip install requests | |
# pip install Pillow | |
# pip install beautifulsoup4 | |
import sys | |
import os | |
import requests | |
import errno | |
import json | |
from PIL import Image | |
from bs4 import BeautifulSoup | |
# --- Optional login ------------------------------------------------------
# Leave `login` False for free chapters; set it True (and fill in the two
# credential fields) to download purchased chapters.
login = False  # Set this to True if you want to login
username = "your email here"
password = "your password"

loginheaders = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:130.0) Gecko/20100101 Firefox/130.0',
    'origin': 'https://pocket.shonenmagazine.com',
    'x-requested-with': 'XMLHttpRequest',
}
loginurl = "https://pocket.shonenmagazine.com/user_account/login"

# One session shared by the whole script so the auth cookies persist.
sess = requests.Session()

if login:
    logindata = {
        "email_address": username,
        "password": password,
        "return_location_path": "/",
    }
    login_resp = sess.post(loginurl, headers=loginheaders, data=logindata)
    if login_resp.ok:
        print('LOGIN SUCCESS')
        print(sess.cookies)
    else:
        print('LOGIN FAILED')
        for detail in (login_resp.headers, login_resp.status_code,
                       login_resp.reason, login_resp.text):
            print(detail)
# --- Command line: <url> <destination folder> ----------------------------
if len(sys.argv) != 3:
    print("usage: shonenripperjson.py <url> <destination folder>")
    sys.exit(1)

destination = sys.argv[2]
# exist_ok=True replaces the old exists()-check + errno.EEXIST try/except
# idiom: it is race-free, ignores an already-existing directory, and still
# raises for any other OSError, exactly as the original intended.
os.makedirs(destination, exist_ok=True)
url = sys.argv[1]

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:130.0) Gecko/20100101 Firefox/130.0'}

# If soup method below doesn't work, try uncommenting these 2 lines
# if not url.endswith('.json'):
#     url = url + ".json"

print("Getting from url: " + url)
page_resp = sess.get(url=url, headers=headers)

# And this
# data = r.json()
# and comment from here ========
# The episode metadata is embedded in the viewer page as a JSON blob inside
# <script id="episode-json" data-value="...">.
soup = BeautifulSoup(page_resp.content, 'html.parser')
script_tag = soup.find('script', id='episode-json')
if script_tag is None:
    print("No <script> with ID 'episode-json' found.")
    sys.exit(1)
data = json.loads(script_tag['data-value'])
# to here ======================
def dlImage(url, outFilename, drm):
    """Download one page image to *outFilename* and descramble it if needed.

    The file extension (.jpg / .png) is appended based on the response
    content-type. When *drm* is truthy the saved image is unscrambled in
    place. Uses the module-level ``sess`` session and ``headers``; failures
    are printed and the function returns without raising, keeping the
    script's best-effort behavior.
    """
    r = sess.get(url, stream=True, headers=headers)
    if not r.ok:
        print(r)
        return

    # Strip any "; charset=..." style parameters and tolerate a missing
    # header (None) — the original exact-match comparison handled neither.
    content_type = (r.headers.get('content-type') or '').split(';')[0].strip()
    extensions = {"image/jpeg": ".jpg", "image/png": ".png"}
    if content_type not in extensions:
        print("content type not recognized!")
        print(r)
        return
    outFilename = outFilename + extensions[content_type]

    with open(outFilename, 'wb') as file:
        for block in r.iter_content(1024):
            if not block:
                break
            file.write(block)

    if not drm:
        return

    # Pages are served scrambled as a DIVIDE_NUM x DIVIDE_NUM grid of cells;
    # each cell's destination index is the transpose of its source index.
    # Cell sizes are rounded down to a multiple of MULTIPLE, so a remainder
    # strip at the right/bottom edges is left untouched (presumably the fix
    # for the black-bars issue mentioned in the header comments).
    DIVIDE_NUM = 4
    MULTIPLE = 8
    # Context manager closes the source file handle (the original leaked it).
    with Image.open(outFilename) as source:
        dest = source.copy()
        cell_width = (source.width // (DIVIDE_NUM * MULTIPLE)) * MULTIPLE
        cell_height = (source.height // (DIVIDE_NUM * MULTIPLE)) * MULTIPLE
        for e in range(DIVIDE_NUM * DIVIDE_NUM):
            src_col = e % DIVIDE_NUM
            src_row = e // DIVIDE_NUM
            # Transposed cell index: (row, col) -> (col, row).
            dst_index = src_col * DIVIDE_NUM + src_row
            dst_col = dst_index % DIVIDE_NUM
            dst_row = dst_index // DIVIDE_NUM
            cell = source.crop((src_col * cell_width,
                                src_row * cell_height,
                                (src_col + 1) * cell_width,
                                (src_row + 1) * cell_height))
            dest.paste(cell, (dst_col * cell_width, dst_row * cell_height))
        dest.save(outFilename)
# --- Walk the parsed episode JSON and download every page ---------------
if 'readableProduct' in data:
    readableProduct = data['readableProduct']
    # .get() collapses the 'key in dict' + lookup pairs; `is None` replaces
    # the non-idiomatic `== None` comparisons (PEP 8).
    nextReadableProductUri = readableProduct.get('nextReadableProductUri')

    if 'pageStructure' not in readableProduct:
        print('could not find pageStructure from json response')
        sys.exit(1)

    pageStructure = readableProduct['pageStructure']
    if pageStructure is None:
        # Typically means the chapter is paid and the session has not
        # purchased it / is not logged in.
        print('Could not download pages. Most likely this volume is not public.')
        sys.exit(1)

    choJuGiga = pageStructure.get('choJuGiga', '')
    print('choJuGiga: ', choJuGiga)
    # "usagi" marks unscrambled pages; anything else needs descrambling.
    drm = choJuGiga != "usagi"

    pages = pageStructure.get('pages', [])
    if not pages:
        print("No pages found")
        sys.exit(1)

    # Only entries with a 'src' are images; number the files 0001, 0002, ...
    pageIndex = 0
    for page in pages:
        if 'src' in page:
            src = page['src']
            print(src)
            pageIndex += 1
            outFile = os.path.join(destination, f"{pageIndex:04d}")
            dlImage(src, outFile, drm)

    if nextReadableProductUri is not None:
        print("Next URI: ", nextReadableProductUri)
else:
    print('could not find readableProduct from json response')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@LA20000 @MandyHiman I bought a few chapters to test, and the script seems to work just fine (even for the more expensive, latest episodes). Please double-check that your script is logging in correctly. It should say something like:
If you've bought the chapter, the script logs in correctly, and it still fails to download, you can see what it says if you add
print('data:' + json.dumps(data))
to line 163, i.e. here:

    if pageStructure == None:
        print('Could not download pages. Most likely this volume is not public.')
        print('data:' + json.dumps(data))   # <-- added line
        sys.exit(1)
For me, it has
"hasPurchased": false
and "isPublic": false
for chapters that I have not bought.