Skip to content

Instantly share code, notes, and snippets.

@Lesmiscore
Last active December 17, 2022 17:34
Show Gist options
  • Save Lesmiscore/24d93c6ed75b5b7d8ef40535bf114663 to your computer and use it in GitHub Desktop.
Save Lesmiscore/24d93c6ed75b5b7d8ef40535bf114663 to your computer and use it in GitHub Desktop.
PixivUtil2 to gallery-dl migration script
### PixivUtil2 to gallery-dl migration script
# this assumes that:
# - FIRST FOUR filename format options in PixivUtil2 (i.e. filenameFormat, filenameMangaFormat, filenameInfoFormat, filenameMangaInfoFormat) are set to
# "%artist% (%member_id%)-%urlFilename%-%title%" (should be default)
# - PixivUtil2 itself hasn't been modified, or only very slightly (such as forcing the image ID and page number to a fixed length)
# - the "archive-format" option in gallery-dl for the pixiv category is left at its default
# usage: python3 pxu2gdl.py
# Copyright 2022 Lesmiscore (https://github.com/Lesmiscore)
import sqlite3
import re
import itertools
# Matches a save_name ending in "_p<digits>-<anything>.<jpg|jpeg|png|gif>".
# Group 1 is the digit run after "_p" (used below as a page number),
# group 2 is the file extension.
fnre = re.compile(r"_p(\d+)-.*?\.(jpe?g|png|gif)$")
# Matches a save_name ending in "_ugoira<digits and/or 'x'>-<anything>.zip".
# Group 1 is the extension ("zip").
fnre_ugoira = re.compile(r"_ugoira[\dx]+-.*?\.(zip)$")
# Open the source database ("db.sqlite", presumably PixivUtil2's database --
# it is queried for the "pixiv_master_image" table below) as `pxu`, and the
# output database ("archive-pixiv-converted.sqlite") as `gdl`.
with sqlite3.connect("db.sqlite") as pxu, \
sqlite3.connect("archive-pixiv-converted.sqlite") as gdl:
# Create the output table: a single TEXT primary-key column named "entry".
# NOTE(review): there is no IF NOT EXISTS, so this statement raises when the
# output file already contains an "archive" table (e.g. on a second run).
gdl.execute("CREATE TABLE archive (entry TEXT PRIMARY KEY) WITHOUT ROWID")
# Cursor on the source database; the row generator below runs its queries on it.
pxuc = pxu.cursor()
# Count of source rows consumed so far; also passed as the query offset.
pmi_offset = 0
# Count of archive entries generated so far.
gdl_count = 0
def enumerate_pixiv_master_images():
    """Yield one ``(image_id, pages, extension)`` tuple per source row.

    Rows of ``pixiv_master_image`` are fetched through the module-level
    cursor ``pxuc`` in windows of ``page_size`` rows.  Each ``save_name`` is
    tried against ``fnre`` first and ``fnre_ugoira`` second:

    * ``fnre`` match: ``pages`` is the captured page number plus one and
      ``extension`` is the captured image extension.
    * ``fnre_ugoira`` match: ``pages`` is 1 and ``extension`` is the captured
      archive extension.

    The module-level counter ``pmi_offset`` is incremented once per yielded
    row (when the consumer asks for the next item); it serves both as the
    offset of the next query and as the running total of processed rows.

    Raises:
        ValueError: if a ``save_name`` matches neither pattern.
    """
    global pmi_offset
    page_size = 12000
    while True:
        # Without ORDER BY, SQLite leaves the row order undefined, so
        # consecutive LIMIT/OFFSET windows are not guaranteed to line up
        # (rows could be skipped or repeated).  Ordering by image_id pins
        # the windows to a stable sequence.
        pxuc.execute("""
            SELECT "image_id","save_name" FROM "main"."pixiv_master_image"
            ORDER BY "image_id" LIMIT ?, ?;
        """, [pmi_offset, page_size])
        rows = pxuc.fetchall()
        if not rows:
            # An empty window means the offset has reached the end of the table.
            return
        for image_id, save_name in rows:
            matches = fnre.search(save_name)
            if matches:
                pages, extension = matches.groups()
            else:
                matches = fnre_ugoira.search(save_name)
                if not matches:
                    raise ValueError(f'{save_name} is not acceptable')
                # An ugoira archive counts as a single entry (0 + 1 below).
                pages = 0
                extension = matches.group(1)
            # The captured page number is zero-based; +1 turns it into a count.
            yield (image_id, int(pages) + 1, extension)
            pmi_offset += 1
def convert_to_gallerydl_archive(iter):
    """Turn ``(image_id, pages, extension)`` tuples into archive entry strings.

    A work with exactly one page yields a single entry of the form
    ``pixiv<image_id>.<extension>``.  Any other page count yields one entry
    per page index, ``pixiv<image_id>_p<NN>.<extension>``, where the index is
    zero-padded to at least two digits.

    Once all entries of a work have been consumed, the module-level counter
    ``gdl_count`` is increased by that work's page count.
    """
    global gdl_count
    for image_id, page_total, extension in iter:
        if page_total != 1:
            # Multi-page work: one entry per zero-based page index.
            for p in range(page_total):
                yield f"pixiv{image_id}_p{p:02d}.{extension}"
        else:
            # Single-page work: no page suffix.
            yield f"pixiv{image_id}.{extension}"
        gdl_count += page_total
def chunk(it, n=12000):
    """Group the items of *it* into tuples of at most *n* one-element lists.

    Every item ``x`` is wrapped as ``[x]`` so that a yielded tuple can be
    passed straight to ``executemany`` as the parameter sets of a statement
    with a single ``?`` placeholder.

    Args:
        it: any iterable.
        n: maximum number of items per batch.  Defaults to 12000, the value
            that used to be hard-coded.

    Yields:
        Non-empty tuples ``([x0], [x1], ...)``; only the last one may hold
        fewer than *n* items.  Nothing is yielded for an empty iterable.

    Raises:
        ValueError: when iteration starts, if *n* is smaller than 1 (a zero
            batch size would otherwise end immediately and silently drop
            every item).
    """
    if n < 1:
        raise ValueError(f"n must be at least 1, got {n!r}")
    wrapped = ([x] for x in it)
    while True:
        # islice pulls the next n items from the shared generator, so each
        # pass continues where the previous batch stopped.
        batch = tuple(itertools.islice(wrapped, n))
        if not batch:
            return
        yield batch
# Pipeline: source rows -> archive entry strings -> batches; every batch is
# written to the archive table with one executemany() call.
pxu_images = enumerate_pixiv_master_images()
archive_entries = convert_to_gallerydl_archive(pxu_images)
for batch in chunk(archive_entries):
    gdl.executemany("INSERT INTO archive VALUES (?)", batch)
# Both counters are module-level globals updated by the two generators.
print(f"Converted {pmi_offset} PixivUtil2 entries to {gdl_count} gallery-dl entries")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment