Last active
February 7, 2018 10:12
-
-
Save larvata/5e8727906518c36ae012d01ec855feb9 to your computer and use it in GitHub Desktop.
dump pages from zasshi-online.com
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs'); | |
const requestDefault = require('request'); | |
const curlParser = require('parse-curl'); | |
const Jimp = require('jimp'); | |
// One cookie jar, created up front so that every call made through `request`
// below shares it.
const jar = requestDefault.jar();
// `request` is the library with that jar baked in as a default option.
const request = requestDefault.defaults({
  jar,
});
/**
 * Promise adapter around the callback-style `request` function.
 *
 * @param {*} requestOption - passed through unchanged as the first argument
 *   of `request`.
 * @returns {Promise<*>} resolves with the second callback argument (the
 *   response); rejects with the first callback argument when it is truthy.
 */
const requestPromise = (requestOption) => {
  return new Promise((resolve, reject) => {
    const onComplete = (err, res) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(res);
    };
    request(requestOption, onComplete);
  });
};
/**
 * Build the url-encoded `query=...` request body.
 *
 * The payload is `<p><query .../></p>` where each supplied field becomes a
 * `name="value"` attribute, emitted in the fixed order
 * sid, utg, cmd, pid, layer, x, y. `sid` is additionally wrapped in braces.
 *
 * A field is emitted when it is truthy OR numeric zero. The original truthiness
 * test silently dropped `0`, so an explicit `layer: 0` (or `x: 0` / `y: 0`
 * passed as numbers) never reached the query.
 *
 * @param {object} fields
 * @param {string} [fields.sid]
 * @param {string} [fields.utg]
 * @param {number|string} [fields.cmd]
 * @param {number|string} [fields.pid]
 * @param {number|string} [fields.layer]
 * @param {number|string} [fields.x]
 * @param {number|string} [fields.y]
 * @returns {string} `query=` followed by the URI-encoded payload, with every
 *   `-` additionally escaped as `%2D`.
 */
const componseQuery = ({ sid, utg, cmd, pid, layer, x, y }) => {
  // Same rule as before (skip undefined, null, '', false, NaN) except that the
  // number 0 now counts as a real value.
  const isPresent = (value) => value === 0 || Boolean(value);

  const fields = [
    ['sid', sid, (value) => `{${value}}`],
    ['utg', utg],
    ['cmd', cmd],
    ['pid', pid],
    ['layer', layer],
    ['x', x],
    ['y', y],
  ];

  const params = fields
    .filter(([, value]) => isPresent(value))
    .map(([name, value, format]) => `${name}="${format ? format(value) : value}"`);

  const query = `<p><query ${params.join(' ')}/></p>`;
  // encodeURIComponent leaves '-' untouched; the request body escapes it too.
  return `query=${encodeURIComponent(query).replace(/-/g, '%2D')}`;
};
/**
 * Return a flat 20-element array made of four 5-element rows: three colour
 * rows in an order selected by `index`, followed by the alpha row.
 *
 * Row order per index: 0 -> g,r,b; 1 -> r,b,g; 2 -> b,g,r; 3 -> b,r,g;
 * 4 -> g,b,r. Any other value (including numeric strings) yields r,g,b.
 *
 * @param {number} index - permutation selector.
 * @returns {number[]} the concatenated rows; a fresh array on every call.
 */
const getMatrix = (index) => {
  const channelRows = {
    r: [1, 0, 0, 0, 0],
    g: [0, 1, 0, 0, 0],
    b: [0, 0, 1, 0, 0],
    a: [0, 0, 0, 1, 0],
  };

  // Map lookup matches numeric keys only, so a string such as '0' falls
  // through to the default exactly as a strict `===` comparison would.
  const rowOrderByIndex = new Map([
    [0, 'grb'],
    [1, 'rbg'],
    [2, 'bgr'],
    [3, 'brg'],
    [4, 'gbr'],
  ]);

  const rowOrder = `${rowOrderByIndex.get(index) || 'rgb'}a`;
  return rowOrder
    .split('')
    .reduce((flat, channel) => flat.concat(channelRows[channel]), []);
};
// Load the session from a saved curl command: its method, url, headers and
// body become the template for every request this script sends.
const curlCommand = fs.readFileSync('./curl-command.txt', 'utf8');
const { method, header, body, url } = curlParser(curlCommand);
const requestOption = {
  method,
  url,
  headers: header,
  body,
};

// Extract the session id (sid) and utg value from the url-encoded query body.
// `|| []` keeps a failed match from throwing, so the check below can report
// the problem instead of a TypeError on `null[1]`.
const query = decodeURIComponent(body);
const [, sid] = query.match(/sid="\{(\S+)\}"/) || [];
const [, utg] = query.match(/utg="(\S+)"/) || [];
if (!sid || !utg) {
  console.error('sid or utg is invalid in the given curl command.');
  // Non-zero exit code so callers can tell the run failed.
  process.exit(1);
}

// Page range from the command line. argv entries are strings; parse them so
// that later `<=` comparisons are numeric ("9" <= "10" is false as strings).
const pageFrom = Number.parseInt(process.argv[2], 10);
const pageTo = Number.parseInt(process.argv[3], 10);
if (Number.isNaN(pageFrom) || Number.isNaN(pageTo)) {
  console.error('usage: node <script> <pageFrom> <pageTo>');
  process.exit(1);
}
console.log(`dump page ${pageFrom}-${pageTo}`);
/**
 * Adapt Jimp's callback-style `write` to a promise so callers can wait until
 * the file write has finished (or failed). Awaiting `image.write(path)`
 * directly does not wait, because `write` reports completion via callback.
 */
const writeImage = (image, path) => new Promise((resolve, reject) => {
  image.write(path, (err) => {
    if (err) {
      reject(err);
      return;
    }
    resolve();
  });
});

/**
 * Read a numeric attribute of the form `name="123"` from a response body.
 * Throws a descriptive Error when the attribute is absent.
 */
const readIntAttribute = (text, name) => {
  // \b stops `tile_w` from matching inside `obf_tile_w`.
  const match = text.match(new RegExp(`\\b${name}="(\\d+)"`));
  if (!match) {
    throw new Error(`attribute "${name}" not found in response`);
  }
  return Number(match[1]);
};

/**
 * Apply a flat 4x5 matrix (rows r, g, b, a; four coefficients plus an offset
 * each) to every pixel of `image`, in place. Returns `image`.
 */
const applyColorMatrix = (image, matrix) => {
  const { width, height, data } = image.bitmap;
  return image.scan(0, 0, width, height, (x, y, idx) => {
    // Read all four source channels before overwriting any of them.
    const source = [data[idx], data[idx + 1], data[idx + 2], data[idx + 3]];
    for (let channel = 0; channel < 4; channel++) {
      const row = channel * 5;
      // Bytes are stored directly; this replaces writeUInt32BE(…, true),
      // whose third (noAssert) argument no longer exists on Node >= 10.
      data[idx + channel] = (matrix[row] * source[0])
        + (matrix[row + 1] * source[1])
        + (matrix[row + 2] * source[2])
        + (matrix[row + 3] * source[3])
        + matrix[row + 4];
    }
  });
};

// Main entry point: for each requested page, fetch its metadata, download and
// de-obfuscate every tile, then stitch the tiles into dump/whole-<pid>.jpg.
(async () => {
  try {
    // Send a request based on the curl-derived template without mutating it.
    // encoding null => Buffer body (image data); 'utf8' => string body.
    const send = (requestBody, encoding) => requestPromise(
      Object.assign({}, requestOption, { body: requestBody, encoding }),
    );

    // pageFrom/pageTo may be raw argv strings; coerce so the loop condition
    // compares numbers rather than strings.
    const firstPage = Number(pageFrom);
    const lastPage = Number(pageTo);

    for (let pid = firstPage; pid <= lastPage; pid++) {
      // get master metadata query
      const metadataQuery = componseQuery({ sid, utg, cmd: 2, pid });
      const { body: pageMeta } = await send(metadataQuery, 'utf8');
      console.log(pageMeta);

      // parse metadata
      const orig_width = readIntAttribute(pageMeta, 'orig_width');
      const orig_height = readIntAttribute(pageMeta, 'orig_height');
      const tile_width = readIntAttribute(pageMeta, 'tile_w');
      const tile_height = readIntAttribute(pageMeta, 'tile_h');
      const obf_tile_width = readIntAttribute(pageMeta, 'obf_tile_w');
      const obf_tile_height = readIntAttribute(pageMeta, 'obf_tile_h');

      // fetch tiles in pages
      const tile_count_x = Math.ceil(orig_width / tile_width);
      const tile_count_y = Math.ceil(orig_height / tile_height);
      const subtile_count_x = Math.floor(tile_width / obf_tile_width);
      const subtile_count_y = Math.floor(tile_height / obf_tile_height);
      const tiles = [];

      for (let ty = 0; ty < tile_count_y; ty++) {
        for (let tx = 0; tx < tile_count_x; tx++) {
          // Coordinates are sent as strings, as in the original requests.
          const tile_x = (tx * tile_width).toString();
          const tile_y = (ty * tile_height).toString();
          const tileFields = { sid, utg, pid, layer: 0, x: tile_x, y: tile_y };

          // cmd 4: tile image bytes
          const tileQuery = componseQuery(Object.assign({ cmd: 4 }, tileFields));
          const { body: tileData } = await send(tileQuery, null);

          // get tile meta (cmd 5): colour code and sub-tile sequence
          const tileMetaQuery = componseQuery(Object.assign({ cmd: 5 }, tileFields));
          const { body: meta } = await send(tileMetaQuery, 'utf8');
          console.log(meta);

          const metaMatch = meta.match(/<key size="\d+" rgbcode="(\d+)">(\S+)<\/key>/);
          if (!metaMatch) {
            throw new Error(`tile key not found for page ${pid}, tile ${tx}-${ty}`);
          }
          const matrix = getMatrix(Number(metaMatch[1]));
          const seq = metaMatch[2].split(',').map(Number);

          // adjust color
          const image = applyColorMatrix(await Jimp.read(tileData), matrix);

          // Cut the tile into sub-tiles, row by row, before any of them is
          // overwritten by the re-arrangement below.
          const subtiles = [];
          for (let sty = 0; sty < subtile_count_y; sty++) {
            for (let stx = 0; stx < subtile_count_x; stx++) {
              subtiles.push(image.clone().crop(
                stx * obf_tile_width,
                sty * obf_tile_height,
                obf_tile_width,
                obf_tile_height,
              ));
            }
          }

          // re-arrange tiles: the i-th sub-tile is drawn at grid slot seq[i].
          // A plain loop replaces forEach(async ...), which never waited for
          // its callbacks.
          for (let i = 0; i < seq.length; i++) {
            const slot = seq[i];
            const sx = slot % subtile_count_x;
            const sy = Math.floor(slot / subtile_count_x);
            image.blit(subtiles[i], sx * obf_tile_width, sy * obf_tile_height);
          }

          await writeImage(image, `dump/${tx}-${ty}.jpg`);
          tiles.push(image);
        }
      }

      // compose tiles (stored in row-major order) onto the full page
      const wholeImage = new Jimp(orig_width, orig_height);
      console.log('compositing...');
      for (let i = 0; i < tiles.length; i++) {
        const tx = i % tile_count_x;
        const ty = Math.floor(i / tile_count_x);
        wholeImage.composite(tiles[i], tx * tile_width, ty * tile_height);
      }
      await writeImage(wholeImage, `dump/whole-${pid}.jpg`);
      console.log(`-> saved page: ${pid}`);
    }
  } catch (e) {
    console.error(e);
    // Signal failure to the caller instead of exiting with status 0.
    process.exitCode = 1;
  }
})();
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "name": "zasshi-teardown",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "keywords": [],
  "author": "",
  "license": "ISC",
  "dependencies": {
    "jimp": "^0.2.28",
    "parse-curl": "^0.2.6",
    "request": "^2.83.0"
  },
  "devDependencies": {
    "eslint": "^4.16.0",
    "eslint-config-airbnb-base": "^12.1.0",
    "eslint-plugin-import": "^2.8.0"
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
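The `async` callbacks passed to `forEach` should be replaced with `for...of` loops, because `forEach` does not wait for the promises its callbacks return.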