Skip to content

Instantly share code, notes, and snippets.

Last active August 5, 2020 01:35
Show Gist options
  • Save oshoham/b05e81e4759e5105d0a4c947172e025c to your computer and use it in GitHub Desktop.
Save oshoham/b05e81e4759e5105d0a4c947172e025c to your computer and use it in GitHub Desktop.
Parse the Book of Blaseball from the site's minified JavaScript
{
  "name": "blaseball-book-scraper",
  "version": "1.0.0",
  "description": "",
  "main": "parse_blaseball_book.js",
  "author": "",
  "license": "ISC",
  "dependencies": {
    "acorn": "^7.4.0",
    "acorn-walk": "^7.2.0",
    "bent": "^7.3.9",
    "cheerio": "^1.0.0-rc.3"
  }
}
const url = require('url');
const bent = require('bent');
const cheerio = require('cheerio');
const acorn = require('acorn');
const walk = require('acorn-walk');
const getString = bent('string');
/**
 * Tells whether a CallExpression node is a `<something>.createElement(...)` call.
 *
 * NOTE(review): the operands of this check were lost in the extracted source
 * (only `=== 'Identifier'` and `=== 'createElement'` survived); testing the
 * callee's property is the presumed original intent — confirm.
 *
 * @param {object} node - ESTree CallExpression node.
 * @returns {boolean} true when the callee is a non-computed member access
 *   whose property is the identifier `createElement`.
 */
function isCreateElementCall(node) {
  const { callee } = node;
  return (
    callee.type === 'MemberExpression' &&
    !callee.computed &&
    callee.property.type === 'Identifier' &&
    callee.property.name === 'createElement'
  );
}

/**
 * Reads the literal value of an object expression that has exactly one
 * property with the given key, e.g. `{ str: "text" }` or `{ className: "a b" }`.
 *
 * @param {object} arg - ESTree node (any type).
 * @param {string} keyName - Property key to look for.
 * @returns {*} the literal's value, or undefined when `arg` is not a
 *   single-property object literal with that key and a Literal value.
 */
function singlePropertyLiteral(arg, keyName) {
  if (arg.type !== 'ObjectExpression' || arg.properties.length !== 1) {
    return undefined;
  }
  const [property] = arg.properties;
  // Spread elements have no key; computed keys cannot be compared by name.
  if (property.type !== 'Property' || property.computed) {
    return undefined;
  }
  // The key is an Identifier for `{ str: ... }` and a Literal for `{ "str": ... }`.
  const key = property.key.type === 'Identifier' ? property.key.name : property.key.value;
  if (key !== keyName || property.value.type !== 'Literal') {
    return undefined;
  }
  return property.value.value;
}

/**
 * Downloads the site's main JavaScript bundle, locates the function that
 * renders "The Book of Blaseball" in its AST, and concatenates the text
 * passed to the `createElement` calls inside that function.
 *
 * @param {string} [baseUrl] - Origin of the site to scrape.
 *   NOTE(review): the URL literal was blank in the extracted source; this
 *   default is presumed from the project name — confirm.
 * @returns {Promise<string>} The plain text of the Book.
 * @throws {Error} If the main script tag is missing or ambiguous, or if no
 *   enclosing FunctionDeclaration contains the Book title literal.
 */
async function parseBookFromJavaScript(baseUrl = 'https://www.blaseball.com') {
  const html = await getString(baseUrl);
  const $ = cheerio.load(html);
  const scriptTags = $('script[src^="/static/js/main."]');
  if (scriptTags.length === 0) {
    throw new Error('Could not find the main JS file.');
  }
  if (scriptTags.length > 1) {
    throw new Error('More than one main JS files found.');
  }

  const src = scriptTags.attr('src');
  const jsUrl = new URL(src, baseUrl).href;
  const js = await getString(jsUrl);
  const ast = acorn.parse(js);

  let bookFunctionNode = null;
  walk.ancestor(ast, {
    // No walker state is passed, so the second argument is the ancestor list.
    Literal(node, ancestors) {
      if (node.value !== 'The Book of Blaseball' || bookFunctionNode !== null || ancestors.length <= 1) {
        return;
      }
      // start at the 2nd-to-last ancestor since the last ancestor is the current node
      for (let i = ancestors.length - 2; i >= 0; i--) {
        if (ancestors[i].type === 'FunctionDeclaration') {
          bookFunctionNode = ancestors[i];
          // Keep the nearest enclosing function, not an outer wrapper.
          break;
        }
      }
    },
  });
  if (bookFunctionNode === null) {
    throw new Error('Could not find the FunctionDeclaration node for rendering the Book in the AST.');
  }

  let text = '';
  walk.recursive(bookFunctionNode, null, {
    CallExpression(node, st, c) {
      // The callee of a createElement call carries no text, so only other
      // callees are descended into.
      if (!isCreateElementCall(node)) {
        c(node.callee, st, 'Expression');
      }
      if (!node.arguments) {
        return;
      }
      for (const [index, arg] of node.arguments.entries()) {
        if (index === 0 && arg.type === 'Literal') { // HTML tag
          // Each <div> after the first piece of text starts a new line.
          if (arg.value === 'div' && text !== '') {
            text += '\n';
          }
          // The tag name itself is not part of the Book's text.
          continue;
        }

        const strValue = singlePropertyLiteral(arg, 'str');
        const classNameValue = singlePropertyLiteral(arg, 'className');

        if (arg.type === 'Literal' && arg.value !== null) {
          text += arg.value;
        } else if (strValue != null) {
          text += strValue;
        } else if (typeof classNameValue === 'string') {
          const classNames = classNameValue.split(' ');
          if (classNames.includes('TheBook-Bullet')) {
            text += '\n';
          } else if (classNames.includes('TheBook-SubBullet')) {
            text += ' ';
          }
        } else {
          // Anything else (typically a nested createElement call) is walked recursively.
          c(arg, st, 'Expression');
        }
      }
    },
  });

  return text;
}
// Script entry point: scrape the Book and print it to stdout.
// NOTE(review): the statements that consumed `text` and handled the error
// were missing from the extracted source; printing the text and reporting
// the error are the presumed original behavior — confirm.
(async () => {
  try {
    const text = await parseBookFromJavaScript();
    console.log(text);
  } catch (e) {
    // Report the failure and signal it through the exit code instead of swallowing it.
    console.error(e);
    process.exitCode = 1;
  }
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment