Last active
January 10, 2019 04:53
-
-
Save uhyo/6ba05bbc594f601286e2443ea254d054 to your computer and use it in GitHub Desktop.
Sotsuron counter (for node.js)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #! /usr/bin/env node | |
| 'use strict'; | |
| const fs = require('fs'); | |
| const path = require('path'); | |
| const crypto = require('crypto'); | |
| const child_process = require('child_process'); | |
// Read the command-line options (everything after `node count.js`).
const args = process.argv.slice(2);
const {
  help,
  thesis,
  chapter,
  countMath,
  nameMaxlen,
  showSections,
  noColor,
} = parseArgs(args);

// With -h/--help, print the usage text and quit before doing any work.
if (help){
  const usage = `
count.js [OPTIONS] FILENAME
Counts words in tex file for each chapter.
Options:
-h, --help : Help message.
-c CHAPTER, --chapter CHAPTER: Count only one chapter.
-m : Do not remove math commands with 'detex' command.
-l LENGTH, --length LENGTH : Limit character numbers shown.
-s, --section : Wordcount each section.
-S, --section-number : Do not show section titles.
--no-color : Do not color outputs.
`;
  console.log(usage);
  process.exit();
}
// Make sure ./latexcount.pl is available before any counting starts.
ensureLatexcount();

// Load the thesis source and split it into chapters.
const data = loadTeXFile(thesis);
const lines = data.split(/\r\n|\r|\n/);
const [chapters, names] = splitToChapters(data, chapter);

// One pending word count per chapter body and per section, each tagged so
// the results can be matched back to their chapter once they all settle.
const pendings = [];
names
  .filter(title => chapter == null || title === chapter)
  .forEach(title => {
    const entry = chapters[title];
    // Text of the chapter that does not belong to any section.
    pendings.push(countWords(entry.lines, countMath).then(words => ({
      type: 'chapter',
      chapter: title,
      words,
    })));
    // Each section of the chapter, remembered by its position.
    entry.sections.forEach((sect, index) => {
      pendings.push(countWords(sect.lines, countMath).then(words => ({
        type: 'section',
        chapter: title,
        section: sect.name,
        index,
        words,
      })));
    });
  });
// Once every word count has settled, aggregate the numbers per chapter and
// print the report. Any failure is logged and reflected in the exit status.
Promise.all(pendings).then(results=>{
  // Collect the results: one entry per chapter, sections stored by index.
  const table = {};
  for (const chap of names){
    table[chap] = {
      appendix: chapters[chap].appendix,
      words: 0,
      sections: [],
    };
  }
  for (const {type, words, chapter, section, index} of results){
    if (type === 'chapter'){
      table[chapter].words += words;
    }else{
      const o = table[chapter];
      o.sections[index] = {
        name: section,
        words,
      };
      // A chapter total includes the words of all of its sections.
      o.words += words;
    }
  }
  // Pre-formatting: build the display labels and find the widest one.
  let leftMax = 0;
  {
    const numbering = {
      chapter: 1,
      appendix: 0,
    };
    for (const chap of names){
      const {
        appendix,
        words,
        sections,
      } = table[chap];
      // Main chapters are numbered 1, 2, ...; appendices are lettered A, B, ...
      const chapnum = appendix ?
        String.fromCharCode(0x41 + numbering.appendix) :
        String(numbering.chapter);
      const chapnumpad = padStart(chapnum, 2);
      const n = `${chapnumpad}. `;
      const newName = `${n}${chap}`.slice(0, nameMaxlen);
      leftMax = Math.max(leftMax, newName.length);
      const newSections = showSections ?
        sections.map(({name, words}, i)=>{
          const sectnum = `${chapnumpad}-${i+1}. `;
          // In 'number' mode only the section number is shown, not its title.
          const n = showSections === 'number' ?
            `${sectnum}` :
            `${sectnum}${name}`;
          const newName = n.slice(0, nameMaxlen);
          leftMax = Math.max(leftMax, newName.length);
          return {
            name: newName,
            words,
          };
        }) :
        [];
      table[chap] = {
        name: newName,
        appendix,
        words,
        sections: newSections,
      };
      if (appendix){
        numbering.appendix++;
      }else{
        numbering.chapter++;
      }
    }
  }
  // Running totals for the main text and the appendices.
  const count = {
    main: 0,
    appendix: 0,
  };
  // Print the results. The label column is at least as wide as 'APPENDIX'
  // so that the summary rows line up.
  leftMax = Math.max('APPENDIX'.length, leftMax);
  for (const chap of names){
    if (chapter != null && chapter !== chap){
      continue;
    }
    const {
      name,
      appendix,
      words,
      sections,
    } = table[chap];
    // Chapter subtotal.
    const leftPad = padEnd(name, leftMax);
    const wpad = padStart(words, 5);
    console.log(`${leftPad}:${wpad}`);
    // Per-section rows, printed in gray.
    if (showSections){
      for (const {name, words} of sections){
        const leftPad = padEnd(name, leftMax);
        const wpad = padStart(words, 5);
        console.log(gray(`${leftPad}:${wpad}`));
      }
    }
    if (appendix){
      count.appendix += words;
    }else{
      count.main += words;
    }
  }
  // The summary is only printed when no single chapter was selected.
  if (chapter == null){
    console.log('-'.repeat(leftMax+6));
    console.log(`${padEnd('MAIN', leftMax)}:${padStart(count.main, 5)}`);
    console.log(`${padEnd('APPENDIX', leftMax)}:${padStart(count.appendix, 5)}`);
    console.log(`${padEnd('TOTAL', leftMax)}:${padStart(count.main+count.appendix, 5)}`);
  }
})
.catch(e=>{
  console.error(e);
  // Report the failure through the exit status instead of exiting with 0.
  process.exitCode = 1;
});
/**
 * Parses the command-line arguments.
 *
 * Short options may be grouped (`-ms`). `-c`/`--chapter` and `-l`/`--length`
 * take their value from the next argument. Any argument that does not start
 * with `-` is taken as the thesis filename (the last one wins).
 *
 * @param {string[]} args - Arguments without the node and script paths.
 * @returns {{thesis: string, chapter: ?string, countMath: boolean,
 *   nameMaxlen: number, showSections: (boolean|string), noColor: boolean,
 *   help: boolean}} The options, with defaults for everything not given.
 *   `showSections` is `false`, `true` (-s) or `'number'` (-S).
 * @throws {Error} On an unrecognized option, or when `-c`/`-l` is the last
 *   argument and therefore has no value.
 */
function parseArgs(args){
  const parsed = {
    thesis: './thesis.tex',
    chapter: null,
    countMath: false,
    nameMaxlen: 25,
    showSections: false,
    noColor: false,
    help: false,
  };
  // The option still waiting for its value: {key, flag}, or null.
  let pending = null;
  for (const arg of args){
    if (pending != null){
      if (pending.key === 'chapter'){
        parsed.chapter = arg;
      }else{
        // A non-numeric (or zero) length falls back to the default of 25.
        parsed.nameMaxlen = parseInt(arg, 10) || 25;
      }
      pending = null;
      continue;
    }
    // Positional argument. It is checked before option matching so that a
    // bare word such as `m` or `s` is a filename, not a short option.
    if (!arg.startsWith('-')){
      parsed.thesis = arg;
      continue;
    }
    // `-abc` is a group of short options; anything else is a single token.
    const tokens = /^-\w+$/.test(arg) ?
      arg.slice(1).split('').map(c => `-${c}`) :
      [arg];
    for (const token of tokens){
      switch (token){
        case '-c': case '--chapter': {
          pending = {key: 'chapter', flag: token};
          break;
        }
        case '-m': {
          parsed.countMath = true;
          break;
        }
        case '-h': case '--help': {
          parsed.help = true;
          break;
        }
        case '-l': case '--length': {
          pending = {key: 'length', flag: token};
          break;
        }
        case '-s': case '--section': {
          parsed.showSections = true;
          break;
        }
        case '-S': case '--section-number': {
          parsed.showSections = 'number';
          break;
        }
        case '--no-color': {
          parsed.noColor = true;
          break;
        }
        default: {
          throw new Error(`Unrecognized option: ${arg}`);
        }
      }
    }
  }
  if (pending != null){
    throw new Error(`Option ${pending.flag} requires an argument`);
  }
  return parsed;
}
/**
 * Makes sure ./latexcount.pl exists and carries the brace-counting fix.
 *
 * If the file is missing it is downloaded with curl. If its MD5 differs from
 * the expected checksum, the embedded unified diff is applied with `patch`.
 * Patching is best effort: a failure is reported on stderr and execution
 * continues.
 *
 * @throws {Error} If reading the file fails for a reason other than ENOENT,
 *   or if the download command fails.
 */
function ensureLatexcount(){
  let lcd;
  try {
    lcd = fs.readFileSync('./latexcount.pl', 'utf8');
  } catch(e){
    if (e.code !== 'ENOENT'){
      throw e;
    }
    console.error(green(`Downloading latexcount.pl to ${path.resolve('./latexcount.pl')}`));
    // -f makes curl exit non-zero on an HTTP error instead of saving the
    // server's error page as latexcount.pl.
    child_process.execSync('curl -f -o ./latexcount.pl http://ftp.jaist.ac.jp/pub/CTAN/support/latexcount/latexcount.pl 2> /dev/null', {
      encoding: 'utf8',
    });
    lcd = fs.readFileSync('./latexcount.pl', 'utf8');
  }
  const md5 = md5sum(lcd);
  if (md5 !== 'e7a62d514ef12a326109c685cad975a0'){
    // Status messages go to stderr, like the download notice above, so that
    // stdout carries only the report.
    console.error(green('Applying patch to latexcount.pl'));
    // The diff makes the brace counting skip backslash-escaped braces.
    // NOTE(review): the whitespace inside this diff is significant (context
    // lines of a unified diff start with a space) — verify it byte-for-byte
    // against a known-good copy before relying on it.
    const p = String.raw `--- latexcount.pl 2016-12-29 20:45:35.381827297 +0900
+++ latexcount_fix.pl 2016-12-31 19:46:23.000000000 +0900
@@ -57,8 +57,8 @@
$line =~ s/(?<!\\)%.*?\n//g;
# Count curly braces
- while($line =~ /\{/g){$depth++}
- while($line =~ /\}/g){$depth--}
+ while($line =~ /(?<!\\)\{/g){$depth++}
+ while($line =~ /(?<!\\)\}/g){$depth--}
# Concatenate the new hunk of input to any
# left over from previous cycles.
`;
    try {
      child_process.execSync('patch -u', {
        input: p,
      });
    } catch(e){
      // Keep going, but say why the patch could not be applied.
      console.error(`Applying patch failed: ${e.message}`);
    }
  }
}
/**
 * Returns the MD5 digest of `str` as a hexadecimal string.
 *
 * @param {string} str - Data to hash.
 * @returns {string} Hex-encoded MD5 digest.
 */
function md5sum(str){
  return crypto
    .createHash('md5')
    .update(str)
    .digest('hex');
}
/**
 * Reads a TeX file and inlines the files it includes.
 *
 * Only a limited form of \include is handled: the command must be alone on
 * its line. `\include{name}` is replaced by the expanded contents of
 * `./name.tex`, and the included file is processed the same way.
 *
 * @param {string} file - Path of the TeX file to read.
 * @returns {string} The file contents with the includes expanded.
 */
function loadTeXFile(file) {
  const includeLine = /^\\include\{(.+)\}$/gm;
  const source = fs.readFileSync(file, 'utf8');
  const expandInclude = (wholeLine, target) => loadTeXFile(`./${target}.tex`);
  return source.replace(includeLine, expandInclude);
}
/**
 * Splits TeX source into chapters and their sections.
 *
 * Lines containing \chapter{...} or \section{...} start a new chapter or
 * section; the heading lines themselves are not stored. A line containing
 * \appendix marks every later chapter as an appendix. A line containing
 * \bibliographystyle stops collection until the next heading. Lines before
 * the first heading are dropped, and sections that appear before the first
 * chapter are not attached to any chapter.
 *
 * Chapters are keyed by title, so two chapters with the same title share one
 * entry in the returned map.
 *
 * @param {string} data - Full TeX source.
 * @param {?string} chapter - Not used by this function.
 * @returns {Array} A pair `[result, names]`: `result` maps a chapter title
 *   to `{appendix, lines, sections}`, where `lines` holds the chapter text
 *   before its first section and `sections` is a list of `{name, lines}`;
 *   `names` lists the chapter titles in document order.
 */
function splitToChapters(data, chapter){
  const lines = data.split(/\r\n|\r|\n/);
  let appendix = false;
  const result = {};
  const names = [];
  let sects = [];
  let cur = null;
  // Walk the lines, routing each one to the chapter or section it follows.
  for (const l of lines){
    const heading = parseHeadingLine(l);
    if (heading != null){
      const {kind, name} = heading;
      cur = [];
      if (kind === 'chapter'){
        names.push(name);
        sects = [];
        result[name] = {
          appendix,
          lines: cur,
          sections: sects,
        };
      }else{
        sects.push({
          name,
          lines: cur,
        });
      }
      continue;
    }
    if (/\\bibliographystyle/.test(l)){
      cur = null;
      continue;
    }
    if (/\\appendix/.test(l)){
      appendix = true;
      continue;
    }
    if (cur != null){
      cur.push(l);
    }
  }
  return [result, names];
}

/**
 * Finds a \chapter{...} or \section{...} command in a line.
 *
 * The title ends at the brace that closes the command's argument, so nested
 * groups are kept and trailing material such as \label{...} is not part of
 * the title. If the braces on the line are unbalanced, the title runs up to
 * the last closing brace on the line instead.
 *
 * @param {string} line - One line of TeX source.
 * @returns {?{kind: string, name: string}} The heading kind ('chapter' or
 *   'section') and its title, or null when the line has no such heading or
 *   the title is empty.
 */
function parseHeadingLine(line){
  const start = /\\(chapter|section)\s*\{/.exec(line);
  if (start == null){
    return null;
  }
  const from = start.index + start[0].length;
  let depth = 1;
  let end = -1;
  for (let i = from; i < line.length; i++){
    const ch = line[i];
    if (ch === '\\'){
      // Skip the escaped character so that \{ and \} are not counted.
      i++;
    }else if (ch === '{'){
      depth++;
    }else if (ch === '}'){
      depth--;
      if (depth === 0){
        end = i;
        break;
      }
    }
  }
  if (end === -1){
    end = line.lastIndexOf('}');
  }
  if (end <= from){
    return null;
  }
  return {
    kind: start[1],
    name: line.slice(from, end),
  };
}
/**
 * Counts the words in a block of TeX lines by piping them through
 * `perl latexcount.pl`.
 *
 * Unless `countMath` is set, the text first goes through `detex -` and the
 * output of detex is what gets counted.
 *
 * @param {string[]} lines - Lines of TeX source; joined with '\n'.
 * @param {boolean} countMath - When true, skip detex and feed the text
 *   straight to latexcount.pl.
 * @returns {Promise<number>} The number taken from the "<n> total" line of
 *   the latexcount.pl output. Rejects if a process cannot be started, if
 *   writing to a process fails, or if no such line is printed.
 */
function countWords(lines, countMath){
  return new Promise((resolve, reject)=>{
    const countProcess = child_process.spawn('perl', ['latexcount.pl'], {
      stdio: ['pipe', 'pipe', process.stderr],
    });
    // Without these handlers a missing executable or a broken pipe raises an
    // unhandled 'error' event and crashes the process.
    countProcess.on('error', reject);
    countProcess.stdin.on('error', reject);

    let input;
    if (countMath){
      input = countProcess.stdin;
    }else{
      const detex = child_process.spawn('detex', ['-'], {
        stdio: ['pipe', 'pipe', process.stderr],
      });
      detex.on('error', err=>{
        // perl would wait forever for input that is never going to arrive.
        countProcess.kill();
        reject(err);
      });
      detex.stdin.on('error', reject);
      detex.stdout.pipe(countProcess.stdin);
      input = detex.stdin;
    }
    input.end(lines.join('\n'), 'utf8');

    // Collect the output of latexcount.pl.
    const output = countProcess.stdout;
    let buf = '';
    output.setEncoding('utf8');
    output.on('data', chunk=>{
      buf += chunk;
    });
    output.on('end', ()=>{
      // Parse the result.
      const r = buf.match(/^(\d+) total/m);
      if (r != null){
        resolve(parseInt(r[1], 10));
      }else{
        console.error(buf);
        reject(new Error('Invalid result'));
      }
    });
    output.on('error', reject);
  });
}
/**
 * Left-pads a value with spaces up to `len` characters.
 *
 * @param {*} str - Value to pad; converted with String().
 * @param {number} len - Minimum width of the result.
 * @returns {string} The padded text. A value already wider than `len` is
 *   returned unpadded.
 */
function padStart(str, len){
  const text = String(str);
  // Clamp at 0: String.prototype.repeat throws on a negative count.
  return ' '.repeat(Math.max(0, len - text.length)) + text;
}
/**
 * Right-pads a value with spaces up to `len` characters.
 *
 * @param {*} str - Value to pad; converted with String().
 * @param {number} len - Minimum width of the result.
 * @returns {string} The padded text. A value already wider than `len` is
 *   returned unpadded.
 */
function padEnd(str, len){
  const text = String(str);
  // Clamp at 0: String.prototype.repeat throws on a negative count.
  return text + ' '.repeat(Math.max(0, len - text.length));
}
/**
 * Wraps text in the ANSI escape codes for bright black (gray) foreground.
 * Returns the text unchanged when the `noColor` option is set.
 *
 * @param {string} string - Text to color.
 * @returns {string} The text, colored unless colors are disabled.
 */
function gray(string){
  if (noColor){
    return string;
  }
  return `\x1B[90m${string}\x1B[39m`;
}
/**
 * Wraps text in the ANSI escape codes for bright green foreground.
 * Returns the text unchanged when the `noColor` option is set.
 *
 * @param {string} string - Text to color.
 * @returns {string} The text, colored unless colors are disabled.
 */
function green(string){
  if (noColor){
    return string;
  }
  return `\x1B[92m${string}\x1B[39m`;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment