Skip to content

Instantly share code, notes, and snippets.

@uhyo
Last active January 10, 2019 04:53
Show Gist options
  • Save uhyo/6ba05bbc594f601286e2443ea254d054 to your computer and use it in GitHub Desktop.
Save uhyo/6ba05bbc594f601286e2443ea254d054 to your computer and use it in GitHub Desktop.
Sotsuron counter (for node.js)
#! /usr/bin/env node
'use strict';
const fs = require('fs');
const path = require('path');
const crypto = require('crypto');
const child_process = require('child_process');
// Read the command-line options (the node binary and script path are dropped).
const args = process.argv.slice(2);
const {
  help,
  thesis,
  chapter,
  countMath,
  nameMaxlen,
  showSections,
  noColor,
} = parseArgs(args);
// When help was requested, print the usage text and stop before doing any work.
if (help){
  const usage = `
count.js [OPTIONS] FILENAME
Counts words in tex file for each chapter.
Options:
-h, --help : Help message.
-c CHAPTER, --chapter CHAPTER: Count only one chapter.
-m : Do not remove math commands with 'detex' command.
-l LENGTH, --length LENGTH : Limit character numbers shown.
-s, --section : Wordcount each section.
-S, --section-number : Do not show section titles.
--no-color : Do not color outputs.
`;
  console.log(usage);
  process.exit();
}
// Make sure latexcount.pl is available before any counter process is spawned.
ensureLatexcount();
// Load the thesis (with \include'd files inlined) and split it by heading.
const data = loadTeXFile(thesis);
const [chapters, names] = splitToChapters(data, chapter);
// One promise per chapter body (text outside any section) and one per section.
// Every result carries its chapter/section identity so it can be regrouped later.
const pendings = [];
for (const chap of names){
  // With -c/--chapter only the requested chapter is counted.
  if (chapter != null && chap !== chapter){
    continue;
  }
  const {
    lines: chapterLines,
    sections,
  } = chapters[chap];
  // Words that do not belong to any section of this chapter.
  pendings.push(countWords(chapterLines, countMath).then(words=>({
    type: 'chapter',
    chapter: chap,
    words,
  })));
  // Words of each section; `index` records the section's position in the chapter.
  sections.forEach(({lines: sectionLines, name}, index)=>{
    pendings.push(countWords(sectionLines, countMath).then(words=>({
      type: 'section',
      chapter: chap,
      section: name,
      index,
      words,
    })));
  });
}
// Once every count has finished: aggregate per chapter, build the row labels,
// then print the report.
Promise.all(pendings).then(results=>{
  // --- Collect results ---------------------------------------------------
  // Every chapter gets an entry, even when -c restricted what was counted,
  // so that chapter numbering below stays the same as in the full document.
  const table = {};
  for (const chap of names){
    table[chap] = {
      appendix: chapters[chap].appendix,
      words: 0,
      sections: [],
    };
  }
  for (const {type, words, chapter: chapName, section, index} of results){
    const entry = table[chapName];
    // A chapter total is its own text plus the text of all its sections.
    entry.words += words;
    if (type !== 'chapter'){
      entry.sections[index] = {
        name: section,
        words,
      };
    }
  }
  // --- Prepare labels ----------------------------------------------------
  // leftMax tracks the widest label so the count column can be aligned.
  let leftMax = 0;
  const numbering = {
    chapter: 1,
    appendix: 0,
  };
  for (const chap of names){
    const {
      appendix,
      words,
      sections,
    } = table[chap];
    // Regular chapters are numbered 1, 2, ...; appendices are lettered A, B, ...
    const chapnum = appendix ?
      String.fromCharCode(0x41 + numbering.appendix) :
      String(numbering.chapter);
    const chapnumpad = padStart(chapnum, 2);
    const chapLabel = `${chapnumpad}. ${chap}`.slice(0, nameMaxlen);
    leftMax = Math.max(leftMax, chapLabel.length);
    const labelledSections = showSections ?
      sections.map(({name, words}, i)=>{
        const sectnum = `${chapnumpad}-${i+1}. `;
        // -S / --section-number shows only the number, not the title.
        const full = showSections === 'number' ?
          `${sectnum}` :
          `${sectnum}${name}`;
        const sectLabel = full.slice(0, nameMaxlen);
        leftMax = Math.max(leftMax, sectLabel.length);
        return {
          name: sectLabel,
          words,
        };
      }) :
      [];
    table[chap] = {
      name: chapLabel,
      appendix,
      words,
      sections: labelledSections,
    };
    if (appendix){
      numbering.appendix++;
    }else{
      numbering.chapter++;
    }
  }
  // --- Print -------------------------------------------------------------
  // Running totals for the main text and for the appendices.
  const count = {
    main: 0,
    appendix: 0,
  };
  // The summary labels must fit in the left column as well.
  leftMax = Math.max('APPENDIX'.length, leftMax);
  for (const chap of names){
    if (chapter != null && chapter !== chap){
      continue;
    }
    const {
      name,
      appendix,
      words,
      sections,
    } = table[chap];
    // Chapter subtotal.
    const leftPad = padEnd(name, leftMax);
    const wpad = padStart(words, 5);
    console.log(`${leftPad}:${wpad}`);
    // Per-section rows, dimmed.
    if (showSections){
      for (const {name, words} of sections){
        const leftPad = padEnd(name, leftMax);
        const wpad = padStart(words, 5);
        console.log(gray(`${leftPad}:${wpad}`));
      }
    }
    if (appendix){
      count.appendix += words;
    }else{
      count.main += words;
    }
  }
  // Grand totals are only meaningful when the whole document was counted.
  if (chapter == null){
    console.log('-'.repeat(leftMax+6));
    console.log(`${padEnd('MAIN', leftMax)}:${padStart(count.main, 5)}`);
    console.log(`${padEnd('APPENDIX', leftMax)}:${padStart(count.appendix, 5)}`);
    console.log(`${padEnd('TOTAL', leftMax)}:${padStart(count.main+count.appendix, 5)}`);
  }
})
.catch(e=>{
  console.error(e);
  // Report the failure through the exit status too, so callers can detect it.
  process.exitCode = 1;
});
/**
 * Parses the command-line arguments of the counter.
 *
 * Short options may be bundled (`-ms`); `-c`/`--chapter` and `-l`/`--length`
 * consume the following argument as their value. Any argument that does not
 * start with `-` is taken as the thesis file name (the last one wins).
 *
 * @param {string[]} args - Arguments without the node binary and script path.
 * @returns {{thesis: string, chapter: (string|null), countMath: boolean,
 *   nameMaxlen: number, showSections: (boolean|'number'), noColor: boolean,
 *   help: boolean}} The parsed options.
 * @throws {Error} On an unrecognized option, or when `-c`/`-l` is the last
 *   argument and therefore has no value.
 */
function parseArgs(args){
  let thesis = './thesis.tex';
  let countMath = false;
  let help = false;
  let chapter = null;
  let nameMaxlen = 25;
  let showSections = false;
  let noColor = false;
  // Name of the option still waiting for its value ('' when none).
  let state = '';
  for (const a of args){
    if (state === '-c'){
      chapter = a;
      state = '';
      continue;
    }
    if (state === '-l'){
      // Non-numeric or zero lengths fall back to the default of 25.
      nameMaxlen = parseInt(a, 10) || 25;
      state = '';
      continue;
    }
    // Positional argument. This is decided before option matching so that a
    // file literally named "c", "s", ... is not mistaken for a short option.
    if (!a.startsWith('-')){
      thesis = a;
      continue;
    }
    // "-abc" is a bundle of short options; anything else is one long option.
    const opts = /^-\w+$/.test(a) ?
      a.slice(1).split('') :
      [a];
    for (const o of opts){
      switch (o){
        case 'c': case '--chapter': {
          state = '-c';
          break;
        }
        case 'm': {
          countMath = true;
          break;
        }
        case 'h': case '--help': {
          help = true;
          break;
        }
        case 'l': case '--length': {
          state = '-l';
          break;
        }
        case 's': case '--section': {
          showSections = true;
          break;
        }
        case 'S': case '--section-number': {
          showSections = 'number';
          break;
        }
        case '--no-color': {
          noColor = true;
          break;
        }
        default: {
          throw new Error(`Unrecognized option: ${a}`);
        }
      }
    }
  }
  // An option that takes a value was given last: report it instead of
  // silently ignoring it.
  if (state !== ''){
    throw new Error(`Option ${state} requires a value`);
  }
  return {
    thesis,
    chapter,
    countMath,
    nameMaxlen,
    showSections,
    noColor,
    help,
  };
}
/**
 * Makes sure ./latexcount.pl exists in the current directory and carries the
 * brace-counting fix this script expects.
 *
 * - If the file is missing (ENOENT) it is downloaded with curl and read again;
 *   any other read error is rethrown.
 * - If the file's MD5 digest differs from the hard-coded one, the embedded
 *   unified diff is fed to `patch -u` on stdin. A failing patch is only
 *   logged, not thrown.
 */
function ensureLatexcount(){
// Contents of latexcount.pl (read from the working directory).
let lcd;
try {
lcd = fs.readFileSync('./latexcount.pl', 'utf8');
} catch(e){
if (e.code === 'ENOENT'){
// Not present yet: fetch it from the CTAN mirror (curl's stderr is discarded).
console.error(green(`Downloading latexcount.pl to ${path.resolve('./latexcount.pl')}`));
child_process.execSync('curl -o ./latexcount.pl http://ftp.jaist.ac.jp/pub/CTAN/support/latexcount/latexcount.pl 2> /dev/null', {
encoding: 'utf8',
});
lcd = fs.readFileSync('./latexcount.pl', 'utf8');
}else{
throw e;
}
}
const md5 = md5sum(lcd);
// Presumably the digest of the already-patched file, so the patch is skipped
// once it has been applied - TODO confirm.
if (md5 !== 'e7a62d514ef12a326109c685cad975a0'){
console.log(green('Applying patch to latexcount.pl'));
// The diff makes the brace-depth counting ignore backslash-escaped braces.
// NOTE(review): unified-diff context lines normally start with a space; the
// context lines in this literal have none - verify against the upstream gist.
const p = String.raw `--- latexcount.pl 2016-12-29 20:45:35.381827297 +0900
+++ latexcount_fix.pl 2016-12-31 19:46:23.000000000 +0900
@@ -57,8 +57,8 @@
$line =~ s/(?<!\\)%.*?\n//g;
# Count curly braces
- while($line =~ /\{/g){$depth++}
- while($line =~ /\}/g){$depth--}
+ while($line =~ /(?<!\\)\{/g){$depth++}
+ while($line =~ /(?<!\\)\}/g){$depth--}
# Concatenate the new hunk of input to any
# left over from previous cycles.
`;
try {
// The diff is passed on stdin; no target file is named on the command line.
child_process.execSync('patch -u', {
input: p,
});
} catch(e){
// Best effort: counting proceeds with the unpatched script.
console.log('Applying patch failed.');
}
}
}
/**
 * Computes the MD5 digest of a string.
 *
 * @param {string} str - Text to hash.
 * @returns {string} The digest as lowercase hexadecimal.
 */
function md5sum(str){
  return crypto
    .createHash('md5')
    .update(str)
    .digest('hex');
}
/**
 * Reads a TeX file as UTF-8 and inlines its `\include` directives.
 *
 * Only the limited form where `\include{NAME}` occupies a whole line is
 * handled: such a line is replaced by the contents of `./NAME.tex`, which is
 * itself expanded recursively.
 *
 * @param {string} file - Path of the TeX file to read.
 * @returns {string} The file contents with the includes expanded.
 */
function loadTeXFile(file) {
  const includeLine = /^\\include\{(.+)\}$/gm;
  const expandInclude = (_match, target) => loadTeXFile(`./${target}.tex`);
  const source = fs.readFileSync(file, 'utf8');
  return source.replace(includeLine, expandInclude);
}
/**
 * Extracts the title of a heading by matching braces, so that trailing
 * commands on the same line (e.g. `\label{...}`) are not part of the title and
 * nested groups (e.g. `\textbf{...}`) are kept intact.
 *
 * @param {string} line - The source line containing the heading.
 * @param {RegExpMatchArray} match - Result of the heading regex on `line`.
 * @returns {string} The balanced title, or the regex capture when the braces
 *   on this line are unbalanced or the balanced title is empty.
 */
function readBracedTitle(line, match){
  // The heading command and the whitespace after it contain no '{', so the
  // first '{' of the match is the opening brace of the title.
  const open = match.index + match[0].indexOf('{');
  let depth = 0;
  for (let i = open; i < line.length; i++){
    const ch = line[i];
    if (ch === '\\'){
      // Skip the escaped character (covers \{ and \}).
      i++;
      continue;
    }
    if (ch === '{'){
      depth++;
    }else if (ch === '}'){
      depth--;
      if (depth === 0){
        const title = line.slice(open + 1, i);
        return title !== '' ? title : match[2];
      }
    }
  }
  return match[2];
}
/**
 * Splits TeX source into chapters and, inside each chapter, into sections.
 *
 * Lines are assigned to the most recent `\chapter{...}` or `\section{...}`
 * heading. A line containing `\appendix` marks all later chapters as
 * appendices; a line containing `\bibliographystyle` stops collection until
 * the next heading. Heading lines and these marker lines are not collected.
 *
 * @param {string} data - Complete TeX source.
 * @param {?string} chapter - Unused; kept so existing callers keep working.
 * @returns {[Object<string, {appendix: boolean, lines: string[],
 *   sections: {name: string, lines: string[]}[]}>, string[]]} A pair of the
 *   chapter table keyed by title and the titles in document order.
 */
function splitToChapters(data, chapter){
  const headingPattern = /\\(chapter|section)\s*\{(.+)\}/;
  const result = {};
  const names = [];
  let appendix = false;
  // Section list of the current chapter (sections seen before any chapter go
  // into a list that is never returned).
  let sects = [];
  // Line buffer currently being filled; null while nothing is collected.
  let cur = null;
  for (const l of data.split(/\r\n|\r|\n/)){
    const r = l.match(headingPattern);
    if (r != null){
      const name = readBracedTitle(l, r);
      cur = [];
      if (r[1] === 'chapter'){
        names.push(name);
        sects = [];
        result[name] = {
          appendix,
          lines: cur,
          sections: sects,
        };
      }else{
        sects.push({
          name,
          lines: cur,
        });
      }
      continue;
    }
    if (/\\bibliographystyle/.test(l)){
      cur = null;
      continue;
    }
    if (/\\appendix/.test(l)){
      appendix = true;
      continue;
    }
    if (cur != null){
      cur.push(l);
    }
  }
  return [result, names];
}
/**
 * Counts the words of a piece of TeX source by piping it through
 * `perl latexcount.pl`, optionally preceded by `detex -`.
 *
 * @param {string[]} lines - Source lines; they are joined with '\n'.
 * @param {boolean} countMath - When true the text goes straight to
 *   latexcount.pl; otherwise it is filtered through `detex` first.
 * @returns {Promise<number>} The number from the "<N> total" line printed by
 *   latexcount.pl. Rejects when that line is missing, when a child process
 *   cannot be started, or when one of the pipes fails.
 */
function countWords(lines, countMath){
  return new Promise((resolve, reject)=>{
    const children = [];
    // On any failure stop all children so none is left waiting on stdin,
    // then reject (later settle attempts on the promise are ignored).
    const fail = (err)=>{
      for (const child of children){
        child.kill();
      }
      reject(err);
    };
    const countProcess = child_process.spawn('perl', ['latexcount.pl'], {
      stdio: ['pipe', 'pipe', process.stderr],
    });
    children.push(countProcess);
    // Without an 'error' listener a failed spawn (e.g. perl not installed)
    // or a broken pipe would surface as an uncaught exception.
    countProcess.on('error', fail);
    countProcess.stdin.on('error', fail);
    let input = countProcess.stdin;
    if (!countMath){
      const detex = child_process.spawn('detex', ['-'], {
        stdio: ['pipe', 'pipe', process.stderr],
      });
      children.push(detex);
      detex.on('error', fail);
      detex.stdin.on('error', fail);
      detex.stdout.on('error', fail);
      detex.stdout.pipe(countProcess.stdin);
      input = detex.stdin;
    }
    // Collect the counter's output.
    const output = countProcess.stdout;
    let buf = '';
    output.setEncoding('utf8');
    output.on('data', chunk=>{
      buf += chunk;
    });
    output.on('end', ()=>{
      // Parse the result.
      const r = buf.match(/^(\d+) total/m);
      if (r != null){
        resolve(parseInt(r[1], 10));
      }else{
        console.error(buf);
        reject(new Error('Invalid result'));
      }
    });
    output.on('error', fail);
    input.end(lines.join('\n'), 'utf8');
  });
}
/**
 * Left-pads a value with spaces up to a given width.
 *
 * @param {*} str - Value to pad; it is converted to a string.
 * @param {number} len - Desired minimum width.
 * @returns {string} The padded text. A value already wider than `len` is
 *   returned unpadded (a negative repeat count would throw a RangeError).
 */
function padStart(str, len){
  const text = String(str);
  return ' '.repeat(Math.max(0, len - text.length)) + text;
}
/**
 * Right-pads a value with spaces up to a given width.
 *
 * @param {*} str - Value to pad; it is converted to a string.
 * @param {number} len - Desired minimum width.
 * @returns {string} The padded text. A value already wider than `len` is
 *   returned unpadded (a negative repeat count would throw a RangeError).
 */
function padEnd(str, len){
  const text = String(str);
  return text + ' '.repeat(Math.max(0, len - text.length));
}
/**
 * Wraps text in the ANSI escape codes for bright black (gray) foreground,
 * unless colors were turned off via the top-level `noColor` option.
 *
 * @param {string} string - Text to colorize.
 * @returns {string} The text, colorized when colors are enabled.
 */
function gray(string){
  if (noColor){
    return string;
  }
  return `\x1B[90m${string}\x1B[39m`;
}
/**
 * Wraps text in the ANSI escape codes for bright green foreground, unless
 * colors were turned off via the top-level `noColor` option.
 *
 * @param {string} string - Text to colorize.
 * @returns {string} The text, colorized when colors are enabled.
 */
function green(string){
  if (noColor){
    return string;
  }
  return `\x1B[92m${string}\x1B[39m`;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment