Created
April 30, 2016 20:07
-
-
Save tanraya/c0bbc9a8f183e5a10e7f27232362f1be to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Holds a text together with markup segments of the form [tag, start, end]
 * and splits those segments around the whitespace runs found in the text.
 */
class Tagger {
  /**
   * @param {string} text - Text that the segment offsets refer to.
   * @param {Array<Array>} [markup=[]] - Segments given as [tag, start, end].
   */
  constructor(text, markup = []) {
    this.text = text;
    this.markup = this.cloneMarkup(markup);
  }

  /**
   * Copies the first three items of every segment into a fresh array, so the
   * instance does not share segment arrays with the caller.
   *
   * @param {Array<Array>} markup - Segments given as [tag, start, end].
   * @returns {Array<Array>} New list of new [tag, start, end] arrays.
   */
  cloneMarkup(markup) {
    return markup.map((segment) => [segment[0], segment[1], segment[2]]);
  }

  /**
   * Computes the segments occupied by whitespace characters.
   *
   * Each maximal run of whitespace becomes [spaces, start, end], where
   * `spaces` is a string of plain spaces as long as the run and `end` is
   * exclusive. Every returned array also carries a non-enumerable
   * `whitespace === true` property that marks it as a whitespace segment.
   *
   * @returns {Array<Array>} Whitespace segments in order of appearance.
   */
  calcWhitespaceSegments() {
    const segments = [];
    // A fresh literal per call, so the stateful lastIndex of the /g regex
    // never leaks between invocations.
    const whitespaceRun = /\s+/g;
    let match = whitespaceRun.exec(this.text);
    while (match !== null) {
      // Offsets come straight from the match, so a run starting at index 0
      // is reported like any other (a truthiness check on the start offset
      // would drop it).
      const start = match.index;
      const end = start + match[0].length;
      const segment = [' '.repeat(end - start), start, end];
      // Non-enumerable flag: marks the segment as whitespace without
      // affecting deep-equality comparisons of the array contents.
      Object.defineProperty(segment, 'whitespace', { value: true });
      segments.push(segment);
      match = whitespaceRun.exec(this.text);
    }
    return segments;
  }

  /**
   * Splits the markup segments around whitespace and returns the resulting
   * flat list, including the whitespace segments themselves.
   *
   * Planned follow-up steps noted by the original author, not implemented
   * here:
   *   2. Determine how the segments nest and record it.
   *   3. Build a tree structure based on step 2.
   *
   * `this.markup` is not modified, so repeated calls return the same result.
   *
   * @returns {Array<Array>} Segments as [tag, start, end].
   */
  calculate() {
    // Work on a local list: markup segments followed by whitespace segments.
    const segments = this.markup.concat(this.calcWhitespaceSegments());
    const sameSegment = (x, y) =>
      x[0] === y[0] && x[1] === y[1] && x[2] === y[2];

    let result = [];
    const split = [];

    segments.forEach((segment) => {
      // Whitespace segments lying entirely inside the current segment.
      const gaps = segments.filter(
        (other) =>
          other !== segment &&
          other.whitespace === true &&
          other[1] >= segment[1] &&
          other[2] <= segment[2]
      );

      if (gaps.length === 0) {
        // Nothing to cut out: keep the segment once, even when it is the
        // only segment there is.
        if (!result.some((kept) => sameSegment(kept, segment))) {
          result.push(segment);
        }
        return;
      }

      // NOTE(review): every gap is cut out of the ORIGINAL extent on its
      // own, so a segment that spans several whitespace runs produces
      // overlapping pieces. Kept as in the original; confirm intent.
      gaps.forEach((gap) => {
        if (segment[1] !== gap[1]) {
          result.push([segment[0], segment[1], gap[1]]);
        }
        if (gap[2] !== segment[2]) {
          result.push([segment[0], gap[2], segment[2]]);
        }
      });
      split.push(segment);
    });

    // Drop every entry that equals (by value) a segment that was split.
    result = result.filter(
      (kept) => !split.some((removed) => sameSegment(removed, kept))
    );

    // NOTE(review): this comparator returns 0 unless BOTH the start offset
    // and the tag compare the same way, so it does not define a consistent
    // ordering; the final order may depend on the engine's sort algorithm.
    result.sort((a, b) => {
      if (a[1] > b[1] && a[0] > b[0]) return 1;
      if (a[1] < b[1] && a[0] < b[0]) return -1;
      return 0;
    });

    return result;
  }
}
// Development specs for Tagger (mocha-style describe/it with chai-style expect).
describe('Tagger dev', () => {
  let tagger;

  describe('#calculate', () => {
    beforeEach(function() {
      tagger = new Tagger('Hello world', [
        ['strong', 0, 5],
        ['span', 0, 5],
        ['em', 5, 11],
        ['s', 2, 8]
      ]);
    });

    // Target markup:
    // <strong><span>He<s>llo</s></span></strong> <em><s>wo</s>rld</em>
    // NOTE(review): the expected ORDER below depends on how the engine's
    // sort applies the comparator used by calculate() - confirm on the
    // target runtime.
    it('calculates proper data', () => {
      expect(tagger.calculate()).to.deep.equal([
        [ 'strong', 0, 5 ],
        [ 'span', 0, 5 ],
        [ 'em', 6, 11 ],
        [ 's', 2, 5 ],
        [ ' ', 5, 6 ],
        [ 's', 6, 8 ]
      ]);
    });
  });

  describe('#calcWhitespaceSegments', () => {
    it('calc right 1', () => {
      tagger = new Tagger('Hello world');
      expect(tagger.calcWhitespaceSegments()).to.deep.equal([[' ', 5, 6]]);
    });

    // Three spaces between the words, matching the asserted [5, 8) span.
    it('calc right 2', () => {
      tagger = new Tagger('Hello   world');
      expect(tagger.calcWhitespaceSegments()).to.deep.equal([['   ', 5, 8]]);
    });

    // Double spaces after "Once" and "time", matching the asserted
    // [4, 6) and [17, 19) spans.
    it('calc right 3', () => {
      tagger = new Tagger('Once  upon a time  in America');
      expect(tagger.calcWhitespaceSegments()).to.deep.equal(
        [['  ', 4, 6], [' ', 10, 11], [' ', 12, 13], ['  ', 17, 19], [' ', 21, 22]]
      );
    });
  });
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment