Created
May 12, 2021 11:30
-
-
Save wintercn/e6ffaea241056a45db6a70f96925ad19 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<body>
<style>
.number {
  color:purple;
}
.keyword {
  color:blue;
}
.string {
  color:red;
}
#code {
  width:100%;
  height:500px;
}
</style>
<textarea id="code">
- 章节一
- 标题一
- 标题二
- 子标题三
- 章节二
- 标题一
- 标题二
</textarea>
<script>
/**
 * Composite regular expression assembled from a table of named rules.
 *
 * A rule is either a RegExp (a leaf) or a string in which every `<ruleName>`
 * is replaced by that rule's pattern wrapped in a capturing group. Unknown
 * rule names expand to the empty string.
 *
 * Each reference is wrapped in a uniquely named group instead of a plain
 * positional one, so a leaf RegExp that contains capturing groups of its own
 * no longer shifts the following rules onto the wrong group.
 * Limitation: numbered backreferences inside a leaf still refer to positions
 * in the combined pattern, and leaf patterns must not declare named groups
 * that start with the internal "xre" prefix followed by digits.
 *
 * Must be called with `new`.
 *
 * @param {Object} xregexps  Rule table: rule name -> RegExp or string.
 * @param {string} rootname  Name of the rule to start expanding from.
 * @param {string} [flag]    Flags passed to the RegExp constructor (e.g. "g").
 */
var XRegExp = function (xregexps, rootname, flag) {
  // Prefix of the synthetic named groups that wrap each rule reference.
  var GROUP_PREFIX = "xre";
  // expnames[0] is the root rule (the whole match); for k >= 1, expnames[k] is
  // the rule captured by the named group GROUP_PREFIX + k.
  var expnames = [rootname];

  // Expand every <ruleName> reference in a string rule, recursively.
  function expand(source) {
    return source.replace(/<([^>]+)>/g, function (all, expname) {
      var rule = xregexps[expname];
      if (!rule) return "";
      var groupName = GROUP_PREFIX + expnames.length;
      expnames.push(expname);
      var inner = rule instanceof RegExp ? rule.source : expand(rule);
      return "(?<" + groupName + ">" + inner + ")";
    });
  }

  var rootRule = xregexps[rootname];
  // `new RegExp` replaces the deprecated RegExp.prototype.compile.
  var regexp = new RegExp(
    rootRule instanceof RegExp ? rootRule.source : expand(rootRule),
    flag
  );

  /**
   * Run the composite expression against `string`.
   *
   * @param {string} string Text to match.
   * @returns {String|null} null when nothing matches; otherwise a String
   *   object holding the matched text, with one extra property per rule that
   *   captured a non-empty substring (property name = rule name).
   */
  this.exec = function (string) {
    var matches = regexp.exec(string);
    if (matches == null) return null;
    // A String *object* is required here: the rule captures are attached to
    // it as properties, which a primitive string cannot carry.
    var result = new String(matches[0]);
    if (matches[0]) result[rootname] = matches[0];
    var groups = matches.groups || {};
    for (var i = 1; i < expnames.length; i++) {
      var captured = groups[GROUP_PREFIX + i];
      if (captured) result[expnames[i]] = captured;
    }
    return result;
  };

  /**
   * Get, or when called with one argument set, the position at which the
   * next exec() starts searching.
   *
   * @param {number} [lastIndex] New position.
   * @returns {number} The current position.
   */
  this.lastIndex = function (lastIndex) {
    if (arguments.length === 1) regexp.lastIndex = lastIndex;
    return regexp.lastIndex;
  };
};
/**
 * A lexical token.
 *
 * @constructor
 * @param {string} name   Token kind, e.g. "NAME", "INDENT" or "ENDMARKER".
 * @param {*} [value]     Matched text for tokens that carry one; left
 *                        undefined when the caller passes no value.
 */
function Token(name, value) {
  this.name = name;
  this.value = value;
}
/**
 * Indentation-aware lexer for an outline made of "- item" entries.
 *
 * Tokens produced by getToken():
 *   NAME      - an entry; value is the matched text including the "- " prefix
 *   NEWLINE   - end of a line that contained at least one NAME
 *   INDENT    - indentation grew by one tab stop
 *   DEDENT    - indentation shrank by one tab stop
 *   ENDMARKER - end of input (returned on every call once reached)
 *
 * Whitespace after the start of a line, comments (# to end of line), blank
 * lines and whitespace-only lines produce no tokens. Text that matches no
 * rule is skipped by the underlying global regular expression.
 *
 * Must be called with `new`.
 *
 * @param {string} source Text to tokenize.
 */
function Lexer(source) {
  var tabstop = 2;
  var lex = {
    // <comments> was missing from this alternation although the rule and
    // its handling both exist.
    inputElement: "<whitespace>|<name>|<newline>|<comments>",
    name: /\- [^\n \t]+/,
    newline: /\n/,
    whitespace: /[ \t]+/,
    comments: /#[^\n]*/
  };
  var input = new XRegExp(lex, "inputElement", "g");
  var isLineStart = true;   // nothing has been consumed on the current line yet
  var isBlankLine = true;   // no NAME has been seen on the current line yet
  var isFinished = false;   // the regex ran out of matches before end of input
  var leadingSpace = 0;     // indentation width of the current line
  var oldLeadingSpace = 0;  // indentation width of the previous entry line
  // Pending tokens, used as a stack: the NAME is pushed first so that the
  // INDENT/DEDENT tokens pushed after it are handed out before it.
  var dentstack = [];

  // Width of a leading-whitespace run; a tab advances to the next tab stop.
  function measureIndent(whitespace) {
    var width = 0;
    for (var i = 0; i < whitespace.length; i++) {
      var ch = whitespace.charAt(i);
      if (ch === " ") width++;
      else if (ch === "\t") width = Math.floor(width / tabstop + 1) * tabstop;
    }
    return width;
  }

  /**
   * Read the next token.
   *
   * @returns {Token} Never undefined; ENDMARKER once the input is exhausted.
   */
  this.getToken = function () {
    for (;;) {
      if (dentstack.length) return dentstack.pop();
      if (isFinished || input.lastIndex() === source.length) {
        return new Token("ENDMARKER");
      }

      var ele = input.exec(source);
      if (ele == null) {
        // No further match: the rest of the input is unrecognized text.
        // A failed exec resets lastIndex to 0, so remember that we are done
        // instead of re-lexing from the beginning on the next call.
        isFinished = true;
        return new Token("ENDMARKER");
      }

      if (ele.comments) continue;

      if (ele.newline) {
        var lineHadEntry = !isBlankLine;
        isLineStart = true;
        isBlankLine = true;
        leadingSpace = 0;
        if (lineHadEntry) return new Token("NEWLINE");
        continue;
      }

      if (ele.whitespace) {
        // Only whitespace at the very start of a line is indentation. It is
        // merely recorded here and applied when an entry follows, so that
        // whitespace-only lines do not change the indentation level.
        if (isLineStart) {
          leadingSpace = measureIndent(ele.whitespace);
          isLineStart = false;
        }
        continue;
      }

      if (ele.name) {
        isLineStart = false;
        if (!isBlankLine) return new Token("NAME", ele.name);

        // First entry on this line: emit the indentation change, then it.
        isBlankLine = false;
        dentstack.push(new Token("NAME", ele.name));
        var dc = (leadingSpace - oldLeadingSpace) / tabstop;
        if (dc < 0) {
          while (dc++ < 0) dentstack.push(new Token("DEDENT"));
        } else if (dc > 0) {
          while (dc-- > 0) dentstack.push(new Token("INDENT"));
        }
        oldLeadingSpace = leadingSpace;
      }
    }
  };
}
// Tokenize the contents of the <textarea id="code"> element. The element is
// looked up explicitly rather than through the implicit `code` global that
// browsers create for elements with an id, which any variable or function
// named `code` would silently shadow.
var lexer = new Lexer(document.getElementById("code").value);
// Most recently read token; assigned by the script that consumes the lexer.
var token;
</script>
<pre>
<script>
// Build a tree of {name, children} nodes from the token stream and print it
// as JSON into the enclosing <pre>.
//
// `current` is the most recently created node. `stack` holds the chain of
// nodes currently open as parents; its last element receives new nodes and
// its first element is always the synthetic root.
var current = {name: "root", children: []};
var stack = [current];
for (;;) {
  token = lexer.getToken();
  // Tolerate a lexer that returns nothing for input it does not classify.
  if (!token) continue;
  if (token.name == "ENDMARKER") break;

  if (token.name == "NAME") {
    current = {
      name: token.value,
      children: []
    };
    stack[stack.length - 1].children.push(current);
  } else if (token.name == "INDENT") {
    // The node created last becomes the parent of what follows.
    stack.push(current);
  } else if (token.name == "DEDENT") {
    // Never pop the root: uneven indentation can produce more DEDENT than
    // INDENT tokens, and an empty stack would make the next NAME throw.
    if (stack.length > 1) stack.pop();
  }
  // NEWLINE and any other token carry no structure.
}
// document.write parses its argument as HTML, so escape the characters that
// could start a tag or an entity inside entry names.
document.write(
  JSON.stringify(stack[0].children, null, " ")
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
);
</script>
</pre>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment