Created
September 24, 2013 15:02
-
-
Save wintercn/6686095 to your computer and use it in GitHub Desktop.
python词法分析
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<body> | |
<style> | |
.number { | |
color:purple; | |
} | |
.keyword { | |
color:blue; | |
} | |
.string { | |
color:red; | |
} | |
</style> | |
<textarea id="code"> | |
# Return True when `sentence` can be split into a sequence of entries taken
# from magic_words (a list that is not defined inside this snippet).
def is_spell(sentence):
    n = len(sentence)
    # flags[i] records whether the first i characters form a valid spell.
    flags=[False for i in range(n+1)]
    # The original note read "check from back to front"; the loop below
    # actually walks the prefix lengths in increasing order.
    # The empty prefix is trivially a spell.
    flags[0]=True
    for i in range(1, n+1, 1):
        for word in magic_words:
            # `word` completes the prefix of length i when it fits, the prefix
            # in front of it is already a spell, and the characters match.
            if i>=len(word) and flags[i-len(word)]==True and sentence[i-len(word):i]==word :
                flags[i]=True
                break
    return flags[n]
</textarea> | |
<textarea id="code2"> | |
#coding: utf-8 | |
#2008-8-21, Shaofei Cheng | |
# Decide whether a sentence is a spell, i.e. made up entirely of magic words.
# Approach: dynamic programming.
# Dictionary of magic words.
# NOTE(review): 'j' 'c' has no comma between the two literals, so Python joins
# them into the single entry 'jc' - confirm whether 'j', 'c' was intended.
magic_words = ['m', 'foo', 'bar', 'boo', 'barr', 'jac', 'j' 'c', 'to', 'too', 'object']
def is_magic_word(word):
    """Return True when `word` is one of the entries of magic_words."""
    found = word in magic_words
    return found
def is_spell(sentence):
    """Return True when `sentence` is a concatenation of magic_words entries.

    Dynamic programme over prefix lengths: reachable[k] is True when the
    first k characters of `sentence` can be written as a sequence of magic
    words. Prefixes are visited from the shortest to the longest.
    """
    n = len(sentence)
    reachable = [False] * (n + 1)
    # The empty prefix needs no words at all.
    reachable[0] = True
    for end in range(1, n + 1):
        for word in magic_words:
            start = end - len(word)
            if start < 0:
                # The word is longer than the prefix being examined.
                continue
            if reachable[start] and sentence[start:end] == word:
                reachable[end] = True
                break
    return reachable[n]
#================ Test code ======================
# Manual smoke test; the expected output is noted next to each call.
# print(...) with a single argument prints the same text on Python 2 and also
# runs on Python 3, unlike the bare print statement.
if __name__ == '__main__':
    print(is_magic_word('foo'))  # True
    print(is_magic_word('fo'))  # False
    print(is_spell('morning'))  # False
    print(is_spell('hello'))  # False
    print(is_spell('foo'))  # True
    print(is_spell('bar'))  # True
    print(is_spell('barfoombarmjac'))  # True
    print(is_spell('tobject'))  # False
    print(is_spell('toobject'))  # True
    print(is_spell('tooobject'))  # True
</textarea> | |
<script> | |
/**
 * Composite regular expression assembled from a table of named fragments.
 *
 * Inside a string fragment every `<name>` placeholder is replaced by the
 * fragment registered under `name`, wrapped in a capturing group; a
 * placeholder whose name is missing from the table expands to nothing.
 * Fragments given as RegExp objects are inserted through their `source`, so
 * they must not contain capturing groups of their own or the group numbers
 * would no longer line up with the recorded fragment names.
 *
 * @param {Object} xregexps  Fragment table: name -> RegExp or string.
 * @param {string} rootname  Name of the fragment the expansion starts from.
 * @param {string} flag      Flags handed to the RegExp constructor.
 */
var XRegExp = function (xregexps, rootname, flag) {
    // expnames[i] is the fragment captured by group i; index 0 stands for
    // the whole match and is reported under the root name.
    var expnames = [rootname];

    // Expands the placeholders of one string fragment. Names are recorded
    // before recursing, which is the order capture groups are numbered in.
    function expand(source) {
        return source.replace(/<([^>]+)>/g, function (all, expname) {
            var fragment = xregexps[expname];
            if (!fragment) return "";
            expnames.push(expname);
            if (fragment instanceof RegExp) return "(" + fragment.source + ")";
            return "(" + expand(fragment) + ")";
        });
    }

    var root = xregexps[rootname];
    // Built with the constructor instead of the deprecated
    // RegExp.prototype.compile; a RegExp root is used as it is.
    var regexp = new RegExp(root instanceof RegExp ? root.source : expand(root), flag);

    /**
     * Runs the composite expression against `string`.
     *
     * @param {string} string  Text to search.
     * @returns {String|null}  null when nothing matches; otherwise a String
     *     object holding the matched text, with one property per fragment
     *     that captured a non-empty value.
     */
    this.exec = function (string) {
        var matches = regexp.exec(string);
        if (matches == null) return null;
        // A String object (not a primitive) so properties can be attached.
        var result = new String(matches[0]);
        for (var i = 0; i < expnames.length; i++) {
            if (matches[i]) result[expnames[i]] = matches[i];
        }
        return result;
    };

    /**
     * Reads the lastIndex of the underlying RegExp; when called with exactly
     * one argument that value is stored first.
     *
     * @param {number} [lastIndex]  New search position.
     * @returns {number} The current lastIndex.
     */
    this.lastIndex = function (lastIndex) {
        if (arguments.length == 1) regexp.lastIndex = lastIndex;
        return regexp.lastIndex;
    };
};
/**
 * Lexical token produced by the lexer.
 *
 * @param {string} name   Token kind.
 * @param {string} [value]  Matched source text; stays undefined when omitted.
 */
function Token(name, value) {
    var self = this;
    self.name = name;
    self.value = value;
}
/**
 * Tokeniser for Python 2 source text.
 *
 * getToken() hands out Token objects one at a time. The kinds produced are
 * NAME, NUMBER, STRING, PUNCTUATOR, KEYWORD, NEWLINE, INDENT, DEDENT and
 * ENDMARKER. Comments, blank lines and whitespace inside a line produce no
 * token. Characters that no grammar fragment matches are skipped by the
 * global regular-expression search.
 *
 * @param {string} source  Program text to tokenise.
 */
function Lexer(source) {
    var tabstop = 4;
    var lex = {
        // <string> is tried before <name> so prefixed literals such as u'x'
        // are not split into a name followed by a string.
        inputElement:"<keyword>|<string>|<name>|<number>|<newline>|<whitespace>|<punctuator>|<comments>",
        name:/[a-zA-Z_][a-zA-Z_0-9]*/,
        number:"<imagnumber>|<floatnumber>|<longinteger>|<integer>",
        // Triple-quoted literals first, otherwise '' would be taken as an
        // empty short string.
        string:"[uUbB]?[rR]?(?:<longstring>|<shortstring>)",
        shortstring:/'(?:[^\\\n']|\\[\s\S])*'|"(?:[^\\\n"]|\\[\s\S])*"/,
        // Lazy body so the literal ends at the first closing delimiter.
        longstring:/'''(?:[^\\]|\\[\s\S])*?'''|"""(?:[^\\]|\\[\s\S])*?"""/,
        integer:"<octinteger>|<hexinteger>|<decimalinteger>",
        decimalinteger:/[1-9][0-9]*|0/,
        octinteger:/0[0-7]+/,
        hexinteger:/0[xX][0-9a-fA-F]+/,
        longinteger:"<integer>[lL]",
        // Exponent form first, otherwise 1.5e3 would stop after 1.5.
        floatnumber:"<exponentfloat>|<pointfloat>",
        pointfloat:/[0-9]*\.[0-9]+|[0-9]+\./,
        exponentfloat:/(?:[0-9]*\.[0-9]+|[0-9]+\.?)[eE][+-]?[0-9]+/,
        imagnumber:/(?:[0-9]*\.[0-9]+|[0-9]+\.?)[jJ]/,
        punctuator:/\.\.\.|\*\*\=|\>\>\=|\<\<\=|\+\=|\>\>|\=\=|\>\=|\<\=|\<\>|\!\=|\|\=|\-\=|\*\=|\/\=|\%\=|\*\*|\/\/|\<\<|\&\=|\^\=|\>|\}|\:|\`|\(|\*|\[|\=|\~|\]|\/|\%|\,|\{|\||\<|\)|\;|\&|\^|\@|\-|\+/,
        keyword:/(?:for|in|if|yield|is|not|else|or|and|lambda|assert|pass|del|print|return|raise|break|continue|import|as|from|global|exec|elif|while|try|except|finally|with|def|class)(?![_$a-zA-Z0-9])/,
        newline:/\n/,
        whitespace:/[ \t]+/,
        comments:/#[^\n]*/
    };
    // Grammar fragment -> token kind, in the order the fragments are probed.
    var kinds = [
        ["name", "NAME"],
        ["number", "NUMBER"],
        ["string", "STRING"],
        ["punctuator", "PUNCTUATOR"],
        ["keyword", "KEYWORD"]
    ];
    var input = new XRegExp(lex,"inputElement","g");
    var isLineStart = true;   // no token has been produced on this line yet
    var isBlankLine = true;   // the current line holds no token so far
    var leadingSpace = 0;     // column reached by the line's leading whitespace
    var indentLevel = 0;      // indentation level of the last line with a token
    var pending = [];         // tokens queued for the following calls
    var finished = false;     // set once the end of the input was reported

    // Column reached after `whitespace` when starting at `column`; a tab
    // jumps to the next multiple of tabstop.
    function advanceColumn(column, whitespace) {
        for (var i = 0; i < whitespace.length; i++) {
            if (whitespace.charAt(i) == "\t")
                column = Math.floor(column / tabstop + 1) * tabstop;
            else
                column++;
        }
        return column;
    }

    // Queues one INDENT or DEDENT per level between the previous line that
    // held a token and the current one. A partly filled tab stop counts as a
    // full level, so the count is always a whole number.
    function queueDents() {
        var level = Math.ceil(leadingSpace / tabstop);
        var kind = level > indentLevel ? "INDENT" : "DEDENT";
        for (var n = Math.abs(level - indentLevel); n > 0; n--)
            pending.push(new Token(kind));
        indentLevel = level;
    }

    // Remembers that the input is exhausted. After a failed match the
    // regular expression restarts at index 0, so the flag keeps later calls
    // from tokenising the text a second time.
    function endOfInput() {
        finished = true;
        return new Token("ENDMARKER");
    }

    /**
     * Returns the next token; once the input is exhausted every call returns
     * an ENDMARKER token.
     *
     * @returns {Token}
     */
    this.getToken = function () {
        // A loop rather than recursion, so long runs of comments or blank
        // lines cannot exhaust the call stack.
        for (;;) {
            if (pending.length) return pending.shift();
            if (finished || input.lastIndex() == source.length) return endOfInput();
            var ele = input.exec(source);
            // Nothing lexable is left, e.g. the text ends with a stray character.
            if (ele == null) return endOfInput();
            if (ele.comments) continue;
            if (ele.newline) {
                isLineStart = true;
                leadingSpace = 0;
                if (isBlankLine) continue;
                isBlankLine = true;
                return new Token("NEWLINE");
            }
            if (ele.whitespace) {
                // Only whitespace in front of the first token is indentation.
                if (isLineStart) leadingSpace = advanceColumn(leadingSpace, ele.whitespace);
                continue;
            }
            // Indentation is compared on the first token of a line, so a line
            // starting in column 0 closes open blocks, while blank and
            // comment-only lines never change the level.
            if (isLineStart) queueDents();
            isLineStart = false;
            isBlankLine = false;
            for (var k = 0; k < kinds.length; k++) {
                if (ele[kinds[k][0]]) {
                    pending.push(new Token(kinds[k][1], ele[kinds[k][0]]));
                    break;
                }
            }
        }
    };
}
// Tokenise the Python sample held in the element with id "code". The element
// is looked up explicitly instead of through the implicit global that
// browsers create for element ids, which any variable named `code` would
// shadow.
var lexer = new Lexer(document.getElementById("code").value);
// Most recently read token; assigned by the rendering loop.
var token;
</script> | |
<pre> | |
<script> | |
// Writes the token stream back into the page as coloured <span> elements,
// rebuilding the indentation from the INDENT / DEDENT tokens.
var leadingspace = 0;     // spaces to put in front of the next line
var isLineStart = true;   // nothing has been written on the current line yet
// Token kind -> CSS class of the span it is rendered in.
var spanClassByToken = {
    NUMBER: "number",
    STRING: "string",
    NAME: "name",
    KEYWORD: "keyword",
    PUNCTUATOR: "punctuator"
};
// Token text comes straight from the source being displayed, so characters
// with a meaning in HTML (e.g. the operators < > & or markup inside a string
// literal) are replaced by entities before they are written.
function escapeHtml(text) {
    return String(text).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
}
while((token=lexer.getToken()).name!="ENDMARKER")
{
    if(token.name == "NEWLINE") {
        document.write("<span class='newline'>"+"\n"+"</span>");
        isLineStart = true;
        continue;
    }
    if(token.name == "INDENT") {
        leadingspace+=4;
        continue;
    }
    if(token.name == "DEDENT") {
        leadingspace-=4;
        continue;
    }
    if(isLineStart) {
        isLineStart = false;
        // Clamped so unbalanced DEDENT tokens cannot ask for an array of
        // negative length.
        document.write("<span class='name'>"+new Array(Math.max(leadingspace, 0)+1).join(" ")+"</span>");
    }
    if(Object.prototype.hasOwnProperty.call(spanClassByToken, token.name))
        document.write("<span class='"+spanClassByToken[token.name]+"'>"+escapeHtml(token.value)+" </span>");
}
</script> | |
</pre> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment