Skip to content

Instantly share code, notes, and snippets.

@wintercn
Created September 24, 2013 15:02
Show Gist options
  • Save wintercn/6686095 to your computer and use it in GitHub Desktop.
Save wintercn/6686095 to your computer and use it in GitHub Desktop.
python词法分析
<body>
<style>
.number {
color:purple;
}
.keyword {
color:blue;
}
.string {
color:red;
}
</style>
<textarea id="code">
def is_spell(sentence):
n = len(sentence)
flags=[False for i in range(n+1)]
# 从后往前判断
flags[0]=True
for i in range(1, n+1, 1):
for word in magic_words:
if i>=len(word) and flags[i-len(word)]==True and sentence[i-len(word):i]==word :
flags[i]=True
break
return flags[n]
</textarea>
<textarea id="code2">
#coding: utf-8
#2008-8-21, Shaofei Cheng
# 判断一个句子是否为咒语(由魔法词组成)
# 解法:动态规划法
# 魔法词典
magic_words = ['m', 'foo', 'bar', 'boo', 'barr', 'jac', 'j' 'c', 'to', 'too', 'object']
# 是否为魔力单词
def is_magic_word(word):
return word in magic_words
# 是否为咒语
def is_spell(sentence):
n = len(sentence)
flags=[False for i in range(n+1)]
# 从后往前判断
flags[0]=True
for i in range(1, n+1, 1):
for word in magic_words:
if i>=len(word) and flags[i-len(word)]==True and sentence[i-len(word):i]==word :
flags[i]=True
break
return flags[n]
#================ 测试代码 ======================
if __name__ == '__main__':
print is_magic_word('foo') # True
print is_magic_word('fo') # False
print is_spell('morning') # False
print is_spell('hello') # False
print is_spell('foo') # True
print is_spell('bar') # True
print is_spell('barfoombarmjac') # True
print is_spell('tobject') # False
print is_spell('toobject') # True
print is_spell('tooobject') # True
</textarea>
<script>
/**
 * Composite regular expression assembled from a table of named definitions.
 *
 * A definition is either a RegExp or a string. Inside a string definition,
 * every `<name>` reference is replaced by a capturing group holding the
 * definition called `name` (expanded recursively when that definition is
 * itself a string). References to names that are not own properties of the
 * table, or whose definition is falsy, expand to nothing.
 *
 * @constructor
 * @param {Object} xregexps  Table mapping definition names to RegExp/string.
 * @param {string} rootname  Name of the definition to start expansion from.
 * @param {string} [flag]    Flags passed to every RegExp that is built.
 */
var XRegExp = function (xregexps, rootname, flag) {
    // expnames[i] is the name of the definition that produced capture group i.
    // Names are recorded before recursing, i.e. in the order the opening
    // parentheses appear, which is how capture groups are numbered.
    // Index 0 (the whole match) is attributed to the root definition.
    var expnames = [rootname];
    var hasOwn = Object.prototype.hasOwnProperty;

    function buildRegExp(source) {
        var expanded = source.replace(/<([^>]+)>/g, function (all, expname) {
            // Own-property check: a reference such as <toString> must not
            // resolve to a function inherited from Object.prototype.
            if (!hasOwn.call(xregexps, expname) || !xregexps[expname]) return "";
            var definition = xregexps[expname];
            expnames.push(expname);
            if (definition instanceof RegExp) return "(" + definition.source + ")";
            return "(" + buildRegExp(definition).source + ")";
        });
        // RegExp.prototype.compile is deprecated; construct the object directly.
        return new RegExp(expanded, flag);
    }

    var regexp = buildRegExp(xregexps[rootname]);

    /**
     * Runs the composite expression against `string`.
     *
     * @param {string} string  Text to search.
     * @returns {String|null}  null when nothing matches; otherwise a String
     *     object holding the matched text, with one extra property per
     *     definition whose group captured a non-empty string (property name =
     *     definition name, value = captured text).
     */
    this.exec = function (string) {
        var matches = regexp.exec(string);
        if (matches == null) return null;
        // A String *object* is required here: properties cannot be attached
        // to a primitive string.
        var result = new String(matches[0]);
        for (var i = 0; i < expnames.length; i++) {
            if (matches[i]) {
                result[expnames[i]] = matches[i];
            }
        }
        return result;
    };

    /**
     * Reads the underlying RegExp's lastIndex; when called with exactly one
     * argument, sets it first.
     *
     * @param {number} [lastIndex]  New position to resume matching from.
     * @returns {number} The current lastIndex.
     */
    this.lastIndex = function (lastIndex) {
        if (arguments.length == 1) {
            regexp.lastIndex = lastIndex;
        }
        return regexp.lastIndex;
    };
};
/**
 * A single lexical token.
 *
 * @constructor
 * @param {string} name     Token category (e.g. "NAME", "NEWLINE").
 * @param {string} [value]  Matched source text; left undefined when the
 *                          caller passes no second argument.
 */
function Token(name, value) {
    var self = this;
    self.name = name;
    self.value = value;
}
/**
 * Tokenizer for a subset of Python 2 source text.
 *
 * Token names produced by getToken():
 *   KEYWORD, NAME, NUMBER, STRING, PUNCTUATOR  - value holds the source text
 *   NEWLINE   - end of a line that contained at least one token
 *   INDENT    - the line is indented deeper than the enclosing block
 *   DEDENT    - one enclosing block was closed
 *   ENDMARKER - end of input (returned again on every further call)
 *
 * Comments, blank lines and whitespace inside a line produce no tokens.
 * Characters that match no rule are skipped silently. Newlines inside
 * brackets and backslash continuations are not treated specially.
 *
 * @constructor
 * @param {string} source  Text to tokenize; null/undefined is read as "".
 */
function Lexer(source) {
    // Width of a tab stop when converting leading tabs to a column number.
    var tabstop = 4;
    var text = source == null ? "" : String(source);

    var lex = {
        // <string> goes first so a prefix such as u'' or r'' is not taken
        // for a NAME; <keyword> must precede <name> for the same reason.
        inputElement:"<string>|<keyword>|<name>|<number>|<newline>|<whitespace>|<punctuator>|<comments>",
        name:/[a-zA-Z_][a-zA-Z_0-9]*/,
        number:"<imagnumber>|<floatnumber>|<longinteger>|<integer>",
        // Triple-quoted form first, otherwise ''' is read as an empty '' string.
        string:"[uUbB]?[rR]?(?:<longstring>|<shortstring>)",
        shortstring:/'(?:[^\\\n']|\\[\s\S])*'|"(?:[^\\\n"]|\\[\s\S])*"/,
        // Lazy body: stop at the first closing delimiter, not the last one.
        longstring:/'''(?:[^\\]|\\[\s\S])*?'''|"""(?:[^\\]|\\[\s\S])*?"""/,
        integer:"<octinteger>|<hexinteger>|<decimalinteger>",
        decimalinteger:/[1-9][0-9]*|0/,
        octinteger:/0[oO]?[0-7]+/,
        hexinteger:/0[xX][0-9a-fA-F]+/,
        longinteger:"<integer>[lL]",
        // Exponent form first, otherwise 1.5e3 is cut after "1.5".
        floatnumber:"<exponentfloat>|<pointfloat>",
        pointfloat:/[0-9]*\.[0-9]+|[0-9]+\./,
        exponentfloat:/(?:[0-9]*\.[0-9]+|[0-9]+\.?)[eE][+-]?[0-9]+/,
        imagnumber:/(?:[0-9]*\.[0-9]+|[0-9]+\.?)(?:[eE][+-]?[0-9]+)?[jJ]/,
        // Longer operators precede their prefixes; the single dot is last.
        punctuator:/\.\.\.|\*\*\=|\>\>\=|\<\<\=|\+\=|\>\>|\=\=|\>\=|\<\=|\<\>|\!\=|\|\=|\-\=|\*\=|\/\=|\%\=|\*\*|\/\/|\<\<|\&\=|\^\=|\>|\}|\:|\`|\(|\*|\[|\=|\~|\]|\/|\%|\,|\{|\||\<|\)|\;|\&|\^|\@|\-|\+|\./,
        keyword:/(?:for|in|if|yield|is|not|else|or|and|lambda|assert|pass|del|print|return|raise|break|continue|import|as|from|global|exec|elif|while|try|except|finally|with|def|class)(?![_$a-zA-Z0-9])/,
        newline:/\n/,
        whitespace:/[ \t]+/,
        comments:/#[^\n]*/
    };
    var input = new XRegExp(lex,"inputElement","g");

    // True until the current line has produced its first significant token.
    var isBlankLine = true;
    // Column reached by the leading whitespace of the current line.
    var leadingSpace = 0;
    // Columns of the currently open blocks; the bottom entry is always 0.
    var indents = [0];
    // Tokens already produced but not yet handed out (FIFO).
    var pending = [];
    var finished = false;

    // Converts a run of leading spaces/tabs into a column number.
    function measureIndent(whitespace) {
        var column = 0;
        for (var i = 0; i < whitespace.length; i++) {
            if (whitespace.charAt(i) == "\t") {
                column = (Math.floor(column / tabstop) + 1) * tabstop;
            } else {
                column++;
            }
        }
        return column;
    }

    // Queues the DEDENT/INDENT tokens needed to move to `column`.
    function queueIndentation(column) {
        while (column < indents[indents.length - 1]) {
            indents.pop();
            pending.push(new Token("DEDENT"));
        }
        // Also reached after a dedent to a column that matches no open block;
        // opening a new level there keeps INDENT and DEDENT balanced.
        if (column > indents[indents.length - 1]) {
            indents.push(column);
            pending.push(new Token("INDENT"));
        }
    }

    // Closes an unterminated last line and every open block, then ends.
    function finish() {
        if (!isBlankLine) pending.push(new Token("NEWLINE"));
        while (indents.length > 1) {
            indents.pop();
            pending.push(new Token("DEDENT"));
        }
        pending.push(new Token("ENDMARKER"));
        isBlankLine = true;
        finished = true;
    }

    // Maps a match of one of the five value-carrying rules to a Token.
    function toToken(ele) {
        if (ele.keyword) return new Token("KEYWORD",ele.keyword);
        if (ele.name) return new Token("NAME",ele.name);
        if (ele.number) return new Token("NUMBER",ele.number);
        if (ele.string) return new Token("STRING",ele.string);
        return new Token("PUNCTUATOR",ele.punctuator);
    }

    /**
     * Returns the next token of the input.
     *
     * @returns {Token} The next token; an ENDMARKER token once the input is
     *     exhausted, on every call from then on.
     */
    this.getToken = function () {
        while (!pending.length) {
            if (finished) return new Token("ENDMARKER");

            // exec() yields null when only unrecognised characters remain;
            // that resets lastIndex, hence the separate `finished` flag.
            var ele = input.lastIndex() < text.length ? input.exec(text) : null;
            if (ele === null) {
                finish();
                break;
            }
            if (ele.comments) continue;
            if (ele.newline) {
                var lineHadTokens = !isBlankLine;
                isBlankLine = true;
                leadingSpace = 0;
                if (lineHadTokens) pending.push(new Token("NEWLINE"));
                continue;
            }
            if (ele.whitespace) {
                // Only whitespace that opens a line contributes to indentation.
                if (isBlankLine) leadingSpace = measureIndent(ele.whitespace);
                continue;
            }

            // Indentation is judged on the first significant token of a line,
            // so blank/comment-only lines are ignored and a line starting in
            // column 0 still closes the blocks above it.
            if (isBlankLine) queueIndentation(leadingSpace);
            isBlankLine = false;
            pending.push(toToken(ele));
        }
        return pending.shift();
    };
}
// Tokenize the contents of the <textarea id="code"> sample. The element is
// looked up explicitly instead of through the implicit `code` global that
// browsers create for elements with an id, which any variable named `code`
// would silently shadow.
var lexer = new Lexer(document.getElementById("code").value);
// Most recently read token; shared with the rendering script further down.
var token;
</script>
<pre>
<script>
// Re-emit the token stream as highlighted text inside the surrounding <pre>:
// one <span> per token, one line per NEWLINE, four spaces per open INDENT.
var leadingspace = 0;
var isLineStart = true;

// CSS class used for each token category that is written out.
var spanClassByToken = {
    NUMBER: "number",
    STRING: "string",
    NAME: "name",
    KEYWORD: "keyword",
    PUNCTUATOR: "punctuator"
};

// Token text comes straight from the sample source, so characters that are
// special in HTML must be escaped before being passed to document.write.
function escapeHtml(text) {
    return String(text)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;");
}

while ((token = lexer.getToken()).name != "ENDMARKER") {
    if (token.name == "NEWLINE") {
        document.write("<span class='newline'>" + "\n" + "</span>");
        isLineStart = true;
        continue;
    }
    if (token.name == "INDENT") {
        leadingspace += 4;
        continue;
    }
    if (token.name == "DEDENT") {
        // Clamp at 0: a negative width would make `new Array` throw below.
        leadingspace = Math.max(0, leadingspace - 4);
        continue;
    }
    if (isLineStart) {
        isLineStart = false;
        // new Array(n + 1).join(" ") yields a string of n spaces.
        document.write("<span class='name'>" + new Array(leadingspace + 1).join(" ") + "</span>");
    }
    if (Object.prototype.hasOwnProperty.call(spanClassByToken, token.name)) {
        document.write("<span class='" + spanClassByToken[token.name] + "'>" + escapeHtml(token.value) + " </span>");
    }
}
</script>
</pre>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment