Created
December 15, 2008 11:04
-
-
Save gyuque/35927 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'cgi'

# Tiny line-oriented wiki-markup to HTML converter.
#
# Each input line is one of:
#   * text       -> <h3> heading
#   :term:desc   -> definition-list item (consecutive such lines share one <dl>)
#   anything else -> <p> paragraph
# Inside any text, bare URLs / e-mail addresses become <a> links, literal <br>
# tags become "<br />", and everything else is HTML-escaped.
#
# Usage: MicroWikiParse::Document.new(source).render_html
module MicroWikiParse
  # Group 1 matches a literal <br> tag; otherwise the whole match is a
  # http/https/ftp URL or an e-mail address.
  RX_BR_OR_URL = %r<(\<[Bb][Rr] */?\>)|(((http[s]{0,1}|ftp)://[\(\)%#!/0-9a-zA-Z_$@.&+-,'"*=;?:~-]+)|([0-9a-zA-Z_.-]+@[\(\)%!0-9a-zA-Z_$.&+-,'"*-]+\.[\(\)%!0-9a-zA-Z_$.&+-,'"*-]+))>
  # "* text" -> capture 1 is the heading text.
  HEADING_PREFIX = /^\*\s*(.*)/
  # ":term:description" -> capture 1 is the term, capture 2 the description.
  DL_PREFIX = /^:([^:]+):(.*)/

  # base classes ======================

  # Common ancestor of renderable elements; renders nothing by default.
  class WikiElement
    def render_html
    end
  end

  # An element that owns an ordered list of child elements.
  class Block < WikiElement
    # doc: the Document used to recognise block notations.
    def initialize(doc)
      @document = doc
      @children = []
    end

    # Feeds one source line to this block.
    #
    # Returns the newly opened child block when the line starts a block
    # notation (the caller is expected to re-feed the line to that block),
    # otherwise stores the line as a paragraph and returns self.
    def consume(ln)
      next_block = @document.match_block(ln)
      if next_block
        @children << next_block
        return next_block
      end

      # Anything else is a standard paragraph, which is complete immediately.
      @children << StdParagraph.new(@document, ln)
      self
    end

    # Renders every child and joins the results with newlines.
    def render_html
      @children.map(&:render_html).join("\n")
    end
  end

  # ===================================

  # Root block: parses the whole source on construction.
  class Document < Block
    # src: the wiki source text (nil is treated as empty).
    def initialize(src)
      super(self)
      @block_parsers = [Heading, DefList]
      @context = [self] # stack of open blocks; the last one receives lines

      RawReader.new(src).each do |ln|
        next_ctx = @context.last.consume(ln)
        if next_ctx
          if @context.last != next_ctx
            # A new block was opened: push it and hand it the same line.
            @context << next_ctx
            next RawReader.unget
          end
        else
          # The current block refused the line: close it and retry the line
          # on the enclosing block.
          @context.pop
          next RawReader.unget
        end
        nil # line consumed; let the reader advance
      end
    end

    # Returns a new block for the first parser whose notation matches ln,
    # or nil when no block notation matches.
    def match_block(ln)
      @block_parsers.each do |parser|
        block = parser.match_notation(self, ln)
        return block if block
      end
      nil
    end
  end

  # A single-line "* text" heading, rendered as <h3>.
  class Heading
    # Returns a Heading when ln matches HEADING_PREFIX, else nil.
    def self.match_notation(doc, ln)
      m = HEADING_PREFIX.match(ln)
      Heading.new(doc, m[1]) if m
    end

    def initialize(doc, raw)
      @document = doc
      @textnode = TextNode.new(doc, raw)
      @consumed = false
    end

    # Accepts exactly one line (the heading line itself, which Document
    # re-feeds after opening this block); any later line returns nil so the
    # Document closes the heading.
    def consume(ln)
      return nil if @consumed

      @consumed = true
      self
    end

    def render_html
      "<h3>#{@textnode.render_html}</h3>"
    end
  end

  # A run of consecutive ":term:description" lines, rendered as <dl>.
  class DefList < Block
    # Returns an empty DefList when ln matches DL_PREFIX, else nil. The
    # matching line itself is added as an item when it is re-fed to #consume.
    def self.match_notation(doc, ln)
      m = DL_PREFIX.match(ln)
      DefList.new(doc, m[1]) if m
    end

    # raw is accepted for signature parity with the other blocks but unused.
    def initialize(doc, raw)
      super(doc)
    end

    # Adds ln as a list item and returns self, or returns nil when ln is not
    # a definition line (which ends the list).
    def consume(ln)
      m = DL_PREFIX.match(ln)
      return nil unless m

      @children << DefListItem.new(@document, m[1], m[2])
      self
    end

    def render_html
      "<dl>\n#{super}\n</dl>"
    end
  end

  # One term/description pair inside a DefList.
  class DefListItem
    def initialize(doc, dt, dd)
      @document = doc
      @t_text = TextNode.new(doc, dt)
      @d_text = TextNode.new(doc, dd)
    end

    def render_html
      " <dt>#{@t_text.render_html}</dt>\n <dd>#{@d_text.render_html}</dd>\n"
    end
  end

  # A plain line of text, rendered as <p>.
  class StdParagraph < Block
    def initialize(doc, raw)
      super(doc)
      @textnode = TextNode.new(doc, raw)
    end

    def render_html
      "<p>#{@textnode.render_html}</p>"
    end
  end

  # Inline text: splits the raw string into escaped text, links and breaks.
  class TextNode
    # d: owning document (only stored); r: raw, unescaped text.
    def initialize(d, r)
      @document = d
      @raw = r
      parse
    end

    # Builds @children: HTML-escaped Strings interleaved with LinkNode
    # instances and the BreakNode class (used directly as a renderable).
    def parse
      @children = []
      pos = 0
      while (m = RX_BR_OR_URL.match(@raw, pos)) # next direct link or <br/>
        @children << CGI.escapeHTML(@raw[pos...m.begin(0)])
        # Group 1 is set only for the <br> alternative.
        @children << (m[1] ? BreakNode : LinkNode.new(m[0], false))
        pos = m.end(0)
      end
      @children << CGI.escapeHTML(@raw[pos..-1]) if pos < @raw.length
    end

    def render_html
      @children.map { |n| n.is_a?(String) ? n : n.render_html }.join
    end
  end

  # Line break; rendered through a class-level method, never instantiated.
  class BreakNode
    def self.render_html
      "<br />\n"
    end
  end

  # A hyperlink whose visible text is the URL itself.
  class LinkNode
    # u: URL exactly as found in the source; ei: embed-as-image flag
    # (stored, but image embedding is not implemented).
    def initialize(u, ei)
      @embed_img = ei
      @url = LinkNode.escape(u)
    end

    # Returns u made safe for use as an HTML attribute value and as text.
    #
    # Ampersands already written as "&amp;" are normalised first so they are
    # not double-escaped. Quotes must be escaped as well because the URL
    # pattern accepts ' and ", which would otherwise break out of href="...".
    def self.escape(u)
      CGI.escapeHTML(u.gsub('&amp;', '&'))
    end

    # NOTE(review): e-mail addresses matched by RX_BR_OR_URL are linked with
    # the bare address as href (no "mailto:" scheme) -- confirm intended.
    def render_html
      "<a href=\"#{@url}\">#{@url}</a>"
    end
  end

  # utils =============================

  # Iterates over the non-empty lines of a text, allowing the consumer to
  # request the same line again.
  class RawReader
    # Sentinel: when the block given to #each evaluates to this value the
    # current line is delivered again instead of advancing.
    def self.unget
      :unget
    end

    # raw: source text; any mix of \r / \n separates lines, blank lines
    # (including leading ones) are dropped, nil is treated as empty.
    def initialize(raw)
      @lines = raw.to_s.split(/[\r\n]+/).reject(&:empty?)
    end

    # Yields each line in order; re-yields a line whenever the block
    # evaluates to RawReader.unget. Returns nil.
    def each
      i = 0
      while i < @lines.length
        i += 1 unless yield(@lines[i]) == RawReader.unget
      end
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment