Last active
May 19, 2019 03:22
-
-
Save c-smile/0a9c4af0d530fadc0a71904ddbe86ff9 to your computer and use it in GitHub Desktop.
Sciter: HTML to Markdown
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace MD { | |
/**
 * Creates a line-oriented text accumulator used to assemble the Markdown output.
 *
 * The returned object exposes:
 *   prefix(text, level = 0) - pushes the current line (when it is non-empty) and starts a new
 *                             line made of `level` spaces followed by `text`. When `text` is
 *                             exactly null the freshly started (indentation-only) line is pushed
 *                             as well, which yields a separator line. Returns the length of the
 *                             new line.
 *   text(txt)               - appends `txt` to the current line after collapsing every run of
 *                             spaces, tabs and line breaks into a single space.
 *   out(txt)                - appends `txt` to the current line verbatim.
 *   pos()                   - length of the current line.
 *   content()               - pushes the current line and returns all lines joined with "\n".
 */
function makeTextStream() {
  var out = [];   // completed lines
  var curl = "";  // current line
  var curo = 0;   // current line offset (a.k.a. level): its length right after prefix()

  function outPrefix(text, level = 0) {
    if(curl) out.push(curl);
    curl = new String(level,' ') + (text || "");
    // null text means "separator": emit the indentation-only line itself, but keep it as the
    // current line so that text written without another prefix() starts at that indentation
    if(text === null) out.push(curl);
    curo = curl.length;
    return curo;
  }

  function outText(text) {
    // Inline HTML whitespace is insignificant: a lone newline or tab must not leak into the
    // output, where it would break single-line constructs such as headings or table rows.
    text = text.replace(/[ \t\n\r]+/g," ");
    curl += text;
    return curo; // offset recorded by the last prefix() call
  }

  function getContent() { out.push(curl); curl = ""; return out.join("\n"); }
  function getPos() { return curl.length; }

  return {
    prefix : outPrefix,
    text   : outText,
    content: getContent,
    pos    : getPos,
    out    : function(txt) { curl += txt; }
  };
}
/**
 * Writes a separator line before a block element.
 *
 * Nothing is written unless `el.prior` is truthy or `force` is true; otherwise
 * stream.prefix(null, level) is called, i.e. a line holding only `level` spaces of indentation.
 */
function gap(stream, el, level, force = false) {
  const wanted = el.prior || force;
  if(!wanted) return;
  stream.prefix(null, level);
}
/**
 * Backslash-escapes characters of `text` that Markdown would otherwise treat as markup.
 *
 * The rules are applied one after another in the listed order. The backslash rule comes
 * first so that backslashes inserted by the later rules are not escaped a second time.
 * Rules whose pattern starts with ^ carry no multiline flag, so they apply to the beginning
 * of `text` only.
 *
 * Returns the escaped string.
 */
function escape(text)
{
  // [pattern, replacement] pairs
  const rules = [
    [/\\/g, "\\\\"],
    [/\*/g, "\\*"],
    [/^-/g, "\\-"],
    [/^\+ /g, "\\+ "],
    [/^(=+)/g, "\\$1"],
    [/^(#{1,6}) /g, "\\$1 "],
    [/`/g, "\\`"],
    [/^~~~/g, "\\~~~"],
    [/\[/g, "\\["],
    [/\]/g, "\\]"],
    [/^>/g, "\\>"],
    [/_/g, "\\_"],
    [/^(\d+)\. /g, "$1\\. "]
  ];
  var escaped = text;
  for(var rule in rules)
    escaped = escaped.replace(rule[0], rule[1]);
  return escaped;
}
/**
 * Emitters for block-level elements, looked up by tag name (see emitBlock).
 * Every emitter has the signature (stream, el, level) where `level` is the indentation,
 * in characters, of the enclosing block's content.
 */
namespace blocks {
  // list item: "1. " when the parent tag is "ol", "* " otherwise; the content level is the
  // line length returned by stream.prefix(), i.e. the column right after the marker
  function li(stream, el, level) { const prefix = el.parent.tag == "ol" ? "1. " : "* "; emitContent(stream,el,stream.prefix(prefix,level)); }

  // list containers: optional separator line, then the items at the same level
  function ol(stream, el, level) { gap(stream,el,level); emitContent(stream, el, level); }
  function ul(stream, el, level) { gap(stream,el,level); emitContent(stream, el, level); }
  function dl(stream, el, level) { gap(stream,el,level); emitContent(stream, el, level); }

  // definition term starts a plain new line, definition data a line prefixed with ": "
  function dt(stream, el, level) { emitContent(stream,el,stream.prefix("",level)); }
  function dd(stream, el, level) { emitContent(stream,el,stream.prefix(": ",level)); }

  function blockquote(stream, el, level) { gap(stream,el,level); emitContent(stream,el,stream.prefix("> ",level)); }

  // paragraphs always get a separator line (force = true)
  function p(stream, el, level) { gap(stream,el,level,true); emitContent(stream, el, level); }
  function div(stream, el, level){ gap(stream,el,level); emitContent(stream,el,stream.prefix("",level)); }

  // headings: one "#" per heading rank
  function h1(stream, el, level) { gap(stream,el,level); emitContent(stream,el,stream.prefix("# ",level)); }
  function h2(stream, el, level) { gap(stream,el,level); emitContent(stream,el,stream.prefix("## ",level)); }
  function h3(stream, el, level) { gap(stream,el,level); emitContent(stream,el,stream.prefix("### ",level)); }
  function h4(stream, el, level) { gap(stream,el,level); emitContent(stream,el,stream.prefix("#### ",level)); }
  function h5(stream, el, level) { gap(stream,el,level); emitContent(stream,el,stream.prefix("##### ",level)); }
  function h6(stream, el, level) { gap(stream,el,level); emitContent(stream,el,stream.prefix("###### ",level)); }

  // fenced code block: el.text is written verbatim (no escaping, no whitespace collapsing)
  function pre(stream, el, level) { gap(stream,el,level); stream.prefix("```",level); stream.out("\n"+el.text); stream.prefix("```",level); }

  // image: ![alt](src)
  function img(stream, el, level) {
    stream.out("!["); stream.text(el.attributes["alt"] || ""); stream.out("]"); stream.out("(" + el.attributes["src"] + ")");
  }

  // figure: the contained image, captioned by the figcaption text: ![caption](src)
  function figure(stream, el, level) {
    const caption = el.$(figcaption);
    const image = el.$(img);
    if(!image) return; // nothing to show
    gap(stream,el,level);
    stream.out("![");
    // a figure without <figcaption> still carries an image: fall back to its alt text
    // instead of dropping the whole figure
    stream.text((caption ? caption.text : image.attributes["alt"]) || "");
    stream.out("]"); stream.out("(" + image.attributes["src"] + ")");
  }

  // table: header rows, a dashed delimiter row, then body and footer rows;
  // cells of a row are separated by " | "
  function table(stream, el, level) {
    const MIN_DASHES = 3; // shortest delimiter cell written, so that it is never empty

    // Writes each row of `section` on its own line. When `widths` is an array, the
    // rendered width of every cell is appended to it.
    function emitRows(section, widths = null) {
      for(var tr in section) {
        stream.prefix("",level);
        for(var td in tr) {
          var start = stream.pos();
          emitContent(stream,td,level);
          if( widths ) widths.push(stream.pos() - start);
          if( td.next ) stream.out(" | ");
        }
      }
    }

    var thead = el.$(thead);
    var tbody = el.$(tbody);
    var tfoot = el.$(tfoot);
    gap(stream,el,level);
    if( thead ) {
      var cellwidths = [];
      emitRows(thead, cellwidths);
      stream.prefix("",level);
      for(var (index,w) in cellwidths) {
        // an empty header cell has width 0; clamp so the delimiter cell is never empty
        stream.out(new String(w < MIN_DASHES ? MIN_DASHES : w,'-'));
        if( index != cellwidths.length - 1 ) stream.out(" | ");
      }
    }
    if( tbody ) emitRows(tbody);
    // Markdown tables have no footer section: footer rows are kept as trailing body rows
    if( tfoot ) emitRows(tfoot);
  }

  // fallback for block tags without a dedicated emitter: emit the content only
  function thunk(stream, el, level) { emitContent(stream, el, level); }
}
/**
 * Emitters for inline (span) elements, looked up by tag name (see emitContent).
 * Every emitter has the signature (stream, el) and wraps the element's content
 * in the corresponding Markdown delimiters.
 */
namespace spans {
  function strong(stream,el) { stream.out("**"); emitContent(stream, el); stream.out("**"); }
  function em(stream,el) { stream.out("*"); emitContent(stream, el); stream.out("*"); }

  // Backslash escapes are not interpreted inside Markdown code spans, so the content must
  // not go through emitContent() (which escapes text): `a_b` would be rendered as a\_b.
  // The element text is emitted as is, with only whitespace normalized by stream.text().
  function code(stream,el) { stream.out("`"); stream.text(el.text || ""); stream.out("`"); }

  // link: [content](href), e.g. [GitHub](http://github.com)
  function a(stream,el) {
    stream.out("[");
    emitContent(stream, el);
    stream.out("]");
    stream.out("(" + el.attributes["href"] + ")");
  }

  // aliases
  const b = strong;
  const i = em;
  //const var = code;
  const kbd = code;

  // unknown span(?): emit the content without any decoration
  function thunk(stream,el) { emitContent(stream, el); }
}
/**
 * Emits one block-level element.
 *
 * The emitter is looked up in `blocks` by the element's tag; tags without an entry are
 * handled by blocks.thunk. `parentLevel` is passed to the emitter as its level argument.
 */
function emitBlock(stream, el, parentLevel = 0) {
  var handler = blocks[el.tag];
  if(!handler) handler = blocks.thunk;
  handler(stream, el, parentLevel);
}
/**
 * Emits the child nodes of `el` into `stream`.
 *
 *  - text nodes are escaped and appended to the current line; when `el` itself is a block
 *    (el.state.flowType is truthy) the first text node is trimmed on the left and the last
 *    one on the right;
 *  - <br> ends the current line with a Markdown hard line break;
 *  - child elements with a truthy state.flowType are emitted as blocks via emitBlock() at
 *    `blockContentlevel`, all other elements via the matching `spans` emitter
 *    (spans.thunk when the tag has none).
 *
 * `blockContentlevel` is the indentation, in characters, of this element's content.
 */
function emitContent(stream, el, blockContentlevel = 0) {
  const isBlock = !!el.state.flowType;
  const nodes = el.nodes();
  for(var node in nodes) {
    if(node.isText) {
      var text = node.text;
      if(isBlock) {
        if(nodes.first === node)
          text = text.trim(#left);
        if(nodes.last === node)
          text = text.trim(#right);
      }
      if(text)
        stream.text(escape(text));
      continue;
    }
    if(!node.isElement)
      continue;
    if(node.tag == "br") {
      // A bare newline is only a soft break in Markdown. Two trailing spaces make it a
      // hard break; the continuation line keeps the indentation of the enclosing block.
      stream.out("  ");
      stream.prefix("", blockContentlevel);
    }
    else if(node.state.flowType) { // block element
      emitBlock(stream, node, blockContentlevel);
    } else { // span element
      var emitter = spans[node.tag] || spans.thunk;
      emitter(stream, node);
    }
  }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
real document test
Sciter Architecture
Sciter does not use a DOM model like the W3C DOM, as we have found it too complicated (76 different classes, sic!).
DOM and Window classes
Sciter provides following 8 DOM and Window specific classes accessible in script:
Element - DOM element. All HTML elements including document(root), frame, inputs, etc. are Element's in the Sciter.
Image - image object. Represents bitmap image. You can draw on the image using Graphics methods.
Graphics - object wrapping drawing primitives. You can draw on surface of any Element and Image in the Sciter.
View - represents Sciter window. Main Sciter window and Dialog are views.
Event - represents current UI event.
Sciter - is a global object holding Sciter application specific methods.
View, document, frame and the root element.
Window that Sciter is attached to is represented by the View objects in script.
Each view has root property - reference of the document loaded into the view. This is a root element of the loaded document. Root element in the Sciter is
<html>
element of the loaded document and there is no dedicated Document class. Document is really root node of element tree -<html>
element and its children.
Frames and framesets.
Frames and framesets are also ordinary DOM elements. A <frame> element has a single child element - the <html> element of the document loaded into it. The parent property of the root element of the document loaded in the frame refers to the <frame> element this document is loaded into. Simple as it is.
Frames ( elements) in the Sciter can appear in any part of the HTML (not only in ) so there is no difference between and <iframe> in the Sciter. Moreover any block element like
Element object has method load that allows to (re)load content of any element and from external source - url or stream (including in-memory dynamic stream). So there is not too much difference between block element like
The same approach is used with <frameset>s - they are plain DOM elements and may appear at any place where block elements are acceptable. A <frameset> can contain not only <frame> elements but any block elements, thus a <frameset> in the Sciter is a convenient way to define a container with splitters. Moreover, any block element can be transformed into a frameset by declaring behavior:frameset in its style declaration.
Script evaluation.
Sciter knows and interprets only tiscript fragments and files. To include script block in the document use following elements:
or for inline script inclusion:
Global namespace, view and self objects.
Document establishes namespace for script execution. All classes and functions defined like this:
go to that global document namespace. self and view global variables are members of this namespace.
self
: is a reference to the document (
<html>
node)
view
: is a reference to the view object (usually it is a Sciter window)
Script execution
Sciter executes scripts as a last step of document loading - after tag is being parsed. So at the moment of any script execution DOM is established and scripts can refer to it.
There are three major steps of script execution in the engine:
When the document needs to be unloaded from the view (e.g. the Sciter window has received a close request from the user), the engine calls the self.closing() method (if it was declared). If that method returns exactly
false
value then unloading stops. This way document can cancel its own unloading.