brecert · November 7, 2022 15:59
diff --git a/readme.md b/readme.md
diff --git a/examples b/examples
 # Basic Formatting
 **bold**

 *italic* (friendly)
 //italic//

 __underline__

 ~~strikethrough~~

 ||spoiler||

 `inline code` (friendly)
 ``inline code``

 # Blockquotes
 > single line blockquote (friendly)
 >> single line blockquote (does not stack with previous due to different > indents?)

 # Custom Expressions
 tag:argument[attributes] (friendly but more limitations)
 [tag:argument attributes]

 [link: https://example.com -> Example Website]
 https://example.com[Example Website]

 # Blocks
 argument and [attributes] can be elided

 more backticks ` can be added to require matching that amount of backticks
 this can help with including other blocks inside of the block

 implementation would determine what tags are defined for blocks to use

 ```
 default block (usually codeblock)
 ```

 ```tag:argument[attributes]
 text
 ```

 ```[tag:argument attributes]
 text
 ```

 ```[code:js]```

 ```code:js
 ```

 (friendly definitions)
 ```js
 ```

 ```blockquote
 multiline
 blockquote
 ```

 ``````text:inline[color: gray, font-size: large, font-family: monospace]
 ```
 large monospaced text
 ```
 ``````

 ideally it'd parse
 ```
 normal //italic **and bold**// normal
 >> //blockquote//

 ===js
 console.log("hello world")
 ===
 ```
 as
 ```
 {
  text: "normal //italic **and bold**// ...",
  entities: [
    Text { offset: 0,  length: 7 },
    Text { offset: 9,  length: 7, italic: true },
    Text { offset: 18, length: 8, italic: true, bold: true },
    Text { offset: 30, length: 8 },
    Blockquote { entities: [
      { type: .text, offset: 43, length: 10, italic: true }
    ]},
    Text { offset: 56, length: 1 },
    Block {
      tag: { offset: 60, length: 2 },
      content: { offset: 63, length: 27 }
    }
  ]
 }
 ```





diff --git a/markup.ts b/markup.ts
 // Ambient augmentations for the regular-expression `d` (hasIndices) flag.
 // `parse` below builds its tokenizer with the `d` flag and reads `match.indices`,
 // so these declarations make that property visible to the type checker.

 /** Adds the optional per-match offset table to `String.prototype.match`/`matchAll` results. */
 interface RegExpMatchArray {
    indices?: RegExpIndicesArray;
 }

 /** Adds the optional per-match offset table to `RegExp.prototype.exec` results. */
 interface RegExpExecArray {
    indices?: RegExpIndicesArray;
 }

 /**
  * Array of `[number, number]` offset pairs, one entry per capture (entry 0 is the whole match),
  * plus an optional `groups` map keyed by group name.
  */
 interface RegExpIndicesArray extends Array<[number, number]> {
    groups?: {
        [key: string]: [number, number] | undefined
    }
 }

 interface RegExp {
    /**
     * Returns a Boolean value indicating the state of the hasIndices flag (d) used with a regular expression.
     * Default is false. Read-only.
     */
    readonly hasIndices: boolean
 }

 /** Union of all property value types of `T`. */
 type ValueOf<T> = T[keyof T]


 /** Start/end pair in object form. Not referenced elsewhere in the visible source. */
 type Span = { start: number, end: number }

 /** Offset pair into the source string; `parse` fills these from the tokenizer's `match.indices[0]`. */
 type Indice = [number, number]

 /** An opening delimiter that `parse` has seen and is still waiting to pair with a closing one. */
 type Marker = { type: MarkerType, indice: Indice }
 /**
  * A paired region of the source.
  * `from` is the offset pair of the opening delimiter and `to` that of the closing delimiter.
  * The `'block'` variant additionally carries the parsed fence header, when one was recognised.
  */
 type Entity =
    | { type: MarkerType, from: Indice, to: Indice }
    | { type: 'block', from: Indice, to: Indice, expression?: TextExpression }

 /**
  * Every entity kind except `'block'`: the values of `MARKER_TYPE` plus the line-level kinds.
  * NOTE(review): `'text'` is declared here but never produced by the visible code.
  */
 type MarkerType = ValueOf<typeof MARKER_TYPE> | 'blockquote' | 'line' | 'text'

 /** Discriminates the two halves of a paired entity once it has been split into events. */
 const enum EventType {
    Open = "open",
    Close = "close"
 }

 /**
  * A single rendering instruction produced by `intoEvents`.
  * - Open/Close events carry the entity kind in `marker` and the delimiter's offset pair in `indice`.
  * - Block events are emitted once per block: `inner` spans the content between the delimiters,
  *   `outer` spans the whole block including delimiters, and `indice` equals `outer`.
  */
 type EntityEvent =
    | { type: EventType, marker: MarkerType, indice: Indice }
    | { type: 'block', indice: Indice, inner: Indice, outer: Indice, expression?: TextExpression }

 /**
  * Maps each two-character inline delimiter to the entity kind it produces.
  * `parse` looks symmetric delimiters up here; the double-backtick entry is listed for the
  * type union, while `parse` itself pushes `'code'` entities with a literal type.
  */
 const MARKER_TYPE = {
    '**': 'bold',
    '//': 'italic',
    '__': 'underline',
    '~~': 'strikethrough',
    '||': 'spoiler',
    '``': 'code',
 } as const


 /**
  * A parsed `tag:argument` expression (bracketed `[tag:arg]` or the bare fence-header sugar).
  * Offsets in `from`/`to` are relative to the string handed to the parser, not to the whole document.
  * `parse` uses `to[1]` as the number of header characters to skip after an opening fence.
  * NOTE(review): the intended meaning of `from` is unclear — the sugar parser sets it to `[0, 0]`
  * with a `???` comment; confirm before relying on it.
  */
 type TextExpression = {
    tag: string,
    arg?: string,
    from: Indice,
    to: Indice,
 }

 /**
  * Parses a bracketed expression of the form `[tag:argument]` located at the very start of `source`.
  *
  * The tag is the text between `[` and the first `:`; the argument is everything between that
  * colon and the first unescaped `]`. A backslash skips the character that follows it.
  *
  * @param source text that is expected to begin with `[`
  * @returns the parsed expression, or `undefined` when `source` does not start with `[`,
  *          a newline is reached before `]`, the tag is missing/empty, or no `]` is found
  */
 function parseCustomExpression(source: string): TextExpression | undefined {
    if (!source.startsWith('[')) return undefined

    let tag: string | undefined
    let cursor = 0

    while (cursor < source.length) {
        const current = source.charAt(cursor)

        // Expressions never span lines.
        if (current === '\n') {
            return undefined
        }

        if (current === ']') {
            if (!tag) return undefined
            // Skip the opening bracket, the tag and the colon.
            const argStart = tag.length + 2
            return {
                tag,
                arg: source.slice(argStart, cursor),
                from: [0, tag.length],
                to: [cursor, cursor + 1],
            }
        }

        if (current === ':') {
            // Only the first colon delimits the tag; later ones belong to the argument.
            if (tag === undefined) tag = source.slice(1, cursor)
        } else if (current === '\\') {
            // Step over the escaped character.
            cursor += 1
        }

        cursor += 1
    }

    return undefined
 }

 /**
  * Parses the header that follows a block's opening fence.
  *
  * Two spellings are accepted:
  * - the bracketed form handled by `parseCustomExpression` (`[tag:argument]`), and
  * - the bare sugar `tag` or `tag:argument`, which must occupy the whole first line.
  *
  * @param source the text between the opening and closing fences
  * @returns the parsed header, or `undefined` when the first line is not a header
  *          (the block then has no expression)
  */
 function parseBlockCustomExpression(source: string): TextExpression | undefined {
    const bracketed = parseCustomExpression(source)
    if (bracketed) return bracketed

    // `m` lets `$` stop at the end of the first line, but it also lets `^` match at the start
    // of any later line. Only a match at offset 0 is a header: otherwise a body line such as
    // "foo" would be taken for a tag, and `to` (measured from 0) would cut the wrong span.
    const expressionSugarRe = /^(\w+)(?::(\w+))?$/m
    const match = source.match(expressionSugarRe)
    if (!match || match.index !== 0) return undefined

    const headerLength = match[0].length
    return {
        tag: match[1],
        arg: match[2],
        from: [0, 0], // ???
        to: [headerLength, headerLength]
    }
 }

 /**
  * Tokenizes `source` and pairs delimiters into entities.
  *
  * - Symmetric inline delimiters (`**`, `//`, `__`, `~~`, `||`) are paired per line: an opener is
  *   remembered in `markers` until the same delimiter appears again; line breaks reset `markers`.
  * - Double backticks and triple-backtick fences are paired with the next identical token, and
  *   every token in between is skipped (their content is not parsed for inline markup).
  * - A `>> ` prefix opens a blockquote; consecutive `>> ` lines continue it and produce `line` entities.
  *
  * @param source the markup text
  * @returns entities in the order their closing delimiter was encountered
  * @throws Error if the tokenizer yields a token this function does not know about
  */
 // todo: there should be a better way to implement blockquote continuations
 function parse(source: string): Entity[] {
    let markers: Marker[] = []
    const entities: Entity[] = []

    // Shape of the inline `tag:argument` sugar token emitted by the last tokenizer alternative.
    const inlineExpressionRe = /^\w:\w+$/

    const tokens = [...source.matchAll(/```|\*\*|__|\/\/|~~|\|\||``|(?:^|\n)>> |\n|$|\w:\w+/dgm)]

    for (let i = 0; i < tokens.length; i++) {
        const match = tokens[i]
        const token = match[0]
        const indice = match.indices![0]

        switch (token) {
            // '': EOF
            case '':
            case '\n': {
                // A line break (or end of input) that is not followed by `>> ` ends the blockquote.
                const prevMarker = markers.find(marker => marker.type === 'blockquote')
                if (prevMarker) {
                    entities.push({
                        type: 'blockquote',
                        from: prevMarker.indice,
                        to: indice,
                    })
                }

                const prevContinue = markers.find(marker => marker.type === 'line')
                if (prevContinue) {
                    entities.push({
                        type: 'line',
                        from: prevContinue.indice,
                        to: indice,
                    })
                }
                markers = []
                break
            }
            case '>> ':
            case '\n>> ': {
                const prevMarker = markers.find(marker => marker.type === 'blockquote')
                const prevContinue = markers.find(marker => marker.type === 'line')
                if (prevContinue) {
                    entities.push({
                        type: 'line',
                        from: prevContinue.indice,
                        to: indice,
                    })
                }

                // make markers per-line
                markers = []
                markers.push(prevMarker ?? { type: 'blockquote', indice })

                // Only continuation lines get a `line` marker; the first line of a quote does not.
                if (prevMarker) {
                    markers.push({ type: 'line', indice })
                }
                break
            }
            case '```': {
                // Absolute index of the closing fence. It must be absolute: assigning an index
                // relative to the remaining tokens would rewind the loop and never terminate.
                const closeIndex = tokens.findIndex((t, j) => j > i && t[0] === '```')
                if (closeIndex >= 0) {
                    const nextIndice = tokens[closeIndex].indices![0]

                    const expression = parseBlockCustomExpression(source.slice(indice[1], nextIndice[0]))
                    // Extend the opening delimiter over the header so it is excluded from the content.
                    const currIndice: Indice = [indice[0], indice[1] + (expression?.to?.[1] ?? 0)]

                    entities.push({ type: 'block', from: currIndice, to: nextIndice, expression })

                    // Resume after the closing fence (the loop increment steps past it).
                    i = closeIndex
                }
                break
            }
            case '``': {
                const closeIndex = tokens.findIndex((t, j) => j > i && t[0] === '``')
                if (closeIndex >= 0) {
                    const nextIndice = tokens[closeIndex].indices![0]
                    entities.push({ type: 'code', from: indice, to: nextIndice })

                    // Resume after the closing delimiter (the loop increment steps past it).
                    i = closeIndex
                }
                break
            }
            case '**':
            case '//':
            case '__':
            case '~~':
            case '||': {
                const type = MARKER_TYPE[token]
                const prevIndex = markers.findIndex(marker => marker.type === type)
                if (prevIndex >= 0) {
                    const prevMarker = markers[prevIndex]

                    // remove unfinished markers
                    markers.splice(prevIndex, markers.length)

                    entities.push({ type, from: prevMarker.indice, to: indice })
                } else {
                    markers.push({ type, indice })
                }
                break
            }
            default: {
                // The tokenizer also matches inline `tag:argument` sugar, which has no inline
                // handling yet. Leave it as plain text instead of aborting on input like "10:30".
                if (inlineExpressionRe.test(token)) break
                throw new Error("Invalid Token")
            }
        }
    }

    return entities
 }

 /**
  * Tells whether `inner` lies strictly inside `outer`: `inner`'s opening delimiter must end after
  * `outer`'s opening delimiter ends, and `inner`'s closing delimiter must start before `outer`'s
  * closing delimiter starts.
  */
 const inRange = (outer: Entity, inner: Entity) => {
    const opensAfterOuter = inner.from[1] > outer.from[1]
    const closesBeforeOuter = outer.to[0] > inner.to[0]
    return opensAfterOuter && closesBeforeOuter
 }

 /**
  * Flattens entities into a list of events ordered by start offset.
  *
  * A block entity becomes one `'block'` event whose `inner` is the span between the delimiters and
  * whose `outer`/`indice` span the whole block. Every other entity becomes an Open event at its
  * opening delimiter and a Close event at its closing delimiter.
  *
  * @param entities entities as produced by `parse`; the array is not modified
  * @returns a new array of events sorted by `indice[0]`
  */
 function intoEvents(entities: Entity[]): EntityEvent[] {
    const events: EntityEvent[] = []

    for (const entity of entities) {
        if (entity.type === 'block') {
            const { from, to, expression } = entity
            events.push({
                type: 'block',
                expression,
                inner: [from[1], to[0]],
                outer: [from[0], to[1]],
                indice: [from[0], to[1]],
            })
            continue
        }

        events.push(
            { type: EventType.Open, marker: entity.type, indice: entity.from },
            { type: EventType.Close, marker: entity.type, indice: entity.to },
        )
    }

    events.sort((left, right) => left.indice[0] - right.indice[0])
    return events
 }

 /** Escapes the characters that are significant in HTML text and in double-quoted attributes. */
 function escapeHtml(value: string): string {
    return value
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
 }

 /**
  * Renders `text` to an HTML string according to `events`.
  *
  * Events are consumed in order. The text between the previous event and the current one is
  * emitted (HTML-escaped) before the event's own markup, and the delimiter covered by the event
  * is skipped.
  * - Open/Close events become `<span class="{marker}">` / `</span>`.
  * - `code` blocks become `<pre class="code">`, with a `lang` attribute when an argument is given.
  * - Blocks with no expression or an unrecognised tag are emitted as their original source text,
  *   so no content is lost.
  *
  * @param events events sorted by start offset, as returned by `intoEvents`
  * @param text the source text the event offsets refer to
  * @returns the rendered HTML
  */
 function render(events: EntityEvent[], text: string): string {
    const spans: string[] = []
    let lastPos = 0

    for (const event of events) {
        // Plain text sitting between the previous event and this one.
        const leading = escapeHtml(text.slice(lastPos, event.indice[0]))

        switch (event.type) {
            case EventType.Open: {
                spans.push(`${leading}<span class="${event.marker}">`)
                break
            }
            case EventType.Close: {
                spans.push(`${leading}</span>`)
                break
            }
            case 'block': {
                spans.push(leading)

                const expression = event.expression
                switch (expression?.tag) {
                    // NOT SEMANTIC, DEMONSTRATION ONLY!
                    case 'code': {
                        // Omit the attribute instead of printing lang="undefined" when the argument is elided.
                        const lang = expression.arg === undefined ? '' : ` lang="${escapeHtml(expression.arg)}"`
                        const content = escapeHtml(text.slice(event.inner[0], event.inner[1]).trim())
                        spans.push(`<pre class="code"${lang}>${content}</pre>`)
                        break
                    }
                    default: {
                        // No renderer for this block: keep the source text verbatim rather than dropping it.
                        spans.push(escapeHtml(text.slice(event.outer[0], event.outer[1])))
                        break
                    }
                }
                break
            }
        }
        lastPos = event.indice[1]
    }

    spans.push(escapeHtml(text.slice(lastPos)))

    return spans.join('')
 }

 // Demo driver: runs the parse -> intoEvents -> render pipeline on a sample string at module
 // load and prints the intermediate entities, the input, and the resulting HTML.

 // Alternative sample exercising nested inline markers and multi-line blockquotes:
 // const text = '>> 0//**1__2__3__4__//**5\n>> next line\n>> next line\n\n>> new blockquote'
 // Active sample: a fenced block using the `tag:argument` header sugar.
 const text = '```code:js\nfoo bar```'
 const entities = parse(text)
 const events = intoEvents(entities)
 const html = render(events, text)

 console.log(entities)
 console.log(text)
 console.log(html)
diff --git a/style.css b/style.css
 body {
  white-space: pre;
  background-color: #263238;
  color: #fcfcff;
  font-family: system-ui;
  padding: 30px;
 }

 .italic {
  font-style: italic;
 }

 .underline {
  text-decoration: underline;
 }

 .bold {
  font-weight: bold;
 }

 .blockquote {
  display: block;
  
  padding: 5px;
  border-inline-start: 5px solid #ccc;
  background: #fff1;
  border-radius: 0 4px 4px 0;
  margin-bottom: 5px;
 }

 .line {
  display: block;
 }

 pre.code {
  position: relative;
  padding: 5px;
  border: 2px solid #ccc;
  background: #fff1;
  border-radius: 4px;
  margin-bottom: 5px;
 }

 /* Language badge: shows the element's `lang` attribute in the top-right corner of a code block. */
 pre.code::before {
  content: attr(lang);
  position: absolute;
  top: -10px;
  right: 2px;
  background: #e7d34a;
  padding: 4px;
  border-radius: 3px;
  box-shadow: rgb(120 118 118) 0px 2px;
  /* Single text colour; the earlier duplicate `color: #333` was always overridden by this one. */
  color: #003;
 }
	# Basic Formatting
	bold

	italic (friendly)
	//italic//

	__underline__

	~~strikethrough~~

	\|\|spoiler\|\|

	`inline code` (friendly)
	``inline code``

	# Blockquotes
	> single line blockquote (friendly)
	>> single line blockquote (does not stack with previous due to different > indents?)

	# Custom Expressions
	tag:argument[attributes] (friendly but more limitations)
	[tag:argument attributes]

	[link: https://example.com -> Example Website]
	https://example.com[Example Website]

	# Blocks
	argument and [attributes] can be elided

	more backticks ` can be added to require matching that amount of backticks
	this can help with including other blocks inside of the block

	implementation would determine what tags are defined for blocks to use

	```
	default block (usually codeblock)
	```

	```tag:argument[attributes]
	text
	```

	```[tag:argument attributes]
	text
	```

	```[code:js]```

	```code:js
	```

	(friendly definitions)
	```js
	```

	```blockquote
	multiline
	blockquote
	```

	``````text:inline[color: gray, font-size: large, font-family: monospace]
	```
	large monospaced text
	```
	``````

	idealy it'd parse
	```
	normal //italic and bold// normal
	>> //blockquote//

	===js
	console.log("hello world")
	===
	```
	as
	```
	{
	text: "normal //italic and bold// ...",
	entities: [
	Text { offset: 0, length: 7 },
	Text { offset: 9, length: 7, italic: true },
	Text { offset: 18, length: 8, italic: true, bold: true },
	Text { offset: 30, length: 8 },
	Blockquote { entities: [
	{ type: .text, offset: 43, length: 10, italic: true }
	]},
	Text { offset: 56, length: 1 },
	Block {
	tag: { offset: 60, length: 2 },
	content: { offset: 63, length: 27 }
	}
	]
	}
	```
	interface RegExpMatchArray {
	indices?: RegExpIndicesArray;
	}

	interface RegExpExecArray {
	indices?: RegExpIndicesArray;
	}

	interface RegExpIndicesArray extends Array<[number, number]> {
	groups?: {
	[key: string]: [number, number] \| undefined
	}
	}

	interface RegExp {
	/**
	* Returns a Boolean value indicating the state of the hasIndices flag (d) used with with a regular expression.
	* Default is false. Read-only.
	*/
	readonly hasIndices: boolean
	}

	type ValueOf<T> = T[keyof T]


	type Span = { start: number, end: number }

	type Indice = [number, number]

	type Marker = { type: MarkerType, indice: Indice }
	type Entity =
	\| { type: MarkerType, from: Indice, to: Indice }
	\| { type: 'block', from: Indice, to: Indice, expression?: TextExpression }

	type MarkerType = ValueOf<typeof MARKER_TYPE> \| 'blockquote' \| 'line' \| 'text'

	const enum EventType {
	Open = "open",
	Close = "close"
	}

	type EntityEvent =
	\| { type: EventType, marker: MarkerType, indice: Indice }
	\| { type: 'block', indice: Indice, inner: Indice, outer: Indice, expression?: TextExpression }

	const MARKER_TYPE = {
	'**': 'bold',
	'//': 'italic',
	'__': 'underline',
	'~~': 'strikethrough',
	'\|\|': 'spoiler',
	'``': 'code',
	} as const


	type TextExpression = {
	tag: string,
	arg?: string,
	from: Indice,
	to: Indice,
	}

	function parseCustomExpression(source: string): TextExpression \| undefined {
	if (source[0] != '[') return
	let tag: string \| undefined

	for (let pos = 0; pos < source.length; pos++) {
	const char = source[pos]

	switch (char) {
	case '\n': {
	return
	}
	case ':':
	tag ??= source.slice(1, pos)
	break
	case '\\':
	pos += 1
	break
	case ']': {
	if (!tag) return
	return {
	tag,
	arg: source.slice(tag.length + 2, pos),
	from: [0, (tag?.length ?? 0)],
	to: [pos, pos + 1]
	}
	}
	}
	}

	return
	}

	function parseBlockCustomExpression(source: string): TextExpression \| undefined {
	let expr = parseCustomExpression(source)
	if(expr) return expr
	const expressionSugarRe = /^(\w+)(?::(\w+))?$/m
	const match = source.match(expressionSugarRe)

	console.log(match)

	if(match) {
	return {
	tag: match[1],
	arg: match[2],
	from: [0, 0], // ???
	to: [match[0].length, match[0].length]
	}
	}

	return
	}

	// todo: there should be a better way to implement blockquote continuations
	function parse(source: string) {
	let markers: Marker[] = []
	let entities: Entity[] = []

	let tokens = [...source.matchAll(/```\|\\\|__\|\/\/\|~~\|\\|\\|\|``\|(?:^\|\n)>> \|\n\|$\|\w:\w+/dgm)]

	for (let i = 0; i < tokens.length; i++) {
	let match = tokens[i]
	let token = match[0]
	let indice = match.indices![0]

	switch (token) {
	// '': EOF
	case '':
	case '\n': {
	const prevMarker = markers.find(marker => marker.type === 'blockquote')
	if (prevMarker) {
	entities.push({
	type: 'blockquote',
	from: prevMarker.indice,
	to: indice,
	})
	}


	const prevContinue = markers.find(marker => marker.type === 'line')
	if (prevContinue) {
	entities.push({
	type: 'line',
	from: prevContinue.indice,
	to: indice,
	})
	}
	markers = []
	break
	}
	case '>> ':
	case '\n>> ': {
	const prevMarker = markers.find(marker => marker.type === 'blockquote')
	const prevContinue = markers.find(marker => marker.type === 'line')
	if (prevContinue) {
	entities.push({
	type: 'line',
	from: prevContinue.indice,
	to: indice,
	})
	}

	// make markers per-line
	markers = []
	markers.push(prevMarker ?? { type: 'blockquote', indice })

	if (prevMarker) {
	markers.push({ type: 'line', indice })
	}
	break
	}
	case '```': {
	const nextIndex = tokens.slice(i + 1).findIndex(t => t[0] == '```')
	if (nextIndex >= 0) {
	const nextIndice = tokens[i + 1 + nextIndex].indices![0]

	const expression = parseBlockCustomExpression(source.slice(indice[1], nextIndice[0]))
	const currIndice: Indice = [indice[0], indice[1] + (expression?.to?.[1] ?? 0)]

	entities.push({ type: 'block', from: currIndice, to: nextIndice, expression })

	i = nextIndex + 1
	}
	break
	}
	case '``': {
	const nextIndex = tokens.slice(i + 1).findIndex(t => t[0] == '``')
	if (nextIndex >= 0) {
	const currIndice = indice
	const nextIndice = tokens[i + 1 + nextIndex].indices![0]
	entities.push({ type: 'code', from: currIndice, to: nextIndice })

	i = nextIndex + 1
	}
	break
	}
	case '**':
	case '//':
	case '__':
	case '~~':
	case '\|\|': {
	const type = MARKER_TYPE[token]
	const prevIndex = markers.findIndex(marker => marker.type === type)
	if (prevIndex >= 0) {
	const prevMarker = markers[prevIndex]
	const currMarker = { type, indice }

	// remove unfinished markers
	markers.splice(prevIndex, markers.length)

	entities.push({ type, from: prevMarker.indice, to: currMarker.indice })
	} else {
	markers.push({ type, indice })
	}
	break
	}
	default: {
	throw "Invalid Token"
	}
	}
	}

	return entities
	}

	const inRange = (outer: Entity, inner: Entity) =>
	outer.from[1] < inner.from[1] && inner.to[0] < outer.to[0]

	function intoEvents(entities: Entity[]): EntityEvent[] {
	return entities
	.flatMap<EntityEvent>(e => {
	if (e.type === 'block') {
	return {
	type: 'block',
	expression: e.expression,
	inner: [e.from[1], e.to[0]],
	outer: [e.from[0], e.to[1]],
	indice: [e.from[0], e.to[1]]
	}
	} else {
	return [
	{ type: EventType.Open, marker: e.type, indice: e.from },
	{ type: EventType.Close, marker: e.type, indice: e.to }
	]
	}
	})
	.sort((a, b) => a.indice[0] - b.indice[0])
	}

	function render(events: EntityEvent[], text: string) {
	const spans = []
	let lastPos = 0

	for (const event of events) {
	switch (event.type) {
	case EventType.Open: {
	spans.push(`${text.slice(lastPos, event.indice[0])}<span class="${event.marker}">`)
	break
	}
	case EventType.Close: {
	spans.push(`${text.slice(lastPos, event.indice[0])}</span>`)
	break
	}
	case 'block': {
	switch (event?.expression?.tag) {
	// NOT SEMANTIC, DEMONSTRATION ONLY!
	case 'code': {
	spans.push(`<pre class="code" lang="${event.expression.arg}">${text.slice(event.inner[0], event.inner[1]).trim()}</pre>`)
	break
	}
	}
	break
	}
	}
	lastPos = event.indice[1]
	}

	spans.push(text.slice(lastPos))

	return spans.join('')
	}

	// const text = '>> 0//1__2__3__4__//5\n>> next line\n>> next line\n\n>> new blockquote'
	const text = '```code:js\nfoo bar```'
	const entities = parse(text)
	const events = intoEvents(entities)
	const html = render(events, text)

	console.log(entities)
	console.log(text)
	console.log(html)
	body {
	white-space: pre;
	background-color: #263238;
	color: #fcfcff;
	font-family: system-ui;
	padding: 30px;
	}

	.italic {
	font-style: italic;
	}

	.underline {
	text-decoration: underline;
	}

	.bold {
	font-weight: bold;
	}

	.blockquote {
	display: block;

	padding: 5px;
	border-inline-start: 5px solid #ccc;
	background: #fff1;
	border-radius: 0 4px 4px 0;
	margin-bottom: 5px;
	}

	.line {
	display: block;
	}

	pre.code {
	position: relative;
	padding: 5px;
	border: 2px solid #ccc;
	background: #fff1;
	border-radius: 4px;
	margin-bottom: 5px;
	}

	pre.code::before {
	content: attr(lang);
	position: absolute;
	top: -10px;
	right: 2px;
	background: #e7d34a;
	color: #333;
	padding: 4px;
	border-radius: 3px;
	box-shadow: rgb(120 118 118) 0px 2px;
	color: #003;
	}