Created
November 6, 2017 15:17
-
-
Save trevordevore/5eaa7333841d15f5bbbde490636dc143 to your computer and use it in GitHub Desktop.
LiveCode htmlText to Markdown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Convert LiveCode htmlText into Markdown.
--
-- pText:   the htmlText to convert.
-- pEscape: pass false to skip backslash-escaping of Markdown punctuation.
--          Any other value (including empty / omitted) turns escaping on.
--
-- Returns the converted text as produced by textEncode(..., "utf8").
-- Runs of Courier text come back wrapped in <pre>...</pre>.
--
-- Syntax reference: http://daringfireball.net/projects/markdown/syntax
function htmlTextToMarkdown pText, pEscape
   local tEscapeOn, tTagRow, tTagName, tMarker

   put (pEscape is not false) into tEscapeOn

   ## Fold <b> into <strong> so only one bold spelling remains.
   replace "<b>" with "<strong>" in pText
   replace "</b>" with "</strong>" in pText

   ## Markdown links have no notion of a target, so drop it.
   _RemoveATagTarget pText

   ## Note that color is currently stripped (nothing below preserves it).
   ## NOTE(review): escaping is applied to the raw htmlText, so characters
   ## inside tag attributes (e.g. "." in an href) and in numeric entities
   ## ("#") are escaped as well -- confirm this is intended.
   if tEscapeOn then
      _EscapeCharacters pText, "\`*_{}[]()#+-.!", "\"
   end if

   ## Lists: every item becomes its own paragraph with a Markdown bullet.
   _ListReplace pText, "ol", "<p></p>", "", "<p>1. ", "</p>"
   _ListReplace pText, "ul", "<p></p>", "", "<p>* ", "</p>"

   ## "Code" is text set in Courier. Mark it with placeholders that survive
   ## the htmlText -> plain text round trip below.
   _ConvertFontToPreTag "Courier", pText, "___pre___", "---pre---"

   ## Inline emphasis, one "tagName,marker" row per tag, processed in order.
   ## Every <b> was already rewritten to <strong> above, so the b row finds
   ## nothing. Markdown has no underline, so <u> is rendered as italics.
   repeat for each line tTagRow in "i,*" & cr & "b,**" & cr & "strong,**" & cr & "u,*"
      put item 1 of tTagRow into tTagName
      put item 2 of tTagRow into tMarker
      _OpeningTagReplace pText, "<" & tTagName & ">", tMarker
      _ClosingTagReplace pText, "</" & tTagName & ">", tMarker
   end repeat

   ## Replacing </a> with itself only moves spaces that sit before the
   ## closing tag to after it.
   _ClosingTagReplace pText, "</a>", "</a>"

   ## <a href="URL">text</a>  ->  [text](URL)
   _ConvertAHrefToMarkup pText, "[", "]", "(", ")"

   ## Let a temporary field turn the remaining htmlText into plain text,
   ## then encode that text as UTF-8. "create field" leaves a reference to
   ## the new field in "it".
   lock screen
   lock messages
   create field "htmlTextToMarkdownConverter"
   set the HTMLText of it to pText
   put textEncode(the text of it, "utf8") into pText
   delete it
   unlock messages
   unlock screen

   ## Swap the placeholders for the real pre tags.
   replace "___pre___" with "<pre>" in pText
   replace "---pre---" with "</pre>" in pText

   return pText
end htmlTextToMarkdown
-- Strip target="_blank" from every <a href="..."> tag in pText.
--
-- pText is passed by reference and edited in place. Only the span from the
-- "<" of each opening tag up to the quote that precedes its ">" is touched.
private command _RemoveATagTarget @pText
   local tSkip, tHit, tTagStart, tTagEnd, tSpans, tSpan, tTargetAttr

   put " target=" & quote & "_blank" & quote into tTargetAttr

   ## Pass 1: collect "start,end" spans. offset() reports positions relative
   ## to the skipped prefix, so each hit is rebased to an absolute position.
   put 0 into tSkip
   repeat forever
      put offset("<a href=" & quote, pText, tSkip) into tHit
      if tHit = 0 then exit repeat
      put tSkip + tHit into tTagStart

      put offset(quote & ">", pText, tTagStart) into tHit
      if tHit = 0 then exit repeat
      put tTagStart + tHit into tTagEnd

      ## Newest span goes first so that pass 2 walks the text back to front.
      put tTagStart & comma & tTagEnd & cr before tSpans
      put tTagEnd + 1 into tSkip
   end repeat
   delete the last char of tSpans

   ## Pass 2: working from the end keeps earlier positions valid even
   ## though each replacement shortens the text.
   repeat for each line tSpan in tSpans
      replace tTargetAttr with empty in char (item 1 of tSpan) to (item 2 of tSpan) of pText
   end repeat

   return empty
end _RemoveATagTarget
-- Prefix every occurrence of each character in pCharsToEscape with
-- pEscapeChar.
--
-- pString:        text to escape; passed by reference and edited in place.
-- pCharsToEscape: the characters to escape, in any order.
-- pEscapeChar:    the escape prefix; defaults to "\" when empty.
--
-- The escape character itself is always handled first (when it is listed),
-- so escapes inserted for other characters are never escaped again. A
-- character listed more than once is only escaped once.
private command _EscapeCharacters @pString, pCharsToEscape, pEscapeChar
   local tHandled

   if pEscapeChar is empty then put "\" into pEscapeChar

   ## Match exactly: with the default caseSensitive of false, replacing a
   ## letter would also rewrite its other-case form and change its case.
   ## The property resets by itself when this handler finishes.
   set the caseSensitive to true

   if pEscapeChar is in pCharsToEscape then
      replace pEscapeChar with pEscapeChar & pEscapeChar in pString
      put pEscapeChar into tHandled
   end if

   repeat for each char theChar in pCharsToEscape
      if theChar is in tHandled then next repeat
      replace theChar with pEscapeChar & theChar in pString
      put theChar after tHandled
   end repeat

   return empty
end _EscapeCharacters
-- Rewrite every <pTag>...</pTag> list in pText (edited in place).
--
-- pTag:        list tag name without brackets, e.g. "ol" or "ul".
-- pTagPrefix:  text that replaces the opening list tag.
-- pTagSuffix:  text that replaces the closing list tag.
-- pItemPrefix: text that replaces "<li>" & cr & "<p>" at the start of an item.
-- pItemSuffix: text that replaces "</p>" & cr & "</li>" at the end of an item.
--
-- Each block runs from an opening tag to the first closing tag that follows
-- it. Scanning stops at the first opening tag that has no closing tag after it.
private command _ListReplace @pText, pTag, pTagPrefix, pTagSuffix, pItemPrefix, pItemSuffix
   local tOpenTag, tCloseTag, tSkip, tHit, tBlockStart, tBlockEnd, tBlock

   put "<" & pTag & ">" into tOpenTag
   put "</" & pTag & ">" into tCloseTag

   put 0 into tSkip
   repeat forever
      put offset(tOpenTag, pText, tSkip) into tHit
      if tHit = 0 then exit repeat
      put tSkip + tHit into tBlockStart

      ## Look for the closing tag only AFTER the opening tag. Searching from
      ## the start of the text would pick up a stray closing tag that sits
      ## before the list, giving a reversed range that never consumes the
      ## opening tag.
      put offset(tCloseTag, pText, tBlockStart) into tHit
      if tHit = 0 then exit repeat
      put tBlockStart + tHit + length(tCloseTag) - 1 into tBlockEnd

      put char tBlockStart to tBlockEnd of pText into tBlock
      replace tOpenTag with pTagPrefix in tBlock
      replace tCloseTag with pTagSuffix in tBlock
      replace "<li>" & cr & "<p>" with pItemPrefix in tBlock
      replace "</p>" & cr & "</li>" with pItemSuffix in tBlock
      put tBlock into char tBlockStart to tBlockEnd of pText

      ## Resume after the rewritten block so replacement text is never
      ## rescanned.
      put tBlockStart + length(tBlock) - 1 into tSkip
   end repeat

   return empty
end _ListReplace
-- Wrap runs of <font face="pFont"> text in pText with pre tags.
--
-- pFont:          font face to look for, e.g. "Courier".
-- pText:          htmlText; passed by reference and edited in place.
-- pOpenPreTag:    text placed before a run ("<pre>" if fewer than 3 params).
-- pClosingPreTag: text placed after a run ("</pre>" if fewer than 4 params).
--
-- Font tags that follow each other before the next </p> are treated as one
-- run (a <br /> does not end a run, a </p> does). For example
--
--   <p><font face="Courier">This is great</font><br />
--   <font face="Courier">  What is this?</font><br />
--   <font face="Courier">I like it</font></p>
--
-- has its three font tags wrapped by a single pair of pre tags. Inside a
-- run "<br />", "<p>" and "</p>" are removed; the font tags themselves are
-- left in place.
command _ConvertFontToPreTag pFont, @pText, pOpenPreTag, pClosingPreTag
   local tFontTag, tSkip, tHit, tRunStart, tParaEnd
   local tRunsA, tRunCount, tRun, tFrom, tTo, tInner

   if paramCount() < 3 then put "<pre>" into pOpenPreTag
   if paramCount() < 4 then put "</pre>" into pClosingPreTag

   put "<font face=" & quote & pFont & quote into tFontTag
   put 0 into tSkip
   put 0 into tRunCount

   ## tSkip is always the absolute position of the most recent hit.
   ## offset() answers relative to it, hence the "add tHit to tSkip" steps.
   repeat forever
      if tRunStart is empty then
         ## Not inside a run: look for the font tag that opens the next one.
         put offset(tFontTag, pText, tSkip) into tHit
         if tHit = 0 then exit repeat ## no (more) font tags
         add tHit to tSkip
         put tSkip into tRunStart
         next repeat
      end if

      ## Inside a run: the next </p> marks the limit for joining font tags.
      put offset("</p>", pText, tSkip) into tParaEnd
      if tParaEnd > 0 then add tSkip to tParaEnd

      put offset(tFontTag, pText, tSkip) into tHit
      if tHit > 0 and (tHit + tSkip) < tParaEnd then
         ## Another font tag before the paragraph ends: extend the run.
         add tHit to tSkip
         next repeat
      end if

      ## Run is complete: it ends with the </font> that follows its last
      ## font tag.
      put offset("</font>", pText, tSkip) into tHit
      if tHit = 0 then exit repeat ## no closing font tag; shouldn't happen
      add tHit to tSkip

      add 1 to tRunCount
      put tRunStart into tRunsA[tRunCount]["start char no"]
      put tSkip + 6 into tRunsA[tRunCount]["end char no"] ## last char of </font>
      put empty into tRunStart
   end repeat

   ## Rewrite from the last run to the first so stored positions stay valid.
   repeat with tRun = tRunCount down to 1
      put tRunsA[tRun]["start char no"] into tFrom
      put tRunsA[tRun]["end char no"] into tTo

      put char tFrom to tTo of pText into tInner
      replace "<br />" with empty in tInner
      replace "<p>" with empty in tInner
      replace "</p>" with empty in tInner
      put pOpenPreTag & tInner & pClosingPreTag into char tFrom to tTo of pText
   end repeat

   return empty
end _ConvertFontToPreTag
-- Replace every opening tag pTag in pText with pReplacement.
--
-- When spaces sit between the tag and the text that follows, they are moved
-- in front of the replacement, so "<b>  text" becomes "  **text" rather
-- than "**  text". pText is passed by reference and edited in place.
--
-- pTag is inserted into a regular expression as-is.
private command _OpeningTagReplace @pText, pTag, pReplacement
   local tPattern
   local tTagFrom, tTagTo, tGapFrom, tGapTo, tCharFrom, tCharTo

   ## Groups: (tag)(one or more spaces)(first non-space character)
   put "(" & pTag & ")( +?)(\S)" into tPattern

   ## Each pass rewrites the first remaining "tag + spaces + char" match as
   ## "spaces + replacement + char".
   repeat while matchChunk(pText, tPattern, tTagFrom, tTagTo, tGapFrom, tGapTo, tCharFrom, tCharTo)
      put (char tGapFrom to tGapTo of pText) & pReplacement & (char tCharFrom to tCharTo of pText) \
            into char tTagFrom to tCharTo of pText
   end repeat

   ## Whatever is left has no spaces to shuffle: replace it directly.
   replace pTag with pReplacement in pText

   return empty
end _OpeningTagReplace
-- Replace every closing tag pTag in pText with pReplacement.
--
-- When spaces sit directly before the tag, they are moved behind the
-- replacement, so "text  </b>" becomes "text**  " rather than "text  **".
-- pText is passed by reference and edited in place.
--
-- pTag is inserted into a regular expression as-is.
private command _ClosingTagReplace @pText, pTag, pReplacement
   local tPattern
   local tGapFrom, tGapTo, tTagFrom, tTagTo

   ## Groups: (one or more spaces)(tag)
   put "( +?)(" & pTag & ")" into tPattern

   ## Each pass rewrites the first remaining "spaces + tag" match as
   ## "replacement + spaces".
   repeat while matchChunk(pText, tPattern, tGapFrom, tGapTo, tTagFrom, tTagTo)
      put pReplacement & (char tGapFrom to tGapTo of pText) into char tGapFrom to tTagTo of pText
   end repeat

   ## Whatever is left has no spaces to shuffle: replace it directly.
   replace pTag with pReplacement in pText

   return empty
end _ClosingTagReplace
-- Converts <a href="URL">link text</a> into
--   pPreLink & link text & pPostLink & pPreURL & URL & pPostURL
-- e.g. Markdown style links "[link text](url)" when called with
-- "[", "]", "(", ")".
--
-- pText is passed by reference and edited in place. Only opening tags of the
-- exact form <a href="..."> are recognised. Scanning stops at the first
-- anchor that is incomplete (no closing quote + ">" or no </a>).
private command _ConvertAHrefToMarkup @pText, pPreLink, pPostLink, pPreURL, pPostURL
   local tSkip, tHit, tOpenStart, tOpenEnd, tCloseStart
   local tURL, tLinkTextLength, tOpenTagLength

   put 0 into tSkip
   repeat forever
      ## Opening tag: tOpenStart is its "<", tOpenEnd the quote before its ">".
      ## offset() answers relative to the skipped prefix, so rebase each hit.
      put offset("<a href=" & quote, pText, tSkip) into tHit
      if tHit = 0 then exit repeat
      put tSkip + tHit into tOpenStart

      put offset(quote & ">", pText, tOpenStart) into tHit
      if tHit = 0 then exit repeat
      put tOpenStart + tHit into tOpenEnd

      ## <a href=" is 9 chars long, so the URL starts at tOpenStart + 9.
      put char (tOpenStart + 9) to (tOpenEnd - 1) of pText into tURL

      ## Closing tag: tCloseStart is the "<" of </a>.
      put offset("</a>", pText, tOpenEnd + 1) into tHit
      if tHit = 0 then exit repeat
      put tOpenEnd + 1 + tHit into tCloseStart

      ## The link text sits between the ">" (tOpenEnd + 1) and the "</a>".
      ## Only its length is needed; counting the ">" as part of it would make
      ## the resume position below one char too large.
      put tCloseStart - tOpenEnd - 2 into tLinkTextLength

      ## Remove the closing tag first so the opening tag's position is unaffected.
      delete char tCloseStart to (tCloseStart + 3) of pText
      delete char tOpenStart to (tOpenEnd + 1) of pText
      put tOpenEnd + 2 - tOpenStart into tOpenTagLength

      ## The link text has moved left by the length of the opening tag, so the
      ## char that followed it is now at tCloseStart - tOpenTagLength.
      put pPostLink & pPreURL & tURL & pPostURL before char (tCloseStart - tOpenTagLength) of pText
      put pPreLink before char tOpenStart of pText

      ## Resume on the last char of the markup just written. offset() skips
      ## exactly that many chars, so an anchor starting on the very next char
      ## is still found.
      put tOpenStart + length(pPreLink) + tLinkTextLength + length(pPostLink) \
            + length(pPreURL) + length(tURL) + length(pPostURL) - 1 into tSkip
   end repeat

   return empty
end _ConvertAHrefToMarkup
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment