HoraceBury · February 14, 2014 13:37 · cjfpainter · Feb 25, 2015 · AleksCore · Oct 7, 2016
diff --git a/cleanhtml.lua b/cleanhtml.lua
 -- Convert a fragment of HTML into plain text: decode a few common entities,
 -- turn <br> and </p> into line breaks, strip every remaining tag, and
 -- normalise/trim new lines.
 local t = [[ your html ]]

 -- Ordered list of { pattern, replacement } pairs (Lua patterns, not regex).
 -- The order is important to avoid conflicts:
 --   * "&amp;" is decoded AFTER the numeric entities, so that an escaped
 --     entity such as "&amp;#151;" ends up as the literal text "&#151;"
 --     instead of being decoded a second time into "-".
 --   * the generic tag stripper runs after the <br> and </p> rules, because
 --     it would otherwise remove those tags before they become new lines.
 local cleaner = {
 	{ "&#151;", "-" }, -- em dash
 	{ "&#146;", "'" }, -- right single quote
 	{ "&#147;", "\"" }, -- left double quote
 	{ "&#148;", "\"" }, -- right double quote
 	{ "&#150;", "-" }, -- en dash
 	{ "&#160;", " " }, -- non-breaking space
 	{ "&amp;", "&" }, -- decode ampersands (after the other entities, see above)
 	{ "<br ?/?>", "\n" }, -- all <br> tags whether terminated or not (<br> <br/> <br />) become new lines
 	{ "</p>", "\n" }, -- ends of paragraphs become new lines
 	{ "(%b<>)", "" }, -- all other html elements are completely removed (must be done after the tag rules above)
 	{ "\r", "\n" }, -- carriage returns become new lines
 	{ "\n+", "\n" }, -- collapse runs of new lines into a single new line
 	{ "^\n*", "" }, -- trim new lines from the start...
 	{ "\n*$", "" }, -- ... and end
 }

 --- Apply every rule in `cleaner`, in order, to a string.
 -- @tparam string html text that may contain HTML tags and entities
 -- @treturn string the cleaned text
 local function clean_html( html )
 	for i = 1, #cleaner do
 		local rule = cleaner[i]
 		-- gsub also returns the match count; the single-target assignment
 		-- keeps only the resulting string
 		html = string.gsub( html, rule[1], rule[2] )
 	end
 	return html
 end

 -- clean html from the string
 t = clean_html( t )

 print("["..t.."]") -- print the string with end indicators
	-- Convert a fragment of HTML into plain text: decode a few common entities,
	-- turn <br> and </p> into line breaks, strip every remaining tag, and
	-- normalise/trim new lines.
	local t = [[ your html ]]

	-- Ordered list of { pattern, replacement } pairs (Lua patterns, not regex).
	-- The entity patterns must be spelled out as literal entity text
	-- ("&#151;" etc.). An empty pattern "" matches at every position of the
	-- subject, so gsub would insert the replacement between all characters.
	-- The order is important to avoid conflicts: "&amp;" is decoded after the
	-- numeric entities so that "&amp;#151;" yields the literal text "&#151;"
	-- rather than being decoded twice, and the generic tag stripper runs after
	-- the <br> and </p> rules so those tags can still become new lines.
	local cleaner = {
		{ "&#151;", "-" }, -- em dash
		{ "&#146;", "'" }, -- right single quote
		{ "&#147;", "\"" }, -- left double quote
		{ "&#148;", "\"" }, -- right double quote
		{ "&#150;", "-" }, -- en dash
		{ "&#160;", " " }, -- non-breaking space
		{ "&amp;", "&" }, -- decode ampersands (after the other entities, see above)
		{ "<br ?/?>", "\n" }, -- all <br> tags whether terminated or not (<br> <br/> <br />) become new lines
		{ "</p>", "\n" }, -- ends of paragraphs become new lines
		{ "(%b<>)", "" }, -- all other html elements are completely removed (must be done after the tag rules above)
		{ "\r", "\n" }, -- carriage returns become new lines
		{ "\n+", "\n" }, -- collapse runs of new lines into a single new line
		{ "^\n*", "" }, -- trim new lines from the start...
		{ "\n*$", "" }, -- ... and end
	}

	--- Apply every rule in `cleaner`, in order, to a string.
	-- @tparam string html text that may contain HTML tags and entities
	-- @treturn string the cleaned text
	local function clean_html( html )
		for _, rule in ipairs( cleaner ) do
			-- gsub also returns the match count; the single-target assignment
			-- keeps only the resulting string
			html = string.gsub( html, rule[1], rule[2] )
		end
		return html
	end

	-- clean html from the string
	t = clean_html( t )

	print("["..t.."]") -- print the string with end indicators