Created
June 21, 2012 20:13
-
-
Save swinton/2968256 to your computer and use it in GitHub Desktop.
Convert special characters in text to HTML entities.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| """ | |
| Convert special characters in text to HTML entities. | |
| NOW WITH ADDED UNICODE SUPPORT! | |
| Usage: | |
| $ echo "<b>foo <i>bar</i></b>" | htmlescape.py | |
| """ | |
| import codecs | |
| import sys | |
def html_escape(text):
    """
    Convert special characters in text to HTML entities.

    ``text`` is coerced to a text (unicode) string first, so non-string
    values such as numbers are accepted. Each of ``&``, ``"``, ``'``, ``>``
    and ``<`` is replaced by its named entity; every other character is
    passed through unchanged.

    Returns the escaped text string.
    """
    html_escape_table = {
        u"&": u"&amp;",
        u'"': u"&quot;",
        # NOTE: ``&apos;`` is defined by XML and HTML5 but not by HTML 4.
        u"'": u"&apos;",
        u">": u"&gt;",
        u"<": u"&lt;",
    }
    # ``unicode`` only exists on Python 2; ``str`` is the text type on
    # Python 3, where looking up ``unicode`` raises NameError.
    try:
        text_type = unicode
    except NameError:
        text_type = str
    # Convert to unicode
    text = text_type(text)
    # Replacement is done per character in a single pass, so the ``&`` that
    # appears inside an emitted entity is never escaped a second time.
    return u"".join(html_escape_table.get(char, char) for char in text)
if __name__ == "__main__":
    # Work on the underlying byte streams so decoding/encoding is explicit.
    # Python 3 exposes them as ``.buffer``; on Python 2 the standard streams
    # are already byte streams and have no such attribute.
    raw_stdin = getattr(sys.stdin, "buffer", sys.stdin)
    raw_stdout = getattr(sys.stdout, "buffer", sys.stdout)

    # Read ALL of stdin (not just the first line), strip surrounding
    # whitespace, then decode, so text is a unicode object
    text = raw_stdin.read().strip().decode(sys.stdin.encoding or "utf-8")

    # Write UTF-8 to stdout, followed by a newline as ``print`` would
    raw_stdout.write((html_escape(text) + u"\n").encode("utf-8"))
    raw_stdout.flush()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment