Skip to content

Instantly share code, notes, and snippets.

@znz
Last active December 25, 2015 18:19
Show Gist options
  • Save znz/7019582 to your computer and use it in GitHub Desktop.
Save znz/7019582 to your computer and use it in GitHub Desktop.
Replace control characters with Unicode control pictures
#!/usr/bin/env ruby
# Filter: copy ARGF (named files or stdin) to stdout, replacing C0 control
# characters with the matching glyph from the Unicode "Control Pictures"
# block (U+2400 + code point). Tab, CR and LF are passed through unchanged.

# Lookup table: "\x00" => U+2400, "\x01" => U+2401, ... "\x1F" => U+241F.
replace_chars = (0...0x20).each_with_object({}) do |code, table|
  table[code.chr] = (0x2400 + code).chr("utf-8")
end
#replace_chars[" "] = "\u{2423}"

# Read the input in chunks terminated by ">" instead of by newline.
while (chunk = ARGF.gets(">"))
  # The character-class intersection excludes \t, \r and \n from the match.
  print chunk.gsub(/[\x00-\x1F&&[^\t\r\n]]/, replace_chars)
end
#!/usr/bin/env ruby
require 'nokogiri'
# Print the interpreter and library versions so recorded output can be tied
# to a specific environment. Kernel#puts writes each argument on its own line.
puts(
  RUBY_DESCRIPTION,
  "Nokogiri::VERSION = #{Nokogiri::VERSION}",
  "Nokogiri::LIBXML_VERSION = #{Nokogiri::LIBXML_VERSION}"
)
# SAX handler that echoes parser events and collects text per open element.
#
# Every callback prints its event as an array (via Kernel#p) when DEBUG_PRINT
# holds a truthy value for that event's symbol. Character and CDATA content is
# appended to one buffer per currently open element, so when an element
# closes its buffer contains the text of the element and all its descendants.
class MyDocument < Nokogiri::XML::SAX::Document
  # Per-event switches. Keys that were never set fall back to the value
  # $VERBOSE had when this class body was evaluated.
  DEBUG_PRINT = Hash.new($VERBOSE)
  #DEBUG_PRINT[:fields_data] = true

  def xmldecl(version, encoding, standalone)
    debug_event(:xmldecl, version, encoding, standalone)
  end

  def start_document
    debug_event(:start_document)
    # One text buffer per open element, innermost last.
    @text_stack = []
  end

  def end_document
    debug_event(:end_document)
  end

  def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
    debug_event(:start_element_namespace, name, attrs, prefix, uri, ns)
    @text_stack.push ""
  end

  def end_element_namespace(name, prefix = nil, uri = nil)
    debug_event(:end_element_namespace, name, prefix, uri)
    texts = @text_stack.pop
    debug_event(:end_element_namespace_texts, name, texts)
  end

  def characters(string)
    debug_event(:characters, string)
    append_text(string)
  end

  def comment(string)
    debug_event(:comment, string)
  end

  def warning(string)
    debug_event(:warning, string)
  end

  def error(string)
    debug_event(:error, string)
  end

  def cdata_block(string)
    debug_event(:cdata_block, string)
    append_text(string)
  end

  def processing_instruction(name, content)
    debug_event(:processing_instruction, name, content)
  end

  private

  # Print [event, *details] when debugging is enabled for +event+.
  def debug_event(event, *details)
    p([event, *details]) if DEBUG_PRINT[event]
  end

  # Append +string+ to the buffer of every element that is currently open.
  def append_text(string)
    @text_stack.each { |buffer| buffer << string }
  end
end
# Demo driver: build an XML document containing every code point from
# U+0000 to U+0020, replace control characters with control pictures, and
# show what the SAX parser reports for the result.
parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)

# Lookup table ("\x00" => U+2400 ... " " => U+2420) and the sample document
# are built in the same pass.
replace_chars = {}
xml = "<x>"
0.upto(0x20) do |code|
  char = code.chr
  replace_chars[char] = [0x2400 + code].pack('U')
  xml << char
end
xml << " </x>"

c0_except_whitespace = /[\x00-\x1F&&[^\t\r\n]]/
c0_and_space = /[\x00-\x20]/

# Pass 1: replace control characters but keep tab, CR, LF and space, then
# feed the result to the parser.
xml = xml.gsub(c0_except_whitespace, replace_chars)
p xml
parser.parse(xml)

# Pass 2: also replace tab, CR, LF and space.
xml = xml.gsub(c0_and_space, replace_chars)
p xml

# Pass 3: remap space to U+2423 and substitute again.
# NOTE(review): pass 2 already replaced every character in \x00-\x20, so this
# substitution finds nothing to replace and prints the same string as pass 2.
replace_chars[" "] = "\u{2423}"
xml = xml.gsub(c0_and_space, replace_chars)
p xml
% ruby -v replace-control-chars.rb
ruby 2.0.0p247 (2013-06-27 revision 41674) [x86_64-darwin11.4.2]
ruby 2.0.0p247 (2013-06-27 revision 41674) [x86_64-darwin11.4.2]
Nokogiri::VERSION = 1.6.0
Nokogiri::LIBXML_VERSION = 2.8.0
"<x>␀␁␂␃␄␅␆␇␈\t\n␋␌\r␎␏␐␑␒␓␔␕␖␗␘␙␚␛␜␝␞␟ </x>"
[:start_document]
[:start_element_namespace, "x", [], nil, nil, []]
[:characters, "␀␁␂␃␄␅␆␇␈\t\n␋␌\n␎␏␐␑␒␓␔␕␖␗␘␙␚␛␜␝␞␟ "]
[:end_element_namespace, "x", nil, nil]
[:end_element_namespace_texts, "x", "␀␁␂␃␄␅␆␇␈\t\n␋␌\n␎␏␐␑␒␓␔␕␖␗␘␙␚␛␜␝␞␟ "]
[:end_document]
"<x>␀␁␂␃␄␅␆␇␈␉␊␋␌␍␎␏␐␑␒␓␔␕␖␗␘␙␚␛␜␝␞␟␠␠</x>"
"<x>␀␁␂␃␄␅␆␇␈␉␊␋␌␍␎␏␐␑␒␓␔␕␖␗␘␙␚␛␜␝␞␟␠␠</x>"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment