Created
June 8, 2022 18:42
-
-
Save martijn/07d0029025229054137fa5c6622d09c1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "compress/zip" | |
# Minimal event-driven (SAX-style) XML tokenizer.
#
# The input IO is consumed incrementally: text up to the next `<` is reported
# through `characters`, and the text between `<` and the following `>` is
# interpreted as a tag and reported through `start_tag` / `end_tag`.
# Subclasses implement the three callbacks.
#
# Known limitations: this is not a conforming XML parser. Processing
# instructions (`<?xml ...?>`), comments and CDATA sections are not recognised
# and are reported as ordinary start tags; a `>` inside an attribute value
# terminates the tag early; only double-quoted attribute values are matched;
# entities are not decoded.
abstract class SaxParser
  # Matches a single `name="value"` attribute. The name may contain the
  # characters commonly found in XML names (letters, digits, `_`, `:`, `.`,
  # `-`) so that namespaced attributes such as `xml:space` keep their full
  # name. The `m` flag lets a value span several lines.
  ATTR_REGEX = /([[:alnum:]_:.\-]+)="(.*?)"/m

  # *xml* is the stream to read from; it is consumed by `parse!`.
  def initialize(xml : IO)
    @xml = xml
  end

  # Reads the stream until EOF, invoking the callbacks in document order.
  def parse!
    loop do
      # Everything before the next '<' is character data (delimiter chomped).
      chars = @xml.gets('<', true)
      break if chars.nil? # EOF reached
      characters(chars) unless chars.empty?

      # Everything up to the next '>' is the tag's content.
      tag = @xml.gets('>', true)
      break if tag.nil? # EOF directly after '<'

      if tag.starts_with?('/')
        # Whitespace is permitted before the closing '>' of an end tag.
        end_tag(tag[1..].rstrip)
      else
        emit_open_tag(tag)
      end
    end
  end

  # Reports an opening (or self-closing) tag given the raw text between
  # `<` and `>`.
  private def emit_open_tag(tag : String)
    self_closing = tag.ends_with?('/')
    # Drop the self-closing slash first so it can never end up in the tag name
    # (`<br/>` must yield "br", not "br/").
    body = tag.rchop('/')

    # Name and attributes may be separated by any whitespace, not only ' '.
    tag_name, _, attrs = body.partition(/\s+/)

    attr_hash = {} of String => String
    attrs.scan(ATTR_REGEX) { |match| attr_hash[match[1]] = match[2] }

    # nil signals "no attributes", including for tags such as `<br />`.
    start_tag(tag_name, attr_hash.empty? ? nil : attr_hash)

    # A self-closing tag is reported as an immediate start/end pair.
    end_tag(tag_name) if self_closing
  end

  # Called for every opening tag; *params* is nil when the tag has no
  # attributes.
  abstract def start_tag(tag : String, params : Hash(String, String)?)

  # Called with each non-empty run of text found between tags.
  abstract def characters(chars : String)

  # Called for every closing tag, and right after `start_tag` for a
  # self-closing tag.
  abstract def end_tag(tag : String)
end
# Example handler that echoes every parser event to standard output,
# re-rendering tags in angle-bracket form.
class MyParser < SaxParser
  # Prints a run of character data as-is.
  def characters(chars)
    puts chars
  end

  # Prints the opening tag, followed by its attributes when any were given.
  def start_tag(tag, attrs)
    # No attribute hash: print the bare tag and stop.
    return puts("<#{tag}>") if attrs.nil?

    rendered = attrs.map { |name, value| "#{name}=\"#{value}\"" }
    puts "<#{tag} #{rendered.join(" ")}>"
  end

  # Prints the closing tag.
  def end_tag(tag)
    puts "</#{tag}>"
  end
end
# Demo: stream the first worksheet of an .xlsx workbook through MyParser.
# The block form of `Compress::Zip::File.open` closes the archive when the
# block returns, so the underlying file handle is not leaked.
Compress::Zip::File.open("./office365-xl7.xlsx") do |zip|
  zip["xl/worksheets/sheet1.xml"].open do |xml|
    MyParser.new(xml).parse!
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment