Created
November 11, 2009 22:37
-
-
Save trotter/232388 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # usage: ruby convert-html-to-json.rb PATH_TO_FILE | |
| require 'rubygems' | |
| require 'hpricot' | |
| require 'activesupport' | |
# Renders an element node and its descendants as nested, builder-style calls:
#
#   div({"class":"box"},
#       p('hello')
#   )
#
# node         - object responding to elem?, name, attributes, children and
#                inner_text (an Hpricot node in this script). attributes is
#                expected to be Hash-like (empty?, map yielding key/value).
# indent_level - number of spaces to indent this node; each nesting level
#                adds 4.
#
# Returns the rendered String (starting with a newline), or nil when node is
# nil or is not an element (text, comments, ...).
#
# If an element has at least one element child, only its element children are
# rendered; any sibling text nodes are dropped.
def node_to_s(node, indent_level=0)
  return nil unless node && node.elem?

  indentation = " " * indent_level
  attributes = node.attributes
  has_attributes = !attributes.empty?
  # NOTE(review): Hpricot is believed to return nil rather than [] for some
  # childless elements - confirm. Normalising to [] is harmless either way.
  children = node.children || []

  ret = "\n%s%s(" % [indentation, node.name]
  ret << attributes_to_json(attributes) if has_attributes

  if children.any? { |child| child.elem? }
    ret << "," if has_attributes
    rendered = children.map { |child| node_to_s(child, indent_level + 4) }
    ret << rendered.compact.join(',')
    ret << "\n%s)" % [indentation]
  else
    text = node.inner_text.to_s
    # /\S/ is the "not blank" check: at least one non-whitespace character.
    if !children.empty? || text =~ /\S/
      ret << ", " if has_attributes
      ret << "'%s'" % [escape_single_quoted(text)]
    end
    ret << ")"
  end

  ret
end

# Serializes attribute pairs as a JSON object with ", " between pairs.
# Each key and value is encoded separately so commas inside a value are left
# untouched (a blanket gsub on the encoded hash would rewrite them).
def attributes_to_json(attributes)
  pairs = attributes.map { |key, value| "#{key.to_s.to_json}:#{value.to_json}" }
  "{#{pairs.join(', ')}}"
end

# Escapes backslashes, single quotes and line breaks so text can be embedded
# between single quotes without terminating or breaking the literal.
def escape_single_quoted(text)
  # Block form avoids backreference interpretation of the replacement.
  text.gsub(/[\\'\n\r]/) do |char|
    case char
    when "\n" then "\\n"
    when "\r" then "\\r"
    else "\\#{char}"
    end
  end
end
# Script entry point: parse the HTML file named by the first command-line
# argument and print the rendered tree of its root element.
# Without the guard, a missing argument surfaces as an opaque TypeError from
# File.read(nil).
abort "usage: ruby convert-html-to-json.rb PATH_TO_FILE" if ARGV.empty?

file = File.read(ARGV[0])
doc = Hpricot.parse(file)
print node_to_s(doc.root)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment