Skip to content

Instantly share code, notes, and snippets.

@zeroeth
Created November 10, 2010 23:15
Show Gist options
  • Save zeroeth/671711 to your computer and use it in GitHub Desktop.
Save zeroeth/671711 to your computer and use it in GitHub Desktop.
require 'rubygems'
require 'nokogiri'
# Place this file in your rails app/lib folder
# Scans every "*.tpl" HTML file in the "tplfiles" directory (relative to the
# current working directory) and collects one Registrar per file in
# #registrars.
class TplParse
  attr_accessor :registrars

  def initialize
    self.registrars = []
  end

  # Convenience entry point: builds a parser, runs #parseparse and returns the
  # parser so the caller can read #registrars.
  def self.doooooooooooit
    parser = new
    parser.parseparse
    parser
  end

  # ELEMENT SCAN
  # Returns the index of the first element whose #text contains +string+, or
  # nil when no element matches.
  #
  # A String is matched literally (regex metacharacters such as "." or "(" have
  # no special meaning); a Regexp is applied as given.
  def index_of(string, elements)
    elements.find_index do |element|
      text = element.text
      string.is_a?(Regexp) ? text =~ string : text.include?(string)
    end
  end

  # Parses each template file and appends a Registrar(name, details) to
  # #registrars.
  #
  # Raises RuntimeError ("<file> is malformed") when a file lacks the heading
  # node or the expected start/end marker paragraphs, or when the end marker
  # does not come after the start marker.
  def parseparse
    # Sorted so the resulting order does not depend on the platform's
    # directory listing order.
    Dir[File.join("tplfiles", "*.tpl")].sort.each do |file_name|
      # Block form closes the file handle as soon as the document is built.
      doc = File.open(file_name) { |file| Nokogiri::HTML(file) }

      name = extract_name(doc, file_name)
      details = extract_details(doc, file_name)

      # STORE IT
      registrars.push Registrar.new(name, details)
    end
  end

  private

  # NORMALIZE NAME
  # Takes the ".area_head" text and removes the boilerplate phrases, newlines,
  # surrounding whitespace and a trailing dash.
  def extract_name(doc, file_name)
    heading = doc.at_css(".area_head")
    raise "#{file_name} is malformed" if heading.nil?

    name = heading.text
    name = name.gsub("Obtaining a Birth Certificate", "")
    name = name.gsub("Birth Registry", "")
    name = name.gsub("\n", "")
    # NOTE(review): strip runs before the dash is removed, so "Foo -" becomes
    # "Foo " (trailing space kept). Left as-is to keep stored names stable.
    name.strip.gsub(/-$/, "")
  end

  # NORMALIZE DETAILS
  # Joins the text of the ".main_text > p" paragraphs that sit strictly between
  # the "Registrars of Births" paragraph and the "Ordering a certificate is"
  # paragraph, separated by blank lines.
  def extract_details(doc, file_name)
    ptags = doc.css(".main_text > p")
    start_tag = index_of("Registrars of Births", ptags)
    end_tag = index_of("Ordering a certificate is", ptags)

    # An end marker at or before the start marker would otherwise yield an
    # empty details string without any error.
    if start_tag.nil? || end_tag.nil? || end_tag <= start_tag
      raise "#{file_name} is malformed"
    end

    ptags[(start_tag + 1)..(end_tag - 1)].map { |element| element.text }.join("\n\n")
  end
end
# Simple holder for one parsed entry: a display name plus its free-text
# details. Both attributes are readable and writable.
class Registrar
  attr_accessor :name
  attr_accessor :details

  # name    - value stored as #name
  # details - value stored as #details
  def initialize(name, details)
    @name = name
    @details = details
  end
end
=begin
# Place this in db/seeds.rb
require 'tpl_parse'
parser = TplParse.doooooooooooit
parser.registrars.each do |registrar|
Registry.find_or_create_by_name :name => registrar.name, :details => registrar.details
end
=end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment