Skip to content

Instantly share code, notes, and snippets.

@PeteMichaud
Created June 10, 2012 17:20
Show Gist options
  • Save PeteMichaud/2906678 to your computer and use it in GitHub Desktop.
Save PeteMichaud/2906678 to your computer and use it in GitHub Desktop.
# Mixin that pulls law-firm "appearance" entries out of raw text.
#
# The including object supplies the text in @pm_data. The text is split into
# pages, the pages that mention "appearance" are broken into lines, and those
# lines are classified one by one and grouped into one Hash per law firm.
#
# External dependency: String#line_trim! is called on every line but is not
# defined in this module — presumably a project-level String extension; it is
# expected to return the trimmed line (TODO confirm its exact behaviour).
module ParserModule
  # Every field a complete law-firm entry carries.
  LAWFIRM_KEYS = [
    :lawyer_name, :address1, :address2, :city_state_zip,
    :phone, :for, :representing, :lawfirm_name
  ].freeze

  # A line holding only a four-digit number is treated as a page separator.
  PAGE_BREAK_PATTERN = /^\d{4}\s*$/

  # "APPEARANCE", optionally letter-spaced ("A P P E A R A N C E"), any case.
  APPEARANCE_PATTERN = /A\s?P\s?P\s?E\s?A\s?R\s?A\s?N\s?C\s?E/i

  # Ten-digit phone number; the three captures are area code, prefix, number.
  PHONE_PATTERN = /^\(?([0-9]{3})\)?[-. ]?([0-9]{3})[-. ]?([0-9]{4})$/

  attr_reader :pm_data, :pm_pages

  # Splits @pm_data into pages and memoizes the result in @pm_pages.
  #
  # @return [Array<String>] the text between page-separator lines
  def pages
    @pm_pages ||= @pm_data.split(PAGE_BREAK_PATTERN)
  end

  # Parses every law-firm entry found on the appearance pages.
  #
  # An entry is closed by end_of_lawfirm, by a blank line or by a line that
  # cannot be classified. The blank/unclassifiable line is consumed (leaving
  # it on the stack made the original loop forever on blank lines) and any
  # fields collected so far are kept, exactly as they are at end of input.
  # Every String value is passed through String#capitalize.
  #
  # @return [Array<Hash{Symbol => String, nil}>] one Hash per law firm, keyed
  #   by a subset of LAWFIRM_KEYS
  def law_firms
    # kept as an instance variable because end_of_lawfirm honours it
    @pm_lawfirm_keys = LAWFIRM_KEYS.dup
    # reversed so that pop / last walk the lines in document order
    lines = get_appearance_lines.reverse
    lawfirms = []
    until lines.empty?
      lawfirm = read_lawfirm(lines)
      next if lawfirm.empty?

      lawfirm.keys.each { |key| lawfirm[key] = lawfirm[key].capitalize if lawfirm[key] }
      lawfirms << lawfirm
    end
    lawfirms
  end

  # Consumes lines from the top of the stack until the current entry ends.
  #
  # @param lines [Array<String>] reversed line stack; mutated via pop
  # @return [Hash] fields collected for one entry (empty if the first line
  #   was blank or unclassifiable)
  def read_lawfirm(lines)
    lawfirm = {}
    until end_of_lawfirm(lawfirm, lines.last)
      line_type = get_line_type(lines.last)
      if line_type == :blank || line_type == :unknown
        lines.pop # always consume the separator so the caller makes progress
        break
      end

      lawfirm[line_type], extra_data = get_line(line_type, lines.pop)

      # special cases
      case line_type
      when :address1
        # address2 can't be detected by itself, but if it exists it always
        # comes right after address1 and before the city/state/zip line
        lawfirm[:address2] = line_is?(lines.last, :city_state_zip) ? '' : lines.pop
      when :lawfirm_name
        # sometimes law-firm names break onto two lines
        if line_is?(lines.last, :unknown, :lawfirm_name)
          lawfirm[:lawfirm_name] = "#{lawfirm[:lawfirm_name]} #{lines.pop}"
        end
      when :for
        # the represented party may sit on the same line (extra_data), spill
        # onto the next line, or both; compact avoids a leading space when
        # there was nothing on the first line
        extra_data = [extra_data, lines.pop].compact.join(' ') if line_is?(lines.last, :unknown)
        lawfirm[:representing] = extra_data
      end
    end
    lawfirm
  end
  private :read_lawfirm

  # Tells whether a line classifies as any of the given types.
  #
  # @param line [String, nil]
  # @param types [Array<Symbol>] candidate results of get_line_type
  # @return [Boolean]
  def line_is?(line, *types)
    types.include?(get_line_type(line))
  end

  # Decides whether the entry being built is finished.
  #
  # @param lawfirm [Hash] fields collected so far
  # @param next_line [String, nil] the line that would be read next
  # @return [Boolean]
  def end_of_lawfirm(lawfirm, next_line)
    # if there's no line next, we're obviously at the end
    return true if next_line.nil?

    # if all the keys are populated, we're at the end
    expected_keys = @pm_lawfirm_keys || LAWFIRM_KEYS
    missing_keys = expected_keys.reject { |key| lawfirm.key?(key) }
    return true if missing_keys.empty?

    # if the only two keys that are missing are phone and representing, we're done
    # NOTE(review): law_firms stores :representing whenever it stores :for, so
    # this rule cannot fire from that caller — confirm the intended condition.
    missing_keys.length == 2 && missing_keys.include?(:phone) && missing_keys.include?(:representing)
  end

  # Extracts the field value from a line of a known type by dispatching to
  # the matching ParserModule.get_<type> method.
  #
  # @param type [Symbol] a type returned by get_line_type, other than
  #   :blank or :unknown (no extractor exists for those)
  # @param line [String]
  # @return [String, Array<String>] the value, or [value, extra] for :for
  #   lines that name the represented party
  def get_line(type, line)
    ParserModule.public_send("get_#{type}", line)
  end

  # Classifies a single line; the first matching rule wins.
  #
  # The phone rule is tested before the address rule because a space-separated
  # number such as "212 555 1234" also satisfies the address pattern. No rule
  # produces :representing; law_firms derives that field from :for lines.
  #
  # @param line [String, nil]
  # @return [Symbol] :lawyer_name, :phone, :address1, :city_state_zip, :for,
  #   :lawfirm_name, :blank (nil or whitespace only) or :unknown
  def get_line_type(line)
    return :blank if line.nil? || line.strip.empty?

    case line
    when /^by:|esq(\.|uire)?$/i then :lawyer_name
    when PHONE_PATTERN then :phone
    when /^\d+ \w+/ then :address1
    when /^[\w\s]+,? [\w\s]+ \d{5}(-\d{4})?$/ then :city_state_zip
    when /(plaintiff|claimant|defendant|respondent)/i then :for
    when /(law office|esqs|&)/i then :lawfirm_name
    else :unknown
    end
  end

  # Collects the lines of every page that mentions "appearance".
  #
  # Each line is trimmed with String#line_trim!, the heading lines that
  # themselves match APPEARANCE_PATTERN are removed, and blank lines are
  # dropped from the top and bottom of each page but not from the middle.
  #
  # @return [Array<String>] lines of all appearance pages, in document order
  def get_appearance_lines
    appearance_pages = pages.select { |page| page =~ APPEARANCE_PATTERN }
    appearance_pages.flat_map do |page|
      page.split("\n")
          # fall back to the line itself in case line_trim! follows the bang
          # convention of returning nil when nothing changed
          .map { |line| line.line_trim! || line }
          .reject { |line| line =~ APPEARANCE_PATTERN }
          .drop_while(&:empty?).reverse.drop_while(&:empty?).reverse
    end
  end

  # Get Individual Fields

  # @param line [String] e.g. "BY: JOHN SMITH, ESQ."
  # @return [String] the line without a leading "by:" and surrounding whitespace
  def self.get_lawyer_name(line)
    line.gsub(/^by:/i, '').strip
  end

  # @param line [String]
  # @return [String] the line, unchanged
  def self.get_address1(line)
    line
  end

  # @param line [String]
  # @return [String] the line, unchanged
  def self.get_city_state_zip(line)
    line
  end

  # @param line [String]
  # @return [String] the line, unchanged (get_line dispatches here for
  #   :lawfirm_name lines)
  def self.get_lawfirm_name(line)
    line
  end

  # Normalizes a phone number to "(AAA) PPP-NNNN".
  #
  # @param line [String]
  # @return [String] the formatted number, or the line unchanged when it does
  #   not match PHONE_PATTERN; the argument itself is not modified
  def self.get_phone(line)
    # single quotes keep \1 \2 \3 as back-references instead of control chars
    line.gsub(PHONE_PATTERN, '(\1) \2-\3')
  end

  # Works out which side a "for ..." line refers to.
  #
  # @param line [String]
  # @return [String, Array<String>] 'plaintiff' unless the line mentions a
  #   defendant or respondent; otherwise 'defendant', or
  #   ['defendant', party] when text follows the role on the same line
  def self.get_for(line)
    return 'plaintiff' unless line =~ /(defendant|respondent)/i

    representing = line.match(/(defendant|respondent)s?,? (.+)/i)
    representing ? ['defendant', representing[2]] : 'defendant'
  end

  # End Get Individual Fields
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment