Created
June 10, 2012 17:20
-
-
Save PeteMichaud/2906678 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Mixin that pulls law-firm "appearance" records out of the raw text stored
# in @pm_data by the including object.
#
# NOTE(review): this module calls several helpers that are not defined here
# and must be supplied elsewhere: Array#peek, String#line_trim!,
# String#strip_or_self! and #blank? (on String and nil). Their exact
# behaviour cannot be confirmed from this file; the code below presumes that
# peek returns the element pop would remove next (without removing it) and
# that line_trim! returns the trimmed string.
module ParserModule
  # Fields that make up one law-firm record.
  LAWFIRM_KEYS = [:lawyer_name, :address1, :address2, :city_state_zip,
                  :phone, :for, :representing, :lawfirm_name].freeze

  # Matches the "APPEARANCES" heading, with or without a space between letters.
  APPEARANCE_PATTERN = /A\s?P\s?P\s?E\s?A\s?R\s?A\s?N\s?C\s?E/i

  # Matches a whole-line 10-digit phone number, capturing its three groups.
  PHONE_PATTERN = /^\(?([0-9]{3})\)?[-. ]?([0-9]{3})[-. ]?([0-9]{4})$/

  attr_reader :pm_data, :pm_pages

  # Splits @pm_data into pages on lines consisting of exactly four digits
  # (plus optional trailing whitespace) and memoizes the result in @pm_pages.
  #
  # @return [Array<String>]
  def pages
    @pm_pages ||= @pm_data.split(/^\d{4}\s*$/)
  end

  # Walks the appearance lines in document order and groups them into
  # law-firm records.
  #
  # A record is closed when the input runs out, when end_of_lawfirm says it
  # is complete, or when a blank/unknown line is hit. The blank/unknown line
  # is consumed (previously a blank line was left on the stack, which made
  # the outer loop spin forever) and whatever was collected so far is kept
  # rather than silently dropped.
  #
  # @return [Array<Hash{Symbol => String, nil}>] one hash per law firm, with
  #   every non-nil value passed through String#capitalize!
  def law_firms
    @pm_lawfirm_keys = LAWFIRM_KEYS.dup
    # Reversed so that pop/peek hand the lines back in document order.
    lines = get_appearance_lines.reverse
    lawfirms = []

    until lines.empty?
      lawfirm = {}

      loop do
        next_line = lines.peek
        break if end_of_lawfirm(lawfirm, next_line)

        line_type = get_line_type(next_line)
        if line_type == :unknown || line_type == :blank
          lines.pop # consume the separator so the outer loop makes progress
          break
        end

        lawfirm[line_type], extra_data = get_line(line_type, lines.pop)

        # special cases
        case line_type
        when :address1
          # address2 can't be detected by itself, but if it exists, it always
          # comes after address1
          lawfirm[:address2] = line_is?(lines.peek, :city_state_zip) ? '' : lines.pop
        when :lawfirm_name
          # sometimes lawfirm names break onto two lines
          lawfirm[:lawfirm_name] += " #{lines.pop}" if line_is?(lines.peek, :unknown, :lawfirm_name)
        when :for
          # the parties are sometimes on the same line (extra_data) and
          # sometimes continue onto a second line
          extra_data = [extra_data, lines.pop].compact.join(' ') if line_is?(lines.peek, :unknown)
          lawfirm[:representing] = extra_data
        end
      end

      next if lawfirm.empty?

      lawfirm.each_value { |value| value.capitalize! if value }
      lawfirms << lawfirm
    end

    lawfirms
  end

  # @param line [String, nil]
  # @param types [Array<Symbol>] candidate line types
  # @return [Boolean] true when the line's detected type is one of +types+
  def line_is?(line, *types)
    types.include?(get_line_type(line))
  end

  # Decides whether the record being built is finished.
  #
  # @param lawfirm [Hash] the record collected so far
  # @param next_line [String, nil] the line that would be consumed next
  # @return [Boolean]
  def end_of_lawfirm(lawfirm, next_line)
    # if there's no line next, we're obviously at the end
    return true if next_line.nil?

    # Fall back to the constant so this works even before law_firms has run.
    keys = @pm_lawfirm_keys || LAWFIRM_KEYS
    missing_keys = keys.reject { |key| lawfirm.key?(key) }

    # if all the keys are populated, we're at the end
    return true if missing_keys.empty?

    # if the only two keys that are missing are phone and representing, we're done
    # NOTE(review): law_firms sets :representing whenever it sets :for, so
    # this combination looks unreachable from there - confirm the intent.
    missing_keys.length == 2 &&
      missing_keys.include?(:phone) &&
      missing_keys.include?(:representing)
  end

  # Dispatches to the ParserModule.get_<type> extractor for the given type.
  #
  # @param type [Symbol] a line type as returned by get_line_type
  # @param line [String]
  # @return [String, Array<String>] whatever the extractor returns
  def get_line(type, line)
    ParserModule.public_send("get_#{type}", line)
  end

  # Classifies a single line.
  #
  # The phone pattern is checked before the address pattern because a
  # space-separated number such as "212 555 1234" also satisfies the looser
  # address pattern; the phone pattern is anchored to the whole line.
  #
  # @param line [String, nil]
  # @return [Symbol] :lawyer_name, :phone, :address1, :city_state_zip, :for,
  #   :lawfirm_name, :blank or :unknown
  def get_line_type(line)
    case line
    when /^by:|esq(\.|uire)?$/i
      :lawyer_name
    when PHONE_PATTERN
      :phone
    when /^\d+ \w+/
      :address1
    when /^[\w\s]+,? [\w\s]+ \d{5}(-\d{4})?$/
      :city_state_zip
    when /(plaintiff|claimant|defendant|respondent)/i
      :for
    when /(law office|esqs|&)/i
      :lawfirm_name
    else
      line.blank? ? :blank : :unknown
    end
  end

  # Collects the lines of every page that contains an appearance heading,
  # with the heading lines themselves removed and blank lines stripped from
  # the top and bottom of each page (blank lines in the middle are kept).
  #
  # @return [Array<String>]
  def get_appearance_lines
    appearance_pages = pages.select { |page| page =~ APPEARANCE_PATTERN }

    appearance_pages.flat_map do |page|
      page.split("\n")
          .map { |line| line.line_trim! } # helper defined outside this module
          .reject { |line| line =~ APPEARANCE_PATTERN }
          .drop_while { |line| line.empty? }
          .reverse.drop_while { |line| line.empty? }.reverse
    end
  end

  # Get Individual Fields

  # @return [String] the line without a leading "by:" prefix
  def self.get_lawyer_name(line)
    line.gsub(/^by:/i, '').strip_or_self!
  end

  # @return [String] the line unchanged
  def self.get_address1(line)
    line
  end

  # @return [String] the line unchanged
  def self.get_city_state_zip(line)
    line
  end

  # Exists so that get_line can dispatch :lawfirm_name lines.
  #
  # @return [String] the line unchanged
  def self.get_lawfirm_name(line)
    line
  end

  # Normalizes a phone number to "(AAA) PPP-NNNN".
  #
  # The replacement is single-quoted on purpose: in a double-quoted string
  # "\1" is a control character, not a backreference. The argument is no
  # longer mutated, and a non-matching line is returned unchanged.
  #
  # @return [String]
  def self.get_phone(line)
    line.sub(PHONE_PATTERN, '(\1) \2-\3')
  end

  # Works out which side the firm appears for.
  #
  # @return [String, Array<String>] 'plaintiff' or 'defendant'; for the
  #   defendant side, ['defendant', parties] when text follows the keyword
  def self.get_for(line)
    return 'plaintiff' unless line =~ /(defendant|respondent)/i

    representing = line.match(/(defendant|respondent)s?,? (.+)/i)
    representing ? ['defendant', representing[2]] : 'defendant'
  end

  # End Get Individual Fields
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment