ngmaloney · February 10, 2022 13:42
diff --git a/map_extract.rb b/map_extract.rb
 require 'rmagick'
 require 'rtesseract'

 ## Utility for extracting the map page from a survey_map pdf.
 #
 # Each page of the PDF is written out as a temporary PNG and passed through
 # RTesseract; the page with the fewest recognised words is treated as the map.
 class Utils::MapExtract
   attr_reader :pdf_file, :images, :tmp_dir

   # @param pdf_file [String] path of the PDF to inspect
   # @param tmp_dir [String] directory that receives the per-page PNG files
   def initialize(pdf_file, tmp_dir: '/tmp')
     @pdf_file = pdf_file
     @tmp_dir = tmp_dir
     @images = []
   end

   # Writes every page to a PNG, counts the words on it and returns the
   # contents of the page with the fewest words. The temporary PNGs are
   # removed before returning, whether or not an error occurred.
   #
   # @return [String] binary contents of the selected page's PNG file
   def process!
     # Reset per-run state so a repeated call does not pick up entries (or a
     # memoised winner) whose files were already deleted by an earlier run.
     @images = []
     @map = nil

     Magick::Image.read(pdf_file).each_with_index do |page, idx|
       entry = { file: File.join(tmp_dir, "#{basename}_#{idx}.png") }
       # Register the path before writing so cleanup still removes the file
       # when the write or the word count raises part-way through.
       images << entry
       page.write(entry[:file])
       entry[:size] = word_count_on_page(entry[:file])
     end

     # PNG data is binary; binread avoids text-mode newline/encoding handling.
     File.binread(map[:file])
   ensure
     cleanup
   end

   # @return [Hash, nil] the +{ file:, size: }+ entry with the lowest word
   #   count (the earliest page wins a tie), or nil when no page was recorded
   def map
     @map ||= images.min_by { |img| img[:size] }
   end

   # Deletes the temporary page images; paths that no longer exist are skipped.
   def cleanup
     images.each do |img|
       File.delete(img[:file]) if File.exist?(img[:file])
     end
   end

   # @return [String] file name of the PDF without directory or '.pdf' suffix
   def basename
     File.basename(pdf_file, '.pdf')
   end

   # @param img [String] path of the image to run through RTesseract
   # @return [Integer] number of whitespace-separated tokens in the result
   def word_count_on_page(img)
     RTesseract.new(img).to_s.split.size
   end
 end
	require 'rmagick'
	require 'rtesseract'

	## Utility for extracting the map page from a survey_map pdf.
	#
	# Each page of the PDF is written out as a temporary PNG and passed through
	# RTesseract; the page with the fewest recognised words is treated as the map.
	class Utils::MapExtract
	  attr_reader :pdf_file, :images, :tmp_dir

	  # @param pdf_file [String] path of the PDF to inspect
	  # @param tmp_dir [String] directory that receives the per-page PNG files
	  def initialize(pdf_file, tmp_dir: '/tmp')
	    @pdf_file = pdf_file
	    @tmp_dir = tmp_dir
	    @images = []
	  end

	  # Writes every page to a PNG, counts the words on it and returns the
	  # contents of the page with the fewest words. The temporary PNGs are
	  # removed before returning, whether or not an error occurred.
	  #
	  # @return [String] binary contents of the selected page's PNG file
	  def process!
	    # Reset per-run state so a repeated call does not pick up entries (or a
	    # memoised winner) whose files were already deleted by an earlier run.
	    @images = []
	    @map = nil

	    Magick::Image.read(pdf_file).each_with_index do |page, idx|
	      entry = { file: File.join(tmp_dir, "#{basename}_#{idx}.png") }
	      # Register the path before writing so cleanup still removes the file
	      # when the write or the word count raises part-way through.
	      images << entry
	      page.write(entry[:file])
	      entry[:size] = word_count_on_page(entry[:file])
	    end

	    # PNG data is binary; binread avoids text-mode newline/encoding handling.
	    File.binread(map[:file])
	  ensure
	    cleanup
	  end

	  # @return [Hash, nil] the +{ file:, size: }+ entry with the lowest word
	  #   count (the earliest page wins a tie), or nil when no page was recorded
	  def map
	    @map ||= images.min_by { |img| img[:size] }
	  end

	  # Deletes the temporary page images; paths that no longer exist are skipped.
	  def cleanup
	    images.each do |img|
	      File.delete(img[:file]) if File.exist?(img[:file])
	    end
	  end

	  # @return [String] file name of the PDF without directory or '.pdf' suffix
	  def basename
	    File.basename(pdf_file, '.pdf')
	  end

	  # @param img [String] path of the image to run through RTesseract
	  # @return [Integer] number of whitespace-separated tokens in the result
	  def word_count_on_page(img)
	    RTesseract.new(img).to_s.split.size
	  end
	end
No results found