rietta · September 14, 2020 13:09
diff --git a/pdf_text_extractor.rb b/pdf_text_extractor.rb
 # frozen_string_literal: true

 ##
 # Primary responsibility is extracting text from a PDF or confirming if
 # text is available in the PDF.
 #
 # Security note: This simple wrapper assumes that the PDF filename you give it has been
 # chosen by an internal method, such as a tempfile name. Do not pass unsafe, user-supplied
 # file names into this class.
 #
 # Copyright 2017 Rietta Inc. BSD Licensed.
 #
 class PdfTextExtractor
   # Path of the PDF that is handed to +pdftotext+.
   attr_reader :pdf_file

   # @param pdf_file [String] path of the PDF to read
   # @raise [RuntimeError] when +pdftotext+ cannot be located via +which+
   def initialize(pdf_file:)
     raise 'pdftotext is not installed, but is required.' unless command?('pdftotext')

     @pdf_file = pdf_file
   end

   # Point the extractor at another file and drop the memoized text, so that
   # #text and #as_json never pair the new filename with the old contents.
   #
   # @param path [String] path of the PDF to read from now on
   def pdf_file=(path)
     @text = nil
     @pdf_file = path
   end

   # Determine if a command is available on the current Unix system.
   #
   # +which+ is invoked with an argument vector rather than a shell string, so
   # shell metacharacters in +command+ are looked up literally, never executed.
   #
   # @param command [#to_s] name of the executable to look for
   # @return [Boolean] true only when +which+ ran and exited successfully
   def command?(command)
     system('which', command.to_s, out: File::NULL, err: File::NULL) == true
   end

   # Text content of the PDF as printed by +pdftotext <file> -+, stripped of
   # surrounding whitespace and memoized after the first non-nil result.
   #
   # The file name is passed as a single argv entry (no shell involved), so
   # paths containing quotes, spaces or other shell metacharacters work.
   #
   # @return [String] extracted text; empty when +pdftotext+ printed nothing
   # @raise [Errno::ENOENT] if +pdftotext+ can no longer be found on PATH
   def text
     @text ||= IO.popen(['pdftotext', @pdf_file.to_s, '-'], &:read).strip
   end

   # @return [Boolean] whether any text could be extracted from the PDF
   def text?
     !text.empty?
   end

   # @return [Hash] the file name and its extracted text
   def as_json(_opts = {})
     {
       filename: @pdf_file,
       text: text
     }
   end
 end
	# frozen_string_literal: true

	##
	# Primary responsibility is extracting text from a PDF or confirming if
	# text is available in the PDF.
	#
	# Security note: This simple wrapper assumes that the PDF filename you give it has been
	# chosen by an internal method, such as a tempfile name. Do not pass unsafe, user-supplied
	# file names into this class.
	#
	# Copyright 2017 Rietta Inc. BSD Licensed.
	#
	class PdfTextExtractor
	  # Path of the PDF that is handed to +pdftotext+.
	  attr_reader :pdf_file

	  # @param pdf_file [String] path of the PDF to read
	  # @raise [RuntimeError] when +pdftotext+ cannot be located via +which+
	  def initialize(pdf_file:)
	    raise 'pdftotext is not installed, but is required.' unless command?('pdftotext')

	    @pdf_file = pdf_file
	  end

	  # Point the extractor at another file and drop the memoized text, so that
	  # #text and #as_json never pair the new filename with the old contents.
	  #
	  # @param path [String] path of the PDF to read from now on
	  def pdf_file=(path)
	    @text = nil
	    @pdf_file = path
	  end

	  # Determine if a command is available on the current Unix system.
	  #
	  # +which+ is invoked with an argument vector rather than a shell string, so
	  # shell metacharacters in +command+ are looked up literally, never executed.
	  #
	  # @param command [#to_s] name of the executable to look for
	  # @return [Boolean] true only when +which+ ran and exited successfully
	  def command?(command)
	    system('which', command.to_s, out: File::NULL, err: File::NULL) == true
	  end

	  # Text content of the PDF as printed by +pdftotext <file> -+, stripped of
	  # surrounding whitespace and memoized after the first non-nil result.
	  #
	  # The file name is passed as a single argv entry (no shell involved), so
	  # paths containing quotes, spaces or other shell metacharacters work.
	  #
	  # @return [String] extracted text; empty when +pdftotext+ printed nothing
	  # @raise [Errno::ENOENT] if +pdftotext+ can no longer be found on PATH
	  def text
	    @text ||= IO.popen(['pdftotext', @pdf_file.to_s, '-'], &:read).strip
	  end

	  # @return [Boolean] whether any text could be extracted from the PDF
	  def text?
	    !text.empty?
	  end

	  # @return [Hash] the file name and its extracted text
	  def as_json(_opts = {})
	    {
	      filename: @pdf_file,
	      text: text
	    }
	  end
	end
No results found