missingno15 · August 11, 2018 16:35
diff --git a/ocr.rb b/ocr.rb
 require "csv"
 require "base64"
 require "typhoeus" # HTTP toolkit that is backed by CuRL
 require "pry"  # debugger
 require "json" # JSON parser that comes with standard Ruby library
 require "oj" # JSON parser but uses C extensions

 # OCR pipeline: send every photo in the "images" directory that sits next to
 # this script to Google Cloud Vision, clean up the recognised text line by
 # line, and write the collected keywords to keywords.json.

 # Every cleaned-up keyword recognised across all images, in processing order.
 keywords = []

 # Planned shape of one record. Only the flat list of keyword strings is
 # produced by this script so far.
 # {
 #   "Chapter Number" => 1,
 #   "Chapter Title" => "Basic Theory",
 #   "Section Number" => 1.1,
 #   "Section Title" => "Basic Theory",
 #   "Topic" => "Discrete mathematics",
 #   "Importance Rating" => 3, # scale of 1-3 where 3 is most important
 #   "Keyword count" => 1, # presumably the size of "Keywords" -- TODO confirm
 #   "Keywords" => [keyword]
 # }

 # Column names for the records sketched above; not referenced by the code
 # below yet.
 headers = [
   "Chapter Number",
   "Chapter Title",
   "Section Title",
   "Section Number",
   "Topic",
   "Importance Rating",
   "Keyword count",
   "Keywords"
 ]

 GOOGLE_VISION = "https://vision.googleapis.com/v1/images:annotate"
 # A key supplied through GOOGLE_VISION_API_KEY takes precedence, so the real
 # credential does not have to live in the source file; the placeholder is the
 # fallback.
 API_KEY       = ENV.fetch("GOOGLE_VISION_API_KEY", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")

 # Get all the images that I took of the section keywords.
 # The directory is resolved relative to this file (not the current working
 # directory) so listing and reading always agree on the location.
 images_dir = File.join(File.expand_path(File.dirname(__FILE__)), "images")
 images = Dir.
   children(images_dir).
   reject { |name| name.include?("DS_Store") }. # test the file name only, not the whole path
   sort. # Dir.children gives no ordering guarantee; sort for reproducible output
   map { |name| File.join(images_dir, name) }

 # Go through each image and run Google's OCR through it
 images.each do |image|
   # Prepare a POST body payload to send to Google
   payload = {
     "requests" => [{
       "image" => {
         # binread: image bytes must not go through text-mode translation.
         # strict_encode64: plain Base64 without the line feeds encode64 inserts.
         "content" => Base64.strict_encode64(File.binread(image))
       },
       # The API schema defines "features" as a list of feature objects.
       "features" => [{ "type" => "TEXT_DETECTION" }]
     }]
   }

   # Send the request; the fields mask limits the reply to the recognised text
   response = Typhoeus.post(
     "#{GOOGLE_VISION}?fields=responses%2FfullTextAnnotation%2Ftext&key=#{API_KEY}",
     body: JSON.dump(payload),
     headers: { "Content-Type" => "application/json" }
   )

   # A failed request has no usable JSON body; report it and move on to the
   # next image instead of crashing part-way through the batch.
   unless response.success?
     warn "OCR request failed for #{image} (HTTP #{response.code})"
     next
   end

   # Transform the body to a Ruby Hash and pull out the results
   text = Oj.load(response.body).dig("responses", 0, "fullTextAnnotation", "text")
   next unless text # nothing was recognised in this image

   # Clean up output so it's more manageable: one keyword per line, keeping
   # only letters, parentheses and whitespace
   words = text.split("\n").map { |keyword| keyword.gsub(/[^A-Za-z()\s]/, "").strip.capitalize }

   # Lines that contained no letters at all end up empty after the clean-up
   keywords.concat(words.reject(&:empty?))
 end


 File.write("keywords.json", JSON.pretty_generate(keywords))
	require "csv"
	require "base64"
	require "typhoeus" # HTTP toolkit that is backed by CuRL
	require "pry" # debugger
	require "json" # JSON parser that comes with standard Ruby library
	require "oj" # JSON parser but uses C extensions

	# OCR pipeline: send every photo in the "images" directory that sits next to
	# this script to Google Cloud Vision, clean up the recognised text line by
	# line, and write the collected keywords to keywords.json.

	# Every cleaned-up keyword recognised across all images, in processing order.
	keywords = []

	# Planned shape of one record. Only the flat list of keyword strings is
	# produced by this script so far.
	# {
	#   "Chapter Number" => 1,
	#   "Chapter Title" => "Basic Theory",
	#   "Section Number" => 1.1,
	#   "Section Title" => "Basic Theory",
	#   "Topic" => "Discrete mathematics",
	#   "Importance Rating" => 3, # scale of 1-3 where 3 is most important
	#   "Keyword count" => 1, # presumably the size of "Keywords" -- TODO confirm
	#   "Keywords" => [keyword]
	# }

	# Column names for the records sketched above; not referenced by the code
	# below yet.
	headers = [
	  "Chapter Number",
	  "Chapter Title",
	  "Section Title",
	  "Section Number",
	  "Topic",
	  "Importance Rating",
	  "Keyword count",
	  "Keywords"
	]

	GOOGLE_VISION = "https://vision.googleapis.com/v1/images:annotate"
	# A key supplied through GOOGLE_VISION_API_KEY takes precedence, so the real
	# credential does not have to live in the source file; the placeholder is the
	# fallback.
	API_KEY = ENV.fetch("GOOGLE_VISION_API_KEY", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")

	# Get all the images that I took of the section keywords.
	# The directory is resolved relative to this file (not the current working
	# directory) so listing and reading always agree on the location.
	images_dir = File.join(File.expand_path(File.dirname(__FILE__)), "images")
	images = Dir.
	  children(images_dir).
	  reject { |name| name.include?("DS_Store") }. # test the file name only, not the whole path
	  sort. # Dir.children gives no ordering guarantee; sort for reproducible output
	  map { |name| File.join(images_dir, name) }

	# Go through each image and run Google's OCR through it
	images.each do |image|
	  # Prepare a POST body payload to send to Google
	  payload = {
	    "requests" => [{
	      "image" => {
	        # binread: image bytes must not go through text-mode translation.
	        # strict_encode64: plain Base64 without the line feeds encode64 inserts.
	        "content" => Base64.strict_encode64(File.binread(image))
	      },
	      # The API schema defines "features" as a list of feature objects.
	      "features" => [{ "type" => "TEXT_DETECTION" }]
	    }]
	  }

	  # Send the request; the fields mask limits the reply to the recognised text
	  response = Typhoeus.post(
	    "#{GOOGLE_VISION}?fields=responses%2FfullTextAnnotation%2Ftext&key=#{API_KEY}",
	    body: JSON.dump(payload),
	    headers: { "Content-Type" => "application/json" }
	  )

	  # A failed request has no usable JSON body; report it and move on to the
	  # next image instead of crashing part-way through the batch.
	  unless response.success?
	    warn "OCR request failed for #{image} (HTTP #{response.code})"
	    next
	  end

	  # Transform the body to a Ruby Hash and pull out the results
	  text = Oj.load(response.body).dig("responses", 0, "fullTextAnnotation", "text")
	  next unless text # nothing was recognised in this image

	  # Clean up output so it's more manageable: one keyword per line, keeping
	  # only letters, parentheses and whitespace
	  words = text.split("\n").map { |keyword| keyword.gsub(/[^A-Za-z()\s]/, "").strip.capitalize }

	  # Lines that contained no letters at all end up empty after the clean-up
	  keywords.concat(words.reject(&:empty?))
	end


	File.write("keywords.json", JSON.pretty_generate(keywords))