ismasan · August 6, 2021 12:12
diff --git a/parametric_types_image_download_pipeline.rb b/parametric_types_image_download_pipeline.rb
 # frozen_string_literal: true

 require 'faraday'
 require 'parametric/types'
 require 'digest'

 include Parametric

 # Runs the given block once and prints how long it took, in milliseconds.
 #
 # Timing uses the monotonic clock so that system clock adjustments made while
 # the block runs cannot skew (or make negative) the reported duration.
 #
 # @yield the code to time
 # @return [String] the "took <milliseconds>" message that was printed
 def bench
   start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
   yield
   elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000
   p "took #{elapsed}"
 end

 module Types
   # Extra custom types built from the library's base types.

   # A string that begins with "http" or "https". The pattern is anchored with
   # \A (start of string) rather than ^ (start of any line), so multi-line
   # input cannot pass by placing the scheme on a later line.
   URL = Types::String.rule(:matches?, /\Ahttps?/)
   # A string containing a "type/subtype" pair such as "image/png".
   Mime = Types::String.rule(:matches?, /\w+\/\w+/)
   # Any object that responds to #read.
   IO = Types::Any.rule(:respond_to?, :read)
 end

 # Test them
 # result = Types::URL.call('https://google.com')
 # expect(result.success?).to be(true)
 # expect(result.value).to eq('https://google.com')
 #
 # result = Types::URL.call('nope')
 # expect(result.success?).to be(false)
 # expect(result.error).to eq("failed matches?(\"nope\", /\\Ahttps?/)")
 # expect(result.value).to eq('nope')

 # Schema for a hash describing an image hosted on an HTTP server:
 # :url must satisfy Types::URL and :filename must satisfy Types::String.
 image_hash = Types::Hash.schema(
  url: Types::URL,
  filename: Types::String
 )

 # Schema for a hash describing an IO object plus metadata:
 # :status is an integer, :content_type must satisfy Types::Mime and
 # :io must satisfy Types::IO (i.e. respond to #read).
 io_hash = Types::Hash.schema(
  status: Types::Integer,
  content_type: Types::Mime,
  io: Types::IO
 )

 # An array whose elements are each an image_hash.
 # NOTE(review): not referenced again in the visible part of this script.
 image_array = Types::Array.of(image_hash)

 # Step that takes an image_hash and fetches its :url over HTTP.
 # A 2xx response becomes a success carrying the status, the Content-Type
 # header and the body wrapped in a StringIO; any other status becomes a
 # failure carrying that status code.
 download = image_hash.constructor do |result|
   response = Faraday.get(result.value[:url])
   http_status = response.status
   next result.failure(http_status) unless (200..299).include?(http_status)

   result.success(
     status: http_status,
     content_type: response.headers['Content-Type'],
     io: StringIO.new(response.body)
   )
 end

 # Step that reads the :io entry of its input and returns the MD5 hex digest
 # of the content that was read.
 checksum = Types::Any.transform do |payload|
   Digest::MD5.hexdigest(payload[:io].read)
 end

 # Pipeline that downloads an image and then digests it.
 image_pipeline = download > checksum

 # Sample inputs: two hashes with the :url and :filename keys that
 # image_hash declares, used by the demo calls further down.
 img1 = {
  url: 'https://www.vets4pets.com/siteassets/species/cat/kitten/tiny-kitten-in-sunlight.jpg?w=185&scale=down',
  filename: 'kitten'
 }
 img2 = {
  url: 'https://images.unsplash.com/photo-1529778873920-4da4926a72c2?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxleHBsb3JlLWZlZWR8OXx8fGVufDB8fHx8&w=1000&q=80',
  filename: 'another'
 }

 puts '### Maybe download'
 # Conditional pipeline:
 # - if the input is an image hash, download it and check the result
 #   against io_hash;
 # - otherwise, if the input is already an IO hash, leave it untouched;
 # - finally, produce a checksum of whichever branch succeeded.
 maybe_download = ((image_hash > download > io_hash) | io_hash) > checksum

 # First branch: the input matches image_hash, so it is downloaded first.
 puts 'With an image hash. Will download and checksum.'
 p maybe_download.call(img1)

 # Second branch: the input already has the io_hash shape (an in-memory
 # StringIO stands in for a downloaded body), so no HTTP request is needed.
 puts 'With an IO hash. Will skip download, checksum directly.'
 p maybe_download.call(status: 200, content_type: 'image/png', io: StringIO.new('data'))

 puts '### download pipeline'

 # Download and digest an array of images, concurrently.
 # NOTE: this value is replaced by the assignment right below; presumably it
 # is kept only to show the alternative composition.
 bulk_download = Types::Array.of(image_pipeline).concurrent

 # ... Or, download concurrently but checksum the results sequentially (this is faster on my machine)
 bulk_download = Types::Array.of(download).concurrent > Types::Array.of(checksum)

 # Print the composed pipeline object itself.
 p bulk_download

 # Time one run over four inputs (the two sample images, each listed twice).
 bench do
   p bulk_download.call([img1, img2, img1, img2])
 end
	# frozen_string_literal: true

	require 'faraday'
	require 'parametric/types'
	require 'digest'

	include Parametric

	# Runs the given block once and prints how long it took, in milliseconds.
	#
	# Timing uses the monotonic clock so that system clock adjustments made while
	# the block runs cannot skew (or make negative) the reported duration.
	#
	# @yield the code to time
	# @return [String] the "took <milliseconds>" message that was printed
	def bench
	  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
	  yield
	  elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000
	  p "took #{elapsed}"
	end

	module Types
	  # Extra custom types built from the library's base types.

	  # A string that begins with "http" or "https". The pattern is anchored with
	  # \A (start of string) rather than ^ (start of any line), so multi-line
	  # input cannot pass by placing the scheme on a later line.
	  URL = Types::String.rule(:matches?, /\Ahttps?/)
	  # A string containing a "type/subtype" pair such as "image/png".
	  Mime = Types::String.rule(:matches?, /\w+\/\w+/)
	  # Any object that responds to #read.
	  IO = Types::Any.rule(:respond_to?, :read)
	end

	# Test them
	# result = Types::URL.call('https://google.com')
	# expect(result.success?).to be(true)
	# expect(result.value).to eq('https://google.com')
	#
	# result = Types::URL.call('nope')
	# expect(result.success?).to be(false)
	# expect(result.error).to eq("failed matches?(\"nope\", /\\Ahttps?/)")
	# expect(result.value).to eq('nope')

	# Schema for a hash describing an image hosted on an HTTP server:
	# :url must satisfy Types::URL and :filename must satisfy Types::String.
	image_hash = Types::Hash.schema(
	url: Types::URL,
	filename: Types::String
	)

	# Schema for a hash describing an IO object plus metadata:
	# :status is an integer, :content_type must satisfy Types::Mime and
	# :io must satisfy Types::IO (i.e. respond to #read).
	io_hash = Types::Hash.schema(
	status: Types::Integer,
	content_type: Types::Mime,
	io: Types::IO
	)

	# An array whose elements are each an image_hash.
	# NOTE(review): not referenced again in the visible part of this script.
	image_array = Types::Array.of(image_hash)

	# Step that takes an image_hash and fetches its :url over HTTP.
	# A 2xx response becomes a success carrying the status, the Content-Type
	# header and the body wrapped in a StringIO; any other status becomes a
	# failure carrying that status code.
	download = image_hash.constructor do |r|
	  resp = Faraday.get(r.value[:url])
	  if (200..299).include? resp.status
	    r.success(
	      status: resp.status,
	      content_type: resp.headers['Content-Type'],
	      io: StringIO.new(resp.body)
	    )
	  else
	    r.failure(resp.status)
	  end
	end

	# Step that reads the :io entry of its input and returns the MD5 hex digest
	# of the content that was read.
	checksum = Types::Any.transform { |h| Digest::MD5.hexdigest(h[:io].read) }

	# Pipeline that downloads an image and then digests it.
	image_pipeline = download > checksum

	# Sample inputs: two hashes with the :url and :filename keys that
	# image_hash declares, used by the demo calls further down.
	img1 = {
	url: 'https://www.vets4pets.com/siteassets/species/cat/kitten/tiny-kitten-in-sunlight.jpg?w=185&scale=down',
	filename: 'kitten'
	}
	img2 = {
	url: 'https://images.unsplash.com/photo-1529778873920-4da4926a72c2?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxleHBsb3JlLWZlZWR8OXx8fGVufDB8fHx8&w=1000&q=80',
	filename: 'another'
	}

	puts '### Maybe download'
	# Conditional pipeline:
	# - if the input is an image hash, download it and check the result
	#   against io_hash;
	# - otherwise, if the input is already an IO hash, leave it untouched;
	# - finally, produce a checksum of whichever branch succeeded.
	maybe_download = ((image_hash > download > io_hash) | io_hash) > checksum

	# First branch: the input matches image_hash, so it is downloaded first.
	puts 'With an image hash. Will download and checksum.'
	p maybe_download.call(img1)

	# Second branch: the input already has the io_hash shape (an in-memory
	# StringIO stands in for a downloaded body), so no HTTP request is needed.
	puts 'With an IO hash. Will skip download, checksum directly.'
	p maybe_download.call(status: 200, content_type: 'image/png', io: StringIO.new('data'))

	puts '### download pipeline'

	# Download and digest an array of images, concurrently.
	# NOTE: this value is replaced by the assignment right below; presumably it
	# is kept only to show the alternative composition.
	bulk_download = Types::Array.of(image_pipeline).concurrent

	# ... Or, download concurrently but checksum the results sequentially (this is faster on my machine)
	bulk_download = Types::Array.of(download).concurrent > Types::Array.of(checksum)

	# Print the composed pipeline object itself.
	p bulk_download

	# Time one run over four inputs (the two sample images, each listed twice).
	bench do
	  p bulk_download.call([img1, img2, img1, img2])
	end