papaver · December 15, 2015 20:19
diff --git a/config.yaml b/config.yaml
 interval: 10
diff --git a/Gemfile b/Gemfile
 # fetch gems over https so downloads cannot be tampered with in transit
 source 'https://rubygems.org'

 gem 'activerecord'
 # releaser.rb requires the standard 'csv' library; fastercsv only targets
 #  ruby 1.8 and is never required by the script, so csv is declared instead
 gem 'csv'
 gem 'sqlite3'
diff --git a/releaser.rb b/releaser.rb
 #!/usr/bin/env ruby -w

 #
 # SSC Group Programming Challenge: A Content Releaser
 #   by Moiz Merchant 
 #

 #------------------------------------------------------------------------------
 # requires
 #------------------------------------------------------------------------------

 # first task is figuring out the best way to ingest the csv content, looking 
 #  around on google quickly reveals a gem we can take advantage of
 require 'csv'

 # second task is storing the parsed csv data into a database, active record
 #  should help with abstracting away the data layer
 require 'active_record'

 # sqlite should be sufficient for this exercise
 require 'sqlite3'

 # quick and easy configs
 require 'yaml' 

 #------------------------------------------------------------------------------
 # globals
 #------------------------------------------------------------------------------

 # location of the csv file holding the content to release.  other possible
 #  setups - the csv file could be uploaded from a webpage straight to the
 #  server managing the content delivery, or it could be an argument for a cmd
 #  line script.  here it is fixed to Documents/content.csv under the current
 #  user's home directory
 @kCSVDataPath = "#{Dir.home}/Documents/content.csv"

 # path of the yaml file holding settings for dynamic script control; it is a
 #  relative path, so it is resolved against the current working directory
 @kConfigPath = "config.yaml"

 #------------------------------------------------------------------------------
 # active_record setup
 #------------------------------------------------------------------------------

 # initiate a connection to the content db.  normally a persistent db would be
 #  used; a temporary in-memory sqlite db is used for this example, which lets
 #  the code run multiple times without extra code being needed to constantly
 #  reset the database
 ActiveRecord::Base.establish_connection(
  :adapter  => 'sqlite3',
  :database => ':memory:'
 )

 #------------------------------------------------------------------------------
 # scheme
 #------------------------------------------------------------------------------

 puts "Initializing database..."

 # create the contents table backing the Content model, with one column per
 #  csv field
 ActiveRecord::Schema.define do
     create_table(:contents) do |t|
         t.string   :title
         t.string   :site
         t.datetime :released_at
     end
 end

 #------------------------------------------------------------------------------
 # records
 #------------------------------------------------------------------------------

 # active record model for one row of the contents table (title, site and
 #  released_at).  a row counts as released once its released_at is at or
 #  before the current time and as active (still pending) while it lies in
 #  the future
 class Content < ActiveRecord::Base

     #- scopes -----------------------------------------------------------------

     # return content which hasn't been released yet
     scope :active, lambda {
         where('released_at > :time', :time => DateTime.current)
     }

     # return content which has already been released
     scope :released, lambda {
         where('released_at <= :time', :time => DateTime.current)
     }

     # return content associated with a certain site.  the hash condition lets
     #  active record build the comparison instead of relying on the
     #  non-standard '==' sql operator
     scope :of_site, lambda { |site|
         where(:site => site)
     }

     #- methods ----------------------------------------------------------------

     #
     # site_count - return the number of unique sites we have content for.
     #  newer active record releases no longer honour finder options such as
     #  :select passed to count, so the distinct count is expressed through
     #  the relation api instead
     #
     def self.site_count
         distinct.count(:site)
     end

     #
     # sites - list of unique site names as plain values, honours any scope it
     #  is chained onto (e.g. Content.active.sites)
     #
     def self.sites
         distinct.pluck(:site)
     end

     #
     # recommend_release_content - find a site to release content for, making
     #  sure the distribution is averaged out over the recent history
     #
     #  sites        - candidate site names (sites with releasable content)
     #  days_history - maximum number of most recent releases to inspect
     #
     #  returns one of the candidates with the fewest releases in the inspected
     #  history (ties are broken randomly), or nil when no candidates are given
     #
     def self.recommend_release_content(sites, days_history)
         # nothing to choose from; the original chain would fail on nil here
         return nil if sites.empty?

         # every candidate starts at zero so never-released sites are included
         tally = Hash[sites.map { |s| [s, 0] }]

         # walk the latest releases first and count only the candidate sites
         released
             .order(:released_at => :desc)
             .limit(days_history)
             .pluck(:site)
             .each { |s| tally[s] += 1 if tally.key?(s) }

         # pick a random site among those with the fewest recent releases
         fewest = tally.values.min
         tally.select { |_, n| n == fewest }.keys.sample
     end

     #--------------------------------------------------------------------------

     #
     # to_s - helpful for debugging, renders "title, site, released_at"
     #
     def to_s
         "#{title}, #{site}, #{released_at}"
     end

 end

 #------------------------------------------------------------------------------
 # methods
 #------------------------------------------------------------------------------

 #
 # parseCSVData - parse the contents of the given csv file and store one
 #   Content record per data row.  it is assumed that the file contains a
 #   header row with the columns title, site and released_at, and that
 #   released_at is formatted as YYYY-MM-DD:HH:MM:SS
 #
 #   csv_path - path of the csv file to read
 #
 #   raises whatever File.read, the timestamp parser or Content.create! raise
 #   for an unreadable file, a malformed timestamp or an invalid record
 #
 def parseCSVData(csv_path)
    CSV.parse(File.read(csv_path), :headers => true) do |row|
        # DateTime keeps the hour/minute/second part of the stamp; parsing
        #  with Date would silently truncate every release to midnight
        stamp = DateTime.strptime(row['released_at'], '%Y-%m-%d:%H:%M:%S')

        Content.create!(
            :title       => row['title'],
            :site        => row['site'],
            :released_at => stamp
        )
    end
 end

 #
 # release_content - release one piece of unreleased content, if any exists
 #
 #   picks a site through Content.recommend_release_content (looking at a
 #   history of twice the number of known sites), then stamps a random active
 #   item of that site with the current time, prints it and saves it
 #
 #   returns true when an item was released, false when nothing was releasable
 #
 def release_content

    # the number of known sites drives how much history gets inspected
    known_sites = Content.site_count

    # sites which still have active (unreleased) content
    candidates = Content.active.sites

    # nothing left to release anywhere
    if candidates.empty?
        puts "Warning ->> xxx <<-- no releasable content found"
        return false
    end

    # find a fairly distributed site to release content for
    chosen = Content.recommend_release_content(candidates, known_sites * 2)

    # random pending item of the chosen site.  the content order could be
    #  balanced too, with a similar history check restricted to that site
    item = Content.active.of_site(chosen).sample

    # the chosen site had nothing pending after all
    if item.nil?
        puts "Warning ->> #{chosen} <<-- no releasable content found"
        return false
    end

    # 'rls' the content by stamping it and printing it out
    item.released_at = DateTime.now
    puts "New Release ->> #{item.site} <<-- #{item.title} released at #{item.released_at}"
    item.save

    true
 end

 #------------------------------------------------------------------------------
 # main
 #------------------------------------------------------------------------------

 # load the config file with the interval time.  this could be set in code or
 #  provided as a config for the user, it really just depends on how often it
 #  is envisioned the data is going to change; a config is used as an exercise.
 #  the file is assumed to exist and to define 'interval' - a missing key
 #  raises KeyError right here instead of failing later inside sleep
 config = YAML.load(File.read(@kConfigPath))
 interval = config.fetch('interval')

 # validate the csv file exists at the path and stop when it does not; just
 #  printing the error would let the script carry on and crash in the parser.
 #  File.exist? is used because File.exists? was removed in ruby 3.2
 unless File.exist? @kCSVDataPath
    abort "Error: specified CSV path is invalid: #{@kCSVDataPath}"
 end

 # load the csv data into the database
 puts "Loading csv data..."
 parseCSVData @kCSVDataPath

 # we emulate a quickened release process to show the results of the script.
 #  minutes become seconds.  in a real environment a cron job could be used
 #  to run a script which releases content, or this script could just sleep
 #  for the configured intervals.  the loop ends once release_content reports
 #  that nothing was released
 while release_content
    sleep interval
 end

 #
 # this script will stop running once it runs out of content
 #
 # the order of the releases could also be pre-sorted once, written out, and
 #  only updated when new release content is added.  this would be much more
 #  efficient in terms of processing.
 #
	# fetch gems over https so downloads cannot be tampered with in transit
	source 'https://rubygems.org'

	gem 'activerecord'
	# releaser.rb requires the standard 'csv' library; fastercsv only targets
	# ruby 1.8 and is never required by the script, so csv is declared instead
	gem 'csv'
	gem 'sqlite3'
	#!/usr/bin/env ruby -w

	#
	# SSC Group Programming Challenge: A Content Releaser
	# by Moiz Merchant
	#

	#------------------------------------------------------------------------------
	# requires
	#------------------------------------------------------------------------------

	# first task is figuring out the best way to ingest the csv content, looking
	# around on google quickly reveals a gem we can take advantage of
	require 'csv'

	# second task is storing the parsed csv data into a database, active record
	# should help with abstracting away the data layer
	require 'active_record'

	# sqlite should be sufficient for this exercise
	require 'sqlite3'

	# quick and easy configs
	require 'yaml'

	#------------------------------------------------------------------------------
	# globals
	#------------------------------------------------------------------------------

	# location of the csv file holding the content to release. other possible
	# setups - the csv file could be uploaded from a webpage straight to the
	# server managing the content delivery, or it could be an argument for a cmd
	# line script. here it is fixed to Documents/content.csv under the current
	# user's home directory
	@kCSVDataPath = "#{Dir.home}/Documents/content.csv"

	# path of the yaml file holding settings for dynamic script control; it is a
	# relative path, so it is resolved against the current working directory
	@kConfigPath = "config.yaml"

	#------------------------------------------------------------------------------
	# active_record setup
	#------------------------------------------------------------------------------

	# initiate a connection to the content db. normally a persistent db would be
	# used; a temporary in-memory sqlite db is used for this example, which lets
	# the code run multiple times without extra code being needed to constantly
	# reset the database
	ActiveRecord::Base.establish_connection(
	:adapter => 'sqlite3',
	:database => ':memory:'
	)

	#------------------------------------------------------------------------------
	# scheme
	#------------------------------------------------------------------------------

	puts "Initializing database..."

	# define a table to reflect our csv data, with one column per csv field.
	# the block parameter is written with plain pipes; backslash-escaped pipes
	# are not valid ruby
	ActiveRecord::Schema.define do
	    create_table :contents do |table|
	        table.column :title, :string
	        table.column :site, :string
	        table.column :released_at, :datetime
	    end
	end

	#------------------------------------------------------------------------------
	# records
	#------------------------------------------------------------------------------

	# active record model for one row of the contents table (title, site and
	# released_at). a row counts as released once its released_at is at or
	# before the current time and as active (still pending) while it lies in
	# the future
	class Content < ActiveRecord::Base

	    #- scopes -----------------------------------------------------------------

	    # return content which hasn't been released yet
	    scope :active, lambda {
	        where('released_at > :time', :time => DateTime.current)
	    }

	    # return content which has already been released
	    scope :released, lambda {
	        where('released_at <= :time', :time => DateTime.current)
	    }

	    # return content associated with a certain site. the hash condition lets
	    # active record build the comparison instead of relying on the
	    # non-standard '==' sql operator
	    scope :of_site, lambda { |site|
	        where(:site => site)
	    }

	    #- methods ----------------------------------------------------------------

	    #
	    # site_count - return the number of unique sites we have content for.
	    # newer active record releases no longer honour finder options such as
	    # :select passed to count, so the distinct count is expressed through
	    # the relation api instead
	    #
	    def self.site_count
	        distinct.count(:site)
	    end

	    #
	    # sites - list of unique site names as plain values, honours any scope it
	    # is chained onto (e.g. Content.active.sites)
	    #
	    def self.sites
	        distinct.pluck(:site)
	    end

	    #
	    # recommend_release_content - find a site to release content for, making
	    # sure the distribution is averaged out over the recent history
	    #
	    # sites        - candidate site names (sites with releasable content)
	    # days_history - maximum number of most recent releases to inspect
	    #
	    # returns one of the candidates with the fewest releases in the inspected
	    # history (ties are broken randomly), or nil when no candidates are given
	    #
	    def self.recommend_release_content(sites, days_history)
	        # nothing to choose from; the original chain would fail on nil here
	        return nil if sites.empty?

	        # every candidate starts at zero so never-released sites are included
	        tally = Hash[sites.map { |s| [s, 0] }]

	        # walk the latest releases first and count only the candidate sites
	        released
	            .order(:released_at => :desc)
	            .limit(days_history)
	            .pluck(:site)
	            .each { |s| tally[s] += 1 if tally.key?(s) }

	        # pick a random site among those with the fewest recent releases
	        fewest = tally.values.min
	        tally.select { |_, n| n == fewest }.keys.sample
	    end

	    #--------------------------------------------------------------------------

	    #
	    # to_s - helpful for debugging, renders "title, site, released_at"
	    #
	    def to_s
	        "#{title}, #{site}, #{released_at}"
	    end

	end

	#------------------------------------------------------------------------------
	# methods
	#------------------------------------------------------------------------------

	#
	# parseCSVData - parse the contents of the given csv file and store one
	# Content record per data row. it is assumed that the file contains a
	# header row with the columns title, site and released_at, and that
	# released_at is formatted as YYYY-MM-DD:HH:MM:SS
	#
	# csv_path - path of the csv file to read
	#
	# raises whatever File.read, the timestamp parser or Content.create! raise
	# for an unreadable file, a malformed timestamp or an invalid record
	#
	def parseCSVData(csv_path)
	    CSV.parse(File.read(csv_path), :headers => true) do |row|
	        # DateTime keeps the hour/minute/second part of the stamp; parsing
	        # with Date would silently truncate every release to midnight
	        stamp = DateTime.strptime(row['released_at'], '%Y-%m-%d:%H:%M:%S')

	        Content.create!(
	            :title => row['title'],
	            :site => row['site'],
	            :released_at => stamp
	        )
	    end
	end

	#
	# release_content - release one piece of unreleased content, if any exists
	#
	# picks a site through Content.recommend_release_content (looking at a
	# history of twice the number of known sites), then stamps a random active
	# item of that site with the current time, prints it and saves it
	#
	# returns true when an item was released, false when nothing was releasable
	#
	def release_content

	    # the number of known sites drives how much history gets inspected
	    known_sites = Content.site_count

	    # sites which still have active (unreleased) content
	    candidates = Content.active.sites

	    # nothing left to release anywhere
	    if candidates.empty?
	        puts "Warning ->> xxx <<-- no releasable content found"
	        return false
	    end

	    # find a fairly distributed site to release content for
	    chosen = Content.recommend_release_content(candidates, known_sites * 2)

	    # random pending item of the chosen site. the content order could be
	    # balanced too, with a similar history check restricted to that site
	    item = Content.active.of_site(chosen).sample

	    # the chosen site had nothing pending after all
	    if item.nil?
	        puts "Warning ->> #{chosen} <<-- no releasable content found"
	        return false
	    end

	    # 'rls' the content by stamping it and printing it out
	    item.released_at = DateTime.now
	    puts "New Release ->> #{item.site} <<-- #{item.title} released at #{item.released_at}"
	    item.save

	    true
	end

	#------------------------------------------------------------------------------
	# main
	#------------------------------------------------------------------------------

	# load the config file with the interval time. this could be set in code or
	# provided as a config for the user, it really just depends on how often it
	# is envisioned the data is going to change; a config is used as an exercise.
	# the file is assumed to exist and to define 'interval' - a missing key
	# raises KeyError right here instead of failing later inside sleep
	config = YAML.load(File.read(@kConfigPath))
	interval = config.fetch('interval')

	# validate the csv file exists at the path and stop when it does not; just
	# printing the error would let the script carry on and crash in the parser.
	# File.exist? is used because File.exists? was removed in ruby 3.2
	unless File.exist? @kCSVDataPath
	    abort "Error: specified CSV path is invalid: #{@kCSVDataPath}"
	end

	# load the csv data into the database
	puts "Loading csv data..."
	parseCSVData @kCSVDataPath

	# we emulate a quickened release process to show the results of the script.
	# minutes become seconds. in a real environment a cron job could be used
	# to run a script which releases content, or this script could just sleep
	# for the configured intervals. the loop ends once release_content reports
	# that nothing was released
	while release_content
	    sleep interval
	end

	#
	# this script will stop running once it runs out of content
	#
	# the order of the releases could also be pre-sorted once, written out, and
	# only updated when new release content is added. this would be much more
	# efficient in terms of processing.
	#