Created
November 8, 2017 10:32
-
-
Save PlanetRoast/79c687e8d856b24ef9685a3849adc7d7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# FileGrabber
# Designed for downloading, unzipping, saving, and tidying of Affiliate Window product feeds.
# -------------------------------------------------------------------------------------------
# Usage: @foo = FileGrabber.new(url: "product-feed-url")
# Optional args:
#   folder_name (sets the name of the folder which contains the downloaded files)
#   folder_path (sets where to store the downloaded files)
#   trim (how many downloads do you want to keep saved? The rest will be deleted)
require 'fileutils'
require 'open-uri'
require 'zlib'

# Downloads a gzipped feed, writes the decompressed content to
# "<Rails.root>/<folder_path>/<folder_name>/<unix-timestamp>-<name>.csv" and
# then removes the oldest grabbed files so that at most `trim` of them remain.
#
# All of the work happens in the constructor. A blank URL does not raise: it
# is recorded in {#errors} and the download is skipped. Failures while
# opening the URL, decompressing or writing are not rescued here and
# propagate to the caller.
#
# @example
#   grab = FileGrabber.new(url: "https://example.com/feed.csv.gz", trim: 5)
#   grab.display_errors unless grab.valid?
class FileGrabber
  DEFAULT_FOLDER_NAME = 'assorted'
  DEFAULT_FOLDER_PATH = 'filegrabber'
  DEFAULT_NAME        = 'grabbedfile'
  DEFAULT_TRIM        = 3
  DEFAULT_EXTENSION   = '.csv.gz'

  # Matches strings that start with a URI scheme ("http://", "ftp://", ...).
  SCHEME_PATTERN = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}.freeze

  # @return [Array<String>] validation error messages (empty when valid)
  attr_reader :errors

  # @param args [Hash] options; the hash is read but never modified
  # @option args [String]  :url         location of the feed (required)
  # @option args [String]  :extension   stored but not otherwise used by this class
  # @option args [String]  :folder_name folder that holds the saved files
  # @option args [String]  :folder_path path (below Rails.root) leading to the folder
  # @option args [String]  :name        file name without the timestamp prefix
  # @option args [Integer] :trim        how many grabbed files to keep
  def initialize(args = {})
    @errors      = []
    @url         = args[:url]
    # Plain `||` rather than `||=` so the caller's hash is left untouched.
    @extension   = args[:extension]   || DEFAULT_EXTENSION
    @folder_name = args[:folder_name] || DEFAULT_FOLDER_NAME
    @folder_path = args[:folder_path] || DEFAULT_FOLDER_PATH
    @name        = args[:name]        || DEFAULT_NAME
    @trim        = args[:trim]        || DEFAULT_TRIM
    @path        = make_path
    # Order matters: the file name needs the timestamp, the full path needs
    # the file name.
    @timestamp   = make_timestamp
    @filename    = make_filename
    @full_path   = make_full_path
    validate_args
    validate_path
    if valid?
      download_file
      unzip_and_save
      delete_old_files
    else
      puts "FileGrabber didn't start because of a validation error."
    end
  end

  # @return [Boolean] true when no validation error was recorded
  def valid?
    !has_errors?
  end

  # Prints every recorded error to stdout, one per line.
  # Use {#errors} to get the messages as an array instead.
  #
  # @return [Array<String>] the error list
  def display_errors
    @errors.each { |e| puts "FileGrabber error: #{e}" }
  end

  private

  # @return [String] e.g. "1510137120-grabbedfile"
  def make_filename
    "#{@timestamp}-#{@name}"
  end

  # @return [String] absolute path of the decompressed file, ".csv" included
  def make_full_path
    File.join(@path, "#{@filename}.csv")
  end

  # Deletes the oldest grabbed files until at most @trim remain.
  # Only files whose name contains @name are counted or deleted, so
  # unrelated files in the same folder are never touched.
  def delete_old_files
    grabbed = list_files.select { |f| f.include?(@name) }
    excess  = grabbed.size - @trim.to_i
    # Fewer files than the limit: nothing to do (Array#first rejects negatives).
    return unless excess.positive?

    grabbed.first(excess).each do |f|
      target = File.join(@path, f)
      File.delete(target)
      puts "Deleted #{target}"
    end
  end

  # @return [Array<String>] names of the regular files in the folder, oldest
  #   first (ordered by the numeric timestamp prefix, then by name)
  def list_files
    Dir.glob(File.join(@path, '*'))
       .select { |f| File.file?(f) }
       .map { |f| File.basename(f) }
       .sort_by { |f| [f.to_i, f] }
  end

  # @return [Integer] current Unix time, used as the file name prefix
  def make_timestamp
    Time.now.to_i
  end

  # Makes sure the target folder exists. Not a validation in the strict
  # sense: it records no error and returns no boolean.
  def validate_path
    create_folder unless File.directory?(@path)
  end

  # Creates the target folder together with any missing parent folders.
  def create_folder
    FileUtils.mkdir_p(@path)
  end

  # Opens the feed and keeps the readable handle in @download.
  # Strings with a URI scheme go through open-uri; anything else is opened
  # as a local file. Kernel#open is deliberately avoided: it no longer
  # opens URLs on Ruby 3 and would run "|command" strings as a subprocess.
  def download_file
    @download =
      if @url.to_s.match?(SCHEME_PATTERN)
        URI.open(@url)
      else
        File.open(@url, 'rb')
      end
  end

  # Writes the download as-is (no decompression, no extension). Not used for
  # gzipped feeds - see unzip_and_save.
  def save_file
    IO.copy_stream(@download, File.join(@path, @filename))
  end

  # Decompresses the download into @full_path. The data is streamed rather
  # than read into memory, and the block form closes the gzip reader (and
  # with it the download handle) even when decompression or writing fails.
  def unzip_and_save
    Zlib::GzipReader.wrap(@download) do |gz|
      IO.copy_stream(gz, @full_path)
    end
  end

  # Runs every argument validation; problems are appended to @errors.
  def validate_args
    validate_url
  end

  # A URL is the one mandatory argument; nil, empty and whitespace-only
  # values are all treated as missing.
  def validate_url
    @errors << "URL was left blank." if @url.to_s.match?(/\A[[:space:]]*\z/)
  end

  # @return [Boolean] true when at least one validation error was recorded
  def has_errors?
    @errors.any?
  end

  # @return [String] folder the files are saved to (needs Rails.root)
  def make_path
    File.join(Rails.root.to_s, @folder_path, @folder_name)
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment