Skip to content

Instantly share code, notes, and snippets.

@troyericg
Last active April 4, 2016 18:56
Show Gist options
  • Save troyericg/9a9354dee3f0a345cd4d81ce29d7c97a to your computer and use it in GitHub Desktop.
Save troyericg/9a9354dee3f0a345cd4d81ce29d7c97a to your computer and use it in GitHub Desktop.
ruby script for downloading public/published google spreadsheets as csvs
require 'rubygems'
require 'fileutils'
require 'nokogiri'
require 'open-uri'
require 'csv'
###############
# GET PUBLIC: # A script for downloading/parsing public google spreadsheets
###############
## The key to your public spreadsheet
DOC_KEY = "<KEY-GOES-HERE/>"
## Add folder name here specify download path
DIR_PATH = "gp_output/" # don't forget trailing slash
## What each sheet of your spreadsheet will be prepended with
SHEET_PREFIX = "gp_sheet-"
## Amount of time your script will wait before trying to parse downloaded file. Adjust depending on your connection speed
SLEEP_AMOUNT = 20
## Temporary file to be created and deleted
SRC_FILENAME = "gp_output_raw_tmp.html"
SRC_FILEPATH = "#{DIR_PATH}#{SRC_FILENAME}"
DOC_PATH = "https://docs.google.com/spreadsheet/pub?key=#{DOC_KEY}&single=true&ndplr=1&output=csv"
def init
puts ":: Starting parsing process ::"
puts ":: URL = #{DOC_PATH}"
puts ":: FILE PATH = #{SRC_FILEPATH}"
puts ":: FILE PREFIX = #{SHEET_PREFIX}"
puts "----------------"
puts
get_doc
end
def get_doc
puts ":: Downloading html ::"
# MAKE FOLDER IF IT DOESN'T ALREADY EXIST
dir_name = DIR_PATH.split("/")[0] unless DIR_PATH[-1, 1] != "/"
Dir.mkdir(dir_name) unless File.exists?(dir_name)
cmd = "wget -O #{SRC_FILEPATH} #{DOC_PATH}"
system cmd
sleep SLEEP_AMOUNT
puts "(Okay, file should be downloaded by now, time to start parsing...)"
puts
parse
end
def parse
puts ":: Parsing downloaded html ::"
doc = Nokogiri::HTML(open(SRC_FILEPATH))
tables = doc.css('table')
sections = []
tables.each_with_index do |table,i|
section = {
'name' => "#{SHEET_PREFIX}#{i}",
'rows' => table.css('tbody tr')
}
sections << section
end
make_csvs(sections)
end
def make_csvs(sections)
sections.each do |sect|
section_fname = sect['name']
section_file = "#{DIR_PATH}#{section_fname}.csv"
unless File.exists?(section_file)
puts "Making #{section_fname}... "
csv = CSV.open(section_file, 'w',{:col_sep => ",", :quote_char => '\'', :force_quotes => true})
sect["rows"].each do |row|
tarray = []
row.css('td').each do |cell|
tarray << cell.text
end
csv << tarray
end
csv.close
end
puts "|_ Generated #{section_fname}.csv!"
end
puts
puts "(deleting source file now...)"
system "rm #{SRC_FILEPATH}"
puts
puts "All done!"
end
# START ME UP
init
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment