jsonfry · March 17, 2013 00:10
diff --git a/springpad_to_txt.rb b/springpad_to_txt.rb
 require 'rexml/document'
 require 'time'
 require 'fileutils'

 # Directory (relative to the current working directory) that receives the exported .txt files.
 OUTPUT_FOLDER = 'notes'
 # Scratch copy of index.html; the script tidies it in place and removes it at the end of the run.
 WORKING_HTML_FILEPATH = 'index.html.working'

 # Looks up the workbook name of a note; callers use it as the output sub-folder.
 #
 # @param element note element handed to get_contents_element_from_span with
 #   the "actual-name, 'workbooks'" filter
 # @return [String] text of the matching content element, or "" when the
 #   lookup raises any StandardError (e.g. the note has no such span)
 def get_foldername(element)
 	workbook = get_contents_element_from_span(element, "actual-name, 'workbooks'")
 	workbook.get_text.to_s
 rescue
 	""
 end

 # Builds the output file name from the note's h2 heading.
 #
 # @param element note element whose `elements['h2']` holds the title
 # @return [String] the heading text with every '/' replaced by '_' and
 #   ".txt" appended
 def get_filename(element)
 	title = element.elements['h2'].get_text.to_s
 	"#{title.gsub('/', '_')}.txt"
 end

 # Assembles the full output path for a note.
 #
 # @param element note element passed on to get_foldername and get_filename
 # @return [String] OUTPUT_FOLDER, folder name and file name joined with '/'
 #   (an empty folder name therefore yields a double slash)
 def get_filepath(element)
 	[OUTPUT_FOLDER, get_foldername(element), get_filename(element)].join('/')
 end

 # Fetches the 'content' span inside the child div selected by +filter+.
 #
 # @param element element to search below
 # @param filter [String] argument list spliced into an XPath
 #   `contains(@...)` test on the div, e.g. "class,'meta'"
 # @return whatever `element.elements[xpath]` yields for that XPath
 def get_contents_element_from_div(element, filter)
 	xpath = "div[contains(@#{filter})]/span[contains(@class,'content')]"
 	element.elements[xpath]
 end

 # Finds the span selected by +filter+ under a child div, then returns the
 # 'content' span that lives in the same parent.
 #
 # @param element element to search below
 # @param filter [String] argument list spliced into an XPath
 #   `contains(@...)` test on the span, e.g. "actual-name,'text'"
 # @return the result of looking up "span[contains(@class,'content')]" on the
 #   matched span's parent
 # @raise [NoMethodError] when the first lookup returns nil
 def get_contents_element_from_span(element, filter)
 	label_span = element.elements["div/span[contains(@#{filter})]"]
 	label_span.parent.elements["span[contains(@class,'content')]"]
 end

 # Writes an element's own text to +file+, one stripped line per line.
 #
 # @param element object responding to `get_text`
 # @param file IO-like object responding to `puts`
 # @return [String] the text that was iterated
 def write_single_element_to_file(element, file)
 	text = element.get_text.to_s
 	text.each_line do |line|
 		file.puts(line.strip)
 	end
 end

 # Writes the text of every child yielded by `each_element_with_text`.
 #
 # @param element object responding to `each_element_with_text`
 # @param file IO-like object passed through to write_single_element_to_file
 def write_tree_to_file(element, file)
 	element.each_element_with_text do |child|
 		write_single_element_to_file(child, file)
 	end
 end

 # Writes the contents of the note's 'meta' div (looked up with the
 # "class,'meta'" filter) to +file+.
 #
 # @param element note element
 # @param file IO-like object to write to
 # @return nil when anything goes wrong; errors are deliberately ignored
 def write_tags_to_file(element, file)
 	tags = get_contents_element_from_div(element, "class,'meta'")
 	write_tree_to_file(tags, file)
 rescue
 	# Best effort: a missing or unreadable section is skipped silently.
 end

 # Writes the direct text of the note's 'text' content span to +file+.
 #
 # @param element note element
 # @param file IO-like object to write to
 # @return nil when anything goes wrong; errors are deliberately ignored
 def write_note_contents_to_file(element, file)
 	body = get_contents_element_from_span(element, "actual-name,'text'")
 	write_single_element_to_file(body, file)
 rescue
 	# Best effort: a missing or unreadable section is skipped silently.
 end

 # Writes the child elements of the note's 'text' content span to +file+
 # (via write_tree_to_file, unlike write_note_contents_to_file).
 #
 # @param element note element
 # @param file IO-like object to write to
 # @return nil when anything goes wrong; errors are deliberately ignored
 def write_richnote_contents_to_file(element, file)
 	body = get_contents_element_from_span(element, "actual-name,'text'")
 	write_tree_to_file(body, file)
 rescue
 	# Best effort: a missing or unreadable section is skipped silently.
 end

 # Writes an "Author: " heading line followed by the book's author text.
 #
 # @param element book element
 # @param file IO-like object to write to
 def write_author_to_file(element, file)
 	file.puts('Author: ')
 	begin
 		author = get_contents_element_from_span(element, "actual-name,'author'")
 		write_single_element_to_file(author, file)
 	rescue
 		# Best effort: the heading stays even when no author can be found.
 	end
 end

 # Writes an "ISBN: " heading line followed by the book's ISBN text.
 #
 # @param element book element
 # @param file IO-like object to write to
 def write_isbn_to_file(element, file)
 	file.puts('ISBN: ')
 	begin
 		isbn = get_contents_element_from_span(element, "actual-name,'isbn'")
 		write_single_element_to_file(isbn, file)
 	rescue
 		# Best effort: the heading stays even when no ISBN can be found.
 	end
 end

 # Writes the direct text of the book's 'description' content span to +file+.
 #
 # @param element book element
 # @param file IO-like object to write to
 # @return nil when anything goes wrong; errors are deliberately ignored
 def write_book_description_to_file(element, file)
 	description = get_contents_element_from_span(element, "actual-name,'description'")
 	write_single_element_to_file(description, file)
 rescue
 	# Best effort: a missing or unreadable section is skipped silently.
 end

 # Stamps +filename+ with the timestamps found in the element's 'date' div.
 #
 # The `title` attribute of the first abbr becomes the modification time and
 # that of the second abbr becomes the access time.
 # NOTE(review): presumably these are the note's created/modified dates in the
 # Springpad export - confirm which is which.
 #
 # @param element note or book element
 # @param filename [String] path of the file to update
 # @raise when either abbr is missing or its title cannot be parsed
 def set_file_times(element, filename)
 	dates = "div[contains(@class,'date')]/span[contains(@class,'content')]"
 	modified = element.elements["#{dates}/abbr"].attribute('title')
 	accessed = element.elements["#{dates}/abbr[2]"].attribute('title')
 	File.utime(Time.parse(accessed.to_s), Time.parse(modified.to_s), filename)
 end

 # Ensures the sub-folder for a note exists below OUTPUT_FOLDER.
 #
 # Uses FileUtils.mkdir_p so an already existing folder is not an error and
 # folder names containing '/' get their intermediate directories created.
 # Genuine failures (e.g. permissions) are reported on stderr instead of being
 # swallowed; the method still does not raise for them.
 #
 # @param element note element whose folder name comes from get_foldername
 def create_note_folder(element)
 	folder = "#{OUTPUT_FOLDER}/#{get_foldername(element)}"
 	begin
 		FileUtils.mkdir_p(folder)
 	rescue SystemCallError => e
 		warn "could not create folder #{folder}: #{e.message}"
 	end
 end

 # Exports one note: writes tags, plain text and rich text into the note's
 # output file, then applies the note's timestamps to that file.
 #
 # @param element note element
 def write_note_to_file(element)
 	target = get_filepath(element)
 	File.open(target, 'w') do |out|
 		write_tags_to_file(element, out)
 		write_note_contents_to_file(element, out)
 		write_richnote_contents_to_file(element, out)
 	end
 	# The file is closed before its times are set.
 	set_file_times(element, target)
 end

 # Exports one book: writes author, ISBN and description into the book's
 # output file, then applies the book's timestamps to that file.
 #
 # @param element book element
 def write_book_to_file(element)
 	target = get_filepath(element)
 	File.open(target, 'w') do |out|
 		write_author_to_file(element, out)
 		write_isbn_to_file(element, out)
 		write_book_description_to_file(element, out)
 	end
 	# The file is closed before its times are set.
 	set_file_times(element, target)
 end

 # Ensures OUTPUT_FOLDER exists.
 #
 # Uses FileUtils.mkdir_p so an already existing folder is not an error.
 # Genuine failures (e.g. permissions, or a regular file with that name) are
 # reported on stderr instead of being swallowed; the method still does not
 # raise for them.
 def create_output_folder()
 	FileUtils.mkdir_p(OUTPUT_FOLDER)
 rescue SystemCallError => e
 	warn "could not create folder #{OUTPUT_FOLDER}: #{e.message}"
 end

 # Rewrites the HTML file in place so an XML parser can read it: removes
 # every '<br />' and escapes bare ' & ' as ' &amp; '.
 #
 # Uses the non-destructive String#gsub: gsub! returns nil when nothing was
 # replaced, which previously raised NoMethodError or wrote an empty file.
 #
 # @param filepath [String] path of the file to tidy
 def tidy_html_file(filepath)
 	text = File.read(filepath)
 	tidied = text.gsub('<br />', '').gsub(' & ', ' &amp; ')
 	File.open(filepath, 'w') { |file| file.puts tidied }
 end

 # Parses the file at +filepath+ into a REXML document.
 #
 # The file is opened in block form so the handle is closed once parsing is
 # done. The former `else` branch could never run and has been dropped.
 #
 # @param filepath [String] path of the (tidied) HTML file
 # @return [REXML::Document, false] the parsed document, or false after
 #   printing the parser's message when REXML::ParseException is raised
 def open_html_file(filepath)
 	File.open(filepath) do |io|
 		REXML::Document.new(io, { compress_whitespace: :all })
 	end
 rescue REXML::ParseException => e
 	puts 'Error opening html file, sometimes you might need to edit it manually to fix unclosed tags. See the error for more details:'
 	puts e.message
 	false
 end

 # Script body: copies index.html to a scratch file, tidies and parses it,
 # then exports every note and book element below OUTPUT_FOLDER.
 puts 'making copy of index.html...'
 FileUtils.cp('index.html', WORKING_HTML_FILEPATH)
 begin
 	puts 'tidying springpad html file...'
 	tidy_html_file(WORKING_HTML_FILEPATH)
 	puts 'opening spingpad html file...'
 	springpad_doc = open_html_file(WORKING_HTML_FILEPATH)
 	# open_html_file returns false when the HTML could not be parsed.
 	if springpad_doc
 		puts 'creating output folder "notes"...'
 		create_output_folder()
 		puts 'creating folders for notes...'
 		springpad_doc.elements.each("html/body/div/div[contains(@class,'instance')]") do |element|
 			create_note_folder(element)
 		end
 		puts 'exporting notes...'
 		springpad_doc.elements.each("html/body/div/div[contains(@class,'type:Note')]") do |element|
 			write_note_to_file(element)
 		end
 		puts 'exporting books...'
 		springpad_doc.elements.each("html/body/div/div[contains(@class,'type:Book')]") do |element|
 			write_book_to_file(element)
 		end
 	end
 ensure
 	# Remove the scratch copy even when one of the steps above raises.
 	puts 'removing temporary files...'
 	FileUtils.remove_file(WORKING_HTML_FILEPATH)
 end
 puts 'done!'
	require 'rexml/document'
	require 'time'
	require 'fileutils'

	# Directory (relative to the current working directory) that receives the exported .txt files.
	OUTPUT_FOLDER = 'notes'
	# Scratch copy of index.html; the script tidies it in place and removes it at the end of the run.
	WORKING_HTML_FILEPATH = 'index.html.working'

	# Looks up the workbook name of a note; callers use it as the output sub-folder.
	#
	# @param element note element handed to get_contents_element_from_span with
	#   the "actual-name, 'workbooks'" filter
	# @return [String] text of the matching content element, or "" when the
	#   lookup raises any StandardError (e.g. the note has no such span)
	def get_foldername(element)
		workbook = get_contents_element_from_span(element, "actual-name, 'workbooks'")
		workbook.get_text.to_s
	rescue
		""
	end

	# Builds the output file name from the note's h2 heading.
	#
	# @param element note element whose `elements['h2']` holds the title
	# @return [String] the heading text with every '/' replaced by '_' and
	#   ".txt" appended
	def get_filename(element)
		title = element.elements['h2'].get_text.to_s
		"#{title.gsub('/', '_')}.txt"
	end

	# Assembles the full output path for a note.
	#
	# @param element note element passed on to get_foldername and get_filename
	# @return [String] OUTPUT_FOLDER, folder name and file name joined with '/'
	#   (an empty folder name therefore yields a double slash)
	def get_filepath(element)
		[OUTPUT_FOLDER, get_foldername(element), get_filename(element)].join('/')
	end

	# Fetches the 'content' span inside the child div selected by +filter+.
	#
	# @param element element to search below
	# @param filter [String] argument list spliced into an XPath
	#   `contains(@...)` test on the div, e.g. "class,'meta'"
	# @return whatever `element.elements[xpath]` yields for that XPath
	def get_contents_element_from_div(element, filter)
		xpath = "div[contains(@#{filter})]/span[contains(@class,'content')]"
		element.elements[xpath]
	end

	# Finds the span selected by +filter+ under a child div, then returns the
	# 'content' span that lives in the same parent.
	#
	# @param element element to search below
	# @param filter [String] argument list spliced into an XPath
	#   `contains(@...)` test on the span, e.g. "actual-name,'text'"
	# @return the result of looking up "span[contains(@class,'content')]" on the
	#   matched span's parent
	# @raise [NoMethodError] when the first lookup returns nil
	def get_contents_element_from_span(element, filter)
		label_span = element.elements["div/span[contains(@#{filter})]"]
		label_span.parent.elements["span[contains(@class,'content')]"]
	end

	# Writes an element's own text to +file+, one stripped line per line.
	#
	# The block parameter is written with plain pipes; the escaped `\|` form
	# is not valid Ruby.
	#
	# @param element object responding to `get_text`
	# @param file IO-like object responding to `puts`
	# @return [String] the text that was iterated
	def write_single_element_to_file(element, file)
		text = element.get_text.to_s
		text.each_line do |line|
			file.puts(line.strip)
		end
	end

	# Writes the text of every child yielded by `each_element_with_text`.
	#
	# The block parameter is written with plain pipes; the escaped `\|` form
	# is not valid Ruby.
	#
	# @param element object responding to `each_element_with_text`
	# @param file IO-like object passed through to write_single_element_to_file
	def write_tree_to_file(element, file)
		element.each_element_with_text do |child|
			write_single_element_to_file(child, file)
		end
	end

	# Writes the contents of the note's 'meta' div (looked up with the
	# "class,'meta'" filter) to +file+.
	#
	# @param element note element
	# @param file IO-like object to write to
	# @return nil when anything goes wrong; errors are deliberately ignored
	def write_tags_to_file(element, file)
		tags = get_contents_element_from_div(element, "class,'meta'")
		write_tree_to_file(tags, file)
	rescue
		# Best effort: a missing or unreadable section is skipped silently.
	end

	# Writes the direct text of the note's 'text' content span to +file+.
	#
	# @param element note element
	# @param file IO-like object to write to
	# @return nil when anything goes wrong; errors are deliberately ignored
	def write_note_contents_to_file(element, file)
		body = get_contents_element_from_span(element, "actual-name,'text'")
		write_single_element_to_file(body, file)
	rescue
		# Best effort: a missing or unreadable section is skipped silently.
	end

	# Writes the child elements of the note's 'text' content span to +file+
	# (via write_tree_to_file, unlike write_note_contents_to_file).
	#
	# @param element note element
	# @param file IO-like object to write to
	# @return nil when anything goes wrong; errors are deliberately ignored
	def write_richnote_contents_to_file(element, file)
		body = get_contents_element_from_span(element, "actual-name,'text'")
		write_tree_to_file(body, file)
	rescue
		# Best effort: a missing or unreadable section is skipped silently.
	end

	# Writes an "Author: " heading line followed by the book's author text.
	#
	# @param element book element
	# @param file IO-like object to write to
	def write_author_to_file(element, file)
		file.puts('Author: ')
		begin
			author = get_contents_element_from_span(element, "actual-name,'author'")
			write_single_element_to_file(author, file)
		rescue
			# Best effort: the heading stays even when no author can be found.
		end
	end

	# Writes an "ISBN: " heading line followed by the book's ISBN text.
	#
	# @param element book element
	# @param file IO-like object to write to
	def write_isbn_to_file(element, file)
		file.puts('ISBN: ')
		begin
			isbn = get_contents_element_from_span(element, "actual-name,'isbn'")
			write_single_element_to_file(isbn, file)
		rescue
			# Best effort: the heading stays even when no ISBN can be found.
		end
	end

	# Writes the direct text of the book's 'description' content span to +file+.
	#
	# @param element book element
	# @param file IO-like object to write to
	# @return nil when anything goes wrong; errors are deliberately ignored
	def write_book_description_to_file(element, file)
		description = get_contents_element_from_span(element, "actual-name,'description'")
		write_single_element_to_file(description, file)
	rescue
		# Best effort: a missing or unreadable section is skipped silently.
	end

	# Stamps +filename+ with the timestamps found in the element's 'date' div.
	#
	# The `title` attribute of the first abbr becomes the modification time and
	# that of the second abbr becomes the access time.
	# NOTE(review): presumably these are the note's created/modified dates in the
	# Springpad export - confirm which is which.
	#
	# @param element note or book element
	# @param filename [String] path of the file to update
	# @raise when either abbr is missing or its title cannot be parsed
	def set_file_times(element, filename)
		dates = "div[contains(@class,'date')]/span[contains(@class,'content')]"
		modified = element.elements["#{dates}/abbr"].attribute('title')
		accessed = element.elements["#{dates}/abbr[2]"].attribute('title')
		File.utime(Time.parse(accessed.to_s), Time.parse(modified.to_s), filename)
	end

	# Ensures the sub-folder for a note exists below OUTPUT_FOLDER.
	#
	# Uses FileUtils.mkdir_p so an already existing folder is not an error and
	# folder names containing '/' get their intermediate directories created.
	# Genuine failures (e.g. permissions) are reported on stderr instead of being
	# swallowed; the method still does not raise for them.
	#
	# @param element note element whose folder name comes from get_foldername
	def create_note_folder(element)
		folder = "#{OUTPUT_FOLDER}/#{get_foldername(element)}"
		begin
			FileUtils.mkdir_p(folder)
		rescue SystemCallError => e
			warn "could not create folder #{folder}: #{e.message}"
		end
	end

	# Exports one note: writes tags, plain text and rich text into the note's
	# output file, then applies the note's timestamps to that file.
	#
	# The block parameter is written with plain pipes; the escaped `\|` form
	# is not valid Ruby.
	#
	# @param element note element
	def write_note_to_file(element)
		target = get_filepath(element)
		File.open(target, 'w') do |out|
			write_tags_to_file(element, out)
			write_note_contents_to_file(element, out)
			write_richnote_contents_to_file(element, out)
		end
		# The file is closed before its times are set.
		set_file_times(element, target)
	end

	# Exports one book: writes author, ISBN and description into the book's
	# output file, then applies the book's timestamps to that file.
	#
	# The block parameter is written with plain pipes; the escaped `\|` form
	# is not valid Ruby.
	#
	# @param element book element
	def write_book_to_file(element)
		target = get_filepath(element)
		File.open(target, 'w') do |out|
			write_author_to_file(element, out)
			write_isbn_to_file(element, out)
			write_book_description_to_file(element, out)
		end
		# The file is closed before its times are set.
		set_file_times(element, target)
	end

	# Ensures OUTPUT_FOLDER exists.
	#
	# Uses FileUtils.mkdir_p so an already existing folder is not an error.
	# Genuine failures (e.g. permissions, or a regular file with that name) are
	# reported on stderr instead of being swallowed; the method still does not
	# raise for them.
	def create_output_folder()
		FileUtils.mkdir_p(OUTPUT_FOLDER)
	rescue SystemCallError => e
		warn "could not create folder #{OUTPUT_FOLDER}: #{e.message}"
	end

	# Rewrites the HTML file in place so an XML parser can read it: removes
	# every '<br />' and escapes bare ' & ' as ' &amp; '.
	#
	# Uses the non-destructive String#gsub: gsub! returns nil when nothing was
	# replaced, which previously raised NoMethodError or wrote an empty file.
	# The replacement is the '&amp;' entity; replacing ' & ' with itself would
	# leave the ampersand unescaped.
	#
	# @param filepath [String] path of the file to tidy
	def tidy_html_file(filepath)
		text = File.read(filepath)
		tidied = text.gsub('<br />', '').gsub(' & ', ' &amp; ')
		File.open(filepath, 'w') { |file| file.puts tidied }
	end

	# Parses the file at +filepath+ into a REXML document.
	#
	# The file is opened in block form so the handle is closed once parsing is
	# done. The former `else` branch could never run and has been dropped.
	#
	# @param filepath [String] path of the (tidied) HTML file
	# @return [REXML::Document, false] the parsed document, or false after
	#   printing the parser's message when REXML::ParseException is raised
	def open_html_file(filepath)
		File.open(filepath) do |io|
			REXML::Document.new(io, { compress_whitespace: :all })
		end
	rescue REXML::ParseException => e
		puts 'Error opening html file, sometimes you might need to edit it manually to fix unclosed tags. See the error for more details:'
		puts e.message
		false
	end

	# Script body: copies index.html to a scratch file, tidies and parses it,
	# then exports every note and book element below OUTPUT_FOLDER.
	# Block parameters use plain pipes; the escaped `\|` form is not valid Ruby.
	puts 'making copy of index.html...'
	FileUtils.cp('index.html', WORKING_HTML_FILEPATH)
	begin
		puts 'tidying springpad html file...'
		tidy_html_file(WORKING_HTML_FILEPATH)
		puts 'opening spingpad html file...'
		springpad_doc = open_html_file(WORKING_HTML_FILEPATH)
		# open_html_file returns false when the HTML could not be parsed.
		if springpad_doc
			puts 'creating output folder "notes"...'
			create_output_folder()
			puts 'creating folders for notes...'
			springpad_doc.elements.each("html/body/div/div[contains(@class,'instance')]") do |element|
				create_note_folder(element)
			end
			puts 'exporting notes...'
			springpad_doc.elements.each("html/body/div/div[contains(@class,'type:Note')]") do |element|
				write_note_to_file(element)
			end
			puts 'exporting books...'
			springpad_doc.elements.each("html/body/div/div[contains(@class,'type:Book')]") do |element|
				write_book_to_file(element)
			end
		end
	ensure
		# Remove the scratch copy even when one of the steps above raises.
		puts 'removing temporary files...'
		FileUtils.remove_file(WORKING_HTML_FILEPATH)
	end
	puts 'done!'