Created
March 17, 2013 00:10
-
-
Save jsonfry/5178921 to your computer and use it in GitHub Desktop.
A little Ruby script to convert Springpad's (http://springpad.com/) HTML export file into a series of plain-text files, in folders corresponding to their notebooks! To get your HTML export, in Springpad go to Settings > Services and choose "Create backup", wait a few minutes, and then download the backup. Place this Ruby file in your unzipped expor…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rexml/document'
require 'time'
require 'fileutils'
# Folder (relative to the current directory) under which the per-notebook
# folders and note files are written.
OUTPUT_FOLDER = 'notes'.freeze
# Scratch copy of index.html that is tidied and parsed, so the original
# export file is never modified.
WORKING_HTML_FILEPATH = 'index.html.working'.freeze
# Returns the name of the folder a note belongs in, taken from the content
# span that sits next to the note's 'workbooks' label.
#
# element - the note's element (presumably a REXML::Element from the export).
#
# Returns the folder name with every '/' replaced by '_' so that it is always
# a single path component (the same rule get_filename applies to titles), or
# '' when the note carries no workbook information.
def get_foldername(element)
  content = begin
    get_contents_element_from_span(element, "actual-name, 'workbooks'")
  rescue NoMethodError
    # The lookup dereferences the label span; a note without one has no folder.
    nil
  end
  return '' if content.nil?

  content.get_text.to_s.gsub('/', '_')
end
# Builds the file name for a note from the text of its <h2> child.
#
# element - the note's element; must respond to #elements.
#
# Returns the title with every '/' replaced by '_' and '.txt' appended.
# A note without an <h2>, or with an empty one, is named 'untitled.txt'
# instead of raising or producing the hidden file '.txt'.
def get_filename(element)
  heading = element.elements['h2']
  title = heading.nil? ? '' : heading.get_text.to_s
  title = 'untitled' if title.empty?
  "#{title.gsub('/', '_')}.txt"
end
# Returns the path of the text file for a note:
# OUTPUT_FOLDER, then the note's folder name, then its file name,
# separated by '/'.
def get_filepath(element)
  folder = get_foldername(element)
  name = get_filename(element)
  "#{OUTPUT_FOLDER}/#{folder}/#{name}"
end
# Finds the 'content' span inside the child <div> selected by +filter+.
#
# element - the note's element; must respond to #elements.
# filter  - the argument list of an XPath contains() test on an attribute,
#           without the leading '@', e.g. "class,'meta'".
#
# Returns whatever element.elements[xpath] yields for the built expression
# (with REXML that is the first matching element, or nil when none match).
def get_contents_element_from_div(element, filter)
  element.elements["div[contains(@#{filter})]/span[contains(@class,'content')]"]
end
# Finds the 'content' span that is a sibling of the label span selected by
# +filter+ (both live in the same child <div> of +element+).
#
# element - the note's element; must respond to #elements.
# filter  - the argument list of an XPath contains() test on an attribute,
#           without the leading '@', e.g. "actual-name,'text'".
#
# Returns the content span, or nil when the label span (or its content
# sibling) is not present. Returning nil instead of raising on a missing
# label matches get_contents_element_from_div.
def get_contents_element_from_span(element, filter)
  label = element.elements["div/span[contains(@#{filter})]"]
  return nil if label.nil?

  label.parent.elements["span[contains(@class,'content')]"]
end
# Writes the first text node of +element+ to +file+, one stripped line per
# line of text.
#
# The text is taken with Text#value rather than Text#to_s: to_s returns the
# XML-escaped form ('&amp;', '&lt;', ...), which does not belong in a
# plain-text export.
#
# element - an element responding to #get_text (REXML::Element).
# file    - an IO-like object responding to #puts.
#
# Writes nothing when the element has no text.
def write_single_element_to_file(element, file)
  text = element.get_text
  return if text.nil?

  text.value.each_line { |line| file.puts line.strip }
end
# Writes the text of each child element of +element+ to +file+.
# Children are visited with each_element_with_text (no arguments) and each
# one is handed to write_single_element_to_file.
def write_tree_to_file(element, file)
  element.each_element_with_text do |node|
    write_single_element_to_file(node, file)
  end
end
# Writes the contents of the note's 'meta' div (going by the method name,
# the note's tags) to +file+.
#
# A note without that div is skipped explicitly. Errors raised while
# writing are no longer swallowed.
def write_tags_to_file(element, file)
  content = get_contents_element_from_div(element, "class,'meta'")
  write_tree_to_file(content, file) unless content.nil?
end
# Writes the direct text of the note's 'text' field to +file+.
#
# A note without that field is skipped. Only the lookup is guarded, so
# errors raised while writing are no longer swallowed.
def write_note_contents_to_file(element, file)
  content = begin
    get_contents_element_from_span(element, "actual-name,'text'")
  rescue NoMethodError
    # The lookup dereferences the label span; treat a missing one as "no field".
    nil
  end
  write_single_element_to_file(content, file) unless content.nil?
end
# Writes the text of the child elements of the note's 'text' field to +file+
# (the markup children of a rich note, as opposed to the field's own direct
# text, which write_note_contents_to_file handles).
#
# A note without that field is skipped. Only the lookup is guarded, so
# errors raised while writing are no longer swallowed.
def write_richnote_contents_to_file(element, file)
  content = begin
    get_contents_element_from_span(element, "actual-name,'text'")
  rescue NoMethodError
    # The lookup dereferences the label span; treat a missing one as "no field".
    nil
  end
  write_tree_to_file(content, file) unless content.nil?
end
# Writes an 'Author: ' label line to +file+, followed by the text of the
# book's 'author' field when the book has one.
#
# The label is always written. Only the lookup is guarded, so errors raised
# while writing are no longer swallowed.
def write_author_to_file(element, file)
  file.puts 'Author: '
  content = begin
    get_contents_element_from_span(element, "actual-name,'author'")
  rescue NoMethodError
    # The lookup dereferences the label span; treat a missing one as "no field".
    nil
  end
  write_single_element_to_file(content, file) unless content.nil?
end
# Writes an 'ISBN: ' label line to +file+, followed by the text of the
# book's 'isbn' field when the book has one.
#
# The label is always written. Only the lookup is guarded, so errors raised
# while writing are no longer swallowed.
def write_isbn_to_file(element, file)
  file.puts 'ISBN: '
  content = begin
    get_contents_element_from_span(element, "actual-name,'isbn'")
  rescue NoMethodError
    # The lookup dereferences the label span; treat a missing one as "no field".
    nil
  end
  write_single_element_to_file(content, file) unless content.nil?
end
# Writes the text of the book's 'description' field to +file+.
#
# A book without that field is skipped. Only the lookup is guarded, so
# errors raised while writing are no longer swallowed.
def write_book_description_to_file(element, file)
  content = begin
    get_contents_element_from_span(element, "actual-name,'description'")
  rescue NoMethodError
    # The lookup dereferences the label span; treat a missing one as "no field".
    nil
  end
  write_single_element_to_file(content, file) unless content.nil?
end
# Copies the note's dates onto the exported file.
#
# The 'title' attribute of the first <abbr> in the note's 'date' div becomes
# the file's modification time and that of the second <abbr> its access time.
# NOTE(review): which Springpad date (created/updated) each <abbr> holds is
# not visible here - confirm against an export.
#
# element  - the note's element; must respond to #elements.
# filename - path of the file whose times are set.
#
# Returns the result of File.utime, or nil when either date is missing or
# cannot be parsed; the file's times are then left untouched instead of
# aborting the whole export.
def set_file_times(element, filename)
  dates = "div[contains(@class,'date')]/span[contains(@class,'content')]"
  first = element.elements["#{dates}/abbr"]
  second = element.elements["#{dates}/abbr[2]"]
  return nil if first.nil? || second.nil?

  mtime = Time.parse(first.attribute('title').to_s)
  atime = Time.parse(second.attribute('title').to_s)
  File.utime(atime, mtime, filename)
rescue ArgumentError
  # Time.parse raises ArgumentError for an absent or malformed timestamp.
  nil
end
# Ensures the folder for the note's notebook exists under OUTPUT_FOLDER.
#
# FileUtils.mkdir_p is a no-op when the folder is already there and also
# creates OUTPUT_FOLDER itself if needed, so no blanket rescue is required;
# genuine failures (e.g. permissions) now surface instead of being hidden.
def create_note_folder(element)
  FileUtils.mkdir_p(File.join(OUTPUT_FOLDER, get_foldername(element)))
end
# Exports one note: (re)creates its text file, writes the tags, the plain
# text and the rich-text children in that order, then stamps the file with
# the note's dates once it has been closed.
def write_note_to_file(element)
  path = get_filepath(element)
  File.open(path, 'w') { |out|
    write_tags_to_file(element, out)
    write_note_contents_to_file(element, out)
    write_richnote_contents_to_file(element, out)
  }
  set_file_times(element, path)
end
# Exports one book: (re)creates its text file, writes the author, ISBN and
# description in that order, then stamps the file with the book's dates once
# it has been closed.
def write_book_to_file(element)
  path = get_filepath(element)
  File.open(path, 'w') { |out|
    write_author_to_file(element, out)
    write_isbn_to_file(element, out)
    write_book_description_to_file(element, out)
  }
  set_file_times(element, path)
end
# Ensures OUTPUT_FOLDER exists.
#
# FileUtils.mkdir_p does nothing when the folder is already present, so the
# previous blanket rescue (which also hid permission errors) is not needed.
def create_output_folder
  FileUtils.mkdir_p(OUTPUT_FOLDER)
end
# Rewrites the file at +filepath+ in place so that an XML parser accepts it:
# '<br />' tags are removed and bare ' & ' is escaped.
#
# Uses String#gsub rather than gsub!: gsub! returns nil when nothing was
# replaced, which previously either crashed on the next call or replaced the
# file's content with an empty line.
#
# NOTE(review): the ampersand replacement appeared as ' & ' -> ' & ' (a
# no-op), presumably because the entity was decoded when the script was
# published; ' &amp; ' is the evident intent - confirm.
def tidy_html_file(filepath)
  tidied = File.read(filepath).gsub('<br />', '').gsub(' & ', ' &amp; ')
  File.open(filepath, 'w') { |file| file.puts tidied }
end
# Parses the file at +filepath+ as XML.
#
# The file is opened in block form so the handle is closed as soon as
# parsing finishes (it was previously left open). The unreachable `else`
# branch of the old begin/rescue has been dropped.
#
# Returns the REXML::Document, or false after printing the parser's message
# when the file is not well-formed.
def open_html_file(filepath)
  File.open(filepath) do |io|
    REXML::Document.new(io, { :compress_whitespace => :all })
  end
rescue REXML::ParseException => e
  puts 'Error opening html file, sometimes you might need to edit it manually to fix unclosed tags. See the error for more details:'
  puts e.message
  false
end
# --- Script entry point -----------------------------------------------------
# Works on a copy of index.html (from the current directory) so the original
# export is never modified.
puts 'making copy of index.html...'
FileUtils.cp('index.html', WORKING_HTML_FILEPATH)

begin
  puts 'tidying springpad html file...'
  tidy_html_file(WORKING_HTML_FILEPATH)

  puts 'opening spingpad html file...'
  springpad_doc = open_html_file(WORKING_HTML_FILEPATH)

  # open_html_file returns false when the export could not be parsed.
  if springpad_doc
    puts 'creating output folder "notes"...'
    create_output_folder

    puts 'creating folders for notes...'
    springpad_doc.elements.each("html/body/div/div[contains(@class,'instance')]") do |element|
      create_note_folder(element)
    end

    puts 'exporting notes...'
    springpad_doc.elements.each("html/body/div/div[contains(@class,'type:Note')]") do |element|
      write_note_to_file(element)
    end

    puts 'exporting books...'
    springpad_doc.elements.each("html/body/div/div[contains(@class,'type:Book')]") do |element|
      write_book_to_file(element)
    end
  end
ensure
  # Remove the working copy even when a step above raised; rm_f does not
  # raise if the file is already gone, so it cannot mask the original error.
  puts 'removing temporary files...'
  FileUtils.rm_f(WORKING_HTML_FILEPATH)
end

puts 'done!'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment