-
-
Save naupaka/b9622a4527ea3f3508c4bae277e330ab to your computer and use it in GitHub Desktop.
This Ruby class works on the XML files of the macOS app Tinderbox and provides methods to parse attributes, links and notes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/Users/bcdav/.rbenv/shims/ruby
# frozen_string_literal: false

# Treat data read from outside the program (files, pipes) as UTF-8,
# whatever the locale of the launching environment says.
Encoding.default_external = Encoding::UTF_8
# Bernardo C. D. A. Vasconcelos #
# 2022-01-06-10-02 #
# A Ruby class for Tinderbox Documents with methods to parse attributes, links and notes.
# It deals directly with the XML file and does not rely on the application's OAS interface.
# At the moment one can only retrieve information, but not alter the document in any way.
# A usage example can be found at the bottom.
# To do: add methods for colors, menu, link types, macros, preferences, windows, searches, filters, gallery, badges
# A complete description of Tinderbox's XML is available at [aTBref 9.0](https://www.acrobatfaq.com/atbref9/index/SyntaxLibrary/TheXMLTBXformat.html)
# Begin Class
# Read-only view of a Tinderbox document, built directly from its .tbx XML file.
#
# Every public +tbx_*+ reader queries the parsed XML again on each call and
# stores its latest result in the instance variable of the same name. When no
# document has been loaded (see #initialize) the readers return nil.
class TinderboxDocument
  require 'nokogiri'

  # Attribute names that #tbx_attributes leaves out. The original author's
  # list of deprecated attributes; see
  # https://acrobatfaq.com/atbref8/index/Attributes/Attributesgroupedbypurpo/Deprecatedattributes.html
  EXCLUDED_ATTRIBUTES = %i[
    anything System User AutomaticIndent Color2 HideKeyAttributes HTMLMarkDown
    HTMLOverwriteImages KeyAttributeDateFormat KeyAttributeFont KeyAttributeFontSize
    KeyAttributes LeafBase LeafBend LeafDirection LeafTip MapBackgroundColor2
    MapPrototypeColor MapTextSize mt_allow_comments mt_allow_pings mt_convert_breaks
    mt_keywords OutlineTextSize RSSChannelTemplate RSSItemLimit RSSItemTemplate ShowTitle
    TextAlign TextExportTemplate TextPaneRatio TextPaneWidth TextSidebar
    TitleBackgroundColor TitleFont TitleForegroundColor WeblogPostID
  ].freeze

  # Links whose +slen+ is below this value are not wrapped in [[...]].
  # Links without an +slen+ attribute count as 0 and are therefore skipped too.
  # NOTE(review): the threshold of 4 is inherited from the original code; its
  # rationale is not documented -- confirm before changing it.
  MIN_TEXT_LINK_LENGTH = 4

  # Characters escaped in TSV/CSV cells, mapped to their two-character form.
  DELIMITED_ESCAPES = { "\n" => '\n', "\t" => '\t', '"' => '\"' }.freeze

  # The parsed Nokogiri document, or nil when the constructor rejected the path.
  attr_accessor :content

  # Only the writers are generated: the readers are the explicit methods
  # below, so an attr_accessor would define each reader twice.
  attr_writer :tbx_name, :tbx_links, :tbx_attributes, :tbx_all_attributes, :tbx_notes

  # Parses +tbxfile+ when its path contains ".tbx"; otherwise prints an error
  # message and leaves #content nil.
  #
  # @param tbxfile [String] path of the Tinderbox file
  def initialize(tbxfile)
    if tbxfile.include?('.tbx')
      # Block form so the file handle is closed as soon as parsing is done.
      @content = File.open(tbxfile) { |file| Nokogiri::XML(file) }
    else
      puts 'ERROR: Please select a Tinderbox file'
    end
  end

  # Name of the first <item> found under <tinderbox>.
  #
  # @return [String, nil] nil when no document is loaded or no name is found
  def tbx_name
    return unless document_loaded?

    root_item = content.at_xpath('//tinderbox//item')
    @tbx_name = root_item&.at_xpath('.//attribute[@name="Name"]')&.text
  end

  # All <link> elements of the document.
  #
  # @return [Array<Hash>, nil] one hash per link with the keys :name,
  #   :sourceid, :destid, :sstart and :slen (raw XML attribute values)
  def tbx_links
    return unless document_loaded?

    @tbx_links = content.xpath('//link').map do |node|
      {
        name: node.attr('name'),
        sourceid: node.attr('sourceid'),
        destid: node.attr('destid'),
        sstart: node.attr('sstart'),
        slen: node.attr('slen')
      }
    end
  end

  # Attribute definitions minus the names listed in EXCLUDED_ATTRIBUTES.
  #
  # @return [Hash{Symbol => Hash}, nil] same shape as #tbx_all_attributes
  def tbx_attributes
    return unless document_loaded?

    # Store the filtered hash so the instance variable matches the return value.
    @tbx_attributes = tbx_all_attributes.except(*EXCLUDED_ATTRIBUTES)
  end

  # Every <attrib> definition of the document, keyed by its Name.
  #
  # @return [Hash{Symbol => Hash}, nil] each value holds :parent, :editable,
  #   :visible, :type and :default as strings
  def tbx_all_attributes
    return unless document_loaded?

    @tbx_all_attributes = content.xpath('//attrib').each_with_object({}) do |node, definitions|
      definitions[node.attr('Name').to_s.to_sym] = {
        parent: node.attr('parent').to_s,
        editable: node.attr('editable').to_s,
        visible: node.attr('visibleInEditor').to_s,
        type: node.attr('type').to_s,
        default: node.attr('default').to_s
      }
    end
  end

  # This is the main method.
  # It retrieves all notes with all of their attributes in one of three shapes:
  # Hash (default), Array, or CSV/TSV text (useful for spreadsheets and for
  # pasting back into Tinderbox).
  #
  # @param options [Array] options[0] selects the shape (:Array, :TSV, :CSV;
  #   anything else yields a Hash keyed by note ID); options[1] may be :Links
  #   to wrap linked text passages of $Text in [[...]]
  # @return [Hash, Array, String, nil] nil when no document is loaded
  def tbx_notes(*options)
    return unless document_loaded?

    layout, extra = options
    notes = content.xpath('//tinderbox//item')
    attributes = tbx_attributes.keys
    @tbx_notes =
      case layout
      when :Array then notes.map { |note| get_note_attributes(attributes, note, :Array, extra) }
      when :TSV then delimited_table(attributes, notes, :TSV, "\t", extra)
      when :CSV then delimited_table(attributes, notes, :CSV, ', ', extra)
      else
        notes.each_with_object({}) do |note, by_id|
          # The layout slot is filled explicitly so that +extra+ (:Links)
          # still arrives in the second option position.
          note_attributes = get_note_attributes(attributes, note, :Hash, extra)
          by_id[note_attributes[:ID]] = note_attributes
        end
      end
  end

  # Collects the requested attributes of a single note.
  #
  # @param attributes [Array<Symbol>] attribute names to collect
  # @param note [Nokogiri::XML::Element] the <item> element of the note
  # @param options [Array] options[0] is the shape (:Array, :TSV, :CSV, else
  #   Hash); options[1] may be :Links (see #tbx_notes)
  # @return [Hash, Array, String, nil] Hash results omit empty values; TSV/CSV
  #   results are one row without a trailing separator; nil when +note+ is not
  #   an element
  def get_note_attributes(attributes, note, *options)
    return unless note.is_a?(Nokogiri::XML::Element)

    layout, extra = options
    values = attributes.map { |attribute| note_attribute_value(note, attribute, extra) }
    case layout
    when :Array then values
    when :TSV then values.map { |value| escape_delimited(value) }.join("\t")
    when :CSV then values.map { |value| "\"#{escape_delimited(value)}\"" }.join(', ')
    else attributes.zip(values).reject { |_attribute, value| value == '' }.to_h
    end
  end

  # Builds the value of $Container, which is not stored in the XML: the names
  # of the note's ancestors, each prefixed with "/", outermost first. The walk
  # stops at the ancestor carrying the document's root name (#tbx_name), which
  # is not included, or when the top of the tree is reached.
  #
  # @param note [Nokogiri::XML::Element] the <item> element of the note
  # @return [String, nil] e.g. "/Projects/2022"; "" for top-level notes; nil
  #   when +note+ is not an element
  def get_note_attribute_container(note)
    return unless note.is_a?(Nokogiri::XML::Element)

    root_name = tbx_name # looked up once instead of on every step
    segments = []
    ancestor = note.parent
    while ancestor
      name = ancestor.at_xpath('.//attribute[@name="Name"]')&.text
      break if name.nil? || name == root_name

      segments.unshift(name)
      ancestor = ancestor.parent
    end
    segments.map { |segment| "/#{segment}" }.join
  end

  private

  # True when #content holds a parsed XML document.
  def document_loaded?
    content.is_a?(Nokogiri::XML::Document)
  end

  # Renders a quoted header line followed by one line per note.
  def delimited_table(attributes, notes, layout, separator, extra)
    header = attributes.map { |attribute| "\"#{attribute}\"" }.join(separator)
    rows = notes.map { |note| get_note_attributes(attributes, note, layout, extra) }
    [header, *rows].join("\n")
  end

  # Resolves one attribute of +note+ to a string ('' when absent).
  def note_attribute_value(note, attribute, extra)
    case attribute
    when :ID then note.attr('ID') || ''
    when :Prototype then note.attr('proto') || ''
    when :Creator then note.attr('Creator') || ''
    when :Container then get_note_attribute_container(note)
    when :Path then "#{get_note_attribute_container(note)}/#{stored_attribute(note, 'Name')}"
    when :Text then note_text(note, extra)
    else stored_attribute(note, attribute)
    end
  end

  # Text of the note's direct <attribute name="..."> child, '' when missing.
  def stored_attribute(note, name)
    note.at_xpath("./attribute[@name=\"#{name}\"]")&.text.to_s
  end

  # Text of the note's direct <text> child; with +extra+ == :Links the
  # passages used as link anchors are wrapped in [[...]].
  def note_text(note, extra)
    text = note.at_xpath('./text')&.text.to_s
    return text if extra != :Links || text.empty?

    note_id = note.attr('ID')
    outgoing = tbx_links.select { |link| link[:sourceid] == note_id }
    wrap_text_links(text, outgoing)
  end

  # Returns a copy of +text+ with each link anchor wrapped in [[...]].
  # Anchors are handled from the end of the text backwards so that earlier
  # offsets stay valid after an insertion; anchors that overlap a later one
  # or run past the end of the text are skipped.
  # NOTE(review): assumes sstart/slen are character offsets into the text as
  # Ruby indexes it -- confirm against the TBX format description.
  def wrap_text_links(text, links)
    anchors = links.map { |link| [link[:sstart].to_i, link[:slen].to_i] }
                   .select { |start, length| start >= 0 && length >= MIN_TEXT_LINK_LENGTH }
                   .uniq
                   .sort
    linked = text.dup
    limit = linked.length
    anchors.reverse_each do |start, length|
      next if start + length > limit

      linked[start, length] = "[[#{linked[start, length]}]]"
      limit = start
    end
    linked
  end

  # Escapes newlines, tabs and double quotes so a value fits into one cell.
  # The Hash form of gsub inserts the replacements literally.
  def escape_delimited(value)
    value.to_s.gsub(/[\n\t"]/, DELIMITED_ESCAPES)
  end
end
# End Class | |
#### Use case examples ####
# Runs only when this file is executed directly, so that requiring it from
# another script just to use the class does not trigger an export.
if __FILE__ == $PROGRAM_NAME
  # Document to export: first command-line argument, else the author's sample file.
  tbx_file = ARGV.fetch(0, '/Users/bcdav/Dropbox/Github/Tindergit/Docu.tbx')
  ### First we need to create an instance of TinderboxDocument class
  doc = TinderboxDocument.new(tbx_file)
  # The constructor leaves `content` nil (after printing an error) when the
  # path is not a Tinderbox file; there is nothing to export in that case.
  if doc.content
    ## Parse all of the notes with all their attributes
    notes = doc.tbx_notes(:TSV) # To Tab-separated value
    # notes = doc.tbx_notes(:TSV, :Links) # With wiki links added
    # notes = doc.tbx_notes # Hash
    # Spreadsheet path: only a trailing ".tbx" is swapped for ".tsv", so the
    # result can never be the source document itself.
    spreadsheet_file = "#{tbx_file.delete_suffix('.tbx')}.tsv"
    # Write to file (File.write creates it when missing)
    File.write(spreadsheet_file, notes)
    # Argument-list form of system: the path is never parsed by a shell.
    system('open', spreadsheet_file)
  end
  # Other methods
  # p doc.tbx_name
  # p doc.tbx_links
  # p doc.tbx_attributes
  # p doc.tbx_all_attributes
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment