naupaka · April 4, 2022 04:35
diff --git a/Tinderbox-Ruby.rb b/Tinderbox-Ruby.rb
 #!/Users/bcdav/.rbenv/shims/ruby
 # frozen_string_literal: false

 Encoding.default_external = Encoding::UTF_8

 # Bernardo C. D. A. Vasconcelos #
 # 2022-01-06-10-02 #

 # A Ruby class for Tinderbox Documents with methods to parse attributes, links and notes.
 # It deals directly with the XML file and does not rely on the application's OAS interface.
 # At the moment one can only retrieve information, not alter the document in any way.

 # Use example can be found at the bottom.
 # To do: add methods for colors, menu, link types, macros, preferences, windows, searches, filters, gallery, badges

 # A complete description of Tinderbox's XML is available at [aTBref 9.0](https://www.acrobatfaq.com/atbref9/index/SyntaxLibrary/TheXMLTBXformat.html)

 # Begin Class
 class TinderboxDocument
   require 'nokogiri'

   # Parsed Nokogiri::XML::Document, or nil when the constructor rejected the path.
   attr_accessor :content

   # Writers only: each reader below is written by hand, recomputes its value
   # from +content+ and overwrites the matching instance variable.
   attr_writer :tbx_name, :tbx_links, :tbx_attributes, :tbx_all_attributes, :tbx_notes

   # Parses the Tinderbox file at +tbxfile+.
   # When the path does not contain '.tbx' an error is printed and +content+
   # stays nil; the readers below then return nil instead of raising.
   def initialize(tbxfile)
     if tbxfile.include?('.tbx')
       # Block form closes the file handle as soon as parsing is finished.
       @content = File.open(tbxfile) { |file| Nokogiri::XML(file) }
     else
       puts 'ERROR: Please select a Tinderbox file'
     end
   end

   # Name of the first <item> in the document (the root note).
   # Returns nil when no document is loaded or no name can be found.
   def tbx_name
     name_node = root_item&.at_xpath('.//attribute[@name="Name"]')
     @tbx_name = name_node&.text
   end

   # Every <link> element as a Hash with the keys :name, :sourceid, :destid,
   # :sstart and :slen (raw XML attribute strings, nil when absent).
   # Returns nil when no document is loaded.
   def tbx_links
     return unless document_loaded?

     @tbx_links = content.xpath('//link').map do |node|
       {
         name: node['name'],
         sourceid: node['sourceid'],
         destid: node['destid'],
         sstart: node['sstart'],
         slen: node['slen']
       }
     end
   end

   # Attribute definitions minus the grouping entries and the attributes that
   # Tinderbox has deprecated. All definitions are still available through
   # `doc.tbx_all_attributes`; filtering afterwards costs one extra Hash copy.
   #
   # See https://acrobatfaq.com/atbref8/index/Attributes/Attributesgroupedbypurpo/Deprecatedattributes.html
   #
   # Returns a Hash keyed by attribute name (Symbol), or nil without a document.
   def tbx_attributes
     return unless document_loaded?

     # The filtered Hash is both stored and returned, so the instance variable
     # and the return value always agree.
     @tbx_attributes = tbx_all_attributes.except(
       :anything, :System, :User, :AutomaticIndent, :Color2, :HideKeyAttributes, :HTMLMarkDown,
       :HTMLOverwriteImages, :KeyAttributeDateFormat, :KeyAttributeFont, :KeyAttributeFontSize,
       :KeyAttributes, :LeafBase, :LeafBend, :LeafDirection, :LeafTip, :MapBackgroundColor2,
       :MapPrototypeColor, :MapTextSize, :mt_allow_comments, :mt_allow_pings, :mt_convert_breaks,
       :mt_keywords, :OutlineTextSize, :RSSChannelTemplate, :RSSItemLimit, :RSSItemTemplate, :ShowTitle,
       :TextAlign, :TextExportTemplate, :TextPaneRatio, :TextPaneWidth, :TextSidebar,
       :TitleBackgroundColor, :TitleFont, :TitleForegroundColor, :WeblogPostID
     )
   end

   # Every <attrib> definition in the document, keyed by its Name (Symbol).
   # Each value is a Hash with :parent, :editable, :visible, :type and :default,
   # all as Strings ('' when the XML attribute is missing).
   # Returns nil when no document is loaded.
   def tbx_all_attributes
     return unless document_loaded?

     @tbx_all_attributes = content.xpath('//attrib').each_with_object({}) do |node, definitions|
       definitions[node['Name'].to_s.to_sym] = {
         parent: node['parent'].to_s,
         editable: node['editable'].to_s,
         visible: node['visibleInEditor'].to_s,
         type: node['type'].to_s,
         default: node['default'].to_s
       }
     end
   end

   # This is the main method.
   # It retrieves all of the notes with all of their attributes.
   #
   # options[0] selects the format:
   #   :Array - Array with one Array of values per note
   #   :TSV   - String: quoted header line, then one tab-separated line per note
   #   :CSV   - String: quoted header line, then one ', '-separated line per note
   #   other  - Hash keyed by note ID, each value a Hash of the non-empty attributes
   # options[1] may be :Links to wrap linked text spans in [[...]].
   #
   # Returns nil when no document is loaded.
   def tbx_notes(*options)
     return unless document_loaded?

     format, extra = options
     the_notes = content.xpath('//tinderbox//item')
     the_attributes = tbx_attributes.keys

     @tbx_notes =
       case format
       when :Array
         the_notes.map { |the_note| get_note_attributes(the_attributes, the_note, :Array, extra) }
       when :TSV, :CSV
         separator = format == :TSV ? "\t" : ', '
         # Built with map/join so an empty or single-element list cannot yield nil.
         header = the_attributes.map { |attribute| "\"#{attribute}\"" }.join(separator)
         rows = the_notes.map { |the_note| get_note_attributes(the_attributes, the_note, format, extra) }
         [header, *rows].join("\n")
       else
         the_notes.each_with_object({}) do |the_note, notes|
           # :Hash fills the format slot so `extra` stays in the second position.
           note_attributes = get_note_attributes(the_attributes, the_note, :Hash, extra)
           # Fall back to the XML ID when the document declares no ID attribute.
           notes[note_attributes[:ID] || the_note['ID']] = note_attributes
         end
       end
   end

   # Collects the values of +attributes+ (Array of Symbols) for one +note+.
   #
   # options[0] selects the result shape (:Array, :TSV, :CSV, anything else
   # gives a Hash without the empty values); options[1] may be :Links.
   # In :TSV and :CSV output, newlines, tabs and double quotes are written as
   # the two-character sequences \n, \t and \".
   #
   # Returns nil unless +note+ is a Nokogiri::XML::Element.
   def get_note_attributes(attributes, note, *options)
     return unless note.is_a?(Nokogiri::XML::Element)

     format, extra = options
     stored = stored_attribute_values(note)
     container = nil # computed at most once; shared by :Container and :Path

     values = attributes.map do |attribute|
       case attribute
       when :ID then note['ID'] || ''
       when :Prototype then note['proto'] || ''
       when :Creator then note['Creator'] || ''
       when :Container then container ||= get_note_attribute_container(note)
       when :Path then "#{container ||= get_note_attribute_container(note)}/#{stored['Name']}"
       when :Text then note_text(note, extra == :Links)
       else stored[attribute.to_s].to_s
       end
     end

     case format
     when :Array then values
     when :TSV then values.map { |value| escape_delimited(value) }.join("\t")
     when :CSV then values.map { |value| "\"#{escape_delimited(value)}\"" }.join(', ')
     else attributes.zip(values).reject { |_attribute, value| value == '' }.to_h
     end
   end

   # Builds the value of the $Container attribute, which is not stored in the XML:
   # "/Name" for every enclosing <item> below the root item, outermost first.
   # Notes directly inside the root item (and the root item itself) give ''.
   #
   # Returns nil unless +note+ is a Nokogiri::XML::Element.
   def get_note_attribute_container(note)
     return unless note.is_a?(Nokogiri::XML::Element)

     root = root_item
     names = []
     ancestor = note.parent
     # Stop on node identity rather than on the name, so a container that
     # happens to share the root note's name does not cut the path short.
     while ancestor.is_a?(Nokogiri::XML::Element) && ancestor != root
       names.unshift(own_name(ancestor)) if ancestor.name == 'item'
       ancestor = ancestor.parent
     end
     names.map { |name| "/#{name}" }.join
   end

   private

   # True when +content+ holds a parsed XML document.
   def document_loaded?
     content.is_a?(Nokogiri::XML::Document)
   end

   # First <item> under <tinderbox>, or nil.
   def root_item
     content.at_xpath('//tinderbox//item') if document_loaded?
   end

   # Text of the item's own Name attribute ('' when it has none).
   def own_name(item)
     name_node = item.at_xpath('./attribute[@name="Name"]')
     name_node ? name_node.text : ''
   end

   # Maps the name of each direct <attribute> child of +note+ to its text,
   # keeping the first occurrence. One pass replaces an XPath query per lookup.
   def stored_attribute_values(note)
     note.xpath('./attribute').each_with_object({}) do |node, values|
       key = node['name']
       values[key] = node.text unless values.key?(key)
     end
   end

   # Text of the note's <text> child ('' when absent), optionally with the
   # note's outgoing text links wrapped in [[...]].
   def note_text(note, with_links)
     text_node = note.at_xpath('./text')
     the_text = text_node ? text_node.text.to_s : ''
     with_links ? wikify_links(the_text, note['ID']) : the_text
   end

   # Wraps every linked span of +text+ that starts in the note +note_id+.
   # Spans shorter than 4 characters or outside the text are ignored.
   # NOTE(review): sstart/slen are treated as character offsets - confirm
   # against the TBX format reference.
   def wikify_links(text, note_id)
     spans = (tbx_links || [])
             .select { |link| link[:sourceid] == note_id }
             .map { |link| [link[:sstart].to_i, link[:slen].to_i] }
             .select { |start, length| length >= 4 && start >= 0 && start + length <= text.length }

     linked = text.dup
     # Work from the end of the text so earlier offsets remain valid.
     spans.uniq.sort.reverse_each do |start, length|
       linked[start, length] = "[[#{linked[start, length]}]]"
     end
     linked
   end

   # Replaces newline, tab and double quote with the literal sequences \n, \t
   # and \" so that a value always stays on one line and in one column.
   def escape_delimited(value)
     value.to_s.gsub(/[\n\t"]/, "\n" => '\n', "\t" => '\t', '"' => '\"')
   end
 end
 # End Class


 #### Use case examples ####
 # The example runs only when this file is executed directly, so that loading
 # the class from another script does not write or open any file.
 if __FILE__ == $PROGRAM_NAME
   tbx_file = '/Users/bcdav/Dropbox/Github/Tindergit/Docu.tbx'

   ### First we need to create an instance of TinderboxDocument class
   doc = TinderboxDocument.new(tbx_file)

   ## Parse all of the notes with all their attributes
   notes = doc.tbx_notes(:TSV) # To Tab-separated value
   # notes = doc.tbx_notes(:TSV, :Links) # With wiki links added
   # notes = doc.tbx_notes # Hash

   if notes
     # Only the trailing extension is swapped; a '.tbx' elsewhere in the path is kept.
     spreadsheet_file = "#{tbx_file.delete_suffix('.tbx')}.tsv"

     # File.write creates the file, so no separate `touch` is needed.
     File.write(spreadsheet_file, notes)

     # Argument-list form: the path is handed to `open` without shell parsing.
     system('open', spreadsheet_file)
   end
 end


 # Other methods

 # p doc.tbx_name
 # p doc.tbx_links
 # p doc.tbx_attributes
 # p doc.tbx_all_attributes
	#!/Users/bcdav/.rbenv/shims/ruby
	# frozen_string_literal: false

	Encoding.default_external = Encoding::UTF_8

	# Bernardo C. D. A. Vasconcelos #
	# 2022-01-06-10-02 #

	# A Ruby class for Tinderbox Documents with methods to parse attributes, links and notes.
	# It deals directly with the XML file and does not rely on the application's OAS interface.
	# At the moment one can only retrieve information, not alter the document in any way.

	# Use example can be found at the bottom.
	# To do: add methods for colors, menu, link types, macros, preferences, windows, searches, filters, gallery, badges

	# A complete description of Tinderbox's XML is available at [aTBref 9.0](https://www.acrobatfaq.com/atbref9/index/SyntaxLibrary/TheXMLTBXformat.html)

	# Begin Class
	class TinderboxDocument
	  require 'nokogiri'

	  # Parsed Nokogiri::XML::Document, or nil when the constructor rejected the path.
	  attr_accessor :content

	  # Writers only: each reader below is written by hand, recomputes its value
	  # from +content+ and overwrites the matching instance variable.
	  attr_writer :tbx_name, :tbx_links, :tbx_attributes, :tbx_all_attributes, :tbx_notes

	  # Parses the Tinderbox file at +tbxfile+.
	  # When the path does not contain '.tbx' an error is printed and +content+
	  # stays nil; the readers below then return nil instead of raising.
	  def initialize(tbxfile)
	    if tbxfile.include?('.tbx')
	      # Block form closes the file handle as soon as parsing is finished.
	      @content = File.open(tbxfile) { |file| Nokogiri::XML(file) }
	    else
	      puts 'ERROR: Please select a Tinderbox file'
	    end
	  end

	  # Name of the first <item> in the document (the root note).
	  # Returns nil when no document is loaded or no name can be found.
	  def tbx_name
	    name_node = root_item&.at_xpath('.//attribute[@name="Name"]')
	    @tbx_name = name_node&.text
	  end

	  # Every <link> element as a Hash with the keys :name, :sourceid, :destid,
	  # :sstart and :slen (raw XML attribute strings, nil when absent).
	  # Returns nil when no document is loaded.
	  def tbx_links
	    return unless document_loaded?

	    @tbx_links = content.xpath('//link').map do |node|
	      {
	        name: node['name'],
	        sourceid: node['sourceid'],
	        destid: node['destid'],
	        sstart: node['sstart'],
	        slen: node['slen']
	      }
	    end
	  end

	  # Attribute definitions minus the grouping entries and the attributes that
	  # Tinderbox has deprecated. All definitions are still available through
	  # `doc.tbx_all_attributes`; filtering afterwards costs one extra Hash copy.
	  #
	  # See https://acrobatfaq.com/atbref8/index/Attributes/Attributesgroupedbypurpo/Deprecatedattributes.html
	  #
	  # Returns a Hash keyed by attribute name (Symbol), or nil without a document.
	  def tbx_attributes
	    return unless document_loaded?

	    # The filtered Hash is both stored and returned, so the instance variable
	    # and the return value always agree.
	    @tbx_attributes = tbx_all_attributes.except(
	      :anything, :System, :User, :AutomaticIndent, :Color2, :HideKeyAttributes, :HTMLMarkDown,
	      :HTMLOverwriteImages, :KeyAttributeDateFormat, :KeyAttributeFont, :KeyAttributeFontSize,
	      :KeyAttributes, :LeafBase, :LeafBend, :LeafDirection, :LeafTip, :MapBackgroundColor2,
	      :MapPrototypeColor, :MapTextSize, :mt_allow_comments, :mt_allow_pings, :mt_convert_breaks,
	      :mt_keywords, :OutlineTextSize, :RSSChannelTemplate, :RSSItemLimit, :RSSItemTemplate, :ShowTitle,
	      :TextAlign, :TextExportTemplate, :TextPaneRatio, :TextPaneWidth, :TextSidebar,
	      :TitleBackgroundColor, :TitleFont, :TitleForegroundColor, :WeblogPostID
	    )
	  end

	  # Every <attrib> definition in the document, keyed by its Name (Symbol).
	  # Each value is a Hash with :parent, :editable, :visible, :type and :default,
	  # all as Strings ('' when the XML attribute is missing).
	  # Returns nil when no document is loaded.
	  def tbx_all_attributes
	    return unless document_loaded?

	    @tbx_all_attributes = content.xpath('//attrib').each_with_object({}) do |node, definitions|
	      definitions[node['Name'].to_s.to_sym] = {
	        parent: node['parent'].to_s,
	        editable: node['editable'].to_s,
	        visible: node['visibleInEditor'].to_s,
	        type: node['type'].to_s,
	        default: node['default'].to_s
	      }
	    end
	  end

	  # This is the main method.
	  # It retrieves all of the notes with all of their attributes.
	  #
	  # options[0] selects the format:
	  #   :Array - Array with one Array of values per note
	  #   :TSV   - String: quoted header line, then one tab-separated line per note
	  #   :CSV   - String: quoted header line, then one ', '-separated line per note
	  #   other  - Hash keyed by note ID, each value a Hash of the non-empty attributes
	  # options[1] may be :Links to wrap linked text spans in [[...]].
	  #
	  # Returns nil when no document is loaded.
	  def tbx_notes(*options)
	    return unless document_loaded?

	    format, extra = options
	    the_notes = content.xpath('//tinderbox//item')
	    the_attributes = tbx_attributes.keys

	    @tbx_notes =
	      case format
	      when :Array
	        the_notes.map { |the_note| get_note_attributes(the_attributes, the_note, :Array, extra) }
	      when :TSV, :CSV
	        separator = format == :TSV ? "\t" : ', '
	        # Built with map/join so an empty or single-element list cannot yield nil.
	        header = the_attributes.map { |attribute| "\"#{attribute}\"" }.join(separator)
	        rows = the_notes.map { |the_note| get_note_attributes(the_attributes, the_note, format, extra) }
	        [header, *rows].join("\n")
	      else
	        the_notes.each_with_object({}) do |the_note, notes|
	          # :Hash fills the format slot so `extra` stays in the second position.
	          note_attributes = get_note_attributes(the_attributes, the_note, :Hash, extra)
	          # Fall back to the XML ID when the document declares no ID attribute.
	          notes[note_attributes[:ID] || the_note['ID']] = note_attributes
	        end
	      end
	  end

	  # Collects the values of +attributes+ (Array of Symbols) for one +note+.
	  #
	  # options[0] selects the result shape (:Array, :TSV, :CSV, anything else
	  # gives a Hash without the empty values); options[1] may be :Links.
	  # In :TSV and :CSV output, newlines, tabs and double quotes are written as
	  # the two-character sequences \n, \t and \".
	  #
	  # Returns nil unless +note+ is a Nokogiri::XML::Element.
	  def get_note_attributes(attributes, note, *options)
	    return unless note.is_a?(Nokogiri::XML::Element)

	    format, extra = options
	    stored = stored_attribute_values(note)
	    container = nil # computed at most once; shared by :Container and :Path

	    values = attributes.map do |attribute|
	      case attribute
	      when :ID then note['ID'] || ''
	      when :Prototype then note['proto'] || ''
	      when :Creator then note['Creator'] || ''
	      when :Container then container ||= get_note_attribute_container(note)
	      when :Path then "#{container ||= get_note_attribute_container(note)}/#{stored['Name']}"
	      when :Text then note_text(note, extra == :Links)
	      else stored[attribute.to_s].to_s
	      end
	    end

	    case format
	    when :Array then values
	    when :TSV then values.map { |value| escape_delimited(value) }.join("\t")
	    when :CSV then values.map { |value| "\"#{escape_delimited(value)}\"" }.join(', ')
	    else attributes.zip(values).reject { |_attribute, value| value == '' }.to_h
	    end
	  end

	  # Builds the value of the $Container attribute, which is not stored in the XML:
	  # "/Name" for every enclosing <item> below the root item, outermost first.
	  # Notes directly inside the root item (and the root item itself) give ''.
	  #
	  # Returns nil unless +note+ is a Nokogiri::XML::Element.
	  def get_note_attribute_container(note)
	    return unless note.is_a?(Nokogiri::XML::Element)

	    root = root_item
	    names = []
	    ancestor = note.parent
	    # Stop on node identity rather than on the name, so a container that
	    # happens to share the root note's name does not cut the path short.
	    while ancestor.is_a?(Nokogiri::XML::Element) && ancestor != root
	      names.unshift(own_name(ancestor)) if ancestor.name == 'item'
	      ancestor = ancestor.parent
	    end
	    names.map { |name| "/#{name}" }.join
	  end

	  private

	  # True when +content+ holds a parsed XML document.
	  def document_loaded?
	    content.is_a?(Nokogiri::XML::Document)
	  end

	  # First <item> under <tinderbox>, or nil.
	  def root_item
	    content.at_xpath('//tinderbox//item') if document_loaded?
	  end

	  # Text of the item's own Name attribute ('' when it has none).
	  def own_name(item)
	    name_node = item.at_xpath('./attribute[@name="Name"]')
	    name_node ? name_node.text : ''
	  end

	  # Maps the name of each direct <attribute> child of +note+ to its text,
	  # keeping the first occurrence. One pass replaces an XPath query per lookup.
	  def stored_attribute_values(note)
	    note.xpath('./attribute').each_with_object({}) do |node, values|
	      key = node['name']
	      values[key] = node.text unless values.key?(key)
	    end
	  end

	  # Text of the note's <text> child ('' when absent), optionally with the
	  # note's outgoing text links wrapped in [[...]].
	  def note_text(note, with_links)
	    text_node = note.at_xpath('./text')
	    the_text = text_node ? text_node.text.to_s : ''
	    with_links ? wikify_links(the_text, note['ID']) : the_text
	  end

	  # Wraps every linked span of +text+ that starts in the note +note_id+.
	  # Spans shorter than 4 characters or outside the text are ignored.
	  # NOTE(review): sstart/slen are treated as character offsets - confirm
	  # against the TBX format reference.
	  def wikify_links(text, note_id)
	    spans = (tbx_links || [])
	            .select { |link| link[:sourceid] == note_id }
	            .map { |link| [link[:sstart].to_i, link[:slen].to_i] }
	            .select { |start, length| length >= 4 && start >= 0 && start + length <= text.length }

	    linked = text.dup
	    # Work from the end of the text so earlier offsets remain valid.
	    spans.uniq.sort.reverse_each do |start, length|
	      linked[start, length] = "[[#{linked[start, length]}]]"
	    end
	    linked
	  end

	  # Replaces newline, tab and double quote with the literal sequences \n, \t
	  # and \" so that a value always stays on one line and in one column.
	  def escape_delimited(value)
	    value.to_s.gsub(/[\n\t"]/, "\n" => '\n', "\t" => '\t', '"' => '\"')
	  end
	end
	# End Class


	#### Use case examples ####
	# The example runs only when this file is executed directly, so that loading
	# the class from another script does not write or open any file.
	if __FILE__ == $PROGRAM_NAME
	  tbx_file = '/Users/bcdav/Dropbox/Github/Tindergit/Docu.tbx'

	  ### First we need to create an instance of TinderboxDocument class
	  doc = TinderboxDocument.new(tbx_file)

	  ## Parse all of the notes with all their attributes
	  notes = doc.tbx_notes(:TSV) # To Tab-separated value
	  # notes = doc.tbx_notes(:TSV, :Links) # With wiki links added
	  # notes = doc.tbx_notes # Hash

	  if notes
	    # Only the trailing extension is swapped; a '.tbx' elsewhere in the path is kept.
	    spreadsheet_file = "#{tbx_file.delete_suffix('.tbx')}.tsv"

	    # File.write creates the file, so no separate `touch` is needed.
	    File.write(spreadsheet_file, notes)

	    # Argument-list form: the path is handed to `open` without shell parsing.
	    system('open', spreadsheet_file)
	  end
	end


	# Other methods

	# p doc.tbx_name
	# p doc.tbx_links
	# p doc.tbx_attributes
	# p doc.tbx_all_attributes