Created
November 18, 2010 11:45
-
-
Save kaineer/704889 to your computer and use it in GitHub Desktop.
Имплементация btree
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby | |
class BTree | |
class Node | |
def initialize( tree, offset, start_value ) | |
@tree = tree | |
@offset = offset | |
@values = [ start_value ] | |
@subtree_offsets = [ nil ] * ( @tree.elements_per_node + 1 ) | |
flush | |
end | |
class LoadedNode < BTree::Node | |
def initialize( tree, offset, values, subtree_offsets ) | |
@tree = tree | |
@offset = offset | |
@values = values | |
@subtree_offsets = subtree_offsets | |
end | |
end | |
attr_reader :tree | |
attr_reader :offset | |
attr_reader :values | |
attr_reader :subtree_offsets | |
def flush | |
@tree.file.seek( @offset ) | |
chunk = self.serialize | |
@tree.file.write( self.serialize ) | |
end | |
def padded_values | |
@values + ( [ 0 ] * ( @tree.elements_per_node - @values.size ) ) | |
end | |
def serialize | |
[ @values.size ].pack( "V*" ) + | |
padded_values.pack( @tree.element_pattern + "*" ) + | |
@subtree_offsets.map{|v|v.nil? ? 0 : v }.pack( "V*" ) | |
end | |
def self.load( tree, offset ) | |
tree.file.seek( offset ) | |
values_count = tree.file.read( 4 ).unpack( "V" ).first | |
values = tree.file.read( tree.bytes_per_value * tree.elements_per_node ).unpack( tree.element_pattern + "*" ) | |
values = values[ 0, values_count ] | |
subtree_offsets = tree.file.read( 4 * ( tree.elements_per_node + 1 ) ).unpack( "V*" ).map{|v|v==0 ? nil : v} | |
LoadedNode.new( tree, offset, values, subtree_offsets ) | |
end | |
def where_to_insert( value ) | |
pos = 0 | |
while pos < @values.size && @values[ pos ] < value | |
pos += 1 | |
end | |
pos | |
end | |
def full? | |
@values.size >= @tree.elements_per_node | |
end | |
def subtree_node_for( insertion_point, value ) | |
offset = @subtree_offsets[ insertion_point ] | |
if offset | |
subnode = Node.load( @tree, offset ) | |
return subnode # insert here or deeper | |
else | |
subnode = @tree.new_node( value ) | |
@subtree_offsets[ insertion_point ] = subnode.offset | |
subnode = nil # do not keep reference | |
self.flush | |
return nil # value already stored | |
end | |
end | |
def insertion_node_for( value ) | |
insertion_point = where_to_insert( value ) | |
unless full? | |
return self | |
else | |
subtree_node_for( insertion_point, value ) | |
end | |
end | |
def add_value( value ) | |
insertion_point = where_to_insert( value ) | |
if full? | |
insert_into_subtree( value, insertion_point ) | |
else | |
if insertion_point < @values.size | |
@values.insert( insertion_point, value ) | |
else | |
@values << value | |
end | |
self.flush # store into file | |
end | |
end | |
def iterator | |
Iterator.new( self ) | |
end | |
class Iterator | |
def initialize( node, idx = 0 ) | |
@node = node | |
@idx = idx | |
end | |
attr_reader :idx | |
attr_reader :node | |
def serialize | |
[ @node.offset, @idx ].pack( "V*" ) | |
end | |
CHUNK_SIZE = 8 # node offset and idx | |
def self.load( tree, file ) | |
offset = file.read( 4 ).unpack( "V" ).first | |
idx = file.read( 4 ).unpack( "V" ).first | |
node = Node.load( tree, offset ) | |
Iterator.new( node, idx ) | |
end | |
def next | |
result = get_value_or_node | |
@idx += 1 | |
result | |
end | |
protected | |
def get_value_or_node | |
i = ( @idx >> 1 ) | |
if ( @idx % 2 ) == 1 # getting value | |
if i < @node.values.size | |
return @node.values[ i ] | |
else | |
return nil | |
end | |
else | |
offset = @node.subtree_offsets[ i ] | |
if offset # loading node | |
return Node.load( @node.tree, offset ) | |
else | |
@idx += 1 # getting value after empty subnode | |
return get_value_or_node | |
end | |
end | |
end | |
end | |
end | |
def initialize( file, element_pattern = "Q", bytes_per_value = 8, elements_per_node = 4 ) | |
@file = file | |
@element_pattern = element_pattern | |
@elements_per_node = elements_per_node | |
@bytes_per_value = bytes_per_value | |
@root = nil | |
end | |
def add_value( value ) | |
unless @root | |
@root = Node.new( self, 0, value ) | |
else | |
node = @root | |
insertion_node = nil | |
while true | |
insertion_node = node.insertion_node_for( value ) | |
if insertion_node == node | |
node.add_value( value ) | |
break | |
elsif insertion_node.nil? # This means we just created new node | |
break | |
else | |
node = insertion_node | |
end | |
end | |
end | |
end | |
def new_node( value ) | |
@file.seek( 0, IO::SEEK_END ) | |
pos = @file.pos | |
Node.new( self, @file.pos, value ) | |
end | |
attr_reader :element_pattern | |
attr_reader :elements_per_node | |
attr_reader :bytes_per_value | |
attr_reader :file | |
attr_reader :root | |
class Iterator | |
# | |
# | |
def initialize( tree, stack_file ) | |
@tree = tree | |
@stack_file = stack_file | |
@depth = 0 | |
@node = @tree.root | |
@node_iterator = @node.iterator | |
end | |
def next | |
result = @node_iterator.next | |
while result.is_a?( BTree::Node ) | |
push_iterator( result ) | |
result = @node_iterator.next | |
end | |
while result.nil? && @depth > 0 | |
pop_iterator | |
result = @node_iterator.next | |
end | |
result | |
end | |
def push_iterator( new_node ) | |
chunk = @node_iterator.serialize | |
@stack_file.seek( @depth * BTree::Node::Iterator::CHUNK_SIZE ) | |
@stack_file.write( chunk ) | |
@depth += 1 | |
@node_iterator = new_node.iterator | |
end | |
def pop_iterator | |
@depth -= 1 | |
@stack_file.seek( @depth * BTree::Node::Iterator::CHUNK_SIZE ) | |
@node_iterator = BTree::Node::Iterator.load( @tree, @stack_file ) | |
end | |
end | |
def iterator( stack_file ) | |
BTree::Iterator.new( self, stack_file ) | |
end | |
end | |
if $0 == __FILE__ | |
if ARGV.first | |
# Now we can sort anything.. Well, but files btree.idx and stack.idx ;) | |
btree_file = File.new( "btree.idx", "w+" ) | |
stack_file = File.new( "stack.idx", "w+" ) | |
tree = BTree.new( btree_file ) | |
# Reading file and adding into b-tree | |
input_file = File.open( ARGV.first ) | |
while !input_file.eof? | |
value = input_file.read( 8 ).unpack( "Q" ).first | |
tree.add_value( value ) unless value.nil? | |
end | |
# Let's iterate through sorted numbers | |
iterator = tree.iterator( stack_file ) | |
while ( value = iterator.next ) | |
puts value | |
end | |
else | |
puts "Usage: ruby btree.rb <filetosort>" | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment