Last active
December 21, 2015 02:19
-
-
Save jsanders/6234243 to your computer and use it in GitHub Desktop.
WIP Blob library for working with arbitrary unstructured binary data, and SHA1 implementation demonstrating its use.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Public: Represent and manipulate arbitrary data.
#
# A Blob wraps an Array of byte values and offers conversions to and from
# strings, hex and base64, plus concatenation and XOR.
class Blob
  require 'forwardable'
  extend Forwardable
  include Enumerable

  # Iteration and element assignment go straight to the wrapped byte Array.
  def_delegators :@bytes, :each, :[]=

  # Public: Create Blob from an array of bytes.
  #
  # bytes - The Array of numbers in the range [0, 255]. The Array is stored
  #         as given (it is not copied).
  #
  # Examples
  #   Blob.new([ 97, 98, 99, 100 ]).to_str
  #   # => "abcd"
  #   Blob.new([ 97, 98, 99, 100 ]).to_hex
  #   # => "61626364"
  def initialize(bytes)
    @bytes = bytes
  end

  # Public: Create Blob from string data.
  #
  # str - The String to create Blob from.
  #
  # Examples
  #   Blob.from_str('abcd').to_str
  #   # => "abcd"
  #   Blob.from_str('abcd').to_bytes
  #   # => [ 97, 98, 99, 100 ]
  #   Blob.from_str('abcd').to_hex
  #   # => "61626364"
  #
  # Returns an instance of Blob.
  def self.from_str(str)
    new(str.each_byte.to_a) # Equivalent to str.unpack('C*').
  end

  # Public: Create Blob from a String of hexadecimal digits.
  #
  # hex - The String of hex digits, e.g. "61626364".
  #
  # Returns an instance of Blob.
  def self.from_hex(hex)
    from_str([hex].pack('H*'))
  end

  # Public: Create Blob from base64 text (decoded with String#unpack 'm*').
  #
  # base64 - The base64-encoded String.
  #
  # Returns an instance of Blob.
  def self.from_base64(base64)
    from_str(base64.unpack('m*').first)
  end

  # Public: Create Blob consisting of one byte value repeated.
  #
  # howmany - The Integer number of bytes.
  # byte    - The byte value to repeat.
  #
  # Returns an instance of Blob.
  def self.bytes(howmany, byte)
    new(Array.new(howmany, byte))
  end

  # Public: Create Blob of `howmany` zero bytes.
  #
  # Returns an instance of Blob.
  def self.zeros(howmany)
    bytes(howmany, 0)
  end

  # Public: Create Blob of `howmany` pseudo-random bytes.
  #
  # Uses Kernel#rand, so the output is not suitable as key material.
  #
  # Returns an instance of Blob.
  def self.rand_bytes(howmany)
    # rand(256) covers the full byte range 0..255; rand(255) would never
    # produce 0xff.
    new(Array.new(howmany) { rand(256) })
  end

  # Public: The wrapped Array of byte values (the same object, not a copy).
  def to_bytes
    @bytes
  end

  # Public: The bytes packed into a String (pack 'C*').
  def to_str
    @bytes.pack('C*')
  end

  # Public: The bytes as a lowercase hex String, two digits per byte.
  def to_hex
    @bytes.map { |byte| format('%02x', byte) }.join
  end

  # Public: Hex representation with a space between groups of bytes.
  #
  # split - The Integer number of bytes per group (default: 2).
  #
  # Examples
  #   Blob.from_str('abcd').to_pretty_hex
  #   # => "6162 6364"
  #
  # Returns a String.
  def to_pretty_hex(split = 2)
    each_slice(split).map(&:to_hex).join(' ')
  end

  # Public: Print the blob to stdout as rows of grouped hex.
  #
  # desc    - Optional label printed, followed by ":\t", before every row.
  # per_row - The Integer number of bytes per printed row (default: 16).
  def pretty_print(desc: nil, per_row: 16)
    prefix = desc ? "#{desc}:\t" : ''
    each_slice(per_row) { |row| puts "#{prefix}#{row.to_pretty_hex}" }
  end

  # Public: Number of bytes in the blob.
  def length
    @bytes.length
  end
  alias size length

  # Public: Number of bits in the blob (8 per byte).
  def length_in_bits
    length * 8
  end
  alias size_in_bits length_in_bits

  # Public: Concatenate with anything that responds to `to_bytes`.
  #
  # Returns a new Blob.
  def +(other)
    self.class.new(@bytes + other.to_bytes)
  end

  # Public: Byte-wise XOR with anything that responds to `to_bytes`.
  #
  # The result has this blob's length. `other` must supply at least as many
  # bytes; a shorter `other` makes Integer#^ raise on the missing (nil) byte.
  #
  # Returns a new Blob.
  def ^(other)
    other_bytes = other.to_bytes
    self.class.new(@bytes.each_with_index.map { |byte, i| byte ^ other_bytes[i] })
  end

  # Public: Iterate over consecutive sub-blobs of at most `size` bytes.
  #
  # Yields each slice as a Blob. Without a block, returns an Enumerator.
  def each_slice(size)
    return to_enum(__method__, size) unless block_given?

    @bytes.each_slice(size) { |bytes| yield self.class.new(bytes) }
  end

  # Public: The first `length` bytes as a new Blob.
  def take(length)
    self.class.new(super(length))
  end

  # Public: Index or slice like Array#[], always returning a Blob.
  #
  # Array() normalizes the result: a single Integer index yields a one-byte
  # Blob and an out-of-range lookup yields an empty Blob, instead of a Blob
  # wrapping a bare Integer or nil.
  def [](*args)
    self.class.new(Array(@bytes[*args]))
  end

  # Public: Byte-wise equality with anything that responds to `to_bytes`.
  #
  # Returns false (rather than raising) for objects without `to_bytes`.
  def ==(other)
    other.respond_to?(:to_bytes) && @bytes == other.to_bytes
  end
end
# Public: Represent fixed-width words.
#
# A Word holds a non-negative Integer truncated to `width` bytes. Arithmetic
# and bitwise operators return new Words of the same width.
class Word
  require 'forwardable'
  extend Forwardable

  BYTE_SIZE = 8
  BYTE_MASK = 0xFF
  BIG_ENDIAN = :big
  LITTLE_ENDIAN = :little
  DEFAULT_ENDIAN = BIG_ENDIAN

  # Hex/pretty-printing is done on the big-endian Blob form of the word.
  def_delegators :to_blob, :to_hex, :to_pretty_hex, :pretty_print

  # Public: Create a word `width` bytes wide from given number `int`.
  # If `int` is larger than the maximum value of `width` bytes,
  # it is truncated.
  #
  # int   - The Integer number to represent as a fixed-width word.
  # width - The Integer width of the word in bytes. Defaults to the
  #         smallest power-of-2 value that can hold `int`.
  #
  # Examples
  #
  #   Word.new(0xbeef, 2).to_hex
  #   # => 'beef'
  #   Word.new(0xbeef, 1).to_hex
  #   # => 'ef'
  #   Word.new(0xdeadbeef).to_hex
  #   # => 'deadbeef'
  #   Word.new(0xdbeef).to_hex
  #   # => '000dbeef'
  def initialize(int, width = nil)
    @width = width || min_width(int)
    @int = int & mask
  end

  # Public: Build a Word from the bytes of a blob.
  #
  # blob   - Any object responding to `to_bytes` and `length`.
  # endian - BIG_ENDIAN (default) reads the first byte as most significant;
  #          any other value reads the first byte as least significant.
  #
  # Returns a Word as wide as the blob (an empty blob gives a zero-width 0).
  def self.from_blob(blob, endian: DEFAULT_ENDIAN)
    bytes = blob.to_bytes
    bytes = bytes.reverse if endian == BIG_ENDIAN
    # Seed the fold with 0 so that an empty blob does not produce nil.
    int = bytes.each_with_index.reduce(0) do |acc, (byte, i)|
      acc | (byte << (BYTE_SIZE * i))
    end
    new(int, blob.length)
  end

  # Public: The word's value as an Integer.
  def to_int
    @int
  end

  # Public: The word's bytes as an Array of Integers.
  #
  # endian - LITTLE_ENDIAN gives least-significant byte first; any other
  #          value (default BIG_ENDIAN) gives most-significant byte first.
  def to_bytes(endian: DEFAULT_ENDIAN)
    bytes = Array.new(@width) { |i| (@int >> (i * BYTE_SIZE)) & BYTE_MASK }
    endian == LITTLE_ENDIAN ? bytes : bytes.reverse
  end

  # Public: The word's bytes wrapped in a Blob.
  def to_blob(endian: DEFAULT_ENDIAN)
    Blob.new(to_bytes(endian: endian))
  end

  # Public: Integer with the low `width * 8` bits set.
  def mask
    (1 << (@width * BYTE_SIZE)) - 1
  end

  # Public: ^, &, | and + against anything responding to `to_int` (another
  # Word or a plain Integer). The result is truncated to this word's width.
  %w[^ & | +].each do |operator|
    define_method(operator) do |other|
      Word.new(@int.public_send(operator, other.to_int) & mask, @width)
    end
  end

  # Public: Bitwise complement within the word's width.
  #
  # Implemented as XOR with the mask because Integer#~ yields a negative
  # number rather than a fixed-width complement.
  def ~
    Word.new((@int ^ mask) & mask, @width)
  end

  # Public: Rotate the word's bits left.
  #
  # amount - The Integer number of bit positions (default: 1). It is reduced
  #          modulo the word's bit width, so amounts larger than the width
  #          (or negative amounts) still give a true rotation.
  #
  # Returns a new Word of the same width.
  def leftrotate(amount = 1)
    bits = @width * BYTE_SIZE
    return Word.new(@int, @width) if bits.zero? # nothing to rotate

    shift = amount % bits
    Word.new(((@int << shift) | (@int >> (bits - shift))) & mask, @width)
  end

  private

  # Internal: Smallest power-of-2 byte width that can hold `int`
  # (at least 1 byte).
  def min_width(int)
    bytes_needed = (int.bit_length + BYTE_SIZE - 1) / BYTE_SIZE
    width = 1
    width *= 2 while width < bytes_needed
    width
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'blob' | |
# Public: SHA-1 implemented on top of Blob and Word.
#
# Examples
#   SHA1.new(Blob.from_str('abc')).hexdigest
class SHA1
  ENDIAN = :big
  CHUNK_SIZE = 64 # 64 bytes (512 bits)
  WORD_SIZE = 4   # 4 bytes (32 bits)
  INITIAL_STATE = [
    0x67452301,
    0xEFCDAB89,
    0x98BADCFE,
    0x10325476,
    0xC3D2E1F0
  ].map { |i| Word.new(i, WORD_SIZE) }.freeze

  # message - The padded message Blob that `digest` will process.
  # state   - The Array of five Words that `digest` starts from.
  attr_accessor :message, :state

  # Public: Prepare a message for hashing.
  #
  # message - The Blob to hash. Anything that does not respond to
  #           `length_in_bits` (e.g. a String) is first converted with
  #           Blob.from_str.
  def initialize(message)
    message = Blob.from_str(message) unless message.respond_to?(:length_in_bits)
    @message = preprocess(message)
    @state = INITIAL_STATE
  end

  # Public: Compute the digest of the padded message, starting from `state`.
  #
  # The running state is kept in a local, so `state` is left untouched and
  # repeated calls return the same value.
  #
  # Returns a Blob made of the final state words' bytes, concatenated.
  def digest
    final_state = @message.each_slice(CHUNK_SIZE).reduce(@state) do |current, chunk|
      compress(current, chunk)
    end
    final_state.map { |word| word.to_blob(endian: ENDIAN) }.reduce(:+)
  end

  # Public: The digest as a lowercase hex String.
  def hexdigest
    digest.to_hex
  end

  private

  # Internal: Mix one 64-byte chunk into `state`; returns the new state Array.
  def compress(state, chunk)
    a, b, c, d, e = *state
    message_schedule(chunk).each_with_index do |word, i|
      f, k = round_function(i, b, c, d)
      new_a = a.leftrotate(5) + f + e + k + word
      a, b, c, d, e = new_a, a, b.leftrotate(30), c, d
    end
    state.zip([a, b, c, d, e]).map { |old, mixed| old + mixed }
  end

  # Internal: Split a chunk into 32-bit (4-byte) words, then extend the
  # list up to index 79.
  def message_schedule(chunk)
    words = chunk.each_slice(WORD_SIZE).map do |blob|
      Word.from_blob(blob, endian: ENDIAN)
    end
    (16..79).each do |i|
      words << (words[i - 3] ^ words[i - 8] ^ words[i - 14] ^ words[i - 16]).leftrotate(1)
    end
    words
  end

  # Internal: The [f, k] pair (mixing function value, round constant) for
  # round `i`.
  def round_function(i, b, c, d)
    case i
    when 0..19  then [(b & c) | ((~b) & d), 0x5A827999]
    when 20..39 then [b ^ c ^ d, 0x6ED9EBA1]
    when 40..59 then [(b & c) | (b & d) | (c & d), 0x8F1BBCDC]
    when 60..79 then [b ^ c ^ d, 0xCA62C1D6]
    end
  end

  # Internal: Pad the message so that its length is a multiple of CHUNK_SIZE.
  #
  # Appends a 0x80 byte, then zero bytes, then the original length in bits
  # as an 8-byte (64-bit) big-endian word.
  def preprocess(message)
    length_word = Word.new(message.length_in_bits, 8)
    length_blob = length_word.to_blob(endian: ENDIAN)
    pad_needed = CHUNK_SIZE - ((message.length + length_blob.length) % CHUNK_SIZE)
    # First bit of pad is 1, so first byte is 0x80.
    pad = Blob.new([0x80]) + Blob.zeros(pad_needed - 1)
    message + pad + length_blob
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment