Skip to content

Instantly share code, notes, and snippets.

@swdunlop
Created December 29, 2010 06:08
Show Gist options
  • Save swdunlop/758248 to your computer and use it in GitHub Desktop.
Save swdunlop/758248 to your computer and use it in GitHub Desktop.
Decodes a binary blob encoded by Ruby's "Marshal" module.
// Copyright (C) 2010, Scott W. Dunlop <swdunlop at gmail.com>
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
// DAMAGE.
//
// In other words: You're happy to use it, but I don't claim it won't eat your
// children. Unless you wanted it to do that. That'd be extra.
package main
import "os"
import "fmt"
import "io"
// Sentinel errors used by the decoder. They are assigned in init.
var (
	// MajorNotSupported is returned by readRuby when the major version byte
	// of the stream is not 4.
	MajorNotSupported os.Error

	// ExpectedSymbol reports that a symbol was required but not found. It is
	// not referenced by the code visible in this file; it is initialised so
	// that any user returning it reports a real error rather than nil.
	ExpectedSymbol os.Error
)

// init assigns the sentinel error values.
func init() {
	MajorNotSupported = os.NewError("major version of format not supported")
	ExpectedSymbol = os.NewError("expected symbol")
}
func main() {
var err os.Error = nil
n := len(os.Args)
if n < 2 {
printUsage(1)
}
v := os.Args[1]
n -= 2
switch {
case v == "ruby" && n == 0:
err = readRubyStdin()
case v == "ruby" && n == 1:
err = readRubyFile(os.Args[2])
default:
printUsage(2)
}
switch {
case err == nil:
os.Exit(0)
case err == os.EOF:
fmt.Fprintln(os.Stderr, "\nerror: input ended before data fully unmarshalled.")
os.Exit(3)
default:
fmt.Fprintln(os.Stderr, "\nerror:", err)
os.Exit(4)
}
}
// printUsage writes the two-line command summary to standard output and then
// terminates the process with the given exit status; it does not return.
func printUsage(code int) {
	usage := []string{
		" decode ruby <src> -- decodes a ruby marshal file",
		" decode ruby -- as above, reading from stdin",
	}
	for _, line := range usage {
		fmt.Println(line)
	}
	os.Exit(code)
}
// readRubyFile opens the file at path read-only and decodes it as a Ruby
// marshal stream via readRuby. It returns the error from opening the file, or
// otherwise whatever readRuby returns.
func readRubyFile(path string) os.Error {
	f, err := os.Open(path, os.O_RDONLY, 0666)
	if err != nil {
		return err
	}
	// Release the file descriptor once decoding finishes, whatever the
	// outcome; previously the file was never closed.
	defer f.Close()
	return readRuby(f)
}
// readRubyStdin decodes a Ruby marshal stream from standard input by handing
// os.Stdin to readRuby, and returns whatever readRuby returns.
func readRubyStdin() os.Error {
return readRuby(os.Stdin)
}
// RubyDecoder carries the state needed while decoding one Ruby marshal stream.
type RubyDecoder struct {
// inp is the stream the marshalled bytes are read from.
inp io.Reader
// sym is the table of symbols seen so far. readRubySymreal stores each new
// symbol under the next unused index (the current table size), and
// readRubySymlink looks symbols up by that index.
sym map[int]string
}
// readRuby decodes a Ruby marshal stream from inp, printing each decoded
// value to standard output.
//
// The stream must start with a two-byte version header. A read failure on
// either header byte is returned as-is (os.EOF for input that ends inside the
// header). A major version other than 4 yields MajorNotSupported; a minor
// version other than 8 only produces a warning on standard error. After the
// header, values are decoded until readRubyValue reports os.EOF, which is
// treated as the normal end of input; any other error is returned.
func readRuby(inp io.Reader) os.Error {
	var rd RubyDecoder
	rd.inp = inp
	rd.sym = make(map[int]string)

	// Check the read error before looking at the byte: on failure the byte is
	// meaningless and must not be mistaken for an unsupported version.
	major, err := readByte(inp)
	if err != nil {
		return err
	}
	if major != 4 {
		return MajorNotSupported
	}

	minor, err := readByte(inp)
	if err != nil {
		return err
	}
	if minor != 8 {
		fmt.Fprintln(os.Stderr, "warning: minor version not supported: ", minor)
	}

	for {
		err = rd.readRubyValue()
		switch {
		case err == os.EOF:
			return nil
		case err != nil:
			return err
		}
	}
	return nil // unreachable; kept for compilers that insist on a final return
}
// Ported from https://github.com/ruby/ruby/raw/trunk/marshal.c, 12/28/2010
//
// readRubyValue reads one type-tag byte from the input and decodes the value
// that follows, printing it to standard output. The nil, true and false tags
// ('0', 'T', 'F') are printed directly; every other recognised tag is handed
// to the matching reader method. A read failure is returned unchanged, and an
// unrecognised tag produces an error naming the tag's numeric byte value.
func (rd *RubyDecoder) readRubyValue() os.Error {
	tag, err := readByte(rd.inp)
	if err != nil {
		return err
	}

	switch tag {
	case '{':
		return rd.readRubyHash()
	case ':':
		return rd.readRubySymreal()
	case '"':
		return rd.readRubyString()
	case 'o':
		return rd.readRubyObject()
	case 'I':
		return rd.readRubyIvar()
	case 'C':
		return rd.readRubyUclass()
	case ';':
		return rd.readRubySymlink()
	case '0':
		fmt.Print("nil")
		return nil
	case 'T':
		fmt.Print("true")
		return nil
	case 'F':
		fmt.Print("false")
		return nil
	}

	return os.NewError(fmt.Sprint("Object type ", tag, " not supported."))
}
// readRubyObject decodes the body of an object record: a symbol followed by
// an item list (see readRubyItems). The output is wrapped as "<obj SYMBOL
// ITEMS>"; the closing ">" is printed even when decoding fails part-way.
func (rd *RubyDecoder) readRubyObject() os.Error {
	fmt.Print("<obj ")
	defer fmt.Print(">")

	err := rd.readRubySymbol()
	if err == nil {
		fmt.Print(" ")
		err = rd.readRubyItems()
	}
	return err
}
// readRubyUclass decodes the body of a 'C' record as two consecutive values,
// printed as "<uclass FIRST, SECOND>". The closing ">" is printed even when
// decoding fails part-way.
func (rd *RubyDecoder) readRubyUclass() os.Error {
	fmt.Print("<uclass ")
	defer fmt.Print(">")

	if err := rd.readRubyValue(); err != nil {
		return err
	}
	fmt.Print(", ")
	return rd.readRubyValue()
}
// readRubyIvar decodes the body of an 'I' record: one value followed by an
// item list (see readRubyItems), printed as "<ivar VALUE, ITEMS>". The
// closing ">" is printed even when decoding fails part-way.
func (rd *RubyDecoder) readRubyIvar() os.Error {
	fmt.Print("<ivar ")
	defer fmt.Print(">")

	if err := rd.readRubyValue(); err != nil {
		return err
	}
	fmt.Print(", ")
	return rd.readRubyItems()
}
// readRubyHash decodes the body of a hash record: a pair count followed by
// that many key/value pairs, each element being an arbitrary value. Output has
// the form "{KEY: VALUE, KEY: VALUE}". If the count itself cannot be read
// nothing is printed; once "{" has been printed the closing "}" always
// follows, even when a later read fails.
func (rd *RubyDecoder) readRubyHash() os.Error {
	count, err := readRubyLong(rd.inp)
	if err != nil {
		return err
	}

	fmt.Print("{")
	defer fmt.Print("}")

	for i := int32(0); i < count; i++ {
		// Separator goes between pairs, never before the first one.
		if i > 0 {
			fmt.Print(", ")
		}
		if err = rd.readRubyValue(); err != nil {
			return err
		}
		fmt.Print(": ")
		if err = rd.readRubyValue(); err != nil {
			return err
		}
	}
	return nil
}
// readRubySymbol reads a tag byte that must introduce a symbol and decodes
// the symbol that follows, printing its name. A ':' tag is followed by the
// symbol's bytes and is handled by readRubySymreal; a ';' tag is followed by
// an index into the symbol table and is handled by readRubySymlink. Any other
// tag yields an error naming the tag's numeric byte value; read failures are
// returned unchanged.
func (rd *RubyDecoder) readRubySymbol() os.Error {
	tag, err := readByte(rd.inp)
	if err != nil {
		return err
	}

	switch tag {
	case ':':
		return rd.readRubySymreal()
	case ';':
		// A link carries a table index, not a length-prefixed string, so it
		// must not be decoded as a real symbol.
		return rd.readRubySymlink()
	}
	return os.NewError(fmt.Sprint("expected real or linked symbol tag, got ", tag))
}
// readRubySymlink decodes a symbol link: an integer index into the table of
// symbols already seen on this stream. The referenced symbol is printed. An
// index with no table entry yields an error; read failures are returned
// unchanged.
func (rd *RubyDecoder) readRubySymlink() os.Error {
	idx, err := readRubyLong(rd.inp)
	if err != nil {
		return err
	}

	// Test for presence explicitly: an empty string is a legitimate symbol
	// name and must not be confused with a missing entry.
	name, ok := rd.sym[int(idx)]
	if !ok {
		return os.NewError(fmt.Sprint(
			"expected symbol link less than ", len(rd.sym), ", got ", idx))
	}
	fmt.Print(name)
	return nil
}
// readRubySymreal decodes a literal symbol: a length-prefixed byte string
// read with readRubyBytes. The name is recorded in the symbol table under the
// next unused index (the table's current size) and then printed. Read
// failures are returned unchanged and leave the table untouched.
func (rd *RubyDecoder) readRubySymreal() os.Error {
	raw, err := readRubyBytes(rd.inp)
	if err != nil {
		return err
	}

	name := string(raw)
	next := len(rd.sym)
	rd.sym[next] = name
	fmt.Print(name)
	return nil
}
// readRubyString decodes a length-prefixed byte string with readRubyBytes and
// prints it between double quotes. The bytes are printed verbatim, with no
// escaping. Read failures are returned unchanged and print nothing.
func (rd *RubyDecoder) readRubyString() os.Error {
	raw, err := readRubyBytes(rd.inp)
	if err != nil {
		return err
	}

	fmt.Print("\"")
	fmt.Print(string(raw))
	fmt.Print("\"")
	return nil
}
// readRubyItems decodes an item list: a count followed by that many
// symbol/value pairs. Each pair is printed as "SYMBOL:VALUE" and pairs are
// separated by ", ". The first read failure aborts the list and is returned
// unchanged.
func (rd *RubyDecoder) readRubyItems() os.Error {
	count, err := readRubyLong(rd.inp)
	if err != nil {
		return err
	}

	for i := int32(0); i < count; i++ {
		// Separator goes between pairs, never before the first one.
		if i > 0 {
			fmt.Print(", ")
		}
		if err = rd.readRubySymbol(); err != nil {
			return err
		}
		fmt.Print(":")
		if err = rd.readRubyValue(); err != nil {
			return err
		}
	}
	return nil
}
// readRubyLong decodes one marshal-format integer from inp, following r_long
// in Ruby's marshal.c. The first byte, read as a signed value c, selects the
// encoding:
//
//	c == 0      the value 0
//	c > 4       the value c - 5
//	c < -4      the value c + 5
//	1..4        c little-endian payload bytes follow, giving a non-negative value
//	-4..-1      -c little-endian payload bytes follow, sign-extended with 0xff
//
// The result is held in an int32, so a four-byte positive payload with its top
// bit set wraps to a negative number. Any read failure is returned unchanged
// with a zero value.
func readRubyLong(inp io.Reader) (int32, os.Error) {
	b, err := readByte(inp)
	if err != nil {
		return 0, err
	}

	c := int8(b)
	switch {
	case c == 0:
		return 0, nil
	case c > 4:
		return int32(c) - 5, nil
	case c < -4:
		return int32(c) + 5, nil
	}

	// c is now in 1..4 or -4..-1: its magnitude is the payload length and its
	// sign picks the fill for the bytes that are not transmitted.
	size := int(c)
	x := int32(0)
	if c < 0 {
		// The length must be negated before looping; looping up to the
		// negative c would read no payload at all.
		size = -size
		x = -1
	}

	for i := 0; i < size; i++ {
		b, err = readByte(inp)
		if err != nil {
			return 0, err
		}
		shift := 8 * uint(i)
		x &= ^(int32(0xff) << shift) // clear the fill bits (no-op when x started at 0)
		x |= int32(b) << shift
	}
	return x, nil
}
// readRubyBytes decodes a length-prefixed byte string from inp: a
// marshal-format integer (see readRubyLong) giving the length, followed by
// that many raw bytes.
//
// It returns the bytes on success. A negative length yields an error instead
// of a panic. Input that ends before the full payload arrives yields os.EOF,
// as before; any other read failure is returned unchanged.
func readRubyBytes(inp io.Reader) ([]byte, os.Error) {
	n, err := readRubyLong(inp)
	if err != nil {
		return nil, err
	}
	if n < 0 {
		return nil, os.NewError(fmt.Sprint("negative byte string length ", n))
	}

	buf := make([]byte, n)
	// A single Read may legally return fewer bytes than asked for, so keep
	// reading until the buffer is full or the stream fails.
	got, err := io.ReadFull(inp, buf)
	if got == len(buf) {
		// Everything arrived; an error reported alongside the final bytes
		// will resurface on the next read.
		return buf, nil
	}
	if err == io.ErrUnexpectedEOF {
		// Callers compare against os.EOF to detect truncated input.
		err = os.EOF
	}
	return nil, err
}
// readByte reads exactly one byte from inp.
//
// It returns the byte with a nil error whenever a byte was obtained, even if
// the reader reported an error alongside it (that error will resurface on the
// next read). If no byte could be read it returns 0 and the reader's error,
// which is os.EOF at end of input.
func readByte(inp io.Reader) (byte, os.Error) {
	var buf [1]byte
	// io.ReadFull retries reads that return no data and no error, so a zero
	// byte is never fabricated from an empty read.
	if n, err := io.ReadFull(inp, buf[:]); n != 1 {
		return 0, err
	}
	return buf[0], nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment