Created
April 5, 2014 18:40
-
-
Save smpallen99/9996137 to your computer and use it in GitHub Desktop.
An approach to handling serialized binary data (e.g., data exchanged with a C program)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule CStructure do
  @moduledoc """
  Parses a binary received from the line that was created by sending a C structure.

  The structure is defined by a keyword list of the field name, the message type
  and the size of the field.

  Integer fields are decoded according to the configured endianness and string
  fields have 0 padding removed.

  The output is a keyword list of the field name and the resulting data.

  Options:

    * schema - the list of field definitions
    * endian - the endianness [:little, :big] default: :little

  Field descriptors:

    * `bits` or `[integer: bits]` - unsigned integer
    * `[string: bytes]` - string with all 0 bytes removed
    * `[record: Module]` - nested record whose module also uses `CStructure`
    * `[list: {descriptor, count}]` - `count` consecutive elements, where
      `descriptor` is a bit count or one of the descriptors above

  Notes:

    * integer lengths are specified in bits
    * string lengths are specified in byte count
    * defaults to integer
    * defaults to little endian
    * serialization does not support list fields

  ## Examples

      iex(1)> schema = [one: [string: 5], two: [integer: 8], three: 32]
      [one: [string: 5], two: [integer: 8], three: 32]
      iex(2)> binary = <<0,"ab",0,0>> <> <<99, 1,2,3,4>>
      <<0, 97, 98, 0, 0, 99, 1, 2, 3, 4>>
      iex(3)> CStructure.build(binary, schema)
      [one: "ab", two: 99, three: 67305985]
      iex(4)>

  Another example is how to wrap it in a record for easier access

      iex(1)> defrecord MyDataStr, one: "", two: 0, three: 0 do
      ...(1)>   use CStructure, schema: [one: [string: 5], two: 8, three: 32]
      ...(1)> end
      {:module, MyDataStr,
       <<70, 79, 82, 49, 0, 0, 21, 40, 66, 69, 65, 77, 65, 116, 111, 109, 0, 0, 1, 33, 0, 0, 0, 33, 16, 69, 108, 105, 120, 105, 114, 46, 77, 121, 68, 97, 116, 97, 83, 116, 114, 8, 95, 95, 105, 110, 102, 111, 95, 95, ...>>,
       {:load, 1}}
      iex(2)> data = MyDataStr.load(<<0,"ab",0,0>> <> <<99, 1,2,3,4>>)
      MyDataStr[one: "ab", two: 99, three: 67305985]
      iex(3)> data.one
      "ab"
      iex(4)> data.two
      99
      iex(5)> data.three
      67305985
      iex(6)>

      iex(1)> defrecord MyDataStr, one: "", two: 0, three: 0 do
      ...(1)>   use CStructure, endian: :big, schema: [one: [string: 5], two: 8, three: 32]
      ...(1)> end
  """

  # Injects schema/0,1, load/1, serialize/1 and size/0,1 into the using record
  # module. `opts` is validated at macro-expansion time, so a bad :schema or
  # :endian aborts compilation of the using module with a throw.
  defmacro __using__(opts) do
    fields = Keyword.get(opts, :schema)
    if not is_list(fields), do: throw({:error, "schema must be a list"})

    endian = Keyword.get(opts, :endian, :little)

    if not (endian in [:big, :little]) do
      # inspect/1 keeps the message printable even when the option is not an atom.
      throw({:error, "unsupported endian: #{inspect(endian)}"})
    end

    quote do
      import CStructure

      # The one-argument variants exist so the functions can be called on a
      # record instance (`data.schema`, `data.size`); the instance is ignored.
      def schema(_record), do: unquote(fields)
      def schema(), do: unquote(fields)

      def load(binary) do
        CStructure.build(binary, unquote(fields), unquote(endian)) |> __MODULE__.new()
      end

      def serialize(r) do
        CStructure.serialize(r.to_keywords, unquote(fields), unquote(endian))
      end

      def size(_record), do: size()

      def size() do
        CStructure.get_size(unquote(fields))
      end
    end
  end

  @doc """
  Parse the binary message given the provided schema keyword list.

  Return a keyword list of the parsed data, in schema order. Parsing stops as
  soon as the binary is exhausted, so trailing fields for which no data is left
  are omitted from the result.
  """
  def build(binary, schema, endian \\ :little) when is_binary(binary) do
    _build([], binary, schema, endian)
  end

  @doc """
  Serialize a data structure defined by the schema macro.

  `list` is a keyword list holding a value for every field in `schema`.
  Returns the binary data.
  """
  def serialize(list, schema, endian \\ :little) when is_list(list) do
    _serialize(<<>>, list, schema, endian)
  end

  @doc """
  Returns the total size of `schema` in bits.

  Throws `{:error, "invalid descriptor"}` for an unrecognised field descriptor.
  """
  def get_size(schema) do
    Enum.reduce(schema, 0, fn({_name, descriptor}, acc) -> acc + _get_size(descriptor) end)
  end

  ###############
  # Private helpers

  # Size in bits of a single field descriptor.
  defp _get_size(bits) when is_integer(bits), do: bits
  defp _get_size([{:string, len}]), do: len * 8
  defp _get_size([{:integer, len}]), do: len
  defp _get_size([{:record, module}]), do: module.size()
  defp _get_size([{:list, {bits, count}}]) when is_integer(bits), do: bits * count
  defp _get_size([{:list, {descriptor, count}}]), do: _get_size(descriptor) * count
  defp _get_size(_), do: throw({:error, "invalid descriptor"})

  # Splits a field descriptor into {type, spec}. Anything that is not a
  # single-pair keyword list is treated as an integer width in bits.
  defp _normalize([{type, spec}]), do: {type, spec}
  defp _normalize(bits), do: {:integer, bits}

  ###############
  # Loading

  # Recursive builder. Converts the provided binary data into a keyword list of
  # the record fields by applying the provided schema to the binary data.
  # `result` is accumulated in reverse and flipped once at the end.
  defp _build(result, _binary, [], _endian), do: Enum.reverse(result)
  defp _build(result, <<>>, _schema, _endian), do: Enum.reverse(result)

  defp _build(result, binary, [{name, descriptor} | tail], endian) do
    {type, spec} = _normalize(descriptor)
    {value, rest} = construct(type, binary, spec, endian)
    _build([{name, value} | result], rest, tail, endian)
  end

  # Decodes a schema tuple's binary data.
  # returns a tuple {data, remaining_binary}
  defp construct(:list, binary, {descriptor, count}, endian) do
    {type, spec} = _normalize(descriptor)

    # :lists.seq/2 yields [] for a count of 0, whereas 1..0 is a descending
    # range that would decode two elements.
    Enum.map_reduce(:lists.seq(1, count), binary, fn(_, acc) ->
      construct(type, acc, spec, endian)
    end)
  end

  defp construct(:integer, binary, bits, endian) do
    {bytes, rest} = :erlang.split_binary(binary, div(bits, 8))
    {:binary.decode_unsigned(bytes, endian), rest}
  end

  defp construct(:string, binary, bytes, _endian) do
    {field, rest} = :erlang.split_binary(binary, bytes)
    # Every 0 byte is dropped, not only trailing padding.
    {:binary.replace(field, <<0>>, <<>>, [:global]), rest}
  end

  defp construct(:record, binary, module, _endian) do
    needed = div(module.size(), 8)

    if byte_size(binary) >= needed do
      {record_bytes, rest} = :erlang.split_binary(binary, needed)
      {module.load(record_bytes), rest}
    else
      # Not enough data left for a whole record: yield nil and consume the rest.
      {nil, ""}
    end
  end

  ###################
  # Serialization

  # Walks the schema, appending each field's encoding to `result`.
  defp _serialize(result, _list, [], _endian), do: result

  defp _serialize(result, list, [{name, descriptor} | tail], endian) do
    {type, spec} = _normalize(descriptor)
    {binary, rest} = deconstruct(type, list, name, spec, endian)
    _serialize(result <> binary, rest, tail, endian)
  end

  # Encodes the value stored under `name`.
  # returns a tuple {binary, list_without_name}
  # There is no clause for :list; serializing a list field is not supported.
  defp deconstruct(:integer, list, name, bits, endian) do
    unpadded = :binary.encode_unsigned(Keyword.get(list, name), endian)
    # Negative when the value does not fit the field, which makes the
    # construction below raise.
    padding_bits = bits - bit_size(unpadded)
    padding = <<0::size(padding_bits)>>

    binary =
      if endian == :little do
        unpadded <> padding
      else
        padding <> unpadded
      end

    {binary, Keyword.delete(list, name)}
  end

  defp deconstruct(:string, list, name, bytes, _endian) do
    value = Keyword.get(list, name)
    len = byte_size(value)

    binary =
      if len < bytes do
        # Pad short values with 0 bytes up to the field width.
        value <> :binary.copy(<<0>>, bytes - len)
      else
        # Truncate values that are too long for the field.
        :binary.part(value, 0, bytes)
      end

    {binary, Keyword.delete(list, name)}
  end

  defp deconstruct(:record, list, name, _module_name, _endian) do
    record = Keyword.get(list, name)
    {record.serialize, Keyword.delete(list, name)}
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Code.require_file "../../../test_helper.exs", __ENV__.file

# Amrita specs for CStructure: direct build/serialize calls, plus record
# modules that pull in load/serialize/size through `use CStructure`.
defmodule MdseTest.CStructure do
  use Amrita.Sweet

  describe "build" do
    it "extracts the int and short data" do
      binary = <<3, 0, 0, 0, 1, 2>>
      assert CStructure.build(binary, [int_val: 32, word_val: 16]) == [int_val: 3, word_val: 0x201]
    end
  end

  describe "serialize" do
    it "handles integer data" do
      binary = <<2,3,0,0, 99, 1,2,3,4>>
      schema = [one: 32, two: 8, three: 32]
      list = CStructure.build(binary, schema)
      assert CStructure.serialize(list, schema) == binary
    end

    it "handles integer data as option" do
      binary = <<2,3,0,0, 99, 1,2,3,4>>
      schema = [one: [integer: 32], two: [integer: 8], three: 32]
      list = CStructure.build(binary, schema)
      assert CStructure.serialize(list, schema) == binary
    end

    it "handles string data" do
      binary = <<2,3>> <> "test"
      schema = [one: 16, two: [string: 4]]
      list = CStructure.build(binary, schema)
      assert CStructure.serialize(list, schema) == binary
    end

    it "handles short string data" do
      binary = <<2,3>> <> "test" <> <<0,0>>
      schema = [one: 16, two: [string: 6]]
      list = CStructure.build(binary, schema)
      assert CStructure.serialize(list, schema) == binary
    end

    it "handles long string data" do
      binary = <<2,3>> <> "test"
      schema = [one: 16, two: [string: 4]]
      list = [one: 0x302, two: "testing"]
      assert CStructure.serialize(list, schema) == binary
    end
  end

  defrecord MyData, one: 0, two: 0, three: 0 do
    use CStructure, schema: [one: 32, two: 8, three: 32]
  end

  defrecord MyDataInt, one: '', two: 0, three: 0 do
    use CStructure, schema: [one: [integer: 32], two: 8, three: 32]
  end

  defrecord MyDataStr, one: "", two: 0, three: 0 do
    use CStructure, schema: [one: [string: 5], two: 8, three: 32]
  end

  defrecord MyD, one: 0, two: 0 do
    use CStructure, schema: [one: 16, two: 32]
  end

  defrecord MyDataStrBig, one: "", two: 0, three: 0 do
    use CStructure, schema: [one: [string: 5], two: 8, three: 32], endian: :big
  end

  defrecord MyDataStrLittle, one: "", two: 0, three: 0 do
    use CStructure, schema: [one: [string: 5], two: 8, three: 32], endian: :little
  end

  it "handles new schema" do
    data = MyD.new
    assert data.schema == [one: 16, two: 32]
  end

  it "loads data" do
    binary = <<1,2,3,4,5,6>>
    data = MyD.load binary
    assert data.one == 0x201
    assert data.two == 0x6050403
  end

  it "handles loading long form" do
    data = MyData.load(<<2,3,0,0, 99, 1,2,3,4>>)
    assert data.one == 0x0302
    assert data.two == 99
    assert data.three == 0x04030201
  end

  it "handles loading long form for integer field" do
    data = MyDataInt.load(<<2,3,0,0, 99, 1,2,3,4>>)
    assert data.one == 0x0302
    assert data.two == 99
    assert data.three == 0x04030201
  end

  it "handles loading long form for string field" do
    data = MyDataStr.load("steve" <> <<99, 1,2,3,4>>)
    assert data.one == "steve"
    assert data.two == 99
    assert data.three == 0x04030201
    data2 = MyDataStr.load(<<0,"ab",0,0>> <> <<99, 1,2,3,4>>)
    assert data2.one == "ab"
    assert data2.two == 99
    assert data2.three == 0x04030201
  end

  it "handles loading long form for string field little" do
    data = MyDataStrLittle.load("steve" <> <<99, 1,2,3,4>>)
    assert data.one == "steve"
    assert data.two == 99
    assert data.three == 0x04030201
    # Both loads go through the explicitly little-endian record under test.
    data2 = MyDataStrLittle.load(<<0,"ab",0,0>> <> <<99, 1,2,3,4>>)
    assert data2.one == "ab"
    assert data2.two == 99
    assert data2.three == 0x04030201
  end

  it "handles loading long form for string field big" do
    data = MyDataStrBig.load("steve" <> <<99, 1,2,3,4>>)
    assert data.one == "steve"
    assert data.two == 99
    assert data.three == 0x1020304
    data2 = MyDataStrBig.load(<<0,"ab",0,0>> <> <<99, 1,2,3,4>>)
    assert data2.one == "ab"
    assert data2.two == 99
    assert data2.three == 0x1020304
  end

  it "serializes string data" do
    binary = "steve" <> <<99, 1,2,3,4>>
    data = MyDataStr.load(binary)
    assert data.serialize == binary
  end

  defrecord LongTest, one: 0, two: 0, three: 99 do
    use CStructure, schema: [one: 32, two: 16, three: 8], endian: :big
  end

  # Fields with no data left in the binary keep the record's default value.
  it "loads shorter binary data" do
    msg = <<0x1020304::32, 0x506::16>>
    data = LongTest.load msg
    assert data.one == 0x1020304
    assert data.two == 0x506
    assert data.three == 99
  end

  it "loads longer binary data" do
    msg = <<0x1020304::32, 0x506::16, 0x99ff::16>>
    data = LongTest.load msg
    assert data.one == 0x1020304
    assert data.two == 0x506
    assert data.three == 0x99
  end

  defrecord LongStrTest, one: 0, two: "", three: 99 do
    use CStructure, schema: [one: 32, two: [string: 5], three: 8], endian: :big
  end

  it "loads shorter string data" do
    msg = <<1::32, "abcde">>
    data = LongStrTest.load msg
    assert data.one == 1
    assert data.two == "abcde"
    assert data.three == 99
  end

  it "loads longer string data" do
    msg = <<1::32, "abcde", 0x88ff::16>>
    data = LongStrTest.load msg
    assert data.one == 1
    assert data.two == "abcde"
    assert data.three == 0x88
  end

  defrecord MyMixed, one: "", two: 0, three: 0 do
    use CStructure, schema: [one: [string: 5], two: 8, three: [integer: 32]]
  end

  defrecord SubData, one: 0, two: 0 do
    use CStructure, endian: :big, schema: [ one: 16, two: 16 ]
  end

  defrecord Nested, one: 0, nested: nil, three: 0 do
    use CStructure, endian: :big, schema: [
      one: 32,
      nested: [record: SubData],
      three: 16
    ]
  end

  describe "size" do
    it "returns size of integer structure" do
      assert MyData.size == 72
    end

    it "returns size of mixed string integer structure" do
      assert MyMixed.size == 80
    end

    it "returns size of nested record" do
      assert Nested.size == 80
    end
  end

  it "loads nested records" do
    msg = <<0xffffffff::32, 1::16, 2::16, 0x1010::16>>
    data = Nested.load msg
    data.one |> 0xffffffff
    data.three |> 0x1010
    data.nested.one |> 1
    data.nested.two |> 2
  end

  it "serilizes nested records" do
    msg = <<0xffffffff::32, 1::16, 2::16, 0x1010::16>>
    data = Nested.load msg
    assert data.serialize == msg
  end

  defrecord MyListInt, one: 32, two: [] do
    use CStructure, schema: [one: 32, two: [list: {16, 2}]], endian: :big
  end

  it "loads list" do
    msg = <<0x1234::32, 1::16, 2::16>>
    data = MyListInt.load msg
    assert data == MyListInt.new([{:one, 0x1234}, {:two, [0x0001, 0x0002]}])
    assert data.one == 0x1234
    assert Enum.at(data.two, 0) == 1
    assert Enum.at(data.two, 1) == 2
  end

  defrecord MyListStr, one: 32, two: [] do
    use CStructure, schema: [one: 32, two: [list: {[string: 2], 3}]], endian: :big
  end

  it "gets size of string array" do
    assert MyListStr.size == 80
  end

  it "load list of strings" do
    msg = <<0x4321::32, "abcdef">>
    data = MyListStr.load msg
    assert data.one == 0x4321
    assert Enum.at(data.two, 0) == "ab"
    assert Enum.at(data.two, 1) == "cd"
    assert Enum.at(data.two, 2) == "ef"
  end

  defrecord SubData2, f1: 0, f2: 0 do
    use CStructure, endian: :big, schema: [ f1: 16, f2: 16 ]
  end

  defrecord MyListRecords, one: 0, two: [], three: 0 do
    use CStructure, schema: [one: 32, two: [list: {[record: SubData2], 3}], three: 8],
                    endian: :big
  end

  it "gets size of record array" do
    assert MyListRecords.size == 136
  end

  it "loads a list of records" do
    msg = <<0x9999::32, 0x100::16, 0x101::16,
            0x102::16, 0x103::16,
            0xf104::16, 0x9823::16,
            0xfe::8>>
    data = MyListRecords.load msg
    assert data.one == 0x9999
    assert Enum.at(data.two, 0).f1 == 0x100
    assert Enum.at(data.two, 0).f2 == 0x101
    assert Enum.at(data.two, 1).f1 == 0x102
    assert Enum.at(data.two, 1).f2 == 0x103
    assert Enum.at(data.two, 2).f1 == 0xf104
    assert Enum.at(data.two, 2).f2 == 0x9823
    assert data.three == 0xfe
  end

  # Only two of the three records are present; the missing one loads as nil.
  it "loads a subset of the array" do
    msg = <<0x9999::32, 0x100::16, 0x101::16,
            0x102::16, 0x103::16>>
    data = MyListRecords.load msg
    Enum.count(data.two) |> 3
    List.last(data.two) |> nil
    data.one |> 0x9999
    Enum.at(data.two, 0).f1 |> 0x100
    Enum.at(data.two, 0).f2 |> 0x101
    Enum.at(data.two, 1).f1 |> 0x102
    Enum.at(data.two, 1).f2 |> 0x103
  end

  defrecord SubData3, f1: 0, f2: 0 do
    use CStructure, endian: :big, schema: [ f1: 16, f2: 8 ]
  end

  defrecord MyListRecords2, one: [], two: [], three: 0 do
    use CStructure, schema: [one: [list: {[integer: 8], 4}],
                             two: [list: {[record: SubData3], 3}],
                             three: [list: {[string: 4], 2}]],
                    endian: :big
  end

  it "gets the size of record and string lists" do
    MyListRecords2.size |> 168
  end

  it "load a list of records and strings" do
    msg = <<0xfefdfcfb::32, 1::16, 2::8, 0xabcd::16, 0xbd::8, 0xffff::16, 0xdd::8,
            "abcd", "good">>
    data = MyListRecords2.load msg
    Enum.at(data.one, 0) |> 0xfe
    Enum.at(data.one, 1) |> 0xfd
    Enum.at(data.one, 2) |> 0xfc
    Enum.at(data.one, 3) |> 0xfb
    Enum.at(data.two, 0).f1 |> 1
    Enum.at(data.two, 0).f2 |> 2
    Enum.at(data.two, 1).f1 |> 0xabcd
    Enum.at(data.two, 1).f2 |> 0xbd
    Enum.at(data.two, 2).f1 |> 0xffff
    Enum.at(data.two, 2).f2 |> 0xdd
    Enum.at(data.three, 0) |> "abcd"
    Enum.at(data.three, 1) |> "good"
  end

  defrecord SubDataTop, sub_data: nil, two: 0 do
    use CStructure, endian: :big, schema: [sub_data: [list: {[record: SubData3], 2}], two: 16]
    def get_sub_data(inx, r), do: Enum.at(r.sub_data, inx)
  end

  defrecord DoubleNested, one: 0, top: nil do
    use CStructure, endian: :big, schema: [one: 32, top: [record: SubDataTop]]
  end

  it "loads 2nd level nested structure list" do
    bin_sub_3_1 = <<0x1234::16, 0xaa::8>>
    bin_sub_3_2 = <<0x4321::16, 0x22::8>>
    bin_top = <<bin_sub_3_1::binary, bin_sub_3_2::binary, 0x99::16>>
    bin_msg = <<0x90909090::32, bin_top::binary>>
    data = DoubleNested.load bin_msg
    data.one |> 0x90909090
    data.top.two |> 0x99
    data.top.get_sub_data(0).f1 |> 0x1234
    data.top.get_sub_data(0).f2 |> 0xaa
    data.top.get_sub_data(1).f2 |> 0x22
  end
end
Are you going to turn this into a library? I've been looking all over for something like this!
Noticed it didn't do bitfields (which is what I was really after) but I had no luck trying to hack it in :(
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I'm using the following approach to deal with binary data from socket connections. This still has some work to do (e.g., serialization does not work for lists yet). I will likely create this as a separate package and publish it on GitHub when I have some time.