Skip to content

Instantly share code, notes, and snippets.

@kyamagu
Last active December 6, 2016 17:26
Show Gist options
  • Save kyamagu/31a4b6f782670a28098b to your computer and use it in GitHub Desktop.
Save kyamagu/31a4b6f782670a28098b to your computer and use it in GitHub Desktop.
Caffe Datum proto converter
// Caffe proto converter.
//
// Build.
//
// mex -I/path/to/matlab-lmdb/include ...
// -I/path/to/caffe/build/src/caffe/proto ...
// caffe_proto_.cc ...
// /path/to/caffe/build/src/caffe/proto/caffe.pb.o ...
// -lprotobuf CXXFLAGS="$CXXFLAGS -std=c++11"
//
// Usage.
//
// fid = fopen('cat.jpg', 'r');
// jpg_image = fread(fid, inf, 'uint8=>uint8');
// fclose(fid);
// label = 1;
// datum = caffe_proto_('toEncodedDatum', jpg_image, label);
// [jpg_image, label] = caffe_proto_('fromDatum', datum);
//
// image = imread('cat.jpg');
// label = 1;
// datum = caffe_proto_('toDatum', image, label);
// [image, label] = caffe_proto_('fromDatum', datum);
//
// Importing into LMDB database.
//
// addpath('/path/to/matlab-lmdb');
// image_files = {
// '/path/to/image-1.jpg', ...
// '/path/to/image-2.jpg', ...
// ...
// };
// database = lmdb.DB('/path/to/lmdb');
// for i = 1:numel(image_files)
// label = 0;
// fid = fopen(image_files{i}, 'r');
// jpg_image = fread(fid, inf, 'uint8=>uint8');
// fclose(fid);
// datum = caffe_proto_('toEncodedDatum', jpg_image, label);
// database.put(image_files{i}, datum);
// end
// clear database;
//
#include "caffe.pb.h"
#include "mexplus.h"
using namespace std;
using namespace mexplus;
// Raises a MATLAB error with identifier "caffe_proto:error" when `condition`
// evaluates to false. The remaining arguments are forwarded unchanged to
// mexErrMsgIdAndTxt (message format string followed by its arguments).
//
// The body is wrapped in do { ... } while (0) so the macro expands to exactly
// one statement: without the wrapper, an `else` written after `ASSERT(...);`
// would silently attach to the macro's internal `if`.
#define ASSERT(condition, ...) \
  do { \
    if (!(condition)) mexErrMsgIdAndTxt("caffe_proto:error", __VA_ARGS__); \
  } while (0)
// Operation 'toEncodedDatum': packs an already-encoded image buffer (for
// example the raw bytes of a JPEG file) together with a label into a
// caffe::Datum marked as encoded, and returns the serialized message.
//
// Arguments: the image bytes (accessed as uint8) and an integer label.
// Returns:   the Datum serialized to a string.
MEX_DEFINE(toEncodedDatum) (int nlhs, mxArray* plhs[],
                            int nrhs, const mxArray* prhs[]) {
  InputArguments input(nrhs, prhs, 2);
  OutputArguments output(nlhs, plhs, 1);
  MxArray encoded_bytes(input.get(0));
  caffe::Datum datum;
  datum.set_encoded(true);
  // The buffer is copied verbatim; no image decoding is performed here.
  datum.set_data(encoded_bytes.getData<uint8_t>(), encoded_bytes.size());
  datum.set_label(input.get<int>(1));
  const string serialized = datum.SerializeAsString();
  output.set(0, serialized);
}
// Operation 'toDatum': converts a decoded image array plus a label into a
// serialized caffe::Datum.
//
// Arguments: the image array (dimension 0 is taken as height, dimension 1 as
//            width, and all remaining dimensions are multiplied together into
//            the channel count) and an integer label.
// Returns:   the Datum serialized to a string.
//
// Pixels are emitted plane by plane with the planes visited in reverse order
// (RGB to BGR), and within a plane row by row. uint8 input goes into the
// Datum's `data` bytes; any other input is read as float into `float_data`.
MEX_DEFINE(toDatum) (int nlhs, mxArray* plhs[],
                     int nrhs, const mxArray* prhs[]) {
  InputArguments input(nrhs, prhs, 2);
  OutputArguments output(nlhs, plhs, 1);
  caffe::Datum datum;
  MxArray image(input.get(0));
  datum.set_label(input.get<int>(1));

  const vector<mwSize> shape = image.dimensions();
  const int rows = shape[0];
  const int cols = shape[1];
  int planes = 1;
  for (size_t d = 2; d < shape.size(); ++d)
    planes *= shape[d];
  datum.set_channels(planes);
  datum.set_width(cols);
  datum.set_height(rows);

  // (row, column, plane) subscript reused for every element lookup.
  vector<mwIndex> position(3);
  if (image.isUint8()) {
    string* bytes = datum.mutable_data();
    bytes->reserve(image.size());
    for (int plane = planes; plane-- > 0;) {  // RGB to BGR order.
      position[2] = plane;
      for (int row = 0; row < rows; ++row) {
        position[0] = row;
        for (int col = 0; col < cols; ++col) {
          position[1] = col;
          bytes->push_back(image.at<uint8_t>(position));
        }
      }
    }
  }
  else {
    datum.mutable_float_data()->Reserve(image.size());
    for (int plane = planes; plane-- > 0;) {  // RGB to BGR order.
      position[2] = plane;
      for (int row = 0; row < rows; ++row) {
        position[0] = row;
        for (int col = 0; col < cols; ++col) {
          position[1] = col;
          datum.add_float_data(image.at<float>(position));
        }
      }
    }
  }
  const string serialized = datum.SerializeAsString();
  output.set(0, serialized);
}
// Operation 'fromDatum': parses a serialized caffe::Datum and returns its
// image payload and label.
//
// Arguments: the serialized Datum string.
// Returns:   (1) the image and (2) the label (0 when the Datum has none).
//            - Encoded Datum: the stored bytes are returned as-is; decoding
//              is left to the caller.
//            - Otherwise: a height x width x channels array, uint8 when the
//              Datum carries `data`, single when it carries `float_data`.
//              The stored planes are read back in reverse order (BGR to RGB).
//
// Raises "caffe_proto:error" when the string cannot be parsed, when a
// dimension is negative, or when the payload holds fewer elements than
// height * width * channels (which would otherwise read past the buffer).
MEX_DEFINE(fromDatum) (int nlhs, mxArray* plhs[],
                       int nrhs, const mxArray* prhs[]) {
  InputArguments input(nrhs, prhs, 1);
  OutputArguments output(nlhs, plhs, 2);
  caffe::Datum datum;
  ASSERT(datum.ParseFromString(input.get<string>(0)),
         "Failed to parse datum.");
  if (datum.has_encoded() && datum.encoded()) {
    output.set(0, datum.data());
  }
  else {
    const int height = datum.has_height() ? datum.height() : 0;
    const int width = datum.has_width() ? datum.width() : 0;
    const int channels = datum.has_channels() ? datum.channels() : 0;
    // Negative sizes would wrap around to huge unsigned dimensions below.
    ASSERT(height >= 0 && width >= 0 && channels >= 0,
           "Datum has a negative dimension.");
    vector<mwSize> dimensions(3);
    dimensions[0] = height;
    dimensions[1] = width;
    dimensions[2] = channels;
    const size_t count = static_cast<size_t>(height) * width * channels;
    MxArray array;
    vector<mwIndex> subscripts(3);
    int index = 0;
    if (datum.has_data()) {
      const string& data = datum.data();
      // The loops below consume exactly `count` bytes from `data`.
      ASSERT(data.size() >= count,
             "Datum data is shorter than height * width * channels.");
      array.reset(mxCreateNumericArray(dimensions.size(),
                                       &dimensions[0],
                                       mxUINT8_CLASS,
                                       mxREAL));
      for (int k = channels - 1; k >= 0; --k) {  // BGR to RGB order.
        subscripts[2] = k;
        for (int i = 0; i < height; ++i) {
          subscripts[0] = i;
          for (int j = 0; j < width; ++j) {
            subscripts[1] = j;
            array.set(subscripts, data[index++]);
          }
        }
      }
    }
    else if (datum.float_data_size() > 0) {
      // The loops below consume exactly `count` floats from float_data.
      ASSERT(static_cast<size_t>(datum.float_data_size()) >= count,
             "Datum float_data is shorter than height * width * channels.");
      array.reset(mxCreateNumericArray(dimensions.size(),
                                       &dimensions[0],
                                       mxSINGLE_CLASS,
                                       mxREAL));
      for (int k = channels - 1; k >= 0; --k) {  // BGR to RGB order.
        subscripts[2] = k;
        for (int i = 0; i < height; ++i) {
          subscripts[0] = i;
          for (int j = 0; j < width; ++j) {
            subscripts[1] = j;
            array.set(subscripts, datum.float_data(index++));
          }
        }
      }
    }
    else {
      // No payload at all: never hand a NULL mxArray back to MATLAB. Report
      // the inconsistency, or return an empty array for an empty image.
      ASSERT(count == 0, "Datum has neither data nor float_data.");
      array.reset(mxCreateNumericArray(dimensions.size(),
                                       &dimensions[0],
                                       mxUINT8_CLASS,
                                       mxREAL));
    }
    output.set(0, array.release());
  }
  output.set(1, (datum.has_label()) ? datum.label() : 0);
}
// mexplus dispatch macro. NOTE(review): presumably this emits the MEX entry
// point that routes the leading operation-name argument (e.g. 'toDatum', as in
// the usage notes at the top of the file) to the matching MEX_DEFINE block --
// confirm against the mexplus dispatch header.
MEX_DISPATCH
@kyamagu
Copy link
Author

kyamagu commented Sep 24, 2015

@MarcBS That could be simply a matter of adding -fPIC to CXXFLAGS. Or, you might need to modify mex config file under $USER/.matlab/$VERSION/.

mex CXXFLAGS="$CXXFLAGS -std=c++11 -fPIC" ...

@bayar87
Copy link

bayar87 commented Oct 11, 2015

Hi, I used your code as follows:

addpath /path/to/matlab-lmdb
data_train = lmdb.DB('/path/to/matlab-lmdb', 'MAPSIZE', 1024^3);
files = dir('*.jpg')
image_files = {};
for i=1:numel(files)
    image_files{i} = files(i).name;
end
label = 0;
for i = 1:numel(image_files)   
    jpg_image = imread(image_files{i});
    datum = caffe_proto_('toEncodedDatum', jpg_image, label);
    data_train.put(image_files{i}, datum);
end
save('data.mdb','data_train');

But when I restart matlab again and try to load the data as follows

addpath /path/to/matlab-lmdb
db_path      = pwd;
database = lmdb.DB(db_path, 'RDONLY', true, 'NOLOCK', true);

I get the following error:
Error using LMDB_
MDB_INVALID: File is not an LMDB file

Error in lmdb.DB (line 65)
this.id_ = LMDB_('new', filename, varargin{:});

Any help would be appreciated!

@kyamagu
Copy link
Author

kyamagu commented Oct 11, 2015

@bayar87 You make a few mistakes.

  1. You cannot use imread to create an Encoded datum. Use fread or use toDatum instead.
  2. Your path /path/to/matlab-lmdb does not look correct. Use the correct location of your database directory. Using pwd is 100% incorrect.

@kyamagu
Copy link
Author

kyamagu commented Oct 11, 2015

@bayar87 If you're creating your database at /path/to/matlab-lmdb (which sounds very confusing), you have to specify the database at the same place: /path/to/matlab-lmdb when read-only. Otherwise you'll get an error. Example,

database = lmdb.DB('/home/someone/project/mydata.lmdb', 'MAPSIZE', 1024^3);
% Do whatever with the writable database
clear database;

database = lmdb.DB('/home/someone/project/mydata.lmdb', 'RDONLY', true, 'NOLOCK', true);
% this time the database is read-only
clear database;

Also, the following line doesn't make sense.

save('data.mdb','data_train');

@bayar87
Copy link

bayar87 commented Oct 11, 2015

@kyamagu: Thanks for your replies. Actually I use the save command because once I convert my data which I created in the "write" mode I want to save it to use it later in the "read" mode using a different code.
I might be very confused, my understanding is using your above code I can create .mdb data by using my own images then save it to be used later in matcaffe as
database = lmdb.DB('/home/someone/data_directory', 'RDONLY', true, 'NOLOCK', true);

@kyamagu
Copy link
Author

kyamagu commented Oct 12, 2015

@bayar87 You cannot save the database variable in a mat file (save command). This is a handle object like graphics. lmdb.DB() just opens a database at a specified location with optional flags (like read-only mode) and returns a handle to it.

Actually, you don't even need to create an LMDB file if you are using matcaffe for testing only. Just imread a file and give it to matcaffe. LMDB is useful when you train a new model via the C++ command-line tool.

@bayar87
Copy link

bayar87 commented Oct 12, 2015

@kyamagu actually that's what I want to do later is to create a data then train from scratch using the c++ command line.

@peerajak
Copy link

Hi. I have problem with fread jpg file. and MAPSIZE, and finally the usage.

Let me start with MAPSIZE
on example
database = lmdb.DB('/home/someone/project/mydata.lmdb', 'MAPSIZE', 1024^3);
Here the image of example is 1024^3? Is it that huge? I guess it is 1024x1024x3?
I have 32x32x3 images. Do I need to set MAPSIZE as 32^3?

My 2nd question fread jpg file.
My image is 32x32x3 but when I

jpg_image = imread(image_files{i});
I found my jpg_image has size only 832x1, while I am expecting something like 3072x1. Is that correct?

My 3rd question is
After I created the db, I should put the db postion at train_test.prototxt something like below? Am I correct?
Where should I put the word "data", and "label" on my matlab-lmdb code?

name: "sss_quick"
layers {
  name: "sss"
  type: DATA
  top: "data"
  top: "label"
  data_param {
    source: "examples/sss/sss_lmdb"
    batch_size: 100
    backend: LMDB
  }
  transform_param {
    mean_file: "examples/sss/mean.binaryproto"
  }
  include: { phase: TRAIN }
}
layers {
  name: "sss"
  type: DATA
  top: "data"
  top: "label"
  data_param {
    source: "examples/sss/sss_lmdb"
    batch_size: 100
    backend: LMDB
  }
  transform_param {
    mean_file: "examples/sss/mean.binaryproto"
  }
  include: { phase: TEST }
}

Hi I just want to say. I got my problem solved.

  1. just use 1024^3.
  2. still don't know
  3. no need.

@peerajak
Copy link

Hi I thought My problem solved but its not.
It seems that the lmdb is created but the content inside is not correct.
How can I verify that the contents are actually images and labels I input?

@201power
Copy link

@kyamagu It seems when the image is encoded, it won't decode,
I add the DecodeDatumNative(&datum); for decoding, however, it always return "Datum cannot decode image", do you know any possible reason?

Code:
https://github.com/201power/lmdb_matlab_caffe/blob/master/caffe_proto_.cc

@amir-abdi
Copy link

I receive this error when I build using the command:
mex -lprotobuf CXXFLAGS='$CXXFLAGS -std=c++11' ...
-I'matlab-lmdb-master/include'...
-I'../../../build/src/caffe/proto' ...
-I'../../../build/src/caffe/proto/caffe.pb.o' ...
caffe_proto_.cc ...

Error using mex
/tmp/mex_3730232941044066_28647/caffe_proto_.o: In function
Operation_toEncodedDatum::operator()(int, mxArray_tag**, int, mxArray_tag const**)': caffe_proto_.cc:(.text+0x48a): undefined reference tocaffe::Datum::Datum()'
caffe_proto_.cc:(.text+0x688): undefined reference to caffe::Datum::~Datum()' caffe_proto_.cc:(.text+0x72d): undefined reference tocaffe::Datum::~Datum()'
/tmp/mex_3730232941044066_28647/caffe_proto_.o: In function Operation_toDatum::operator()(int, mxArray_tag**, int, mxArray_tag const**)': caffe_proto_.cc:(.text+0x7f6): undefined reference tocaffe::Datum::Datum()'
caffe_proto_.cc:(.text+0xccf): undefined reference to caffe::Datum::~Datum()' caffe_proto_.cc:(.text+0xdc0): undefined reference tocaffe::Datum::~Datum()'
/tmp/mex_3730232941044066_28647/caffe_proto_.o: In function Operation_fromDatum::operator()(int, mxArray_tag**, int, mxArray_tag const**)': caffe_proto_.cc:(.text+0xeea): undefined reference tocaffe::Datum::Datum()'
caffe_proto_.cc:(.text+0x1414): undefined reference to caffe::Datum::~Datum()' caffe_proto_.cc:(.text+0x14e1): undefined reference tocaffe::Datum::~Datum()'
collect2: error: ld returned 1 exit status

Error in testlmdb (line 3)
mex -lprotobuf CXXFLAGS='$CXXFLAGS -std=c++11' ...

@amir-abdi
Copy link

@201power

Your code works, but I noticed something which I'm not sure if affects the results or not. But thought it's worthy of letting you know (you haven't activated comments on your repository)

I used your implementation to decode a Caffe datum using the "fromDatum" flag, and then used it to convert back with "toDatum" flag.
The resulting datum was 2 chars longer than the original datum. I'm not sure about the source of this mismatch.
But everything works fine.

@201power
Copy link

201power commented Mar 6, 2016

@amir-abdi, thanks. I checked you can open issues in my repository.

It's great it worked for you, although I am not sure the bug. For me, it always shows "cannot decode"

@cedricseah
Copy link

Hi, I've tried compiling this using the following command

mex -I"/path/to/matlab-lmdb-master/include" -I"/path/to/caffe/src/caffe/proto" caffe_proto_.cc -lprotobuf CXXFLAGS="$CXXFLAGS -std=c++11 -fPIC"

But I have the following error(s)

Building with 'g++'.
/tmp/mex_22659658470642_18000/caffe_proto_.o: In function Operation_toDatum::operator()(int, mxArray_tag**, int, mxArray_tag const**)': caffe_proto_.cc:(.text+0x2fd): undefined reference tocaffe::Datum::Datum()'
caffe_proto_.cc:(.text+0x807): undefined reference to caffe::Datum::~Datum()' caffe_proto_.cc:(.text+0x90a): undefined reference tocaffe::Datum::~Datum()'
/tmp/mex_22659658470642_18000/caffe_proto_.o: In function Operation_toEncodedDatum::operator()(int, mxArray_tag**, int, mxArray_tag const**)': caffe_proto_.cc:(.text+0xa0a): undefined reference tocaffe::Datum::Datum()'
caffe_proto_.cc:(.text+0xb85): undefined reference to caffe::Datum::~Datum()' caffe_proto_.cc:(.text+0xc06): undefined reference tocaffe::Datum::~Datum()'
/tmp/mex_22659658470642_18000/caffe_proto_.o: In function Operation_fromDatum::operator()(int, mxArray_tag**, int, mxArray_tag const**)': caffe_proto_.cc:(.text+0xcbc): undefined reference tocaffe::Datum::Datum()'
caffe_proto_.cc:(.text+0x1126): undefined reference to caffe::Datum::~Datum()' caffe_proto_.cc:(.text+0x11d3): undefined reference tocaffe::Datum::~Datum()'
collect2: error: ld returned 1 exit status

I suspect it could be because my command doesn't try to build

/path/to/caffe/build/src/caffe/proto/caffe.pb.o

Unfortunately, I am unable to locate this particular file and I have no idea how to generate it. I have already tried rebuilding caffe (make clean, make all). Can anyone help?

@kyamagu
Copy link
Author

kyamagu commented Mar 28, 2016

Hi folks, GitHub Gist doesn't notify me of new comments, so I didn't know so many discussions were happening here!

@peerajak You can encode and decode to check if they are identical.

@201power I believe your encoded datum is not in the expected binary format. Check whether you can fwrite the binary to a file and open it as an image. My implementation does not do encoding/decoding, only conversion, because Matlab can imread/imwrite for decoding/encoding. That's why there is fread in the example.

@amir-abdi You have to compile caffe first to link. Make sure your relative path is correctly pointing to the caffe.pb.o object.

@cedricseah You must correctly specify /path/to/caffe. Check the caffe directory.

@CeSul
Copy link

CeSul commented Dec 2, 2016

Hi, Thank you for posting this.

Could you provide example usage for using 'fromDatum' to read from an existing database? I tried

datum = read_db.get(filename);
[im, label] = caffe_proto_('fromDatum',datum)

based on your example and received this error:

Error using caffe_proto_
Failed to parse datum.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment