-
-
Save kyamagu/31a4b6f782670a28098b to your computer and use it in GitHub Desktop.
// Caffe proto converter. | |
// | |
// Build. | |
// | |
// mex -I/path/to/matlab-lmdb/include ... | |
// -I/path/to/caffe/build/src/caffe/proto ... | |
// caffe_proto_.cc ... | |
// /path/to/caffe/build/src/caffe/proto/caffe.pb.o ... | |
// -lprotobuf CXXFLAGS="$CXXFLAGS -std=c++11" | |
// | |
// Usage. | |
// | |
// fid = fopen('cat.jpg', 'r'); | |
// jpg_image = fread(fid, inf, 'uint8=>uint8'); | |
// fclose(fid); | |
// datum = caffe_proto_('toEncodedDatum', jpg_image, label); | |
// [jpg_image, label] = caffe_proto_('fromDatum', datum); | |
// | |
// image = imread('cat.jpg'); | |
// label = 1; | |
// datum = caffe_proto_('toDatum', image, label); | |
// [image, label] = caffe_proto_('fromDatum', datum); | |
// | |
// Importing into LMDB database. | |
// | |
// addpath('/path/to/matlab-lmdb'); | |
// image_files = { | |
// '/path/to/image-1.jpg', ... | |
// '/path/to/image-2.jpg', ... | |
// ... | |
// }; | |
// database = lmdb.DB('/path/to/lmdb'); | |
// for i = 1:numel(image_files) | |
// label = 0; | |
// fid = fopen(image_files{i}, 'r'); | |
// jpg_image = fread(fid, inf, 'uint8=>uint8'); | |
// fclose(fid); | |
// datum = caffe_proto_('toEncodedDatum', jpg_image, label); | |
// database.put(image_files{i}, datum); | |
// end | |
// clear database; | |
// | |
#include "caffe.pb.h" | |
#include "mexplus.h" | |
using namespace std; | |
using namespace mexplus; | |
// Raises a MATLAB error with identifier "caffe_proto:error" when `condition`
// is false. The remaining arguments (message format and its values) are
// forwarded unchanged to mexErrMsgIdAndTxt.
// The do { } while (0) wrapper makes the expansion a single statement, so an
// `else` following the macro at the call site cannot bind to the macro's
// internal `if`.
#define ASSERT(condition, ...) \
  do { \
    if (!(condition)) \
      mexErrMsgIdAndTxt("caffe_proto:error", __VA_ARGS__); \
  } while (0)
// toEncodedDatum: wraps already-encoded image bytes (e.g. the raw contents of
// a JPEG file) and a label into a serialized caffe::Datum.
//
// Inputs:  (0) uint8 array holding the encoded bytes, (1) integer label.
// Output:  (0) the serialized Datum, with its `encoded` flag set to true.
MEX_DEFINE(toEncodedDatum) (int nlhs, mxArray* plhs[],
                            int nrhs, const mxArray* prhs[]) {
  InputArguments input(nrhs, prhs, 2);
  OutputArguments output(nlhs, plhs, 1);
  MxArray encoded_bytes(input.get(0));
  // The bytes are copied verbatim; no image decoding happens here.
  const uint8_t* const bytes = encoded_bytes.getData<uint8_t>();
  const size_t byte_count = encoded_bytes.size();
  caffe::Datum message;
  message.set_data(bytes, byte_count);
  const int label = input.get<int>(1);
  message.set_label(label);
  message.set_encoded(true);
  const string serialized = message.SerializeAsString();
  output.set(0, serialized);
}
// toDatum: packs a MATLAB array and a label into a serialized caffe::Datum.
//
// Inputs:  (0) image array, (1) integer label.
// Output:  (0) the serialized Datum.
//
// The first dimension is stored as height, the second as width, and the
// product of all remaining dimensions as channels. Elements are written one
// channel at a time, last channel first (so RGB becomes BGR), and row by row
// within each channel. A uint8 input is stored in `data`; any other input is
// read as float and stored in `float_data`.
MEX_DEFINE(toDatum) (int nlhs, mxArray* plhs[],
                     int nrhs, const mxArray* prhs[]) {
  InputArguments input(nrhs, prhs, 2);
  OutputArguments output(nlhs, plhs, 1);
  caffe::Datum message;
  MxArray image(input.get(0));
  message.set_label(input.get<int>(1));

  const vector<mwSize> shape = image.dimensions();
  const int rows = shape[0];
  const int cols = shape[1];
  // Everything beyond the first two dimensions is folded into the channels.
  int planes = 1;
  for (size_t d = 2; d < shape.size(); ++d)
    planes *= shape[d];
  message.set_height(rows);
  message.set_width(cols);
  message.set_channels(planes);

  // Pick the destination field once and pre-size it.
  const bool as_bytes = image.isUint8();
  if (as_bytes)
    message.mutable_data()->reserve(image.size());
  else
    message.mutable_float_data()->Reserve(image.size());

  vector<mwIndex> position(3);
  for (int plane = planes; plane-- > 0; ) {  // RGB to BGR order.
    position[2] = plane;
    for (int row = 0; row < rows; ++row) {
      position[0] = row;
      for (int col = 0; col < cols; ++col) {
        position[1] = col;
        if (as_bytes)
          message.mutable_data()->push_back(image.at<uint8_t>(position));
        else
          message.add_float_data(image.at<float>(position));
      }
    }
  }
  output.set(0, message.SerializeAsString());
}
// fromDatum: parses a serialized caffe::Datum and returns its content.
//
// Inputs:  (0) serialized Datum string.
// Outputs: (0) the stored bytes as-is when the datum is flagged as encoded;
//              otherwise a height x width x channels array, uint8 when the
//              datum has `data`, single when it has `float_data`.
//          (1) the label, or 0 when the datum has none.
//
// Raises "caffe_proto:error" when the string cannot be parsed, when a stored
// dimension is negative, or when the payload holds fewer elements than
// height * width * channels.
MEX_DEFINE(fromDatum) (int nlhs, mxArray* plhs[],
                       int nrhs, const mxArray* prhs[]) {
  InputArguments input(nrhs, prhs, 1);
  OutputArguments output(nlhs, plhs, 2);
  caffe::Datum datum;
  ASSERT(datum.ParseFromString(input.get<string>(0)),
         "Failed to parse datum.");
  if (datum.has_encoded() && datum.encoded()) {
    // Encoded payloads are handed back untouched; decoding is left to MATLAB.
    output.set(0, datum.data());
  }
  else {
    const int height = (datum.has_height()) ? datum.height() : 0;
    const int width = (datum.has_width()) ? datum.width() : 0;
    const int channels = (datum.has_channels()) ? datum.channels() : 0;
    // A negative value would wrap to a huge unsigned array extent below.
    ASSERT(height >= 0 && width >= 0 && channels >= 0,
           "Datum has a negative dimension.");
    vector<mwIndex> dimensions(3);
    dimensions[0] = height;
    dimensions[1] = width;
    dimensions[2] = channels;
    // Number of payload elements the copy loops below will consume.
    const size_t count = static_cast<size_t>(height) * width * channels;
    MxArray array;
    vector<mwIndex> subscripts(3);
    size_t index = 0;
    if (datum.has_data()) {
      const string& data = datum.data();
      // Guard against truncated payloads: indexing past the end of the
      // string would be undefined behavior.
      ASSERT(data.size() >= count,
             "Datum data is smaller than its dimensions require.");
      array.reset(mxCreateNumericArray(dimensions.size(),
                                       &dimensions[0],
                                       mxUINT8_CLASS,
                                       mxREAL));
      for (int k = channels - 1; k >= 0; --k) {  // BGR to RGB order.
        subscripts[2] = k;
        for (int i = 0; i < height; ++i) {
          subscripts[0] = i;
          for (int j = 0; j < width; ++j) {
            subscripts[1] = j;
            array.set(subscripts, data[index++]);
          }
        }
      }
    }
    else if (datum.float_data_size() > 0) {
      // Same guard for the repeated float field.
      ASSERT(static_cast<size_t>(datum.float_data_size()) >= count,
             "Datum float_data is smaller than its dimensions require.");
      array.reset(mxCreateNumericArray(dimensions.size(),
                                       &dimensions[0],
                                       mxSINGLE_CLASS,
                                       mxREAL));
      for (int k = channels - 1; k >= 0; --k) {  // BGR to RGB order.
        subscripts[2] = k;
        for (int i = 0; i < height; ++i) {
          subscripts[0] = i;
          for (int j = 0; j < width; ++j) {
            subscripts[1] = j;
            array.set(subscripts,
                      datum.float_data(static_cast<int>(index++)));
          }
        }
      }
    }
    output.set(0, array.release());
  }
  output.set(1, (datum.has_label()) ? datum.label() : 0);
}
// Presumably expands (via mexplus) to the MEX entry point that routes the
// first call argument, e.g. 'toDatum', to the matching MEX_DEFINE handler
// above. NOTE(review): confirm against the mexplus dispatch header.
MEX_DISPATCH |
@i-akbari You have to specify `-std=c++11` either in the mex configuration file (`$HOME/.matlab/$VERSION/mexopts.sh` or `$HOME/.matlab/$VERSION/mex_C++_$ARCH.xml`), or at compile time:
mex CXXFLAGS="\$CXXFLAGS -std=c++11" ...
The LMDB database only accepts strings or binary data. To store numbers, you have to convert them first (e.g., with `num2str`).
Hi,
I am quite confused by this. Could I directly use your matlab-lmdb to convert images for the effective input of caffe? Thanks so much for replying.
Hi, I keep getting protobuf errors that crash MATLAB whenever I use this script along with matcaffe. Both work fine when they are used separately. The errors look like the following:
[libprotobuf ERROR google/protobuf/descriptor_database.cc:57] File
already exists in database: caffe.proto
[libprotobuf FATAL google/protobuf/descriptor.cc:1018] CHECK failed:
generated_database_->Add(encoded_file_descriptor, size)
Therefore I tried to put the function inside matcaffe and they work fine now (illidanlab/caffe@a7397da). I don't know if there are better options?
Thanks
Here is the import example.
addpath('/path/to/matlab-lmdb');
image_files = {
'/path/to/image-1.jpg', ...
'/path/to/image-2.jpg', ...
...
};
database = lmdb.DB('/path/to/lmdb');
for i = 1:numel(image_files)
label = 0;
fid = fopen(image_files{i}, 'r');
jpg_image = fread(fid, inf, 'uint8=>uint8');
fclose(fid);
datum = caffe_proto_('toEncodedDatum', jpg_image, label);
database.put(image_files{i}, datum);
end
clear database;
The issue is that `matcaffe` already defines `Datum`, so it cannot load the `caffe_proto_` mex, which separately defines `Datum` as well. The solution is, as you did, to compile them together in some way.
Hi!
thanks for your code! but when I implemented I got an error!
Error using LMDB_
MDB_BAD_VALSIZE: Unsupported size of key/DB name/data, or wrong DUPFIXED size
Error in lmdb.DB/put (line 89)
LMDB_('put', this.id_, key, value, varargin{:});
Error in dataconverter1 (line 25)
database.put(Image, datum);
Can you help me with this please?
Also, Can my labels be pictures as well?
Thanks!
@aarafati You probably insert a wrong key here:
database.put(Image, datum);
`Image` has to be a string.
Also, Can my labels be pictures as well?
Check caffe proto file for acceptable types.
Hi, I am getting the following error when compiling:
/usr/bin/ld: caffe_proto_.o: relocation R_X86_64_32 against `pthread_cancel' can not be used when making a shared object; recompile with -fPIC
caffe_proto_.o: error adding symbols: Bad value
collect2: error: ld returned 1 exit status
mex: link of ' "caffe_proto_.mexa64"' failed.
The compilation command I am using is the following one:
mex CXXFLAGS="$CXXFLAGS -std=c++11" -I/media/user/HDD/CNN_v5/Utils/LMDB/matlab-lmdb-master/include -I/usr/local/caffe-master2/.build_release/src/caffe/proto caffe_proto_.cc /usr/local/caffe-master2/.build_release/src/caffe/proto/caffe.pb.o -lprotobuf
Thanks.
@MarcBS That could simply be a matter of adding `-fPIC` to `CXXFLAGS`. Or, you might need to modify the mex config file under `$HOME/.matlab/$VERSION/`.
mex CXXFLAGS="$CXXFLAGS -std=c++11 -fPIC" ...
Hi, I used your code as follows:
addpath /path/to/matlab-lmdb
data_train = lmdb.DB('/path/to/matlab-lmdb', 'MAPSIZE', 1024^3);
files = dir('*.jpg')
image_files = {};
for i=1:numel(files)
image_files{i} = files(i).name;
end
label = 0;
for i = 1:numel(image_files)
jpg_image = imread(image_files{i});
datum = caffe_proto_('toEncodedDatum', jpg_image, label);
data_train.put(image_files{i}, datum);
end
save('data.mdb','data_train');
But when I restart matlab again and try to load the data as follows
addpath /path/to/matlab-lmdbdb_path
db_path = pwd;
database = lmdb.DB(db_path, 'RDONLY', true, 'NOLOCK', true);
I get the following error:
Error using LMDB_
MDB_INVALID: File is not an LMDB file
Error in lmdb.DB (line 65)
this.id_ = LMDB_('new', filename, varargin{:});
Any help would be appreciated!
@bayar87 You made a few mistakes.
- You cannot use `imread` to create an encoded datum. Use `fread`, or use `toDatum` instead.
- Your path `/path/to/matlab-lmdb` does not look correct. Use the correct location of your database directory. Using `pwd` is 100% incorrect.
@bayar87 If you're creating your database at `/path/to/matlab-lmdb` (which sounds very confusing), you have to specify the same location, `/path/to/matlab-lmdb`, when opening it read-only. Otherwise you'll get an error. For example,
database = lmdb.DB('/home/someone/project/mydata.lmdb', 'MAPSIZE', 1024^3);
% Do whatever with the writable database
clear database;
database = lmdb.DB('/home/someone/project/mydata.lmdb', 'RDONLY', true, 'NOLOCK', true);
% this time the database is read-only
clear database;
Also, the following line doesn't make sense.
save('data.mdb','data_train');
@kyamagu: Thanks for your replies. Actually I use the save command because once I convert my data which I created in the "write" mode I want to save it to use it later in the "read" mode using a different code.
I might be very confused, my understanding is using your above code I can create .mdb data by using my own images then save it to be used later in matcaffe as
database = lmdb.DB('/home/someone/data_directory', 'RDONLY', true, 'NOLOCK', true);
@bayar87 You cannot save the `database` variable in a mat file (`save` command). It is a handle object, like a graphics handle. `lmdb.DB()` just opens a database at a specified location with optional flags (like read-only mode) and returns a handle to it.
Actually, you don't even need to create an LMDB file if you are using `matcaffe` for testing only. Just `imread` a file and give it to `matcaffe`. LMDB would be useful when you train a new model via the C++ command line tool.
@kyamagu actually that's what I want to do later is to create a data then train from scratch using the c++ command line.
Hi. I have problem with fread jpg file. and MAPSIZE, and finally the usage.
Let me start with MAPSIZE
on example
database = lmdb.DB('/home/someone/project/mydata.lmdb', 'MAPSIZE', 1024^3);
Here the image of example is 1024^3? Is it that huge? I guess it is 1024x1024x3?
I have 32x32x3 images. Do I need to set MAPSIZE as 32^3?
My 2nd question fread jpg file.
My image is 32x32x3 but when I
jpg_image = imread(image_files{i});
I found my jpg_image has size only 832x1, while I am expecting something like 3072x1. Is that correct?
My 3rd question is
After I created the db, I should put the db postion at train_test.prototxt something like below? Am I correct?
Where should I put the word "data", and "label" on my matlab-lmdb code?
name: "sss_quick"
layers {
name: "sss"
type: DATA
top: "data"
top: "label"
data_param {
source: "examples/sss/sss_lmdb"
batch_size: 100
backend: LMDB
}
transform_param {
mean_file: "examples/sss/mean.binaryproto"
}
include: { phase: TRAIN }
}
layers {
name: "sss"
type: DATA
top: "data"
top: "label"
data_param {
source: "examples/sss/sss_lmdb"
batch_size: 100
backend: LMDB
}
transform_param {
mean_file: "examples/sss/mean.binaryproto"
}
include: { phase: TEST }
}
Hi I just want to say. I got my problem solved.
- just use 1024^3.
- still don't know
- no need.
Hi, I thought my problem was solved, but it's not.
It seems that the lmdb is created but the content inside is not correct.
How can I verify that the contents are actually images and labels I input?
@kyamagu It seems that when the image is encoded, it won't decode. I added `DecodeDatumNative(&datum);` for decoding; however, it always returns "Datum cannot decode image". Do you know any possible reason?
Code:
https://github.com/201power/lmdb_matlab_caffe/blob/master/caffe_proto_.cc
I receive this error when I build using the command:
mex -lprotobuf CXXFLAGS='$CXXFLAGS -std=c++11' ...
-I'matlab-lmdb-master/include'...
-I'../../../build/src/caffe/proto' ...
-I'../../../build/src/caffe/proto/caffe.pb.o' ...
caffe_proto_.cc ...
Error using mex
/tmp/mex_3730232941044066_28647/caffe_proto_.o: In function `Operation_toEncodedDatum::operator()(int, mxArray_tag**, int, mxArray_tag const**)':
caffe_proto_.cc:(.text+0x48a): undefined reference to `caffe::Datum::Datum()'
caffe_proto_.cc:(.text+0x688): undefined reference to `caffe::Datum::~Datum()'
caffe_proto_.cc:(.text+0x72d): undefined reference to `caffe::Datum::~Datum()'
/tmp/mex_3730232941044066_28647/caffe_proto_.o: In function `Operation_toDatum::operator()(int, mxArray_tag**, int, mxArray_tag const**)':
caffe_proto_.cc:(.text+0x7f6): undefined reference to `caffe::Datum::Datum()'
caffe_proto_.cc:(.text+0xccf): undefined reference to `caffe::Datum::~Datum()'
caffe_proto_.cc:(.text+0xdc0): undefined reference to `caffe::Datum::~Datum()'
/tmp/mex_3730232941044066_28647/caffe_proto_.o: In function `Operation_fromDatum::operator()(int, mxArray_tag**, int, mxArray_tag const**)':
caffe_proto_.cc:(.text+0xeea): undefined reference to `caffe::Datum::Datum()'
caffe_proto_.cc:(.text+0x1414): undefined reference to `caffe::Datum::~Datum()'
caffe_proto_.cc:(.text+0x14e1): undefined reference to `caffe::Datum::~Datum()'
collect2: error: ld returned 1 exit status
Error in testlmdb (line 3)
mex -lprotobuf CXXFLAGS='$CXXFLAGS -std=c++11' ...
Your code works, but I noticed something which I'm not sure if affects the results or not. But thought it's worthy of letting you know (you haven't activated comments on your repository)
I used your implementation to decode a Caffe datum using the "fromDatum" flag, and then used it to convert back with "toDatum" flag.
The resulting datum was 2 chars longer than the original datum. I'm not sure about the source of this mismatch.
But everything works fine.
@amir-abdi, thanks. I checked you can open issues in my repository.
It's great that it worked for you, although I am not sure what the bug is. For me, it always shows "cannot decode".
Hi, I've tried compiling this using the following command
mex -I"/path/to/matlab-lmdb-master/include" -I"/path/to/caffe/src/caffe/proto" caffe_proto_.cc -lprotobuf CXXFLAGS="$CXXFLAGS -std=c++11 -fPIC"
But I have the following error(s)
Building with 'g++'.
/tmp/mex_22659658470642_18000/caffe_proto_.o: In function `Operation_toDatum::operator()(int, mxArray_tag**, int, mxArray_tag const**)':
caffe_proto_.cc:(.text+0x2fd): undefined reference to `caffe::Datum::Datum()'
caffe_proto_.cc:(.text+0x807): undefined reference to `caffe::Datum::~Datum()'
caffe_proto_.cc:(.text+0x90a): undefined reference to `caffe::Datum::~Datum()'
/tmp/mex_22659658470642_18000/caffe_proto_.o: In function `Operation_toEncodedDatum::operator()(int, mxArray_tag**, int, mxArray_tag const**)':
caffe_proto_.cc:(.text+0xa0a): undefined reference to `caffe::Datum::Datum()'
caffe_proto_.cc:(.text+0xb85): undefined reference to `caffe::Datum::~Datum()'
caffe_proto_.cc:(.text+0xc06): undefined reference to `caffe::Datum::~Datum()'
/tmp/mex_22659658470642_18000/caffe_proto_.o: In function `Operation_fromDatum::operator()(int, mxArray_tag**, int, mxArray_tag const**)':
caffe_proto_.cc:(.text+0xcbc): undefined reference to `caffe::Datum::Datum()'
caffe_proto_.cc:(.text+0x1126): undefined reference to `caffe::Datum::~Datum()'
caffe_proto_.cc:(.text+0x11d3): undefined reference to `caffe::Datum::~Datum()'
collect2: error: ld returned 1 exit status
I suspect it could be because my command doesn't try to build
/path/to/caffe/build/src/caffe/proto/caffe.pb.o
Unfortunately, I am unable to locate this particular file and I have no idea how to generate it. I have already tried rebuilding caffe (make clean, make all). Can anyone help?
Hi folks, GitHub Gist doesn't notify me of new comments, and I didn't know so many discussions were happening here!
@peerajak You can encode and decode to check if they are identical.
@201power I believe your encoded datum is not in the expected binary format. Just check whether you can `fwrite` the binary to a file and open it. My implementation does not do encoding/decoding but only conversion, because Matlab can `imread`/`imwrite` for decoding/encoding. That's why there is `fread` in the example.
@amir-abdi You have to compile caffe first in order to link. Make sure your relative path correctly points to the `caffe.pb.o` object.
@cedricseah You must correctly specify `/path/to/caffe`. Check the caffe directory.
Hi, Thank you for posting this.
Could you provide example usage for using 'fromDatum' to read from an existing database? I tried
datum = read_db.get(filename);
[im, label] = caffe_proto_('fromDatum',datum)
based on your example and received this error:
Error using caffe_proto_
Failed to parse datum.
Also, I want to read a value from an LMDB file that was created by Caffe. An entry looks like
(key, val) = (5 , 0.4452).
By this code I get key=5 and tmp=�����5Õ{�?5�×�¿
How can I get the correct double number in MATLAB?
Thanks again!
clc
clear
addpath .../matlab-lmdb-master;
path='.../LMDBs/tmp/lmdb_test';
database = lmdb.DB(path);
keys = database.keys();
values = database.values();
tmp=values{1}
clear database
display('finished');