Ideas:
pfi::data::serialization
(currently using)- MessagePack
- Protocol Buffer
- Thrift
- Avro
- JSON ... handling of big integers (> 53 bits) depends on the parser
- XML
- YAML
View Points:
- Interoperability with other software implemented in various language (other than C++)
- Ease of handling nested structure data (container data like array map)
- Compact size
- Smaller overhead to load
I think MessagePack seems good, for the following reasons:
- Already used in Jubatus (no new dependencies needed)
- Supports many languages (Ruby, Python, Perl, Java, Node, Erlang ... and more).
- Compact compared to JSON, XML, and YAML.
- Use
pfi::data::serialization
- Add the information that must be included, using a new struct and mixable_holder
struct snap_info {
template <class Ar>
void serialize(Ar& ar) {
ar & MEMBER(version)
& MEMBER(config)
& MEMBER(type)
& MEMBER(timestamp)
std::string version;
pfi::text::json::json config;
std::string type;
std::string timestamp;
}
// Sketch of save() using pfi::data::serialization: writes a snap_info header
// followed by every mixable into one binary file.
// NOTE(review): how `id` and `path` are related is in the elided code — confirm.
bool server_base::save(const std::string& id) {
(.. snip ..)
snap_info info;
info.config = config_;
(.. snip ..)
// Truncate any existing file and write in binary mode.
std::ofstream ofs(path.c_str(), std::ios::trunc|std::ios::binary);
pfi::data::serialization::binary_oarchive oa(ofs);
// The header goes first, through the archive.
oa << info;
// Each mixable then appends itself to the same underlying stream.
std::vector<mixable0*> mixables = get_mixable_holder()->get_mixables();
for (size_t i = 0; i < mixables.size(); ++i) {
mixables[i]->save(ofs);
}
(.. snip ..)
}
- Use msgpack::pack for interoperability
- Add the information that must be included, using a new struct and mixable_holder
// Information that must be included in a saved file (MessagePack variant).
struct snap_info {
  std::string version;
  pfi::text::json::json config;
  std::string type;
  std::string timestamp;

  // Fields are packed in the order listed here.
  // NOTE(review): packing `config` presumably needs MessagePack support for
  // pfi::text::json::json — confirm.
  MSGPACK_DEFINE(version, config, type, timestamp);
};
// Sketch of save() using MessagePack: packs a snap_info header, then lets
// every mixable write itself to the same binary file.
// NOTE(review): how `id` and `path` are related is in the elided code — confirm.
bool server_base::save(const std::string& id) {
(.. snip ..)
snap_info info;
info.config = config_;
(.. snip ..)
// Truncate any existing file and write in binary mode.
std::ofstream ofs(path.c_str(), std::ios::trunc|std::ios::binary);
msgpack::packer<std::ofstream> packer(&ofs);
// The header goes first, through the packer.
packer.pack(info);
// Each mixable then appends itself directly to the stream.
std::vector<mixable0*> mixables = get_mixable_holder()->get_mixables();
for (size_t i = 0; i < mixables.size(); ++i) {
mixables[i]->save(ofs);
}
(.. snip ..)
}
- Currently, the targets of save are the mixable classes held by mixable_holder. Only mixable? Isn't that redundant?
- The targets of save and the mixable classes must be separated.
- Add an externalizable class, and have the target classes of save inherit from it
// Interface for objects that can write themselves to, and read themselves
// from, a stream. Both operations report success through their return value.
class externalizable {
 public:
  // Virtual so that implementations can be destroyed through a base pointer.
  virtual ~externalizable() {}

  // Writes this object to the given output stream.
  virtual bool write(std::ostream&) = 0;

  // Restores this object from the given input stream.
  virtual bool read(std::istream&) = 0;
};
// Information that must be included in a saved file, as an externalizable
// object packed with MessagePack.
class snap_info : public externalizable {
 private:
  std::string version;
  pfi::text::json::json config;
  std::string type;
  std::string timestamp;

 public:
  MSGPACK_DEFINE(version, config, type, timestamp);

  // Overrides of the externalizable interface; without them this class stays
  // abstract. Their definitions are not part of this sketch.
  bool write(std::ostream& os);
  bool read(std::istream& is);
};
// Example of a class that is both a save target (externalizable) and a mixable.
class save_target_class : public externalizable, public mixable<> {
 private:
  // NOTE(review): the key/value types of these maps are not given in this sketch.
  std::unordered_map columns;
  std::unordered_map rows;

 public:
  MSGPACK_DEFINE(columns, rows);
(.. snip ..)
  // Packs this object (not its address) into `ofs` in MessagePack format.
  bool write(std::ostream& ofs) {
    msgpack::pack(ofs, *this);
    return true;
  }
(.. snip ..)
};
// Sketch of save() built on externalizable: every registered object writes
// itself to one binary file, in holder order.
// Returns false as soon as any object fails to write.
bool server_base::save(const std::string& id) {
(.. snip ..)
  // Truncate any existing file and write in binary mode.
  std::ofstream ofs(path.c_str(), std::ios::trunc|std::ios::binary);
  // Not used by the loop below; kept for the elided code.
  msgpack::packer<std::ofstream> packer(&ofs);
  std::vector<externalizable*> externalizables = get_externalizable_holder()->get_externalizables();
  for (size_t i = 0; i < externalizables.size(); ++i) {
    // write() already emits the object into `ofs`; its bool result is a
    // status and must not itself be packed into the file.
    if (!externalizables[i]->write(ofs)) {
      return false;
    }
  }
(.. snip ..)
}
We need to think through the details further:
class server_base {
(.. snip ..)
static const std::string VERSION(JUBATUS_VERSION);
static const uint64_t FORMAT_VERSION = 1;
std::string system_data_containor_sum;
system_data_containor s_containor;
std::string user_data_containors_sum;
vector<data_containor*> u_containors;
MSGPACK_DEFINE(VERSION, FORMAT_VERSION, system_data_containor_sum, s_containor, user_data_containors_sum, u_containors);
void register_user_data_containor(data_containor* d) {
u_containors.push_back(d);
}
(.. snip ..)
bool server_base::save(const std::string& id) {
(.. snip ..)
std::ofstream ofs(path.c_str(), std::ios::trunc|std::ios::binary);
msgpack::packer<std::ofstream> packer(&ofs)
// fixed order
packer.pack(VERSION);
packer.pack(FORMAT_VERSION);
system_data_containor(s_containor);
packer.pack(md5sum::md5sum(&s_containor))
packer.pack(s_containor);
packer.pack(md5sum::md5sum(&u_containors))
packer.pack(u_containors);
(.. snip ..)
}
}
// Base class for a unit of saved data; carries a version number.
class data_containor {
 public:
  data_containor() : version_(1) {}

  // Virtual so that containers can be destroyed through a data_containor*.
  virtual ~data_containor() {}

  uint64_t version_;

  // Version reported by the concrete container type.
  virtual uint64_t version() = 0;

  MSGPACK_DEFINE(version_);
};
class system_data_containor : public data_containor {
time_t timestamp;
std::string type; // task(engine) type name
std::string id; // unique name
std::string config; // if impossible to compare, may be use JSON object
uint64_t version() { return 1 ; }
MSGPACK_DEFINE(version, timestamp, type, id, config);
}
class user_data_containor : public data_containor {
model_t model;
storage_t storage;
weight_mgr_t wm;
fv_converter_t fv;
id_generator_t idgen;
update_cnt_t update_cnt;
uint64_t version() { return 1 ; }
MSGPACK_DEFINE(version, model, storage, wm, fv, idgen, update_cnt);
}
// Sketch of how an engine server fills a user_data_containor and registers it
// with server_base so that it gets saved.
// NOTE(review): the assignments below are pseudo-code at class scope;
// presumably they belong in a constructor or setup method — confirm.
class classifier_serv : public framework::server_base {
(.. snip ..)
// NOTE(review): no allocation of d_ is shown in this sketch.
shared_ptr<user_data_containor> d_;
d_->model = model_;
d_->storage = storage_;
d_->wm = wm_;
:
// NOTE(review): register_user_data_containor takes a data_containor*, so a
// raw pointer (e.g. d_.get()) is presumably intended here — confirm.
register_user_data_containor(d_);
(.. snip ..)
}