Last active
December 6, 2018 21:55
-
-
Save knoguchi/d16f6f2d7536608d8e93867235d73018 to your computer and use it in GitHub Desktop.
dynamic protobuf parsing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <unistd.h>

#include <cerrno>
#include <cstring>
#include <iostream>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

#define BOOST_FILESYSTEM_NO_DEPRECATED
#define BOOST_FILESYSTEM_VERSION 3
#include <boost/filesystem.hpp>
#include <google/protobuf/compiler/importer.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/dynamic_message.h>
#include <google/protobuf/text_format.h>
#include "myprj.h"
using namespace std; | |
using namespace google::protobuf::compiler; | |
using namespace google::protobuf; | |
using namespace boost::filesystem; | |
class ErrorPrinter: public MultiFileErrorCollector, | |
public io::ErrorCollector, | |
public DescriptorPool::ErrorCollector { | |
public: | |
ErrorPrinter(ErrorFormat format, DiskSourceTree *tree = NULL): | |
found_errors_(false) {} | |
~ErrorPrinter() {} | |
// implements MultiFileErrorCollector | |
void AddError(const string& filename, int line, int column, const string& message) { | |
found_errors_ = true; | |
AddErrorOrWarning(filename, line, column, message, "error", cerr); | |
} | |
void AddWarning(const string& filename, int line, int column, const string& message) { | |
found_errors_ = true; | |
AddErrorOrWarning(filename, line, column, message, "warning", clog); | |
} | |
// implements io::ErrorCollector | |
void AddError(int line, int column, const string& message) { | |
AddError("input", line, column, message); | |
} | |
void AddWarning(int line, int column, const string& message) { | |
AddErrorOrWarning("input", line, column, message, "warning", clog); | |
} | |
// implements DescriptorPool::ErrorCollector | |
void AddError( | |
const string& filename, | |
const string& element_name, | |
const Message* descriptor, | |
ErrorLocation location, | |
const string& message) { | |
AddErrorOrWarning(filename, -1, -1, message, "error", cerr); | |
} | |
void AddWarning( | |
const string& filename, | |
const string& element_name, | |
const Message* descriptor, | |
ErrorLocation location, | |
const string& message) { | |
AddErrorOrWarning(filename, -1, -1, message, "warning", cerr); | |
} | |
bool FoundErrors() const { return found_errors_;} | |
private: | |
void AddErrorOrWarning(const string& filename, int line, int column, const string& message, const string& type, ostream& out) { | |
} | |
bool found_errors_; | |
}; | |
// Registers every (virtual path, disk path) pair held in the global
// proto_path_ list with `source_tree`. Always returns true.
bool InitializeDiskSourceTree(DiskSourceTree* source_tree) {
  // Set up the source tree from the configured proto paths.
  for (const auto& mapping : proto_path_) {
    source_tree->MapPath(mapping.first, mapping.second);
  }
  /*
  // Map input files to virtual paths if possible.
  if(!MakeInputsBeProtoPathRelative(source_tree)) {
    return false;
  }
  */
  return true;
}
// Rewrites `*proto` from an on-disk path to its virtual path inside
// `source_tree`, when such a mapping exists.
//
// Parameters:
//   source_tree        tree used to translate between disk and virtual paths.
//   proto              in/out: the path to translate; overwritten only when
//                      the disk-to-virtual translation succeeds.
//   fallback_database  optional (may be nullptr); a file known to this
//                      database is accepted even if it cannot be found on disk.
//
// Returns true when `*proto` is usable as an import name (translated, already
// virtual, or present in the fallback database). Returns false after printing
// a diagnostic to stderr otherwise.
bool MakeProtoProtoPathRelative(
    DiskSourceTree* source_tree, string* proto,
    DescriptorDatabase* fallback_database) {
  FileDescriptorProto fallback_file;
  bool in_fallback_database = fallback_database != nullptr && fallback_database->FindFileByName(*proto, &fallback_file);

  // The path does not exist on disk as given: it may already be a virtual
  // path, or it may only live in the fallback database.
  if (access(proto->c_str(), F_OK) < 0) {
    string disk_file;
    if (source_tree->VirtualFileToDiskFile(*proto, &disk_file) ||
        in_fallback_database) {
      return true;
    }
    cerr << *proto << ": " << strerror(ENOENT) << endl;
    return false;
  }

  string virtual_file, shadowing_disk_file;
  switch (source_tree->DiskFileToVirtualFile(
              *proto, &virtual_file, &shadowing_disk_file)) {
    case DiskSourceTree::SUCCESS:
      *proto = virtual_file;
      break;
    case DiskSourceTree::SHADOWED:
      cerr << *proto
           << ": Input is shadowed in the --proto_path by \""
           << shadowing_disk_file
           << "\". Either use the latter file as your input or reorder "
              "the --proto_path so that the former file's location comes first."
           << endl;
      return false;
    case DiskSourceTree::CANNOT_OPEN:
      if (in_fallback_database) {
        return true;
      }
      cerr << *proto << ": " << strerror(errno) << endl;
      return false;
    case DiskSourceTree::NO_MAPPING:
      {
        // Try to interpret the path as a virtual path.
        string disk_file;
        if (source_tree->VirtualFileToDiskFile(*proto, &disk_file) ||
            in_fallback_database) {
          return true;
        }
        // The input file path can't be mapped to any --proto_path and it also
        // can't be interpreted as a virtual path. Say so instead of failing
        // silently.
        cerr << *proto
             << ": File does not reside within any path specified using "
                "--proto_path. You must specify a --proto_path which "
                "encompasses this file."
             << endl;
        return false;
      }
  }
  return true;
}
// Applies MakeProtoProtoPathRelative to every entry of the global
// input_files_ list, in order, rewriting entries in place.
// Stops at the first entry that fails and returns false; returns true when
// every entry was accepted (including when the list is empty).
bool MakeInputsBeProtoPathRelative(
    DiskSourceTree* source_tree, DescriptorDatabase* fallback_database) {
  for (auto entry = input_files_.begin(); entry != input_files_.end(); ++entry) {
    const bool accepted =
        MakeProtoProtoPathRelative(source_tree, &*entry, fallback_database);
    if (!accepted) {
      return false;
    }
  }
  return true;
}
// Looks up every name in the global input_files_ list in `descriptor_pool`
// and appends the resulting file descriptors to `parsed_files`, in order.
//
// Returns false at the first name the pool cannot resolve; descriptors
// appended before that point stay in `parsed_files`. An ENOENT message is
// printed here only when descriptor_set_in_names_ is non-empty.
// NOTE(review): presumably in the other case the pool's error collector has
// already reported the failure - confirm.
bool ParseInputFiles(
    DescriptorPool* descriptor_pool,
    vector<const FileDescriptor*>* parsed_files) {
  for (const auto& input_name : input_files_) {
    // Import the file, tracking unused imports only for the duration of
    // this lookup.
    descriptor_pool->AddUnusedImportTrackFile(input_name);
    const FileDescriptor* file_desc = descriptor_pool->FindFileByName(input_name);
    descriptor_pool->ClearUnusedImportTrackFiles();

    if (file_desc != NULL) {
      parsed_files->push_back(file_desc);
      continue;
    }

    if (!descriptor_set_in_names_.empty()) {
      cerr << input_name << ": " << strerror(ENOENT) << endl;
    }
    return false;
  }
  return true;
}
bool EncodeOrDecode(const DescriptorPool* pool) { | |
const Descriptor* type = pool->FindMessageTypeByName(codec_type_); | |
if (type == NULL) { | |
cerr << "Type not defined: " << codec_type_ << endl; | |
return false; | |
} | |
DynamicMessageFactory dynamic_factory(pool); | |
unique_ptr<Message> message(dynamic_factory.GetPrototype(type)->New()); | |
cout << type->field_count(); | |
io::FileInputStream in(STDIN_FILENO); | |
io::FileOutputStream out(STDOUT_FILENO); | |
if (!message->ParsePartialFromZeroCopyStream(&in)) { | |
cerr << "Failed to parse input." << endl; | |
return false; | |
} | |
if(!message->IsInitialized()) { | |
cerr << "warning Input message is missing required fields: " | |
<< message->InitializationErrorString() << endl; | |
} | |
if(!TextFormat::Print(*message, &out)) { | |
cerr << "output: I/O error" << endl; | |
return false; | |
} | |
return true; | |
} | |
int main(int argc, char *argv[]) { | |
vector<const FileDescriptor*> parsed_files; | |
unique_ptr<DiskSourceTree> disk_source_tree; | |
unique_ptr<ErrorPrinter> error_collector; | |
unique_ptr<DescriptorPool> descriptor_pool; | |
unique_ptr<DescriptorDatabase> descriptor_database; | |
unique_ptr<SourceTreeDescriptorDatabase> source_tree_database; | |
proto_path_.clear(); | |
const path format_schema_dir = "format_schema"; | |
if(!exists(format_schema_dir) || !is_directory(format_schema_dir)) { | |
cout << "format_schema directory does not exist: " << format_schema_dir; | |
return 2; | |
} | |
recursive_directory_iterator it(format_schema_dir); | |
recursive_directory_iterator endit; | |
while(it != endit) | |
{ | |
if(is_regular_file(*it)) { | |
if( it->path().extension().c_str() == string(".proto")) { | |
cout << "Adding " << it->path().c_str() << endl; | |
search_path_.insert(it->path().parent_path().c_str()); | |
input_files_.push_back(it->path().filename().c_str()); | |
} | |
} | |
++it; | |
} | |
for (auto const& element : search_path_) { | |
proto_path_.push_back(pair<string, string>("", element.c_str())); | |
} | |
// add the top directory in case no search path provided | |
if (proto_path_.empty() && descriptor_set_in_names_.empty()) { | |
proto_path_.push_back(pair<string, string>("", ".")); | |
} | |
if (descriptor_set_in_names_.empty()) { | |
// add proto files | |
disk_source_tree.reset(new DiskSourceTree()); | |
if(!InitializeDiskSourceTree(disk_source_tree.get())) { | |
return 1; | |
} | |
error_collector.reset(new ErrorPrinter(error_format_, disk_source_tree.get())); | |
// at this point, it knows the dirs to look for proto files | |
SourceTreeDescriptorDatabase* database = new SourceTreeDescriptorDatabase(disk_source_tree.get()); | |
database->RecordErrorsTo(error_collector.get()); | |
descriptor_database.reset(database); | |
descriptor_pool.reset(new DescriptorPool(descriptor_database.get(), database->GetValidationErrorCollector())); | |
} | |
descriptor_pool->EnforceWeakDependencies(true); | |
if(!ParseInputFiles(descriptor_pool.get(), &parsed_files)) { | |
return 1; | |
} | |
// check the type exists | |
codec_type_ = argv[1]; | |
const Descriptor* type = descriptor_pool->FindMessageTypeByName(codec_type_); | |
if (type == NULL) { | |
cerr << "Type not defined: " << codec_type_ << endl; | |
return 1; | |
} | |
// ready to parse | |
cout << "Ready to parse. Waiting for input\n"; | |
if (!EncodeOrDecode(descriptor_pool.get())) { | |
return 1; | |
} | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Shared program-wide state for the dynamic protobuf parsing example.
//
// NOTE: the variables below are *defined* here, not just declared, so this
// header can be included from one translation unit only.
#ifndef MYPRJ_H_
#define MYPRJ_H_

#include <set>
#include <string>
#include <utility>
#include <vector>

// Kept for backward compatibility with code that relies on this header
// pulling std names into scope.
using namespace std;

// Names of the .proto files to import.
std::vector<std::string> input_files_;

// (virtual path, disk path) pairs registered with the disk source tree.
std::vector<std::pair<std::string, std::string>> proto_path_;

// Names of descriptor-set inputs. Nothing shown alongside this header fills
// it in; it is only checked for emptiness.
std::vector<std::string> descriptor_set_in_names_;

// De-duplicated directories that contain .proto files.
std::set<std::string> search_path_;

// Name of the message type to decode.
std::string codec_type_;

// File extension that identifies protobuf schema files.
#define PROTO_EXT ".proto"

// Diagnostic output formats; a single placeholder value for now.
enum ErrorFormat {
  ERROR_FORMAT_BLAH
};

// Format handed to the error printer.
ErrorFormat error_format_ = ERROR_FORMAT_BLAH;

#endif  // MYPRJ_H_
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
What does this gist do?
Reads *.proto files from multiple directories, then deserializes a protobuf message fed from STDIN.
Based on protobuf v3.0.0 protoc code.
Why so complicated?
Kenton Varda, the original author of Protobuf, once said:
Apparently, there are protobuf type databases at Google that are accessed remotely. Such a database is backed by the disk file system and possibly by other remote back-end databases, and the client accesses it via a thread-safe pool. These enterprise features were removed from the open-source version of protobuf. We don't need all of this complexity when we use protobuf locally, but it greatly helps if you know the design.