Skip to content

Instantly share code, notes, and snippets.

@sparrowu93
Last active March 16, 2018 09:32
Show Gist options
  • Save sparrowu93/1f2a32516244538ed6c4a660338e1558 to your computer and use it in GitHub Desktop.
Save sparrowu93/1f2a32516244538ed6c4a660338e1558 to your computer and use it in GitHub Desktop.
Convert a protobuf-generated .js file back to the original .proto file
# usage: python3 jspb_parser.py [-h] [--js_path JS_PATH] [--js_library JS_LIBRARY]
# optional arguments:
# -h, --help show this help message and exit
# --js_path JS_PATH, -p JS_PATH
# javascript pb file path
# --js_library JS_LIBRARY, -l JS_LIBRARY
# the library name of pb
def is_repeat_needed(is_needed):
    """Return the ``repeated `` field-label prefix, or an empty string.

    Args:
        is_needed: Truthy when the field should be labelled ``repeated``.

    Returns:
        ``"repeated "`` (with a trailing space) when ``is_needed`` is truthy,
        otherwise ``""``.
    """
    return "repeated " if is_needed else ""
def camel_to_pascal(camelName):
    """Convert a camelCase or PascalCase identifier to snake_case.

    Despite its (historical) name, this function produces snake_case: the
    first character is lower-cased, and every later upper-case character is
    replaced by an underscore followed by its lower-case form.

    Args:
        camelName: The identifier to convert, e.g. ``"FooBarList"``.

    Returns:
        The converted name, e.g. ``"foo_bar_list"``. An empty input yields an
        empty string instead of raising ``IndexError``.
    """
    if not camelName:
        return ''
    first = camelName[0].lower()
    # Only characters after the first can introduce an underscore.
    rest = ''.join('_' + c.lower() if c.isupper() else c for c in camelName[1:])
    return first + rest
def get_var_type_with_line(line, is_packed):
    """Extract the snake_case field name from a generated setter/adder call.

    Note the name is misleading: this returns the *field name*, not a type.

    Args:
        line: A source line whose first seven characters are skipped and
            whose text up to the first ``(`` is taken as the accessor suffix
            (presumably lines such as ``msg.setFooBar(value);`` or
            ``msg.addFooBar(value);`` -- both prefixes are 7 characters).
        is_packed: When truthy, a trailing ``_list`` is removed from the
            converted name.

    Returns:
        The field name in snake_case.

    Raises:
        ValueError: If ``line`` contains no ``(``.
    """
    accessor_suffix = line[7: line.index('(')]
    value_name = camel_to_pascal(accessor_suffix)
    # Strip only the trailing "_list"; a blanket replace would also mangle
    # field names that legitimately contain "_list" elsewhere
    # (e.g. "list_of_ids_list" must become "list_of_ids", not "of_ids").
    if is_packed and value_name.endswith('_list'):
        value_name = value_name[:-len('_list')]
    return value_name
def get_var_name_with_line(line):
    """Extract the snake_case reader type from a ``reader.readXxx()`` line.

    Note the name is misleading: this returns the *type* read by the reader
    call, not a variable name.

    The text between the first ``(`` and the first ``);`` is taken, the
    ``reader.read`` prefix and ``()`` call parentheses are removed, and the
    remainder is converted with ``camel_to_pascal``.

    Args:
        line: A source line containing ``(`` and ``);``.

    Returns:
        A ``(type_name, packed)`` tuple; ``packed`` is ``True`` when the
        converted name started with ``packed_`` (which is then removed).

    Raises:
        ValueError: If ``line`` lacks ``(`` or ``);``.
    """
    start = line.index('(') + 1
    stop = line.index(');')
    reader_call = line[start:stop]
    bare_type = reader_call.replace('reader.read', '').replace('()', '')
    snake_type = camel_to_pascal(bare_type)
    if not snake_type.startswith('packed_'):
        return (snake_type, False)
    return (snake_type.replace('packed_', ''), True)
class JspbParser(object):
    """Rebuild ``.proto`` message definitions from a generated ``*_pb.js`` file.

    Each parsed field is stored as a 5-tuple
    ``(prop_type, type_name, field_name, field_number, is_repeated)`` where
    ``prop_type`` is ``1`` for a value read through ``reader.readXxx()``,
    ``2`` for a top-level message of the library and ``3`` for a message
    nested inside the class being parsed.

    Attributes set by the constructor:
        library_name: Name used both as the ``proto.<name>`` prefix in the
            JavaScript source and as the emitted ``package``.
        js_path: Path of the JavaScript file that was loaded.
        data_dict / data_list: Field lists keyed by top-level message name,
            and those names in the order they were parsed.
        sub_data_dict / sub_data_list: The same for nested messages, keyed
            by dotted name (e.g. ``Outer.Inner``).
        content: All lines of the JavaScript file.
        lines_to_parse: Lines starting with ``proto.<library_name>`` that
            contain more than three dot-separated parts.
    """

    def __init__(self, js_path, library_name):
        """Store the settings and immediately load ``js_path``."""
        self.library_name = library_name
        self.js_path = js_path
        self.data_dict = {}
        self.data_list = []
        self.sub_data_dict = {}
        self.sub_data_list = []
        self.content = None
        self.lines_to_parse = None
        self.load_file(self.js_path)

    def load_file(self, js_path):
        """Read ``js_path`` and pre-select the candidate definition lines.

        The file named by ``js_path`` is opened (previously the argument was
        ignored in favour of ``<library_name>_pb.js`` built from a module
        global).
        """
        with open(js_path) as f:
            self.content = f.readlines()
        prefix = 'proto.{}'.format(self.library_name)
        # A real list (not a one-shot filter iterator) so it can be iterated
        # more than once.
        self.lines_to_parse = [
            line for line in self.content
            if line.startswith(prefix) and len(line.split('.')) > 3
        ]

    def file_header(self):
        """Return the ``syntax`` and ``package`` header of the .proto file."""
        syntax = 'syntax = "proto3";\n'
        package = 'package {};\n\n'.format(self.library_name)
        return syntax + package

    def update_repeated_list(self):
        """Mark fields listed in ``<Class>.repeatedFields_`` as repeated.

        Must run after ``parse_content``: every class that declares
        ``repeatedFields_`` is looked up in ``data_dict`` / ``sub_data_dict``.

        Raises:
            KeyError: If a class with ``repeatedFields_`` was never parsed.
        """
        import re

        def update_index_tuple(prop_list, repeat_index):
            # Flip the is_repeated flag (last tuple item) in place.
            for pos, prop in enumerate(prop_list):
                prop_type, relate_class_name, value_name, prop_index, _ = prop
                if prop_index in repeat_index:
                    prop_list[pos] = (prop_type, relate_class_name, value_name, prop_index, True)
            return prop_list

        prefix = 'proto.{}.'.format(self.library_name)
        # The class also admits "_" so names matched by the original
        # (accidental) "A-z" range keep matching.
        repeated_field_pattern = re.compile(
            re.escape(prefix) + r'[a-zA-Z0-9_.]+\.repeatedFields_'
        )
        for rp in self.content:
            if not repeated_field_pattern.match(rp):
                continue
            raw_numbers = rp[rp.index('[') + 1: rp.index(']')].split(',')
            # Field numbers are compared as strings with the "case N:" text.
            rp_index_list = [number.strip() for number in raw_numbers]
            # Slice by the real prefix length instead of a hard-coded 11,
            # which was only right for 4-character library names.
            rp_class = rp[len(prefix): rp.index('.repeatedFields_')]
            target = self.sub_data_dict if '.' in rp_class else self.data_dict
            target[rp_class] = update_index_tuple(target[rp_class], rp_index_list)

    def write_proto(self, class_name, prop_list, level=1):
        """Render one message (and its nested messages) as .proto text.

        Args:
            class_name: Message name to emit.
            prop_list: Field tuples of that message.
            level: Nesting depth, 1 for a top-level message; each level
                indents by four spaces.

        Returns:
            The message definition; a trailing newline is appended only for
            ``level == 1``.
        """
        outer_indent = " " * (level - 1) * 4
        field_indent = " " * level * 4
        parts = [
            "{}message {}\n".format(outer_indent, class_name),
            "{}{{\n".format(outer_indent),
        ]
        for prop in prop_list:
            prop_type, relate_class_name, value_name, prop_index, is_needed = prop
            if prop_type in (1, 2):
                parts.append("{}{}{} {} = {};\n".format(
                    field_indent, is_repeat_needed(is_needed),
                    relate_class_name, value_name, prop_index))
            elif prop_type == 3:
                # The nested definition is emitted right before the field that
                # uses it, one level deeper than the current message.
                inner_class_name = relate_class_name.split('.')[-1]
                sub_prop_list = self.sub_data_dict[relate_class_name]
                parts.append(self.write_proto(inner_class_name, sub_prop_list, level=level + 1))
                parts.append("\n{}{}{} {} = {};\n".format(
                    field_indent, is_repeat_needed(is_needed),
                    inner_class_name, value_name, prop_index))
        parts.append("{}}}".format(outer_indent))
        if level == 1:
            parts.append('\n')
        return "".join(parts)

    def parse_content(self):
        """Scan every ``deserializeBinaryFromReader`` body for its fields.

        For each such function the following lines are walked until the
        count of lines containing ``}`` catches up with the count of lines
        containing ``{`` (starting from one open brace). ``case N:`` lines
        set the current field number; ``var value = ...`` lines add a field.
        Results go to ``data_dict``/``data_list`` for top-level messages and
        to ``sub_data_dict``/``sub_data_list`` for dotted (nested) names.
        """
        scalar_prefix = 'var value = /**'
        message_prefix = 'var value = new proto.{}'.format(self.library_name)
        last_index = len(self.content) - 1
        for item in self.lines_to_parse:
            if not item.split('.')[-1].startswith('deserializeBinaryFromReader'):
                continue
            class_name = '.'.join(item.split('.')[2:-1])
            nested_prefix = '{}.{}'.format(message_prefix, class_name)
            prop_list = []
            prop_index = 0
            lc, rc = 1, 0
            idx = self.content.index(item)
            while lc > rc and idx < last_index:
                idx += 1
                line = self.content[idx].strip(" ")
                if line.startswith('case'):
                    prop_index = line[line.index(" ") + 1: line.index(':')]
                lc += 1 if '{' in line else 0
                rc += 1 if '}' in line else 0
                dotted_parts = line.split('.')
                if line.startswith(scalar_prefix):
                    # The field name comes from the setter on the next line.
                    value_type, packed = get_var_name_with_line(line)
                    nx_line = self.content[idx + 1].strip(" ")
                    value_name = get_var_type_with_line(nx_line, packed)
                    prop_list.append((1, value_type, value_name, prop_index, False))
                if line.startswith(message_prefix) and len(dotted_parts) <= 3:
                    # Top-level message; its setter is two lines further down.
                    referName = dotted_parts[2].replace(';\n', '')
                    nx_line = self.content[idx + 2].strip(" ")
                    value_name = get_var_type_with_line(nx_line, False)
                    prop_list.append((2, referName, value_name, prop_index, False))
                if line.startswith(nested_prefix) and len(dotted_parts) > 3:
                    # Nested message; clean the last name part so that names
                    # nested more than two levels deep are handled as well.
                    sub_class_name = dotted_parts[2:]
                    sub_class_name[-1] = sub_class_name[-1].replace(';\n', '')
                    referName = '.'.join(sub_class_name)
                    nx_line = self.content[idx + 2].strip(" ")
                    value_name = get_var_type_with_line(nx_line, False)
                    prop_list.append((3, referName, value_name, prop_index, False))
            if len(class_name.split('.')) > 1:
                self.sub_data_dict[class_name] = prop_list
                self.sub_data_list.append(class_name)
            else:
                self.data_dict[class_name] = prop_list
                self.data_list.append(class_name)
if __name__ == "__main__":
    import argparse

    # Command-line entry point: parse the generated JS file and print the
    # reconstructed .proto text to stdout.
    parser = argparse.ArgumentParser(prog='python3')
    parser.add_argument('--js_path', '-p', type=str, help='javascript pb file path')
    parser.add_argument('--js_library', '-l', type=str, help='the library name of pb')
    args = parser.parse_args()

    js_path = args.js_path
    library_name = args.js_library

    if not (js_path and library_name):
        # Both options are needed; show the usage text otherwise.
        parser.print_help()
    else:
        js_parser = JspbParser(js_path, library_name)
        js_parser.parse_content()
        # Repeated flags can only be applied once all fields are known.
        js_parser.update_repeated_list()
        print(js_parser.file_header())
        for data in js_parser.data_list:
            print(js_parser.write_proto(data, js_parser.data_dict[data]))
@sparrowu93
Copy link
Author

Simply run

python3 jspb_parser.py --js_path=.path/to/file/<js_file_name>.js --js_library=<library_name> >> <out_file>.proto

to output to file

@sparrowu93
Copy link
Author

sparrowu93 commented Aug 15, 2017

For some reason, the project I have been working on was missing its original protobuf file and had only the generated JavaScript file left. I searched the web and didn't find a proper way to recover it, so I made one.
It hasn't been fully tested and may have some logic bugs, so if you find one (including grammar problems, lol), please tell me directly in the comments below.
Also, if you have any questions, feel free to comment!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment