Last active
February 23, 2016 22:41
-
-
Save seominjoon/e059b6fb51a3313fe800 to your computer and use it in GitHub Desktop.
Convert .bif file to .json file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
-------------------------------------------------------------------------------- | |
This code is a modified version of:
https://github.com/eBay/bayesian-belief-networks/blob/master/bayesian/examples/bif/bif_parser.py. | |
NOTE: regex pattern of this parser is sensitive to spaces (will be fixed soon). | |
The output json file can be iterated with the following pseudo-code (d is the loaded json): | |
    variables = d['variables']
    edges = d['edges']
    for variable_name, domain in variables:
        # domain is the list of possible values that variable can take
    for variable_names, dist in edges:
        # variable_names[:-1] are the parents, and variable_names[-1] is the current
        for variable_vals, prob in dist:
            # prob is the probability of variable_names[-1] = variable_vals[-1],
            # given variable_names[:-1] = variable_vals[:-1]
-------------------------------------------------------------------------------- | |
""" | |
import re | |
import argparse | |
import json | |
__author__ = "Minjoon Seo" | |
__email__ = "[email protected]" | |
def get_args():
    """Parse the command line and return the resulting namespace.

    Two positional arguments are expected: ``bif_path`` (the BIF file to
    read) and ``json_path`` (the JSON file to write). The module docstring
    is shown verbatim as the help description.
    """
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    positionals = (
        ("bif_path", "path to the input bif file."),
        ("json_path", "path to the output json file."),
    )
    for arg_name, help_text in positionals:
        cli.add_argument(arg_name, help=help_text)
    return cli.parse_args()
def _split_csv(text):
    """Split a comma-separated string into items, ignoring surrounding spaces."""
    return re.split(r"\s*,\s*", text.strip())


def _check_edges(edges, variable_dict):
    """Verify every name and value used in ``edges`` was declared as a variable.

    Raises AssertionError explicitly (rather than via ``assert``) so the check
    still runs when Python is started with ``-O``.
    """
    for variable_names, dist in edges:
        for variable_name in variable_names:
            if variable_name not in variable_dict:
                raise AssertionError(
                    "%r not in %r" % (variable_name, list(variable_dict.keys())))
        for variable_vals, _prob in dist:
            for variable_name, variable_val in zip(variable_names, variable_vals):
                if variable_val not in variable_dict[variable_name]:
                    raise AssertionError(
                        "%r not in %r" % (variable_val, variable_dict[variable_name]))


def bif2json(args):
    """Convert the BIF file at ``args.bif_path`` into JSON at ``args.json_path``.

    ``args`` is any object exposing ``bif_path`` and ``json_path`` attributes
    (e.g. the namespace returned by ``get_args``).

    The written JSON object has two keys:
      * ``variables``: list of ``[name, domain]`` pairs in declaration order,
        where ``domain`` is the list of values the variable can take.
      * ``edges``: list of ``[variable_names, dist]`` pairs, where
        ``variable_names[:-1]`` are the parents and ``variable_names[-1]`` is
        the variable itself, and ``dist`` is a list of
        ``[variable_vals, prob]`` entries.

    Raises:
        ValueError: a variable or probability declaration cannot be parsed,
            or the file ends inside a conditional probability table.
        KeyError: a probability block refers to an undeclared variable.
        AssertionError: the parsed edges mention a variable name or value
            that was never declared.
    """
    # All patterns accept arbitrary runs of whitespace between tokens so that
    # differently indented/spaced BIF files parse the same way.
    variable_name_pattern = re.compile(r"variable\s+(.+?)\s*\{\s*$")
    variable_pattern = re.compile(
        r"\s*type\s+discrete\s*\[\s*\d+\s*\]\s*\{\s*(.+?)\s*\}\s*;\s*")
    prior_probability_pattern_1 = re.compile(
        r"probability\s*\(\s*([^|]+?)\s*\)\s*\{\s*")
    prior_probability_pattern_2 = re.compile(r"\s*table\s+(.+);\s*")
    conditional_probability_pattern_1 = re.compile(
        r"probability\s*\((.+)\|(.+)\)\s*\{\s*")
    conditional_probability_pattern_2 = re.compile(r"\s*\((.+)\)\s*(.+);\s*")

    variables = []      # [name, domain] pairs, in declaration order
    variable_dict = {}  # name -> domain, for lookups
    edges = []

    # Text mode: the parser works on str lines (binary mode yields bytes on
    # Python 3, which breaks startswith() and the str regexes).
    with open(args.bif_path, "r") as infile:
        lines = iter(infile)
        # Skip the two-line network header.
        next(lines, "")
        next(lines, "")

        for line in lines:
            # Variable declaration
            if line.startswith("variable"):
                name_match = variable_name_pattern.match(line)
                domain_match = variable_pattern.match(next(lines, ""))
                if not (name_match and domain_match):
                    raise ValueError(
                        "Unrecognised variable declaration:\n" + line)
                key = name_match.group(1)
                val = _split_csv(domain_match.group(1))
                variables.append([key, val])
                variable_dict[key] = val
                next(lines, "")  # closing "}"

            # Probability distribution
            elif line.startswith("probability"):
                match = prior_probability_pattern_1.match(line)
                if match:
                    # Prior probabilities: a single "table" line
                    variable = match.group(1).strip()
                    table_line = next(lines, "")
                    table_match = prior_probability_pattern_2.match(table_line)
                    if not table_match:
                        raise ValueError(
                            "Unrecognised probability declaration:\n" + table_line)
                    next(lines, "")  # closing "}"
                    probs = [float(p) for p in _split_csv(table_match.group(1))]
                    # Materialise as lists: a lazy zip object is not JSON
                    # serialisable on Python 3.
                    dist = [[[value], prob]
                            for value, prob in zip(variable_dict[variable], probs)]
                    edges.append([[variable], dist])
                    continue

                match = conditional_probability_pattern_1.match(line)
                if not match:
                    raise ValueError(
                        "Unrecognised probability declaration:\n" + line)

                # Conditional probabilities: one row per parent assignment
                variable = match.group(1).strip()
                given = _split_csv(match.group(2))
                dist = []
                while True:
                    row = next(lines, "")
                    if not row:
                        raise ValueError(
                            "Unexpected end of file in probability table of %r"
                            % variable)
                    if row.strip() == "}":
                        break
                    if not row.strip():
                        continue  # tolerate blank lines inside the table
                    row_match = conditional_probability_pattern_2.match(row)
                    if not row_match:
                        raise ValueError(
                            "Unrecognised probability declaration:\n" + row)
                    given_values = _split_csv(row_match.group(1))
                    probs = [float(p) for p in _split_csv(row_match.group(2))]
                    for value, prob in zip(variable_dict[variable], probs):
                        dist.append([given_values + [value], prob])
                edges.append([given + [variable], dist])

    # Sanity check before anything is written to disk.
    _check_edges(edges, variable_dict)

    out = {'edges': edges, 'variables': variables}
    with open(args.json_path, "w") as outfile:
        json.dump(out, outfile)
if __name__ == "__main__":
    # Script entry point: parse the command line, then run the conversion.
    bif2json(get_args())
Error: Python regex was not imported.
"import re" should be added
Fixed. Regex is very badly written...
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Quick start: