Skip to content

Instantly share code, notes, and snippets.

@chrishokamp
Created February 5, 2015 13:43
Show Gist options
  • Save chrishokamp/8f03ac71d447cf3da3e3 to your computer and use it in GitHub Desktop.
Save chrishokamp/8f03ac71d447cf3da3e3 to your computer and use it in GitHub Desktop.
simple parsing of hpt rules into preauthoring autocomplete templates
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import re, codecs, json"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def parse_rule_template(tokens):\n",
"# {'text': 'Chris X', template: [{'text': 'Chris' }, {'type': 'nonterminal'}]},\n",
"    template_obj = []\n",
"    for tok in tokens:\n",
"        if tok == u'[X][X]':\n",
"            template_obj.append({'type': 'nonterminal'})\n",
"        else:\n",
"            template_obj.append({'text': tok})\n",
"    \n",
"    return template_obj"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def map_template_to_string(template):\n",
"    string_rep = []\n",
"    for node in template:\n",
"        if 'type' in node:\n",
"            string_rep.append('X')\n",
"        else:\n",
"            string_rep.append(node['text'])\n",
"    return ' '.join(string_rep)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def parse_hpt_rules(lines):\n",
"    \n",
"    def parse_rule(hpt_rule):\n",
"        # remove the final [X], then split into units\n",
"        source_rule_units = re.sub(r' \\[X\\]$', '', hpt_rule).split()\n",
"        rule_template = parse_rule_template(source_rule_units)\n",
"        template_string = map_template_to_string(rule_template)\n",
"        return {'text': template_string, 'template': rule_template}\n",
"    \n",
"    return [parse_rule(line) for line in lines]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"rule_file = '/home/chris/projects/preauthoring_ui/data/Alchmey/filter/uniquerule'\n",
"with codecs.open(rule_file, encoding='utf8') as hpt_rules:\n",
"    sample_rules = [l.strip() for l in hpt_rules]\n",
"\n",
"ui_rules = parse_hpt_rules(sample_rules)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ui_rules[:5]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"[{'template': [{'text': u\"'s\"}], 'text': u\"'s\"},\n",
" {'template': [{'text': u\"'s\"}, {'type': 'nonterminal'}, {'text': u'.'}],\n",
"  'text': u\"'s X .\"},\n",
" {'template': [{'text': u\"'s\"}, {'type': 'nonterminal'}], 'text': u\"'s X\"},\n",
" {'template': [{'text': u\"'s\"}, {'text': u'consent'}], 'text': u\"'s consent\"},\n",
" {'template': [{'text': u\"'s\"},\n",
"               {'text': u'consent'},\n",
"               {'type': 'nonterminal'},\n",
"               {'text': u'.'}],\n",
"  'text': u\"'s consent X .\"}]"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# dump the parsed rules\n",
"json_representation = json.dumps(ui_rules, indent=4)\n",
"output = '/home/chris/projects/preauthoring_ui/data/Alchmey/filter/uniquerule.json'\n",
"with codecs.open(output, 'w', encoding='utf8') as out:\n",
"    out.write(json_representation)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment