Skip to content

Instantly share code, notes, and snippets.

@zenloner
Created June 14, 2013 08:57
Show Gist options
  • Save zenloner/5780470 to your computer and use it in GitHub Desktop.
Save zenloner/5780470 to your computer and use it in GitHub Desktop.
#coding=utf-8
import re
def extract_info(pattern, input_file_name, output_file_name, prefix_list):
    """Write one tab-separated row per input line that matches *pattern*.

    Each line of the input file is tested with ``re.match`` (anchored at the
    start of the line). For a matching line, capture groups 1-4 are written
    to the output file as five tab-separated columns followed by a newline:

        stripped group 1, group 1, group 2, group 3, group 4

    where "stripped group 1" is group 1 with the first matching entry of
    *prefix_list* removed from its start (at most one prefix is removed).
    Lines that do not match are skipped.

    Args:
        pattern: Regular expression (string or compiled pattern) providing
            at least four capture groups.
        input_file_name: Path of the text file to scan line by line.
        output_file_name: Path of the file to write; created or truncated.
        prefix_list: Prefixes to try, in order, against group 1.
    """
    # Compile once instead of going through the re module cache per line.
    matcher = re.compile(pattern)

    # The output file is opened first, as before, so it is created/truncated
    # even when the input cannot be opened. Context managers guarantee both
    # handles are closed, including when an exception interrupts the loop.
    with open(output_file_name, 'w') as output_file:
        with open(input_file_name, 'r') as input_file:
            for line in input_file:
                m = matcher.match(line)
                if not m:
                    continue

                table_field, table_type, comment, sample = m.group(1, 2, 3, 4)

                # Strip only the first prefix that applies.
                origin_field = table_field
                for prefix in prefix_list:
                    if origin_field.startswith(prefix):
                        origin_field = origin_field[len(prefix):]
                        break

                row = [origin_field, table_field, table_type, comment, sample]
                output_file.write('\t'.join(row) + '\n')
if __name__ == '__main__':
    # Script entry point: scan the XML file for <Attribute ...> lines and
    # dump their name/type/comment/sample values as tab-separated rows.
    # The pattern captures, in order: name, type, comment, sample.
    pattern = r"\s*<Attribute\s*name\s*=\s*\"(.*?)\".*type\s*=\s*\"(.*?)\"\s*comment\s*=\s*\"(.*?)\"\s*sample\s*=\s*\"(.*?)\".*"
    input_file_name = 'udwetl_nsclick.xml'
    output_file_name = 'out1'
    # Prefixes removed from the attribute name to form the first output column.
    prefix_list = ['event_', 'nsclick_']
    extract_info(pattern, input_file_name, output_file_name, prefix_list)
    # Parenthesized form prints the same text on Python 2 and Python 3.
    print('ok')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment