Last active
August 29, 2015 14:08
-
-
Save erochest/4debe339ec9d2bf5ea5a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_field(item, key):
    """Return the ``key`` entry of ``item``'s sourceResource as a list.

    Args:
        item: A mapping with a ``u'sourceResource'`` mapping inside it.
        key: Name of the sourceResource field to read.

    Returns:
        The field's values as a list. A list (or tuple) value is returned
        unchanged; any single value (a string, or one dict such as a lone
        subject object) is wrapped in a one-element list. A missing or
        null field yields the blank placeholder ``[u' ']``.

    Raises:
        KeyError: If ``item`` has no ``u'sourceResource'`` entry.
    """
    values = item[u'sourceResource'].get(key)
    if values is None:
        # Missing and explicit-null fields both get the placeholder, so
        # callers can always index or iterate the result.
        return [u' ']
    if isinstance(values, (list, tuple)):
        return values
    # Wrap every scalar, not just strings: iterating a bare dict would
    # walk its keys instead of treating it as one value.
    return [values]
def get_text(f, json_data):
    """Write one ``title; description; subjects.`` line per record to ``f``.

    For each record the last title, the last description and a
    comma-separated list of subject names are read through ``get_field``
    and written as a single line.

    Args:
        f: A writable stream whose ``write`` accepts text (unicode), e.g.
            one opened with ``io.open(..., encoding='utf8')``.
        json_data: An iterable of records accepted by ``get_field``.
    """
    for each in json_data:
        # Use the last value of each multi-valued field; fall back to a
        # blank placeholder instead of failing on an empty list.
        titles = get_field(each, u'title')
        title = titles[-1] if titles else u' '
        descriptions = get_field(each, u'description')
        description = descriptions[-1] if descriptions else u' '

        subject_entries = get_field(each, u'subject')
        if isinstance(subject_entries, dict):
            # A lone subject object rather than a list of them.
            subject_entries = [subject_entries]

        subjects = []
        for subj_info in subject_entries:
            if isinstance(subj_info, dict):
                subj = subj_info.get(u'name')
            else:
                # Plain-string entries (including the blank placeholder
                # returned for a missing field) are used as-is.
                subj = subj_info
            if subj is not None:
                subjects.append(subj)
        subject = u', '.join(subjects)

        data = u'{}; {}; {}.\n'.format(title, description, subject)
        # Write the sentence to the 'text_results' file. ``data`` is
        # already text: calling .decode() on it forced an implicit ASCII
        # encode that failed on any non-ASCII character.
        f.write(data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment