Skip to content

Instantly share code, notes, and snippets.

@ishideo
Last active October 14, 2016 01:58
Show Gist options
  • Save ishideo/79b5fc2a311214ddb0398a7796508143 to your computer and use it in GitHub Desktop.
Save ishideo/79b5fc2a311214ddb0398a7796508143 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# coding:utf-8
from __future__ import unicode_literals
from more_itertools import *
from lxml import etree
import codecs
def convert_atom2list(filename):
    """Parse an Atom XML file and return its fields as a list of row tuples.

    The file is read as raw bytes and handed to ``lxml.etree.fromstring``.
    Five XPath queries collect, in document order, every ``title`` text,
    ``link/@href``, ``id`` text, ``updated`` text and ``summary`` text.

    Args:
        filename: Path of the XML file to read.

    Returns:
        A list of 5-tuples ``(title, link, id, updated, summary)``; each
        member is a single-element list wrapping the extracted string.
        ``zip`` stops at the shortest column, so surplus values in longer
        columns are dropped.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
        lxml.etree.XMLSyntaxError: If the content is not well-formed XML.
    """
    # NOTE(review): the XPath names carry no namespace prefix, so a feed that
    # declares the usual Atom default namespace would presumably match
    # nothing -- confirm against the real input. Likewise '//title' also
    # matches a feed-level <title>, which may shift columns against each
    # other -- TODO confirm.
    with codecs.open(filename, 'rb') as fh:
        xml = etree.fromstring(fh.read())

    items_title = [[x] for x in xml.xpath('//title/text()')]
    items_link = [[x] for x in xml.xpath('//link/@href')]
    items_id = [[x] for x in xml.xpath('//id/text()')]
    items_update = [[x] for x in xml.xpath('//updated/text()')]
    items_summary = [[x] for x in xml.xpath('//summary/text()')]

    # The original returned undefined ``list_*`` names (NameError); zip the
    # ``items_*`` columns that were actually built. ``list(...)`` keeps the
    # result a real list on both Python 2 and Python 3.
    return list(zip(items_title, items_link, items_id,
                    items_update, items_summary))
def export_list2tsv(xs_tuple):
    """Write rows to ``atom2tsv.txt`` as UTF-8 tab-separated text.

    Args:
        xs_tuple: Iterable of rows. Each row is an iterable of groups, and
            each group is an iterable of strings (for example the tuples of
            single-element lists produced by ``convert_atom2list``).

    Every row is flattened by one level, its cells are joined with tabs,
    and the rows are joined with newlines. No trailing newline is written;
    an empty input produces an empty file. The file is created in the
    current working directory and overwritten if it already exists.
    """
    # One-level flatten of each row (group -> cells), then tab-join.
    list_lines = [
        '\t'.join([cell for group in row for cell in group])
        for row in xs_tuple
    ]
    string_lines = '\n'.join(list_lines)

    # ``with`` guarantees the handle is closed even if the write fails.
    with codecs.open('atom2tsv.txt', 'w', 'utf-8') as f:
        f.write(string_lines)
if __name__ == '__main__':
    # Script entry point: read 'atom.xml' from the working directory and
    # hand the extracted rows straight to the TSV writer.
    export_list2tsv(convert_atom2list('atom.xml'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment