Created
November 30, 2015 13:17
-
-
Save gustavofonseca/b16eb70d2b5402b80180 to your computer and use it in GitHub Desktop.
Exemplo de script para coletar XMLs do articlemeta
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
"""Script para exemplificar o uso do articlemeta.scielo.org | |
para coletar XMLs dos artigos da rede SciELO. Novamente, isso é | |
apenas um EXEMPLO! | |
""" | |
import json | |
from urllib import request | |
# ISSNs of the journals whose articles will be collected.
ISSNs = [
    '0101-8175', '0102-3306', '1677-941X',
    '1806-9657', '1806-9088', '1678-4685',
]

# Endpoint template that lists the article identifiers of one journal;
# ``{issn}`` is filled in with str.format().
URL_IDENTIFICADORES = (
    'http://articlemeta.scielo.org'
    '/api/v1/article/identifiers/?issn={issn}'
)

# Endpoint template that returns one article, requested with
# ``format=xmlrsps``; ``{pid}`` is filled in with str.format().
URL_META_ARTIGO = (
    'http://articlemeta.scielo.org'
    '/api/v1/article/?code={pid}&format=xmlrsps'
)
def gera_pids(issns):
    """Yield the ``code`` of every article listed for each ISSN.

    For each ISSN, the URL built from ``URL_IDENTIFICADORES`` is fetched,
    the body is decoded as UTF-8 and parsed as JSON, and the ``code`` value
    of each entry under the ``objects`` key is yielded.

    Args:
        issns: iterable of ISSN strings.

    Yields:
        The value of ``item.get('code')`` for every item in ``objects``;
        ``None`` when an item has no ``code`` key. Nothing is yielded for a
        response without an ``objects`` key.

    Raises:
        Whatever ``request.urlopen`` or ``json.loads`` raise; errors are
        propagated to the caller unchanged.
    """
    for issn in issns:
        # Using the response as a context manager guarantees it is closed
        # and, unlike a try/finally that calls ``resp.close()``, does not
        # fail with UnboundLocalError (hiding the real error) when
        # urlopen() itself raises before ``resp`` is bound.
        with request.urlopen(URL_IDENTIFICADORES.format(issn=issn)) as resp:
            resp_texto = resp.read().decode('utf-8')

        resp_py = json.loads(resp_texto)
        # NOTE(review): only the ``objects`` of this single response are
        # read; if the endpoint paginates its results, later pages are not
        # fetched -- confirm against the API documentation.
        for item in resp_py.get('objects', []):
            yield item.get('code')
def baixa_e_salva(pid):
    """Download the XML of one article and save it as ``<pid>.xml``.

    The URL is built from ``URL_META_ARTIGO``; the response body is written,
    as raw bytes, to a file named ``pid + '.xml'`` (a relative path, so it
    lands in the current working directory).

    Args:
        pid: article identifier (string) used both in the request URL and
            as the output file name.

    Raises:
        Whatever ``request.urlopen`` or ``open`` raise; errors are
        propagated to the caller unchanged.
    """
    # Using the response as a context manager guarantees it is closed and,
    # unlike a try/finally that calls ``resp.close()``, does not fail with
    # UnboundLocalError (hiding the real error) when urlopen() itself raises
    # before ``resp`` is bound.
    with request.urlopen(URL_META_ARTIGO.format(pid=pid)) as resp:
        xml_data = resp.read()

    # The file is only created after the download succeeded, so a failed
    # request leaves nothing on disk.
    with open(pid + '.xml', 'wb') as xml_file:
        xml_file.write(xml_data)
# Script body: lazily walk the identifiers of every configured journal and
# save each article's XML as it is produced by the generator.
pids = gera_pids(ISSNs)
for pid in pids:
    baixa_e_salva(pid)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment