Created
March 17, 2021 21:36
-
-
Save cydal/c047cc79c0f28abd883ebf3e5a498696 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import Libraries
import json

import pandas as pd
import requests
## Define lists to hold article information
# These are parallel, column-wise accumulators: append_to_list() pushes one
# value onto each list per article, so position i in every list refers to the
# same article.
titles = []
authors = []
publisher = []
doi = []
publishedDate = []
description = []
fullText = []
urll = []
document_type = []
types = []

# Define number of pages (consumed by the request loop further down)
pageCount = 100
# Function to append article info to lists
def append_to_list(dic):
    """Append the metadata of every search hit in *dic* to the module-level lists.

    Args:
        dic: Iterable of hit dicts. Each hit is expected to carry the article
            record under the "_source" key and the hit type under "_type".
            ``None`` (or an empty iterable) is treated as "no hits".

    Returns:
        None. The ten module-level lists (titles, authors, publisher, doi,
        publishedDate, description, fullText, urll, document_type, types) are
        extended in place, one entry per hit.

    A field that is absent from a hit is stored as ``None`` instead of raising
    ``KeyError``. Raising part-way through a record would leave the lists with
    unequal lengths, which breaks the later column-wise export.
    """
    ## For each article returned
    for hit in dic or ():
        # Tolerate a missing or null "_source" so all ten appends still happen.
        source = hit.get("_source") or {}
        titles.append(source.get("title"))
        authors.append(source.get("authors"))
        publisher.append(source.get("publisher"))
        doi.append(source.get("doi"))
        publishedDate.append(source.get("datePublished"))
        description.append(source.get("description"))
        fullText.append(source.get("fullText"))
        urll.append(source.get("downloadUrl"))
        document_type.append(source.get("documentType"))
        types.append(hit.get("_type"))
# Send request to CORE & call append_to_list
# NOTE(review): `url`, `query` and `apikey` are not defined in the visible part
# of this script; they must be assigned before this loop runs.
for page in range(1, pageCount + 1):  # inclusive upper bound: pages 1..pageCount
    params = {"page": page, "pageSize": 100, "apiKey": apikey}
    # A timeout keeps a stalled connection from hanging the run indefinitely.
    response = requests.get(url + query[0], params=params, timeout=60)
    # Fail loudly on an HTTP error instead of parsing an error body as results.
    response.raise_for_status()
    response_json = response.json()
    # "data" may be null; treat that the same as an empty page.
    articles = response_json["data"] or []
    print("Page - ", page)
    print("Length - ", len(articles))
    if not articles:
        # An empty page is taken to mean the result set is exhausted, so
        # further requests would only burn API quota.
        break
    append_to_list(articles)
# Create dictionary to hold lists
# NOTE: the "abstract" column is filled from `description` and the
# "description" column from `fullText`. The keys are left unchanged because
# they are the field/column names written to both output files.
dicto = {
    "Title": titles,
    "authors": authors,
    "publisher": publisher,
    "doi": doi,
    "publishedDate": publishedDate,
    "abstract": description,
    "description": fullText,
    "url": urll,
    "document_type": document_type,
    "type": types,
}

# Save dictionary as JSON to disk
with open("dicto.json", "w") as outfile:
    json.dump(dicto, outfile)

# Create pandas DataFrame from the same mapping, so the JSON and CSV exports
# share one column definition and cannot drift apart
ExtractDf = pd.DataFrame(dicto)

# Save pandas DataFrame to disk
ExtractDf.to_csv("core.csv", index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment