cydal · March 17, 2021 21:36
diff --git a/collect_CORE.py b/collect_CORE.py
 # Import Libraries
 import requests
 import pandas as pd
 import json

 ## Column stores: one list per article field. append_to_list() adds exactly
 ## one entry to every list per article, so index i refers to the same
 ## article in all of them.
 titles = []
 authors = []
 publisher = []
 doi = []
 publishedDate = []
 description = []
 fullText = []
 urll = []
 document_type = []
 types = []

 # Define number of pages
 pageCount = 100


 # Function to append article info to lists
 def append_to_list(dic):
     """Append the fields of every search hit in ``dic`` to the module lists.

     Args:
         dic: iterable of hit dicts. Each hit is read through its
             ``"_source"`` mapping (article fields) and its top-level
             ``"_type"`` key. ``None`` or an empty iterable means "no hits"
             and leaves the lists untouched.

     A field that is absent from a hit is stored as ``None`` instead of
     raising ``KeyError``. Raising part-way through a record would leave the
     lists with different lengths and break the row alignment that the
     later DataFrame construction depends on.
     """
     if not dic:
         return

     ## For each article returned
     for eachObject in dic:
         # A missing or null "_source" is treated as a hit with no fields.
         source = eachObject.get("_source") or {}
         titles.append(source.get("title"))
         authors.append(source.get("authors"))
         publisher.append(source.get("publisher"))
         doi.append(source.get("doi"))
         publishedDate.append(source.get("datePublished"))
         description.append(source.get("description"))
         fullText.append(source.get("fullText"))
         urll.append(source.get("downloadUrl"))
         document_type.append(source.get("documentType"))
         types.append(eachObject.get("_type"))
    
 # Send request to CORE & call append_to_list
 # NOTE(review): `url`, `query` and `apikey` are not defined in the visible
 # part of this script; they must be assigned before this loop runs.
 # Pages are numbered from 1. range() excludes its stop value, so the bound
 # is pageCount + 1 in order to request exactly pageCount pages.
 for page in range(1, pageCount + 1):
     params = {"page": page, "pageSize": 100, "apiKey": apikey}
     # Without a timeout requests can wait forever on a stalled connection.
     response = requests.get(url + query[0], params=params, timeout=60)
     # Surface HTTP errors directly instead of parsing an error body as results.
     response.raise_for_status()
     response_json = response.json()
     # "data" may be absent or null (presumably when a page has no results);
     # treat that as an empty page rather than crashing in len().
     page_data = response_json.get("data") or []
     print("Page -  ", page)
     print("Length -  ", len(page_data))
     append_to_list(page_data)
  
  
 # Gather the collected columns under their output names.
 # NOTE(review): "abstract" is fed from the `description` list and
 # "description" from the `fullText` list - confirm this naming is intended.
 dicto = dict(
     [
         ("Title", titles),
         ("authors", authors),
         ("publisher", publisher),
         ("doi", doi),
         ("publishedDate", publishedDate),
         ("abstract", description),
         ("description", fullText),
         ("url", urll),
         ("document_type", document_type),
         ("type", types),
     ]
 )

 # Save dictionary as json to disk
 with open("dicto.json", mode="w") as json_file:
     json.dump(dicto, fp=json_file)
  
  
 # Build the DataFrame: one column per collected list, in this order.
 # NOTE(review): "abstract" is fed from the `description` list and
 # "description" from the `fullText` list - confirm this naming is intended.
 ExtractDf = pd.DataFrame(
     data=dict(
         [
             ("Title", titles),
             ("authors", authors),
             ("publisher", publisher),
             ("doi", doi),
             ("publishedDate", publishedDate),
             ("abstract", description),
             ("description", fullText),
             ("url", urll),
             ("document_type", document_type),
             ("type", types),
         ]
     )
 )

 # Write the frame to disk as CSV, without the index column
 ExtractDf.to_csv(path_or_buf="core.csv", index=False)
	# Import Libraries
	import requests
	import pandas as pd
	import json

	## Column stores: one list per article field. append_to_list() adds exactly
	## one entry to every list per article, so index i refers to the same
	## article in all of them.
	titles = []
	authors = []
	publisher = []
	doi = []
	publishedDate = []
	description = []
	fullText = []
	urll = []
	document_type = []
	types = []

	# Define number of pages
	pageCount = 100


	# Function to append article info to lists
	def append_to_list(dic):
	    """Append the fields of every search hit in ``dic`` to the module lists.

	    Args:
	        dic: iterable of hit dicts. Each hit is read through its
	            ``"_source"`` mapping (article fields) and its top-level
	            ``"_type"`` key. ``None`` or an empty iterable means "no hits"
	            and leaves the lists untouched.

	    A field that is absent from a hit is stored as ``None`` instead of
	    raising ``KeyError``. Raising part-way through a record would leave the
	    lists with different lengths and break the row alignment that the
	    later DataFrame construction depends on.
	    """
	    if not dic:
	        return

	    ## For each article returned
	    for eachObject in dic:
	        # A missing or null "_source" is treated as a hit with no fields.
	        source = eachObject.get("_source") or {}
	        titles.append(source.get("title"))
	        authors.append(source.get("authors"))
	        publisher.append(source.get("publisher"))
	        doi.append(source.get("doi"))
	        publishedDate.append(source.get("datePublished"))
	        description.append(source.get("description"))
	        fullText.append(source.get("fullText"))
	        urll.append(source.get("downloadUrl"))
	        document_type.append(source.get("documentType"))
	        types.append(eachObject.get("_type"))

	# Send request to CORE & call append_to_list
	# NOTE(review): `url`, `query` and `apikey` are not defined in the visible
	# part of this script; they must be assigned before this loop runs.
	# Pages are numbered from 1. range() excludes its stop value, so the bound
	# is pageCount + 1 in order to request exactly pageCount pages.
	for page in range(1, pageCount + 1):
	    params = {"page": page, "pageSize": 100, "apiKey": apikey}
	    # Without a timeout requests can wait forever on a stalled connection.
	    response = requests.get(url + query[0], params=params, timeout=60)
	    # Surface HTTP errors directly instead of parsing an error body as results.
	    response.raise_for_status()
	    response_json = response.json()
	    # "data" may be absent or null (presumably when a page has no results);
	    # treat that as an empty page rather than crashing in len().
	    page_data = response_json.get("data") or []
	    print("Page - ", page)
	    print("Length - ", len(page_data))
	    append_to_list(page_data)


	# Create dictionary mapping each output name to its collected list.
	# NOTE(review): "abstract" is fed from the `description` list and
	# "description" from the `fullText` list - confirm this naming is intended.
	dicto = {
	    "Title": titles,
	    "authors": authors,
	    "publisher": publisher,
	    "doi": doi,
	    "publishedDate": publishedDate,
	    "abstract": description,
	    "description": fullText,
	    "url": urll,
	    "document_type": document_type,
	    "type": types,
	}

	# Save dictionary as json to disk
	# (the dump call must be nested under `with` so the file is open when it runs)
	with open("dicto.json", 'w') as outfile:
	    json.dump(dicto, outfile)


	# Build the DataFrame: one column per collected list, in this order.
	# NOTE(review): "abstract" is fed from the `description` list and
	# "description" from the `fullText` list - confirm this naming is intended.
	ExtractDf = pd.DataFrame(
	    data=dict(
	        [
	            ("Title", titles),
	            ("authors", authors),
	            ("publisher", publisher),
	            ("doi", doi),
	            ("publishedDate", publishedDate),
	            ("abstract", description),
	            ("description", fullText),
	            ("url", urll),
	            ("document_type", document_type),
	            ("type", types),
	        ]
	    )
	)

	# Write the frame to disk as CSV, without the index column
	ExtractDf.to_csv(path_or_buf="core.csv", index=False)