Created
March 13, 2019 04:46
-
-
Save K-Wu/e8fe24311093574ccd3d88cc5d70cb54 to your computer and use it in GitHub Desktop.
A script that obtains the journals and conferences of every computer science sub-field by leveraging the Microsoft Academic Knowledge API
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reference 1: https://dev.labs.cognitive.microsoft.com/docs/services/56332331778daf02acc0a50b/operations/565d9001ca73072048922d97
# Reference 2: https://docs.microsoft.com/en-us/azure/cognitive-services/academic-knowledge/paperentityattributes
# Placeholder subscription keys for the Academic Knowledge API; replace them
# with real keys before running the script.
key1 = 'put_your_key_here'
key2 = 'put_your_key_here'
key = 'put_your_key_here'

# Field-of-study names (computer science sub-fields) that the script iterates
# over; each one is substituted into the query expression as F.FN=='<name>'.
CS_CATEGORIES = [
    "artificial intelligence",
    "computer hardware",
    "computer vision",
    "computer network",
    "real-time computing",
    "distributed computing",
    "pattern recognition",
    "data mining",
    "machine learning",
    "embedded system",
    "knowledge management",
    "multimedia",
    "library science",
    "simulation",
    "algorithm",
    "database",
    "world wide web",
    "computer security",
    "speech recognition",
    "telecommunications",
    "natural language processing",
    "theoretical computer science",
    "information retrieval",
    "programming language",
    "computer architecture",
    "software engineering",
    "operating system",
    "parallel computing",
    "human–computer interaction",
    "computer graphics",
    "computational science",
    "computer engineering",
    "data science",
    "internet privacy",
]
import requests | |
import json | |
import utils | |
import time | |
def _obtain_all(category, attributes):
    """Download every histogram entry of ``attributes`` for one field of study.

    Pages through the ``calchistogram`` endpoint 1000 entries at a time, using
    the expression ``And(Composite(F.FN=='<category>'),Y>2010)``.

    Args:
        category: Field-of-study name substituted into ``F.FN=='...'``.
        attributes: Attribute to build the histogram over (the callers in this
            file pass ``"C.CN"`` or ``"J.JN"``).

    Returns:
        A list with the ``histogram`` entries of every page, in page order.

    Note:
        A non-200 response is reported and the same page is requested again
        (with the other subscription key) without any retry limit, so a
        permanent failure such as an invalid key keeps this function looping.
    """
    page_size = 1000
    url_template = "https://api.labs.cognitive.microsoft.com/academic/v1.0/calchistogram?expr=And(Composite(F.FN=='{category}'),Y>2010)&attributes={attributes}&count=1000&offset={offset}&timeout=3600000"

    # The original compared against a module-level ``key`` that was never
    # reassigned, so the intended alternation between the two subscription
    # keys never happened. Rotate locally instead, starting with ``key2`` as
    # the original's first request did.
    subscription_keys = (key2, key1)
    requests_sent = 0

    results = []
    offset = 0
    # Placeholder so the first request is issued; the first successful page
    # replaces it with the ``distinct_values`` figure from the response.
    total_entries = 1
    while offset < total_entries:
        # Throttle before every request (the termination check now comes
        # first, so no time is wasted sleeping after the final page).
        time.sleep(7)
        headers = {'Ocp-Apim-Subscription-Key': subscription_keys[requests_sent % 2]}
        requests_sent += 1
        url = url_template.format(
            category=category, attributes=attributes, offset=offset)
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            # The placeholders used to be printed verbatim because the string
            # was never formatted.
            print("WARNING: request failed in category: {category} attributes: {attributes}".format(
                category=category, attributes=attributes))
            print(str(response.content))
            continue
        histogram = json.loads(response.content)['histograms'][0]
        if offset == 0:
            total_entries = histogram['distinct_values']
        results.extend(histogram['histogram'])
        offset += page_size
    return results
def obtain_all_conferences(category):
    """Return all histogram entries of the ``C.CN`` attribute for ``category``."""
    return _obtain_all(category, "C.CN")
def obtain_all_journals(category):
    """Return all histogram entries of the ``J.JN`` attribute for ``category``."""
    return _obtain_all(category, "J.JN")
def main():
    """Fetch journals and conferences for every category and save them.

    For each entry of ``CS_CATEGORIES`` the journal list is fetched first,
    then the conference list; both are stored in one dict that is handed to
    ``utils.save_obj`` under a file name derived from the category (spaces
    replaced by underscores, ``.pkl`` suffix).
    """
    for category in CS_CATEGORIES:
        journals = obtain_all_journals(category)
        conferences = obtain_all_conferences(category)
        target_name = "{category}.pkl".format(category=category.replace(" ", "_"))
        utils.save_obj({'journals': journals, 'conferences': conferences}, target_name)
if __name__ == "__main__":

    def test():
        """Send one ad-hoc request to the API (manual experiment, never called).

        Several query URLs tried during development are kept for reference;
        only the last one is actually requested.
        """
        # url="https://api.labs.cognitive.microsoft.com/academic/v1.0/calchistogram?expr=And(Composite(AA.AuN=='jaime teevan'),Y>2012)&attributes=Y,F.FN&count=4"
        candidate_urls = (
            "https://api.labs.cognitive.microsoft.com/academic/v1.0/calchistogram?expr=Y>2017&attributes=F.FN&count=262942&timeout=3600000",
            "https://api.labs.cognitive.microsoft.com/academic/v1.0/calchistogram?expr=And(Composite(F.FN=='artificial intelligence'),Y>2010)&attributes=J.JN&count=262942&timeout=3600000",
            "https://api.labs.cognitive.microsoft.com/academic/v1.0/interpret?query= journal by Yong Li after 2012",
            "https://api.labs.cognitive.microsoft.com/academic/v1.0/calchistogram?expr=And(Composite(F.FN=='artificial intelligence'),Y>2010)&attributes=J.JN&count=1000&timeout=3600000",
            "https://api.labs.cognitive.microsoft.com/academic/v1.0/calchistogram?expr=And(Composite(F.FN=='computer architecture'),Y>2010)&attributes=C.CN&count=1000&timeout=3600000",
        )
        url = candidate_urls[-1]
        response = requests.get(url, headers={'Ocp-Apim-Subscription-Key': key1})

    # Run the full crawl over every category when executed as a script.
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment