Last active
June 8, 2022 12:23
-
-
Save carlinmack/3e3c67ca1a42cc03b61ff535956e37b5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
If query.sparql and PMCID_list.txt are downloaded, the script can simply be called as | |
python iq-to-qid.py | |
""" | |
import argparse | |
import os | |
import subprocess | |
import time | |
import pandas as pd | |
import requests | |
from requests.exceptions import HTTPError | |
from tqdm import tqdm | |
# HTTP headers passed along with every request issued by runQuery.
HEADERS = {"User-Agent": "ID-to-QID"}
def main(
    test: bool = False,
    inputFile: str = "PMCID_list.txt",
    batchSize: int = 100,
):
    """Look up the QID for every ID listed in ``inputFile`` and save them as CSV.

    The IDs are read one per line, substituted batch by batch into the query
    template stored in ``query.sparql`` and the collected ``[qid, id]`` pairs
    are written to ``qids-pmcs.csv`` (columns ``qid`` and ``pmc``).

    Args:
        test: If true, stop after the first batch.
        inputFile: Path of the text file holding one ID per line.
        batchSize: Number of IDs placed in a single query.

    Raises:
        ValueError: If ``batchSize`` is smaller than 1.
        SystemExit: If ``inputFile`` does not exist.
    """
    if batchSize < 1:
        raise ValueError("batchSize must be at least 1")

    if not os.path.isfile(inputFile):
        print(inputFile + " does not exist")
        # A missing input is an error, so report a non-zero exit status.
        raise SystemExit(1)

    with open("query.sparql", encoding="utf-8") as r:
        query = r.read()

    with open(inputFile, encoding="utf-8") as f:
        # Skip blank lines so they are not sent to the endpoint as empty IDs.
        inputList = [line.strip() for line in f if line.strip()]

    data = []
    # Ceiling division: exactly one progress step per batch, whatever the
    # batch size is.
    totalBatches = (len(inputList) + batchSize - 1) // batchSize

    # The context manager closes the bar even when a batch raises.
    with tqdm(
        total=totalBatches,
        bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} {elapsed_s:.0f}s",
    ) as t:
        for i in range(0, len(inputList), batchSize):
            IDs = inputList[i:i + batchSize]
            # Show the first ID of the batch currently being fetched.
            t.set_description(IDs[0])

            # Each ID becomes a single-quoted literal, separated by spaces.
            IDstring = " ".join("'" + q + "'" for q in IDs)
            data += getData(query, IDstring)
            t.update()

            if test:
                break

    df = pd.DataFrame(data, columns=["qid", "pmc"])
    df.to_csv("qids-pmcs.csv", index=False)
def getData(query, IDstring):
    """Run the query for one batch of IDs and return the matching pairs.

    Args:
        query: Query template containing a ``{values}`` placeholder.
        IDstring: Text substituted for the ``{values}`` placeholder.

    Returns:
        A list of two-element lists ``[QID, id]``, one per entry of
        ``results.bindings`` in the response.
    """
    data = runQuery(query.format(values=IDstring))

    output = []
    for binding in data["results"]["bindings"]:
        # Presumably the item value is an entity URI whose last path segment
        # is the QID; taking that segment avoids hard-coding the length of
        # the URI prefix.
        QID = binding["item"]["value"].rsplit("/", 1)[-1]
        externalId = binding["id"]["value"]
        output.append([QID, externalId])
    return output
def runQuery(query):
    """Send ``query`` to the SPARQL endpoint and return the decoded JSON reply.

    Args:
        query: Complete query text.

    Returns:
        The response body parsed from JSON.

    Raises:
        SystemExit: If the endpoint answers with an HTTP error status; the
            response body is printed first.
    """
    url = "https://query.wikidata.org/sparql"
    params = {"query": query, "format": "json"}

    try:
        # Client-side timeout so a stalled connection cannot hang the script
        # forever.
        response = requests.get(url, params=params, headers=HEADERS, timeout=120)
        # requests only raises HTTPError on request: without this call a
        # 4xx/5xx reply would fall through to .json() below.
        response.raise_for_status()
    except HTTPError as e:
        print(e.response.text)
        raise SystemExit(1) from e

    return response.json()
def timer(tick, msg=""):
    """Report the time elapsed since ``tick`` and hand back a fresh timestamp.

    Args:
        tick: Start time, as returned by ``time.time()``.
        msg: Optional label printed in front of the duration.

    Returns:
        The current ``time.time()`` value, usable as the next ``tick``.
    """
    elapsed = time.time() - tick
    report = "--- %s %.3f seconds ---" % (msg, elapsed)
    print(report)
    return time.time()
def defineArgParser():
    """Create the parser for the command line arguments.

    The module docstring is used verbatim as the program description.

    Returns:
        An ``argparse.ArgumentParser`` with a single ``-t``/``--test`` flag.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )

    parser.add_argument(
        "-t",
        "--test",
        # Previously empty, which left the flag unexplained in --help.
        help="Test mode: only query the first batch of IDs.",
        action="store_true",
    )

    return parser
if __name__ == "__main__":
    # Parse the command line, run the lookup, then print the total run time.
    cliParser = defineArgParser()
    options = cliParser.parse_args()

    startedAt = time.time()
    main(test=options.test)
    timer(startedAt)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
6042825 | |
6628208 | |
3228863 | |
4111116 | |
7718173 | |
4207244 | |
1008230 | |
6800166 | |
3750528 | |
3962008 | |
5082607 | |
3685583 | |
3430699 | |
3722524 | |
4760270 | |
5127930 | |
6396093 | |
5105880 | |
6345265 | |
4752027 | |
4826505 | |
4497953 | |
2752617 | |
3649846 | |
5100142 | |
4103489 | |
5978649 | |
2376761 | |
3888149 | |
5266326 | |
2971582 | |
2844990 | |
3413384 | |
5105883 | |
5996309 | |
4708103 | |
6717989 | |
4423409 | |
5726607 | |
2971575 | |
6013758 | |
4380025 | |
5860209 | |
4909101 | |
2375448 | |
3065683 | |
3521214 | |
2530886 | |
4141640 | |
5638226 | |
3659198 | |
6492420 | |
6707396 | |
2910028 | |
3967109 | |
7422831 | |
4410647 | |
5370611 | |
2796819 | |
4277354 | |
5624584 | |
3009535 | |
4426829 | |
5089131 | |
4816032 | |
4271147 | |
4176421 | |
6607905 | |
2572702 | |
4183485 | |
4287948 | |
4103495 | |
3262844 | |
6053696 | |
403769 | |
4133763 | |
4168749 | |
6306265 | |
7584444 | |
5472867 | |
2982160 | |
4236746 | |
5333177 | |
4264639 | |
3654709 | |
5167065 | |
2935447 | |
4642848 | |
4168754 | |
3835703 | |
3599859 | |
3582273 | |
4653389 | |
6880260 | |
6039917 | |
3646686 | |
4174321 | |
2693143 | |
4224201 | |
3651407 | |
4670004 | |
5588246 | |
4535771 | |
6039939 | |
6821346 | |
2638934 | |
6392199 | |
4743236 | |
2427162 | |
4288400 | |
3083346 | |
3855388 | |
4670012 | |
3307106 | |
226690 | |
2666812 | |
4411368 | |
5998007 | |
3307116 | |
6203375 | |
6051191 | |
3031041 | |
7194485 | |
4499222 | |
6013757 | |
6974772 | |
5260057 | |
3514294 | |
4828368 | |
2958747 | |
4706054 | |
3530913 | |
4642660 | |
3190406 | |
4197741 | |
4184317 | |
5618529 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
qid | pmc | |
---|---|---|
Q35790798 | 4708103 | |
Q24515682 | 403769 | |
Q64077738 | 6492420 | |
Q41943550 | 4103495 | |
Q55286499 | 5996309 | |
Q33806851 | 3065683 | |
Q31147256 | 5127930 | |
Q21266675 | 3651407 | |
Q55497195 | 6013758 | |
Q38504935 | 2971575 | |
Q38390303 | 3962008 | |
Q27339233 | 4752027 | |
Q33329732 | 2375448 | |
Q33490300 | 2752617 | |
Q35531204 | 4410647 | |
Q36259785 | 5266326 | |
Q34281990 | 4183485 | |
Q41854744 | 3888149 | |
Q36647776 | 2376761 | |
Q36846151 | 3654709 | |
Q35227430 | 4236746 | |
Q39181972 | 5105880 | |
Q47314507 | 5860209 | |
Q104283396 | 7718173 | |
Q42375668 | 5638226 | |
Q50315248 | 5100142 | |
Q63484960 | 6607905 | |
Q34041069 | 3228863 | |
Q34549678 | 3599859 | |
Q29788355 | 3649846 | |
Q36597303 | 4760270 | |
Q33805002 | 5472867 | |
Q93060711 | 6717989 | |
Q42957908 | 4207244 | |
Q23922436 | 1008230 | |
Q91831664 | 6880260 | |
Q36145819 | 3413384 | |
Q30557509 | 3835703 | |
Q24614450 | 2844990 | |
Q31119178 | 3722524 | |
Q33452871 | 2693143 | |
Q33528268 | 3009535 | |
Q34070756 | 3262844 | |
Q34110503 | 2935447 | |
Q38384902 | 5082607 | |
Q38472163 | 4168749 | |
Q30584124 | 3521214 | |
Q41959403 | 4653389 | |
Q41991813 | 4176421 | |
Q41991463 | 4380025 | |
Q42213893 | 2530886 | |
Q42792284 | 4133763 | |
Q34880603 | 4287948 | |
Q34078162 | 4141640 | |
Q33622881 | 2910028 | |
Q62004288 | 6800166 | |
Q64233711 | 6396093 | |
Q35074242 | 3967109 | |
Q34737302 | 4271147 | |
Q34550500 | 3582273 | |
Q28655509 | 4174321 | |
Q47099616 | 5726607 | |
Q30883967 | 4426829 | |
Q36361689 | 5333177 | |
Q35529162 | 4264639 | |
Q34604749 | 3646686 | |
Q34820613 | 3750528 | |
Q40866280 | 3659198 | |
Q41863524 | 4103489 | |
Q37726558 | 5370611 | |
Q40394437 | 4423409 | |
Q35836836 | 4497953 | |
Q30279334 | 4168754 | |
Q56894078 | 6053696 | |
Q92495052 | 6628208 | |
Q56396501 | 6039917 | |
Q35984089 | 4826505 | |
Q47756564 | 6042825 | |
Q31110857 | 4909101 | |
Q33718938 | 2982160 | |
Q36751372 | 4816032 | |
Q38420858 | 4224201 | |
Q42368936 | 5624584 | |
Q95820211 | 7422831 | |
Q38373292 | 5105883 | |
Q89032722 | 5978649 | |
Q38425559 | 4111116 | |
Q94028784 | 6707396 | |
Q33367888 | 2572702 | |
Q34776355 | 4277354 | |
Q39331895 | 5167065 | |
Q60933724 | 6306265 | |
Q100953652 | 7584444 | |
Q33711706 | 2971582 | |
Q33551816 | 2796819 | |
Q28681323 | 3685583 | |
Q34297809 | 3430699 | |
Q40289422 | 4642848 | |
Q37384237 | 5089131 | |
Q61814400 | 6345265 | |
Q38429157 | 5618529 | |
Q27987806 | 3855388 | |
Q36643662 | 226690 | |
Q24655412 | 2638934 | |
Q34161898 | 3307116 | |
Q34269466 | 3514294 | |
Q39992601 | 4411368 | |
Q39138287 | 2427162 | |
Q35870081 | 4670004 | |
Q35914932 | 4743236 | |
Q56395352 | 6051191 | |
Q41967577 | 4288400 | |
Q30576319 | 3530913 | |
Q24570122 | 2666812 | |
Q30987231 | 4535771 | |
Q34871306 | 3083346 | |
Q35688593 | 4499222 | |
Q58549562 | 6203375 | |
Q90028327 | 6821346 | |
Q35841044 | 4642660 | |
Q30859718 | 4184317 | |
Q30385112 | 4828368 | |
Q42756598 | 5588246 | |
Q40033547 | 4706054 | |
Q55692909 | 6039939 | |
Q34029521 | 3190406 | |
Q36268833 | 5260057 | |
Q38398397 | 4670012 | |
Q93016263 | 6974772 | |
Q94602451 | 7194485 | |
Q29547471 | 3031041 | |
Q33694229 | 2958747 | |
Q55439024 | 6013757 | |
Q58810554 | 6392199 | |
Q55283816 | 5998007 | |
Q35838429 | 3307106 | |
Q41939614 | 4197741 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Template filled in by Python's str.format: doubled braces become literal
# braces and the values placeholder receives the quoted ID list.
# Selects every item whose P932 statement equals one of the supplied IDs.
# The label service was dropped: no label variable is selected, so it only
# added cost to each request.
SELECT ?item ?id {{
  VALUES ?id {{ {values} }}
  ?item wdt:P932 ?id .
}}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment