Skip to content

Instantly share code, notes, and snippets.

@carlinmack
Last active June 8, 2022 12:23
Show Gist options
  • Save carlinmack/3e3c67ca1a42cc03b61ff535956e37b5 to your computer and use it in GitHub Desktop.
Save carlinmack/3e3c67ca1a42cc03b61ff535956e37b5 to your computer and use it in GitHub Desktop.
"""
If query.sparql and PMCID_list.txt are downloaded, the script can simply be called as
python iq-to-qid.py
"""
import argparse
import os
import subprocess
import time
import pandas as pd
import requests
from requests.exceptions import HTTPError
from tqdm import tqdm
# HTTP headers passed to requests.get by runQuery on every query it sends.
HEADERS = {"User-Agent": "ID-to-QID"}
def main(
    test: bool = False,
    inputFile: str = "PMCID_list.txt",
    batchSize: int = 100,
):
    """Look up the QID for every ID listed in ``inputFile`` and save the pairs as CSV.

    IDs are read one per line, substituted in batches of ``batchSize`` into
    the template stored in ``query.sparql``, and each batch is resolved with
    ``getData``. The collected rows are written to ``qids-pmcs.csv`` with the
    columns ``qid`` and ``pmc``.

    Args:
        test: When true, stop after the first batch.
        inputFile: Path of the text file holding one ID per line.
        batchSize: Number of IDs placed into a single query.

    Raises:
        SystemExit: With status 1 when ``inputFile`` does not exist.
    """
    if not os.path.isfile(inputFile):
        print(inputFile + " does not exist")
        # Non-zero status so shell callers can detect the failure; the bare
        # exit() helper is also only present when the site module is loaded.
        raise SystemExit(1)

    with open("query.sparql") as r:
        query = r.read()

    with open(inputFile) as f:
        # Skip blank lines so they are not sent to the server as empty IDs.
        inputList = [line.strip() for line in f if line.strip()]

    data = []
    # Ceiling division: the bar total is the real number of batches for the
    # batch size in use, not an estimate based on a fixed size of 100.
    totalBatches = (len(inputList) + batchSize - 1) // batchSize

    # The context manager closes the bar even if a query raises part-way.
    with tqdm(
        total=totalBatches,
        bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} {elapsed_s:.0f}s",
    ) as t:
        for i in range(0, len(inputList), batchSize):
            IDs = inputList[i : i + batchSize]
            t.update()
            t.set_description(IDs[0])
            # Space-separated, single-quoted literals for the VALUES clause.
            IDstring = " ".join("'" + q + "'" for q in IDs)
            data += getData(query, IDstring)
            if test:
                break

    df = pd.DataFrame(data, columns=["qid", "pmc"])
    df.to_csv("qids-pmcs.csv", index=False)
def getData(query, IDstring):
    """Run the query template for one batch of IDs and return [QID, ID] rows.

    Args:
        query: Query template containing a ``{values}`` placeholder for
            ``str.format``.
        IDstring: Text substituted for ``{values}``.

    Returns:
        A list with one ``[qid, id]`` pair per result binding.
    """
    data = runQuery(query.format(values=IDstring))
    return [
        [
            # The item value is expected to be an entity URI such as
            # http://www.wikidata.org/entity/Q42; keep only the final path
            # segment instead of relying on a fixed prefix length.
            binding["item"]["value"].rsplit("/", 1)[-1],
            binding["id"]["value"],
        ]
        for binding in data["results"]["bindings"]
    ]
def runQuery(query, timeout=120):
    """Send a SPARQL query to the Wikidata query endpoint and return the parsed JSON.

    Args:
        query: Complete SPARQL query text.
        timeout: Seconds to wait for the server before requests gives up.

    Returns:
        The response body decoded from JSON.

    Raises:
        SystemExit: With status 1, after printing the response body, when the
            server answers with an HTTP error status.
    """
    url = "https://query.wikidata.org/sparql"
    params = {"query": query, "format": "json"}
    try:
        response = requests.get(
            url, params=params, headers=HEADERS, timeout=timeout
        )
        # requests only raises HTTPError on request: without this call a
        # 4xx/5xx reply would reach .json() and fail there with a decode error.
        response.raise_for_status()
    except HTTPError as e:
        print(e.response.text)
        raise SystemExit(1)
    return response.json()
def timer(tick, msg=""):
    """Print the seconds elapsed since ``tick`` and return a fresh timestamp.

    Args:
        tick: Start time, as returned by ``time.time()``.
        msg: Optional label printed before the elapsed time.

    Returns:
        The current ``time.time()`` value, usable as the next ``tick``.
    """
    elapsed = time.time() - tick
    report = "--- %s %.3f seconds ---" % (msg, elapsed)
    print(report)
    return time.time()
def defineArgParser():
    """Create the parser for the script's command line arguments.

    The module docstring is used as the description and is shown verbatim
    because of ``RawDescriptionHelpFormatter``.

    Returns:
        An ``argparse.ArgumentParser`` exposing the ``-t/--test`` flag.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        "-t",
        "--test",
        # Previously empty, which left the flag undocumented in --help.
        help="process only the first batch of IDs, then stop",
        action="store_true",
    )
    return parser
if __name__ == "__main__":
    # Parse the command line, run the lookup, then report the wall-clock time.
    clArgs = defineArgParser().parse_args()
    tick = time.time()
    main(test=clArgs.test)
    timer(tick)
6042825
6628208
3228863
4111116
7718173
4207244
1008230
6800166
3750528
3962008
5082607
3685583
3430699
3722524
4760270
5127930
6396093
5105880
6345265
4752027
4826505
4497953
2752617
3649846
5100142
4103489
5978649
2376761
3888149
5266326
2971582
2844990
3413384
5105883
5996309
4708103
6717989
4423409
5726607
2971575
6013758
4380025
5860209
4909101
2375448
3065683
3521214
2530886
4141640
5638226
3659198
6492420
6707396
2910028
3967109
7422831
4410647
5370611
2796819
4277354
5624584
3009535
4426829
5089131
4816032
4271147
4176421
6607905
2572702
4183485
4287948
4103495
3262844
6053696
403769
4133763
4168749
6306265
7584444
5472867
2982160
4236746
5333177
4264639
3654709
5167065
2935447
4642848
4168754
3835703
3599859
3582273
4653389
6880260
6039917
3646686
4174321
2693143
4224201
3651407
4670004
5588246
4535771
6039939
6821346
2638934
6392199
4743236
2427162
4288400
3083346
3855388
4670012
3307106
226690
2666812
4411368
5998007
3307116
6203375
6051191
3031041
7194485
4499222
6013757
6974772
5260057
3514294
4828368
2958747
4706054
3530913
4642660
3190406
4197741
4184317
5618529
qid pmc
Q35790798 4708103
Q24515682 403769
Q64077738 6492420
Q41943550 4103495
Q55286499 5996309
Q33806851 3065683
Q31147256 5127930
Q21266675 3651407
Q55497195 6013758
Q38504935 2971575
Q38390303 3962008
Q27339233 4752027
Q33329732 2375448
Q33490300 2752617
Q35531204 4410647
Q36259785 5266326
Q34281990 4183485
Q41854744 3888149
Q36647776 2376761
Q36846151 3654709
Q35227430 4236746
Q39181972 5105880
Q47314507 5860209
Q104283396 7718173
Q42375668 5638226
Q50315248 5100142
Q63484960 6607905
Q34041069 3228863
Q34549678 3599859
Q29788355 3649846
Q36597303 4760270
Q33805002 5472867
Q93060711 6717989
Q42957908 4207244
Q23922436 1008230
Q91831664 6880260
Q36145819 3413384
Q30557509 3835703
Q24614450 2844990
Q31119178 3722524
Q33452871 2693143
Q33528268 3009535
Q34070756 3262844
Q34110503 2935447
Q38384902 5082607
Q38472163 4168749
Q30584124 3521214
Q41959403 4653389
Q41991813 4176421
Q41991463 4380025
Q42213893 2530886
Q42792284 4133763
Q34880603 4287948
Q34078162 4141640
Q33622881 2910028
Q62004288 6800166
Q64233711 6396093
Q35074242 3967109
Q34737302 4271147
Q34550500 3582273
Q28655509 4174321
Q47099616 5726607
Q30883967 4426829
Q36361689 5333177
Q35529162 4264639
Q34604749 3646686
Q34820613 3750528
Q40866280 3659198
Q41863524 4103489
Q37726558 5370611
Q40394437 4423409
Q35836836 4497953
Q30279334 4168754
Q56894078 6053696
Q92495052 6628208
Q56396501 6039917
Q35984089 4826505
Q47756564 6042825
Q31110857 4909101
Q33718938 2982160
Q36751372 4816032
Q38420858 4224201
Q42368936 5624584
Q95820211 7422831
Q38373292 5105883
Q89032722 5978649
Q38425559 4111116
Q94028784 6707396
Q33367888 2572702
Q34776355 4277354
Q39331895 5167065
Q60933724 6306265
Q100953652 7584444
Q33711706 2971582
Q33551816 2796819
Q28681323 3685583
Q34297809 3430699
Q40289422 4642848
Q37384237 5089131
Q61814400 6345265
Q38429157 5618529
Q27987806 3855388
Q36643662 226690
Q24655412 2638934
Q34161898 3307116
Q34269466 3514294
Q39992601 4411368
Q39138287 2427162
Q35870081 4670004
Q35914932 4743236
Q56395352 6051191
Q41967577 4288400
Q30576319 3530913
Q24570122 2666812
Q30987231 4535771
Q34871306 3083346
Q35688593 4499222
Q58549562 6203375
Q90028327 6821346
Q35841044 4642660
Q30859718 4184317
Q30385112 4828368
Q42756598 5588246
Q40033547 4706054
Q55692909 6039939
Q34029521 3190406
Q36268833 5260057
Q38398397 4670012
Q93016263 6974772
Q94602451 7194485
Q29547471 3031041
Q33694229 2958747
Q55439024 6013757
Q58810554 6392199
Q55283816 5998007
Q35838429 3307106
Q41939614 4197741
# Template filled in by the script through str.format: the values placeholder
# receives the quoted IDs, and every doubled brace becomes one literal brace.
SELECT ?item ?id {{
VALUES ?id {{ {values} }}
?item wdt:P932 ?id .
SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" }}
}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment