This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# public domain | |
from bs4 import BeautifulSoup | |
import requests | |
def main(): | |
manifest = {} | |
for id in range(1, 687): # starting with PGCH #1 and going to #686, the last one | |
if id == 553: # this one is irregular and should be skipped |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Step 1: Get list of any Wikidata item with NPG ID and anything that is a subclass of chemical hazard | |
# Step 2: Iterate through each item for invoked items and properties | |
# (for claim in claims; for subclaim in claim: 'Q' + str(subclaim['mainsnak']['datavalue']['value']['numeric-id']) | |
# and subclaim['mainsnak']['property'] where claim[0]['mainsnak']['datatype'] == 'wikibase-item') | |
# Step 3: De-duplicate to generate exhaustive list of each item/property of interest to NIOSH | |
# Step 4: Check labels: en, es, zh, fr, de | |
# Step 5: Prepare HTML table that lists each item/property of interest, highlighting cells where values are missing | |
# Step 6: Take percentages of coverage in each language; save to a timestamped log | |
import requests |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import csv | |
from collections import defaultdict | |
def get_citation(inputstring):
    """Return the first citation the Citoid service reports for *inputstring*.

    Issues a GET request to ``https://citoid.wikimedia.org/api`` with
    ``format=mediawiki`` and ``search=inputstring``, decodes the response
    body as JSON, and returns element ``0`` of the decoded value.

    The search term is handed to ``requests`` via ``params`` so that it is
    percent-encoded; concatenating it straight into the URL would let
    characters such as ``&``, ``#`` or spaces in the term truncate or
    corrupt the query string.

    Args:
        inputstring: The term to search for (sent as the ``search``
            query parameter).

    Returns:
        The first element of the JSON-decoded response body.
    """
    r = requests.get(
        "https://citoid.wikimedia.org/api",
        params={"format": "mediawiki", "search": inputstring},
    )
    # The response is indexed directly; a body that is not a non-empty JSON
    # list will raise here, exactly as before.
    return r.json()[0]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import time | |
import csv | |
from bs4 import BeautifulSoup | |
def main(sourcefile): | |
url_template = "https://tools.wmflabs.org/sourcemd/?id={0}&doit=Check+source" | |
with open(sourcefile) as f: | |
csvdump = csv.reader(f) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
# Selects which of the two seed queries below is used.
niosh_mode = False

# `seed` is a Wikidata Query Service URL (JSON output) carrying a URL-encoded
# SPARQL query. Decoded, the two variants are:
#   niosh_mode on:  SELECT ?item WHERE { ?item wdt:P932 ?dummy0 .
#                                        ?item wdt:P859 wd:Q60346 . }
#   niosh_mode off: SELECT ?item WHERE { ?item wdt:P932 ?dummy0 .
#                                        MINUS { ?item wdt:P859 wd:Q60346 } }
# i.e. items that have a P932 statement and, respectively, do or do not have
# P859 = Q60346.
# NOTE(review): P932 / P859 / Q60346 are presumably PMCID / sponsor / NIOSH --
# confirm against Wikidata.
if niosh_mode:
    seed = "https://query.wikidata.org/sparql?format=json&query=SELECT%20%3Fitem%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP932%20%3Fdummy0%20.%0A%20%20%3Fitem%20wdt%3AP859%20wd%3AQ60346%20.%0A%7D"
else:
    seed = "https://query.wikidata.org/sparql?format=json&query=SELECT%20%3Fitem%20WHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP932%20%3Fdummy0%20.%0A%20%20MINUS%20%7B%20%3Fitem%20wdt%3AP859%20wd%3AQ60346%20%7D%0A%7D"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import html | |
import requests | |
import threading | |
class AskPubMed(threading.Thread): | |
def __init__ (self, threadID, name, packages): | |
threading.Thread.__init__(self) | |
self.threadID = threadID | |
self.name = name | |
self.packages = packages |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import requests | |
from time import sleep | |
while True: | |
random_id = ''.join(random.choices('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_', k=11)) | |
url = 'https://www.youtube.com/watch?v=' + random_id | |
r = requests.get(url) | |
if r.text.find('This video is unavailable') == -1: | |
print(url) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Timeout | |
Processing wikidump-000001399.ttl.gz | |
SPARQL-UPDATE: updateStr=LOAD <file:///srv/mungeOut//wikidump-000001399.ttl.gz> | |
java.util.concurrent.TimeoutException | |
at java.util.concurrent.FutureTask.get(FutureTask.java:205) | |
at com.bigdata.rdf.sail.webapp.BigdataServlet.submitApiTask(BigdataServlet.java:292) | |
at com.bigdata.rdf.sail.webapp.QueryServlet.doSparqlUpdate(QueryServlet.java:460) | |
at com.bigdata.rdf.sail.webapp.QueryServlet.doPost(QueryServlet.java:241) | |
at com.bigdata.rdf.sail.webapp.RESTServlet.doPost(RESTServlet.java:269) | |
at com.bigdata.rdf.sail.webapp.MultiTenancyServlet.doPost(MultiTenancyServlet.java:195) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
jh@Scatter-1 ~ % curl -i http://localhost:63342/Cyberbot_II/IABot/www/setup.php\?_ijt=ek248v577c3ch1l8u1c3mq48gb | |
HTTP/1.1 200 OK | |
X-Powered-By: PHP/7.2.28 | |
Set-Cookie: IABotManagementConsole=dbgrl4h62b779fr9luufh5qmm4; expires=Sun, 12-Apr-2020 20:54:31 GMT; Max-Age=2592000; path=/Cyberbot_II/IABot/www | |
Cache-Control: no-store, must-revalidate | |
server: PhpStorm 2019.3.3 | |
content-length: 9486 | |
set-cookie: Phpstorm-e21bdce2=b191c1a9-572c-4e8a-b862-7d21cf880eae; Max-Age=315360000; Expires=Mon, 11 Mar 2030 20:54:31 GMT; Path=/; HTTPOnly; SameSite=strict | |
Set-Cookie: IABotManagementConsole=dbgrl4h62b779fr9luufh5qmm4; expires=Sun, 12-Apr-2020 20:54:31 GMT; Max-Age=2592000; path=/Cyberbot_II/IABot/www |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
from time import sleep | |
from pprint import pprint | |
# This is a quick script I came up with for ingesting "munged" Wikidata TTL dumps | |
# into Amazon Neptune, one at a time, going as fast as possible while respecting | |
# queue limits. | |
for i in range(0, 4243): |
OlderNewer