Created
January 17, 2025 01:56
-
-
Save k1pfel/f3054a11a1f3243bbac2d36ce3c06563 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
from tqdm import tqdm
# Prediction endpoint that every request in this script is POSTed to.
url = "https://api.wikimedia.org/service/lw/inference/v1/models/revertrisk-language-agnostic:predict"

# Request bodies are sent as JSON.
headers = {
    'Content-Type': 'application/json',
}

# Number of consecutive revision IDs to scan, ending at ``end_rev_id``.
INTERVAL = 100000
end_rev_id = 85706740
# The "+ 1" makes the inclusive range [start_rev_id, end_rev_id] contain
# exactly INTERVAL revision IDs.
start_rev_id = end_rev_id - INTERVAL + 1

# Upper bound on concurrently running request threads.
max_threads = 50

# Filled by the driver loop with the result dicts that passed the threshold.
high_probability_revisions = []
def fetch_prediction(rev_id, threshold=0.99, timeout=30):
    """Score one revision and return it only if it meets the threshold.

    Posts ``{"rev_id": rev_id, "lang": "zh"}`` as a JSON body to the
    module-level ``url`` using the module-level ``headers``.

    Args:
        rev_id: Revision ID to submit for scoring.
        threshold: Minimum ``true`` probability required for the revision
            to be returned. Reference levels noted alongside the original
            hard-coded value:
            'very-cautious' => 0.990, 'cautious' => 0.985,
            'somewhat-cautious' => 0.980, 'less-cautious' => 0.975.
        timeout: Seconds to wait on the server before abandoning the
            request. Without it a stalled connection would block its
            worker thread indefinitely.

    Returns:
        A dict with keys ``'rev_id'``, ``'true_prob'``, ``'false_prob'``
        and ``'url'`` when the response has status 200 and its ``true``
        probability is at least ``threshold``. ``None`` in every other
        case: non-200 status, network failure or timeout, a body that is
        not JSON or lacks the expected keys, or a probability below the
        threshold.
    """
    payload = {
        "rev_id": rev_id,
        "lang": "zh",
    }
    try:
        response = requests.post(
            url,
            headers=headers,
            data=json.dumps(payload),
            timeout=timeout,
        )
        if response.status_code != 200:
            return None

        probabilities = response.json()['output']['probabilities']
        true_prob = probabilities['true']
        false_prob = probabilities['false']

        # Kept inside the try so a non-numeric probability is treated as a
        # malformed response instead of crashing the worker.
        if true_prob < threshold:
            return None
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Best-effort scan: a failed request or an unexpected response
        # shape skips this revision rather than aborting the whole run.
        return None

    return {
        'rev_id': rev_id,
        'true_prob': true_prob,
        'false_prob': false_prob,
        'url': f"https://zh.wikipedia.org/w/index.php?oldid={rev_id}",
    }
# Fan the revision range out over a thread pool; each task is one HTTP
# request, so threads overlap the network waits.
with ThreadPoolExecutor(max_workers=max_threads) as executor:
    futures = [
        executor.submit(fetch_prediction, rev_id)
        for rev_id in range(start_rev_id, end_rev_id + 1)
    ]
    # as_completed yields futures as they finish, so the progress bar
    # advances with actual completions rather than submission order.
    for future in tqdm(as_completed(futures), total=len(futures), desc="Querying revisions", unit="rev_id"):
        result = future.result()
        if result:
            high_probability_revisions.append(result)

if high_probability_revisions:
    # Completion order varies from run to run; sort so the report is stable.
    high_probability_revisions.sort(key=lambda item: item['rev_id'])
    print("\nRevisions with true probability >= 0.99:")
    for item in high_probability_revisions:
        print(f"Rev ID: {item['rev_id']}, True Probability: {item['true_prob']}, False Probability: {item['false_prob']}, URL: {item['url']}")
else:
    print("\nNo revisions found with true probability >= 0.99.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment