Skip to content

Instantly share code, notes, and snippets.

@rneiss
Created January 13, 2025 23:11
Show Gist options
  • Save rneiss/cdacf7f3b388c45d1dddf9499b5773bd to your computer and use it in GitHub Desktop.
Save rneiss/cdacf7f3b388c45d1dddf9499b5773bd to your computer and use it in GitHub Desktop.
A Python script that downloads every Klein Dictionary entry via Sefaria's texts API (v3)
import requests
import json
import time
import csv
import urllib.parse
def fetch_data(url, retries=5, backoff_factor=1, timeout=30):
    """Fetch *url* and return its JSON-decoded body, retrying on failure.

    An attempt counts as failed when ``requests.get`` raises
    ``requests.RequestException`` (which includes timeouts), when the
    response status is in the 400-599 range, or when a 200 response carries
    a body that is not valid JSON. Failed attempts are separated by a pause
    of ``backoff_factor * 2 ** attempt`` seconds; no pause follows the final
    attempt because nothing would be retried after it.

    Args:
        url: Address to request with HTTP GET.
        retries: Maximum number of attempts before giving up.
        backoff_factor: Multiplier applied to the exponential pause.
        timeout: Seconds to wait on the server before the attempt is
            abandoned; passed straight to ``requests.get``.

    Returns:
        The decoded JSON payload, or ``None`` when every attempt failed or
        the server answered with a status that is neither 200 nor 4xx/5xx.
    """
    for attempt in range(1, retries + 1):
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as e:
            # Catch any network-related or other requests errors
            print(f"Request failed with error: {e}. Retrying...")
        else:
            status = response.status_code
            if status == 200:
                try:
                    return response.json()
                except ValueError as e:
                    # Every JSON decode error requests can raise derives
                    # from ValueError; treat a garbled body as retriable.
                    print(f"Received invalid JSON from {url}: {e}. Retrying...")
            elif 400 <= status < 600:
                print(f"Received error {status} from {url}. Retrying...")
            else:
                print(f"Received unexpected status {status} from {url}.")
                return None

        # Back off only when another attempt will actually follow.
        if attempt < retries:
            sleep_time = backoff_factor * (2 ** attempt)  # Exponential backoff
            print(f"Retrying in {sleep_time} seconds...")
            time.sleep(sleep_time)

    print(f"Failed to fetch data from {url} after {retries} attempts.")
    return None
def main():
    """Download the Klein Dictionary from Sefaria into ``output.csv``.

    Starts at ``Klein_Dictionary,_א.1`` and follows the ``next`` reference
    found in each API response until a response has none or a fetch fails.
    Each entry is appended to ``output.csv`` as a ``[ref, text]`` row, with
    the ``'Klein Dictionary, '`` prefix removed from the ref. The file is
    opened in append mode, so rows from earlier runs are kept.
    """
    base_url = "https://www.sefaria.org/api/v3/texts/"
    initial_url = base_url + "Klein_Dictionary,_א.1"

    def write_to_csv(ref, text):
        """Append one ``[ref, text]`` row to ``output.csv``."""
        with open('output.csv', 'a', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([ref.replace('Klein Dictionary, ', ''), text])

    def process_data(url):
        """Fetch *url*, store its entry, and return the ``next`` ref (or None)."""
        data = fetch_data(url)
        if not data or not isinstance(data, dict):
            return None
        try:
            ref = data['ref']
            text = data['versions'][0]['text'][0]
        except (KeyError, IndexError, TypeError) as e:
            # One malformed or empty entry should not abort the whole crawl:
            # skip the row but keep following the chain of refs.
            print(f"Skipping {url}: unexpected response shape ({e!r}).")
        else:
            write_to_csv(ref, text)
        return data.get('next')

    next_ref = process_data(initial_url)
    while next_ref:
        # Log the readable form, but request the percent-encoded one so that
        # characters such as '?' or '#' in a ref cannot truncate the path.
        print(f"Fetching {base_url + next_ref}")
        next_url = base_url + urllib.parse.quote(next_ref)
        next_ref = process_data(next_url)
# Start the download only when this file is executed as a script, so that
# importing it has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment