Created
April 26, 2017 21:34
-
-
Save leoloobeek/3be8b835988e8d926a4387019370db8d to your computer and use it in GitHub Desktop.
Download all gists for a specific user
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# first: mkdir user && cd user && cp /path/to/get_gists.py .
# python3 get_gists.py user
#
# Clones every public gist of the given GitHub user into the current
# directory (one sub-directory per gist id) and stores each gist's
# description in <gist id>/description.txt.
import requests
import sys
from subprocess import call

if len(sys.argv) < 2:
    sys.exit('usage: python3 get_gists.py <github-user>')

user = sys.argv[1]
page = 1
while True:
    # The endpoint is paginated: a single request only returns the first
    # page, so keep asking for pages until an empty one comes back.
    r = requests.get(
        'https://api.github.com/users/{0}/gists'.format(user),
        params={'page': page, 'per_page': 100},
    )
    # Fail loudly on rate limiting / unknown user instead of iterating over
    # the keys of an error object.
    r.raise_for_status()
    gists = r.json()
    if not gists:
        break
    for i in gists:
        # call() returns git's exit status; without a successful clone the
        # target directory does not exist, so there is nowhere to write to.
        if call(['git', 'clone', i['git_pull_url']]) != 0:
            continue
        description_file = './{0}/description.txt'.format(i['id'])
        with open(description_file, 'w') as f:
            f.write('{0}\n'.format(i['description']))
    page += 1
Just added some codestyle, shebang, one useless comment and parallelism.
🙃
#!/usr/bin/env python3
import os
import sys
import json
import hashlib
import requests
from subprocess import call
from concurrent.futures import ThreadPoolExecutor as PoolExecutor
def download_all_from_user(user: str):
    """Clone every public gist of *user*, one API page at a time.

    Pages are requested from the GitHub gists endpoint until an empty page
    is returned; each non-empty page is handed to ``download_all``.

    Args:
        user: GitHub login whose gists should be downloaded.

    Raises:
        requests.HTTPError: if the API answers with an error status
            (for example when rate limited or when the user is unknown).
    """
    page = 1
    while True:
        url = f"https://api.github.com/users/{user}/gists?page={page}"
        response = requests.get(url)

        # An error payload is a non-empty JSON object, which would otherwise
        # be mistaken for a page of gists.
        response.raise_for_status()

        # Decode the body once and reuse it.
        gists = response.json()
        if not gists:
            # An empty page marks the end of the listing.
            break

        download_all(gists)
        page += 1
def download_all(gists: list):
    """Run ``download`` for every entry of *gists* on a pool of 10 workers.

    Returns only after all submitted downloads have finished.
    """
    pool = PoolExecutor(max_workers=10)
    with pool:
        outcomes = pool.map(download, gists)
        # Drain the result iterator: the values are not needed, but consuming
        # them surfaces any exception raised inside a worker.
        for _outcome in outcomes:
            continue
def download(gist):
    """Clone one gist and store its description next to the checkout.

    The checkout directory is the gist id followed by the MD5 hex digest of
    its ``updated_at`` value. If the clone does not succeed, a message is
    printed to stderr and no description file is written.

    Args:
        gist: mapping with at least the keys ``id``, ``updated_at``,
            ``git_pull_url`` and ``description``.
    """
    target = gist["id"] + hashlib.md5(gist["updated_at"].encode('utf-8')).hexdigest()

    try:
        # call() reports failure through its return value, not an exception.
        exit_code = call(["git", "clone", gist["git_pull_url"], target])
    except OSError as error:
        # Raised when the git executable itself cannot be started.
        print(f"Could not run git for gist {gist['id']}: {error}", file=sys.stderr)
        return

    if exit_code != 0:
        # Without a checkout the target directory does not exist, so writing
        # the description would only raise FileNotFoundError.
        print(
            f"git clone failed for gist {gist['id']} (exit status {exit_code})",
            file=sys.stderr,
        )
        return

    description_file = os.path.join(target, "description.txt")
    # Descriptions are free text and may contain non-ASCII characters; a null
    # description is written as an empty line rather than the text "None".
    with open(description_file, "w", encoding="utf-8") as f:
        f.write(f"{gist['description'] or ''}\n")
# Run only when executed as a script, so importing this module has no side
# effects, and fail with a usage message instead of an IndexError when the
# user name argument is missing.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <github-user>")
    user = sys.argv[1]
    download_all_from_user(user)
There is a more direct way to get at these files, as shared by Observable's Ian in the following notebook.
https://observablehq.com/@enjalot/blockbuilder-search-data
The data is already available in JSON format, which provides more insight into the gists, as shown in the notebook. I have made a crude way of getting at the gist ids and thumbnails here:
https://kamalabot.github.io/M3nD3/blocksD3.html
@antonydevanchi @leoloobeek thanks, both scripts work fine
One-liner to get the first 100 of your own private gists:
curl -H "Authorization: Bearer <your_token>" 'https://api.github.com/gists?per_page=100' | jq '.[] | .git_pull_url' | xargs -n 1 git clone
Relies on jq.
import os
import sys
import json
import hashlib
import requests
import logging
from subprocess import call, CalledProcessError
from concurrent.futures import ThreadPoolExecutor as PoolExecutor
# Set up basic logging: configure the root logger to emit records at INFO
# level and above, each formatted as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def download_all_from_user(user: str):
    """Clone every public gist of *user*, one API page at a time.

    Pages are requested from the GitHub gists endpoint until an empty page
    is returned; each non-empty page is handed to ``download_all``. Any
    unusable response (error status, invalid JSON, or a body that is not a
    list) is logged and ends the loop.

    Args:
        user: GitHub login whose gists should be downloaded.
    """
    page = 1
    while True:
        url = f"https://api.github.com/users/{user}/gists?page={page}"
        response = requests.get(url)

        # An error response carries a non-empty JSON object. Treating it as
        # a page of gists would never produce an empty page, so the loop
        # would keep requesting further pages forever.
        if response.status_code != 200:
            logging.error(f"GitHub API request failed with status {response.status_code}")
            break

        try:
            gists = response.json()
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass; catching the
            # base class covers every decoder the HTTP client may use.
            logging.error("Invalid JSON response")
            break

        if not gists:
            # An empty page marks the end of the listing.
            break

        if not isinstance(gists, list):
            logging.error("Unexpected response format: expected a list of gists")
            break

        page += 1
        download_all(gists)
def download_all(gists: list):
    """Apply ``download`` to each item of *gists* using a 10-worker pool.

    The call blocks until every download has completed.
    """
    workers = PoolExecutor(max_workers=10)
    with workers:
        results = workers.map(download, gists)
        # The results themselves are irrelevant; stepping through them makes
        # an exception raised in a worker propagate to the caller.
        for _result in results:
            continue
def download(gist):
    """Clone one gist and store its description next to the checkout.

    The checkout directory is the gist id followed by the MD5 hex digest of
    its ``updated_at`` value. Every failure is logged and makes the function
    return early; nothing is raised to the caller.

    Args:
        gist: mapping that must contain ``id``, ``updated_at`` and
            ``git_pull_url``; ``description`` is optional and may be null.
    """
    required_keys = ("id", "updated_at", "git_pull_url")
    if any(key not in gist for key in required_keys):
        logging.error("Missing required gist information")
        return

    target = gist["id"] + hashlib.md5(gist["updated_at"].encode('utf-8')).hexdigest()

    try:
        # call() never raises CalledProcessError: it returns git's exit
        # status, which therefore has to be inspected explicitly.
        exit_code = call(["git", "clone", gist["git_pull_url"], target])
    except OSError as e:
        # Raised when the git executable itself cannot be started.
        logging.error(f"Failed to clone gist: {e}")
        return

    if exit_code != 0:
        logging.error(f"Failed to clone gist: git exited with status {exit_code}")
        return

    description_file = os.path.join(target, "description.txt")
    # The key can be present with a null value, in which case dict.get()
    # would not apply its default; `or` covers both missing and null.
    description = gist.get('description') or 'No description'
    try:
        with open(description_file, "w", encoding="utf-8") as f:
            f.write(f"{description}\n")
    except OSError as e:
        logging.error(f"Failed to write description file: {e}")
# Main execution: expects the GitHub user name as the first command-line
# argument.
if __name__ == "__main__":
    if len(sys.argv) > 1:
        user = sys.argv[1]
        download_all_from_user(user)
    else:
        logging.error("No user specified")
        # Signal the usage error to the calling shell instead of exiting
        # with a success status.
        sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This only downloads the first page of gists; I forked it to download all of them -> https://gist.github.com/selimslab/958e2255a105f9a3f4b421896bce715d