Skip to content

Instantly share code, notes, and snippets.

@leoloobeek
Created April 26, 2017 21:34
Show Gist options
  • Save leoloobeek/3be8b835988e8d926a4387019370db8d to your computer and use it in GitHub Desktop.
Save leoloobeek/3be8b835988e8d926a4387019370db8d to your computer and use it in GitHub Desktop.
Download all gists for a specific user
# first: mkdir user && cd user && cp /path/to/get_gists.py .
# python3 get_gists.py user
import requests
import sys
from subprocess import call
# The GitHub username is taken from the first command-line argument.
user = sys.argv[1]
# Ask the GitHub API for the user's gists (a single request, so only the
# first page of results is processed).
r = requests.get('https://api.github.com/users/{0}/gists'.format(user))
for i in r.json():
    # The description file below is written into ./<gist id>/, so there is
    # nothing to write into when the clone itself failed; move on instead of
    # crashing on the missing directory.
    if call(['git', 'clone', i['git_pull_url']]) != 0:
        continue
    # Store the gist description next to the cloned files.
    description_file = './{0}/description.txt'.format(i['id'])
    with open(description_file, 'w') as f:
        f.write('{0}\n'.format(i['description']))
@epogrebnyak
Copy link

Thanks for sharing this code. I added directory name slugs from django which condense i['description'] to a valid path, so that one can omit the description file. See at https://gist.github.com/epogrebnyak/c14d6d2ca2740d1e1018e701ea00472a

@pfuntner
Copy link

Really useful! Thanks! I had no idea that each gist was stored its own repository but it makes sense.

@brossetti1
Copy link

If anyone's interested in using the description as the folder name instead of the id:

# first: mkdir user && cd user && cp /path/to/get_gists.py .
# python3 get_gists.py user
import requests
import sys
from subprocess import call

# The GitHub username is taken from the first command-line argument.
user = sys.argv[1]

gists_url = 'https://api.github.com/users/{0}/gists'.format(user)
r = requests.get(gists_url)

for gist in r.json():
    # Use the description as the folder name; fall back to the gist id when
    # the description is empty or null.
    description = gist['description']
    folder = description or gist['id']
    call(['git', 'clone', gist['git_pull_url'], folder])

    # Keep the full description alongside the cloned files.
    with open('./{0}/description.txt'.format(folder), 'w') as out:
        out.write('{0}\n'.format(description))

@aabarbosa
Copy link

Linux file systems limit file and directory names to 255 characters (more precisely, 255 bytes on most file systems).

fatal: could not create leading directories of 'long description.': File name too long

Apart from that, naming the folders after the descriptions works. I was wondering how to print the descriptions separately from the folder names (any help would be appreciated).
cat **/*.txt

You would prefer this solution:
folder = i['description'][0:255] if i['description'] else i['id']

This is the final code (for those against the clock)

# first: mkdir user && cd user && cp /path/to/get_gists.py .
# python3 get_gists.py user
import requests
import sys
from subprocess import call

# The GitHub username is taken from the first command-line argument.
user = sys.argv[1]

r = requests.get('https://api.github.com/users/{0}/gists'.format(user))

for gist in r.json():
    description = gist['description']
    # Folder name: the first 255 characters of the description, or the gist
    # id when the description is empty or null.
    if description:
        folder = description[0:255]
    else:
        folder = gist['id']
    call(['git', 'clone', gist['git_pull_url'], folder])

    # The untruncated description is stored inside the cloned folder.
    target_path = './{0}/description.txt'.format(folder)
    with open(target_path, 'w') as out:
        out.write('{0}\n'.format(description))

@jamesbrink
Copy link

Thank you all for saving me time; it's always appreciated.

@pixelstorm
Copy link

For some reason it does not download all the gists; I only get about a quarter of my gists.

@renat-abbyazov
Copy link

@pixelstorm It seems the GitHub API returns at most 30 gists per response; one can add a page number to the request in order to get all gists:
https://stackoverflow.com/a/16233710

@zubair1024
Copy link

Try this if you're a NodeJS person:
https://github.com/zubair1024/gist-puller

@selimslab
Copy link

Only downloads the first page of gists, forked to download all -> https://gist.github.com/selimslab/958e2255a105f9a3f4b421896bce715d

@antonydevanchi
Copy link

Just added some codestyle, shebang, one useless comment and parallelism.

🙃

#!/usr/bin/env python3

import os
import sys
import json
import hashlib
import requests

from subprocess import call
from concurrent.futures import ThreadPoolExecutor as PoolExecutor

def download_all_from_user(user: str):
    """Clone every gist listed for *user*, one API page at a time.

    Pages are requested from the GitHub gists endpoint with an increasing
    ``page`` number until an empty page comes back; each non-empty page is
    handed to ``download_all``.

    Args:
        user: GitHub username whose gists are listed.

    Raises:
        requests.HTTPError: if the API answers with an error status. Without
            this check the error body would be passed on as if it were a
            list of gists.
    """
    page = 1

    while True:
        url = f"https://api.github.com/users/{user}/gists?page={page}"

        response = requests.get(url)
        # Fail loudly on 4xx/5xx instead of treating the error payload as gists.
        response.raise_for_status()

        # Parse the body once and reuse the result.
        gists = response.json()
        if not gists:
            # An empty page means there is nothing left to fetch.
            break

        download_all(gists)
        page += 1

def download_all(gists: list):
    """Run ``download`` for every gist in *gists* on a pool of ten threads.

    Returns only after all downloads have finished.
    """
    with PoolExecutor(max_workers=10) as pool:
        outcomes = pool.map(download, gists)
        # Drain the result iterator; the individual results are not needed.
        for _outcome in outcomes:
            continue

def download(gist):
    """Clone one gist and store its description inside the clone.

    The clone directory is named after the gist id followed by the MD5 hex
    digest of its ``updated_at`` value. After a successful clone the gist
    description is written to ``description.txt`` inside that directory.

    Args:
        gist: mapping with at least the keys ``id``, ``updated_at`` and
            ``git_pull_url``; ``description`` is optional and may be null.
    """
    # MD5 is only used to build a directory name here, not for security.
    target = gist["id"] + hashlib.md5(gist["updated_at"].encode('utf-8')).hexdigest()

    return_code = call(["git", "clone", gist["git_pull_url"], target])
    if return_code != 0:
        # No usable clone directory: report and skip the description file
        # rather than failing on the write below.
        print(
            f"git clone failed for gist {gist['id']} (exit status {return_code})",
            file=sys.stderr,
        )
        return

    description_file = os.path.join(target, "description.txt")

    # A null description becomes an empty line instead of the text "None".
    description = gist.get("description") or ""

    # Descriptions may contain non-ASCII text, so fix the encoding explicitly.
    with open(description_file, "w", encoding="utf-8") as f:
        f.write(f"{description}\n")

# Run

# The GitHub username must be given as the first command-line argument;
# print a usage hint instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} <github-user>")

user = sys.argv[1]

download_all_from_user(user)

@Kamalabot
Copy link

There is a more direct way to get at these files, as shared by Observable's Ian in the following notebook.
https://observablehq.com/@enjalot/blockbuilder-search-data
The data is already available as JSON format, which provides more insights into the gists as per the notebook. I have made a crude way of getting at gist ids and the thumbnails, here
https://kamalabot.github.io/M3nD3/blocksD3.html

@AysadKozanoglu
Copy link

@antonydevanchi @leoloobeek Thanks, both versions of the code work fine.

@santry
Copy link

santry commented Dec 13, 2022

One-liner to get the first 100 of your own private gists:

curl -H "Authorization: Bearer <your_token>" 'https://api.github.com/gists?per_page=100' | jq '.[] | .git_pull_url' | xargs -n 1 git clone

Relies on jq.

@graylan0
Copy link

import os
import sys
import json
import hashlib
import requests
import logging

from subprocess import call, CalledProcessError
from concurrent.futures import ThreadPoolExecutor as PoolExecutor

# Set up basic logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def download_all_from_user(user: str):
    """Clone every gist listed for *user*, one API page at a time.

    Pages are requested with an increasing ``page`` number and each
    non-empty page is handed to ``download_all``. Paging stops at the first
    empty page, or after logging an error when the response has an error
    status, is not valid JSON, or is not a JSON list.

    Args:
        user: GitHub username whose gists are listed.
    """
    page = 1

    while True:
        url = f"https://api.github.com/users/{user}/gists?page={page}"
        response = requests.get(url)

        # An error response would otherwise be a non-empty payload on every
        # page, and the loop would never terminate.
        if not response.ok:
            logging.error(f"GitHub API request failed with status {response.status_code}")
            break

        try:
            gists = response.json()
        except ValueError:
            # ValueError is the common base of the JSON decode errors.
            logging.error("Invalid JSON response")
            break

        if not isinstance(gists, list):
            logging.error("Unexpected response payload (expected a list of gists)")
            break

        if not gists:
            # An empty page means there is nothing left to fetch.
            break

        page += 1
        download_all(gists)

def download_all(gists: list):
    """Download all *gists* concurrently, using up to ten worker threads.

    Returns once every ``download`` call has completed.
    """
    with PoolExecutor(max_workers=10) as workers:
        pending = workers.map(download, gists)
        # Consume the iterator; the per-gist results themselves are unused.
        for _done in pending:
            continue

def download(gist):
    """Clone one gist and store its description inside the clone.

    The clone directory is named after the gist id followed by the MD5 hex
    digest of its ``updated_at`` value. Problems are logged and the gist is
    skipped; nothing is raised for missing keys, a failed clone, or a failed
    write of ``description.txt``.

    Args:
        gist: mapping expected to contain ``id``, ``updated_at`` and
            ``git_pull_url``; ``description`` is optional and may be null.
    """
    if "id" not in gist or "updated_at" not in gist or "git_pull_url" not in gist:
        logging.error("Missing required gist information")
        return

    # MD5 is only used to build a directory name here, not for security.
    target = gist["id"] + hashlib.md5(gist["updated_at"].encode('utf-8')).hexdigest()

    try:
        # call() reports failure through its return value; it does not raise
        # CalledProcessError, so the exit status has to be checked explicitly.
        return_code = call(["git", "clone", gist["git_pull_url"], target])
    except OSError as e:
        # Raised when the git executable itself cannot be started.
        logging.error(f"Failed to clone gist: {e}")
        return

    if return_code != 0:
        logging.error(f"Failed to clone gist: git exited with status {return_code}")
        return

    description_file = os.path.join(target, "description.txt")

    # Covers both a missing key and an explicit null/empty description.
    description = gist.get("description") or "No description"

    try:
        # Descriptions may contain non-ASCII text, so fix the encoding explicitly.
        with open(description_file, "w", encoding="utf-8") as f:
            f.write(f"{description}\n")
    except OSError as e:
        logging.error(f"Failed to write description file: {e}")

# Main execution
if __name__ == "__main__":
    # The GitHub username is expected as the first command-line argument.
    if len(sys.argv) <= 1:
        logging.error("No user specified")
    else:
        user = sys.argv[1]
        download_all_from_user(user)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment