-
-
Save leoloobeek/3be8b835988e8d926a4387019370db8d to your computer and use it in GitHub Desktop.
# first: mkdir user && cd user && cp /path/to/get_gists.py .
# python3 get_gists.py user
#
# Clones every gist listed in a single response of the GitHub
# "list gists for a user" endpoint into ./<gist id>/ and stores the gist
# description in ./<gist id>/description.txt.  No `page` parameter is sent,
# so only the first page of results is processed.
import os
import requests
import sys
from subprocess import call

user = sys.argv[1]

r = requests.get('https://api.github.com/users/{0}/gists'.format(user))
# Fail loudly on an HTTP error: an error payload is a JSON object rather
# than a list of gists, and iterating it would crash on the first lookup.
r.raise_for_status()

for i in r.json():
    # call() reports failure only through git's exit status; it never raises
    # for a failed clone.
    call(['git', 'clone', i['git_pull_url']])
    gist_dir = './{0}'.format(i['id'])
    if not os.path.isdir(gist_dir):
        # Nothing was cloned, so there is nowhere to put description.txt.
        sys.stderr.write('skipping {0}: clone directory missing\n'.format(i['id']))
        continue
    description_file = './{0}/description.txt'.format(i['id'])
    with open(description_file, 'w') as f:
        # A gist without a description is reported as null; write an empty
        # line rather than the literal text "None".
        f.write('{0}\n'.format(i['description'] or ''))
Thanks for sharing this code. I added directory name slugs from django which condense i['description']
to a valid path, so that one can omit the description file. See at https://gist.github.com/epogrebnyak/c14d6d2ca2740d1e1018e701ea00472a
Really useful! Thanks! I had no idea that each gist was stored its own repository but it makes sense.
If anyone's interested in using the description
as the folder name instead of the id:
# first: mkdir user && cd user && cp /path/to/get_gists.py .
# python3 get_gists.py user
#
# Variant that clones each gist into a folder named after its description,
# falling back to the gist id when the description is empty or missing.
import requests
import sys
from subprocess import call

user = sys.argv[1]
response = requests.get('https://api.github.com/users/{0}/gists'.format(user))

for gist in response.json():
    description = gist['description']
    # An empty or null description cannot name a folder; use the id instead.
    folder = description or gist['id']
    clone_command = ['git', 'clone', gist['git_pull_url'], folder]
    call(clone_command)
    description_file = './{0}/description.txt'.format(folder)
    with open(description_file, 'w') as out:
        out.write('{0}\n'.format(description))
Linux file systems limit a folder name to 255 characters (255 bytes on most file systems), so a long description fails:
fatal: could not create leading directories of 'long description.': File name too long
Other than that it works, naming all the folders after those descriptions. I was wondering how to print the description in addition to the folder name (any help would be appreciated).
cat **/*.txt
You would prefer this solution:
folder = i['description'][0:255] if i['description'] else i['id']
This is the final code (for those against the clock)
# first: mkdir user && cd user && cp /path/to/get_gists.py .
# python3 get_gists.py user
#
# Clones each gist of the given user into a folder named after the gist
# description (or the gist id when no usable description exists) and writes
# the full description to <folder>/description.txt.
import os
import requests
import sys
from subprocess import call

# A single path component may be at most 255 bytes on common Linux
# filesystems; the limit is in bytes, not characters.
MAX_NAME_BYTES = 255


def folder_name(gist):
    """Return a directory name for *gist* that is safe to create.

    The description is used when possible.  Path separators and NUL are
    replaced with underscores so the name stays a single path component, and
    the result is truncated to MAX_NAME_BYTES of UTF-8.  When nothing usable
    remains (empty, "." or ".."), the gist id is returned instead.
    """
    name = (gist['description'] or '').strip()
    for forbidden in ('/', '\\', '\0'):
        name = name.replace(forbidden, '_')
    # Truncate on encoded bytes; 'ignore' drops a multi-byte character that
    # the cut would otherwise split in half.
    name = name.encode('utf-8')[:MAX_NAME_BYTES].decode('utf-8', 'ignore')
    if name in ('', '.', '..'):
        return gist['id']
    return name


user = sys.argv[1]
r = requests.get('https://api.github.com/users/{0}/gists'.format(user))
# An error payload is a JSON object, not a list of gists; stop here instead
# of crashing inside the loop.
r.raise_for_status()

for i in r.json():
    folder = folder_name(i)
    # "--" ends option parsing so a description starting with "-" is treated
    # as the target directory and not as a git option.
    call(['git', 'clone', '--', i['git_pull_url'], folder])
    if not os.path.isdir(folder):
        # call() does not raise when git fails; without a clone directory
        # there is nowhere to write description.txt.
        sys.stderr.write('skipping {0}: clone directory missing\n'.format(i['id']))
        continue
    description_file = os.path.join(folder, 'description.txt')
    with open(description_file, 'w') as f:
        # Write an empty line, not the text "None", for a null description.
        f.write('{0}\n'.format(i['description'] or ''))
thank you all for saving me time always appreciated
For some reason it does not download all the gists. I only get about a quarter of all my gists.
@pixelstorm It seems the GitHub API returns only 30 gists per response; one could add a page number to the request in order to get all gists:
https://stackoverflow.com/a/16233710
Try this if you're a NodeJS person:
https://github.com/zubair1024/gist-puller
Only downloads the first page of gists, forked to download all -> https://gist.github.com/selimslab/958e2255a105f9a3f4b421896bce715d
Just added some codestyle, shebang, one useless comment and parallelism.
🙃
#!/usr/bin/env python3
import os
import sys
import json
import hashlib
import requests
from subprocess import call
from concurrent.futures import ThreadPoolExecutor as PoolExecutor
def download_all_from_user(user: str) -> None:
    """Clone every gist listed for *user*, walking the API page by page.

    Pages are requested starting at 1 until the API returns an empty list.
    Each non-empty page is handed to ``download_all``.

    Raises:
        requests.HTTPError: if the API answers with an error status. Without
            this check the error payload (a JSON object, not a list) would be
            passed on to the download workers.
    """
    page = 1
    while True:
        url = f"https://api.github.com/users/{user}/gists?page={page}"
        response = requests.get(url)
        response.raise_for_status()
        # Decode the body once and reuse it for both the emptiness check and
        # the download.
        gists = response.json()
        if not gists:
            break
        download_all(gists)
        page += 1
def download_all(gists: list):
    """Run ``download`` on every entry of *gists* using a 10-worker thread pool.

    The result iterator is drained so that an exception raised inside a
    worker surfaces here instead of being silently dropped.
    """
    with PoolExecutor(max_workers=10) as pool:
        list(pool.map(download, gists))
def download(gist):
    """Clone one gist and store its description inside the clone.

    The clone directory is named ``<id><md5 hex digest of updated_at>``, and
    the description is written to ``description.txt`` in that directory.

    Args:
        gist: mapping with the keys ``id``, ``updated_at``, ``git_pull_url``
            and ``description``.
    """
    target = gist["id"] + hashlib.md5(gist["updated_at"].encode('utf-8')).hexdigest()
    # call() signals a failed clone only through its return value; it does
    # not raise.
    call(["git", "clone", gist["git_pull_url"], target])
    if not os.path.isdir(target):
        # Without a clone directory open() would raise and abort the whole
        # batch of downloads; skip just this gist instead.
        print(f"skipping {gist['id']}: clone directory missing", file=sys.stderr)
        return
    description_file = os.path.join(target, "description.txt")
    with open(description_file, "w") as f:
        # Write an empty line, not the text "None", for a null description.
        f.write(f"{gist['description'] or ''}\n")
# Run: the GitHub user name is the first command-line argument.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Exit with a usage message (non-zero status) instead of an
        # IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <github-user>")
    user = sys.argv[1]
    download_all_from_user(user)
There is a more direct way to get at these files, as shared by Observable's Ian in the following notebook.
https://observablehq.com/@enjalot/blockbuilder-search-data
The data is already available as JSON format, which provides more insights into the gists as per the notebook. I have made a crude way of getting at gist ids and the thumbnails, here
https://kamalabot.github.io/M3nD3/blocksD3.html
@antonydevanchi @leoloobeek thanks, both versions of the code work fine.
One-liner to get the first 100 of your own private gists:
curl -H "Authorization: Bearer <your_token>" 'https://api.github.com/gists?per_page=100' | jq '.[] | .git_pull_url' | xargs -n 1 git clone
Relies on jq.
import os
import sys
import json
import hashlib
import requests
import logging
from subprocess import call, CalledProcessError
from concurrent.futures import ThreadPoolExecutor as PoolExecutor
# Configure logging once for the whole script: INFO and above, each record
# prefixed with a timestamp and its level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
def download_all_from_user(user: str) -> None:
    """Clone every gist listed for *user*, walking the API page by page.

    Pages are requested starting at 1 until the API returns an empty list.
    Each non-empty page is handed to ``download_all``.  Any failure (HTTP
    error status, undecodable body, or a payload that is not a list) is
    logged and ends the walk.
    """
    page = 1
    while True:
        url = f"https://api.github.com/users/{user}/gists?page={page}"
        response = requests.get(url)
        if not response.ok:
            # An error body is a non-empty JSON object; treating it as a page
            # of gists would keep the loop requesting pages forever.
            logging.error(f"GitHub API request failed with status {response.status_code}")
            break
        try:
            gists = response.json()
        except ValueError:
            # ValueError is the common base of the JSON decode errors that
            # response.json() can raise.
            logging.error("Invalid JSON response")
            break
        if not isinstance(gists, list):
            logging.error("Unexpected response payload")
            break
        if not gists:
            # An empty page marks the end of the listing.
            break
        download_all(gists)
        page += 1
def download_all(gists: list):
    """Fan ``download`` out over *gists* on a thread pool of 10 workers.

    The mapped results are consumed one by one so the call blocks until the
    results have been retrieved.
    """
    with PoolExecutor(max_workers=10) as workers:
        outcomes = workers.map(download, gists)
        for _outcome in outcomes:
            continue
def download(gist):
    """Clone one gist and store its description inside the clone.

    The clone directory is named ``<id><md5 hex digest of updated_at>``, and
    the description is written to ``description.txt`` in that directory.
    Problems are logged and the gist is skipped; nothing is raised for a
    failed clone or a failed write.

    Args:
        gist: mapping describing the gist.  ``id``, ``updated_at`` and
            ``git_pull_url`` are required; ``description`` is optional.
    """
    required_keys = ("id", "updated_at", "git_pull_url")
    if any(key not in gist for key in required_keys):
        logging.error("Missing required gist information")
        return
    target = gist["id"] + hashlib.md5(gist["updated_at"].encode('utf-8')).hexdigest()
    try:
        # call() returns git's exit status and does not raise for a failed
        # clone; OSError covers the case where git cannot be started at all.
        status = call(["git", "clone", gist["git_pull_url"], target])
    except OSError as e:
        logging.error(f"Failed to clone gist: {e}")
        return
    if status != 0:
        logging.error(f"Failed to clone gist: git exited with status {status}")
        if not os.path.isdir(target):
            # No clone directory means nowhere to write the description.
            return
    description_file = os.path.join(target, "description.txt")
    try:
        with open(description_file, "w") as f:
            # `or` also covers a description that is present but null, which
            # a .get() default would not.
            f.write(f"{gist.get('description') or 'No description'}\n")
    except IOError as e:
        logging.error(f"Failed to write description file: {e}")
# Main execution: expects the GitHub user name as the first CLI argument.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    if not cli_args:
        logging.error("No user specified")
    else:
        user = cli_args[0]
        download_all_from_user(user)
simple and efficient. I have added a dictionary file to translate id to readable names and use this script to backup my gists (fork).