Python script to export your gists (public or private) to a single Markdown file, useful for backing them up or moving them elsewhere. It doesn't preserve full revision history.
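The script drives the GitHub Gists API with the requests library, authenticating with a personal access token and walking the paginated list of a user's gists. As a rough sketch of the underlying call the script is built on (the token and username below are placeholders, not part of the script):

```python
import requests

TOKEN = "your-personal-access-token"  # placeholder: a token that can read your gists
USERNAME = "your-github-username"     # placeholder

session = requests.Session()
session.headers.update({"Authorization": "token {}".format(TOKEN)})

# One page of gists for the user; further pages are linked via the Link header,
# which requests exposes as response.links.
response = session.get("https://api.github.com/users/{}/gists".format(USERNAME))
for gist in response.json():
    print(gist["id"], gist["description"])
print(response.links.get("next", {}).get("url"))
```

The full script below wraps these calls in a small on-disk cache and renders each gist, its comments, and its files into Markdown on stdout.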
"""Noddy to dump out all your gists to Markdown | |
https://developer.github.com/v3/gists/ | |
Requires the requests library. | |
""" | |
import argparse | |
import hashlib | |
import json | |
import logging | |
import os | |
import sys | |
import requests | |
GIST_TEMPLATE = """ | |
# {updated_at} {description} | |
{url} | |
## Comments | |
{comments} | |
## Files | |
{files} | |
""" | |
COMMENT_TEMPLATE = """ | |
### {login} @ {updated_at} | |
{body} | |
""".strip() | |
FILE_TEMPLATE = """ | |
### {filename} | |
```{language} | |
{content} | |
``` | |
""".strip() | |
class GitHubSession(object):
    def __init__(self, cachedir, token):
        self.session = requests.Session()
        self.session.headers.update({"Authorization": "token {}".format(token)})
        self.cachedir = cachedir
        try:
            os.makedirs(self.cachedir)
        except OSError:
            # Cache directory already exists
            pass

    def _get(self, url, load, read, dump):
        """Retrieve a URL and process it with the provided functions, caching the result on disk.

        :param url: URL to download
        :param load: callable which will be passed the cache file object
        :param read: callable which will be passed the requests response to process
        :param dump: callable which will be passed the output and the cache file object to write to
        """
        # Cache filename is the SHA-256 digest of the URL
        cache_filename = os.path.join(self.cachedir, hashlib.sha256(url.encode("utf-8")).hexdigest())
        try:
            with open(cache_filename, "rb") as fp:
                output = load(fp)
            logging.info("Cache HIT for {}".format(url))
        except IOError:
            logging.info("Cache MISS for {}".format(url))
            response = self.session.get(url)
            output = read(response)
            with open(cache_filename, "wb") as fp:
                dump(output, fp)
            with open(cache_filename, "rb") as fp:
                output = load(fp)
        return output

    def get_raw(self, url):
        """Get the URL and return the raw content as text"""
        return self._get(
            url,
            load=lambda fp: fp.read().decode("utf-8"),
            read=lambda response: response.content,
            dump=lambda output, fp: fp.write(output),
        )

    def get_json(self, url):
        """Get the URL and read the content as JSON

        :returns: {"json": response.json(), "links": response.links}
        """
        return self._get(
            url,
            load=lambda fp: json.loads(fp.read().decode("utf-8")),
            read=lambda response: {"json": response.json(), "links": response.links},
            dump=lambda output, fp: fp.write(json.dumps(output).encode("utf-8")),
        )

    def get_all(self, url):
        """Use Link headers to fetch all items in a chain of paginated URLs.

        Assumes each page returns a JSON list. Yields individual items.
        """
        response = self.get_json(url)
        for item in response["json"]:
            yield item
        while "next" in response["links"]:
            url = response["links"]["next"]["url"]
            response = self.get_json(url)
            for item in response["json"]:
                yield item
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--private-only", help="Only download private gists", action="store_true", default=False)
    parser.add_argument("--public-only", help="Only download public gists", action="store_true", default=False)
    parser.add_argument("--max-file-size", help="Maximum size of a file to export (in bytes)", type=int, default=10 * 1024)
    parser.add_argument("cache", help="Directory to cache responses in")
    parser.add_argument("token", help="Personal access token, see https://github.com/settings/applications")
    parser.add_argument("username", help="Username to dump out")
    parser.add_argument("skip", help="Gist ids to skip downloading", nargs="*")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    session = GitHubSession(args.cache, args.token)
    for gist in session.get_all("https://api.github.com/users/{}/gists".format(args.username)):
        if gist["id"] in args.skip:
            logging.info("Skipping gist {} {}".format(gist["url"], gist["description"]))
            continue
        if gist["public"] and args.private_only:
            logging.info("Skipping public gist {} {}".format(gist["url"], gist["description"]))
            continue
        if not gist["public"] and args.public_only:
            logging.info("Skipping private gist {} {}".format(gist["url"], gist["description"]))
            continue

        comments = []
        for comment in session.get_all(gist["comments_url"]):
            comments.append(COMMENT_TEMPLATE.format(
                login=comment["user"]["login"],
                body=comment["body"],
                updated_at=comment["updated_at"],
            ))

        files = []
        for filename, file_info in gist["files"].items():
            content = session.get_raw(file_info["raw_url"])
            language = file_info.get("language", "")
            if language and language.lower() == "markdown":
                # Indent Markdown content so its own fences don't close the surrounding code block
                content = "\n".join("    {}".format(line) for line in content.splitlines())
            if language:
                language = language.lower()
            if len(content) > args.max_file_size:
                logging.info("File {} too big, skipping".format(filename))
                content = "\nToo long to show, see original gist for content\n"
                language = ""
            files.append(FILE_TEMPLATE.format(
                filename=filename,
                language=language,
                content=content,
            ))

        comments = "\n".join(comments) if comments else "\nNo Comments"
        updated_at = gist["updated_at"].split("T")[0]
        sys.stdout.write(GIST_TEMPLATE.format(
            updated_at=updated_at,
            description=gist["description"],
            url=gist["url"],
            comments=comments,
            files="\n".join(files),
        ))


if __name__ == '__main__':
    main()
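Output goes to stdout, so a typical run (the script filename here is illustrative) looks something like `python export_gists.py ./gist-cache <your-token> <your-username> > gists.md`, with any gist ids to skip appended as extra arguments.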