-
-
Save pdashford/2e4bcd4fc2343e2fd03efe4da17f577d to your computer and use it in GitHub Desktop.
""" | |
Downloads folders from github repo | |
Requires PyGithub | |
pip install PyGithub | |
""" | |
import os | |
import sys | |
import base64 | |
import shutil | |
import getopt | |
from github import Github | |
from github import GithubException | |
def get_sha_for_tag(repository, tag):
    """
    Return the commit SHA that the branch or tag named ``tag`` points to.

    Branches are searched before tags, so a branch wins over a tag that
    has the same name.

    :param repository: object exposing ``get_branches()`` and ``get_tags()``,
        each yielding items with ``name`` and ``commit.sha`` attributes
    :param tag: exact branch or tag name to look up
    :returns: ``commit.sha`` of the first matching branch, otherwise of the
        first matching tag
    :raises ValueError: if neither a branch nor a tag has that name
    """
    # Walk each listing lazily and stop at the first hit instead of
    # materialising every branch and every tag before comparing names.
    for list_refs in (repository.get_branches, repository.get_tags):
        for ref in list_refs():
            if ref.name == tag:
                return ref.commit.sha
    raise ValueError('No Tag or Branch exists with that name')
def download_directory(repository, sha, server_path):
    """
    Recursively download the directory ``server_path`` of ``repository`` at
    commit ``sha`` into a local directory with the same relative path.

    Any existing local directory at ``server_path`` is deleted first.
    A failure on an individual file is reported on standard output and the
    download continues with the next entry.

    :param repository: object exposing ``get_dir_contents(path, ref=...)``
        and ``get_contents(path, ref=...)``
    :param sha: commit SHA passed as ``ref`` to every repository call
    :param server_path: directory path inside the repository; also used as
        the local destination, relative to the current working directory
    """
    # Start from an empty directory so leftovers from an earlier run
    # cannot survive next to the freshly downloaded files.
    if os.path.exists(server_path):
        shutil.rmtree(server_path)
    os.makedirs(server_path)

    for content in repository.get_dir_contents(server_path, ref=sha):
        # Parenthesised single-argument print works on Python 2 and 3.
        print("Processing %s" % content.path)
        if content.type == 'dir':
            # The recursive call creates the local directory itself.
            download_directory(repository, sha, content.path)
            continue
        try:
            file_content = repository.get_contents(content.path, ref=sha)
            file_data = base64.b64decode(file_content.content)
            # Binary mode keeps the decoded bytes intact; the context
            # manager closes the handle even when the write fails.
            with open(content.path, "wb") as file_out:
                file_out.write(file_data)
        except (GithubException, IOError) as exc:
            print('Error processing %s: %s' % (content.path, exc))
def usage():
    """
    Print the command-line usage summary to standard output.
    """
    # A parenthesised single argument is valid both as a Python 2 print
    # statement and as a Python 3 print() call.
    print("usage: gh-download --token=token --org=org --repo=repo --branch=branch --folder=folder")
def main(argv):
    """
    Parse the command-line options and download the requested folder.

    Prints a message followed by the usage line and exits with status 2
    when the options cannot be parsed or a required option is missing.

    :param argv: argument list without the program name
        (for example ``sys.argv[1:]``)
    """
    try:
        opts, _ = getopt.getopt(argv, "t:o:r:b:f:", ["token=", "org=", "repo=", "branch=", "folder="])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    required = ("token", "org", "repo", "branch", "folder")
    # Every accepted flag spelling, mapped to its canonical setting name.
    flag_names = {}
    for name in required:
        flag_names["-" + name[0]] = name
        flag_names["--" + name] = name

    # Default every setting to None so an omitted flag is detected below
    # instead of surfacing later as an unbound local variable.
    settings = dict.fromkeys(required)
    for opt, arg in opts:
        if opt in flag_names:
            settings[flag_names[opt]] = arg

    missing = [name for name in required if not settings[name]]
    if missing:
        print("Missing required option(s): %s" % ", ".join("--" + name for name in missing))
        usage()
        sys.exit(2)

    github = Github(settings["token"])
    organization = github.get_organization(settings["org"])
    repository = organization.get_repo(settings["repo"])
    sha = get_sha_for_tag(repository, settings["branch"])
    download_directory(repository, sha, settings["folder"])
if __name__ == "__main__":
    # Script entry point: hand the command-line arguments, minus the
    # program name, to main().
    main(sys.argv[1:])
Sorry for my ignorance, but which fields do I need to fill in with the repository information? The values inside the parentheses (token, org, repo, ...)?
github = Github(token)
organization = github.get_organization(org)
repository = organization.get_repo(repo)
sha = get_sha_for_tag(repository, branch)
download_directory(repository, sha, folder)
Hey @RenanBancke - here is a short snippet of my production code, with the values explained line by line. It's not the same as your example but perhaps it's close enough to help you (or someone else).
g = github.Github(token)
Here, `token` is a string of length 40 starting with "ghp_". Look here for more information.
user = g.get_user(user_str)
repo = user.get_repo(repo_str)
Here, `user_str` is "pjcpjc" and `repo_str` is "tts_netflow". This is referring to the public repo here. I believe it all works similarly if you want to refer to repos owned by an organization as opposed to a repo owned by a user.
sha = _get_sha_for_tag(repo, release_tag)
Here, `release_tag` is "0.0.2". So I'm going to get the files from here. I believe you can use all sorts of identifying strings for this second argument, including the long randomized SHA strings.
From here my code gets more complicated, but there is a line that's like `download_directory(repository, sha, folder)`, with `folder` being "tts_netflow". Note that in this case, I'm referring to the directory named "tts_netflow" and not the repo named "tts_netflow". Specifically, I'm downloading this directory. If I were to pass "test_tts_netflow" as the `folder`, then I would be downloading this directory.
Hopefully that helps. At any rate, it helped me review my production code.
# This code was copied from
# https://gist.github.com/pdashford/2e4bcd4fc2343e2fd03efe4da17f577d
# and modified to work with Python 3, add type hints, fix the formatting,
# and simplify the code for our needs.
"""
Downloads folders from github repo
Requires PyGithub
pip install PyGithub
"""
import base64
import getopt
import os
import shutil
import sys
from typing import Optional
from github import Github, GithubException
from github.ContentFile import ContentFile
from github.Repository import Repository
def get_sha_for_tag(repository: Repository, tag: str) -> str:
    """
    Return the commit SHA that the branch or tag named ``tag`` points to.

    Branches are searched before tags, so a branch wins over a tag that
    has the same name.

    :param repository: repository whose branches and tags are searched
    :param tag: exact branch or tag name to look up
    :returns: ``commit.sha`` of the first matching branch, otherwise of the
        first matching tag
    :raises ValueError: if neither a branch nor a tag has that name
    """
    # Walk each listing lazily and stop at the first hit instead of
    # materialising every branch and every tag before comparing names.
    for list_refs in (repository.get_branches, repository.get_tags):
        for ref in list_refs():
            if ref.name == tag:
                return ref.commit.sha
    raise ValueError("No Tag or Branch exists with that name")
def download_directory(repository: Repository, sha: str, server_path: str) -> None:
    """
    Recursively download the directory ``server_path`` of ``repository`` at
    commit ``sha`` into a local directory with the same relative path.

    Any existing local directory at ``server_path`` is deleted first.
    A failure on an individual file is reported on standard output and the
    download continues with the next entry.

    :param repository: repository to read from
    :param sha: commit SHA passed as ``ref`` to every repository call
    :param server_path: directory path inside the repository; also used as
        the local destination, relative to the current working directory
    """
    # Start from an empty directory so leftovers from an earlier run
    # cannot survive next to the freshly downloaded files.
    if os.path.exists(server_path):
        shutil.rmtree(server_path)
    os.makedirs(server_path)

    for content in repository.get_dir_contents(server_path, ref=sha):
        print("Processing %s" % content.path)
        if content.type == "dir":
            # The recursive call creates the local directory itself.
            download_directory(repository, sha, content.path)
            continue
        try:
            file_content = repository.get_contents(content.path, ref=sha)
            if not isinstance(file_content, ContentFile):
                raise ValueError("Expected ContentFile")
            # An entry without content still produces an (empty) local file.
            if file_content.content:
                file_data = base64.b64decode(file_content.content)
            else:
                file_data = b""
            # Write the decoded bytes untouched: decoding them as UTF-8
            # would fail on binary files, and the context manager closes
            # the handle even when the write raises.
            with open(content.path, "wb") as file_out:
                file_out.write(file_data)
        except (GithubException, IOError, ValueError) as exc:
            print("Error processing %s: %s" % (content.path, exc))
def usage():
    """
    Write the one-line command synopsis to standard output.
    """
    synopsis = "usage: gh-download --repo=repo --branch=branch --folder=folder"
    print(synopsis)
def main(argv):
    """
    Command-line driver: read --repo, --branch and --folder from ``argv``
    and download that folder of the repository at the given branch or tag.

    Prints a message plus the usage line and exits with status 2 when the
    options cannot be parsed or when a required option is missing or empty.
    """
    try:
        parsed, _ = getopt.getopt(argv, "r:b:f:", ["repo=", "branch=", "folder="])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    # Canonical setting name for every accepted flag spelling.
    aliases = {
        "-r": "repo",
        "--repo": "repo",
        "-b": "branch",
        "--branch": "branch",
        "-f": "folder",
        "--folder": "folder",
    }
    values = {"repo": None, "branch": None, "folder": None}
    for flag, value in parsed:
        if flag in aliases:
            values[aliases[flag]] = value

    # Validate in a fixed order so only the first missing option is reported.
    checks = (
        ("repo", "Repo is required"),
        ("branch", "Branch is required"),
        ("folder", "Folder is required"),
    )
    for key, complaint in checks:
        if not values[key]:
            print(complaint)
            usage()
            sys.exit(2)

    # No token is supplied, so the client is created with None.
    client = Github(None)
    repository = client.get_repo(values["repo"])
    commit_sha = get_sha_for_tag(repository, values["branch"])
    download_directory(repository, commit_sha, values["folder"])
if __name__ == "__main__":
    # Script entry point: hand the command-line arguments, minus the
    # program name, to main().
    main(sys.argv[1:])
Hey, here is a short version of this code:
import os
import sys
import base64
import shutil
import argparse
from github import Github
def get_sha_for_tag(repository, tag):
    """
    Return the commit SHA that the branch or tag named ``tag`` points to.

    Branches are searched before tags, so a branch wins over a tag that
    has the same name.

    :param repository: object exposing ``get_branches()`` and ``get_tags()``,
        each yielding items with ``name`` and ``commit.sha`` attributes
    :param tag: exact branch or tag name to look up
    :returns: ``commit.sha`` of the first matching branch, otherwise of the
        first matching tag
    :raises ValueError: if neither a branch nor a tag has that name
    """
    # Walk each listing lazily and stop at the first hit instead of
    # materialising every branch and every tag before comparing names.
    for list_refs in (repository.get_branches, repository.get_tags):
        for ref in list_refs():
            if ref.name == tag:
                return ref.commit.sha
    raise ValueError('No Tag or Branch exists with that name')
def download_directory(repository, sha, server_path):
    """
    Recursively download the directory ``server_path`` of ``repository`` at
    commit ``sha`` into a local directory with the same relative path.

    Any existing local directory at ``server_path`` is deleted first.
    A failure on an individual file is reported on standard output and the
    download continues with the next entry.

    :param repository: object exposing ``get_dir_contents(path, ref=...)``
        and ``get_contents(path, ref=...)``
    :param sha: commit SHA passed as ``ref`` to every repository call
    :param server_path: directory path inside the repository; also used as
        the local destination, relative to the current working directory
    """
    # Start from an empty directory so leftovers from an earlier run
    # cannot survive next to the freshly downloaded files.
    if os.path.exists(server_path):
        shutil.rmtree(server_path)
    os.makedirs(server_path)

    for content in repository.get_dir_contents(server_path, ref=sha):
        print(f"Processing {content.path}")
        if content.type == 'dir':
            # The recursive call creates the local directory itself.
            download_directory(repository, sha, content.path)
            continue
        try:
            file_content = repository.get_contents(content.path, ref=sha)
            file_data = base64.b64decode(file_content.content)
            # b64decode returns bytes, so the file must be opened in binary
            # mode; a text-mode handle rejects bytes with TypeError.
            with open(content.path, "wb") as file_out:
                file_out.write(file_data)
        except Exception as exc:
            # Deliberately broad: one bad file must not abort the download.
            print(f"Error processing {content.path}: {exc}")
def parse_args():
    """
    Build the argument parser and parse the process command line.

    All five options (--token, --org, --repo, --branch, --folder) are
    registered as required; the parsed namespace is returned.
    """
    option_help = (
        ('--token', 'Github API token'),
        ('--org', 'Github organization'),
        ('--repo', 'Github repository'),
        ('--branch', 'Github branch or tag'),
        ('--folder', 'Folder to download'),
    )
    parser = argparse.ArgumentParser(description='Download a directory from a Github repo.')
    for flag, text in option_help:
        parser.add_argument(flag, required=True, help=text)
    return parser.parse_args()
def main():
    """
    Script driver: parse the command line, resolve the requested branch or
    tag to a commit SHA and download the requested folder at that commit.
    """
    options = parse_args()
    client = Github(options.token)
    repository = client.get_organization(options.org).get_repo(options.repo)
    commit_sha = get_sha_for_tag(repository, options.branch)
    download_directory(repository, commit_sha, options.folder)
# The guard must compare the module's __name__ with "__main__"; the bare
# identifiers `name` / "main" raise NameError as soon as the file is loaded.
if __name__ == "__main__":
    # Script entry point.
    main()
This helped me a lot, thanks!
My version of `download_directory` is a bit shorter. Mine doesn't make a directory, then delete a directory, then make the directory again, when executing the `if content.type == 'dir':` clause. But overall, great code!