Skip to content

Instantly share code, notes, and snippets.

@dansku
Created April 3, 2023 22:14
Show Gist options
  • Save dansku/ef274d41c650eed241d6359ed77fe889 to your computer and use it in GitHub Desktop.
Save dansku/ef274d41c650eed241d6359ed77fe889 to your computer and use it in GitHub Desktop.
Download all JSON files from a GitHub repository folder
import os
import requests
from bs4 import BeautifulSoup
import json
def download_file(url, local_filename, timeout=30):
    """Stream the resource at *url* into the local file *local_filename*.

    The response body is written in 8 KiB chunks so the whole payload is
    never held in memory at once.

    Args:
        url: Address of the resource to fetch.
        local_filename: Path of the file to create (overwritten if present).
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument. Without one, requests waits indefinitely on a server
            that stops responding.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail before creating the local file if the server reported an error.
        response.raise_for_status()
        with open(local_filename, "wb") as out:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # nothing to write for an empty chunk
                    out.write(chunk)
def download_files_from_github_url(github_url):
    """Download every ``.json`` file linked from a GitHub folder page.

    The HTML at *github_url* is fetched and scanned for anchors carrying the
    class string ``"js-navigation-open Link--primary"``. Each anchor's
    ``href`` is turned into a raw.githubusercontent.com URL and, when the
    linked name ends in ``.json``, the file is saved into the current working
    directory under that name.

    NOTE(review): this depends on GitHub serving the file list as static
    HTML with that exact class string; if the markup differs, no anchors
    match and nothing is downloaded. Confirm against the live page.

    Args:
        github_url: URL of the GitHub folder (``.../tree/<ref>/<path>``) page.

    Raises:
        requests.HTTPError: If the listing page or any file download returns
            a 4xx/5xx status.
    """
    raw_base_url = "https://raw.githubusercontent.com"

    response = requests.get(github_url, timeout=30)
    # Do not parse an error page as if it were the folder listing.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    links = soup.find_all("a", class_="js-navigation-open Link--primary")

    for link in links:
        file_path = link["href"]
        file_name = file_path.split("/")[-1]
        if not file_name.endswith(".json"):
            continue

        # Drop only the first "/blob/" path segment. Removing every "/blob"
        # substring would also mangle directory names such as "/blobs/".
        raw_file_url = raw_base_url + file_path.replace("/blob/", "/", 1)

        print("Downloading file: " + file_name)
        download_file(raw_file_url, file_name)
def files_in_directory(directory):
    """Return the names of the ``.json`` entries found in *directory*.

    Names are returned without the directory prefix, in the order
    ``os.listdir`` yields them. Only the name is inspected, so a
    sub-directory whose name ends in ``.json`` is included as well.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A list of entry names ending in ``.json``; empty if there are none.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. missing directory).
    """
    # endswith() requires the dot, so an entry named exactly "json" (no
    # extension at all) is no longer mistaken for a JSON file.
    return [name for name in os.listdir(directory) if name.endswith(".json")]
if __name__ == "__main__":
    # NOTE: an earlier version called connect_to_db() here. No such function
    # is defined or imported in this file and its result was never used, so
    # the script stopped with NameError before downloading anything. The call
    # has been removed.

    # Folder page whose .json files are downloaded into the working directory.
    github_url = "https://github.com/hotspotty/depindd/tree/main/src/app/(docs)/(pages)/projects"
    download_files_from_github_url(github_url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment