-
-
Save jmolivas/917e0d6bb45bd7e83186fea7be433785 to your computer and use it in GitHub Desktop.
Script to crawl Drupal JSON:API endpoint to local filesystem.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
##
# Script to crawl Drupal JSON:API endpoint to local filesystem.
#
# Assumes endpoint to be /jsonapi and uses basic auth.
#
# Requirements:
#   ag - https://github.com/ggreer/the_silver_searcher
#   jq - https://github.com/stedolan/jq
##
echo "In order to run this script you need to change REPLACE_DOMAIN to your domain"

# Root of the JSON:API endpoint that is crawled.
API_ROOT_URL="http://REPLACE_DOMAIN/jsonapi"
# Credentials in "user:password" form, handed to curl -u.
BASIC_AUTH=("username:password")
# Directory the script was started from; used in progress messages.
OUT_DIR=$(pwd)
#######################################
# Download one JSON:API document and store it pretty-printed on disk.
# Globals:
#   BASIC_AUTH (read) - "user:password" credentials handed to curl -u
#   OUT_DIR    (read) - only used in the progress message
# Arguments:
#   $1 = Absolute url
#   $2 = relative path; the document is written to content$2/index.json
#        below the current working directory
# Outputs:
#   Progress messages on stdout, failures on stderr.
# Returns:
#   0 on success; non-zero when the directory cannot be created, the
#   download fails, or jq rejects the response.
#######################################
function download_json {
  local url=$1
  local relative_path=$2
  local target_dir="content${relative_path}"
  local body

  echo "Creating directory ${target_dir}"
  mkdir -p "$target_dir" || return

  echo "Downloading ${url} to ${OUT_DIR}/${target_dir}/index.json"
  # Fetch first and format second, so a failed request is reported instead
  # of being hidden behind jq's exit status and leaving an empty index.json.
  body=$(curl -u "${BASIC_AUTH[0]}" -sS "$url") || {
    echo "Failed to download ${url}" >&2
    return 1
  }
  printf '%s\n' "$body" | jq . > "${target_dir}/index.json"
}
# Get the root index first, then download every document it links to.
download_json "$API_ROOT_URL" /index
jq -r '.links[].href' content/index/index.json | while read -r API_PATH; do
  # Path below the API root decides where the document lands under content/.
  RELATIVE_PATH=${API_PATH#"$API_ROOT_URL"}
  download_json "$API_PATH" "$RELATIVE_PATH"
done
# @todo Loop for paginated content
# Finally fetch all content: every API URL mentioned in the documents
# downloaded so far. The search pattern is built from API_ROOT_URL so the
# domain is configured in one place only.
# sort -u downloads each URL once even though it occurs in many documents,
# and lets the scan of content/ finish before new files are written into it.
ag --nofilename --no-numbers --ignore-case --only-matching --silent \
  "${API_ROOT_URL}[^\"]+" content \
  | LC_ALL=C sort -u \
  | while read -r VALUE; do
    if [[ -z "$VALUE" ]]; then
      # Nothing to download for an empty match; just report it.
      echo "\$VALUE is empty"
    else
      RELATIVE_PATH=${VALUE#"$API_ROOT_URL"}
      download_json "$VALUE" "$RELATIVE_PATH"
    fi
  done
# Count regular files only; `ls -Rs | wc -l` also counted directory headers,
# "total" lines and blank separators. The arithmetic expansion strips the
# padding some wc implementations add.
FILE_COUNT=$(($(find content -type f | wc -l)))
echo "Downloaded totally $FILE_COUNT files"

# @todo Do not hardcode domains.
# Point the downloaded documents at the local server. Only content/ is
# searched so this script and unrelated files in the working directory are
# never rewritten. File names are passed NUL-delimited, and `sed -i.bak`
# is used because it is accepted by both GNU and BSD sed.
ag -l -0 'REPLACE_DOMAIN' content | while IFS= read -r -d '' JSON_FILE; do
  sed -i.bak -e 's/REPLACE_DOMAIN/localhost:3000/g' "$JSON_FILE" \
    && rm -f -- "${JSON_FILE}.bak"
done
echo "All done"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment