# Scraped from a GitHub gist page: axgkl/9acf71366348e0ab0b85dd9258374792
# (owner @axgkl, last active December 12, 2021 06:43).
# The page navigation text that preceded the script ("Skip to content",
# "Show Gist options", "Save ... to your computer and use it in GitHub
# Desktop.") is not part of the script and has been reduced to this note.
#!/usr/bin/env bash
set -eo pipefail
# from moby project - w/o go dependency (nailing amd64) and w/o jq dep (using
# python)
# NOTE(review): the four listing lines below look like leftover sample output;
# they are kept unchanged.
# hello-world latest ef872312fe1b 3 months ago 910 B
# hello-world latest ef872312fe1bbc5e05aae626791a47ee9b032efa8f3bda39cc0be7b56bfe59b9 3 months ago 910 B
# debian latest f6fab3b798be 10 weeks ago 85.1 MB
# debian latest f6fab3b798be3174f45aa1eb731f8182705555f89c9026d8c1ef230cbf8301dd 10 weeks ago 85.1 MB

# check if essential commands are in our PATH
# (curl talks to the registry, sha256sum derives the fake layer IDs; failing
# here gives a clear message instead of an abort halfway through a download)
for cmd in curl sha256sum; do
	if ! command -v "$cmd" &> /dev/null; then
		echo >&2 "error: \"$cmd\" not found!"
		exit 1
	fi
done
#set -x
# Minimal stand-in for the few jq invocations this script needs, implemented
# with the python json module.
#
# Supported call forms:
#   py_jq --raw-output '.a.b'
#       JSON on stdin; prints the value found at the dotted key path.
#   py_jq [--raw-output] [--compact-output] '.manifests[]'   (or '.layers[]')
#       JSON on stdin; prints every element of that array as one JSON line.
#   py_jq id_and_parent FILE
#       prints a JSON object holding "id" (and "parent", when present) of FILE.
#   py_jq make_mani JSON...
#       prints the concatenated JSON arguments as a single JSON string literal.
#   py_jq del_hist_and_root JSON... FILE
#       prints the JSON of FILE updated with the concatenated JSON arguments,
#       with the "history" and "rootfs" keys removed.
# Any other filter raises "not implemented".
#
# Arguments are forwarded one-to-one ("$@"), so paths and JSON text containing
# whitespace or glob characters reach python unchanged.
# Exits (the current shell or subshell) with status 1 on any failure.
py_jq() {
	local py
	# prefer "python", fall back to "python3" where only that name exists
	py="$(command -v python || command -v python3)" || {
		echo >&2 'error: "python" not found!'
		exit 1
	}
	"$py" -Ssc 'if 1:
    import json, sys

    loads, dumps = json.loads, json.dumps
    write = sys.stdout.write
    a = sys.argv[1:]
    ra = "--raw-output"

    def load_file(path):
        with open(path) as fd:
            return loads(fd.read())

    if not a:
        raise Exception("not implemented: %s" % str(a))
    if a[0] == "id_and_parent":
        src, m = load_file(a[1]), {}
        m["id"] = src["id"]
        p = src.get("parent")
        if p is not None:
            m["parent"] = p
        write(dumps(m))
    elif a[0] == "make_mani":
        write(dumps("".join(a[1:])))
    elif a[0] == "del_hist_and_root":
        # everything between the command word and the file name is JSON text,
        # no matter into how many words the caller split it
        cfg = load_file(a[-1])
        cfg.update(loads("".join(a[1:-1])))
        cfg.pop("history", None)
        cfg.pop("rootfs", None)
        write(dumps(cfg, indent=4))
    elif len(a) == 2 and ra in a and a[-1].startswith("."):
        node = loads(sys.stdin.read())
        for key in a[-1][1:].split("."):
            node = node[key]
        write(str(node))
    elif a[-1] in (".manifests[]", ".layers[]"):
        doc = loads(sys.stdin.read())
        for item in doc[a[-1][1:-2]]:
            print(dumps(item))
    else:
        raise Exception("not implemented: %s" % str(a))
' "$@" || exit 1
}
# Print the command-line synopsis and one example invocation on stdout.
# Arguments: $1 - optional exit status; when it is non-empty the script exits
#                 with it, otherwise the function just returns.
usage() {
	printf '%s\n' \
		"usage: $0 dir image[:tag][@digest] ..." \
		" $0 /tmp/old-hello-world hello-world:latest@sha256:8be990ef2aeb16dbcb9271ddfe2610fa6658d13f6dfb8bc72074cc1ca36966a7"
	if [ -n "$1" ]; then
		exit "$1"
	fi
}
# The first positional argument names the directory the tar contents are built
# in; every remaining argument is an image reference.
dir="$1"
if ! shift; then
	usage 1 >&2
fi
if [ $# -le 0 ] || [ -z "$dir" ]; then
	usage 2 >&2
fi
mkdir -p "$dir"

# Tag lists are collected per image in "$dir"/tags-<image>.tmp files
# (hacky workaround for Bash 3 support: no associative arrays); start clean.
rm -f "$dir"/tags-*.tmp
images=()

# Entries for manifest.json; generation is switched off as soon as
# doNotGenerateManifestJson becomes non-empty.
manifestJsonEntries=()
doNotGenerateManifestJson=

# Separator used when splitting multi-line helper output into arrays
# (a single line feed).
newlineIFS=$'\n'

# Docker Hub endpoints
registryBase='https://registry-1.docker.io'
authBase='https://auth.docker.io'
authService='registry.docker.io'
# https://github.com/moby/moby/issues/33700
#
# Download one blob of an image from the registry into a file.
#
# The first request carries the bearer token and does not follow redirects;
# its response headers are captured. When the registry answers with a 3xx
# status, the Location target is fetched by a second request that does not
# send the Authorization header (see the issue linked above).
#
# Globals:   registryBase (read)
# Arguments: $1 - bearer token
#            $2 - image repository, e.g. "library/debian"
#            $3 - blob digest
#            $4 - target file
#            $5.. - extra curl arguments, used for both requests
# Returns:   0 on success; non-zero when curl fails, when a redirect has no
#            Location header, or when the registry answers with neither a 2xx
#            nor a 3xx status (the target file is removed in the last two
#            cases, so a later run does not mistake an error document for an
#            already downloaded blob).
fetch_blob() {
	local token="$1"; shift
	local image="$1"; shift
	local digest="$1"; shift
	local targetFile="$1"; shift
	local curlArgs=( "$@" )

	# declared separately so that a failing curl is not masked by "local"
	local curlHeaders
	curlHeaders="$(
		curl -S "${curlArgs[@]}" \
			-H "Authorization: Bearer $token" \
			"$registryBase/v2/$image/blobs/$digest" \
			-o "$targetFile" \
			-D-
	)" || return
	curlHeaders="${curlHeaders//$'\r'/}"

	# the minor version is optional: HTTP/2 status lines read "HTTP/2 307"
	if grep -qE '^HTTP/[0-9](\.[0-9])? 3' <<< "$curlHeaders"; then
		rm -f "$targetFile"
		local blobRedirect
		blobRedirect="$(awk -F ': ' 'tolower($1) == "location" { print $2; exit }' <<< "$curlHeaders")"
		if [ -z "$blobRedirect" ]; then
			echo >&2 "error: failed fetching '$image' blob '$digest'"
			printf '%s\n' "${curlHeaders%%$'\n'*}" >&2
			return 1
		fi
		curl -fSL "${curlArgs[@]}" \
			"$blobRedirect" \
			-o "$targetFile"
	elif ! grep -qE '^HTTP/[0-9](\.[0-9])? 2' <<< "$curlHeaders"; then
		# e.g. 401/404: what was written to the target file is not the blob
		rm -f "$targetFile"
		echo >&2 "error: failed fetching '$image' blob '$digest'"
		printf '%s\n' "${curlHeaders%%$'\n'*}" >&2
		return 1
	fi
}
# handle 'application/vnd.docker.distribution.manifest.v2+json' manifest
#
# Fetches the image config blob and every layer named by the manifest into
# "$dir", writes a VERSION and a json file next to each layer, and appends one
# manifest.json entry to manifestJsonEntries.
#
# Globals read:    dir, image, tag, token, imageIdentifier, newlineIFS,
#                  authBase, authService
# Globals written: imageId (ID of the last layer added; the top-level code
#                  puts it into the old-style "repositories" file),
#                  manifestJsonEntries
# Arguments:       $1 - manifest JSON text
# Exits the script when a layer has an unknown mediaType.
handle_single_manifest_v2() {
	local manifestJson="$1"; shift

	# declarations are kept apart from the command substitutions so that a
	# failing helper is not masked by the exit status of "local"
	local configDigest
	configDigest="$(printf '%s\n' "$manifestJson" | py_jq --raw-output '.config.digest')"
	local configFile="${configDigest#*:}.json" # strip off "sha256:"
	fetch_blob "$token" "$image" "$configDigest" "$dir/$configFile" -s

	local layersFs
	layersFs="$(printf '%s\n' "$manifestJson" | py_jq --raw-output --compact-output '.layers[]')"
	local IFS="$newlineIFS"
	local layers=( $layersFs )
	unset IFS

	echo "Downloading '$imageIdentifier' (${#layers[@]} layers)..."
	local i layerMeta layerMediaType layerDigest parentId layerTar layerToken
	local layerId=
	local layerFiles=()
	for i in "${!layers[@]}"; do
		layerMeta="${layers[$i]}"
		layerMediaType="$(printf '%s\n' "$layerMeta" | py_jq --raw-output '.mediaType')"
		layerDigest="$(printf '%s\n' "$layerMeta" | py_jq --raw-output '.digest')"

		# save the previous layer's ID
		parentId="$layerId"
		# create a new fake layer ID based on this layer's digest and the previous layer's fake ID
		layerId="$(echo "$parentId"$'\n'"$layerDigest" | sha256sum | cut -d' ' -f1)"
		# this accounts for the possibility that an image contains the same layer twice (and thus has a duplicate digest value)

		mkdir -p "$dir/$layerId"
		echo '1.0' > "$dir/$layerId/VERSION"

		if [ ! -s "$dir/$layerId/json" ]; then
			# this starter JSON is taken directly from Docker's own "docker save" output for unimportant layers
			# ("id" is this layer, "parent" the layer below it; the first
			# layer gets an empty "parent")
			printf '%s\n' '
{
	"id": "'"$layerId"'",
	"parent": "'"$parentId"'",
	"created": "0001-01-01T00:00:00Z",
	"container_config": {
		"Hostname": "",
		"Domainname": "",
		"User": "",
		"AttachStdin": false,
		"AttachStdout": false,
		"AttachStderr": false,
		"Tty": false,
		"OpenStdin": false,
		"StdinOnce": false,
		"Env": null,
		"Cmd": null,
		"Image": "",
		"Volumes": null,
		"WorkingDir": "",
		"Entrypoint": null,
		"OnBuild": null,
		"Labels": null
	}
}' > "$dir/$layerId/json"
		fi

		case "$layerMediaType" in
			application/vnd.docker.image.rootfs.diff.tar.gzip)
				layerTar="$layerId/layer.tar"
				layerFiles=( "${layerFiles[@]}" "$layerTar" )
				# TODO figure out why "-C -" doesn't work here
				# "curl: (33) HTTP server doesn't seem to support byte ranges. Cannot resume."
				# "HTTP/1.1 416 Requested Range Not Satisfiable"
				if [ -f "$dir/$layerTar" ]; then
					# TODO hackpatch for no -C support :'(
					echo "skipping existing ${layerId:0:12}"
					continue
				fi
				# a fresh token is requested for every layer download
				layerToken="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | py_jq --raw-output '.token')"
				# spelled out in full: the abbreviation "--progress" is
				# ambiguous for curl versions that also know --progress-meter
				fetch_blob "$layerToken" "$image" "$layerDigest" "$dir/$layerTar" --progress-bar
				;;

			*)
				echo >&2 "error: unknown layer mediaType ($imageIdentifier, $layerDigest): '$layerMediaType'"
				exit 1
				;;
		esac
	done

	# change "$imageId" to be the ID of the last layer we added (needed for old-style "repositories" file which is created later -- specifically for older Docker daemons)
	# deliberately NOT local: the top-level code reads it after we return
	imageId="$layerId"

	# munge the top layer image manifest to have the appropriate image configuration for older daemons
	local imageOldConfig
	imageOldConfig="$(py_jq id_and_parent "$dir/$imageId/json")"
	py_jq del_hist_and_root "$imageOldConfig" "$dir/$configFile" > "$dir/$imageId/json"

	# comma separated list of quoted layer paths (with a trailing comma that
	# is cut off again below)
	local ll="" layerFile
	for layerFile in "${layerFiles[@]}"; do
		ll="$ll\"$layerFile\","
	done
	local manifestJsonEntry
	manifestJsonEntry="$(
		py_jq make_mani '{
			"Config": "'"$configFile"'",
			"RepoTags": ["'"${image#library\/}:$tag"'"],
			"Layers": ['"${ll%?}"']
		}'
	)"
	manifestJsonEntries=( "${manifestJsonEntries[@]}" "$manifestJsonEntry" )
}
# Split an "image[:tag][@digest]" reference into the globals image, tag and
# digest.
#   - tag defaults to "latest" when the reference carries none
#   - digest falls back to the tag, so that it can always be used as the
#     manifest reference in the registry URL
#   - image gets the "library/" prefix when it has no namespace
# Arguments: $1 - image reference
_parse_image_ref() {
	local ref="$1"
	image="${ref%%[:@]*}"
	local rest="${ref#"$image"}" # ":tag@digest", ":tag", "@digest" or ""
	case "$rest" in
		*@*)
			digest="${rest#*@}"
			tag="${rest%%@*}"
			;;
		*)
			digest=
			tag="$rest"
			;;
	esac
	tag="${tag#:}"
	[ -n "$tag" ] || tag='latest'
	[ -n "$digest" ] || digest="$tag"
	# add prefix library if passed official image
	if [[ "$image" != *"/"* ]]; then
		image="library/$image"
	fi
}

# Print the manifest the registry returns for "$image" and the given reference.
# Globals:   token, image, registryBase (read)
# Arguments: $1 - tag or digest
_fetch_manifest() {
	curl -fsSL \
		-H "Authorization: Bearer $token" \
		-H 'Accept: application/vnd.docker.distribution.manifest.v2+json' \
		-H 'Accept: application/vnd.docker.distribution.manifest.list.v2+json' \
		-H 'Accept: application/vnd.docker.distribution.manifest.v1+json' \
		"$registryBase/v2/$image/manifests/$1"
}

# Download every image named on the command line and record its tag -> ID
# mapping in "$dir/tags-<image>.tmp" for the "repositories" file.
while [ $# -gt 0 ]; do
	_parse_image_ref "$1"
	shift

	imageFile="${image//\//_}" # "/" can't be in filenames :)

	token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | py_jq --raw-output '.token')"

	manifestJson="$(_fetch_manifest "$digest")"
	if [ "${manifestJson:0:1}" != '{' ]; then
		echo >&2 "error: /v2/$image/manifests/$digest returned something unexpected:"
		echo >&2 " $manifestJson"
		exit 1
	fi

	imageIdentifier="$image:$tag@$digest"

	schemaVersion="$(echo "$manifestJson" | py_jq --raw-output '.schemaVersion')"
	case "$schemaVersion" in
		2)
			mediaType="$(echo "$manifestJson" | py_jq --raw-output '.mediaType')"

			case "$mediaType" in
				application/vnd.docker.distribution.manifest.v2+json)
					handle_single_manifest_v2 "$manifestJson"
					;;
				application/vnd.docker.distribution.manifest.list.v2+json)
					# only this architecture is picked from a manifest list
					targetArch='amd64'
					layersFs="$(echo "$manifestJson" | py_jq --raw-output --compact-output '.manifests[]')"
					IFS="$newlineIFS"
					layers=( $layersFs )
					unset IFS

					found=""
					# parse first level multi-arch manifest
					for i in "${!layers[@]}"; do
						layerMeta="${layers[$i]}"
						maniArch="$(echo "$layerMeta" | py_jq --raw-output '.platform.architecture')"
						if [ "$maniArch" = "$targetArch" ]; then
							digest="$(echo "$layerMeta" | py_jq --raw-output '.digest')"
							# get second level single manifest
							submanifestJson="$(_fetch_manifest "$digest")"
							handle_single_manifest_v2 "$submanifestJson"
							found="found"
							break
						fi
					done
					if [ -z "$found" ]; then
						# name the architecture that was looked for, not the
						# last one that happened to be examined
						echo >&2 "error: manifest for $targetArch is not found"
						exit 1
					fi
					;;
				*)
					echo >&2 "error: unknown manifest mediaType ($imageIdentifier): '$mediaType'"
					exit 1
					;;
			esac
			;;

		1)
			if [ -z "$doNotGenerateManifestJson" ]; then
				echo >&2 "warning: '$imageIdentifier' uses schemaVersion '$schemaVersion'"
				echo >&2 " this script cannot (currently) recreate the 'image config' to put in a 'manifest.json' (thus any schemaVersion 2+ images will be imported in the old way, and their 'docker history' will suffer)"
				echo >&2
				doNotGenerateManifestJson=1
			fi

			# NOTE(review): the filters used in this branch ('.fsLayers | ...',
			# '.history | ...', '.[N]') are not among the forms py_jq
			# implements, so this branch stops with "not implemented" -- confirm
			# whether schemaVersion 1 support is still wanted.
			layersFs="$(echo "$manifestJson" | py_jq --raw-output '.fsLayers | .[] | .blobSum')"
			IFS="$newlineIFS"
			layers=( $layersFs )
			unset IFS

			history="$(echo "$manifestJson" | py_jq '.history | [.[] | .v1Compatibility]')"
			imageId="$(echo "$history" | py_jq --raw-output '.[0]' | py_jq --raw-output '.id')"

			echo "Downloading '$imageIdentifier' (${#layers[@]} layers)..."
			for i in "${!layers[@]}"; do
				imageJson="$(echo "$history" | py_jq --raw-output ".[${i}]")"
				layerId="$(echo "$imageJson" | py_jq --raw-output '.id')"
				imageLayer="${layers[$i]}"

				mkdir -p "$dir/$layerId"
				echo '1.0' > "$dir/$layerId/VERSION"

				echo "$imageJson" > "$dir/$layerId/json"

				# TODO figure out why "-C -" doesn't work here
				# "curl: (33) HTTP server doesn't seem to support byte ranges. Cannot resume."
				# "HTTP/1.1 416 Requested Range Not Satisfiable"
				if [ -f "$dir/$layerId/layer.tar" ]; then
					# TODO hackpatch for no -C support :'(
					echo "skipping existing ${layerId:0:12}"
					continue
				fi
				token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | py_jq --raw-output '.token')"
				# "--progress" alone is ambiguous for curl versions that also
				# know --progress-meter
				fetch_blob "$token" "$image" "$imageLayer" "$dir/$layerId/layer.tar" --progress-bar
			done
			;;

		*)
			echo >&2 "error: unknown manifest schemaVersion ($imageIdentifier): '$schemaVersion'"
			exit 1
			;;
	esac

	echo

	# append '"<tag>": "<id>"' to the tag list of this image; a new image is
	# remembered in images the first time one of its tags is seen
	if [ -s "$dir/tags-$imageFile.tmp" ]; then
		echo -n ', ' >> "$dir/tags-$imageFile.tmp"
	else
		images=( "${images[@]}" "$image" )
	fi
	echo -n '"'"$tag"'": "'"$imageId"'"' >> "$dir/tags-$imageFile.tmp"
done
# Write the old-style "repositories" file:
#   { "<image>": { "<tag>": "<id>", ... }, ... }
# The per-image tag lists were collected in "$dir"/tags-<image>.tmp.
{
	printf '{'
	firstImage=1
	for image in "${images[@]}"; do
		imageFile="${image//\//_}" # "/" can't be in filenames :)
		image="${image#library\/}"

		[ "$firstImage" ] || printf ','
		firstImage=
		printf '\n\t'
		printf '"%s": { %s }' "$image" "$(cat "$dir/tags-$imageFile.tmp")"
	done
	printf '\n}\n'
} > "$dir/repositories"

rm -f "$dir"/tags-*.tmp

if [ -z "$doNotGenerateManifestJson" ] && [ "${#manifestJsonEntries[@]}" -gt 0 ]; then
	# prefer "python", fall back to "python3" where only that name exists
	pythonBin="$(command -v python || command -v python3)" || {
		echo >&2 'error: "python" not found!'
		exit 1
	}
	# Every entry is a JSON string literal whose content is a JSON object
	# (as printed by "py_jq make_mani"), hence the two decoding steps. The
	# entries are handed over as arguments rather than being pasted into the
	# python source text.
	"$pythonBin" -Ssc '
import json, sys
entries = [json.loads(json.loads(e)) for e in sys.argv[1:]]
print(json.dumps(entries, indent=4))
' "${manifestJsonEntries[@]}" > "$dir/manifest.json"
else
	rm -f "$dir/manifest.json"
fi

echo "Download of images into '$dir' complete."
echo "Use something like the following to load the result into a Docker daemon:"
echo " tar -cC '$dir' . | docker load"
# (GitHub page footer, not part of the script: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment")