#!/usr/bin/env bash
set -eo pipefail

# from moby project - w/o go dependency (nailing amd64) and w/o jq dep (using python)

# hello-world latest ef872312fe1b 3 months ago 910 B
# hello-world latest ef872312fe1bbc5e05aae626791a47ee9b032efa8f3bda39cc0be7b56bfe59b9 3 months ago 910 B

# debian latest f6fab3b798be 10 weeks ago 85.1 MB
# debian latest f6fab3b798be3174f45aa1eb731f8182705555f89c9026d8c1ef230cbf8301dd 10 weeks ago 85.1 MB

# check if essential commands are in our PATH
for cmd in curl python sha256sum; do
    if ! command -v $cmd &> /dev/null; then
        echo >&2 "error: \"$cmd\" not found!"
        exit 1
    fi
done
#set -x
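
# py_jq: a minimal jq stand-in implemented in Python so the script has no jq
# dependency. It only understands the handful of filter expressions used below
# (e.g. '.token', '.config.digest', '.layers[]') plus a few named helper modes
# (id_and_parent, make_mani, del_hist_and_root). Note it is invoked with an
# unquoted $* on purpose: some helper modes rely on the shell word-splitting
# their JSON arguments and on re-joining the pieces in Python.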
py_jq () {
    python -Ssc 'if 1:
        #log = lambda s: open("/tmp/arg", "a").write("\n----" + str(s))
        # from time import sleep # debugging in pipes, make it stop, watch log
        import json as j, sys; jl=j.loads; jd=j.dumps
        r=sys.stdin.read; w=sys.stdout.write
        a=sys.argv[1:]; ra="--raw-output"; co="--compact-output"
        if a[0] == "id_and_parent":
            s, m = jl(open(a[1]).read()), {}
            m["id"] = s["id"]
            p = s.get("parent")
            if p is not None:
                m["parent"] = p
            w(jd(m))
        elif a[0] == "make_mani":
            w(jd("".join(a[1:])))
            sys.exit(0)
        elif a[0] == "del_hist_and_root":
            fn = a[-1]
            j = jl(open(a[-1]).read())
            j.update(jl("".join(a[1:5])))
            j.pop("history")
            j.pop("rootfs")
            w(jd(j, indent=4))
        elif len(a) == 2 and ra in a and a[-1].startswith("."):
            js_in = jl(r())
            p = a[-1][1:].split(".")
            while p:
                js_in = js_in[p.pop(0)]
            w(str(js_in))
        elif a and a[-1] in (".manifests[]", ".layers[]"):
            js_in = jl(r())
            for m in js_in[a[-1][1:-2]]:
                print(jd(m))
        else:
            raise Exception("not implemented: %s" % str(a))
    ' $* || exit 1
}
usage() {
    echo "usage: $0 dir image[:tag][@digest] ..."
    echo "       $0 /tmp/old-hello-world hello-world:latest@sha256:8be990ef2aeb16dbcb9271ddfe2610fa6658d13f6dfb8bc72074cc1ca36966a7"
    [ -z "$1" ] || exit "$1"
}

dir="$1" # dir for building tar in
shift || usage 1 >&2
[ $# -gt 0 -a "$dir" ] || usage 2 >&2
mkdir -p "$dir"

# hacky workarounds for Bash 3 support (no associative arrays)
images=()
rm -f "$dir"/tags-*.tmp
manifestJsonEntries=()
doNotGenerateManifestJson=
# repositories[busybox]='"latest": "...", "ubuntu-14.04": "..."'

# bash v4 on Windows CI requires CRLF separator
newlineIFS=$'\n'

registryBase='https://registry-1.docker.io'
authBase='https://auth.docker.io'
authService='registry.docker.io'

# https://github.com/moby/moby/issues/33700
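# fetch_blob: download one blob from the registry. The registry may answer
# with a 3xx redirect to a blob store / CDN; in that case the Location target
# is fetched again without the Authorization header, since some backends
# reject requests that carry it (see the moby issue linked above).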
fetch_blob() {
    local token="$1"; shift
    local image="$1"; shift
    local digest="$1"; shift
    local targetFile="$1"; shift
    local curlArgs=( "$@" )

    local curlHeaders="$(
        curl -S "${curlArgs[@]}" \
            -H "Authorization: Bearer $token" \
            "$registryBase/v2/$image/blobs/$digest" \
            -o "$targetFile" \
            -D-
    )"
    curlHeaders="$(echo "$curlHeaders" | tr -d '\r')"
    if grep -qE "^HTTP/[0-9.]+ 3" <<<"$curlHeaders"; then # 3xx redirect (HTTP/1.x or HTTP/2 status line)
        rm -f "$targetFile"

        local blobRedirect="$(echo "$curlHeaders" | awk -F ': ' 'tolower($1) == "location" { print $2; exit }')"
        if [ -z "$blobRedirect" ]; then
            echo >&2 "error: failed fetching '$image' blob '$digest'"
            echo "$curlHeaders" | head -1 >&2
            return 1
        fi

        curl -fSL "${curlArgs[@]}" \
            "$blobRedirect" \
            -o "$targetFile"
    fi
}

# handle 'application/vnd.docker.distribution.manifest.v2+json' manifest
handle_single_manifest_v2() {
    local manifestJson="$1"; shift

    local configDigest="$(echo "$manifestJson" | py_jq --raw-output '.config.digest')"
    local imageId="${configDigest#*:}" # strip off "sha256:"

    local configFile="$imageId.json"
    fetch_blob "$token" "$image" "$configDigest" "$dir/$configFile" -s

    local layersFs="$(echo "$manifestJson" | py_jq --raw-output --compact-output '.layers[]')"
    local IFS="$newlineIFS"
    local layers=( $layersFs )
    unset IFS

    echo "Downloading '$imageIdentifier' (${#layers[@]} layers)..."
    local layerId=
    local layerFiles=()
    for i in "${!layers[@]}"; do
        local layerMeta="${layers[$i]}"

        local layerMediaType="$(echo "$layerMeta" | py_jq --raw-output '.mediaType')"
        local layerDigest="$(echo "$layerMeta" | py_jq --raw-output '.digest')"

        # save the previous layer's ID
        local parentId="$layerId"
        # create a new fake layer ID based on this layer's digest and the previous layer's fake ID
        layerId="$(echo "$parentId"$'\n'"$layerDigest" | sha256sum | cut -d' ' -f1)"
        # this accounts for the possibility that an image contains the same layer twice (and thus has a duplicate digest value)

        mkdir -p "$dir/$layerId"
        echo '1.0' > "$dir/$layerId/VERSION"

        if [ ! -s "$dir/$layerId/json" ]; then
            # this starter JSON is taken directly from Docker's own "docker save" output for unimportant layers
            echo -e '
            {
                "id": "'$layerId'",
                "parent": "'$parentId'",
                "created": "0001-01-01T00:00:00Z",
                "container_config": {
                    "Hostname": "",
                    "Domainname": "",
                    "User": "",
                    "AttachStdin": false,
                    "AttachStdout": false,
                    "AttachStderr": false,
                    "Tty": false,
                    "OpenStdin": false,
                    "StdinOnce": false,
                    "Env": null,
                    "Cmd": null,
                    "Image": "",
                    "Volumes": null,
                    "WorkingDir": "",
                    "Entrypoint": null,
                    "OnBuild": null,
                    "Labels": null
                }
            }' > "$dir/$layerId/json"
        fi

        case "$layerMediaType" in
            application/vnd.docker.image.rootfs.diff.tar.gzip)
                local layerTar="$layerId/layer.tar"
                layerFiles=( "${layerFiles[@]}" "$layerTar" )

                # TODO figure out why "-C -" doesn't work here
                # "curl: (33) HTTP server doesn't seem to support byte ranges. Cannot resume."
                # "HTTP/1.1 416 Requested Range Not Satisfiable"
                if [ -f "$dir/$layerTar" ]; then
                    # TODO hackpatch for no -C support :'(
                    echo "skipping existing ${layerId:0:12}"
                    continue
                fi
                local token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | py_jq --raw-output '.token')"
                fetch_blob "$token" "$image" "$layerDigest" "$dir/$layerTar" --progress-bar
                ;;
            *)
                echo >&2 "error: unknown layer mediaType ($imageIdentifier, $layerDigest): '$layerMediaType'"
                exit 1
                ;;
        esac
    done

    # change "$imageId" to be the ID of the last layer we added (needed for old-style "repositories" file which is created later -- specifically for older Docker daemons)
    imageId="$layerId"

    # munge the top layer image manifest to have the appropriate image configuration for older daemons
    #local imageOldConfig="$(py_jq --raw-output --compact-output '{ id: .id } + if .parent then { parent: .parent } else {} end' "$dir/$imageId/json")"
    local imageOldConfig="$(py_jq id_and_parent "$dir/$imageId/json")"
    py_jq del_hist_and_root "$imageOldConfig" "$dir/$configFile" > "$dir/$imageId/json"

    local ll=""
    for layerFile in "${layerFiles[@]}"; do ll="$ll\"$layerFile\","; done
    local manifestJsonEntry="$(
        py_jq make_mani '{
            "Config": "'"$configFile"'",
            "RepoTags": ["'"${image#library\/}:$tag"'"],
            "Layers": ['${ll%?}']
        }'
    )"
    manifestJsonEntries=( "${manifestJsonEntries[@]}" "$manifestJsonEntry" )
}
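
# main loop: every remaining argument names one image as image[:tag][@digest].
# Each is split into repository path, tag and digest, an anonymous pull token
# is requested from the Hub token service, the manifest is fetched, and the
# result is dispatched on its schemaVersion / mediaType.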
while [ $# -gt 0 ]; do
    imageTag="$1"
    shift
    image="${imageTag%%[:@]*}"
    imageTag="${imageTag#*:}"
    digest="${imageTag##*@}"
    tag="${imageTag%%@*}"

    # add prefix library if passed official image
    if [[ "$image" != *"/"* ]]; then
        image="library/$image"
    fi

    imageFile="${image//\//_}" # "/" can't be in filenames :)
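
    # anonymous bearer token from Docker Hub's token service, scoped to pulling
    # this one repository; it is sent with every /v2/ request below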
    token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | py_jq --raw-output '.token')"

    manifestJson="$(
        curl -fsSL \
            -H "Authorization: Bearer $token" \
            -H 'Accept: application/vnd.docker.distribution.manifest.v2+json' \
            -H 'Accept: application/vnd.docker.distribution.manifest.list.v2+json' \
            -H 'Accept: application/vnd.docker.distribution.manifest.v1+json' \
            "$registryBase/v2/$image/manifests/$digest"
    )"
    if [ "${manifestJson:0:1}" != '{' ]; then
        echo >&2 "error: /v2/$image/manifests/$digest returned something unexpected:"
        echo >&2 "  $manifestJson"
        exit 1
    fi

    imageIdentifier="$image:$tag@$digest"

    schemaVersion="$(echo "$manifestJson" | py_jq --raw-output '.schemaVersion')"
    case "$schemaVersion" in
        2)
            mediaType="$(echo "$manifestJson" | py_jq --raw-output '.mediaType')"

            case "$mediaType" in
                application/vnd.docker.distribution.manifest.v2+json)
                    handle_single_manifest_v2 "$manifestJson"
                    ;;
                application/vnd.docker.distribution.manifest.list.v2+json)
                    layersFs="$(echo "$manifestJson" | py_jq --raw-output --compact-output '.manifests[]')"
                    IFS="$newlineIFS"
                    layers=( $layersFs )
                    unset IFS

                    found=""
                    # parse first level multi-arch manifest
                    for i in "${!layers[@]}"; do
                        layerMeta="${layers[$i]}"
                        maniArch="$(echo "$layerMeta" | py_jq --raw-output '.platform.architecture')"
                        if [ "$maniArch" = "amd64" ]; then
                            digest="$(echo "$layerMeta" | py_jq --raw-output '.digest')"

                            # get second level single manifest
                            submanifestJson="$(
                                curl -fsSL \
                                    -H "Authorization: Bearer $token" \
                                    -H 'Accept: application/vnd.docker.distribution.manifest.v2+json' \
                                    -H 'Accept: application/vnd.docker.distribution.manifest.list.v2+json' \
                                    -H 'Accept: application/vnd.docker.distribution.manifest.v1+json' \
                                    "$registryBase/v2/$image/manifests/$digest"
                            )"
                            handle_single_manifest_v2 "$submanifestJson"
                            found="found"
                            break
                        fi
                    done
                    if [ -z "$found" ]; then
                        echo >&2 "error: no amd64 manifest found for '$imageIdentifier'"
                        exit 1
                    fi
                    ;;
                *)
                    echo >&2 "error: unknown manifest mediaType ($imageIdentifier): '$mediaType'"
                    exit 1
                    ;;
            esac
            ;;
        1)
            if [ -z "$doNotGenerateManifestJson" ]; then
                echo >&2 "warning: '$imageIdentifier' uses schemaVersion '$schemaVersion'"
                echo >&2 "  this script cannot (currently) recreate the 'image config' to put in a 'manifest.json' (thus any schemaVersion 2+ images will be imported in the old way, and their 'docker history' will suffer)"
                echo >&2
                doNotGenerateManifestJson=1
            fi

            layersFs="$(echo "$manifestJson" | py_jq --raw-output '.fsLayers | .[] | .blobSum')"
            IFS="$newlineIFS"
            layers=( $layersFs )
            unset IFS

            history="$(echo "$manifestJson" | py_jq '.history | [.[] | .v1Compatibility]')"
            imageId="$(echo "$history" | py_jq --raw-output '.[0]' | py_jq --raw-output '.id')"

            echo "Downloading '$imageIdentifier' (${#layers[@]} layers)..."
            for i in "${!layers[@]}"; do
                imageJson="$(echo "$history" | py_jq --raw-output ".[${i}]")"
                layerId="$(echo "$imageJson" | py_jq --raw-output '.id')"
                imageLayer="${layers[$i]}"

                mkdir -p "$dir/$layerId"
                echo '1.0' > "$dir/$layerId/VERSION"

                echo "$imageJson" > "$dir/$layerId/json"

                # TODO figure out why "-C -" doesn't work here
                # "curl: (33) HTTP server doesn't seem to support byte ranges. Cannot resume."
                # "HTTP/1.1 416 Requested Range Not Satisfiable"
                if [ -f "$dir/$layerId/layer.tar" ]; then
                    # TODO hackpatch for no -C support :'(
                    echo "skipping existing ${layerId:0:12}"
                    continue
                fi
                token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | py_jq --raw-output '.token')"
                fetch_blob "$token" "$image" "$imageLayer" "$dir/$layerId/layer.tar" --progress-bar
            done
            ;;
        *)
            echo >&2 "error: unknown manifest schemaVersion ($imageIdentifier): '$schemaVersion'"
            exit 1
            ;;
    esac

    echo

    if [ -s "$dir/tags-$imageFile.tmp" ]; then
        echo -n ', ' >> "$dir/tags-$imageFile.tmp"
    else
        images=( "${images[@]}" "$image" )
    fi
    echo -n '"'"$tag"'": "'"$imageId"'"' >> "$dir/tags-$imageFile.tmp"
done
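
# assemble the legacy "repositories" file for older Docker daemons: one JSON
# object mapping each image name to its '"tag": "<top layer id>"' entries,
# collected above in the per-image tags-*.tmp fragments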
echo -n '{' > "$dir/repositories"
firstImage=1
for image in "${images[@]}"; do
    imageFile="${image//\//_}" # "/" can't be in filenames :)
    image="${image#library\/}"

    [ "$firstImage" ] || echo -n ',' >> "$dir/repositories"
    firstImage=

    echo -n $'\n\t' >> "$dir/repositories"
    echo -n '"'"$image"'": { '"$(cat "$dir/tags-$imageFile.tmp")"' }' >> "$dir/repositories"
done
echo -n $'\n}\n' >> "$dir/repositories"

rm -f "$dir"/tags-*.tmp
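
# newer daemons read manifest.json instead; emit it as a JSON array of the
# entries collected by handle_single_manifest_v2 (skipped if any schemaVersion
# 1 image was seen, since no image config is available for those)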
if [ -z "$doNotGenerateManifestJson" ] && [ "${#manifestJsonEntries[@]}" -gt 0 ]; then
    ll=""; for e in "${manifestJsonEntries[@]}"; do ll="${ll}$e,"; done
    python -Ssc "import json; import sys; print(json.dumps([json.loads(k) for k in [$ll]], indent=4))" > "$dir/manifest.json"
else
    rm -f "$dir/manifest.json"
fi

echo "Download of images into '$dir' complete."
echo "Use something like the following to load the result into a Docker daemon:"
echo "  tar -cC '$dir' . | docker load"