|
#!/usr/bin/env bash
# Stop at the first failing command, and let a failure in any stage of a
# pipeline fail the pipeline as a whole.
set -o errexit -o pipefail
|
|
|
# This script was developed for use in Moby's CI, and as such the use cases and |
|
# usability are (intentionally) limited. You may find this script useful for |
|
# educational purposes, for example, to learn how pulling images works "under |
|
# the hood", and you may be able to use it for other purposes, but it should not |
|
# be considered a "general purpose" tool for pulling images. |
|
# |
|
# The project maintainers accept contributions to this script within its intended |
|
# scope, but may not accept contributions beyond that. |
|
# |
|
# For users who have a similar need but require more flexibility/functionality, |
|
# refer to the discussion on GitHub, which mentions various alternatives that |
|
# are more suitable for other uses: https://github.com/moby/moby/issues/40857 |
|
|
|
# hello-world latest ef872312fe1b 3 months ago 910 B |
|
# hello-world latest ef872312fe1bbc5e05aae626791a47ee9b032efa8f3bda39cc0be7b56bfe59b9 3 months ago 910 B |
|
|
|
# debian latest f6fab3b798be 10 weeks ago 85.1 MB |
|
# debian latest f6fab3b798be3174f45aa1eb731f8182705555f89c9026d8c1ef230cbf8301dd 10 weeks ago 85.1 MB |
|
|
|
# check if essential commands are in our PATH
# (both are needed for every image: curl talks to the registry, jq parses its
# JSON answers)
for cmd in curl jq; do
	if ! command -v "$cmd" &> /dev/null; then
		echo >&2 "error: \"$cmd\" not found!"
		exit 1
	fi
done
|
|
|
# Print the command-line synopsis plus one example invocation to stdout.
#
# Arguments:
#   $1 - optional exit status; when given (non-empty) the script exits with it
#        after printing, otherwise the function simply returns.
usage() {
	local exitStatus="$1"

	printf '%s\n' \
		"usage: $0 dir image[:tag][@digest] ..." \
		" $0 /tmp/old-hello-world hello-world:latest@sha256:8be990ef2aeb16dbcb9271ddfe2610fa6658d13f6dfb8bc72074cc1ca36966a7"

	if [ -n "$exitStatus" ]; then
		exit "$exitStatus"
	fi
}
|
|
|
# Positional arguments: the first one is the directory the image tree is built
# in, every remaining one is an image reference to download.
dir="$1" # dir for building tar in
# "shift" fails when no argument was given at all
shift || usage 1 >&2

# Both a non-empty directory and at least one image are required; show the
# usage text (exit status 2) when either is missing instead of letting
# "mkdir" fail on an empty path.
if [ $# -eq 0 ] || [ -z "$dir" ]; then
	usage 2 >&2
fi
mkdir -p "$dir"
|
|
|
# hacky workarounds for Bash 3 support (no associative arrays)
# "images" lists every repository seen so far; the tags of each repository are
# collected in a "$dir/tags-<repository>.tmp" file instead of an associative
# array, so leftovers from an earlier run are removed first.
images=()
rm -f "$dir"/tags-*.tmp
# one JSON object per downloaded image, later combined into "manifest.json"
manifestJsonEntries=()
# set to a non-empty value as soon as an image cannot be described in "manifest.json"
doNotGenerateManifestJson=
# repositories[busybox]='"latest": "...", "ubuntu-14.04": "..."'

# bash v4 on Windows CI requires CRLF separator... and linux doesn't seem to care either way
newlineIFS=$'\n'
# BASH_VERSINFO[0] is the major version of the running shell
major="${BASH_VERSINFO[0]}"
if [ "$major" -ge 4 ]; then
	newlineIFS=$'\r\n'
fi

# endpoints of the registry the images are pulled from
registryBase='https://registry-1.docker.io'
authBase='https://auth.docker.io'
authService='registry.docker.io'
|
|
|
# Download one blob of a repository into a local file.
#
# The first request carries the bearer token and is made without "-L"; when the
# registry answers with a redirect, the target is fetched by a second request
# that does not carry the "Authorization" header.
# https://github.com/moby/moby/issues/33700
#
# Arguments:
#   $1 - bearer token for the registry
#   $2 - repository name (for example "library/debian")
#   $3 - digest of the blob
#   $4 - file the blob is written to
#   $5.. - extra arguments passed to every curl invocation
# Globals:
#   registryBase (read)
# Returns:
#   0 when the blob was written to "$4"; non-zero when curl failed, when the
#   registry answered with an error status, or when a redirect had no target.
fetch_blob() {
	local token="$1"
	local image="$2"
	local digest="$3"
	local targetFile="$4"
	shift 4
	local curlArgs=("$@")

	local curlHeaders
	curlHeaders="$(
		curl -S "${curlArgs[@]}" \
			-H "Authorization: Bearer $token" \
			"$registryBase/v2/$image/blobs/$digest" \
			-o "$targetFile" \
			-D-
	)" || return
	curlHeaders="$(echo "$curlHeaders" | tr -d '\r')"

	# Use the last status line: it belongs to the registry's answer even when
	# an earlier header block is present. The pattern accepts both
	# "HTTP/1.1 307 Temporary Redirect" and the HTTP/2 form "HTTP/2 307".
	local statusLine
	statusLine="$(awk '/^HTTP\/[0-9.]+ [0-9][0-9][0-9]/ { line = $0 } END { print line }' <<< "$curlHeaders")"
	local statusCode="${statusLine#* }"
	statusCode="${statusCode%% *}"

	case "$statusCode" in
		2??)
			# the blob itself was delivered
			;;

		3??)
			# what was saved so far is the redirect page, not the blob
			rm -f "$targetFile"

			local blobRedirect
			# header names are case-insensitive; keep everything after the first colon
			blobRedirect="$(awk 'tolower($0) ~ /^location:/ { sub(/^[^:]*:[ \t]*/, ""); print; exit }' <<< "$curlHeaders")"
			if [ -z "$blobRedirect" ]; then
				echo >&2 "error: failed fetching '$image' blob '$digest'"
				echo >&2 "$statusLine"
				return 1
			fi

			curl -fSL "${curlArgs[@]}" \
				"$blobRedirect" \
				-o "$targetFile" || return
			;;

		*)
			# an error document must not be mistaken for the blob later on
			rm -f "$targetFile"
			echo >&2 "error: failed fetching '$image' blob '$digest'"
			echo >&2 "$statusLine"
			return 1
			;;
	esac

	return 0
}
|
|
|
# handle 'application/vnd.docker.distribution.manifest.v2+json' manifest
#
# Downloads the image config and every layer listed in the manifest into
# "$dir", creates one directory with a fake layer ID per layer, and records an
# entry for the image in "manifestJsonEntries".
#
# Arguments:
#   $1 - the manifest as a JSON document
# Globals read:
#   token, image, tag, dir, imageIdentifier, newlineIFS, authBase, authService
# Globals written:
#   manifestJsonEntries - one entry is appended
#   imageId             - set to the fake ID of the last layer; the caller
#                         writes it into the old-style "repositories" file
# Exits the script when a layer has an unknown media type.
handle_single_manifest_v2() {
	local manifestJson="$1"
	shift

	local configDigest
	configDigest="$(echo "$manifestJson" | jq --raw-output '.config.digest')"
	# Deliberately not called "imageId": a local of that name would hide the
	# global "imageId" that has to be updated at the end of this function.
	local configId="${configDigest#*:}" # strip off "sha256:"

	local configFile="$configId.json"
	fetch_blob "$token" "$image" "$configDigest" "$dir/$configFile" -s

	local layersFs
	layersFs="$(echo "$manifestJson" | jq --raw-output --compact-output '.layers[]')"
	# NOTE(review): mapfile splits on newlines by itself; presumably this IFS
	# override is a leftover from word-splitting based parsing -- confirm
	# before removing it.
	local IFS="$newlineIFS"
	local layers
	mapfile -t layers <<< "$layersFs"
	unset IFS

	echo "Downloading '$imageIdentifier' (${#layers[@]} layers)..."
	local i layerMeta layerMediaType layerDigest parentId layerTar layerToken
	local layerId=
	local layerFiles=()
	for i in "${!layers[@]}"; do
		layerMeta="${layers[$i]}"

		layerMediaType="$(echo "$layerMeta" | jq --raw-output '.mediaType')"
		layerDigest="$(echo "$layerMeta" | jq --raw-output '.digest')"

		# save the previous layer's ID
		parentId="$layerId"
		# create a new fake layer ID based on this layer's digest and the previous layer's fake ID
		layerId="$(echo "$parentId"$'\n'"$layerDigest" | sha256sum | cut -d' ' -f1)"
		# this accounts for the possibility that an image contains the same layer twice (and thus has a duplicate digest value)

		mkdir -p "$dir/$layerId"
		echo '1.0' > "$dir/$layerId/VERSION"

		if [ ! -s "$dir/$layerId/json" ]; then
			# this starter JSON is taken directly from Docker's own "docker save" output for unimportant layers
			# ("parent" is only present when there is a previous layer)
			jq --null-input \
				--arg id "$layerId" \
				--arg parent "$parentId" \
				'
					{ id: $id }
					+ (if $parent == "" then {} else { parent: $parent } end)
					+ {
						created: "0001-01-01T00:00:00Z",
						container_config: {
							Hostname: "",
							Domainname: "",
							User: "",
							AttachStdin: false,
							AttachStdout: false,
							AttachStderr: false,
							Tty: false,
							OpenStdin: false,
							StdinOnce: false,
							Env: null,
							Cmd: null,
							Image: "",
							Volumes: null,
							WorkingDir: "",
							Entrypoint: null,
							OnBuild: null,
							Labels: null
						}
					}
				' > "$dir/$layerId/json"
		fi

		case "$layerMediaType" in
			application/vnd.docker.image.rootfs.diff.tar.gzip)
				layerTar="$layerId/layer.tar"
				layerFiles+=("$layerTar")
				# TODO figure out why "-C -" doesn't work here
				# "curl: (33) HTTP server doesn't seem to support byte ranges. Cannot resume."
				# "HTTP/1.1 416 Requested Range Not Satisfiable"
				if [ -f "$dir/$layerTar" ]; then
					# TODO hackpatch for no -C support :'(
					echo "skipping existing ${layerId:0:12}"
					continue
				fi
				# a fresh token is requested for every layer download
				layerToken="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')"
				fetch_blob "$layerToken" "$image" "$layerDigest" "$dir/$layerTar" --progress-bar
				;;

			*)
				echo >&2 "error: unknown layer mediaType ($imageIdentifier, $layerDigest): '$layerMediaType'"
				exit 1
				;;
		esac
	done

	# change the global "$imageId" to be the ID of the last layer we added (needed for old-style "repositories" file which is created later -- specifically for older Docker daemons)
	imageId="$layerId"

	# munge the top layer image manifest to have the appropriate image configuration for older daemons
	local imageOldConfig
	imageOldConfig="$(jq --raw-output --compact-output '{ id: .id } + if .parent then { parent: .parent } else {} end' "$dir/$imageId/json")"
	jq --raw-output "$imageOldConfig + del(.history, .rootfs)" "$dir/$configFile" > "$dir/$imageId/json"

	# The layer paths are fed to jq as raw text, one per line, and all other
	# values as --arg, so nothing is spliced into the jq program itself; an
	# empty layer list yields "Layers: []".
	local manifestJsonEntry
	manifestJsonEntry="$(
		printf '%s\n' "${layerFiles[@]}" | jq --raw-input --slurp \
			--arg config "$configFile" \
			--arg repoTag "${image#library/}:$tag" \
			'{
				Config: $config,
				RepoTags: [ $repoTag ],
				Layers: (split("\n") | map(select(length > 0)))
			}'
	)"
	manifestJsonEntries+=("$manifestJsonEntry")
}
|
|
|
# Print the CPU architecture whose image should be downloaded.
#
# Sources, in order of preference:
#   1. the TARGETARCH environment variable
#   2. "go env GOARCH"
#   3. "dpkg --print-architecture", translated to the registry's naming
#   4. "uname -m", translated to the registry's naming
#   5. "amd64", with a warning on stderr
#
# Exits (instead of guessing) on a mips machine identified only by "uname".
get_target_arch() {
	if [ -n "${TARGETARCH:-}" ]; then
		echo "${TARGETARCH}"
		return 0
	fi

	# "command -v" with both streams silenced: a missing tool is an expected
	# situation here and must not produce "not found" noise on stderr
	if command -v go > /dev/null 2>&1; then
		go env GOARCH
		return 0
	fi

	if command -v dpkg > /dev/null 2>&1; then
		local debArch
		debArch="$(dpkg --print-architecture)"
		case "${debArch}" in
			armel | armhf)
				echo "arm"
				;;
			*64el)
				# e.g. "mips64el" -> "mips64le"
				echo "${debArch%el}le"
				;;
			i386)
				# 32-bit x86 is called "386" in image manifests
				echo "386"
				;;
			*)
				echo "${debArch}"
				;;
		esac
		return 0
	fi

	if command -v uname > /dev/null 2>&1; then
		local uArch
		uArch="$(uname -m)"
		case "${uArch}" in
			x86_64)
				echo amd64
				;;
			i[3-6]86)
				# 32-bit x86 is called "386" in image manifests
				echo 386
				;;
			arm | armv[0-9]*)
				echo arm
				;;
			aarch64)
				echo arm64
				;;
			mips*)
				echo >&2 "I see you are running on mips but I don't know how to determine endianness yet, so I cannot select a correct arch to fetch."
				echo >&2 "Consider installing \"go\" on the system which I can use to determine the correct arch or specify it explicitly by setting TARGETARCH"
				exit 1
				;;
			*)
				echo "${uArch}"
				;;
		esac
		return 0
	fi

	# default value
	echo >&2 "Unable to determine CPU arch, falling back to amd64. You can specify a target arch by setting TARGETARCH"
	echo amd64
}
|
|
|
# Print the platform variant requested through the TARGETVARIANT environment
# variable, or an empty line when it is unset or empty.
get_target_variant() {
	local requestedVariant="${TARGETVARIANT:-}"
	echo "$requestedVariant"
}
|
|
|
# Split an image reference of the form "image[:tag][@digest]" into the globals
# "image", "tag" and "digest".
#
# A missing tag defaults to "latest". When no digest is given, "digest" is set
# to the tag, because it is the value used as the reference in the manifest
# request.
#
# Arguments:
#   $1 - the image reference as given on the command line
parse_image_reference() {
	local reference="$1"

	image="${reference%%[:@]*}"
	# what follows the name: "", ":tag", "@digest" or ":tag@digest"
	local remainder="${reference#"$image"}"

	digest=
	case "$remainder" in
		*@*)
			digest="${remainder#*@}"
			remainder="${remainder%%@*}"
			;;
	esac

	tag="${remainder#:}"
	tag="${tag:-latest}"
	digest="${digest:-$tag}"
}

# Request a manifest from the registry and print it on stdout.
#
# Arguments:
#   $1 - bearer token
#   $2 - repository name
#   $3 - tag or digest identifying the manifest
# Globals:
#   registryBase (read)
fetch_manifest() {
	local token="$1"
	local image="$2"
	local reference="$3"

	curl -fsSL \
		-H "Authorization: Bearer $token" \
		-H 'Accept: application/vnd.docker.distribution.manifest.v2+json' \
		-H 'Accept: application/vnd.docker.distribution.manifest.list.v2+json' \
		-H 'Accept: application/vnd.docker.distribution.manifest.v1+json' \
		"$registryBase/v2/$image/manifests/$reference"
}

# Download every image named on the command line, one per iteration.
while [ $# -gt 0 ]; do
	parse_image_reference "$1"
	shift

	# add prefix library if passed official image
	if [[ "$image" != *"/"* ]]; then
		image="library/$image"
	fi

	imageFile="${image//\//_}" # "/" can't be in filenames :)

	token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')"

	manifestJson="$(fetch_manifest "$token" "$image" "$digest")"
	if [ "${manifestJson:0:1}" != '{' ]; then
		echo >&2 "error: /v2/$image/manifests/$digest returned something unexpected:"
		echo >&2 " $manifestJson"
		exit 1
	fi

	imageIdentifier="$image:$tag@$digest"

	schemaVersion="$(echo "$manifestJson" | jq --raw-output '.schemaVersion')"
	case "$schemaVersion" in
		2)
			mediaType="$(echo "$manifestJson" | jq --raw-output '.mediaType')"

			case "$mediaType" in
				application/vnd.docker.distribution.manifest.v2+json)
					handle_single_manifest_v2 "$manifestJson"
					;;
				application/vnd.docker.distribution.manifest.list.v2+json)
					# one compact JSON object per platform-specific manifest
					layersFs="$(echo "$manifestJson" | jq --raw-output --compact-output '.manifests[]')"
					IFS="$newlineIFS"
					mapfile -t layers <<< "$layersFs"
					unset IFS

					found=""
					targetArch="$(get_target_arch)"
					targetVariant="$(get_target_variant)"
					# parse first level multi-arch manifest
					for i in "${!layers[@]}"; do
						layerMeta="${layers[$i]}"
						maniArch="$(echo "$layerMeta" | jq --raw-output '.platform.architecture')"
						maniVariant="$(echo "$layerMeta" | jq --raw-output '.platform.variant')"
						# the variant only has to match when one was requested
						if [[ "$maniArch" = "${targetArch}" ]] && [[ -z "${targetVariant}" || "$maniVariant" = "${targetVariant}" ]]; then
							digest="$(echo "$layerMeta" | jq --raw-output '.digest')"
							# get second level single manifest
							submanifestJson="$(fetch_manifest "$token" "$image" "$digest")"
							handle_single_manifest_v2 "$submanifestJson"
							found="found"
							break
						fi
					done
					if [ -z "$found" ]; then
						echo >&2 "error: manifest for ${targetArch}${targetVariant:+/${targetVariant}} is not found"
						exit 1
					fi
					;;
				*)
					echo >&2 "error: unknown manifest mediaType ($imageIdentifier): '$mediaType'"
					exit 1
					;;
			esac
			;;

		1)
			if [ -z "$doNotGenerateManifestJson" ]; then
				echo >&2 "warning: '$imageIdentifier' uses schemaVersion '$schemaVersion'"
				echo >&2 " this script cannot (currently) recreate the 'image config' to put in a 'manifest.json' (thus any schemaVersion 2+ images will be imported in the old way, and their 'docker history' will suffer)"
				echo >&2
				doNotGenerateManifestJson=1
			fi

			layersFs="$(echo "$manifestJson" | jq --raw-output '.fsLayers | .[] | .blobSum')"
			IFS="$newlineIFS"
			mapfile -t layers <<< "$layersFs"
			unset IFS

			# each history entry carries the layer's JSON as a string in "v1Compatibility"
			history="$(echo "$manifestJson" | jq '.history | [.[] | .v1Compatibility]')"
			imageId="$(echo "$history" | jq --raw-output '.[0]' | jq --raw-output '.id')"

			echo "Downloading '$imageIdentifier' (${#layers[@]} layers)..."
			for i in "${!layers[@]}"; do
				imageJson="$(echo "$history" | jq --raw-output ".[${i}]")"
				layerId="$(echo "$imageJson" | jq --raw-output '.id')"
				imageLayer="${layers[$i]}"

				mkdir -p "$dir/$layerId"
				echo '1.0' > "$dir/$layerId/VERSION"

				echo "$imageJson" > "$dir/$layerId/json"

				# TODO figure out why "-C -" doesn't work here
				# "curl: (33) HTTP server doesn't seem to support byte ranges. Cannot resume."
				# "HTTP/1.1 416 Requested Range Not Satisfiable"
				if [ -f "$dir/$layerId/layer.tar" ]; then
					# TODO hackpatch for no -C support :'(
					echo "skipping existing ${layerId:0:12}"
					continue
				fi
				# a fresh token is requested for every layer download
				token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')"
				fetch_blob "$token" "$image" "$imageLayer" "$dir/$layerId/layer.tar" --progress-bar
			done
			;;

		*)
			echo >&2 "error: unknown manifest schemaVersion ($imageIdentifier): '$schemaVersion'"
			exit 1
			;;
	esac

	echo

	# Collect '"tag": "id"' pairs per repository; a repository is added to
	# "images" the first time one of its tags is recorded.
	if [ -s "$dir/tags-$imageFile.tmp" ]; then
		echo -n ', ' >> "$dir/tags-$imageFile.tmp"
	else
		images=("${images[@]}" "$image")
	fi
	echo -n '"'"$tag"'": "'"$imageId"'"' >> "$dir/tags-$imageFile.tmp"
done
|
|
|
# Assemble the old-style "repositories" file: one entry per repository, each
# holding the tag/ID pairs collected in that repository's tags-*.tmp file.
{
	printf '{'
	separator=''
	for image in "${images[@]}"; do
		imageFile="${image//\//_}" # "/" can't be in filenames :)
		image="${image#library\/}"

		# every entry but the first is preceded by a comma
		printf '%s\n\t' "$separator"
		printf '"%s": { %s }' "$image" "$(cat "$dir/tags-$imageFile.tmp")"
		separator=','
	done
	printf '\n}\n'
} > "$dir/repositories"

# the per-repository tag files are only scratch space
rm -f "$dir"/tags-*.tmp

# "manifest.json" is only written when every image could be described in it;
# otherwise a file from an earlier run is removed.
if [ -n "$doNotGenerateManifestJson" ] || [ "${#manifestJsonEntries[@]}" -eq 0 ]; then
	rm -f "$dir/manifest.json"
else
	# build a jq program that appends every entry to the empty input array
	manifestFilter='.'
	for entry in "${manifestJsonEntries[@]}"; do
		manifestFilter+=" + [ $entry ]"
	done
	jq --raw-output "$manifestFilter" <<< '[]' > "$dir/manifest.json"
fi

printf '%s\n' \
	"Download of images into '$dir' complete." \
	"Use something like the following to load the result into a Docker daemon:" \
	" tar -cC '$dir' . | docker load"