varenc · April 24, 2023 20:07
diff --git a/extract_vcf_image.sh b/extract_vcf_image.sh
 ### Extract the contact image from a .vcf file.  This is otherwise impossible with macOS contacts.
 ### To work, first export a single contact as a .vcf file.
 #
 # Example Usage:
 # $ extract_vcf_image input.vcf > output.jpg
 # $ extract_vcf_image input.vcf output.jpg
 # $ file output.jpg
 # output.jpg:  JPEG image data, JFIF standard 1.01, aspect ratio, density 72x72, segment length 16, Exif Standard: [TIFF image data, big-endian, direntries=1TIFF image data, big-endian, direntries=1], baseline, precision 8, 148x148, components 3
 #
 # Arguments:
 #   $1 - path to the exported .vcf file (required)
 #   $2 - output path; if omitted, empty or '-', the image is written to stdout
 # Returns:
 #   0 on success; 1 on a usage error, a missing input file, a file without a
 #   base64 PHOTO property, or when no working base64 decoder is available.
 #   Otherwise the exit status of the base64 decoder.
 #
 # Notes:
 # - All diagnostics go to stderr so that they never end up inside the image when stdout is redirected.
 # - Assumes one contact image per .vcf file; only the first PHOTO property is extracted.
 # - Requires awk and base64 (the decode flag is probed, so both GNU and macOS/BSD base64 work).
 # - Tested only with base64 encoded jpg images, but should work with other formats
 # - Only supports images that are base64 encoded (PHOTO;ENCODING=b;TYPE=...). Other encodings are not supported.

 extract_vcf_image() {
   if [ -z "${1:-}" ]; then
     echo "Usage: extract_vcf_image <input.vcf> [<output_image>] (outputs to STDOUT if 2nd arg not provided)" >&2
     return 1
   fi
   if [ ! -f "$1" ]; then
     echo "File not found: $1" >&2
     return 1
   fi

   # '-' keeps the original convention for "write to stdout".
   local output_path="${2:--}"

   # GNU base64 decodes with -d, older macOS only with -D; --decode is accepted
   # by both. Probe with a known value ("QQ==" decodes to "A") instead of guessing.
   local flag decode_flag=''
   for flag in --decode -d -D; do
     if [ "$(printf 'QQ==' | base64 "$flag" 2>/dev/null)" = 'A' ]; then
       decode_flag="$flag"
       break
     fi
   done
   if [ -z "$decode_flag" ]; then
     echo "extract_vcf_image: no working 'base64' decoder found" >&2
     return 1
   fi

   # Collect the payload of the first PHOTO property: the text after the header
   # on the header line plus every following folded line (folded lines start
   # with a space or tab). Stop at the first line that is not a continuation.
   # Spaces, tabs and carriage returns are stripped because not every base64
   # implementation ignores them. The file is passed on stdin so that a file
   # name containing '=' is not mistaken for an awk variable assignment.
   local encoded
   encoded=$(awk '
     in_photo {
       if ($0 !~ /^[ \t]/) exit
       gsub(/[ \t\r]/, "")
       print
       next
     }
     match($0, /PHOTO;ENCODING=b;TYPE=[A-Z]+:/) {
       payload = substr($0, RSTART + RLENGTH)
       gsub(/[ \t\r]/, "", payload)
       print payload
       in_photo = 1
     }
   ' < "$1")

   # Checked before any redirection so a missing photo never creates or
   # truncates the output file.
   if [ -z "$encoded" ]; then
     echo "extract_vcf_image: no base64 encoded PHOTO found in: $1" >&2
     return 1
   fi

   if [ "$output_path" = '-' ]; then
     printf '%s\n' "$encoded" | base64 "$decode_flag"
   else
     printf '%s\n' "$encoded" | base64 "$decode_flag" > "$output_path"
   fi
 }
	### Extract the contact image from a .vcf file. This is otherwise impossible with macOS contacts.
	### To work, first export a single contact as a .vcf file.
	#
	# Example Usage:
	# $ extract_vcf_image input.vcf > output.jpg
	# $ extract_vcf_image input.vcf output.jpg
	# $ file output.jpg
	# output.jpg: JPEG image data, JFIF standard 1.01, aspect ratio, density 72x72, segment length 16, Exif Standard: [TIFF image data, big-endian, direntries=1TIFF image data, big-endian, direntries=1], baseline, precision 8, 148x148, components 3
	#
	# Arguments:
	#   $1 - path to the exported .vcf file (required)
	#   $2 - output path; if omitted, empty or '-', the image is written to stdout
	# Returns:
	#   0 on success; 1 on a usage error, a missing input file, a file without a
	#   base64 PHOTO property, or when no working base64 decoder is available.
	#   Otherwise the exit status of the base64 decoder.
	#
	# Notes:
	# - All diagnostics go to stderr so that they never end up inside the image when stdout is redirected.
	# - Assumes one contact image per .vcf file; only the first PHOTO property is extracted.
	# - Requires awk and base64 (the decode flag is probed, so both GNU and macOS/BSD base64 work).
	# - Tested only with base64 encoded jpg images, but should work with other formats
	# - Only supports images that are base64 encoded (PHOTO;ENCODING=b;TYPE=...). Other encodings are not supported.

	extract_vcf_image() {
		if [ -z "${1:-}" ]; then
			echo "Usage: extract_vcf_image <input.vcf> [<output_image>] (outputs to STDOUT if 2nd arg not provided)" >&2
			return 1
		fi
		if [ ! -f "$1" ]; then
			echo "File not found: $1" >&2
			return 1
		fi

		# '-' keeps the original convention for "write to stdout".
		local output_path="${2:--}"

		# GNU base64 decodes with -d, older macOS only with -D; --decode is accepted
		# by both. Probe with a known value ("QQ==" decodes to "A") instead of guessing.
		local flag decode_flag=''
		for flag in --decode -d -D; do
			if [ "$(printf 'QQ==' | base64 "$flag" 2>/dev/null)" = 'A' ]; then
				decode_flag="$flag"
				break
			fi
		done
		if [ -z "$decode_flag" ]; then
			echo "extract_vcf_image: no working 'base64' decoder found" >&2
			return 1
		fi

		# Collect the payload of the first PHOTO property: the text after the header
		# on the header line plus every following folded line (folded lines start
		# with a space or tab). Stop at the first line that is not a continuation.
		# Spaces, tabs and carriage returns are stripped because not every base64
		# implementation ignores them. The file is passed on stdin so that a file
		# name containing '=' is not mistaken for an awk variable assignment.
		local encoded
		encoded=$(awk '
			in_photo {
				if ($0 !~ /^[ \t]/) exit
				gsub(/[ \t\r]/, "")
				print
				next
			}
			match($0, /PHOTO;ENCODING=b;TYPE=[A-Z]+:/) {
				payload = substr($0, RSTART + RLENGTH)
				gsub(/[ \t\r]/, "", payload)
				print payload
				in_photo = 1
			}
		' < "$1")

		# Checked before any redirection so a missing photo never creates or
		# truncates the output file.
		if [ -z "$encoded" ]; then
			echo "extract_vcf_image: no base64 encoded PHOTO found in: $1" >&2
			return 1
		fi

		if [ "$output_path" = '-' ]; then
			printf '%s\n' "$encoded" | base64 "$decode_flag"
		else
			printf '%s\n' "$encoded" | base64 "$decode_flag" > "$output_path"
		fi
	}