Last active
April 22, 2025 12:39
-
-
Save elmimmo/f354056bac694ba06be643648c58819c to your computer and use it in GitHub Desktop.
Get legacy Mac OS file type and creator codes from files stored on a Synology NAS running DSM
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys, os, struct | |
def _read_at(f, offset, size):
    """Return exactly *size* bytes read at *offset*, or None if the file ends early."""
    f.seek(offset)
    chunk = f.read(size)
    return chunk if len(chunk) == size else None


def parse_appledouble(ad_path):
    """
    Open an AppleDouble file and return (file_type, creator_code).

    Lookup order:
      1. Finder Info (entry ID 9): the first 8 bytes are used only when both
         4-character codes are alphanumeric.
      2. Resource Fork (entry ID 2): the map offset is read at fork offset + 4
         and the 8 bytes at fork offset + map offset + 16 are taken as
         type + creator, with NUL bytes removed.

    Returns ("", "") when neither source yields codes, or when the file is too
    short to contain a field that has to be read (instead of raising
    struct.error).
    """
    with open(ad_path, "rb") as f:
        # number of entries: big-endian uint16 at offset 24
        raw = _read_at(f, 24, 2)
        if raw is None:
            return "", ""
        (num_entries,) = struct.unpack(">H", raw)

        # find offsets and lengths for entries 9 and 2
        off9 = len9 = off2 = len2 = None
        f.seek(26)
        for _ in range(num_entries):
            descriptor = f.read(12)
            if len(descriptor) < 12:
                # truncated descriptor table: keep whatever was found so far
                break
            entry_id, entry_off, entry_len = struct.unpack(">III", descriptor)
            if entry_id == 9:
                off9, len9 = entry_off, entry_len
            elif entry_id == 2:
                off2, len2 = entry_off, entry_len

        # 1) Try Finder Info (entry 9)
        if off9 is not None and len9 >= 8:
            data = _read_at(f, off9, 8)
            if data is not None:
                ft = data[:4].decode("latin1")
                cr = data[4:8].decode("latin1")
                if ft.isalnum() and cr.isalnum():
                    return ft, cr

        # 2) Fallback to Resource Fork's Resource Map
        if off2 is not None and len2 >= 16:
            # mapOffset: big-endian uint32 at off2 + 4
            raw = _read_at(f, off2 + 4, 4)
            if raw is not None:
                (map_off,) = struct.unpack(">I", raw)
                # embedded Finder Info is at off2 + mapOffset + 16
                data = _read_at(f, off2 + map_off + 16, 8)
                if data is not None:
                    # Drop NUL bytes so a zero-filled field comes back as ""
                    # rather than a truthy string of NULs.
                    ft = data[:4].replace(b"\x00", b"").decode("latin1")
                    cr = data[4:8].replace(b"\x00", b"").decode("latin1")
                    return ft, cr

    # if all else fails, return empty strings
    return "", ""
def main():
    """
    Print "<datafile><TAB><type><TAB><creator>" for each path given on the
    command line.

    For every data file, the companion is looked up at
    <dir>/@eaDir/<name>@SynoResource.  Files without a companion, and files
    for which either code comes back empty, produce no output.  With no
    arguments, a usage line is written to stderr and the exit status is 1.
    """
    datafiles = sys.argv[1:]
    if not datafiles:
        print("Usage: get_type_creator.py <datafile> [<datafile>...]", file=sys.stderr)
        sys.exit(1)

    for datafile in datafiles:
        parent, name = os.path.split(datafile)
        companion = os.path.join(parent, "@eaDir", f"{name}@SynoResource")
        # files without an AppleDouble companion are skipped silently
        if os.path.isfile(companion):
            file_type, creator_code = parse_appledouble(companion)
            # only print if both codes are non-empty
            if file_type and creator_code:
                print(f"{datafile}\t{file_type}\t{creator_code}")


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
set -euo pipefail | |
# Print the usage message to stderr and exit with status 1.
# Called when the script is invoked without arguments, so the text is an
# error report and must not end up in the tab-separated output on stdout.
usage() {
  cat >&2 <<EOF
Usage: $0 <data-file> [<data-file>...]
For each <data-file>:
1) Look for its AppleDouble at <data-dir>/@eaDir/<basename>@SynoResource
2) Try to read File Type + Creator from Finder Info (entry ID 9)
3) If invalid or missing, parse Resource Fork:
• read mapOffset at RF offset+4
• go to RF offset + mapOffset + 16 and read 8 bytes
(first 4 = File Type, next 4 = Creator)
4) Print: <data-file><TAB><FileTypeASCII><TAB><CreatorASCII>
EOF
  exit 1
}
# Require at least one argument
[ $# -ge 1 ] || usage

# hex_at <file> <offset> <count>
# Print <count> bytes of <file>, starting at byte <offset>, as one hex string.
# The output is shorter than 2*<count> digits (possibly empty) when the file
# ends early; callers check the length before converting to a number.
hex_at() {
  dd if="$1" bs=1 skip="$2" count="$3" status=none 2>/dev/null | xxd -ps
}

# hex_to_ascii <hex>
# Convert a hex string back to raw characters, dropping NUL bytes.
hex_to_ascii() {
  printf '%s' "$1" | xxd -r -p 2>/dev/null | tr -d '\000'
}

for datafile in "$@"; do
  # derive directory and filename
  dirpath=$(dirname -- "$datafile")
  base=$(basename -- "$datafile")

  # path to the AppleDouble companion
  apple="$dirpath/@eaDir/${base}@SynoResource"
  if [ ! -f "$apple" ]; then
    printf 'Error: no AppleDouble for "%s": %s\n' \
      "$datafile" "$apple" >&2
    continue
  fi

  # read number of entry descriptors (big-endian uint16 at offset 24)
  count_hex=$(hex_at "$apple" 24 2) || count_hex=''
  if [ "${#count_hex}" -ne 4 ]; then
    # A short file would otherwise yield "0x" with no digits, and under
    # `set -e` that aborts the run for every remaining file.
    printf 'Error: truncated AppleDouble header for "%s": %s\n' \
      "$datafile" "$apple" >&2
    continue
  fi
  num_entries=$((0x$count_hex))

  # initialize Finder Info and Resource Fork offsets/lengths
  off9=0; len9=0
  off2=0; len2=0
  entry_table_start=26

  # loop through each 12-byte descriptor (ID, offset, length: uint32 each),
  # reading the whole descriptor in one go
  for ((i = 0; i < num_entries; i++)); do
    desc=$(hex_at "$apple" $((entry_table_start + i * 12)) 12) || desc=''
    # truncated table: stop here and keep whatever was found so far
    [ "${#desc}" -eq 24 ] || break
    id=$((0x${desc:0:8}))
    off=$((0x${desc:8:8}))
    ln=$((0x${desc:16:8}))
    case "$id" in
      9) off9=$off; len9=$ln ;;  # Finder Info
      2) off2=$off; len2=$ln ;;  # Resource Fork
    esac
  done

  filetype_ascii=''
  creator_ascii=''

  # 1) Try Finder Info if at least 8 bytes
  if [ "$len9" -ge 8 ]; then
    h=$(hex_at "$apple" "$off9" 8) || h=''
    filetype_ascii=$(hex_to_ascii "${h:0:8}")
    creator_ascii=$(hex_to_ascii "${h:8:8}")
    # Both codes must be exactly 4 alphanumeric chars. If either one is not,
    # discard the pair so the Resource Fork fallback below runs, matching
    # the Python version of this tool.
    if ! [[ $filetype_ascii =~ ^[A-Za-z0-9]{4}$ && $creator_ascii =~ ^[A-Za-z0-9]{4}$ ]]; then
      filetype_ascii=''
      creator_ascii=''
    fi
  fi

  # 2) Fallback: parse Resource Fork's Resource Map
  if [ -z "$filetype_ascii" ] && [ "$len2" -ge 16 ]; then
    # read mapOffset (big-endian uint32 at off2+4)
    map_hex=$(hex_at "$apple" $((off2 + 4)) 4) || map_hex=''
    if [ "${#map_hex}" -eq 8 ]; then
      map_off=$((0x$map_hex))
      # compute where the embedded Finder Info lives
      attr_off=$((off2 + map_off + 16))
      # read 8 bytes from that location: first 4 = type, next 4 = creator
      h2=$(hex_at "$apple" "$attr_off" 8) || h2=''
      filetype_ascii=$(hex_to_ascii "${h2:0:8}")
      creator_ascii=$(hex_to_ascii "${h2:8:8}")
    fi
  fi

  # only print if both FileType and Creator are non-empty
  if [[ -n "$filetype_ascii" && -n "$creator_ascii" ]]; then
    printf '%s\t%s\t%s\n' "$datafile" "$filetype_ascii" "$creator_ascii"
  fi
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The Python version is much faster than the Bash one. I include both just for reference.
Either provide the path to a specific file as an argument, or process a whole folder with
find
like so:
You might want to dump the output to a file:
You might also want to parallelize xargs (
-P 8
for 8 parallel tasks), but in that case all tasks write to the log at the same time, so the output will contain interleaved and fragmented paths unless you serialize the writes somehow.