Skip to content

Instantly share code, notes, and snippets.

@iso2022jp
Created December 14, 2024 09:35
Show Gist options
  • Save iso2022jp/c3f5d39ab4856c9f24650f1ffac7490f to your computer and use it in GitHub Desktop.
Save iso2022jp/c3f5d39ab4856c9f24650f1ffac7490f to your computer and use it in GitHub Desktop.
ZIP on the web
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Web Zipper</title>
<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet"
integrity="sha384-QWTKZyjpPEjISv5WaRU9OFeRpok6YctnYmDr5pNlyT2bRjXh0JMhjY6hW+ALEwIH" crossorigin="anonymous">
</head>
<body>
<main class="container-fluid">
<div class="position-relative">
<div inert class="d-flex justify-content-center align-items-center position-absolute w-100 h-100 z-1">
<div hidden id="spinner" class="spinner-border" role="status">
<span class="visually-hidden">Loading...</span>
</div>
</div>
<div class="card m-5 text-center">
<a class="card-body p-5 " id="picker" tabindex="0" style="cursor: pointer">
Drop a directory here to zip.
</a>
</div>
</div>
</main>
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"
integrity="sha384-YvpcrYf0tY3lHB60NNkmXc5s9fDVZLESaAA55NDzOxhy9GkcIdslK1eN7N6jIeHz"
crossorigin="anonymous"></script>
<script>
// @ts-check
/**
* @typedef ZipEntry
* @property {File|Blob} blob
* @property {?string} name
* @property {?number} lastModified
*/
/**
* @param {FileSystemHandle|FileSystemEntry|DataTransfer|File|AsyncIterable<ZipEntry>|Iterable<ZipEntry|Promise<ZipEntry>>} stuff
* @see {@link https://support.pkware.com/pkzip/appnote PKWARE's appnote}
* @see {@link https://libzip.org/specifications/extrafld.txt Third Party Extra fields}
* @todo useWindows31J option for legacy tools
* @return {Promise<Blob>}
*/
const zip = async stuff => {
console.time('zip')
console.group('zip')
try {
// Generic objects
/**
 * Memoize a zero-argument factory: run it at most once and replay the result.
 * Tracks completion with a flag instead of `cache ?? (cache = factory())`,
 * so factories that legitimately produce null/undefined are also cached
 * rather than re-invoked on every call.
 * @template Product
 * @param {(() => Product)} factory
 * @return {() => Product}
 */
const lazy = factory => {
    let computed = false
    /** @type {Product} */
    let cache
    return () => {
        if (!computed) {
            cache = factory()
            computed = true
        }
        return cache
    }
}
/**
 * Little-endian binary struct writer over a fixed-size ArrayBuffer.
 * Each putter writes at the current cursor, advances it, and returns the
 * builder so calls can be chained; build() hands back the underlying buffer.
 * @param {number} length total buffer size in bytes
 */
const packer = length => {
    const LITTLE_ENDIAN = true
    const bytes = new ArrayBuffer(length)
    const view = new DataView(bytes)
    let cursor = 0
    /** @param {number} size bytes just written */
    const advance = size => {
        cursor += size
        return api
    }
    const api = {
        /** @param {number} value */
        u8: value => {
            view.setUint8(cursor, value)
            return advance(1)
        },
        /** @param {number} value */
        u16: value => {
            view.setUint16(cursor, value, LITTLE_ENDIAN)
            return advance(2)
        },
        /** @param {number} value */
        u32: value => {
            view.setUint32(cursor, value, LITTLE_ENDIAN)
            return advance(4)
        },
        /** @param {bigint} value */
        u64: value => {
            view.setBigUint64(cursor, value, LITTLE_ENDIAN)
            return advance(8)
        },
        build: () => bytes,
    }
    return api
}
const CRC32 = {
    // ISO/IEC/IEEE 802-3 CRC-32: lookup table for the reflected
    // polynomial 0xEDB88320, built lazily on first use.
    defaultTable: lazy(() => {
        const POLYNOMIAL = 0xedb88320 // reversed
        const table = new Uint32Array(256)
        for (let index = 0; index < table.length; index += 1) {
            let remainder = index
            for (let bit = 0; bit < 8; bit += 1) {
                remainder = remainder & 0x1 ? (remainder >>> 1) ^ POLYNOMIAL : remainder >>> 1
            }
            table[index] = remainder
        }
        return table
    }),
    /**
     * Compute the CRC-32 of a blob's bytes (returned as a signed 32-bit
     * integer, the bitwise complement of the final register state).
     * @param {Blob} blob
     */
    compute: async (blob, table = CRC32.defaultTable()) => {
        const bytes = new Uint8Array(await blob.arrayBuffer())
        let state = 0xffffffff
        for (const byte of bytes) {
            state = (state >>> 8) ^ table[(byte ^ state) & 0xff]
        }
        return ~state
    }
}
// ZIP file format related objects
/**
 * "version needed to extract" values (APPNOTE 4.4.3).
 */
const Version = {
    DEFLATE: 20,
    LANGUAGE_ENCODING: 63, // EFS (Early Feature Specification)
    /**
     * CP437-safe names only require DEFLATE support; anything else needs
     * a reader that understands the UTF-8 language-encoding feature.
     * @param {boolean} cp437Safe
     */
    for: cp437Safe => {
        if (cp437Safe) {
            return Version.DEFLATE
        }
        return Version.LANGUAGE_ENCODING
    }
}
/**
 * "version made by" (APPNOTE 4.4.2): host system in the high byte
 * (0 = MS-DOS/FAT), supported format version in the low byte.
 */
const VersionMadeBy = {
    FAT: 0 << 8,
    LANGUAGE_ENCODING: 63, // EFS
    default: () => VersionMadeBy.FAT | VersionMadeBy.LANGUAGE_ENCODING
}
/**
 * General purpose bit flag (APPNOTE 4.4.4).
 */
const GeneralPurposeBitFlag = {
    NONE: 0,
    DATA_DESCRIPTOR: 1 << 3, // Streaming: crc/sizes trail the entry data
    LANGUAGE_ENCODING: 1 << 11, // EFS: file name is UTF-8
    /**
     * @param {boolean} cp437Safe
     * @param {boolean} streaming
     */
    for: (cp437Safe, streaming) => {
        let flags = GeneralPurposeBitFlag.NONE
        if (!cp437Safe) {
            flags |= GeneralPurposeBitFlag.LANGUAGE_ENCODING
        }
        if (streaming) {
            flags |= GeneralPurposeBitFlag.DATA_DESCRIPTOR
        }
        return flags
    }
}
/**
 * Compression method id (APPNOTE 4.4.5): DEFLATE when the platform
 * provides CompressionStream, otherwise STORED.
 */
const CompressionMethod = {
    STORED: 0,
    DEFLATED: 8,
    default: () => globalThis.CompressionStream ? CompressionMethod.DEFLATED : CompressionMethod.STORED
}
/**
* @typedef ZipEntryCharacteristics
* @property {number} compressionMethod
* @property {number} versionNeededToExtract
* @property {number} versionMadeBy
* @property {number} generalPurposeBitFlag
* @property {Blob} fileName
* @property {Blob} extraFields
* @property {number} lastModifiedDosTime
* @property {number} lastModifiedDosDate
*/
/**
* @typedef ZipEntryDataDescriptor
* @property {number} crc32
* @property {number} compressedSize
* @property {number} uncompressedSize
*/
/**
 * "Store" pseudo-compressor (method 0): pass the data through unchanged,
 * computing only the CRC-32 for the descriptor.
 * @param {Blob} blob
 * @return {Promise<{compressed: Blob, descriptor: ZipEntryDataDescriptor}>}
 */
const store = async blob => {
    const crc32 = await CRC32.compute(blob)
    return {
        compressed: blob,
        descriptor: {
            crc32,
            compressedSize: blob.size,
            uncompressedSize: blob.size,
        },
    }
}
/**
* @param {Blob} blob
* @return {Promise<{compressed: Blob, descriptor: ZipEntryDataDescriptor}>}
* @see {@link https://datatracker.ietf.org/doc/html/rfc1951 DEFLATE Compressed Data Format Specification version 1.3}
*/
const deflate = async blob => {
// Compute Deflate & CRC-32 at once using GZIP
// https://datatracker.ietf.org/doc/html/rfc1952
// https://compression.spec.whatwg.org/#ref-for-dom-compressionformat-gzip
const GZIP_HEADER_SIZE = 10 // "compliant"; No FEXTRA, FNAME, FCOMMENT
const GZIP_TRAILER_SIZE = 8 // CRC32, ISIZE
const GZIP_TRAILER_CRC32_OFFSET = 0
const LITTLE_ENDIAN = true
// @ts-ignore
const gzip = new Blob(await Array.fromAsync(blob.stream().pipeThrough(new CompressionStream('gzip'))))
// Extract deflated body
const compressed = gzip.slice(GZIP_HEADER_SIZE, -GZIP_TRAILER_SIZE)
// Extract CRC-32 from GZIP trailer
const trailer = gzip.slice(-GZIP_TRAILER_SIZE)
const crc32 = new DataView(await trailer.arrayBuffer()).getUint32(GZIP_TRAILER_CRC32_OFFSET, LITTLE_ENDIAN)
return {
compressed,
descriptor: {
crc32,
compressedSize: compressed.size,
uncompressedSize: blob.size,
},
}
}
/**
 * Dispatch table from ZIP compression method id to its implementation.
 */
const compressor = {
    [CompressionMethod.STORED]: store,
    [CompressionMethod.DEFLATED]: deflate,
    /**
     * Compress a blob with the given method.
     * @param {number} method CompressionMethod.STORED or CompressionMethod.DEFLATED
     * @param {Blob} blob
     * @throws {TypeError} when the method id has no registered implementation
     *     (previously this surfaced as an opaque "is not a function" error)
     */
    compress: async (method, blob) => {
        const implementation = compressor[method]
        if (typeof implementation !== 'function') {
            throw new TypeError(`Unsupported compression method: ${method}`)
        }
        return implementation(blob)
    }
}
/**
 * Convert a Unix timestamp (ms) to MS-DOS date format, in local time:
 * bits 15-9 = year since 1980, 8-5 = month (1-12), 4-0 = day of month.
 * @param {number} timestamp
 */
const toDosDate = timestamp => {
    const local = new Date(timestamp)
    const years = local.getFullYear() - 1980
    const month = local.getMonth() + 1
    return (years << 9) | (month << 5) | local.getDate()
}
/**
 * Convert a Unix timestamp (ms) to MS-DOS time format, in local time:
 * bits 15-11 = hours, 10-5 = minutes, 4-0 = seconds/2 (2s resolution).
 * @param {number} timestamp
 */
const toDosTime = timestamp => {
    const local = new Date(timestamp)
    return (local.getHours() << 11) | (local.getMinutes() << 5) | (local.getSeconds() >> 1)
}
/**
 * True when every character is a printable ASCII graphic (U+0021–U+007E),
 * i.e. the name survives CP437-only ZIP readers without the EFS flag.
 * Note: space (U+0020) is deliberately excluded. Empty strings pass.
 * @param {string} name
 */
const isInAsciiGraphicRange = name => [...name].every(character => {
    const code = character.codePointAt(0)
    return code >= 0x21 && code <= 0x7e
})
const EntryCharacteristics = {
    /**
     * Derive the per-entry ZIP header fields from a ZipEntry.
     * Falls back to the blob's own name/lastModified, then to 'file'/now.
     * Uses `??` chains rather than destructuring defaults because the
     * ZipEntry typedef allows explicit `null` (`?string`/`?number`), and
     * destructuring defaults only trigger on `undefined` — a null name
     * would previously crash on `.normalize()`.
     * @param {ZipEntry} entry
     * @return {Promise<ZipEntryCharacteristics>}
     */
    inspect: async entry => {
        const { blob } = entry
        const name = entry.name ?? blob['name'] ?? 'file'
        const lastModified = entry.lastModified ?? blob['lastModified'] ?? Date.now()
        const normalizedName = name.normalize()
        const fileName = new Blob([normalizedName])
        const cp437Safe = isInAsciiGraphicRange(normalizedName)
        const extraFields = [
            ExtendedTimestampExtraField.pack(lastModified), // UT
        ]
        if (!cp437Safe) {
            // Only non-ASCII names need the Unicode path extra field
            extraFields.push(await InfoZIPUnicodePathExtraField.pack(fileName)) // up
        }
        return {
            compressionMethod: CompressionMethod.default(),
            versionNeededToExtract: Version.for(cp437Safe),
            versionMadeBy: VersionMadeBy.default(),
            generalPurposeBitFlag: GeneralPurposeBitFlag.for(cp437Safe, true),
            fileName,
            extraFields: new Blob(extraFields),
            lastModifiedDosTime: toDosTime(lastModified),
            lastModifiedDosDate: toDosDate(lastModified),
        }
    }
}
// Third Party Mappings: Extended Timestamp Extra Field (0x5455):
// https://libzip.org/specifications/extrafld.txt
// XXX: 7-zip follows, Windows zip ignores
// Packs a "UT" block carrying only the modification time as a 32-bit UNIX
// timestamp (seconds, UTC), preceded by the generic 4-byte extra-field
// header (ID + TSize).
const ExtendedTimestampExtraField = {
ID: 0x5455, // UT
CONTENT_LENGTH: 5,// ModTime only: 1 flag byte + 4 timestamp bytes
Flag: {
MODIFICATION_TIME: 1 << 0, // if set, modification time is present
default: () => ExtendedTimestampExtraField.Flag.MODIFICATION_TIME
},
/** @param {number} timestamp milliseconds since the UNIX epoch */
pack: timestamp => packer(ExtendedTimestampExtraField.CONTENT_LENGTH + 4)
.u16(ExtendedTimestampExtraField.ID)
.u16(ExtendedTimestampExtraField.CONTENT_LENGTH) // TSize: total data size for this block
.u8(ExtendedTimestampExtraField.Flag.default()) // Flags: info bits
.u32(Math.trunc(timestamp / 1000)) // ModTime: time of last modification (UTC/GMT), 1s resolution
.build(),
}
// 4.6.9 -Info-ZIP Unicode Path Extra Field (0x7075):
// Supplies the UTF-8 file name (plus a CRC-32 of the standard name field)
// for readers that ignore the EFS flag; only appended for non-ASCII names.
const InfoZIPUnicodePathExtraField = {
ID: 0x7075, // up
CONTENT_LENGTH: 5, // Version (1) + NameCRC32 (4); UnicodeName is variable-length and appended separately
VERSION: 1,
/** @param {Blob} fileName UTF-8 encoded file name */
pack: async fileName => new Blob([
packer(InfoZIPUnicodePathExtraField.CONTENT_LENGTH + 4)
.u16(InfoZIPUnicodePathExtraField.ID)
.u16(InfoZIPUnicodePathExtraField.CONTENT_LENGTH + fileName.size) // TSize: total data size for this block
.u8(InfoZIPUnicodePathExtraField.VERSION) // Version: version of this extra field, currently 1
.u32(await CRC32.compute(fileName)) // NameCRC32: File Name Field CRC32 Checksum
.build(),
fileName,
]),
}
// 4.3.7 local file header + Extra fields
// Written immediately before each entry's (possibly compressed) data.
// CRC-32 and both sizes are zeroed because entries are written in streaming
// mode (general purpose bit 3): the real values follow in the data descriptor.
const LocalFileHeader = {
SIGNATURE: 0x04034b50, // PK^C^D
SIZE: 30, // fixed-size portion; file name and extra fields are appended after
/** @param {ZipEntryCharacteristics} characteristics */
pack: characteristics => new Blob([
packer(LocalFileHeader.SIZE)
.u32(LocalFileHeader.SIGNATURE)
.u16(characteristics.versionNeededToExtract)
.u16(characteristics.generalPurposeBitFlag)
.u16(characteristics.compressionMethod)
.u16(characteristics.lastModifiedDosTime)
.u16(characteristics.lastModifiedDosDate)
.u32(0) // crc-32 (Lazy; specify with data descriptor)
.u32(0) // compressed size (Lazy; specify with data descriptor)
.u32(0) // uncompressed size (Lazy; specify with data descriptor)
.u16(characteristics.fileName.size)
.u16(characteristics.extraFields.size)
.build(),
characteristics.fileName,
characteristics.extraFields,
]),
}
// 4.3.9 Data descriptor
// Written immediately after each entry's data when streaming (general
// purpose bit 3): carries the CRC-32 and sizes the local header left as 0.
const DataDescriptor = {
SIGNATURE: 0x08074b50, // PK^G^H: commonly adopted
SIZE: 16, // signature + crc-32 + compressed size + uncompressed size
/** @param {ZipEntryDataDescriptor} descriptor */
pack: descriptor => packer(DataDescriptor.SIZE)
.u32(DataDescriptor.SIGNATURE) // implementors SHOULD include the signature value marking the data descriptor record
.u32(descriptor.crc32)
.u32(descriptor.compressedSize)
.u32(descriptor.uncompressedSize)
.build(),
}
// 4.3.12 Central directory structure - File header
// One record per entry, written after all entry data: repeats the entry
// characteristics plus the now-known descriptor values and the byte offset
// of the entry's local file header within the archive.
const CentralDirectoryHeader = {
SIGNATURE: 0x02014b50, // PK^A^B
SIZE: 46, // fixed-size portion; file name and extra fields are appended after
InternalAttribute: {
BINARY: 0, // bit 0 clear: entry data is binary, not text
default: () => CentralDirectoryHeader.InternalAttribute.BINARY
},
ExternalAttribute: {
FAT_ARCHIVE: 0x20, // FAT "archive" attribute bit (matches VersionMadeBy.FAT host)
default: () => CentralDirectoryHeader.ExternalAttribute.FAT_ARCHIVE
},
/**
* @param {ZipEntryCharacteristics} characteristics
* @param {ZipEntryDataDescriptor} descriptor crc-32 and sizes gathered while compressing
* @param {number} offset byte offset of the entry's local file header from the archive start
*/
pack: (characteristics, descriptor, offset) => new Blob([
packer(CentralDirectoryHeader.SIZE)
.u32(CentralDirectoryHeader.SIGNATURE)
.u16(characteristics.versionMadeBy)
.u16(characteristics.versionNeededToExtract)
.u16(characteristics.generalPurposeBitFlag)
.u16(characteristics.compressionMethod)
.u16(characteristics.lastModifiedDosTime)
.u16(characteristics.lastModifiedDosDate)
.u32(descriptor.crc32)
.u32(descriptor.compressedSize)
.u32(descriptor.uncompressedSize)
.u16(characteristics.fileName.size)
.u16(characteristics.extraFields.size)
.u16(0) // file comment length (none)
.u16(0) // disk number start (#0)
.u16(CentralDirectoryHeader.InternalAttribute.default())
.u32(CentralDirectoryHeader.ExternalAttribute.default())
.u32(offset) // relative offset of local header
.build(),
characteristics.fileName,
characteristics.extraFields,
]),
}
// 4.3.16: End of central directory record
// Final fixed-size record: entry counts plus central directory size/offset.
// This writer produces single-disk archives, so both disk numbers are 0 and
// the per-disk and total entry counts are identical.
const EndOfCentralDirectoryRecord = {
SIGNATURE: 0x06054b50, // PK^E^F
SIZE: 22, // fixed size; no trailing .ZIP file comment is written
/**
* @param {Blob[]} centralRecords packed central directory headers, in entry order
* @param {number} offset byte offset where the central directory starts
*/
pack: (centralRecords, offset) => packer(EndOfCentralDirectoryRecord.SIZE)
.u32(EndOfCentralDirectoryRecord.SIGNATURE)
.u16(0) // number of this disk (#0)
.u16(0) // number of the disk with the start of the central directory (#0)
.u16(centralRecords.length) // total number of entries in the central directory on this disk
.u16(centralRecords.length) // total number of entries in the central directory
.u32(centralRecords.reduce((size, blob) => size + blob.size, 0)) // size of the central directory
.u32(offset) // offset of start of central directory with respect to the starting disk number
.u16(0) // .ZIP file comment length (none)
.build(),
}
/**
 * Flattens the various droppable/pickable sources (handles, entries,
 * DataTransfer, Files, iterables) into an async stream of ZipEntry objects.
 * Dispatch is by constructor name (see collect()).
 */
const collector = {
    /**
     * Recurse into a directory handle, prefixing children with its path.
     * @param {FileSystemDirectoryHandle} handle
     */
    async *FileSystemDirectoryHandle(handle, name = handle.name) {
        for await (const [subname, subhandle] of handle.entries()) {
            yield* collector.collect(subhandle, `${name}/${subname}`)
        }
    },
    /** @param {FileSystemFileHandle} handle */
    async *FileSystemFileHandle(handle, name = handle.name) {
        yield { blob: await handle.getFile(), name }
    },
    /**
     * Recurse into a legacy FileSystemDirectoryEntry.
     * FIX: readEntries() returns results in batches and must be called
     * repeatedly until it yields an empty array — Chrome caps a single batch
     * at 100 entries, so a one-shot call silently truncates large
     * directories. Errors now reject instead of leaving the promise pending.
     * @param {FileSystemDirectoryEntry} entry
     */
    async *FileSystemDirectoryEntry(entry, name = entry.name) {
        const reader = entry.createReader()
        const readBatch = () => new Promise((resolve, reject) => { reader.readEntries(resolve, reject) })
        for (let batch = await readBatch(); batch.length > 0; batch = await readBatch()) {
            for (const subentry of batch) {
                yield* collector.collect(subentry, `${name}/${subentry.name}`)
            }
        }
    },
    /** @param {FileSystemFileEntry} entry */
    async *FileSystemFileEntry(entry, name = entry.name) {
        // Pass the error callback so failures reject rather than hang forever
        const file = await new Promise((resolve, reject) => { entry.file(resolve, reject) })
        yield { blob: file, name }
    },
    /** @param {File} file */
    async *File(file, name = file.name) { yield { blob: file, name } },
    /** @param {Blob} blob */
    async *Blob(blob) { yield { blob } },
    /** @param {DataTransfer} transfer */
    async *DataTransfer(transfer) { yield* collector.collect(transfer.items) },
    /** @param {Iterable<DataTransferItem>} list */
    async *DataTransferItemList(list) {
        // DataTransferItem is valid only in the event handler. Collect objects synchronously first.
        const objects = Array.from(list).map(item => {
            if (item.kind !== 'file') {
                throw new TypeError('DataTransferItem is not represented as a file.')
            }
            // interface DataTransferItem {
            //     getAsFileSystemHandle(): Promise<FileSystemHandle | null>
            // }
            // @ts-ignore
            if (item.getAsFileSystemHandle) {
                /** @type {Promise<?FileSystemHandle>} */
                // @ts-ignore
                const promise = item.getAsFileSystemHandle()
                if (!promise) {
                    throw new TypeError('DataTransferItem.getAsFileSystemHandle() is not represented as a file handle.')
                }
                return promise
            } else if (item.webkitGetAsEntry) {
                const entry = item.webkitGetAsEntry()
                if (!entry) {
                    throw new TypeError('DataTransferItem.webkitGetAsEntry() is not represented as a file entry.')
                }
                return entry
            } else if (item.getAsFile) {
                const file = item.getAsFile()
                if (!file) {
                    throw new TypeError('DataTransferItem.getAsFile() is not represented as a file.')
                }
                return file
            } else {
                throw new TypeError('DataTransferItem is not recognized as a file.')
            }
        })
        // Then, iterate asynchronously (for await also unwraps the promises)
        for await (const object of objects) {
            if (object === null) {
                throw new TypeError('DataTransferItem.getAsFileSystemHandle() is not resolved as a file handle.')
            }
            yield* collector.collect(object)
        }
    },
    /** @param {DataTransferItem} item */
    async *DataTransferItem(item) { yield* collector.DataTransferItemList([item]) },
    /**
     * Dispatch on the object's constructor name; otherwise delegate to
     * (async) iterables of entries, else assume a ready-made ZipEntry.
     * @param {any} object
     * @param {string|undefined} name
     * @return {AsyncIterable<ZipEntry>|Iterable<Promise<ZipEntry>>|Iterable<ZipEntry>}
     */
    async *collect(object, name = undefined) {
        const className = Object.getPrototypeOf(object)?.constructor?.name
        if (collector[className]) {
            yield* collector[className](object, name)
            return
        }
        if (object[Symbol.asyncIterator] || object[Symbol.iterator]) {
            yield* object
            return
        }
        yield object
    }
}
// Pseudo stream: buffers written parts and concatenates them into one Blob.
const writer = () => {
    /** @type {(Blob|ArrayBuffer|ArrayBufferView)[]} */
    const parts = []
    let written = 0
    const writable = {
        /** Total number of bytes accepted so far. */
        get position() { return written },
        /** @param {Blob|ArrayBufferView|ArrayBuffer} part */
        write: part => {
            parts.push(part)
            if (part instanceof Blob) {
                written += part.size
            } else {
                written += part.byteLength
            }
            return writable
        },
        /** @param {string} type MIME type of the resulting Blob */
        build: type => new Blob(parts, { type }),
    }
    return writable
}
/**
 * Assemble a streaming-style ZIP archive:
 * [local header, entry data, data descriptor]* + central directory + EOCD.
 * @param {FileSystemHandle|FileSystemEntry|DataTransfer|File|AsyncIterable<ZipEntry>|Iterable<ZipEntry|Promise<ZipEntry>>} stuff
 * @return {Promise<Blob>}
 */
const create = async stuff => {
    console.time('Collecting files')
    const entries = collector.collect(stuff)
    console.timeEnd('Collecting files')
    /** @type {Blob[]} */
    const centralRecords = []
    const output = writer()
    for await (const entry of entries) {
        const label = `Compressing: size=${entry.blob.size}, path=${entry.name}`
        console.time(label)
        try {
            // Remember where this entry's local header starts for the central directory
            const localOffset = output.position
            const characteristics = await EntryCharacteristics.inspect(entry)
            output.write(LocalFileHeader.pack(characteristics))
            const { compressed, descriptor } = await compressor.compress(characteristics.compressionMethod, entry.blob)
            output.write(compressed)
            output.write(DataDescriptor.pack(descriptor))
            centralRecords.push(CentralDirectoryHeader.pack(characteristics, descriptor, localOffset))
        } finally {
            console.timeEnd(label)
        }
    }
    console.time('Finalizing')
    const directoryOffset = output.position
    for (const record of centralRecords) {
        output.write(record)
    }
    output.write(EndOfCentralDirectoryRecord.pack(centralRecords, directoryOffset))
    const zipped = output.build('application/zip')
    console.timeEnd('Finalizing')
    return zipped
}
return await create(stuff)
} finally {
console.groupEnd()
console.timeEnd('zip')
}
}
</script>
<script type="module">
/**
 * Wire up drag-and-drop file acceptance on an element: show `activeClass`
 * while a copyable file drag hovers, and invoke `onDrop` on drop.
 * @param {HTMLElement} element
 * @param {string} activeClass CSS class toggled during a file drag-over
 * @param {(this: HTMLElement, e: DragEvent) => void} onDrop
 */
const acceptDropFiles = (element, activeClass, onDrop) => {
    // effectAllowed values that permit a 'copy' drop
    const copyableEffects = new Set(['copy', 'copyLink', 'copyMove', 'all', 'uninitialized'])
    /** @param {DataTransfer} transfer */
    const carriesFiles = transfer =>
        copyableEffects.has(transfer.effectAllowed) && transfer.types.includes('Files')
    /** @param {DragEvent} e */
    const handleDrag = e => {
        if (carriesFiles(e.dataTransfer)) {
            e.dataTransfer.dropEffect = 'copy'
            element.classList.add(activeClass)
        } else {
            e.dataTransfer.dropEffect = 'none'
        }
        e.preventDefault()
        e.stopPropagation()
    }
    element.addEventListener('dragenter', handleDrag)
    element.addEventListener('dragover', handleDrag)
    element.addEventListener('dragleave', () => {
        element.classList.remove(activeClass)
    })
    element.addEventListener('drop', e => {
        e.preventDefault()
        e.stopPropagation()
        element.classList.remove(activeClass)
        onDrop.call(element, e)
    })
}
/**
 * Trigger a client-side download of a blob via a temporary object URL
 * on a synthetic anchor click; the URL is revoked right after.
 * @param {Blob} blob
 * @param {string} filename suggested name for the saved file
 * @param {string} type MIME type advertised on the anchor
 */
const download = (blob, filename, type = blob.type || 'application/octet-stream') => {
    const anchor = document.createElement('a')
    const url = window.URL.createObjectURL(blob)
    anchor.href = url
    anchor.download = filename
    anchor.type = type
    anchor.click()
    window.URL.revokeObjectURL(url)
}
// UI elements: busy spinner overlay and the drop/click target
const spinner = /** @type {HTMLElement} */ (document.querySelector('#spinner'))
const picker = /** @type {HTMLElement} */ (document.querySelector('#picker'))
/**
 * Zip the given entries and download the result, showing the spinner and
 * disabling the picker while work is in flight (restored even on failure).
 * @param {*} entries anything zip() accepts
 * @param {string} basename archive file name without the .zip suffix
 */
const zipThenDownload = async (entries, basename) => {
    spinner.hidden = false
    picker.inert = true
    try {
        const zipped = await zip(entries)
        download(zipped, `${basename}.zip`)
    } finally {
        picker.inert = false
        spinner.hidden = true
    }
}
// Drop target: zip whatever was dropped; when exactly one plain file was
// dropped, name the archive after it, otherwise fall back to 'files'.
acceptDropFiles(picker, 'border-info', async e => {
    const items = e.dataTransfer?.items
    const single = items?.length === 1 ? items[0].getAsFile() : null
    const basename = single?.name ?? 'files'
    await zipThenDownload(e.dataTransfer, basename)
})
// Click/keyboard fallback: use the File System Access API when available.
// FIX: the zip/download promise is now awaited (it was left floating), and
// the AbortError thrown when the user dismisses the directory picker is
// swallowed instead of surfacing as an unhandled promise rejection.
picker.addEventListener('click', async () => {
    if (!globalThis.showDirectoryPicker) {
        return
    }
    try {
        /** @type {FileSystemDirectoryHandle} */
        // @ts-ignore
        const handle = await showDirectoryPicker()
        await zipThenDownload(handle, handle.name)
    } catch (err) {
        // User cancelled the picker — not an error
        if (err?.name === 'AbortError') {
            return
        }
        throw err
    }
})
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment