-
-
Save dcollien/76d17f69afe748afad7ff3a15ff9a08a to your computer and use it in GitHub Desktop.
var Multipart = { | |
parse: (function() { | |
function Parser(arraybuf, boundary) { | |
this.array = arraybuf; | |
this.token = null; | |
this.current = null; | |
this.i = 0; | |
this.boundary = boundary; | |
} | |
Parser.prototype.skipPastNextBoundary = function() { | |
var boundaryIndex = 0; | |
var isBoundary = false; | |
while (!isBoundary) { | |
if (this.next() === null) { | |
return false; | |
} | |
if (this.current === this.boundary[boundaryIndex]) { | |
boundaryIndex++; | |
if (boundaryIndex === this.boundary.length) { | |
isBoundary = true; | |
} | |
} else { | |
boundaryIndex = 0; | |
} | |
} | |
return true; | |
} | |
Parser.prototype.parseHeader = function() { | |
var header = ''; | |
var _this = this; | |
var skipUntilNextLine = function() { | |
header += _this.next(); | |
while (_this.current !== '\n' && _this.current !== null) { | |
header += _this.next(); | |
} | |
if (_this.current === null) { | |
return null; | |
} | |
}; | |
var hasSkippedHeader = false; | |
while (!hasSkippedHeader) { | |
skipUntilNextLine(); | |
header += this.next(); | |
if (this.current === '\r') { | |
header += this.next(); // skip | |
} | |
if (this.current === '\n') { | |
hasSkippedHeader = true; | |
} else if (this.current === null) { | |
return null; | |
} | |
} | |
return header; | |
} | |
Parser.prototype.next = function() { | |
if (this.i >= this.array.byteLength) { | |
this.current = null; | |
return null; | |
} | |
this.current = String.fromCharCode(this.array[this.i]); | |
this.i++; | |
return this.current; | |
} | |
function buf2String(buf) { | |
var string = ''; | |
buf.forEach(function (byte) { | |
string += String.fromCharCode(byte); | |
}); | |
return string; | |
} | |
function processSections(arraybuf, sections) { | |
for (var i = 0; i !== sections.length; ++i) { | |
var section = sections[i]; | |
if (section.header['content-type'] === 'text/plain') { | |
section.text = buf2String(arraybuf.slice(section.bodyStart, section.end)); | |
} else { | |
var imgData = arraybuf.slice(section.bodyStart, section.end); | |
section.file = new Blob([imgData], { | |
type: section.header['content-type'] | |
}); | |
var fileNameMatching = (/\bfilename\=\"([^\"]*)\"/g).exec(section.header['content-disposition']) || []; | |
section.fileName = fileNameMatching[1] || ''; | |
} | |
var matching = (/\bname\=\"([^\"]*)\"/g).exec(section.header['content-disposition']) || []; | |
section.name = matching[1] || ''; | |
delete section.headerStart; | |
delete section.bodyStart; | |
delete section.end; | |
} | |
return sections; | |
} | |
function multiparts(arraybuf, boundary) { | |
boundary = '--' + boundary; | |
var parser = new Parser(arraybuf, boundary); | |
var sections = []; | |
while (parser.skipPastNextBoundary()) { | |
var header = parser.parseHeader(); | |
if (header !== null) { | |
var headerLength = header.length; | |
var headerParts = header.trim().split('\n'); | |
var headerObj = {}; | |
for (var i = 0; i !== headerParts.length; ++i) { | |
var parts = headerParts[i].split(':'); | |
headerObj[parts[0].trim().toLowerCase()] = (parts[1] || '').trim(); | |
} | |
sections.push({ | |
'bodyStart': parser.i, | |
'header': headerObj, | |
'headerStart': parser.i - headerLength | |
}); | |
} | |
} | |
// add dummy section for end | |
sections.push({ | |
'headerStart': arraybuf.byteLength - 2 // 2 hyphens at end | |
}); | |
for (var i = 0; i !== sections.length - 1; ++i) { | |
sections[i].end = sections[i+1].headerStart - boundary.length; | |
if (String.fromCharCode(arraybuf[sections[i].end]) === '\r' || '\n') { | |
sections[i].end -= 1; | |
} | |
if (String.fromCharCode(arraybuf[sections[i].end]) === '\r' || '\n') { | |
sections[i].end -= 1; | |
} | |
} | |
// remove dummy section | |
sections.pop(); | |
sections = processSections(arraybuf, sections); | |
return sections; | |
} | |
return multiparts; | |
})() | |
}; |
sure (provided there is no implied warranty/liability using it)
Ok fine thank you.
I've rewritten it in TypeScript as an ES module, but I haven't tested it thoroughly yet. It works fine for my use case in Cypress anyway.
/**
 * Forward-only cursor over a multipart payload.
 *
 * Bytes are read one at a time and exposed as single-character strings.
 * `i` is the index of the next unread byte; callers read it after each
 * operation to locate header and body offsets.
 */
class Parser {
  private readonly array: Uint8Array;
  private current: string | null;
  public i: number;
  private readonly boundary: string;

  /**
   * @param arraybuf Raw payload bytes.
   * @param boundary Full delimiter to search for (including any leading "--").
   */
  public constructor(arraybuf: Uint8Array, boundary: string) {
    this.array = arraybuf;
    this.current = null;
    this.i = 0;
    this.boundary = boundary;
  }

  /**
   * Advances the cursor to just past the next occurrence of the boundary.
   *
   * @returns `true` when a boundary was found (`i` then points at the byte
   * right after it); `false` when the end of the buffer was reached first.
   */
  public skipPastNextBoundary(): boolean {
    let matched = 0;
    for (;;) {
      if (this.next() === null) {
        return false;
      }
      if (this.current === this.boundary[matched]) {
        matched++;
        if (matched === this.boundary.length) {
          return true;
        }
      } else if (matched > 0) {
        // A partial match failed. Resume one byte after where it started so a
        // boundary overlapping the failed attempt (e.g. "---ab" vs "--ab") is
        // still found. `i` is currently matchStart + matched + 1.
        this.i -= matched;
        matched = 0;
      }
    }
  }

  /**
   * Consumes the rest of the current line and then every following line up to
   * and including the blank line that ends the header block.
   *
   * @returns Every character consumed (so its length equals the number of
   * bytes read), or `null` when the buffer ends before a blank line.
   */
  public parseHeader(): string | null {
    let header = '';
    // Reads one byte and records it; end-of-buffer is reported but not recorded.
    const consume = (): string | null => {
      const ch = this.next();
      if (ch !== null) {
        header += ch;
      }
      return ch;
    };

    for (;;) {
      // Read through the end of the current line.
      let ch = consume();
      while (ch !== '\n' && ch !== null) {
        ch = consume();
      }
      if (ch === null) {
        return null;
      }
      // A line that starts with "\n" or "\r\n" is the blank separator line.
      ch = consume();
      if (ch === '\r') {
        ch = consume();
      }
      if (ch === '\n') {
        return header;
      }
      if (ch === null) {
        return null;
      }
    }
  }

  /**
   * Reads the next byte as a single-character string and advances `i`.
   *
   * @returns The character, or `null` at the end of the buffer.
   */
  public next(): string | null {
    if (this.i >= this.array.byteLength) {
      this.current = null;
      return null;
    }
    this.current = String.fromCharCode(this.array[this.i]);
    this.i++;
    return this.current;
  }
}
/**
 * Converts bytes to a string, mapping each byte to the character with the
 * same code unit (no multi-byte decoding is performed).
 */
function buf2String(buf: Uint8Array): string {
  let text = '';
  for (const byte of buf) {
    text += String.fromCharCode(byte);
  }
  return text;
}
/**
 * One part of a multipart payload.
 *
 * The offset fields are working state: `multiparts` fills them in while
 * scanning and `processSections` deletes them once the body is extracted.
 */
interface Section {
  /** Offset recorded as `parser.i - header.length` when the part is found. */
  headerStart?: number;
  /** Offset of the first body byte. */
  bodyStart?: number;
  /** Exclusive end offset of the body. */
  end?: number;
  /** Header values keyed by lower-cased header name. */
  header?: Record<string, string>;
  /** `name="..."` parameter of Content-Disposition, or '' when absent. */
  name?: string;
  /** Body as a string; set only when Content-Type is exactly "text/plain". */
  text?: string;
  /** Body as a Blob; set for every part that is not "text/plain". */
  file?: Blob;
  /** `filename="..."` parameter of Content-Disposition; set alongside `file`. */
  fileName?: string;
}
/**
 * Extracts each section's body from `arraybuf` and fills in its public fields.
 *
 * Sections whose Content-Type is exactly "text/plain" get `text`; all others
 * get `file` (a Blob typed with the section's Content-Type) and `fileName`.
 * Every section gets `name`. The working offsets (`headerStart`, `bodyStart`,
 * `end`) are removed afterwards.
 *
 * @param arraybuf Raw payload the section offsets refer to.
 * @param sections Sections to complete; they are modified in place.
 * @returns The same `sections` array.
 */
function processSections(arraybuf: Uint8Array, sections: Section[]): Section[] {
  for (const section of sections) {
    const headers = section.header!;
    const contentType = headers['content-type'];
    const disposition = headers['content-disposition'];
    const body = arraybuf.slice(section.bodyStart, section.end);

    if (contentType === 'text/plain') {
      section.text = buf2String(body);
    } else {
      section.file = new Blob([body], { type: contentType });
      const fileNameMatch = /\bfilename="([^"]*)"/g.exec(disposition);
      section.fileName = (fileNameMatch && fileNameMatch[1]) || '';
    }

    const nameMatch = /\bname="([^"]*)"/g.exec(disposition);
    section.name = (nameMatch && nameMatch[1]) || '';

    // The offsets are only needed to slice the body out of the buffer.
    delete section.headerStart;
    delete section.bodyStart;
    delete section.end;
  }
  return sections;
}
/**
 * Splits a multipart payload into its parts.
 *
 * Each part's body runs from the end of its header block to the start of the
 * next delimiter, minus the line break ("\r\n" or a bare "\n") that precedes
 * that delimiter. Scanning stops at the closing delimiter ("--boundary--"),
 * so a trailing CRLF or epilogue after it does not affect the last part.
 *
 * @param arraybuf Raw payload bytes.
 * @param boundary Boundary string without the leading "--".
 * @returns The parts, completed by `processSections`.
 */
function multiparts(arraybuf: Uint8Array, boundary: string): Section[] {
  const LF = 10;
  const CR = 13;
  const HYPHEN = 45;

  const delimiter = '--' + boundary;
  const parser = new Parser(arraybuf, delimiter);
  const sections: Section[] = [];
  // The part whose body is still waiting for its closing delimiter.
  let open: { section: Section; bodyStart: number } | null = null;

  // Exclusive end of a body whose closing delimiter starts at `delimiterStart`.
  const bodyEnd = (bodyStart: number, delimiterStart: number): number => {
    let end = delimiterStart;
    if (end > bodyStart && arraybuf[end - 1] === LF) {
      end--;
      if (end > bodyStart && arraybuf[end - 1] === CR) {
        end--;
      }
    }
    return end;
  };

  while (parser.skipPastNextBoundary()) {
    // The parser now sits just past the delimiter.
    const delimiterStart = parser.i - delimiter.length;
    if (open !== null) {
      open.section.end = bodyEnd(open.bodyStart, delimiterStart);
      open = null;
    }

    // "--boundary--" closes the message; anything after it is epilogue.
    if (arraybuf[parser.i] === HYPHEN && arraybuf[parser.i + 1] === HYPHEN) {
      break;
    }

    const header = parser.parseHeader();
    if (header === null) {
      continue;
    }

    const headerObj: Record<string, string> = {};
    for (const line of header.trim().split('\n')) {
      // Split on the first colon only so values may themselves contain colons.
      const colon = line.indexOf(':');
      const key = (colon === -1 ? line : line.slice(0, colon)).trim().toLowerCase();
      headerObj[key] = colon === -1 ? '' : line.slice(colon + 1).trim();
    }

    const section: Section = {
      bodyStart: parser.i,
      header: headerObj,
      headerStart: parser.i - header.length,
    };
    sections.push(section);
    open = { section, bodyStart: parser.i };
  }

  // Best effort for a truncated message: a part that never saw a closing
  // delimiter runs to the end of the buffer.
  if (open !== null) {
    open.section.end = arraybuf.byteLength;
  }

  return processSections(arraybuf, sections);
}
/** One parsed form part as exposed to callers. */
export interface ParsedSection {
  /** The part's body. */
  blob: Blob;
  /** `filename="..."` parameter of Content-Disposition; undefined for "text/plain" parts. */
  fileName?: string;
}

/** Parsed parts keyed by their form field name. */
export type ParseResult = Record<string, ParsedSection>;

/**
 * Parses a multipart/form-data payload into a map of field name to body.
 *
 * When several parts share a name, the last one wins.
 *
 * @param arraybuf Raw payload bytes.
 * @param boundary Boundary string without the leading "--".
 */
export function parse(arraybuf: Uint8Array, boundary: string): ParseResult {
  const result: ParseResult = {};
  for (const section of multiparts(arraybuf, boundary)) {
    let blob = section.file;
    if (blob === undefined) {
      // "text/plain" parts only carry `text`, produced one character per
      // byte; map each character back to its byte so the Blob holds the
      // original body instead of leaving `blob` undefined.
      const bytes = Uint8Array.from(section.text ?? '', (ch) => ch.charCodeAt(0));
      blob = new Blob([bytes], { type: 'text/plain' });
    }
    result[section.name ?? ''] = {
      blob,
      fileName: section.fileName,
    };
  }
  return result;
}
There is a bug in this code.
The following part is buggy:
sections.push({
'headerStart': arraybuf.byteLength - boundary.length - 2 // 2 hyphens at end
});
You are subtracting the boundary length twice for the last section: it is already subtracted when `section.end` is computed. So the fix is the following:
sections.push({
'headerStart': arraybuf.byteLength - 2 // 2 hyphens at end
});
Modern browsers can parse multipart/form-data
natively. Example:
// Sample multipart/form-data body with a single field. Each `\r` escape
// followed by the literal newline forms a CRLF line break; the lines inside
// the field value break with a bare newline.
const payload =
`------WebKitFormBoundaryU5rJUDxGnj15hIGW\r
Content-Disposition: form-data; name="field1"\r
\r
Hello
World,
This is me\r
------WebKitFormBoundaryU5rJUDxGnj15hIGW--`
// The first line is "--" followed by the boundary, so skip the two leading
// hyphens and stop at the first CRLF.
const boundary = payload.slice(2, payload.indexOf('\r\n'))
// Wrap the body in a Response carrying the matching Content-Type header and
// parse it with formData().
new Response(payload, {
headers: {
'Content-Type': `multipart/form-data; boundary=${boundary}`
}
})
.formData()
.then(formData => {
console.log([...formData]) // [['field1', 'Hello\nWorld,\nThis is me']]
})
The `\r` characters inside `payload` are necessary because the line breaks must be `\r\n` (except inside the values themselves). If you have a properly formed `multipart/form-data` blob, you don't need to add `\r`.
If you want to parse an HTTP response, you can use the fetch response directly:
// Request the endpoint, parse the response body as form data, then log its
// entries as an array.
fetch('/formdata-response')
  .then((res) => res.formData())
  .then((fields) => {
    console.log([...fields])
  })
Modern browsers can parse
multipart/form-data
natively. Example:
Thank you! We've come a long way since 2017
@Finesse `entries()` is not necessary; `FormData` spreads to an array of arrays (entries): `[...fd]`.
@guest271314 You are right, I've amended my code snippet
Very helpful snippet. If you have access to `fetch()`, it should be possible to use `text()` to get the raw `multipart/form-data` content with `\r\n` included:
// Round trip: serialise a FormData to text through a Response, read the
// boundary from the first line, then parse the text back with formData().
{
// Two "text/plain" file entries appended under the same field name.
var formdata = new FormData();
var dirname = "web-directory";
formdata.append(dirname, new Blob(["123"], {
type: "text/plain"
}), `${dirname}/file.txt`);
formdata.append(dirname, new Blob(["src"], {
type: "text/plain"
}), `${dirname}/src/file.txt`);
// Read the Response built from the FormData back as text.
var body = await new Response(formdata).text();
console.log(body);
// Skip the two leading characters of the first line and stop at the first CRLF.
const boundary = body.slice(2, body.indexOf('\r\n'))
console.log(boundary)
// Parse the text again, supplying the extracted boundary in the Content-Type header.
new Response(body, {
headers: {
'Content-Type': `multipart/form-data; boundary=${boundary}`
}
})
.formData()
.then(formData => {
console.log([...formData])
})
}
@Finesse Another way to do this when `payload` is a `TypedArray` (or `ArrayBuffer`):
// View the response body as bytes; clone() leaves `response` itself unread.
let ab = new Uint8Array(await response.clone().arrayBuffer());
// The body starts with "--" + boundary + CRLF: take the bytes after the two
// hyphens up to, but not including, the first CR (byte 13), so the boundary
// does not carry a trailing "\r".
let boundary = ab.subarray(2, ab.indexOf(13));
// Re-wrap the bytes with a matching Content-Type header and parse them.
// `archive` is the FormData, or undefined when parsing fails (the error is
// logged as a warning).
let archive = await new Response(ab, {
  headers: {
    "Content-Type": `multipart/form-data; boundary=${new TextDecoder().decode(boundary)}`,
  },
})
  .formData()
  .then((data) => {
    console.log([...data]);
    return data;
  }).catch((e) => {
    console.warn(e);
  });
can I use this in my project?