Skip to content

fetch fails on ByteString header with non-ASCII chars #1317

@AlttiRi

Description

@AlttiRi

`fetch` fails on a ByteString header if the header value contains characters with char codes of 128 or above.

The code:

server.js
import http from "http";

// Address the demo HTTP server binds to.
const host = "localhost";
const port = 8000;
// Every incoming request is passed to `requestListener`.
const server = http.createServer(requestListener);
server.listen(port, host, () => {
    // Announce the URL the server was asked to listen on.
    console.log(`Server is running on http://${host}:${port}`);
});

// Sample file names used to build the test headers below.
// They mix plain ASCII, quotes, an em dash, CJK characters, an emoji and a bare `%`.
// `requestListener` also renders this list as the response body.
const names = [
    `rock&roll.png`,
    `rock'n'roll.png`,
    `image — copy (1).png`,
    `_圖片_🖼_image_.png`,
    `100 % loading&perf.png`,
];

// Header values under test. Each one goes through `str2BStr`, which UTF-8 encodes
// the text and maps every resulting byte to one character, so non-ASCII names
// produce characters with char codes 128..255.
//   CD1: unquoted `filename=`.
//   CD2: quoted `filename="..."`.
//   CD3-CD5: quoted `filename="..."` followed by a percent-encoded `filename*=UTF-8''...`.
const CD1 = str2BStr(`inline; filename=${names[0]}`);
const CD2 = str2BStr(`inline; filename="${names[1]}"`);
const CD3 = str2BStr(`inline; filename="${names[2]}"; filename*=UTF-8''${encodeURIComponent(names[2])}`);
const CD4 = str2BStr(`inline; filename="${names[3]}"; filename*=UTF-8''${encodeURIComponent(names[3])}`);
const CD5 = str2BStr(`inline; filename="${names[4]}"; filename*=UTF-8''${encodeURIComponent(names[4])}`);


/**
 * Handles every request to the demo server.
 *
 * Sets the content type and the five `Content-Disposition-N` test headers,
 * answers with status 200 and sends the sample file names as `<li>` items.
 *
 * @param {import("http").IncomingMessage} req - Incoming request (not inspected).
 * @param {import("http").ServerResponse} res - Response to fill in and finish.
 */
function requestListener(req, res) {
    // Kept in the order the headers must be set.
    const headerPairs = [
        ["Content-Type", "text/html; charset=utf-8"],
        ["Content-Disposition-1", CD1],
        ["Content-Disposition-2", CD2],
        ["Content-Disposition-3", CD3],
        ["Content-Disposition-4", CD4],
        ["Content-Disposition-5", CD5],
    ];
    for (const [headerName, headerValue] of headerPairs) {
        res.setHeader(headerName, headerValue);
    }
    res.writeHead(200);

    let listItems = "";
    for (const fileName of names) {
        listItems += `<li>${fileName}</li>`;
    }
    res.end(listItems);
}


// --- Util ---
/**
 * Converts a regular string into a "binary string": the text is UTF-8 encoded
 * and the resulting bytes are handed to `arrayBufferToBinaryString`.
 *
 * @param {string} string - Text to convert.
 * @returns {string} The binary-string form of the UTF-8 bytes.
 */
function str2BStr(string) {
    const encoder = new TextEncoder();
    const utf8Bytes = encoder.encode(string);
    return arrayBufferToBinaryString(utf8Bytes);
}
/**
 * Converts a binary string back into a regular string: the characters are
 * turned into bytes by `binaryStringToArrayBuffer` and those bytes are decoded
 * with a default `TextDecoder` (UTF-8).
 *
 * @param {string} bString - Binary string to decode.
 * @returns {string} The decoded text.
 */
function bSrt2Str(bString) {
    const decoder = new TextDecoder();
    const rawBytes = binaryStringToArrayBuffer(bString);
    return decoder.decode(rawBytes);
}
/**
 * Builds a binary string from bytes: one character per byte, with the char
 * code equal to the byte value.
 *
 * @param {ArrayBuffer|Uint8Array|number[]} arrayBuffer - Bytes to convert.
 *   A raw `ArrayBuffer` is wrapped in a `Uint8Array` view first, since it has
 *   no `reduce` of its own; any other input is used as is.
 * @returns {string} String whose length equals the number of bytes.
 */
function arrayBufferToBinaryString(arrayBuffer) {
    const bytes = arrayBuffer instanceof ArrayBuffer
        ? new Uint8Array(arrayBuffer)
        : arrayBuffer;
    return bytes.reduce((accumulator, byte) => accumulator + String.fromCharCode(byte), "");
}
/**
 * Turns a binary string into bytes: one byte per UTF-16 code unit, taken from
 * that unit's char code (stored into a `Uint8Array`, so only the low 8 bits
 * of a code above 255 are kept).
 *
 * @param {string} binaryString - String to convert.
 * @returns {Uint8Array} Bytes, with the same length as the input string.
 */
function binaryStringToArrayBuffer(binaryString) {
    // Index-based on purpose: iterating the string itself would walk code points, not code units.
    return Uint8Array.from(
        {length: binaryString.length},
        (_, index) => binaryString.charCodeAt(index),
    );
}
client.js
import {fetch} from "undici";
// import fetch from "node-fetch";

// Send a HEAD request to the demo server and collect the values of every
// header whose name starts with "content-disposition".
const response = await fetch("http://localhost:8000/", {method: "head"});
let result = [];
for (const [headerName, headerValue] of response.headers.entries()) {
    if (headerName.startsWith("content-disposition")) {
        result.push(headerValue);
    }
}
// Print the raw values, then the length of each one.
console.log(result);
console.log(result.map(value => value.length));


// import {contentDispositionFilename} from "./util.js";
// console.log(result.map(v => contentDispositionFilename(v)));
util.js
export {contentDispositionFilename};

/* Using:
const cd = response.headers.get("content-disposition");
const name = contentDispositionFilename(cd);
 */
// RFC 5987:
// [1] inline; filename="file.jpg"; filename*=UTF-8''file.jpg
// Quoted:
// [2] inline; filename="file.jpg"
// Without quotes:
// [3] attachment; filename=file.jpg
//
// In rare cases the plain `filename=` value may also be percent-encoded (as by `encodeURIComponent`).
/**
 * Extracts the file name from a `Content-Disposition` header value.
 *
 * The value is first passed through `byteStringToString`. Three forms are
 * then tried, in this order:
 *   [1] `filename*=UTF-8''<percent-encoded>` as the last parameter
 *       (matched case-insensitively, so `utf-8` works too); always decoded.
 *   [2] `filename="<name>"` at the end of the header.
 *   [3] `filename=<name>`, taking everything up to the end of the header.
 *
 * @param {string} headerByteString - Raw header value.
 * @param {boolean} [decode=false] - Also run `decodeURIComponent` on a name
 *   found via form [2] or [3].
 * @returns {string|undefined} The file name, or `undefined` when the header
 *   is empty/missing or carries no `filename` parameter.
 * @throws {URIError} If a value that gets decoded is not valid percent-encoding.
 */
function contentDispositionFilename(headerByteString, decode = false) {
    if (!headerByteString) {
        return;
    }

    const headerString = byteStringToString(headerByteString);
    if (headerString !== headerByteString) {
        console.log("contentDispositionHeaderByteString:", headerByteString);
        console.log("contentDispositionHeaderString:",     headerString);
    }

    let result;
    // The `i` flag is needed because servers also send the charset as `utf-8`.
    const encodedFilename = headerString.match(/(?<=filename\*=UTF-8'')[^;]+(?=;?$)/i)?.[0]; // [1]
    if (encodedFilename) {
        result = decodeURIComponent(encodedFilename);
    } else {
        const filename = headerString.match(/(?<=filename=").+(?="$)/)?.[0] // [2]
                      || headerString.match(/(?<=filename=).+$/)?.[0];      // [3]
        if (filename === undefined) {
            // No `filename` parameter at all (e.g. a bare "inline"): nothing to extract.
            return;
        }

        result = decode ? decodeURIComponent(filename) : filename;
    }
    console.log("contentDispositionFilename:", result);

    return result;
}
/**
 * Decodes a binary string (every char code below 256) as UTF-8 text.
 *
 * - If any character has a char code of 256 or more, the input is treated as
 *   already-decoded text and returned unchanged.
 * - If every char code is below 128, the input is returned unchanged too
 *   (decoding would be a no-op).
 * - Otherwise the char codes are used as bytes and decoded with a default
 *   `TextDecoder`.
 *
 * Logs which of the two checks passed, as a debugging aid.
 *
 * @param {string} byteString - Header value or other possibly-binary string.
 * @returns {string} The decoded text, or the input itself.
 */
function byteStringToString(byteString) {
    const charCodes = [...byteString].map(symbol => symbol.charCodeAt(0));

    const fitsInBytes = !charCodes.some(code => code >= 256);
    if (!fitsInBytes) {
        return byteString;
    }
    console.log("isBinaryString", fitsInBytes);

    // Optional shortcut: pure ASCII needs no `TextDecoder.decode` pass.
    const hasHighBytes = charCodes.some(code => code >= 128);
    if (!hasHighBytes) {
        console.log("isASCII", fitsInBytes);
        return byteString;
    }

    return new TextDecoder().decode(Uint8Array.from(charCodes));
}

Result

...\node_modules\undici\lib\fetch\index.js:186
        Object.assign(new TypeError('fetch failed'), { cause: response.error })
                      ^

TypeError: fetch failed
    at Object.processResponse (...\node_modules\undici\lib\fetch\index.js:186:23)
    at Fetch.fetchFinale (...\node_modules\undici\lib\fetch\index.js:940:17)
    at Fetch.mainFetch (...\node_modules\undici\lib\fetch\index.js:747:17)
    at processTicksAndRejections (node:internal/process/task_queues:96:5) {
  cause: TypeError [ERR_INVALID_CHAR]: Invalid character in header content ["Content-Disposition-3"]
      at normalizeAndValidateHeaderValue (...\node_modules\undici\lib\fetch\headers.js:48:3)
      at HeadersList.append (...\node_modules\undici\lib\fetch\headers.js:98:29)
      at Headers.append (...\node_modules\undici\lib\fetch\headers.js:220:31)
      at Object.onHeaders (...\node_modules\undici\lib\fetch\index.js:1844:21)
      at Request.onHeaders (...\node_modules\undici\lib\core\request.js:176:27)
      at Parser.onHeadersComplete (...\node_modules\undici\lib\client.js:859:23)
      at wasm_on_headers_complete (...\node_modules\undici\lib\client.js:458:30)
      at wasm://wasm/0002afd2:wasm-function[11]:0x427
      at wasm://wasm/0002afd2:wasm-function[44]:0x8ad
      at wasm://wasm/0002afd2:wasm-function[56]:0x5c62 {
    code: 'ERR_INVALID_CHAR'
  }
}

The expected result

Deno:
image

Browser console:
image

Browser:
image

Deno, with the last 2 lines uncommented:
image

Node.js v17.5.0
undici v4.16.0

BTW, HTTP headers are ByteString (BinaryString).

https://developer.mozilla.org/en-US/docs/Web/API/DOMString/Binary
https://webidl.spec.whatwg.org/#idl-ByteString

Hm... someone has deleted the article about ByteString:
https://web.archive.org/web/20210608032047/https://developer.mozilla.org/en-US/docs/Web/API/ByteString
and replaced ByteString with String:
https://web.archive.org/web/20210731105134/https://developer.mozilla.org/en-US/docs/Web/API/Headers/get
That's not OK.

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugSomething isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions