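Automatic `record_delimiter` detection appears to be broken for UTF-16LE input. The example below parses the same CSV content encoded as UTF-8 and as UTF-16LE (each prefixed with its BOM) and produces different records: with UTF-16LE, the first field of the second record keeps a leading `\n`. If `record_delimiter` is given explicitly (`['\r\n']`), the UTF-16LE output is correct.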
const { Readable } = require('stream')
const { Parser } = require('csv-parse')
/**
 * Feed a BOM-prefixed buffer through a csv-parse `Parser` and print each
 * record it emits.
 *
 * @param {Buffer} bom - Bytes placed in front of `content` (the byte order mark).
 * @param {Buffer} content - Encoded CSV payload.
 * @param {*} [record_delimiter] - Forwarded unchanged as the parser's
 *   `record_delimiter` option; `undefined` when the caller omits it.
 * @returns {Promise<void>} Resolves after every record has been logged.
 */
async function parse (bom, content, record_delimiter) {
  const options = { bom: true, record_delimiter }
  const payload = Buffer.concat([bom, content])
  // `pipe` returns its destination, so `records` is the parser itself.
  const records = Readable.from(payload).pipe(new Parser(options))
  for await (const record of records) {
    console.log(record)
  }
}
/**
 * Run the reproduction: parse the same two-line CSV text as UTF-8, as
 * UTF-16LE, and as UTF-16LE with an explicit `\r\n` record delimiter,
 * printing a label before each run.
 *
 * @returns {Promise<void>} Resolves once all three runs have completed.
 */
async function main () {
  const text = 'a,b,c\r\n1,2,3'
  const scenarios = [
    { label: 'utf8', bom: [0xef, 0xbb, 0xbf], encoding: 'utf8', extra: [] },
    { label: 'utf16le', bom: [0xff, 0xfe], encoding: 'utf16le', extra: [] },
    { label: 'utf16le \\r\\n', bom: [0xff, 0xfe], encoding: 'utf16le', extra: [['\r\n']] }
  ]
  // Runs are strictly sequential so each label is followed by its own records.
  for (const { label, bom, encoding, extra } of scenarios) {
    console.log(label)
    // Spreading `extra` keeps the argument count identical to a direct call.
    await parse(Buffer.from(bom), Buffer.from(text, encoding), ...extra)
  }
}
// Start the reproduction. The rejection handler keeps the promise from
// floating: any error raised while parsing is reported and the process
// exits with a failing status instead of relying on the runtime's
// unhandled-rejection behaviour.
main().catch((err) => {
  console.error(err)
  process.exitCode = 1
})
/* output:
utf8
[ 'a', 'b', 'c' ]
[ '1', '2', '3' ]
utf16le
[ 'a', 'b', 'c' ]
[ '\n1', '2', '3' ]
utf16le \r\n
[ 'a', 'b', 'c' ]
[ '1', '2', '3' ]
*/
It looks like there is a problem regarding `record_delimiter` with UTF-16LE encoding. The example above generates different outputs for UTF-8 and UTF-16LE. If `record_delimiter` is given explicitly, the result is correct.